New summary (#283)
* handover final summary to Zephyr deployment
* fix display error
* push new summary feature
* fix failing test case
* Added markdown support for final summary
* update UI render issue
* retain sentence tokenizer call

Co-authored-by: Koper <andreas@monadical.com>
@@ -23,7 +23,7 @@ class ModalLLM(LLM):
         """
         # TODO: Query the specific GPU platform
         # Replace this with a HTTP call
-        return ["lmsys/vicuna-13b-v1.5"]
+        return ["lmsys/vicuna-13b-v1.5", "HuggingFaceH4/zephyr-7b-alpha"]
 
     async def _generate(
         self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
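The TODO in this hunk leaves the model list hard-coded. As a rough sketch of the HTTP call it hints at, reusing the httpx client this module already works with, a query to the deployment could look like the following; the base_url parameter, the "/models" path, and the list-of-strings response shape are assumptions for illustration, not part of Reflector's actual API:

    # Hypothetical sketch only: the "/models" endpoint and the
    # list-of-strings JSON response are assumptions, not a documented API.
    async def fetch_available_models(base_url: str) -> list[str]:
        async with httpx.AsyncClient() as client:
            response = await client.get(base_url + "/models")
            response.raise_for_status()
            # Expecting a JSON array of model identifiers
            return response.json()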
@@ -33,6 +33,13 @@ class ModalLLM(LLM):
             json_payload["gen_schema"] = gen_schema
         if gen_cfg:
             json_payload["gen_cfg"] = gen_cfg
+
+        # Handing over generation of the final summary to the Zephyr model,
+        # but replacing the Vicuna model will happen after more testing.
+        # TODO: Create a mapping of model names and cloud deployments
+        if self.model_name == "HuggingFaceH4/zephyr-7b-alpha":
+            self.llm_url = settings.ZEPHYR_LLM_URL + "/llm"
+
         async with httpx.AsyncClient() as client:
             response = await retry(client.post)(
                 self.llm_url,
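The TODO in the second hunk asks for a mapping of model names to cloud deployments instead of a hard-coded branch. A minimal sketch of that idea follows; only settings.ZEPHYR_LLM_URL is confirmed by the diff above, while the dictionary name, the helper function, and the fallback argument are illustrative assumptions:

    # Hypothetical sketch of the mapping the TODO suggests. Only
    # settings.ZEPHYR_LLM_URL appears in the diff; the names and the
    # fallback handling here are assumptions.
    MODEL_DEPLOYMENT_URLS = {
        "HuggingFaceH4/zephyr-7b-alpha": settings.ZEPHYR_LLM_URL,
    }

    def resolve_llm_url(model_name: str, default_base_url: str) -> str:
        """Return the /llm endpoint for a model, falling back to the default deployment."""
        base_url = MODEL_DEPLOYMENT_URLS.get(model_name, default_base_url)
        return base_url + "/llm"

With a mapping like this, routing a new model to its deployment becomes a one-line dictionary entry rather than another if branch inside _generate.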