New summary (#283)

* handover final summary to Zephyr deployment

* fix display error

* push new summary feature

* fix failing test case

* Added markdown support for final summary

* update UI render issue

* retain sentence tokenizer call

---------

Co-authored-by: Koper <andreas@monadical.com>
This commit is contained in:
projects-g
2023-10-13 22:53:29 +05:30
committed by GitHub
parent 38cd0385b4
commit 1d92d43fe0
13 changed files with 933 additions and 23 deletions

View File

@@ -23,7 +23,7 @@ class ModalLLM(LLM):
"""
# TODO: Query the specific GPU platform
# Replace this with a HTTP call
return ["lmsys/vicuna-13b-v1.5"]
return ["lmsys/vicuna-13b-v1.5", "HuggingFaceH4/zephyr-7b-alpha"]
async def _generate(
self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
@@ -33,6 +33,13 @@ class ModalLLM(LLM):
json_payload["gen_schema"] = gen_schema
if gen_cfg:
json_payload["gen_cfg"] = gen_cfg
# Handing over generation of the final summary to Zephyr model
# but replacing the Vicuna model will happen after more testing
# TODO: Create a mapping of model names and cloud deployments
if self.model_name == "HuggingFaceH4/zephyr-7b-alpha":
self.llm_url = settings.ZEPHYR_LLM_URL + "/llm"
async with httpx.AsyncClient() as client:
response = await retry(client.post)(
self.llm_url,