New summary (#283)
* handover final summary to Zephyr deployment
* fix display error
* push new summary feature
* fix failing test case
* Added markdown support for final summary
* update UI render issue
* retain sentence tokenizer call

Co-authored-by: Koper <andreas@monadical.com>
@@ -23,7 +23,7 @@ class ModalLLM(LLM):
         """
         # TODO: Query the specific GPU platform
         # Replace this with a HTTP call
-        return ["lmsys/vicuna-13b-v1.5"]
+        return ["lmsys/vicuna-13b-v1.5", "HuggingFaceH4/zephyr-7b-alpha"]
 
     async def _generate(
         self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
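The TODO in this hunk leaves the model list hard-coded. As a rough sketch of the HTTP call it hints at, reusing the httpx client this module already works with, a query to the deployment could look like the following; the base_url parameter, the "/models" path, and the list-of-strings response shape are assumptions for illustration, not part of Reflector's actual API:

    # Hypothetical sketch only: the "/models" endpoint and the
    # list-of-strings JSON response are assumptions, not a documented API.
    async def fetch_available_models(base_url: str) -> list[str]:
        async with httpx.AsyncClient() as client:
            response = await client.get(base_url + "/models")
            response.raise_for_status()
            # Expecting a JSON array of model identifiers
            return response.json()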
@@ -33,6 +33,13 @@ class ModalLLM(LLM):
             json_payload["gen_schema"] = gen_schema
         if gen_cfg:
             json_payload["gen_cfg"] = gen_cfg
+
+        # Handing over generation of the final summary to the Zephyr model,
+        # but replacing the Vicuna model will happen after more testing.
+        # TODO: Create a mapping of model names and cloud deployments
+        if self.model_name == "HuggingFaceH4/zephyr-7b-alpha":
+            self.llm_url = settings.ZEPHYR_LLM_URL + "/llm"
+
         async with httpx.AsyncClient() as client:
             response = await retry(client.post)(
                 self.llm_url,
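The TODO in the second hunk asks for a mapping of model names to cloud deployments instead of a hard-coded branch. A minimal sketch of that idea follows; only settings.ZEPHYR_LLM_URL is confirmed by the diff above, while the dictionary name, the helper function, and the fallback argument are illustrative assumptions:

    # Hypothetical sketch of the mapping the TODO suggests. Only
    # settings.ZEPHYR_LLM_URL appears in the diff; the names and the
    # fallback handling here are assumptions.
    MODEL_DEPLOYMENT_URLS = {
        "HuggingFaceH4/zephyr-7b-alpha": settings.ZEPHYR_LLM_URL,
    }

    def resolve_llm_url(model_name: str, default_base_url: str) -> str:
        """Return the /llm endpoint for a model, falling back to the default deployment."""
        base_url = MODEL_DEPLOYMENT_URLS.get(model_name, default_base_url)
        return base_url + "/llm"

With a mapping like this, routing a new model to its deployment becomes a one-line dictionary entry rather than another if branch inside _generate.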