New summary (#283)

* hand over final summary generation to the Zephyr deployment

* fix display error

* add new summary feature

* fix failing test case

* add markdown support for final summary

* fix UI render issue

* retain sentence tokenizer call

---------

Co-authored-by: Koper <andreas@monadical.com>
projects-g committed 2023-10-13 22:53:29 +05:30 (via GitHub)
parent 38cd0385b4
commit 1d92d43fe0
13 changed files with 933 additions and 23 deletions

View File

@@ -258,7 +258,7 @@ class LLM:
         """
         Choose the token size to set as the threshold to pack the LLM calls
         """
-        buffer_token_size = 25
+        buffer_token_size = 100
         default_output_tokens = 1000
         context_window = self.tokenizer.model_max_length
         tokens = self.tokenizer.tokenize(
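
For context, the hunk only shows the inputs to the threshold; a minimal sketch of how such a packing threshold is typically derived from them follows (the subtraction is an assumption, not code from the diff):

# Sketch only: assumes the threshold is whatever context remains after
# reserving space for the model's output plus a safety buffer.
def packing_threshold(context_window: int,
                      default_output_tokens: int = 1000,
                      buffer_token_size: int = 100) -> int:
    return context_window - default_output_tokens - buffer_token_size

# e.g. with a 4096-token window: 4096 - 1000 - 100 = 2996 tokens left
# for packed prompt text, so raising the buffer from 25 to 100 trims
# 75 tokens from each packed call.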

View File

@@ -23,7 +23,7 @@ class ModalLLM(LLM):
         """
         # TODO: Query the specific GPU platform
         # Replace this with a HTTP call
-        return ["lmsys/vicuna-13b-v1.5"]
+        return ["lmsys/vicuna-13b-v1.5", "HuggingFaceH4/zephyr-7b-alpha"]

     async def _generate(
         self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
@@ -33,6 +33,13 @@ class ModalLLM(LLM):
         json_payload["gen_schema"] = gen_schema
         if gen_cfg:
             json_payload["gen_cfg"] = gen_cfg
+
+        # Hand over generation of the final summary to the Zephyr model;
+        # fully replacing the Vicuna model will happen after more testing.
+        # TODO: Create a mapping of model names and cloud deployments
+        if self.model_name == "HuggingFaceH4/zephyr-7b-alpha":
+            self.llm_url = settings.ZEPHYR_LLM_URL + "/llm"
+
         async with httpx.AsyncClient() as client:
             response = await retry(client.post)(
                 self.llm_url,
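
The TODO above hints at the intended cleanup: a lookup from model name to deployment URL instead of a hard-coded branch. A hypothetical sketch of that mapping (only settings.ZEPHYR_LLM_URL and the two model names appear in the diff; the function and its signature are illustrative):

def resolve_llm_url(model_name: str, default_url: str, zephyr_url: str) -> str:
    # Hypothetical mapping of model names to cloud deployments, as the
    # TODO describes; models without an entry keep the default URL.
    deployments = {
        "HuggingFaceH4/zephyr-7b-alpha": zephyr_url + "/llm",
    }
    return deployments.get(model_name, default_url)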

View File

@@ -144,7 +144,76 @@ class TopicParams(LLMTaskParams):
         return self._task_params


+class BulletedSummaryParams(LLMTaskParams):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._gen_cfg = GenerationConfig(
+            max_new_tokens=800,
+            num_beams=1,
+            do_sample=True,
+            temperature=0.2,
+            early_stopping=True,
+        )
+        self._instruct = """
+        Given a meeting transcript, extract the key things discussed in the
+        form of a list.
+        While generating the response, follow the constraints mentioned below.
+
+        Summary constraints:
+        i) Do not add new content, except to fix spelling or punctuation.
+        ii) Do not add any prefixes or numbering in the response.
+        iii) The summarization should be as information dense as possible.
+        iv) Do not add any additional sections like Note, Conclusion, etc. in
+        the response.
+
+        Response format:
+        i) The response should be in the form of a bulleted list.
+        ii) Iteratively merge all the relevant paragraphs together to keep the
+        number of paragraphs to a minimum.
+        iii) Remove any unfinished sentences from the final response.
+        iv) Do not include narrative or reporting clauses.
+        v) Use "*" as the bullet icon.
+        """
+        self._task_params = TaskParams(
+            instruct=self._instruct, gen_schema=None, gen_cfg=self._gen_cfg
+        )
+
+    def _get_task_params(self) -> TaskParams:
+        """
+        Return the parameters associated with a specific LLM task
+        """
+        return self._task_params
+
+
+class MergedSummaryParams(LLMTaskParams):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._gen_cfg = GenerationConfig(
+            max_new_tokens=600,
+            num_beams=1,
+            do_sample=True,
+            temperature=0.2,
+            early_stopping=True,
+        )
+        self._instruct = """
+        Given the key points of a meeting, summarize the points to describe the
+        meeting in the form of paragraphs.
+        """
+        self._task_params = TaskParams(
+            instruct=self._instruct, gen_schema=None, gen_cfg=self._gen_cfg
+        )
+
+    def _get_task_params(self) -> TaskParams:
+        """
+        Return the parameters associated with a specific LLM task
+        """
+        return self._task_params
+
+
 LLMTaskParams.register("topic", TopicParams)
 LLMTaskParams.register("final_title", FinalTitleParams)
 LLMTaskParams.register("final_short_summary", FinalShortSummaryParams)
 LLMTaskParams.register("final_long_summary", FinalLongSummaryParams)
+LLMTaskParams.register("bullet_summary", BulletedSummaryParams)
+LLMTaskParams.register("merged_summary", MergedSummaryParams)