New summary (#283)

* hand over final summary to Zephyr deployment

* fix display error

* push new summary feature

* fix failing test case

* Added markdown support for final summary

* fix UI render issue

* retain sentence tokenizer call

---------

Co-authored-by: Koper <andreas@monadical.com>
projects-g committed (via GitHub) on 2023-10-13 22:53:29 +05:30
commit 1d92d43fe0 (parent 38cd0385b4)
13 changed files with 933 additions and 23 deletions


@@ -258,7 +258,7 @@ class LLM:
         """
         Choose the token size to set as the threshold to pack the LLM calls
         """
-        buffer_token_size = 25
+        buffer_token_size = 100
         default_output_tokens = 1000
         context_window = self.tokenizer.model_max_length
         tokens = self.tokenizer.tokenize(
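The hunk is cut off before the threshold itself is computed. A minimal sketch of the likely arithmetic, assuming the threshold simply reserves room for the expected output plus the safety buffer inside the context window (only the three values shown in the diff come from the source; everything else is illustrative):

# Hypothetical sketch: how a packing threshold could follow from the values above.
buffer_token_size = 100       # raised from 25 in this commit
default_output_tokens = 1000
context_window = 4096         # stand-in for self.tokenizer.model_max_length

# Tokens left for packing prompt text (assumption; the diff does not show this line)
packing_threshold = context_window - default_output_tokens - buffer_token_size
print(packing_threshold)  # 2996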


@@ -23,7 +23,7 @@ class ModalLLM(LLM):
         """
         # TODO: Query the specific GPU platform
         # Replace this with a HTTP call
-        return ["lmsys/vicuna-13b-v1.5"]
+        return ["lmsys/vicuna-13b-v1.5", "HuggingFaceH4/zephyr-7b-alpha"]

     async def _generate(
         self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs

@@ -33,6 +33,13 @@ class ModalLLM(LLM):
             json_payload["gen_schema"] = gen_schema
         if gen_cfg:
             json_payload["gen_cfg"] = gen_cfg
+
+        # Hand over generation of the final summary to the Zephyr model;
+        # fully replacing the Vicuna model will happen after more testing.
+        # TODO: Create a mapping of model names and cloud deployments
+        if self.model_name == "HuggingFaceH4/zephyr-7b-alpha":
+            self.llm_url = settings.ZEPHYR_LLM_URL + "/llm"
+
         async with httpx.AsyncClient() as client:
             response = await retry(client.post)(
                 self.llm_url,
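The TODO above points at replacing the hard-coded check with a lookup table. A minimal sketch of such a mapping, assuming one base URL per model name; the dict, the helper, and the URLs are illustrative, only the model names and the "/llm" suffix come from the diff:

# Hypothetical sketch of the model-name -> deployment-URL mapping the TODO asks for.
zephyr_url = "https://zephyr.example.internal"  # stand-in for settings.ZEPHYR_LLM_URL

MODEL_DEPLOYMENTS = {
    "HuggingFaceH4/zephyr-7b-alpha": zephyr_url,
    # "lmsys/vicuna-13b-v1.5" keeps using the default Modal deployment for now
}

def resolve_llm_url(model_name: str, default_url: str) -> str:
    # Fall back to the existing deployment when the model has no dedicated entry
    base_url = MODEL_DEPLOYMENTS.get(model_name, default_url)
    return base_url + "/llm"

print(resolve_llm_url("HuggingFaceH4/zephyr-7b-alpha", "https://vicuna.example.internal"))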


@@ -144,7 +144,76 @@ class TopicParams(LLMTaskParams):
         return self._task_params


+class BulletedSummaryParams(LLMTaskParams):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._gen_cfg = GenerationConfig(
+            max_new_tokens=800,
+            num_beams=1,
+            do_sample=True,
+            temperature=0.2,
+            early_stopping=True,
+        )
+        self._instruct = """
+        Given a meeting transcript, extract the key things discussed in the
+        form of a list.
+        While generating the response, follow the constraints mentioned below.
+        Summary constraints:
+        i) Do not add new content, except to fix spelling or punctuation.
+        ii) Do not add any prefixes or numbering in the response.
+        iii) The summarization should be as information dense as possible.
+        iv) Do not add any additional sections like Note, Conclusion, etc. in
+        the response.
+        Response format:
+        i) The response should be in the form of a bulleted list.
+        ii) Iteratively merge all the relevant paragraphs together to keep the
+        number of paragraphs to a minimum.
+        iii) Remove any unfinished sentences from the final response.
+        iv) Do not include narrative or reporting clauses.
+        v) Use "*" as the bullet icon.
+        """
+        self._task_params = TaskParams(
+            instruct=self._instruct, gen_schema=None, gen_cfg=self._gen_cfg
+        )
+
+    def _get_task_params(self) -> TaskParams:
+        """
+        Return the parameters associated with a specific LLM task
+        """
+        return self._task_params
+
+
+class MergedSummaryParams(LLMTaskParams):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._gen_cfg = GenerationConfig(
+            max_new_tokens=600,
+            num_beams=1,
+            do_sample=True,
+            temperature=0.2,
+            early_stopping=True,
+        )
+        self._instruct = """
+        Given the key points of a meeting, summarize the points to describe the
+        meeting in the form of paragraphs.
+        """
+        self._task_params = TaskParams(
+            instruct=self._instruct, gen_schema=None, gen_cfg=self._gen_cfg
+        )
+
+    def _get_task_params(self) -> TaskParams:
+        """
+        Return the parameters associated with a specific LLM task
+        """
+        return self._task_params
LLMTaskParams.register("topic", TopicParams)
LLMTaskParams.register("final_title", FinalTitleParams)
LLMTaskParams.register("final_short_summary", FinalShortSummaryParams)
LLMTaskParams.register("final_long_summary", FinalLongSummaryParams)
LLMTaskParams.register("bullet_summary", BulletedSummaryParams)
LLMTaskParams.register("merged_summary", MergedSummaryParams)


@@ -1,3 +1,4 @@
+import nltk
 from reflector.llm import LLM, LLMTaskParams
 from reflector.processors.base import Processor
 from reflector.processors.types import FinalLongSummary, TitleSummary

@@ -10,36 +11,58 @@ class TranscriptFinalLongSummaryProcessor(Processor):
     INPUT_TYPE = TitleSummary
     OUTPUT_TYPE = FinalLongSummary
     TASK = "final_long_summary"

     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.chunks: list[TitleSummary] = []
-        self.llm = LLM.get_instance()
         self.params = LLMTaskParams.get_instance(self.TASK).task_params
+        self.llm = LLM.get_instance(model_name="HuggingFaceH4/zephyr-7b-alpha")

     async def _push(self, data: TitleSummary):
         self.chunks.append(data)

+    async def get_bullet_summary(self, text: str) -> str:
+        params = LLMTaskParams.get_instance("bullet_summary").task_params
+        chunks = list(self.llm.split_corpus(corpus=text, task_params=params))
+        bullet_summary = ""
+        for chunk in chunks:
+            prompt = self.llm.create_prompt(instruct=params.instruct, text=chunk)
+            summary_result = await self.llm.generate(
+                prompt=prompt,
+                gen_schema=params.gen_schema,
+                gen_cfg=params.gen_cfg,
+                logger=self.logger,
+            )
+            bullet_summary += summary_result["long_summary"]
+        return bullet_summary
+
+    async def get_merged_summary(self, text: str) -> str:
+        params = LLMTaskParams.get_instance("merged_summary").task_params
+        chunks = list(self.llm.split_corpus(corpus=text, task_params=params))
+        merged_summary = ""
+        for chunk in chunks:
+            prompt = self.llm.create_prompt(instruct=params.instruct, text=chunk)
+            summary_result = await self.llm.generate(
+                prompt=prompt,
+                gen_schema=params.gen_schema,
+                gen_cfg=params.gen_cfg,
+                logger=self.logger,
+            )
+            merged_summary += summary_result["long_summary"]
+        return merged_summary
+
     async def get_long_summary(self, text: str) -> str:
         """
         Generate a long version of the final summary
         """
         self.logger.info(f"Smoothing out {len(text)} length summary to a long summary")
-        chunks = list(self.llm.split_corpus(corpus=text, task_params=self.params))
+        bullet_summary = await self.get_bullet_summary(text)
+        merged_summary = await self.get_merged_summary(bullet_summary)
-        accumulated_summaries = ""
-        for chunk in chunks:
-            prompt = self.llm.create_prompt(instruct=self.params.instruct, text=chunk)
-            summary_result = await self.llm.generate(
-                prompt=prompt,
-                gen_schema=self.params.gen_schema,
-                gen_cfg=self.params.gen_cfg,
-                logger=self.logger,
-            )
-            accumulated_summaries += summary_result["long_summary"]
+        return merged_summary
-        return accumulated_summaries
+
+    def sentence_tokenize(self, text: str) -> list[str]:
+        return nltk.sent_tokenize(text)

     async def _flush(self):
         if not self.chunks:
@@ -49,11 +72,25 @@ class TranscriptFinalLongSummaryProcessor(Processor):
         accumulated_summaries = " ".join([chunk.summary for chunk in self.chunks])
         long_summary = await self.get_long_summary(accumulated_summaries)
+
+        # Format the output as much as possible so it can be handled
+        # by the front-end for displaying
+        summary_sentences = []
+        for sentence in self.sentence_tokenize(long_summary):
+            sentence = str(sentence).strip()
+            if sentence.startswith("- "):
+                sentence = sentence.replace("- ", "* ")
+            else:
+                sentence = "* " + sentence
+            sentence += " \n"
+            summary_sentences.append(sentence)
+        formatted_long_summary = "".join(summary_sentences)
+
         last_chunk = self.chunks[-1]
         duration = last_chunk.timestamp + last_chunk.duration
         final_long_summary = FinalLongSummary(
-            long_summary=long_summary,
+            long_summary=formatted_long_summary,
             duration=duration,
         )
         await self.emit(final_long_summary)
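A standalone sketch of what the new formatting pass does to the merged summary. The helper below re-implements the loop from _flush for illustration only; it is not part of the diff, and nltk.sent_tokenize needs the punkt data to be downloaded first:

import nltk

def format_as_markdown_bullets(long_summary: str) -> str:
    # Mirror of the _flush formatting: one "* " bullet per sentence, normalizing
    # any "- " bullets the model produced, and adding trailing spaces + newlines
    # so the front-end renders markdown line breaks.
    summary_sentences = []
    for sentence in nltk.sent_tokenize(long_summary):
        sentence = sentence.strip()
        if sentence.startswith("- "):
            sentence = sentence.replace("- ", "* ")
        else:
            sentence = "* " + sentence
        summary_sentences.append(sentence + " \n")
    return "".join(summary_sentences)

print(format_as_markdown_bullets("The team shipped the summary feature. Testing continues."))
# * The team shipped the summary feature.
# * Testing continues.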


@@ -72,6 +72,7 @@ class Settings(BaseSettings):
     LLM_TIMEOUT: int = 60 * 5  # take cold start into account
     LLM_MAX_TOKENS: int = 1024
     LLM_TEMPERATURE: float = 0.7
+    ZEPHYR_LLM_URL: str | None = None

     # LLM Banana configuration
     LLM_BANANA_API_KEY: str | None = None
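A minimal sketch of wiring the new setting, assuming the usual pydantic BaseSettings behaviour of reading fields from the environment; the URL and the commented import path are placeholders, not from the diff:

import os

# ZEPHYR_LLM_URL points at the Zephyr cloud deployment; leaving it unset keeps the
# default of None, and ModalLLM then keeps using its existing llm_url.
os.environ["ZEPHYR_LLM_URL"] = "https://zephyr-deployment.example.run"

# from reflector.settings import settings   # assumed import path, not shown above
# assert settings.ZEPHYR_LLM_URL == "https://zephyr-deployment.example.run"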