diff --git a/server/reflector/llm/base.py b/server/reflector/llm/base.py index 9d6a8558..446e38e6 100644 --- a/server/reflector/llm/base.py +++ b/server/reflector/llm/base.py @@ -215,6 +215,9 @@ class LLM: # Change ( ABC ), [ ABC ], etc. ==> (ABC), [ABC], etc. pattern = r"(?<=[\[\{\(])\s+|\s+(?=[\]\}\)])" title = re.sub(pattern, "", modified_title) + # Irrespective of casing changes, the starting letter + # of title is always upper-cased + title = title[0].upper() + title[1:] except Exception as e: reflector_logger.info( f"Failed to ensure casing on {title=} " f"with exception : {str(e)}" @@ -226,13 +229,12 @@ class LLM: """ List of manual trimming to the title. - Longer titles currently run into - "Discussion on", "Discussion about", etc. that don't really + Longer titles currently run into prefix of phrases that don't really add any descriptive information and in some cases, this behaviour can be repeated for several consecutive topics. We want to handle these cases. """ - phrases_to_remove = ["Discussion on", "Discussion about"] + phrases_to_remove = ["Discussing", "Discussion on", "Discussion about"] try: pattern = ( r"\b(?:" diff --git a/server/reflector/processors/transcript_final_title.py b/server/reflector/processors/transcript_final_title.py index cc05337b..0a8aead8 100644 --- a/server/reflector/processors/transcript_final_title.py +++ b/server/reflector/processors/transcript_final_title.py @@ -60,8 +60,8 @@ class TranscriptFinalTitleProcessor(Processor): accumulated_titles = ".".join([chunk.title for chunk in self.chunks]) title_result = await self.get_title(accumulated_titles) - final_title = self.llm.ensure_casing(title_result["title"]) - final_title = self.llm.trim_title(final_title) + final_title = self.llm.trim_title(title_result["title"]) + final_title = self.llm.ensure_casing(final_title) final_title = FinalTitle(title=final_title) await self.emit(final_title) diff --git a/server/reflector/processors/transcript_topic_detector.py b/server/reflector/processors/transcript_topic_detector.py index 3f7c7105..43bf9762 100644 --- a/server/reflector/processors/transcript_topic_detector.py +++ b/server/reflector/processors/transcript_topic_detector.py @@ -54,9 +54,8 @@ class TranscriptTopicDetectorProcessor(Processor): text = self.transcript.text self.logger.info(f"Topic detector got {len(text)} length transcript") topic_result = await self.get_topic(text=text) - - title = self.llm.ensure_casing(topic_result["title"]) - title = self.llm.trim_title(title) + title = self.llm.trim_title(topic_result["title"]) + title = self.llm.ensure_casing(title) summary = TitleSummary( title=title,