mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2025-12-20 20:29:06 +00:00
update casing and trimming
This commit is contained in:
@@ -215,6 +215,9 @@ class LLM:
|
||||
# Change ( ABC ), [ ABC ], etc. ==> (ABC), [ABC], etc.
|
||||
pattern = r"(?<=[\[\{\(])\s+|\s+(?=[\]\}\)])"
|
||||
title = re.sub(pattern, "", modified_title)
|
||||
# Irrespective of casing changes, the starting letter
|
||||
# of title is always upper-cased
|
||||
title = title[0].upper() + title[1:]
|
||||
except Exception as e:
|
||||
reflector_logger.info(
|
||||
f"Failed to ensure casing on {title=} " f"with exception : {str(e)}"
|
||||
@@ -226,13 +229,12 @@ class LLM:
|
||||
"""
|
||||
List of manual trimming to the title.
|
||||
|
||||
Longer titles currently run into
|
||||
"Discussion on", "Discussion about", etc. that don't really
|
||||
Longer titles currently run into prefix of phrases that don't really
|
||||
add any descriptive information and in some cases, this behaviour
|
||||
can be repeated for several consecutive topics. We want to handle
|
||||
these cases.
|
||||
"""
|
||||
phrases_to_remove = ["Discussion on", "Discussion about"]
|
||||
phrases_to_remove = ["Discussing", "Discussion on", "Discussion about"]
|
||||
try:
|
||||
pattern = (
|
||||
r"\b(?:"
|
||||
|
||||
@@ -60,8 +60,8 @@ class TranscriptFinalTitleProcessor(Processor):
|
||||
|
||||
accumulated_titles = ".".join([chunk.title for chunk in self.chunks])
|
||||
title_result = await self.get_title(accumulated_titles)
|
||||
final_title = self.llm.ensure_casing(title_result["title"])
|
||||
final_title = self.llm.trim_title(final_title)
|
||||
final_title = self.llm.trim_title(title_result["title"])
|
||||
final_title = self.llm.ensure_casing(final_title)
|
||||
|
||||
final_title = FinalTitle(title=final_title)
|
||||
await self.emit(final_title)
|
||||
|
||||
@@ -54,9 +54,8 @@ class TranscriptTopicDetectorProcessor(Processor):
|
||||
text = self.transcript.text
|
||||
self.logger.info(f"Topic detector got {len(text)} length transcript")
|
||||
topic_result = await self.get_topic(text=text)
|
||||
|
||||
title = self.llm.ensure_casing(topic_result["title"])
|
||||
title = self.llm.trim_title(title)
|
||||
title = self.llm.trim_title(topic_result["title"])
|
||||
title = self.llm.ensure_casing(title)
|
||||
|
||||
summary = TitleSummary(
|
||||
title=title,
|
||||
|
||||
Reference in New Issue
Block a user