update casing and trimming

This commit is contained in:
Gokul Mohanarangan
2023-09-22 07:29:01 +05:30
parent ab41ce90e8
commit 009d52ea23
3 changed files with 9 additions and 8 deletions

View File

@@ -215,6 +215,9 @@ class LLM:
# Change ( ABC ), [ ABC ], etc. ==> (ABC), [ABC], etc. # Change ( ABC ), [ ABC ], etc. ==> (ABC), [ABC], etc.
pattern = r"(?<=[\[\{\(])\s+|\s+(?=[\]\}\)])" pattern = r"(?<=[\[\{\(])\s+|\s+(?=[\]\}\)])"
title = re.sub(pattern, "", modified_title) title = re.sub(pattern, "", modified_title)
# Irrespective of casing changes, the starting letter
# of title is always upper-cased
title = title[0].upper() + title[1:]
except Exception as e: except Exception as e:
reflector_logger.info( reflector_logger.info(
f"Failed to ensure casing on {title=} " f"with exception : {str(e)}" f"Failed to ensure casing on {title=} " f"with exception : {str(e)}"
@@ -226,13 +229,12 @@ class LLM:
""" """
List of manual trimming to the title. List of manual trimming to the title.
Longer titles currently run into Longer titles currently run into prefix of phrases that don't really
"Discussion on", "Discussion about", etc. that don't really
add any descriptive information and in some cases, this behaviour add any descriptive information and in some cases, this behaviour
can be repeated for several consecutive topics. We want to handle can be repeated for several consecutive topics. We want to handle
these cases. these cases.
""" """
phrases_to_remove = ["Discussion on", "Discussion about"] phrases_to_remove = ["Discussing", "Discussion on", "Discussion about"]
try: try:
pattern = ( pattern = (
r"\b(?:" r"\b(?:"

View File

@@ -60,8 +60,8 @@ class TranscriptFinalTitleProcessor(Processor):
accumulated_titles = ".".join([chunk.title for chunk in self.chunks]) accumulated_titles = ".".join([chunk.title for chunk in self.chunks])
title_result = await self.get_title(accumulated_titles) title_result = await self.get_title(accumulated_titles)
final_title = self.llm.ensure_casing(title_result["title"]) final_title = self.llm.trim_title(title_result["title"])
final_title = self.llm.trim_title(final_title) final_title = self.llm.ensure_casing(final_title)
final_title = FinalTitle(title=final_title) final_title = FinalTitle(title=final_title)
await self.emit(final_title) await self.emit(final_title)

View File

@@ -54,9 +54,8 @@ class TranscriptTopicDetectorProcessor(Processor):
text = self.transcript.text text = self.transcript.text
self.logger.info(f"Topic detector got {len(text)} length transcript") self.logger.info(f"Topic detector got {len(text)} length transcript")
topic_result = await self.get_topic(text=text) topic_result = await self.get_topic(text=text)
title = self.llm.trim_title(topic_result["title"])
title = self.llm.ensure_casing(topic_result["title"]) title = self.llm.ensure_casing(title)
title = self.llm.trim_title(title)
summary = TitleSummary( summary = TitleSummary(
title=title, title=title,