add profanity filter, post-process topic/title

This commit is contained in:
Gokul Mohanarangan
2023-09-21 11:12:00 +05:30
parent 19dfb1d027
commit ab41ce90e8
8 changed files with 224 additions and 5 deletions

View File

@@ -6,11 +6,12 @@ from typing import TypeVar
import nltk
from prometheus_client import Counter, Histogram
from transformers import GenerationConfig
from reflector.llm.llm_params import TaskParams
from reflector.logger import logger as reflector_logger
from reflector.settings import settings
from reflector.utils.retry import retry
from transformers import GenerationConfig
T = TypeVar("T", bound="LLM")
@@ -221,6 +222,30 @@ class LLM:
return title
def trim_title(self, title: str) -> str:
    """
    Remove filler lead-in phrases from a generated title.

    Longer titles tend to contain "Discussion on", "Discussion about",
    etc., which add no descriptive information and can repeat across
    several consecutive topics. Those phrases are removed
    case-insensitively and only on word boundaries, so a title such as
    "Discussion online tools" is left alone.

    When a phrase is removed, the whitespace it leaves behind is cleaned
    up: runs of whitespace collapse to a single space and the ends are
    stripped, so "Discussion on AI" becomes "AI" rather than " AI".
    A title that contains none of the phrases is returned unchanged.

    Trimming is best-effort: if the substitution fails (for example
    because ``title`` is not a string), the failure is logged and the
    input is returned as-is.

    :param title: the title to clean up
    :return: the title with filler phrases removed
    """
    phrases_to_remove = ("Discussion on", "Discussion about")
    # Escape each phrase so it is matched literally inside the alternation.
    pattern = (
        r"\b(?:"
        + "|".join(re.escape(phrase) for phrase in phrases_to_remove)
        + r")\b"
    )
    try:
        trimmed, removed = re.subn(pattern, "", title, flags=re.IGNORECASE)
    except (TypeError, re.error) as e:
        reflector_logger.info(f"Failed to trim {title=} with exception : {e}")
        return title
    if not removed:
        # Nothing matched: do not touch the caller's original spacing.
        return title
    # Deleting a phrase leaves its neighbouring spaces behind; normalise them.
    return " ".join(trimmed.split())
async def _generate(
self, prompt: str, gen_schema: dict | None, gen_cfg: dict | None, **kwargs
) -> str: