feat: remove profanity filter that was added for the conference (#652)

2025-12-22 05:09:05 +00:00 · 2025-09-15 18:19:19 -06:00
parent c546e69739
commit b42f7cfc60
3 changed files with 14 additions and 60 deletions
--- a/server/reflector/processors/types.py
+++ b/server/reflector/processors/types.py
@@ -4,11 +4,8 @@ import tempfile
 from pathlib import Path
 from typing import Annotated, TypedDict

-from profanityfilter import ProfanityFilter
 from pydantic import BaseModel, Field, PrivateAttr

-from reflector.redis_cache import redis_cache
-

 class DiarizationSegment(TypedDict):
    """Type definition for diarization segment containing speaker information"""
@@ -20,9 +17,6 @@ class DiarizationSegment(TypedDict):

 PUNC_RE = re.compile(r"[.;:?!…]")

-profanity_filter = ProfanityFilter()
-profanity_filter.set_censor("*")
-

 class AudioFile(BaseModel):
    name: str
@@ -124,21 +118,11 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:

 class Transcript(BaseModel):
    translation: str | None = None
-    words: list[Word] = None
-
-    @property
-    def raw_text(self):
-        # Uncensored text
-        return "".join([word.text for word in self.words])
-
-    @redis_cache(prefix="profanity", duration=3600 * 24 * 7)
-    def _get_censored_text(self, text: str):
-        return profanity_filter.censor(text).strip()
+    words: list[Word] = []

    @property
    def text(self):
-        # Censored text
-        return self._get_censored_text(self.raw_text)
+        return "".join([word.text for word in self.words])

    @property
    def human_timestamp(self):
@@ -170,12 +154,6 @@ class Transcript(BaseModel):
            word.start += offset
            word.end += offset

-    def clone(self):
-        words = [
-            Word(text=word.text, start=word.start, end=word.end) for word in self.words
-        ]
-        return Transcript(text=self.text, translation=self.translation, words=words)
-
    def as_segments(self) -> list[TranscriptSegment]:
        return words_to_segments(self.words)