feat: remove the profanity filter that was in place for the conference (#652)

This commit is contained in:
2025-09-15 18:19:19 -06:00
committed by GitHub
parent c546e69739
commit b42f7cfc60
3 changed files with 14 additions and 60 deletions

View File

@@ -4,11 +4,8 @@ import tempfile
from pathlib import Path
from typing import Annotated, TypedDict
from profanityfilter import ProfanityFilter
from pydantic import BaseModel, Field, PrivateAttr
from reflector.redis_cache import redis_cache
class DiarizationSegment(TypedDict):
"""Type definition for diarization segment containing speaker information"""
@@ -20,9 +17,6 @@ class DiarizationSegment(TypedDict):
PUNC_RE = re.compile(r"[.;:?!…]")
profanity_filter = ProfanityFilter()
profanity_filter.set_censor("*")
class AudioFile(BaseModel):
name: str
@@ -124,21 +118,11 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]:
class Transcript(BaseModel):
translation: str | None = None
words: list[Word] = None
@property
def raw_text(self):
# Uncensored text
return "".join([word.text for word in self.words])
@redis_cache(prefix="profanity", duration=3600 * 24 * 7)
def _get_censored_text(self, text: str):
return profanity_filter.censor(text).strip()
words: list[Word] = []
@property
def text(self):
# Censored text
return self._get_censored_text(self.raw_text)
return "".join([word.text for word in self.words])
@property
def human_timestamp(self):
@@ -170,12 +154,6 @@ class Transcript(BaseModel):
word.start += offset
word.end += offset
def clone(self):
words = [
Word(text=word.text, start=word.start, end=word.end) for word in self.words
]
return Transcript(text=self.text, translation=self.translation, words=words)
def as_segments(self) -> list[TranscriptSegment]:
return words_to_segments(self.words)