From b42f7cfc606783afcee792590efcc78b507468ab Mon Sep 17 00:00:00 2001 From: Mathieu Virbel Date: Mon, 15 Sep 2025 18:19:19 -0600 Subject: [PATCH] feat: remove profanity filter that was there for conference (#652) --- server/pyproject.toml | 1 - server/reflector/processors/types.py | 26 ++------------- server/uv.lock | 47 +++++++--------------------- 3 files changed, 14 insertions(+), 60 deletions(-) diff --git a/server/pyproject.toml b/server/pyproject.toml index 47d314d9..d055f461 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -27,7 +27,6 @@ dependencies = [ "prometheus-fastapi-instrumentator>=6.1.0", "sentencepiece>=0.1.99", "protobuf>=4.24.3", - "profanityfilter>=2.0.6", "celery>=5.3.4", "redis>=5.0.1", "python-jose[cryptography]>=3.3.0", diff --git a/server/reflector/processors/types.py b/server/reflector/processors/types.py index 480086af..7096e81c 100644 --- a/server/reflector/processors/types.py +++ b/server/reflector/processors/types.py @@ -4,11 +4,8 @@ import tempfile from pathlib import Path from typing import Annotated, TypedDict -from profanityfilter import ProfanityFilter from pydantic import BaseModel, Field, PrivateAttr -from reflector.redis_cache import redis_cache - class DiarizationSegment(TypedDict): """Type definition for diarization segment containing speaker information""" @@ -20,9 +17,6 @@ class DiarizationSegment(TypedDict): PUNC_RE = re.compile(r"[.;:?!…]") -profanity_filter = ProfanityFilter() -profanity_filter.set_censor("*") - class AudioFile(BaseModel): name: str @@ -124,21 +118,11 @@ def words_to_segments(words: list[Word]) -> list[TranscriptSegment]: class Transcript(BaseModel): translation: str | None = None - words: list[Word] = None - - @property - def raw_text(self): - # Uncensored text - return "".join([word.text for word in self.words]) - - @redis_cache(prefix="profanity", duration=3600 * 24 * 7) - def _get_censored_text(self, text: str): - return profanity_filter.censor(text).strip() + words: list[Word] = [] @property def text(self): - # Censored text - return self._get_censored_text(self.raw_text) + return "".join([word.text for word in self.words]) @property def human_timestamp(self): @@ -170,12 +154,6 @@ class Transcript(BaseModel): word.start += offset word.end += offset - def clone(self): - words = [ - Word(text=word.text, start=word.start, end=word.end) for word in self.words - ] - return Transcript(text=self.text, translation=self.translation, words=words) - def as_segments(self) -> list[TranscriptSegment]: return words_to_segments(self.words) diff --git a/server/uv.lock b/server/uv.lock index 5604f922..b93d0ac3 100644 --- a/server/uv.lock +++ b/server/uv.lock @@ -1325,15 +1325,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9c/1f/19ebc343cc71a7ffa78f17018535adc5cbdd87afb31d7c34874680148b32/ifaddr-0.2.0-py3-none-any.whl", hash = "sha256:085e0305cfe6f16ab12d72e2024030f5d52674afad6911bb1eee207177b8a748", size = 12314, upload-time = "2022-06-15T21:40:25.756Z" }, ] -[[package]] -name = "inflection" -version = "0.5.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e1/7e/691d061b7329bc8d54edbf0ec22fbfb2afe61facb681f9aaa9bff7a27d04/inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417", size = 15091, upload-time = "2020-08-22T08:16:29.139Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/59/91/aa6bde563e0085a02a435aa99b49ef75b0a4b062635e606dab23ce18d720/inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2", size = 9454, upload-time = "2020-08-22T08:16:27.816Z" }, -] - [[package]] name = "iniconfig" version = "2.1.0" @@ -2311,18 +2302,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/74/c1/bb7e334135859c3a92ec399bc89293ea73f28e815e35b43929c8db6af030/primePy-1.3-py3-none-any.whl", hash = "sha256:5ed443718765be9bf7e2ff4c56cdff71b42140a15b39d054f9d99f0009e2317a", size = 4040, upload-time = "2018-05-29T17:18:17.53Z" }, ] -[[package]] -name = "profanityfilter" -version = "2.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "inflection" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8d/03/08740b5e0800f9eb9f675c149a497a3f3735e7b04e414bcce64136e7e487/profanityfilter-2.1.0.tar.gz", hash = "sha256:0ede04e92a9d7255faa52b53776518edc6586dda828aca677c74b5994dfdd9d8", size = 7910, upload-time = "2024-11-25T22:31:51.194Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/03/eb18f72dc6e6398e75e3762677f18ab3a773a384b18efd3ed9119844e892/profanityfilter-2.1.0-py2.py3-none-any.whl", hash = "sha256:e1bc07012760fd74512a335abb93a36877831ed26abab78bfe31bebb68f8c844", size = 7483, upload-time = "2024-11-25T22:31:50.129Z" }, -] - [[package]] name = "prometheus-client" version = "0.22.1" @@ -3131,7 +3110,6 @@ dependencies = [ { name = "loguru" }, { name = "nltk" }, { name = "openai" }, - { name = "profanityfilter" }, { name = "prometheus-fastapi-instrumentator" }, { name = "protobuf" }, { name = "psycopg2-binary" }, @@ -3208,7 +3186,6 @@ requires-dist = [ { name = "loguru", specifier = ">=0.7.0" }, { name = "nltk", specifier = ">=3.8.1" }, { name = "openai", specifier = ">=1.59.7" }, - { name = "profanityfilter", specifier = ">=2.0.6" }, { name = "prometheus-fastapi-instrumentator", specifier = ">=6.1.0" }, { name = "protobuf", specifier = ">=4.24.3" }, { name = "psycopg2-binary", specifier = ">=2.9.10" }, @@ -3954,8 +3931,8 @@ dependencies = [ { name = "typing-extensions", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:3d05017d19bc99741288e458888283a44b0ee881d53f05f72f8b1cfea8998122" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:a47b7986bee3f61ad217d8a8ce24605809ab425baf349f97de758815edd2ef54" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp311-none-macosx_11_0_arm64.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl" }, ] [[package]] @@ -3980,16 +3957,16 @@ dependencies = [ { name = "typing-extensions", marker = "platform_python_implementation == 'PyPy' or sys_platform != 'darwin'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-linux_s390x.whl", hash = "sha256:2bfc013dd6efdc8f8223a0241d3529af9f315dffefb53ffa3bf14d3f10127da6" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:680129efdeeec3db5da3f88ee5d28c1b1e103b774aef40f9d638e2cce8f8d8d8" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:cb06175284673a581dd91fb1965662ae4ecaba6e5c357aa0ea7bb8b84b6b7eeb" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:7631ef49fbd38d382909525b83696dc12a55d68492ade4ace3883c62b9fc140f" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-win_arm64.whl", hash = "sha256:41e6fc5ec0914fcdce44ccf338b1d19a441b55cafdd741fd0bf1af3f9e4cfd14" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-linux_s390x.whl", hash = "sha256:0e34e276722ab7dd0dffa9e12fe2135a9b34a0e300c456ed7ad6430229404eb5" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:610f600c102386e581327d5efc18c0d6edecb9820b4140d26163354a99cd800d" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:cb9a8ba8137ab24e36bf1742cb79a1294bd374db570f09fc15a5e1318160db4e" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:2be20b2c05a0cce10430cc25f32b689259640d273232b2de357c35729132256d" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:99fc421a5d234580e45957a7b02effbf3e1c884a5dd077afc85352c77bf41434" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-linux_s390x.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-win_amd64.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-win_arm64.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-linux_s390x.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_amd64.whl" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-win_arm64.whl" }, ] [[package]]