From 0e2ae5fca86c883547d8c7ecb787d776c4adcf58 Mon Sep 17 00:00:00 2001 From: Mathieu Virbel Date: Wed, 16 Jul 2025 18:58:57 -0600 Subject: [PATCH] fix: punkt -> punkt_tab + pre-download nltk packages to prevent runtime not working (#489) --- server/Dockerfile | 4 ++++ server/reflector/llm/base.py | 8 +++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/server/Dockerfile b/server/Dockerfile index 58bac55a..6c672ee4 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -16,10 +16,14 @@ WORKDIR /app COPY pyproject.toml uv.lock /app/ RUN touch README.md && env uv sync --compile-bytecode --locked +# pre-download nltk packages +RUN uv run python -c "import nltk; nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger')" + # bootstrap COPY alembic.ini runserver.sh /app/ COPY images /app/images COPY migrations /app/migrations COPY reflector /app/reflector WORKDIR /app + CMD ["./runserver.sh"] diff --git a/server/reflector/llm/base.py b/server/reflector/llm/base.py index a934b6f0..5531f7a4 100644 --- a/server/reflector/llm/base.py +++ b/server/reflector/llm/base.py @@ -45,7 +45,7 @@ class LLM: downloads only if needed. """ if not cls._nltk_downloaded: - nltk.download("punkt") + nltk.download("punkt_tab") # For POS tagging nltk.download("averaged_perceptron_tagger") cls._nltk_downloaded = True @@ -222,7 +222,7 @@ class LLM: title = modified_title[0].upper() + modified_title[1:] except Exception as e: reflector_logger.info( - f"Failed to ensure casing on {title=} " f"with exception : {str(e)}" + f"Failed to ensure casing on {title=} with exception : {str(e)}" ) return title @@ -245,9 +245,7 @@ class LLM: ) title = re.sub(pattern, "", title, flags=re.IGNORECASE) except Exception as e: - reflector_logger.info( - f"Failed to trim {title=} " f"with exception : {str(e)}" - ) + reflector_logger.info(f"Failed to trim {title=} with exception : {str(e)}") return title async def _generate(