Update docs and fix LLM JSON parsing

- Use load_dotenv(".env") explicitly in all doc examples
- Move pydantic imports (BaseModel, Field) to setup cell in all examples
- Add separate display cell pattern for DataFrame inspection
- Fix LLM control character error: sanitize JSON before Pydantic parsing
- Remove debug print from llm.py
2026-02-10 19:45:04 -06:00
parent 8eb1fb87a7
commit 46dfebd05f
2 changed files with 50 additions and 14 deletions
--- a/workflows/lib/llm.py
+++ b/workflows/lib/llm.py
@@ -1,6 +1,7 @@
 """Simple LLM helper for workbooks using Mirascope v2."""

 import os
+import re
 from typing import TypeVar

 from mirascope import llm
@@ -23,6 +24,15 @@ llm.register_provider(
 )


+def _sanitize_json(text: str) -> str:
+    """Replace each run of control characters (U+0000–U+001F) with one space.
+
+    Some LLMs emit literal newlines/tabs inside JSON string values, which is
+    invalid per the JSON spec. Lossy: applies to the whole text, not just strings.
+    """
+    return re.sub(r"[\x00-\x1f]+", " ", text)
+
+
 async def llm_call(
    prompt: str,
    response_model: type[T],
@@ -47,4 +57,8 @@ async def llm_call(
        return f"{system_prompt}\n\n{prompt}"

    response = await _call()
-    return response.parse()
+    try:
+        return response.parse()
+    except Exception:
+        # Fallback: sanitize control characters and parse manually
+        return response_model.model_validate_json(_sanitize_json(response.content))