Update docs and fix LLM JSON parsing

- Use load_dotenv(".env") explicitly in all doc examples
- Move pydantic imports (BaseModel, Field) to setup cell in all examples
- Add separate display cell pattern for DataFrame inspection
- Fix LLM control character error: sanitize JSON before Pydantic parsing
- Remove debug print from llm.py
This commit is contained in:
2026-02-10 19:45:04 -06:00
parent 8eb1fb87a7
commit 46dfebd05f
2 changed files with 50 additions and 14 deletions

View File

@@ -1,6 +1,7 @@
"""Simple LLM helper for workbooks using Mirascope v2."""
import os
import re
from typing import TypeVar
from mirascope import llm
@@ -23,6 +24,15 @@ llm.register_provider(
)
def _sanitize_json(text: str) -> str:
"""Strip control characters (U+0000U+001F) that break JSON parsing.
Some LLMs emit literal newlines/tabs inside JSON string values,
which is invalid per the JSON spec. Replace them with spaces.
"""
return re.sub(r"[\x00-\x1f]+", " ", text)
async def llm_call(
prompt: str,
response_model: type[T],
@@ -47,4 +57,8 @@ async def llm_call(
return f"{system_prompt}\n\n{prompt}"
response = await _call()
return response.parse()
try:
return response.parse()
except Exception:
# Fallback: sanitize control characters and parse manually
return response_model.model_validate_json(_sanitize_json(response.content))