Update docs and fix LLM JSON parsing
- Use load_dotenv(".env") explicitly in all doc examples
- Move pydantic imports (BaseModel, Field) to setup cell in all examples
- Add separate display cell pattern for DataFrame inspection
- Fix LLM control character error: if the initial parse fails, sanitize control characters in the JSON and retry with Pydantic validation
- Remove debug print from llm.py
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""Simple LLM helper for workbooks using Mirascope v2."""
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import TypeVar
|
||||
|
||||
from mirascope import llm
|
||||
@@ -23,6 +24,15 @@ llm.register_provider(
|
||||
)
|
||||
|
||||
|
||||
def _sanitize_json(text: str) -> str:
|
||||
"""Strip control characters (U+0000–U+001F) that break JSON parsing.
|
||||
|
||||
Some LLMs emit literal newlines/tabs inside JSON string values,
|
||||
which is invalid per the JSON spec. Replace them with spaces.
|
||||
"""
|
||||
return re.sub(r"[\x00-\x1f]+", " ", text)
|
||||
|
||||
|
||||
async def llm_call(
|
||||
prompt: str,
|
||||
response_model: type[T],
|
||||
@@ -47,4 +57,8 @@ async def llm_call(
|
||||
return f"{system_prompt}\n\n{prompt}"
|
||||
|
||||
response = await _call()
|
||||
return response.parse()
|
||||
try:
|
||||
return response.parse()
|
||||
except Exception:
|
||||
# Fallback: sanitize control characters and parse manually
|
||||
return response_model.model_validate_json(_sanitize_json(response.content))
|
||||
|
||||
Reference in New Issue
Block a user