mirror of
https://github.com/Monadical-SAS/reflector.git
synced 2026-04-05 13:26:48 +00:00
feat: local LLM via Ollama + structured output response_format
- Add setup script (scripts/setup-local-llm.sh) for one-command Ollama setup Mac: native Metal GPU, Linux: containerized via docker-compose profiles - Add ollama-gpu and ollama-cpu docker-compose profiles for Linux - Add extra_hosts to server/hatchet-worker-llm for host.docker.internal - Pass response_format JSON schema in StructuredOutputWorkflow.extract() enabling grammar-based constrained decoding on Ollama/llama.cpp/vLLM/OpenAI - Update .env.example with Ollama as default LLM option - Add Ollama PRD and local dev setup docs
This commit is contained in:
@@ -286,6 +286,92 @@ class TestStructuredOutputWorkflow:
|
||||
assert mock_settings.llm.acomplete.call_count == 2
|
||||
|
||||
|
||||
class TestResponseFormat:
    """Verify that a JSON-schema response_format is forwarded to acomplete."""

    @pytest.mark.asyncio
    async def test_acomplete_called_with_response_format(self):
        """The response_format kwarg must carry the Pydantic model's JSON schema."""
        flow = StructuredOutputWorkflow(
            output_cls=TestResponse,
            max_retries=3,
            timeout=30,
        )

        with (
            patch("reflector.llm.TreeSummarize") as summarize_cls,
            patch("reflector.llm.Settings") as settings_mock,
        ):
            summarizer = MagicMock()
            summarizer.aget_response = AsyncMock(return_value="Some analysis")
            summarize_cls.return_value = summarizer

            # A single, fully-valid completion: no retries expected.
            settings_mock.llm.acomplete = AsyncMock(
                return_value=make_completion_response(
                    '{"title": "Test", "summary": "Summary", "confidence": 0.95}'
                )
            )

            outcome = await flow.run(
                prompt="Extract data",
                texts=["Some text"],
                tone_name=None,
            )

            assert "success" in outcome

            # The call must have carried a schema-bearing response_format.
            last_call = settings_mock.llm.acomplete.call_args
            assert "response_format" in last_call.kwargs
            response_format = last_call.kwargs["response_format"]
            assert response_format["type"] == "json_schema"
            assert response_format["json_schema"]["name"] == "TestResponse"
            assert (
                response_format["json_schema"]["schema"]
                == TestResponse.model_json_schema()
            )

    @pytest.mark.asyncio
    async def test_response_format_present_on_retry(self):
        """Retried acomplete calls must include response_format as well."""
        flow = StructuredOutputWorkflow(
            output_cls=TestResponse,
            max_retries=3,
            timeout=30,
        )

        with (
            patch("reflector.llm.TreeSummarize") as summarize_cls,
            patch("reflector.llm.Settings") as settings_mock,
        ):
            summarizer = MagicMock()
            summarizer.aget_response = AsyncMock(return_value="Some analysis")
            summarize_cls.return_value = summarizer

            # First reply is missing required fields, forcing one retry;
            # the second reply is complete and should succeed.
            settings_mock.llm.acomplete = AsyncMock(
                side_effect=[
                    make_completion_response('{"title": "Only title"}'),
                    make_completion_response(
                        '{"title": "Test", "summary": "Summary", "confidence": 0.9}'
                    ),
                ]
            )

            outcome = await flow.run(
                prompt="Extract data",
                texts=["Some text"],
                tone_name=None,
            )

            assert "success" in outcome
            assert settings_mock.llm.acomplete.call_count == 2

            # Every attempt, including the retry, must pass response_format.
            for attempt in settings_mock.llm.acomplete.call_args_list:
                assert "response_format" in attempt.kwargs
                assert attempt.kwargs["response_format"]["type"] == "json_schema"
||||
|
||||
|
||||
class TestNetworkErrorRetries:
|
||||
"""Test that network error retries are handled by OpenAILike, not Workflow"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user