diff --git a/server/reflector/llm.py b/server/reflector/llm.py
index f7c9137d..f6ba84f4 100644
--- a/server/reflector/llm.py
+++ b/server/reflector/llm.py
@@ -206,6 +206,12 @@ class LLM:
         """Configure llamaindex Settings with OpenAILike LLM"""
         session_id = llm_session_id.get() or f"fallback-{uuid4().hex}"
 
+        extra_body: dict = {"litellm_session_id": session_id}
+        # Only send enable_thinking when explicitly set (not None/unset).
+        # Models that don't support it will ignore the param.
+        if self.settings_obj.LLM_ENABLE_THINKING is not None:
+            extra_body["enable_thinking"] = self.settings_obj.LLM_ENABLE_THINKING
+
         Settings.llm = OpenAILike(
             model=self.model_name,
             api_base=self.url,
@@ -215,7 +221,7 @@ class LLM:
             is_function_calling_model=False,
             temperature=self.temperature,
             max_tokens=self.max_tokens,
-            additional_kwargs={"extra_body": {"litellm_session_id": session_id}},
+            additional_kwargs={"extra_body": extra_body},
         )
 
     async def get_response(
diff --git a/server/reflector/settings.py b/server/reflector/settings.py
index 4955d568..04d172f0 100644
--- a/server/reflector/settings.py
+++ b/server/reflector/settings.py
@@ -75,6 +75,7 @@ class Settings(BaseSettings):
     LLM_URL: str | None = None
     LLM_API_KEY: str | None = None
     LLM_CONTEXT_WINDOW: int = 16000
+    LLM_ENABLE_THINKING: bool | None = None
     LLM_PARSE_MAX_RETRIES: int = (
         3  # Max retries for JSON/validation errors (total attempts = retries + 1)
     )
diff --git a/server/tests/test_llm_retry.py b/server/tests/test_llm_retry.py
index 5a43c8c5..57ddce4a 100644
--- a/server/tests/test_llm_retry.py
+++ b/server/tests/test_llm_retry.py
@@ -8,6 +8,7 @@ from pydantic import BaseModel, Field
 from workflows.errors import WorkflowRuntimeError, WorkflowTimeoutError
 
 from reflector.llm import LLM, LLMParseError, StructuredOutputWorkflow
+from reflector.settings import Settings
 from reflector.utils.retry import RetryException
 
 
@@ -26,6 +27,57 @@ def make_completion_response(text: str):
     return response
 
 
+class TestLLMEnableThinking:
+    """Test that LLM_ENABLE_THINKING setting is passed through to OpenAILike"""
+
+    def test_enable_thinking_false_passed_in_extra_body(self):
+        """enable_thinking=False should be in extra_body when LLM_ENABLE_THINKING=False"""
+        settings = Settings(
+            LLM_ENABLE_THINKING=False,
+            LLM_URL="http://fake",
+            LLM_API_KEY="fake",
+        )
+
+        with (
+            patch("reflector.llm.OpenAILike") as mock_openai,
+            patch("reflector.llm.Settings"),
+        ):
+            LLM(settings=settings)
+            extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
+            assert extra_body["enable_thinking"] is False
+
+    def test_enable_thinking_true_passed_in_extra_body(self):
+        """enable_thinking=True should be in extra_body when LLM_ENABLE_THINKING=True"""
+        settings = Settings(
+            LLM_ENABLE_THINKING=True,
+            LLM_URL="http://fake",
+            LLM_API_KEY="fake",
+        )
+
+        with (
+            patch("reflector.llm.OpenAILike") as mock_openai,
+            patch("reflector.llm.Settings"),
+        ):
+            LLM(settings=settings)
+            extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
+            assert extra_body["enable_thinking"] is True
+
+    def test_enable_thinking_none_not_in_extra_body(self):
+        """enable_thinking should not be in extra_body when LLM_ENABLE_THINKING is None (default)"""
+        settings = Settings(
+            LLM_URL="http://fake",
+            LLM_API_KEY="fake",
+        )
+
+        with (
+            patch("reflector.llm.OpenAILike") as mock_openai,
+            patch("reflector.llm.Settings"),
+        ):
+            LLM(settings=settings)
+            extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
+            assert "enable_thinking" not in extra_body
+
+
 class TestLLMParseErrorRecovery:
     """Test parse error recovery with Workflow feedback loop"""
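
Usage note: since Settings extends pydantic BaseSettings, the new flag can also be driven from the environment with no code changes. A minimal sketch, assuming the default pydantic-settings env-var mapping (the URL and key values below are illustrative only, not part of this patch):

    # Hypothetical deployment snippet: disable thinking mode for a backend
    # that honors enable_thinking (e.g., a Qwen-style model served behind an
    # OpenAI-compatible endpoint).
    import os

    os.environ["LLM_ENABLE_THINKING"] = "false"         # parsed to bool False
    os.environ["LLM_URL"] = "http://localhost:8000/v1"  # illustrative
    os.environ["LLM_API_KEY"] = "unused"                # illustrative

    from reflector.llm import LLM
    from reflector.settings import Settings

    # Constructing LLM configures the OpenAILike client; extra_body now
    # carries enable_thinking=False alongside the litellm session id.
    # Leaving LLM_ENABLE_THINKING unset keeps the param out entirely.
    llm = LLM(settings=Settings())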