Compare commits


2 Commits

Author SHA1 Message Date
Igor Loskutov
8373874cbd feat: add LLM_ENABLE_THINKING env var for thinking-mode LLMs
Some LLMs (e.g. GLM-4.5-Air) default to thinking mode which returns
content in reasoning_content instead of content field, breaking
structured output parsing. This setting passes enable_thinking through
extra_body to control the behavior per deployment.

Three states: None (default, don't send), True, False.
2026-02-06 20:19:53 -05:00
cd2255cfbc chore(main): release 0.33.0 (#847) 2026-02-06 18:12:06 -05:00
4 changed files with 72 additions and 1 deletion
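For context on the mechanism described in the commit message above: with any OpenAI-compatible client, non-standard request fields such as enable_thinking are sent through an extra JSON body. The sketch below is an illustration only, using the openai Python SDK against a hypothetical endpoint and model name; the commit itself routes the same field through llamaindex's OpenAILike additional_kwargs, as shown in the llm.py diff further down.

# Illustration only (hypothetical endpoint/model, not code from this repo):
# passing enable_thinking to an OpenAI-compatible backend via extra_body.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="fake")

response = client.chat.completions.create(
    model="glm-4.5-air",  # a thinking-mode model; the name here is illustrative
    messages=[{"role": "user", "content": "Reply with the JSON {\"ok\": true}"}],
    # Without this, thinking-mode models may return their text in
    # reasoning_content instead of content, breaking structured-output parsing.
    extra_body={"enable_thinking": False},
)
print(response.choices[0].message.content)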

View File

@@ -1,5 +1,17 @@
# Changelog
## [0.33.0](https://github.com/Monadical-SAS/reflector/compare/v0.32.2...v0.33.0) (2026-02-05)
### Features
* Daily+hatchet default ([#846](https://github.com/Monadical-SAS/reflector/issues/846)) ([15ab2e3](https://github.com/Monadical-SAS/reflector/commit/15ab2e306eacf575494b4b5d2b2ad779d44a1c7f))
### Bug Fixes
* websocket tests ([#825](https://github.com/Monadical-SAS/reflector/issues/825)) ([1ce1c7a](https://github.com/Monadical-SAS/reflector/commit/1ce1c7a910b6c374115d2437b17f9d288ef094dc))
## [0.32.2](https://github.com/Monadical-SAS/reflector/compare/v0.32.1...v0.32.2) (2026-02-03)

View File

@@ -206,6 +206,12 @@ class LLM:
        """Configure llamaindex Settings with OpenAILike LLM"""
        session_id = llm_session_id.get() or f"fallback-{uuid4().hex}"
        extra_body: dict = {"litellm_session_id": session_id}
        # Only send enable_thinking when explicitly set (not None/unset).
        # Models that don't support it will ignore the param.
        if self.settings_obj.LLM_ENABLE_THINKING is not None:
            extra_body["enable_thinking"] = self.settings_obj.LLM_ENABLE_THINKING
        Settings.llm = OpenAILike(
            model=self.model_name,
            api_base=self.url,
@@ -215,7 +221,7 @@ class LLM:
            is_function_calling_model=False,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
-            additional_kwargs={"extra_body": {"litellm_session_id": session_id}},
+            additional_kwargs={"extra_body": extra_body},
        )

    async def get_response(
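To make the three-state behavior above concrete, here is a standalone sketch of the same extra_body construction (build_extra_body is a hypothetical helper, not part of the codebase): None leaves the key out entirely, while True and False are forwarded as-is alongside the litellm session id.

# Standalone sketch of the logic added in the diff above.
def build_extra_body(enable_thinking: bool | None, session_id: str) -> dict:
    extra_body: dict = {"litellm_session_id": session_id}
    if enable_thinking is not None:  # None/unset -> don't send the param at all
        extra_body["enable_thinking"] = enable_thinking
    return extra_body

assert build_extra_body(None, "abc") == {"litellm_session_id": "abc"}
assert build_extra_body(True, "abc") == {"litellm_session_id": "abc", "enable_thinking": True}
assert build_extra_body(False, "abc") == {"litellm_session_id": "abc", "enable_thinking": False}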

View File

@@ -75,6 +75,7 @@ class Settings(BaseSettings):
    LLM_URL: str | None = None
    LLM_API_KEY: str | None = None
    LLM_CONTEXT_WINDOW: int = 16000
    LLM_ENABLE_THINKING: bool | None = None
    LLM_PARSE_MAX_RETRIES: int = (
        3  # Max retries for JSON/validation errors (total attempts = retries + 1)
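Because the field is typed bool | None, the environment variable maps onto the three states described in the commit message. A minimal sketch, assuming a pydantic-settings v2 BaseSettings (DemoSettings is illustrative, not the project's actual Settings class):

# Sketch assuming pydantic-settings v2; DemoSettings is illustrative only.
import os
from pydantic_settings import BaseSettings

class DemoSettings(BaseSettings):
    LLM_ENABLE_THINKING: bool | None = None

assert DemoSettings().LLM_ENABLE_THINKING is None      # unset -> param not sent

os.environ["LLM_ENABLE_THINKING"] = "false"
assert DemoSettings().LLM_ENABLE_THINKING is False     # thinking explicitly disabled

os.environ["LLM_ENABLE_THINKING"] = "true"
assert DemoSettings().LLM_ENABLE_THINKING is True      # thinking explicitly enabled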

View File

@@ -8,6 +8,7 @@ from pydantic import BaseModel, Field
from workflows.errors import WorkflowRuntimeError, WorkflowTimeoutError
from reflector.llm import LLM, LLMParseError, StructuredOutputWorkflow
from reflector.settings import Settings
from reflector.utils.retry import RetryException
@@ -26,6 +27,57 @@ def make_completion_response(text: str):
    return response


class TestLLMEnableThinking:
    """Test that LLM_ENABLE_THINKING setting is passed through to OpenAILike"""

    def test_enable_thinking_false_passed_in_extra_body(self):
        """enable_thinking=False should be in extra_body when LLM_ENABLE_THINKING=False"""
        settings = Settings(
            LLM_ENABLE_THINKING=False,
            LLM_URL="http://fake",
            LLM_API_KEY="fake",
        )
        with (
            patch("reflector.llm.OpenAILike") as mock_openai,
            patch("reflector.llm.Settings"),
        ):
            LLM(settings=settings)

        extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
        assert extra_body["enable_thinking"] is False

    def test_enable_thinking_true_passed_in_extra_body(self):
        """enable_thinking=True should be in extra_body when LLM_ENABLE_THINKING=True"""
        settings = Settings(
            LLM_ENABLE_THINKING=True,
            LLM_URL="http://fake",
            LLM_API_KEY="fake",
        )
        with (
            patch("reflector.llm.OpenAILike") as mock_openai,
            patch("reflector.llm.Settings"),
        ):
            LLM(settings=settings)

        extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
        assert extra_body["enable_thinking"] is True

    def test_enable_thinking_none_not_in_extra_body(self):
        """enable_thinking should not be in extra_body when LLM_ENABLE_THINKING is None (default)"""
        settings = Settings(
            LLM_URL="http://fake",
            LLM_API_KEY="fake",
        )
        with (
            patch("reflector.llm.OpenAILike") as mock_openai,
            patch("reflector.llm.Settings"),
        ):
            LLM(settings=settings)

        extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
        assert "enable_thinking" not in extra_body


class TestLLMParseErrorRecovery:
    """Test parse error recovery with Workflow feedback loop"""