Compare commits

...

2 Commits

Author SHA1 Message Date
Igor Loskutov
8373874cbd feat: add LLM_ENABLE_THINKING env var for thinking-mode LLMs
Some LLMs (e.g. GLM-4.5-Air) default to thinking mode which returns
content in reasoning_content instead of content field, breaking
structured output parsing. This setting passes enable_thinking through
extra_body to control the behavior per deployment.

Three states: None (default, don't send), True, False.
2026-02-06 20:19:53 -05:00
cd2255cfbc chore(main): release 0.33.0 (#847) 2026-02-06 18:12:06 -05:00
4 changed files with 72 additions and 1 deletions

View File

@@ -1,5 +1,17 @@
# Changelog # Changelog
## [0.33.0](https://github.com/Monadical-SAS/reflector/compare/v0.32.2...v0.33.0) (2026-02-05)
### Features
* Daily+hatchet default ([#846](https://github.com/Monadical-SAS/reflector/issues/846)) ([15ab2e3](https://github.com/Monadical-SAS/reflector/commit/15ab2e306eacf575494b4b5d2b2ad779d44a1c7f))
### Bug Fixes
* websocket tests ([#825](https://github.com/Monadical-SAS/reflector/issues/825)) ([1ce1c7a](https://github.com/Monadical-SAS/reflector/commit/1ce1c7a910b6c374115d2437b17f9d288ef094dc))
## [0.32.2](https://github.com/Monadical-SAS/reflector/compare/v0.32.1...v0.32.2) (2026-02-03) ## [0.32.2](https://github.com/Monadical-SAS/reflector/compare/v0.32.1...v0.32.2) (2026-02-03)

View File

@@ -206,6 +206,12 @@ class LLM:
"""Configure llamaindex Settings with OpenAILike LLM""" """Configure llamaindex Settings with OpenAILike LLM"""
session_id = llm_session_id.get() or f"fallback-{uuid4().hex}" session_id = llm_session_id.get() or f"fallback-{uuid4().hex}"
extra_body: dict = {"litellm_session_id": session_id}
# Only send enable_thinking when explicitly set (not None/unset).
# Models that don't support it will ignore the param.
if self.settings_obj.LLM_ENABLE_THINKING is not None:
extra_body["enable_thinking"] = self.settings_obj.LLM_ENABLE_THINKING
Settings.llm = OpenAILike( Settings.llm = OpenAILike(
model=self.model_name, model=self.model_name,
api_base=self.url, api_base=self.url,
@@ -215,7 +221,7 @@ class LLM:
is_function_calling_model=False, is_function_calling_model=False,
temperature=self.temperature, temperature=self.temperature,
max_tokens=self.max_tokens, max_tokens=self.max_tokens,
additional_kwargs={"extra_body": {"litellm_session_id": session_id}}, additional_kwargs={"extra_body": extra_body},
) )
async def get_response( async def get_response(

View File

@@ -75,6 +75,7 @@ class Settings(BaseSettings):
LLM_URL: str | None = None LLM_URL: str | None = None
LLM_API_KEY: str | None = None LLM_API_KEY: str | None = None
LLM_CONTEXT_WINDOW: int = 16000 LLM_CONTEXT_WINDOW: int = 16000
LLM_ENABLE_THINKING: bool | None = None
LLM_PARSE_MAX_RETRIES: int = ( LLM_PARSE_MAX_RETRIES: int = (
3 # Max retries for JSON/validation errors (total attempts = retries + 1) 3 # Max retries for JSON/validation errors (total attempts = retries + 1)

View File

@@ -8,6 +8,7 @@ from pydantic import BaseModel, Field
from workflows.errors import WorkflowRuntimeError, WorkflowTimeoutError from workflows.errors import WorkflowRuntimeError, WorkflowTimeoutError
from reflector.llm import LLM, LLMParseError, StructuredOutputWorkflow from reflector.llm import LLM, LLMParseError, StructuredOutputWorkflow
from reflector.settings import Settings
from reflector.utils.retry import RetryException from reflector.utils.retry import RetryException
@@ -26,6 +27,57 @@ def make_completion_response(text: str):
return response return response
class TestLLMEnableThinking:
"""Test that LLM_ENABLE_THINKING setting is passed through to OpenAILike"""
def test_enable_thinking_false_passed_in_extra_body(self):
"""enable_thinking=False should be in extra_body when LLM_ENABLE_THINKING=False"""
settings = Settings(
LLM_ENABLE_THINKING=False,
LLM_URL="http://fake",
LLM_API_KEY="fake",
)
with (
patch("reflector.llm.OpenAILike") as mock_openai,
patch("reflector.llm.Settings"),
):
LLM(settings=settings)
extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
assert extra_body["enable_thinking"] is False
def test_enable_thinking_true_passed_in_extra_body(self):
"""enable_thinking=True should be in extra_body when LLM_ENABLE_THINKING=True"""
settings = Settings(
LLM_ENABLE_THINKING=True,
LLM_URL="http://fake",
LLM_API_KEY="fake",
)
with (
patch("reflector.llm.OpenAILike") as mock_openai,
patch("reflector.llm.Settings"),
):
LLM(settings=settings)
extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
assert extra_body["enable_thinking"] is True
def test_enable_thinking_none_not_in_extra_body(self):
"""enable_thinking should not be in extra_body when LLM_ENABLE_THINKING is None (default)"""
settings = Settings(
LLM_URL="http://fake",
LLM_API_KEY="fake",
)
with (
patch("reflector.llm.OpenAILike") as mock_openai,
patch("reflector.llm.Settings"),
):
LLM(settings=settings)
extra_body = mock_openai.call_args.kwargs["additional_kwargs"]["extra_body"]
assert "enable_thinking" not in extra_body
class TestLLMParseErrorRecovery: class TestLLMParseErrorRecovery:
"""Test parse error recovery with Workflow feedback loop""" """Test parse error recovery with Workflow feedback loop"""