Skip to content

Commit 7598aed

Browse files
committed
fix(openai): tool_calls dropped when content chunk precedes tool deltas in stream
get_response_for_chat() built its return value with a Python `or` chain: return completion["content"] or (completion["tool_calls"] and {...}) or None Models like Qwen and DeepSeek emit a non-empty content chunk (often "\n\n" or a brief reasoning prefix) before streaming the tool-call deltas. Because a non-empty string is truthy, the `or` chain short-circuited at the content branch and returned just the whitespace string, silently discarding all accumulated tool_call data. Fix: check tool_calls first. When tool_calls are present, return them as the primary output. If the content is non-whitespace (e.g. a genuine reasoning preamble) it is included alongside the tool_calls rather than dropped. The function_call (legacy OpenAI format) and plain content paths are unchanged. Fixes langfuse/langfuse#12490
1 parent e10e522 commit 7598aed

2 files changed

Lines changed: 279 additions & 21 deletions

File tree

langfuse/openai.py

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -722,27 +722,30 @@ def _extract_streamed_openai_response(resource: Any, chunks: Any) -> Any:
722722
completion += choice.get("text", "")
723723

724724
def get_response_for_chat() -> Any:
    """Assemble the final chat output from the accumulated stream state.

    Order matters: tool_calls are inspected before content because some
    models (e.g. Qwen/DeepSeek) stream a whitespace content chunk such as
    "\n\n" ahead of the tool-call deltas; returning that truthy content
    first would silently discard the collected tool_calls.
    """
    tool_calls = completion["tool_calls"]
    content = completion["content"]

    if tool_calls:
        response: Any = {
            "role": "assistant",
            "tool_calls": [{"function": call} for call in tool_calls],
        }
        # Keep a non-whitespace preamble (some reasoning models emit brief
        # text before calling tools); drop pure-whitespace content.
        if content and content.strip():
            response["content"] = content
        return response

    if completion["function_call"]:
        # Legacy OpenAI function_call format (pre-tools API).
        return {
            "role": "assistant",
            "function_call": completion["function_call"],
        }

    return content or None
746749

747750
return (
748751
model,
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
"""Unit tests for _extract_streamed_openai_response / get_response_for_chat.
2+
3+
Covers the bug where a non-empty content chunk (e.g. "\n\n") emitted before
4+
tool-call deltas caused get_response_for_chat() to short-circuit and silently
5+
drop all collected tool_calls from the logged generation output.
6+
7+
No real OpenAI API calls — chunks are built from SimpleNamespace objects that
8+
mirror the __dict__ structure of openai-python v1 Pydantic models.
9+
"""
10+
11+
import types
12+
from dataclasses import dataclass
13+
from typing import Any, List, Optional
14+
from unittest.mock import patch
15+
16+
import pytest
17+
18+
from langfuse.openai import OpenAiDefinition, _extract_streamed_openai_response
19+
20+
21+
# ---------------------------------------------------------------------------
22+
# Helpers: fake OpenAI v1 streaming chunk objects
23+
# ---------------------------------------------------------------------------
24+
25+
def _make_tool_call_delta(
26+
name: Optional[str] = None,
27+
arguments: str = "",
28+
index: int = 0,
29+
tool_id: Optional[str] = None,
30+
call_type: Optional[str] = None,
31+
) -> Any:
32+
"""Build a ChoiceDeltaToolCall-alike SimpleNamespace."""
33+
function = types.SimpleNamespace(name=name, arguments=arguments)
34+
return types.SimpleNamespace(
35+
index=index,
36+
id=tool_id,
37+
type=call_type,
38+
function=function,
39+
)
40+
41+
42+
def _make_chunk(
43+
content: Optional[str] = None,
44+
tool_calls: Optional[List[Any]] = None,
45+
function_call: Any = None,
46+
role: Optional[str] = None,
47+
finish_reason: Optional[str] = None,
48+
model: str = "gpt-4o",
49+
) -> Any:
50+
"""Build a streaming chunk SimpleNamespace (mirrors chunk.__dict__ in v1)."""
51+
delta = types.SimpleNamespace(
52+
role=role,
53+
content=content,
54+
tool_calls=tool_calls,
55+
function_call=function_call,
56+
)
57+
choice = types.SimpleNamespace(delta=delta, finish_reason=finish_reason)
58+
return types.SimpleNamespace(model=model, choices=[choice], usage=None)
59+
60+
61+
def _chat_resource() -> OpenAiDefinition:
    """Return the OpenAiDefinition describing a sync ChatCompletion.create call."""
    spec = {
        "module": "openai",
        "object": "ChatCompletion",
        "method": "create",
        "type": "chat",
        "sync": True,
    }
    return OpenAiDefinition(**spec)
69+
70+
71+
def _run(chunks: List[Any]) -> Any:
    """Feed *chunks* through _extract_streamed_openai_response on the v1 path.

    _is_openai_v1 is patched to True so the chunk objects are read via their
    v1-style attribute layout; only the parsed response (second element of the
    returned tuple) is relevant to these tests.
    """
    with patch("langfuse.openai._is_openai_v1", return_value=True):
        extracted = _extract_streamed_openai_response(_chat_resource(), iter(chunks))
    return extracted[1]
76+
77+
78+
# ---------------------------------------------------------------------------
79+
# Bug reproduction: content chunk before tool_calls
80+
# ---------------------------------------------------------------------------
81+
82+
83+
class TestToolCallsWithPrecedingContentChunk:
    """
    Regression tests for the `or`-chain short-circuit bug.

    Models like Qwen/DeepSeek sometimes emit a whitespace content chunk
    (e.g. "\n\n") before beginning to stream tool-call deltas. Previously
    get_response_for_chat() evaluated `completion["content"] or ...` and
    returned the content string immediately, dropping the tool_calls entirely.
    """

    def test_tool_calls_not_dropped_when_whitespace_content_precedes_them(self):
        stream = [_make_chunk(role="assistant")]
        # Spurious whitespace arrives before any tool-call delta.
        stream.append(_make_chunk(content="\n\n"))
        stream.append(
            _make_chunk(tool_calls=[_make_tool_call_delta(name="get_weather", arguments="")])
        )
        stream.append(
            _make_chunk(
                tool_calls=[_make_tool_call_delta(name=None, arguments='{"city": "Paris"}')]
            )
        )
        stream.append(_make_chunk(finish_reason="tool_calls"))

        output = _run(stream)

        assert isinstance(output, dict), "Expected a dict, not a plain string"
        assert "tool_calls" in output, "tool_calls must not be dropped"
        call = output["tool_calls"][0]["function"]
        assert call["name"] == "get_weather"
        assert call["arguments"] == '{"city": "Paris"}'

    def test_whitespace_only_content_not_included_in_result(self):
        """A leading "\n\n" is whitespace-only and should be omitted from output."""
        output = _run(
            [
                _make_chunk(role="assistant"),
                _make_chunk(content="\n\n"),
                _make_chunk(
                    tool_calls=[_make_tool_call_delta(name="search", arguments='{"q":"hi"}')]
                ),
                _make_chunk(finish_reason="tool_calls"),
            ]
        )
        assert "content" not in output or output.get("content") is None

    def test_meaningful_content_preserved_alongside_tool_calls(self):
        """When content has real text (not just whitespace), it should be kept."""
        output = _run(
            [
                _make_chunk(role="assistant"),
                _make_chunk(content="Sure, let me check that. "),
                _make_chunk(
                    tool_calls=[_make_tool_call_delta(name="lookup", arguments='{"id":1}')]
                ),
                _make_chunk(finish_reason="tool_calls"),
            ]
        )
        assert "tool_calls" in output
        assert output.get("content") == "Sure, let me check that. "

    def test_non_whitespace_content_before_tool_calls_preserves_both(self):
        parts = [
            _make_chunk(role="assistant"),
            _make_chunk(content="I'll call"),
            _make_chunk(content=" the tool."),
            _make_chunk(tool_calls=[_make_tool_call_delta(name="do_thing", arguments="{}")]),
            _make_chunk(finish_reason="tool_calls"),
        ]
        output = _run(parts)
        assert output["tool_calls"][0]["function"]["name"] == "do_thing"
        assert output.get("content") == "I'll call the tool."
153+
154+
155+
# ---------------------------------------------------------------------------
156+
# Baseline: pure content response (no tools)
157+
# ---------------------------------------------------------------------------
158+
159+
160+
class TestPureContentResponse:
    """Plain text streams (no tools) must still come back as a bare string."""

    def test_plain_text_response_returned_as_string(self):
        pieces = ["Hello, ", "world!"]
        stream = [_make_chunk(role="assistant")]
        stream += [_make_chunk(content=p) for p in pieces]
        stream.append(_make_chunk(finish_reason="stop"))
        assert _run(stream) == "Hello, world!"

    def test_empty_stream_returns_none(self):
        assert _run([]) is None
174+
175+
176+
# ---------------------------------------------------------------------------
177+
# Pure tool-call response (no content at all)
178+
# ---------------------------------------------------------------------------
179+
180+
181+
class TestPureToolCallResponse:
    """Tool-only streams: the dict output carries tool_calls and no content key."""

    def test_tool_calls_returned_without_content(self):
        output = _run(
            [
                _make_chunk(role="assistant"),
                _make_chunk(
                    tool_calls=[
                        _make_tool_call_delta(name="get_price", arguments='{"sku":"A1"}')
                    ]
                ),
                _make_chunk(finish_reason="tool_calls"),
            ]
        )
        assert isinstance(output, dict)
        assert "tool_calls" in output
        assert output["tool_calls"][0]["function"]["name"] == "get_price"
        assert "content" not in output

    def test_multiple_tool_calls_all_returned(self):
        # Each named delta opens a new entry in the accumulator.
        deltas = [
            _make_tool_call_delta(name="tool_a", arguments='{"x":1}'),
            _make_tool_call_delta(name="tool_b", arguments='{"y":2}'),
        ]
        stream = [_make_chunk(role="assistant")]
        stream += [_make_chunk(tool_calls=[d]) for d in deltas]
        stream.append(_make_chunk(finish_reason="tool_calls"))

        output = _run(stream)

        assert len(output["tool_calls"]) == 2
        assert {tc["function"]["name"] for tc in output["tool_calls"]} == {"tool_a", "tool_b"}
214+
215+
216+
# ---------------------------------------------------------------------------
217+
# Legacy function_call (OpenAI v0 format)
218+
# ---------------------------------------------------------------------------
219+
220+
221+
class TestFunctionCallResponse:
    """Legacy OpenAI v0 `function_call` output must still round-trip.

    Fixes in this block: the original built a `chunks` list that was never
    passed to the extractor (dead local), and hand-rolled a SimpleNamespace
    chunk even though `_make_chunk` already exposes a `function_call`
    parameter that produces the identical delta structure.
    """

    def test_function_call_returned_when_no_tool_calls(self):
        # _make_chunk wires function_call straight into delta.function_call,
        # which is the path the accumulator reads for the legacy format.
        fn_chunk = _make_chunk(
            model="gpt-3.5-turbo",
            function_call=types.SimpleNamespace(name="old_fn", arguments='{"a":1}'),
        )

        result = _run([fn_chunk])

        assert isinstance(result, dict)
        assert "function_call" in result
        assert result["function_call"]["name"] == "old_fn"

0 commit comments

Comments
 (0)