|
| 1 | +"""Unit tests for _extract_streamed_openai_response / get_response_for_chat. |
| 2 | +
|
| 3 | +Covers the bug where a non-empty content chunk (e.g. "\n\n") emitted before |
| 4 | +tool-call deltas caused get_response_for_chat() to short-circuit and silently |
| 5 | +drop all collected tool_calls from the logged generation output. |
| 6 | +
|
| 7 | +No real OpenAI API calls — chunks are built from SimpleNamespace objects that |
| 8 | +mirror the __dict__ structure of openai-python v1 Pydantic models. |
| 9 | +""" |
| 10 | + |
| 11 | +import types |
| 12 | +from dataclasses import dataclass |
| 13 | +from typing import Any, List, Optional |
| 14 | +from unittest.mock import patch |
| 15 | + |
| 16 | +import pytest |
| 17 | + |
| 18 | +from langfuse.openai import OpenAiDefinition, _extract_streamed_openai_response |
| 19 | + |
| 20 | + |
| 21 | +# --------------------------------------------------------------------------- |
| 22 | +# Helpers: fake OpenAI v1 streaming chunk objects |
| 23 | +# --------------------------------------------------------------------------- |
| 24 | + |
| 25 | +def _make_tool_call_delta( |
| 26 | + name: Optional[str] = None, |
| 27 | + arguments: str = "", |
| 28 | + index: int = 0, |
| 29 | + tool_id: Optional[str] = None, |
| 30 | + call_type: Optional[str] = None, |
| 31 | +) -> Any: |
| 32 | + """Build a ChoiceDeltaToolCall-alike SimpleNamespace.""" |
| 33 | + function = types.SimpleNamespace(name=name, arguments=arguments) |
| 34 | + return types.SimpleNamespace( |
| 35 | + index=index, |
| 36 | + id=tool_id, |
| 37 | + type=call_type, |
| 38 | + function=function, |
| 39 | + ) |
| 40 | + |
| 41 | + |
| 42 | +def _make_chunk( |
| 43 | + content: Optional[str] = None, |
| 44 | + tool_calls: Optional[List[Any]] = None, |
| 45 | + function_call: Any = None, |
| 46 | + role: Optional[str] = None, |
| 47 | + finish_reason: Optional[str] = None, |
| 48 | + model: str = "gpt-4o", |
| 49 | +) -> Any: |
| 50 | + """Build a streaming chunk SimpleNamespace (mirrors chunk.__dict__ in v1).""" |
| 51 | + delta = types.SimpleNamespace( |
| 52 | + role=role, |
| 53 | + content=content, |
| 54 | + tool_calls=tool_calls, |
| 55 | + function_call=function_call, |
| 56 | + ) |
| 57 | + choice = types.SimpleNamespace(delta=delta, finish_reason=finish_reason) |
| 58 | + return types.SimpleNamespace(model=model, choices=[choice], usage=None) |
| 59 | + |
| 60 | + |
def _chat_resource() -> OpenAiDefinition:
    """Return the OpenAiDefinition for the synchronous chat-completions API."""
    spec = {
        "module": "openai",
        "object": "ChatCompletion",
        "method": "create",
        "type": "chat",
        "sync": True,
    }
    return OpenAiDefinition(**spec)
| 69 | + |
| 70 | + |
def _run(chunks: List[Any]) -> Any:
    """Feed *chunks* through _extract_streamed_openai_response (v1 forced on).

    Returns only the extracted response (second element of the tuple).
    """
    with patch("langfuse.openai._is_openai_v1", return_value=True):
        extracted = _extract_streamed_openai_response(_chat_resource(), iter(chunks))
    return extracted[1]
| 76 | + |
| 77 | + |
| 78 | +# --------------------------------------------------------------------------- |
| 79 | +# Bug reproduction: content chunk before tool_calls |
| 80 | +# --------------------------------------------------------------------------- |
| 81 | + |
| 82 | + |
class TestToolCallsWithPrecedingContentChunk:
    """
    Regression tests for streams where a content chunk arrives before the
    tool-call deltas. Some models (e.g. Qwen, DeepSeek) emit a stray
    whitespace chunk such as "\n\n" first; get_response_for_chat() used to
    evaluate `completion["content"] or ...` and return that string right
    away, dropping every collected tool call.
    """

    def test_tool_calls_not_dropped_when_whitespace_content_precedes_them(self):
        stream = [
            _make_chunk(role="assistant"),
            # stray whitespace arrives ahead of the tool call
            _make_chunk(content="\n\n"),
            _make_chunk(
                tool_calls=[_make_tool_call_delta(name="get_weather", arguments="")]
            ),
            _make_chunk(
                tool_calls=[
                    _make_tool_call_delta(name=None, arguments='{"city": "Paris"}')
                ]
            ),
            _make_chunk(finish_reason="tool_calls"),
        ]
        out = _run(stream)

        assert isinstance(out, dict), "Expected a dict, not a plain string"
        assert "tool_calls" in out, "tool_calls must not be dropped"
        fn = out["tool_calls"][0]["function"]
        assert fn["name"] == "get_weather"
        assert fn["arguments"] == '{"city": "Paris"}'

    def test_whitespace_only_content_not_included_in_result(self):
        """A leading "\n\n" is whitespace-only and should be omitted from output."""
        stream = [
            _make_chunk(role="assistant"),
            _make_chunk(content="\n\n"),
            _make_chunk(
                tool_calls=[_make_tool_call_delta(name="search", arguments='{"q":"hi"}')]
            ),
            _make_chunk(finish_reason="tool_calls"),
        ]
        out = _run(stream)

        assert "content" not in out or out.get("content") is None

    def test_meaningful_content_preserved_alongside_tool_calls(self):
        """Real (non-whitespace) content must survive next to tool_calls."""
        stream = [
            _make_chunk(role="assistant"),
            _make_chunk(content="Sure, let me check that. "),
            _make_chunk(
                tool_calls=[_make_tool_call_delta(name="lookup", arguments='{"id":1}')]
            ),
            _make_chunk(finish_reason="tool_calls"),
        ]
        out = _run(stream)

        assert "tool_calls" in out
        assert out.get("content") == "Sure, let me check that. "

    def test_non_whitespace_content_before_tool_calls_preserves_both(self):
        stream = [
            _make_chunk(role="assistant"),
            _make_chunk(content="I'll call"),
            _make_chunk(content=" the tool."),
            _make_chunk(
                tool_calls=[_make_tool_call_delta(name="do_thing", arguments="{}")]
            ),
            _make_chunk(finish_reason="tool_calls"),
        ]
        out = _run(stream)

        assert out["tool_calls"][0]["function"]["name"] == "do_thing"
        assert out.get("content") == "I'll call the tool."
| 153 | + |
| 154 | + |
| 155 | +# --------------------------------------------------------------------------- |
| 156 | +# Baseline: pure content response (no tools) |
| 157 | +# --------------------------------------------------------------------------- |
| 158 | + |
| 159 | + |
class TestPureContentResponse:
    """Baseline: plain text streams (no tools) collapse to a single string."""

    def test_plain_text_response_returned_as_string(self):
        out = _run(
            [
                _make_chunk(role="assistant"),
                _make_chunk(content="Hello, "),
                _make_chunk(content="world!"),
                _make_chunk(finish_reason="stop"),
            ]
        )
        assert out == "Hello, world!"

    def test_empty_stream_returns_none(self):
        assert _run([]) is None
| 174 | + |
| 175 | + |
| 176 | +# --------------------------------------------------------------------------- |
| 177 | +# Pure tool-call response (no content at all) |
| 178 | +# --------------------------------------------------------------------------- |
| 179 | + |
| 180 | + |
class TestPureToolCallResponse:
    """Tool-only streams must surface tool_calls and omit content entirely."""

    def test_tool_calls_returned_without_content(self):
        out = _run(
            [
                _make_chunk(role="assistant"),
                _make_chunk(
                    tool_calls=[
                        _make_tool_call_delta(name="get_price", arguments='{"sku":"A1"}')
                    ]
                ),
                _make_chunk(finish_reason="tool_calls"),
            ]
        )

        assert isinstance(out, dict)
        assert "tool_calls" in out
        assert out["tool_calls"][0]["function"]["name"] == "get_price"
        assert "content" not in out

    def test_multiple_tool_calls_all_returned(self):
        out = _run(
            [
                _make_chunk(role="assistant"),
                _make_chunk(
                    tool_calls=[_make_tool_call_delta(name="tool_a", arguments='{"x":1}')]
                ),
                # a fresh name starts a new entry in the accumulator
                _make_chunk(
                    tool_calls=[_make_tool_call_delta(name="tool_b", arguments='{"y":2}')]
                ),
                _make_chunk(finish_reason="tool_calls"),
            ]
        )

        assert len(out["tool_calls"]) == 2
        assert {tc["function"]["name"] for tc in out["tool_calls"]} == {"tool_a", "tool_b"}
| 214 | + |
| 215 | + |
| 216 | +# --------------------------------------------------------------------------- |
| 217 | +# Legacy function_call (OpenAI v0 format) |
| 218 | +# --------------------------------------------------------------------------- |
| 219 | + |
| 220 | + |
class TestFunctionCallResponse:
    """Legacy (OpenAI v0-style) `function_call` deltas inside a v1 stream."""

    def test_function_call_returned_when_no_tool_calls(self):
        # Fix over the previous version: the role chunk was built into an
        # unused `chunks` list and never streamed, and the function_call
        # chunk was hand-assembled even though _make_chunk already exposes
        # a `function_call` parameter. Use the shared helpers and actually
        # stream both chunks.
        fn_call = types.SimpleNamespace(name="old_fn", arguments='{"a":1}')
        result = _run(
            [
                _make_chunk(role="assistant", model="gpt-3.5-turbo"),
                _make_chunk(function_call=fn_call, model="gpt-3.5-turbo"),
            ]
        )

        assert isinstance(result, dict)
        assert "function_call" in result
        assert result["function_call"]["name"] == "old_fn"
        assert result["function_call"]["arguments"] == '{"a":1}'