@@ -350,6 +350,193 @@ def test_chat_model_stream_no_duplicate_usage_chunks():
     assert len(usage_chunks) == 1, f"Expected exactly 1 usage chunk, got {len(usage_chunks)}"
 
 
+def test_chat_model_stream_usage_only_final_chunk():
+    """Test that a final chunk with only usage data (no choices) correctly emits usage metadata."""
+    from unittest.mock import Mock, patch
+
+    mock_usage = Mock()
+    mock_usage.prompt_tokens = 15
+    mock_usage.completion_tokens = 10
+
+    # Simulate GPT-5 streaming behavior: content chunks followed by usage-only chunk
+    mock_chunks = [
+        Mock(
+            choices=[
+                Mock(
+                    delta=Mock(
+                        role="assistant",
+                        content="Hello",
+                        model_dump=Mock(return_value={"role": "assistant", "content": "Hello"}),
+                    ),
+                    finish_reason=None,
+                    logprobs=None,
+                )
+            ],
+            usage=None,
+        ),
+        Mock(
+            choices=[
+                Mock(
+                    delta=Mock(
+                        role="assistant",
+                        content=" world",
+                        model_dump=Mock(return_value={"role": "assistant", "content": " world"}),
+                    ),
+                    finish_reason="stop",
+                    logprobs=None,
+                )
+            ],
+            usage=None,
+        ),
+        # Final chunk with ONLY usage data, no choices/delta
+        Mock(
+            choices=[],
+            usage=mock_usage,
+        ),
+    ]
+
+    # Verify mock structure matches GPT-5 behavior:
+    # the final chunk has an empty choices list and usage data (no delta)
+    assert len(mock_chunks[2].choices) == 0
+    assert mock_chunks[2].usage is not None
+
+    with patch("databricks_langchain.chat_models.get_openai_client") as mock_get_client:
+        mock_client = Mock()
+        mock_get_client.return_value = mock_client
+        mock_client.chat.completions.create.return_value = iter(mock_chunks)
+
+        llm = ChatDatabricks(model="test-model")
+        messages = [HumanMessage(content="Hello")]
+
+        chunks = list(llm.stream(messages, stream_usage=True))
+
+        # Should get content chunks plus one usage chunk
+        content_chunks = [chunk for chunk in chunks if chunk.content != ""]
+        assert len(content_chunks) == 2
+        assert content_chunks[0].content == "Hello"
+        assert content_chunks[1].content == " world"
+
+        # Should emit exactly ONE usage chunk
+        usage_chunks = [
+            chunk for chunk in chunks if chunk.content == "" and chunk.usage_metadata is not None
+        ]
+        assert len(usage_chunks) == 1, f"Expected exactly 1 usage chunk, got {len(usage_chunks)}"
+
+        # Verify the usage chunk has correct metadata
+        usage_chunk = usage_chunks[0]
+        assert isinstance(usage_chunk, AIMessageChunk)
+        assert usage_chunk.content == ""
+        assert usage_chunk.usage_metadata["input_tokens"] == 15
+        assert usage_chunk.usage_metadata["output_tokens"] == 10
+        assert usage_chunk.usage_metadata["total_tokens"] == 25
+
+
+def test_chat_model_stream_usage_only_chunk_missing_tokens():
+    """Test that a usage-only chunk with missing token data doesn't emit usage metadata."""
+    from unittest.mock import Mock, patch
+
+    mock_usage = Mock()
+    mock_usage.prompt_tokens = None  # Missing prompt_tokens
+    mock_usage.completion_tokens = 10
+
+    mock_chunks = [
+        Mock(
+            choices=[
+                Mock(
+                    delta=Mock(
+                        role="assistant",
+                        content="Hello",
+                        model_dump=Mock(return_value={"role": "assistant", "content": "Hello"}),
+                    ),
+                    finish_reason="stop",
+                    logprobs=None,
+                )
+            ],
+            usage=None,
+        ),
+        # Final chunk with usage data but missing prompt_tokens
+        Mock(
+            choices=[],
+            usage=mock_usage,
+        ),
+    ]
+
+    with patch("databricks_langchain.chat_models.get_openai_client") as mock_get_client:
+        mock_client = Mock()
+        mock_get_client.return_value = mock_client
+        mock_client.chat.completions.create.return_value = iter(mock_chunks)
+
+        llm = ChatDatabricks(model="test-model")
+        messages = [HumanMessage(content="Hello")]
+
+        chunks = list(llm.stream(messages, stream_usage=True))
+
+        # Should get content chunks but NO usage chunk (due to missing tokens)
+        content_chunks = [chunk for chunk in chunks if chunk.content != ""]
+        assert len(content_chunks) == 1
+
+        # Should NOT emit a usage chunk when tokens are missing
+        usage_chunks = [
+            chunk for chunk in chunks if chunk.content == "" and chunk.usage_metadata is not None
+        ]
+        assert len(usage_chunks) == 0, (
+            f"Expected 0 usage chunks when tokens are missing, got {len(usage_chunks)}"
+        )
+
+
+def test_chat_model_stream_usage_only_chunk_stream_usage_false():
+    """Test that a usage-only chunk is ignored when stream_usage=False."""
+    from unittest.mock import Mock, patch
+
+    mock_usage = Mock()
+    mock_usage.prompt_tokens = 15
+    mock_usage.completion_tokens = 10
+
+    mock_chunks = [
+        Mock(
+            choices=[
+                Mock(
+                    delta=Mock(
+                        role="assistant",
+                        content="Hello",
+                        model_dump=Mock(return_value={"role": "assistant", "content": "Hello"}),
+                    ),
+                    finish_reason="stop",
+                    logprobs=None,
+                )
+            ],
+            usage=None,
+        ),
+        # Final chunk with usage data
+        Mock(
+            choices=[],
+            usage=mock_usage,
+        ),
+    ]
+
+    with patch("databricks_langchain.chat_models.get_openai_client") as mock_get_client:
+        mock_client = Mock()
+        mock_get_client.return_value = mock_client
+        mock_client.chat.completions.create.return_value = iter(mock_chunks)
+
+        llm = ChatDatabricks(model="test-model")
+        messages = [HumanMessage(content="Hello")]
+
+        chunks = list(llm.stream(messages, stream_usage=False))
+
+        # Should get content chunks only
+        content_chunks = [chunk for chunk in chunks if chunk.content != ""]
+        assert len(content_chunks) == 1
+
+        # Should NOT emit a usage chunk when stream_usage=False
+        usage_chunks = [
+            chunk for chunk in chunks if chunk.content == "" and chunk.usage_metadata is not None
+        ]
+        assert len(usage_chunks) == 0, (
+            f"Expected 0 usage chunks when stream_usage=False, got {len(usage_chunks)}"
+        )
+
+
 class GetWeather(BaseModel):
     """Get the current weather in a given location"""
 
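
For reference, the behavior these tests pin down can be exercised end to end roughly as follows. This is a minimal sketch rather than part of the suite: the endpoint name is an illustrative placeholder, and it assumes a recent langchain-core where adding AIMessageChunk instances also merges their usage_metadata.

    from databricks_langchain import ChatDatabricks
    from langchain_core.messages import HumanMessage

    # Illustrative endpoint name; substitute a real serving endpoint.
    llm = ChatDatabricks(model="databricks-meta-llama-3-1-8b-instruct")

    final = None
    for chunk in llm.stream([HumanMessage(content="Hello")], stream_usage=True):
        # Accumulate chunks; "+" concatenates content and merges usage_metadata.
        final = chunk if final is None else final + chunk

    if final is not None and final.usage_metadata is not None:
        # With a GPT-5-style stream, the trailing usage-only chunk contributes
        # input_tokens / output_tokens / total_tokens here.
        print(final.usage_metadata)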