@@ -591,7 +591,8 @@ def _extract_streamed_response_api_response(chunks: Any) -> Any:
591591 for raw_chunk in chunks :
592592 chunk = raw_chunk .__dict__
593593 if raw_response := chunk .get ("response" , None ):
594- usage = chunk .get ("usage" , None )
594+ usage = chunk .get ("usage" , None ) or getattr (raw_response , "usage" , None )
595+
595596 response = raw_response .__dict__
596597 model = response .get ("model" )
597598
@@ -613,7 +614,7 @@ def _extract_streamed_response_api_response(chunks: Any) -> Any:
613614
614615def _extract_streamed_openai_response (resource : Any , chunks : Any ) -> Any :
615616 completion : Any = defaultdict (lambda : None ) if resource .type == "chat" else ""
616- model , usage = None , None
617+ model , usage , finish_reason = None , None , None
617618
618619 for chunk in chunks :
619620 if _is_openai_v1 ():
@@ -629,6 +630,7 @@ def _extract_streamed_openai_response(resource: Any, chunks: Any) -> Any:
629630 choice = choice .__dict__
630631 if resource .type == "chat" :
631632 delta = choice .get ("delta" , None )
633+ finish_reason = choice .get ("finish_reason" , None )
632634
633635 if _is_openai_v1 ():
634636 delta = delta .__dict__
@@ -727,7 +729,7 @@ def get_response_for_chat() -> Any:
727729 model ,
728730 get_response_for_chat () if resource .type == "chat" else completion ,
729731 usage ,
730- None ,
732+ { "finish_reason" : finish_reason } if finish_reason is not None else None ,
731733 )
732734
733735
0 commit comments