@@ -160,6 +160,22 @@ def _process_tools_stream(index: int, delta: str, parser_dict: Dict, request: Ch
160160 return normal_text , calls
161161
162162
163+ def _split_tool_argument_delta (arguments : Optional [str ]) -> List [str ]:
164+ """Split a complete JSON argument string into OpenAI-style deltas."""
165+ if not arguments :
166+ return []
167+ if len (arguments ) <= 2 :
168+ return [arguments ]
169+ if arguments [0 ] in "{[" and arguments [- 1 ] in "}]" :
170+ middle = arguments [1 :- 1 ]
171+ chunks = [arguments [0 ]]
172+ if middle :
173+ chunks .append (middle )
174+ chunks .append (arguments [- 1 ])
175+ return [chunk for chunk in chunks if chunk ]
176+ return [arguments ]
177+
178+
163179async def chat_completions_impl (request : ChatCompletionRequest , raw_request : Request ) -> Response :
164180 from .api_http import g_objs
165181
@@ -342,6 +358,7 @@ async def chat_completions_impl(request: ChatCompletionRequest, raw_request: Req
342358 ToolCall (
343359 id = tool_id ,
344360 index = getattr (call_info , "tool_index" , None ),
361+ type = "function" ,
345362 function = FunctionResponse (name = call_info .name , arguments = call_info .parameters ),
346363 )
347364 )
@@ -408,7 +425,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
408425 choices = [choice_data ],
409426 model = request .model ,
410427 )
411- yield f"data: { chunk .model_dump_json ()} \n \n "
428+ yield f"data: { chunk .model_dump_json (exclude_none = True )} \n \n "
412429
413430 if request .tool_choice != "none" and request .tools :
414431 # parse_increment => returns (normal_text, calls)
@@ -417,7 +434,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
417434 )
418435
419436 # 1) if there's normal_text, output it as normal content
420- if normal_text :
437+ if normal_text and ( normal_text . strip () or not has_emitted_tool_calls [ sub_req_id ]) :
421438 choice_data = ChatCompletionStreamResponseChoice (
422439 index = choice_index ,
423440 delta = DeltaMessage (role = "assistant" , content = normal_text ),
@@ -429,7 +446,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
429446 choices = [choice_data ],
430447 model = request .model ,
431448 )
432- yield f"data: { chunk .model_dump_json ()} \n \n "
449+ yield f"data: { chunk .model_dump_json (exclude_none = True )} \n \n "
433450
434451 # 2) if we found calls, we output them as separate chunk(s)
435452 history_tool_calls_cnt = _get_history_tool_calls_cnt (request )
@@ -456,7 +473,8 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
456473 call_item .parameters = remaining_call
457474
458475 tool_parser = getattr (g_objs .args , "tool_call_parser" , None ) or "llama3"
459- id_key = (choice_index , call_item .tool_index )
476+ stream_index = getattr (call_item , "tool_index" , None )
477+ id_key = (choice_index , stream_index )
460478 if call_item .name :
461479 if id_key not in stream_tool_call_ids :
462480 stream_tool_call_ids [id_key ] = _process_tool_call_id (
@@ -468,26 +486,74 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
468486 tool_call_id = stream_tool_call_ids .get (id_key )
469487 function_name = None
470488
471- tool_call = ToolCall (
472- id = tool_call_id ,
473- index = getattr (call_item , "tool_index" , None ),
474- function = FunctionResponse (
475- name = function_name ,
476- arguments = call_item .parameters ,
477- ),
478- )
479- choice_data = ChatCompletionStreamResponseChoice (
480- index = choice_index ,
481- delta = DeltaMessage (role = "assistant" , tool_calls = [tool_call ]),
482- finish_reason = None ,
483- )
484- chunk = ChatCompletionStreamResponse (
485- id = group_request_id ,
486- created = created_time ,
487- choices = [choice_data ],
488- model = request .model ,
489- )
490- yield f"data: { chunk .model_dump_json ()} \n \n "
489+ is_tool_head = call_item .name is not None
490+
491+ if is_tool_head and call_item .parameters :
492+ head_tool_call = ToolCall (
493+ id = tool_call_id ,
494+ index = stream_index ,
495+ type = "function" ,
496+ function = FunctionResponse (
497+ name = function_name ,
498+ arguments = "" ,
499+ ),
500+ )
501+ head_choice = ChatCompletionStreamResponseChoice (
502+ index = choice_index ,
503+ delta = DeltaMessage (tool_calls = [head_tool_call ]),
504+ finish_reason = None ,
505+ )
506+ head_chunk = ChatCompletionStreamResponse (
507+ id = group_request_id ,
508+ created = created_time ,
509+ choices = [head_choice ],
510+ model = request .model ,
511+ )
512+ yield f"data: { head_chunk .model_dump_json (exclude_none = True )} \n \n "
513+
514+ for arg_delta in _split_tool_argument_delta (call_item .parameters ):
515+ arg_tool_call = ToolCall (
516+ index = stream_index ,
517+ function = FunctionResponse (arguments = arg_delta ),
518+ )
519+ arg_choice = ChatCompletionStreamResponseChoice (
520+ index = choice_index ,
521+ delta = DeltaMessage (tool_calls = [arg_tool_call ]),
522+ finish_reason = None ,
523+ )
524+ arg_chunk = ChatCompletionStreamResponse (
525+ id = group_request_id ,
526+ created = created_time ,
527+ choices = [arg_choice ],
528+ model = request .model ,
529+ )
530+ yield f"data: { arg_chunk .model_dump_json (exclude_none = True )} \n \n "
531+ else :
532+ tool_call = ToolCall (
533+ id = tool_call_id if is_tool_head else None ,
534+ index = stream_index ,
535+ type = "function" if is_tool_head else None ,
536+ function = FunctionResponse (
537+ name = function_name ,
538+ arguments = (
539+ (call_item .parameters if call_item .parameters is not None else "" )
540+ if is_tool_head
541+ else call_item .parameters
542+ ),
543+ ),
544+ )
545+ choice_data = ChatCompletionStreamResponseChoice (
546+ index = choice_index ,
547+ delta = DeltaMessage (tool_calls = [tool_call ]),
548+ finish_reason = None ,
549+ )
550+ chunk = ChatCompletionStreamResponse (
551+ id = group_request_id ,
552+ created = created_time ,
553+ choices = [choice_data ],
554+ model = request .model ,
555+ )
556+ yield f"data: { chunk .model_dump_json (exclude_none = True )} \n \n "
491557 else :
492558 delta_message = DeltaMessage (role = "assistant" , content = delta )
493559 stream_choice = ChatCompletionStreamResponseChoice (
@@ -499,7 +565,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
499565 model = request .model ,
500566 choices = [stream_choice ],
501567 )
502- yield f"data: { stream_resp .model_dump_json ()} \n \n "
568+ yield f"data: { stream_resp .model_dump_json (exclude_none = True )} \n \n "
503569
504570 # Emit a per-choice final empty chunk with finish_reason.
505571 if current_finish_reason is not None :
@@ -516,7 +582,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
516582 model = request .model ,
517583 choices = [final_choice ],
518584 )
519- yield f"data: { final_chunk .model_dump_json ()} \n \n "
585+ yield f"data: { final_chunk .model_dump_json (exclude_none = True )} \n \n "
520586
521587 if request .stream_options and request .stream_options .include_usage :
522588 usage = UsageInfo (
@@ -531,7 +597,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
531597 model = request .model ,
532598 usage = usage ,
533599 )
534- yield f"data: { usage_chunk .model_dump_json ()} \n \n "
600+ yield f"data: { usage_chunk .model_dump_json (exclude_none = True )} \n \n "
535601
536602 background_tasks = BackgroundTasks ()
537603 return StreamingResponse (stream_results (), media_type = "text/event-stream" , background = background_tasks )
0 commit comments