@@ -2011,7 +2011,33 @@ def generate_streaming(tools, functions, function_call, prompt):
2011
2011
tool_id = "" .join ([random .choice (string .ascii_letters + string .digits ) for _ in range (24 )])
2012
2012
completion = create_completion (prompt = prompt , stop = stops , grammar = grammar )
2013
2013
completion_text = ""
2014
+ first = True
2014
2015
for chunk in completion :
2016
+ # Yield the tool/function name first
2017
+ if first :
2018
+ if tools is not None :
2019
+ func_call_dict = {
2020
+ "tool_calls" : [
2021
+ {
2022
+ "index" : 0 ,
2023
+ "id" : "call_" + tool_id ,
2024
+ "type" : "function" ,
2025
+ "function" : {"name" : function_call ["name" ], "arguments" : "" },
2026
+ }
2027
+ ]
2028
+ }
2029
+ else :
2030
+ func_call_dict = {"function_call" : {"name" : function_call ["name" ], "arguments" : "" }}
2031
+ yield llama_types .CreateChatCompletionStreamResponse (
2032
+ id = "chat" + chunk ["id" ],
2033
+ object = "chat.completion.chunk" ,
2034
+ created = chunk ["created" ],
2035
+ model = chunk ["model" ],
2036
+ choices = [
2037
+ {"index" : 0 , "logprobs" : None , "delta" : {"role" : None , "content" : None , ** func_call_dict }}
2038
+ ],
2039
+ )
2040
+ first = False
2015
2041
if tools is not None :
2016
2042
func_call_dict = {
2017
2043
"tool_calls" : [
@@ -2046,6 +2072,23 @@ def generate_streaming(tools, functions, function_call, prompt):
2046
2072
}
2047
2073
],
2048
2074
)
2075
+ # Yield tool_call/function_call stop message
2076
+ yield {
2077
+ "id" : "chat" + chunk ["id" ],
2078
+ "object" : "chat.completion.chunk" ,
2079
+ "created" : chunk ["created" ],
2080
+ "model" : chunk ["model" ],
2081
+ "choices" : [
2082
+ {
2083
+ "index" : 0 ,
2084
+ "finish_reason" : "tool_calls" if tools is not None else "function_call" ,
2085
+ "logprobs" : None ,
2086
+ "delta" : {
2087
+ "role" : None , "content" : None , "function_call" : None , "tool_calls" : None
2088
+ },
2089
+ }
2090
+ ],
2091
+ }
2049
2092
# If "auto" or no tool_choice/function_call
2050
2093
elif isinstance (function_call , str ) and function_call == "auto" :
2051
2094
tool_index = 0
@@ -2240,7 +2283,7 @@ def generate_streaming(tools, functions, function_call, prompt):
2240
2283
prompt += "\n <|from|>assistant\n <|recipient|>"
2241
2284
tool_index += 1
2242
2285
else :
2243
- # Yield tool_call stop message
2286
+ # Yield tool_call/function_call stop message
2244
2287
yield {
2245
2288
"id" : "chat" + chunk_id ,
2246
2289
"object" : "chat.completion.chunk" ,
0 commit comments