@@ -2073,12 +2073,12 @@ def generate_streaming(tools, functions, function_call, prompt):
                 ],
             )
             # Yield tool_call/function_call stop message
-            yield {
-                "id": "chat" + chunk["id"],
-                "object": "chat.completion.chunk",
-                "created": chunk["created"],
-                "model": chunk["model"],
-                "choices": [
+            yield llama_types.CreateChatCompletionStreamResponse(
+                id="chat" + chunk["id"],
+                object="chat.completion.chunk",
+                created=chunk["created"],
+                model=chunk["model"],
+                choices=[
                     {
                         "index": 0,
                         "finish_reason": "tool_calls" if tools is not None else "function_call",
@@ -2088,7 +2088,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                         },
                     }
                 ],
-            }
+            )
         # If "auto" or no tool_choice/function_call
         elif isinstance(function_call, str) and function_call == "auto":
             tool_index = 0
@@ -2108,20 +2108,20 @@ def generate_streaming(tools, functions, function_call, prompt):
             if function_name == "all":
                 prompt += "all\n<|content|>"
                 # Yield the first empty message for content
-                yield {
-                    "id": "chat" + chunk_id,
-                    "model": chunk["model"],
-                    "created": chunk_created,
-                    "object": "chat.completion.chunk",
-                    "choices": [
+                yield llama_types.CreateChatCompletionStreamResponse(
+                    id="chat" + chunk_id,
+                    model=chunk["model"],
+                    created=chunk_created,
+                    object="chat.completion.chunk",
+                    choices=[
                         {
                             "index": 0,
                             "delta": {"role": "assistant", "content": ""},
                             "logprobs": None,
                             "finish_reason": None,
                         }
                     ],
-                }
+                )
             else:
                 prompt += f"{function_name}\n<|content|>"
                 grammar = get_grammar(function_name)
@@ -2221,20 +2221,20 @@ def generate_streaming(tools, functions, function_call, prompt):
                     prompt += f"{cleaned_completion_text}\n<|from|>assistant\n<|recipient|>"
                 else:
                     # Yield stop message
-                    yield {
-                        "id": "chat" + chunk_id,
-                        "model": chunk["model"],
-                        "created": chunk_created,
-                        "object": "chat.completion.chunk",
-                        "choices": [
+                    yield llama_types.CreateChatCompletionStreamResponse(
+                        id="chat" + chunk_id,
+                        model=chunk["model"],
+                        created=chunk_created,
+                        object="chat.completion.chunk",
+                        choices=[
                             {
                                 "index": 0,
                                 "delta": {},
                                 "logprobs": None,
                                 "finish_reason": "stop",
                             }
                         ],
-                    }
+                    )
                     break
             else:
                 # Check whether the model wants to generate another turn
@@ -2284,25 +2284,22 @@ def generate_streaming(tools, functions, function_call, prompt):
                         tool_index += 1
                     else:
                         # Yield tool_call/function_call stop message
-                        yield {
-                            "id": "chat" + chunk_id,
-                            "object": "chat.completion.chunk",
-                            "created": chunk_created,
-                            "model": chunk["model"],
-                            "choices": [
+                        yield llama_types.CreateChatCompletionStreamResponse(
+                            id="chat" + chunk_id,
+                            object="chat.completion.chunk",
+                            created=chunk_created,
+                            model=chunk["model"],
+                            choices=[
                                 {
                                     "index": 0,
                                     "finish_reason": "tool_calls" if tools is not None else "function_call",
                                     "logprobs": None,
                                     "delta": {
-                                        "role": None,
-                                        "content": None,
-                                        "function_call": None,
-                                        "tool_calls": None,
+                                        "role": None, "content": None, "function_call": None, "tool_calls": None
                                     },
                                 }
                             ],
-                        }
+                        )
                         break

     if stream is not False:
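
Note on the change: the streamed payload should be identical before and after, assuming llama_types.CreateChatCompletionStreamResponse is a TypedDict (calling a TypedDict with keyword arguments builds an ordinary dict). The sketch below uses a hypothetical stand-in class, StreamChunkSketch, rather than the library's actual definition, to illustrate why the new constructor call and the old dict literal are interchangeable at runtime while giving static type checkers a schema for the chunk keys.

from typing import Any, Dict, List, TypedDict


class StreamChunkSketch(TypedDict):
    # Hypothetical stand-in for llama_types.CreateChatCompletionStreamResponse;
    # the fields mirror the keyword arguments used in the diff above.
    id: str
    object: str
    created: int
    model: str
    choices: List[Dict[str, Any]]


chunk = {"id": "abc123", "created": 1700000000, "model": "example-model"}

# Keyword construction of a TypedDict yields a plain dict at runtime,
# equivalent to the literal {"id": ..., "object": ..., ...} it replaces.
response = StreamChunkSketch(
    id="chat" + chunk["id"],
    object="chat.completion.chunk",
    created=chunk["created"],
    model=chunk["model"],
    choices=[
        {
            "index": 0,
            "delta": {"role": "assistant", "content": ""},
            "logprobs": None,
            "finish_reason": None,
        }
    ],
)
assert isinstance(response, dict)
assert response["object"] == "chat.completion.chunk"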