2 files changed: +17 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -318,7 +318,14 @@ def chat_completion_handler(
318
318
stop = stop + rstop
319
319
320
320
if response_format is not None and response_format ["type" ] == "json_object" :
321
- grammar = llama_grammar .LlamaGrammar .from_string (llama_grammar .JSON_GBNF )
321
+ try :
322
+ # create grammar from json schema
323
+ if "schema" in response_format :
324
+ grammar = llama_grammar .LlamaGrammar .from_json_schema (
325
+ json .dumps (response_format ["schema" ])
326
+ )
327
+ except Exception as e :
328
+ grammar = llama_grammar .LlamaGrammar .from_string (llama_grammar .JSON_GBNF )
322
329
323
330
completion_or_chunks = llama .create_completion (
324
331
prompt = prompt ,
@@ -1434,10 +1441,14 @@ def __call__(
1434
1441
prompt = llama .input_ids [: llama .n_tokens ].tolist ()
1435
1442
1436
1443
if response_format is not None and response_format ["type" ] == "json_object" :
1437
- with suppress_stdout_stderr (disable = self .verbose ):
1438
- grammar = llama_grammar .LlamaGrammar .from_string (
1439
- llama_grammar .JSON_GBNF
1440
- )
1444
+ try :
1445
+ # create grammar from json schema
1446
+ if "schema" in response_format :
1447
+ grammar = llama_grammar .LlamaGrammar .from_json_schema (
1448
+ json .dumps (response_format ["schema" ])
1449
+ )
1450
+ except Exception as e :
1451
+ grammar = llama_grammar .LlamaGrammar .from_string (llama_grammar .JSON_GBNF )
1441
1452
1442
1453
return _convert_completion_to_chat (
1443
1454
llama .create_completion (
Original file line number | Diff line number | Diff line change
@@ -154,6 +154,7 @@ class ChatCompletionFunctionCallOption(TypedDict):
154
154
155
155
class ChatCompletionRequestResponseFormat (TypedDict ):
156
156
type : Literal ["text" , "json_object" ]
157
+ schema : NotRequired [JsonType ] # https://docs.endpoints.anyscale.com/guides/json_mode/
157
158
158
159
159
160
class ChatCompletionRequestMessageContentPartText (TypedDict ):
You can’t perform that action at this time.
0 commit comments