CyberSys
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
Lines changed: 16 additions & 5 deletions
diff --git a/llama_cpp/llama_types.py b/llama_cpp/llama_types.py
Lines changed: 1 addition & 0 deletions
@@ -318,7 +318,14 @@ def chat_completion_handler(
             stop = stop + rstop
 
         if response_format is not None and response_format["type"] == "json_object":
-            grammar = llama_grammar.LlamaGrammar.from_string(llama_grammar.JSON_GBNF)
+            try:
+                # EAFP: a missing "schema" key raises KeyError here, so requests
+                # without a schema still get the generic JSON grammar below.
+                grammar = llama_grammar.LlamaGrammar.from_json_schema(
+                    json.dumps(response_format["schema"])
+                )
+            except Exception:  # best-effort: no schema, or schema conversion failed
+                grammar = llama_grammar.LlamaGrammar.from_string(llama_grammar.JSON_GBNF)
 
         completion_or_chunks = llama.create_completion(
             prompt=prompt,
@@ -1434,10 +1441,14 @@ def __call__(
         prompt = llama.input_ids[: llama.n_tokens].tolist()
 
         if response_format is not None and response_format["type"] == "json_object":
-            with suppress_stdout_stderr(disable=self.verbose):
-                grammar = llama_grammar.LlamaGrammar.from_string(
-                    llama_grammar.JSON_GBNF
-                )
+            with suppress_stdout_stderr(disable=self.verbose):  # keep the output suppression the replaced code applied
+                try:
+                    # EAFP: a missing "schema" key raises KeyError -> generic JSON grammar below.
+                    grammar = llama_grammar.LlamaGrammar.from_json_schema(
+                        json.dumps(response_format["schema"])
+                    )
+                except Exception:  # best-effort: no schema, or schema conversion failed
+                    grammar = llama_grammar.LlamaGrammar.from_string(llama_grammar.JSON_GBNF)
 
         return _convert_completion_to_chat(
             llama.create_completion(
 
@@ -154,6 +154,7 @@ class ChatCompletionFunctionCallOption(TypedDict):
 
 class ChatCompletionRequestResponseFormat(TypedDict):
     type: Literal["text", "json_object"]
+    schema: NotRequired[JsonType]  # optional JSON schema; the chat handlers serialize it with json.dumps and build the output grammar from it. Field modeled on https://docs.endpoints.anyscale.com/guides/json_mode/
 
 
 class ChatCompletionRequestMessageContentPartText(TypedDict):