bmedi
diff --git a/examples/hf_pull/main.py b/examples/hf_pull/main.py
--- /dev/null
+++ b/examples/hf_pull/main.py
@@ -0,0 +1,39 @@
+import llama_cpp
+import llama_cpp.llama_tokenizer
+
+# Example script: build a Llama model via from_pretrained, ask one question, and print the streamed reply.
+llama = llama_cpp.Llama.from_pretrained(  # NOTE(review): presumably fetches the GGUF weights from the Hugging Face Hub - confirm in the llama-cpp-python docs
+    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
+    filename="*q8_0.gguf",  # looks like a glob selecting the q8_0 file in that repo - TODO confirm
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),  # tokenizer comes from a different repo id than the weights
+    verbose=False
+)
+# Single user message; response_format asks for a JSON object whose schema requires string "country" and "capital" fields.
+response = llama.create_chat_completion(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+    response_format={
+        "type": "json_object",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "country": {"type": "string"},
+                "capital": {"type": "string"}
+            },
+            "required": ["country", "capital"],
+        }
+    },
+    stream=True  # the result is iterated chunk by chunk in the loop below
+)
+# Print each piece of content as it is yielded.
+for chunk in response:
+    delta = chunk["choices"][0]["delta"]  # only the first choice of each chunk is read
+    if "content" not in delta:  # skip deltas that carry no "content" key
+        continue
+    print(delta["content"], end="", flush=True)  # no newline between pieces; flush so partial output shows up right away
+# End the line that the streamed pieces were written on.
+print()