Commit 3af7b21

Add functionary support (abetlen#784)
* Add common grammars and json-schema-to-grammar utility function from llama.cpp
* Pass functions to format function
* Add basic functionary formatting
* Add LlamaChatHandler for more complex chat use cases
* Add function calling example notebook
* Add support for regular chat completions alongside function calling
1 parent df31303 commit 3af7b21
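The new path is exercised through Llama.create_chat_completion, which now forwards functions and function_call to a per-format handler (see the llama_cpp/llama.py diff below). A minimal sketch of the intended usage, assuming functionary weights on disk and that the format is registered under the name "functionary" (the model path and format name are illustrative assumptions, not taken from this commit):

from llama_cpp import Llama

# Hypothetical local model path; functionary weights are not part of this commit.
llm = Llama(model_path="./functionary-7b-v1.gguf", chat_format="functionary")

response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "What's the weather like in Boston?"}],
    functions=[
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        }
    ],
    function_call="auto",  # let the model decide whether to call the function
)
print(response["choices"][0]["message"])

The notebook added below drives the same machinery through the OpenAI client instead, pointed at a local llama-cpp-python server.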

File tree: 5 files changed (+936 -99 lines)

‎examples/notebooks/Functions.ipynb

New file: +225 lines (225 additions, 0 deletions)
Cell 1: an end-to-end function-calling round trip, pointed at a local llama-cpp-python server via openai.api_base.

import openai
import json

openai.api_key = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"  # can be anything
openai.api_base = "http://100.64.159.73:8000/v1"

# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
    """Get the current weather in a given location"""
    weather_info = {
        "location": location,
        "temperature": "72",
        "unit": unit,
        "forecast": ["sunny", "windy"],
    }
    return json.dumps(weather_info)

def run_conversation():
    # Step 1: send the conversation and available functions to GPT
    messages = [{"role": "user", "content": "What's the weather like in Boston?"}]
    functions = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        messages=messages,
        functions=functions,
        function_call="auto",  # auto is default, but we'll be explicit
    )
    response_message = response["choices"][0]["message"]

    # Step 2: check if GPT wanted to call a function
    if response_message.get("function_call"):
        # Step 3: call the function
        # Note: the JSON response may not always be valid; be sure to handle errors
        available_functions = {
            "get_current_weather": get_current_weather,
        }  # only one function in this example, but you can have multiple
        function_name = response_message["function_call"]["name"]
        function_to_call = available_functions[function_name]
        function_args = json.loads(response_message["function_call"]["arguments"])
        function_response = function_to_call(
            location=function_args.get("location"),
            unit=function_args.get("unit"),
        )

        # Step 4: send the info on the function call and function response to GPT
        messages.append(response_message)  # extend conversation with assistant's reply
        messages.append(
            {
                "role": "function",
                "name": function_name,
                "content": function_response,
            }
        )  # extend conversation with function response
        second_response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0613",
            messages=messages,
        )  # get a new response from GPT where it can see the function response
        return second_response
    else:
        print(response)
        print("No function")

print(run_conversation())

Output:

{
  "id": "chatcmpl-a6db1bbb-a128-4c28-88fe-30717ec806b2",
  "object": "chat.completion",
  "created": 1698989577,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "The current weather in Boston is sunny with a temperature of 72 degrees"
      },
      "finish_reason": "length"
    }
  ],
  "usage": {
    "prompt_tokens": 135,
    "completion_tokens": 16,
    "total_tokens": 151
  }
}
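Step 3 above calls json.loads directly on the model-produced arguments; as the inline note warns, that JSON may not always be valid. A hedged sketch of a defensive variant (parse_function_args is a hypothetical helper, not part of the notebook):

import json

def parse_function_args(raw: str) -> dict:
    """Parse model-produced function arguments, tolerating bad JSON.

    Falls back to an empty dict instead of raising, so the
    conversation loop can recover or re-prompt the model.
    """
    try:
        args = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    return args if isinstance(args, dict) else {}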
Cell 2: structured extraction into a pydantic model via instructor, against the same server.

from pydantic import BaseModel
from instructor import patch

patch()

class UserDetail(BaseModel):
    name: str
    age: int

user: UserDetail = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    response_model=UserDetail,
    messages=[
        {"role": "user", "content": "Extract Jason is 25 years old"},
    ]
)
print(user)

Output:

name='Jason' age=25
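instructor's patch() wraps openai.ChatCompletion.create so that response_model is turned into a function schema and the reply is parsed back into the pydantic model. A rough sketch of the equivalent manual calls (a sketch of the mechanism, not instructor's actual code; uses the pydantic v1-style .schema() API of this era):

from pydantic import BaseModel
import openai

class UserDetail(BaseModel):
    name: str
    age: int

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Extract Jason is 25 years old"}],
    functions=[{"name": "UserDetail", "parameters": UserDetail.schema()}],
    function_call={"name": "UserDetail"},  # force the structured "call"
)
args = response["choices"][0]["message"]["function_call"]["arguments"]
user = UserDetail.parse_raw(args)  # validates name: str, age: int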
Cell 3: a regular chat completion, with no functions involved, showing both modes coexist.

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0613",
    messages=[
        {"role": "user", "content": "What's the weather like in Boston?"}
    ]
)
print(response)

Output:

{
  "id": "chatcmpl-59bcefad-9df5-4d6b-802c-5537b3e9044e",
  "object": "chat.completion",
  "created": 1698989585,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "I don't have up-to-date information on the current weather conditions"
      },
      "finish_reason": "length"
    }
  ],
  "usage": {
    "prompt_tokens": 62,
    "completion_tokens": 16,
    "total_tokens": 78
  }
}

(A trailing empty code cell closes the notebook. Notebook metadata: kernelspec "python-3.8.10" / python3, Python 3.11.5+, nbformat 4, minor 2.)

‎llama_cpp/llama.py

+7 -87 lines (7 additions, 87 deletions)
@@ -24,7 +24,7 @@
 from . import llama_cpp
 from .llama_types import *
 from .llama_grammar import LlamaGrammar
-from . import llama_chat_format
+import llama_cpp.llama_chat_format as llama_chat_format
 
 import numpy as np
 import numpy.typing as npt
@@ -428,7 +428,7 @@ def __init__(
 
         if self.verbose:
             print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
-
+
         self.chat_format = chat_format
 
         self._n_vocab = self.n_vocab()
@@ -1539,78 +1539,6 @@ def __call__(
             grammar=grammar,
         )
 
-    def _convert_text_completion_to_chat(
-        self, completion: Completion
-    ) -> ChatCompletion:
-        return {
-            "id": "chat" + completion["id"],
-            "object": "chat.completion",
-            "created": completion["created"],
-            "model": completion["model"],
-            "choices": [
-                {
-                    "index": 0,
-                    "message": {
-                        "role": "assistant",
-                        "content": completion["choices"][0]["text"],
-                    },
-                    "finish_reason": completion["choices"][0]["finish_reason"],
-                }
-            ],
-            "usage": completion["usage"],
-        }
-
-    def _convert_text_completion_chunks_to_chat(
-        self,
-        chunks: Iterator[CompletionChunk],
-    ) -> Iterator[ChatCompletionChunk]:
-        for i, chunk in enumerate(chunks):
-            if i == 0:
-                yield {
-                    "id": "chat" + chunk["id"],
-                    "model": chunk["model"],
-                    "created": chunk["created"],
-                    "object": "chat.completion.chunk",
-                    "choices": [
-                        {
-                            "index": 0,
-                            "delta": {
-                                "role": "assistant",
-                            },
-                            "finish_reason": None,
-                        }
-                    ],
-                }
-            yield {
-                "id": "chat" + chunk["id"],
-                "model": chunk["model"],
-                "created": chunk["created"],
-                "object": "chat.completion.chunk",
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {
-                            "content": chunk["choices"][0]["text"],
-                        }
-                        if chunk["choices"][0]["finish_reason"] is None
-                        else {},
-                        "finish_reason": chunk["choices"][0]["finish_reason"],
-                    }
-                ],
-            }
-
-    def _convert_completion_to_chat(
-        self,
-        completion_or_chunks: Union[Completion, Iterator[CompletionChunk]],
-        stream: bool = False,
-    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
-        if stream:
-            chunks: Iterator[CompletionChunk] = completion_or_chunks  # type: ignore
-            return self._convert_text_completion_chunks_to_chat(chunks)
-        else:
-            completion: Completion = completion_or_chunks  # type: ignore
-            return self._convert_text_completion_to_chat(completion)
-
     def create_chat_completion(
         self,
         messages: List[ChatCompletionRequestMessage],
@@ -1648,19 +1576,12 @@ def create_chat_completion(
         Returns:
             Generated chat completion or a stream of chat completion chunks.
         """
-
-        format = llama_chat_format.get_chat_format(self.chat_format)
-        result = format(
+        handler = llama_chat_format.get_chat_completion_handler(self.chat_format)
+        return handler(
+            self,
             messages=messages,
-        )
-        prompt = result.prompt
-        if result.stop is not None:
-            stop = [] if stop is None else [stop] if isinstance(stop, str) else stop
-            rstop = result.stop if isinstance(result.stop, list) else [result.stop]
-            stop = stop + rstop
-
-        completion_or_chunks = self.create_completion(
-            prompt=prompt,
+            functions=functions,
+            function_call=function_call,
             temperature=temperature,
             top_p=top_p,
             top_k=top_k,
@@ -1678,7 +1599,6 @@ def create_chat_completion(
             logits_processor=logits_processor,
             grammar=grammar,
         )
-        return self._convert_completion_to_chat(completion_or_chunks, stream=stream)  # type: ignore
 
     def _free_model(self, *, _lbatch_free=llama_cpp._lib.llama_batch_free, _lfree_model=llama_cpp._lib.llama_free_model, _free=llama_cpp._lib.llama_free):
         batch = getattr(self, 'batch', None)
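After this refactor, create_chat_completion is a thin dispatcher: the handler looked up via get_chat_completion_handler receives the Llama instance plus all sampling parameters and owns both prompt construction and response shaping. A minimal sketch of a custom handler, assuming the same module exposes a register_chat_completion_handler hook to match the lookup above (an assumption), and ignoring streaming for brevity:

import llama_cpp.llama_chat_format as llama_chat_format

@llama_chat_format.register_chat_completion_handler("simple-chat")
def simple_chat_handler(llama, *, messages, functions=None, function_call=None,
                        stream=False, stop=None, **kwargs):
    # Render the chat into a flat prompt; a real handler (like the functionary
    # one added in this commit) would also fold `functions` into the prompt
    # and constrain the output with a grammar. `stream` and `stop` are
    # captured explicitly so they don't collide with the values used below.
    prompt = "".join(f"{m['role']}: {m['content']}\n" for m in messages) + "assistant:"
    completion = llama.create_completion(prompt=prompt, stop=["\n"], **kwargs)
    # Same chat-completion envelope the removed _convert_text_completion_to_chat built.
    return {
        "id": "chat" + completion["id"],
        "object": "chat.completion",
        "created": completion["created"],
        "model": completion["model"],
        "choices": [{
            "index": 0,
            "message": {"role": "assistant",
                        "content": completion["choices"][0]["text"]},
            "finish_reason": completion["choices"][0]["finish_reason"],
        }],
        "usage": completion["usage"],
    }

A Llama constructed with chat_format="simple-chat" would then dispatch to this handler, exactly as the functionary format is dispatched.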
