Commit 3cb696c

Merge commit 'f2901d840e15ce2770eac300172471aa029c5fd5' into main

2 parents: 3337a98 + f2901d8

8 files changed: +160 −39 lines changed

‎.github/workflows/build-and-release.yaml

3 additions & 0 deletions

@@ -33,6 +33,9 @@ jobs:
 
       - name: Build wheels
         run: python -m cibuildwheel --output-dir wheelhouse
+        env:
+          # disable repair
+          CIBW_REPAIR_WHEEL_COMMAND: ""
 
       - uses: actions/upload-artifact@v3
         with:

‎CHANGELOG.md

22 additions & 0 deletions

@@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.18]
+
+- Update llama.cpp to ggerganov/llama.cpp@6bb4908a17150b49373b5f977685b2e180a04f6f
+
+## [0.2.17]
+
+- Update llama.cpp to ggerganov/llama.cpp@df9d1293defe783f42bc83af732d3c670552c541
+- Hotfix: Set `CUDA_ARCHITECTURES=OFF` for `llava_shared` target on Windows by @abetlen in 4388f3341413110217b98c4f097ac5c590bdf40b
+
+## [0.2.16]
+
+- Update llama.cpp to ggerganov/llama.cpp@a75fa576abba9d37f463580c379e4bbf1e1ad03c
+- Add `set_seed` to `Llama` class by @abetlen in fd41ed3a908761d286102a019a34c2938a15118d
+- Fix server doc arguments by @kjunggithub in #892
+- Fix response_format handler in llava chat handler by @abetlen in b62c44983921197ed10a7d29dc4ba920e9979380
+- Fix default max_tokens: chat completion is now unlimited (up to the context length) and completion defaults to 16 tokens to match the OpenAI defaults, by @abetlen in e7962d2c733cbbeec5a37392c81f64185a9a39e8
+- Fix json_schema_to_gbnf helper so that it takes a json schema string as input instead by @abetlen in faeae181b1e868643c0dc28fcf039f077baf0829
+- Add support for $ref and $def in json_schema_to_gbnf to handle more complex function schemas by @abetlen in 770df344369c0630df1be14be9f9e301e7c56d24
+- Update functionary chat handler for the new OpenAI api by @abetlen in 1b376c62b775b401653facf25a519d116aafe99a
+- Fix: add default stop sequence to chatml chat format by @abetlen in b84d76a844149216d511cfd8cdb9827148a1853c
+- Fix sampling bug when logits_all=False by @abetlen in 6f0b0b1b840af846938ed74d0e8170a91c40e617
+
 ## [0.2.15]
 
 - Update llama.cpp to ggerganov/llama.cpp@0a7c980b6f94a049cb804573df2d8092a34df8e4
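
As a quick illustration of the `set_seed` and default `max_tokens` entries under 0.2.16, here is a minimal sketch (the model path is a placeholder, not part of this commit):

```python
from llama_cpp import Llama

# Placeholder model path, for illustration only.
llama = Llama(model_path="./models/your-model.gguf")

# Added in 0.2.16: reseed the sampler so repeated generations are reproducible.
llama.set_seed(1337)

# Plain completions now default to max_tokens=16 (matching OpenAI's default),
# while chat completions are bounded only by the context length.
out = llama("Q: Name three colors. A:", max_tokens=16)
print(out["choices"][0]["text"])
```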

‎CMakeLists.txt

8 additions & 0 deletions

@@ -6,6 +6,8 @@ option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python
 
 if (LLAMA_BUILD)
     set(BUILD_SHARED_LIBS "On")
+
+    # Building llama
     if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
         # Need to disable these llama.cpp flags on Apple x86_64,
         # otherwise users may encounter invalid instruction errors
@@ -41,8 +43,14 @@ if (LLAMA_BUILD)
         FILES $<TARGET_RUNTIME_DLLS:llama>
         DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
     )
+
+    # Building llava
     add_subdirectory(vendor/llama.cpp/examples/llava)
     set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
+    # Set CUDA_ARCHITECTURES to OFF on windows
+    if (WIN32)
+        set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
+    endif()
     install(
         TARGETS llava_shared
         LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp

‎examples/notebooks/Functions.ipynb

103 additions & 18 deletions

@@ -1,15 +1,41 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Functions\n",
+    "\n",
+    "The OpenAI compatible web server in `llama-cpp-python` supports function calling.\n",
+    "\n",
+    "Function calling allows API clients to specify a schema that gives the model a format it should respond in.\n",
+    "Function calling in `llama-cpp-python` works by combining models pretrained for function calling such as [`functionary`](https://huggingface.co/abetlen/functionary-7b-v1-GGUF) with constrained sampling to produce a response that is compatible with the schema.\n",
+    "\n",
+    "Note, however, that this improves but does not guarantee that the response will be compatible with the schema.\n",
+    "\n",
+    "## Requirements\n",
+    "\n",
+    "Before we begin you will need the following:\n",
+    "\n",
+    "- A running `llama-cpp-python` server with a function calling compatible model. [See here](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)\n",
+    "- The OpenAI Python Client: `pip install openai`\n",
+    "- (Optional) The Instructor Python Library: `pip install instructor`\n",
+    "\n",
+    "## Function Calling with OpenAI Python Client\n",
+    "\n",
+    "We'll start with a basic demo that only uses the OpenAI Python Client."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "ChatCompletion(id='chatcmpl-b6dcbb47-1120-4761-8cd9-83542c97647b', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content=\"The current temperature in San Francisco is 72 degrees Fahrenheit. It's a sunny day with clear skies, making it perfect for outdoor activities.\\n \", role='assistant', function_call=None, tool_calls=None))], created=1699602158, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=38, prompt_tokens=135, total_tokens=173))\n"
+      "ChatCompletion(id='chatcmpl-a2d9eb9f-7354-472f-b6ad-4d7a807729a3', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='The current weather in San Francisco is **72°F** (22°C).\\n ', role='assistant', function_call=None, tool_calls=None))], created=1699638365, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=22, prompt_tokens=136, total_tokens=158))\n"
      ]
     }
    ],
@@ -20,7 +46,7 @@
     "\n",
     "client = openai.OpenAI(\n",
     "    api_key = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\", # can be anything\n",
-    "    base_url = \"http://100.64.159.73:8000/v1\"\n",
+    "    base_url = \"http://100.64.159.73:8000/v1\" # NOTE: Replace with IP address and port of your llama-cpp-python server\n",
     ")\n",
     "\n",
     "# Example dummy function hard coded to return the same weather\n",
@@ -100,9 +126,32 @@
     "print(run_conversation())"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Function Calling with Instructor\n",
+    "\n",
+    "The above example is a bit verbose and requires you to manually verify the schema.\n",
+    "\n",
+    "For our next examples we'll use the `instructor` library to simplify the process and accomplish a number of different tasks with function calling.\n",
+    "\n",
+    "You'll first need to install the [`instructor`](https://github.com/jxnl/instructor/) library.\n",
+    "\n",
+    "You can do so by running the following command in your terminal:\n",
+    "\n",
+    "```bash\n",
+    "pip install instructor\n",
+    "```\n",
+    "\n",
+    "Below we'll go through a few basic examples taken directly from the [instructor cookbook](https://jxnl.github.io/instructor/).\n",
+    "\n",
+    "## Basic Usage"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -139,11 +188,28 @@
     "print(user)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Text Classification\n",
+    "\n",
+    "### Single-Label Classification"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "class_label=<Labels.SPAM: 'spam'>\n"
+     ]
+    }
+   ],
    "source": [
     "import enum\n",
     "\n",
@@ -172,19 +238,27 @@
     "    ) # type: ignore\n",
     "\n",
     "prediction = classify(\"Hello there I'm a Nigerian prince and I want to give you money\")\n",
-    "assert prediction.class_label == Labels.SPAM"
+    "assert prediction.class_label == Labels.SPAM\n",
+    "print(prediction)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Multi-Label Classification"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "class_labels=[<MultiLabels.BILLING: 'billing'>, <MultiLabels.TECH_ISSUE: 'tech_issue'>]\n"
+      "class_labels=[<MultiLabels.TECH_ISSUE: 'tech_issue'>, <MultiLabels.BILLING: 'billing'>]\n"
      ]
     }
    ],
@@ -223,16 +297,27 @@
     "print(prediction)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Self-Critique"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "question='What is the meaning of life?' answer='The meaning of life, according to the Devil, is to live a life of sin and debauchery.'\n"
+      "question='What is the meaning of life?' answer='According to the Devil, the meaning of life is to live a life of sin and debauchery.'\n",
+      "1 validation error for QuestionAnswerNoEvil\n",
+      "answer\n",
+      "  Assertion failed, The statement promotes sin and debauchery, which can be considered objectionable. [type=assertion_error, input_value='According to the Devil, ... of sin and debauchery.', input_type=str]\n",
+      "    For further information visit https://errors.pydantic.dev/2.3/v/assertion_error\n"
      ]
     }
    ],
@@ -294,6 +379,13 @@
     "    print(e)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Answering Questions with Validated Citations"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 42,
@@ -366,13 +458,6 @@
     "qa = ask_ai(question, context)\n",
     "print(qa)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

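For orientation, here is a condensed sketch of the notebook's first example: calling a local llama-cpp-python server through the OpenAI client with a tool schema (the base_url, API key, and model name are placeholders, just as in the notebook):

```python
import openai

client = openai.OpenAI(
    api_key="sk-xxx",                     # can be anything; the local server ignores it
    base_url="http://localhost:8000/v1",  # placeholder: point at your llama-cpp-python server
)

# The server combines a function-calling model (e.g. functionary) with constrained
# sampling to steer the response toward this schema.
tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"],
        },
    },
}]

response = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",  # the model name is passed through to the server
    messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
    tools=tools,
    tool_choice="auto",
)
print(response.choices[0].message)
```
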
‎llama_cpp/__init__.py

1 addition & 1 deletion

@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.15"
+__version__ = "0.2.18"

‎llama_cpp/llama.py

6 additions & 7 deletions

@@ -1019,27 +1019,26 @@ def eval(self, tokens: Sequence[int]):
         """
         assert self._ctx.ctx is not None
         assert self._batch.batch is not None
-        n_ctx = self._n_ctx
+        self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
         for i in range(0, len(tokens), self.n_batch):
             batch = tokens[i : min(len(tokens), i + self.n_batch)]
-            n_past = min(n_ctx - len(batch), self.n_tokens)
+            n_past = self.n_tokens
             n_tokens = len(batch)
-            self._ctx.kv_cache_seq_rm(-1, n_past, -1)
             self._batch.set_batch(
                 batch=batch, n_past=n_past, logits_all=self.context_params.logits_all
             )
             self._ctx.decode(self._batch)
             # Save tokens
-            self.input_ids[self.n_tokens : self.n_tokens + n_tokens] = batch
+            self.input_ids[n_past : n_past + n_tokens] = batch
             # Save logits
-            rows = n_tokens if self.context_params.logits_all else 1
+            rows = n_tokens
             cols = self._n_vocab
             offset = (
                 0 if self.context_params.logits_all else n_tokens - 1
             ) # NOTE: Only save the last token logits if logits_all is False
-            self.scores[self.n_tokens + offset : self.n_tokens + n_tokens, :].reshape(
+            self.scores[n_past + offset : n_past + n_tokens, :].reshape(
                 -1
-            )[:] = self._ctx.get_logits()[: rows * cols]
+            )[:] = self._ctx.get_logits()[offset * cols: rows * cols]
             # Update n_tokens
             self.n_tokens += n_tokens
 
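
The logits bookkeeping above is the core of the "sampling bug when logits_all=False" fix. A small standalone sketch of the index arithmetic (NumPy stand-ins for the real buffers; this is not the library's code):

```python
import numpy as np

n_tokens, n_vocab, logits_all = 4, 8, False

# Stand-in for the flat buffer returned by llama_get_logits: one row per batch token,
# where only the flagged (here: last) token's row is actually filled by the decode.
flat_logits = np.arange(n_tokens * n_vocab, dtype=np.float32)

rows = n_tokens
cols = n_vocab
offset = 0 if logits_all else n_tokens - 1  # keep only the last token's logits

scores = np.zeros((n_tokens, n_vocab), dtype=np.float32)
# New slicing: read from row `offset` of the buffer (the last row here) ...
scores[offset:n_tokens, :].reshape(-1)[:] = flat_logits[offset * cols : rows * cols]
# ... whereas the old code read flat_logits[: rows * cols], i.e. starting at row 0,
# which is the wrong row when only the last token carries logits.
print(scores[-1])
```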

‎tests/test_llama.py

16 additions & 12 deletions

@@ -1,4 +1,7 @@
+import ctypes
+
 import pytest
+
 import llama_cpp
 
 MODEL = "./vendor/llama.cpp/models/ggml-vocab-llama.gguf"
@@ -36,19 +39,20 @@ def test_llama_cpp_tokenization():
 
 
 def test_llama_patch(monkeypatch):
-    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
+    n_ctx = 128
+    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, n_ctx=n_ctx)
     n_vocab = llama_cpp.llama_n_vocab(llama._model.model)
+    assert n_vocab == 32000
 
     ## Set up mock function
-    def mock_eval(*args, **kwargs):
+    def mock_decode(*args, **kwargs):
         return 0
 
     def mock_get_logits(*args, **kwargs):
-        return (llama_cpp.c_float * n_vocab)(
-            *[llama_cpp.c_float(0) for _ in range(n_vocab)]
-        )
+        size = n_vocab * n_ctx
+        return (llama_cpp.c_float * size)()
 
-    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_eval)
+    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_decode)
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_get_logits", mock_get_logits)
 
     output_text = " jumps over the lazy dog."
@@ -126,19 +130,19 @@ def test_llama_pickle():
 
 
 def test_utf8(monkeypatch):
-    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
+    n_ctx = 512
+    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, n_ctx=n_ctx, logits_all=True)
     n_vocab = llama.n_vocab()
 
     ## Set up mock function
-    def mock_eval(*args, **kwargs):
+    def mock_decode(*args, **kwargs):
         return 0
 
     def mock_get_logits(*args, **kwargs):
-        return (llama_cpp.c_float * n_vocab)(
-            *[llama_cpp.c_float(0) for _ in range(n_vocab)]
-        )
+        size = n_vocab * n_ctx
+        return (llama_cpp.c_float * size)()
 
-    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_eval)
+    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_decode)
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_get_logits", mock_get_logits)
 
     output_text = "😀"
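
One note on the updated mocks above: `(llama_cpp.c_float * size)()` relies on ctypes zero-initializing array instances, so the larger n_vocab * n_ctx buffer needs no explicit fill. A standalone illustration (plain ctypes, no model required):

```python
import ctypes

n_vocab, n_ctx = 32000, 128
size = n_vocab * n_ctx

# Multiplying a ctypes type by a length creates an array type;
# instantiating that type zero-fills every element.
logits = (ctypes.c_float * size)()
assert len(logits) == size
assert logits[0] == 0.0 and logits[size - 1] == 0.0
```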

‎vendor/llama.cpp
