Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 3af167d

Browse filesBrowse files
committed
Merge tag 'v0.2.16' into main
2 parents cc0fe43 + b7e60b6 commit 3af167d
Copy full SHA for 3af167d

File tree

Expand file treeCollapse file tree

6 files changed

+40
-21
lines changed
Filter options
Expand file treeCollapse file tree

6 files changed

+40
-21
lines changed

‎.github/workflows/build-and-release.yaml

Copy file name to clipboardExpand all lines: .github/workflows/build-and-release.yaml
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ jobs:
3333
3434
- name: Build wheels
3535
run: python -m cibuildwheel --output-dir wheelhouse
36+
env:
37+
# disable repair
38+
CIBW_REPAIR_WHEEL_COMMAND: ""
3639

3740
- uses: actions/upload-artifact@v3
3841
with:

‎CHANGELOG.md

Copy file name to clipboardExpand all lines: CHANGELOG.md
+13Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.2.16]
11+
12+
- Update llama.cpp to ggerganov/llama.cpp@a75fa576abba9d37f463580c379e4bbf1e1ad03c
13+
- Add `set_seed` to `Llama` class by @abetlen in fd41ed3a908761d286102a019a34c2938a15118d
14+
- Fix server doc arguments by @kjunggithub in #892
15+
- Fix response_format handler in llava chat handler by @abetlen in b62c44983921197ed10a7d29dc4ba920e9979380
16+
- Fix default max_tokens: chat completion is now unlimited (up to the context length) and completion defaults to 16 tokens, matching OpenAI defaults, by @abetlen in e7962d2c733cbbeec5a37392c81f64185a9a39e8
17+
- Fix json_schema_to_gbnf helper so that it takes a json schema string as input instead by @abetlen in faeae181b1e868643c0dc28fcf039f077baf0829
18+
- Add support for $ref and $def in json_schema_to_gbnf to handle more complex function schemas by @abetlen in 770df344369c0630df1be14be9f9e301e7c56d24
19+
- Update functionary chat handler for new OpenAI api by @abetlen in 1b376c62b775b401653facf25a519d116aafe99a
20+
- Fix missing default stop sequence in chatml chat format by @abetlen in b84d76a844149216d511cfd8cdb9827148a1853c
21+
- Fix sampling bug when logits_all=False by @abetlen in 6f0b0b1b840af846938ed74d0e8170a91c40e617
22+
1023
## [0.2.15]
1124

1225
- Update llama.cpp to ggerganov/llama.cpp@0a7c980b6f94a049cb804573df2d8092a34df8e4

‎llama_cpp/__init__.py

Copy file name to clipboard
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from .llama_cpp import *
22
from .llama import *
33

4-
__version__ = "0.2.15"
4+
__version__ = "0.2.16"

‎llama_cpp/llama.py

Copy file name to clipboardExpand all lines: llama_cpp/llama.py
+6-7Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,27 +1019,26 @@ def eval(self, tokens: Sequence[int]):
10191019
"""
10201020
assert self._ctx.ctx is not None
10211021
assert self._batch.batch is not None
1022-
n_ctx = self._n_ctx
1022+
self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
10231023
for i in range(0, len(tokens), self.n_batch):
10241024
batch = tokens[i : min(len(tokens), i + self.n_batch)]
1025-
n_past = min(n_ctx - len(batch), self.n_tokens)
1025+
n_past = self.n_tokens
10261026
n_tokens = len(batch)
1027-
self._ctx.kv_cache_seq_rm(-1, n_past, -1)
10281027
self._batch.set_batch(
10291028
batch=batch, n_past=n_past, logits_all=self.context_params.logits_all
10301029
)
10311030
self._ctx.decode(self._batch)
10321031
# Save tokens
1033-
self.input_ids[self.n_tokens : self.n_tokens + n_tokens] = batch
1032+
self.input_ids[n_past : n_past + n_tokens] = batch
10341033
# Save logits
1035-
rows = n_tokens if self.context_params.logits_all else 1
1034+
rows = n_tokens
10361035
cols = self._n_vocab
10371036
offset = (
10381037
0 if self.context_params.logits_all else n_tokens - 1
10391038
) # NOTE: Only save the last token logits if logits_all is False
1040-
self.scores[self.n_tokens + offset : self.n_tokens + n_tokens, :].reshape(
1039+
self.scores[n_past + offset : n_past + n_tokens, :].reshape(
10411040
-1
1042-
)[:] = self._ctx.get_logits()[: rows * cols]
1041+
)[:] = self._ctx.get_logits()[offset * cols: rows * cols]
10431042
# Update n_tokens
10441043
self.n_tokens += n_tokens
10451044

‎tests/test_llama.py

Copy file name to clipboardExpand all lines: tests/test_llama.py
+16-12Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
import ctypes
2+
13
import pytest
4+
25
import llama_cpp
36

47
MODEL = "./vendor/llama.cpp/models/ggml-vocab-llama.gguf"
@@ -36,19 +39,20 @@ def test_llama_cpp_tokenization():
3639

3740

3841
def test_llama_patch(monkeypatch):
39-
llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
42+
n_ctx = 128
43+
llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, n_ctx=n_ctx)
4044
n_vocab = llama_cpp.llama_n_vocab(llama._model.model)
45+
assert n_vocab == 32000
4146

4247
## Set up mock function
43-
def mock_eval(*args, **kwargs):
48+
def mock_decode(*args, **kwargs):
4449
return 0
4550

4651
def mock_get_logits(*args, **kwargs):
47-
return (llama_cpp.c_float * n_vocab)(
48-
*[llama_cpp.c_float(0) for _ in range(n_vocab)]
49-
)
52+
size = n_vocab * n_ctx
53+
return (llama_cpp.c_float * size)()
5054

51-
monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_eval)
55+
monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_decode)
5256
monkeypatch.setattr("llama_cpp.llama_cpp.llama_get_logits", mock_get_logits)
5357

5458
output_text = " jumps over the lazy dog."
@@ -126,19 +130,19 @@ def test_llama_pickle():
126130

127131

128132
def test_utf8(monkeypatch):
129-
llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
133+
n_ctx = 512
134+
llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, n_ctx=n_ctx, logits_all=True)
130135
n_vocab = llama.n_vocab()
131136

132137
## Set up mock function
133-
def mock_eval(*args, **kwargs):
138+
def mock_decode(*args, **kwargs):
134139
return 0
135140

136141
def mock_get_logits(*args, **kwargs):
137-
return (llama_cpp.c_float * n_vocab)(
138-
*[llama_cpp.c_float(0) for _ in range(n_vocab)]
139-
)
142+
size = n_vocab * n_ctx
143+
return (llama_cpp.c_float * size)()
140144

141-
monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_eval)
145+
monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_decode)
142146
monkeypatch.setattr("llama_cpp.llama_cpp.llama_get_logits", mock_get_logits)
143147

144148
output_text = "😀"

‎vendor/llama.cpp

Copy file name to clipboard

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.