Commit 422beeb

Merge branch 'abetlen:main' into main
2 parents: 5442c78 + 755f9fa

5 files changed: 66 additions, 11 deletions

README.md

7 additions, 1 deletion

@@ -17,12 +17,18 @@ This package provides:
 
 ## Installation
 
-Install from PyPI:
+Install from PyPI (requires a c compiler):
 
 ```bash
 pip install llama-cpp-python
 ```
 
+The above command will attempt to install the package and build build `llama.cpp` from source.
+This is the recommended installation method as it ensures that `llama.cpp` is built with the available optimizations for your system.
+
+This method defaults to using `make` to build `llama.cpp` on Linux / MacOS and `cmake` on Windows.
+You can force the use of `cmake` on Linux / MacOS setting the `FORCE_CMAKE=1` environment variable before installing.
+
 ## High-level API
 
 ```python
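
The `FORCE_CMAKE` variable only has to be visible to the process that runs `pip`. A minimal sketch of doing the same thing from a Python script (the use of `subprocess` here is purely illustrative; the plain `pip install` command from the README is all the package requires):

```python
# Scripted equivalent of running `FORCE_CMAKE=1 pip install llama-cpp-python`
# in a shell. Illustrative only; the plain pip command works the same way.
import os
import subprocess
import sys

env = dict(os.environ, FORCE_CMAKE="1")  # force the cmake build path on Linux / MacOS
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "llama-cpp-python"],
    env=env,
)
```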

examples/low_level_api/low_level_api_chat_cpp.py

3 additions, 3 deletions

@@ -201,7 +201,7 @@ def __init__(self, params: GptParams) -> None:
     # tokenize a prompt
     def _tokenize(self, prompt, bos=True):
         _arr = (llama_cpp.llama_token * (len(prompt) + 1))()
-        _n = llama_cpp.llama_tokenize(self.ctx, prompt.encode("utf8"), _arr, len(_arr), bos)
+        _n = llama_cpp.llama_tokenize(self.ctx, prompt.encode("utf8", errors="ignore"), _arr, len(_arr), bos)
         return _arr[:_n]
 
     def set_color(self, c):
@@ -342,7 +342,7 @@ def exit(self):
     # return past text
     def past(self):
         for id in self.last_n_tokens[-self.n_past:]:
-            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8")
+            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8", errors="ignore")
 
     # write input
     def input(self, prompt: str):
@@ -356,7 +356,7 @@ def input(self, prompt: str):
     def output(self):
         self.remaining_tokens = self.params.n_predict
         for id in self.generate():
-            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8")
+            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8", errors="ignore")
 
     # read user input
     def read_input(self):
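
The only change in this file is switching the strict `.decode("utf-8")` calls to `errors="ignore"`. The motivation is that llama.cpp tokens map to raw byte sequences, so a single multi-byte UTF-8 character (an emoji, for example) can be split across tokens, and strict decoding of such a fragment raises `UnicodeDecodeError`. A minimal standalone sketch of the failure mode (the emoji and the split point are illustrative, not taken from the commit):

```python
# A 4-byte UTF-8 character split across two chunks, as can happen when
# streaming token-by-token output.
emoji = "😀".encode("utf-8")   # b'\xf0\x9f\x98\x80'
fragment = emoji[:2]           # an incomplete multi-byte sequence

try:
    fragment.decode("utf-8")   # strict decoding raises on the partial character
except UnicodeDecodeError as exc:
    print("strict decode failed:", exc)

print(repr(fragment.decode("utf-8", errors="ignore")))  # '' (partial bytes dropped)
print(emoji.decode("utf-8", errors="ignore"))           # the complete emoji decodes fine
```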

examples/low_level_api/low_level_api_llama_cpp.py

1 addition, 1 deletion

@@ -70,7 +70,7 @@
     if not input_noecho:
         for id in embd:
             print(
-                llama_cpp.llama_token_to_str(ctx, id).decode("utf-8"),
+                llama_cpp.llama_token_to_str(ctx, id).decode("utf-8", errors="ignore"),
                 end="",
                 flush=True,
             )

llama_cpp/llama.py

20 additions, 5 deletions

@@ -446,6 +446,7 @@ def _create_completion(
             self.load_state(self.cache[prompt_tokens])
 
         finish_reason = "length"
+        multibyte_fix = 0
         for token in self.generate(
             prompt_tokens,
             top_k=top_k,
@@ -467,6 +468,20 @@
             completion_tokens.append(token)
 
             all_text = self.detokenize(completion_tokens)
+
+            # Contains multi-byte UTF8
+            for k,char in enumerate(all_text[-3:]):
+                k = 3 - k
+                for num,pattern in [(2, 192), (3, 224), (4, 240)]:
+                    # Bitwise AND check
+                    if (num > k and pattern & char == pattern):
+                        multibyte_fix = num - k
+
+            # Stop incomplete bytes from passing
+            if (multibyte_fix > 0):
+                multibyte_fix -= 1
+                continue
+
             any_stop = [s for s in stop_sequences if s in all_text]
             if len(any_stop) > 0:
                 first_stop = any_stop[0]
@@ -495,7 +510,7 @@
                 "model": self.model_path,
                 "choices": [
                     {
-                        "text": text[start:].decode("utf-8"),
+                        "text": text[start:].decode("utf-8", errors="ignore"),
                         "index": 0,
                         "logprobs": None,
                         "finish_reason": None,
@@ -516,7 +531,7 @@
                 "model": self.model_path,
                 "choices": [
                     {
-                        "text": text[returned_characters:].decode("utf-8"),
+                        "text": text[returned_characters:].decode("utf-8", errors="ignore"),
                         "index": 0,
                         "logprobs": None,
                         "finish_reason": finish_reason,
@@ -525,7 +540,7 @@
             }
             return
 
-        text_str = text.decode("utf-8")
+        text_str = text.decode("utf-8", errors="ignore")
 
         if echo:
             text_str = prompt + text_str
@@ -543,7 +558,7 @@
 
         all_tokens = prompt_tokens + completion_tokens
         all_token_strs = [
-            self.detokenize([token]).decode("utf-8") for token in all_tokens
+            self.detokenize([token]).decode("utf-8", errors="ignore") for token in all_tokens
         ]
         all_logprobs = [
             [Llama.logit_to_logprob(logit) for logit in row]
@@ -562,7 +577,7 @@
             )
             token_logprobs.append(sorted_logprobs[int(token)][0])
             top_logprob = {
-                self.detokenize([llama_cpp.llama_token(i)]).decode("utf-8"): logprob
+                self.detokenize([llama_cpp.llama_token(i)]).decode("utf-8", errors="ignore"): logprob
                 for logprob, i in sorted_logprobs[:logprobs]
             }
             top_logprob.update({token_str: sorted_logprobs[int(token)][0]})
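
The `multibyte_fix` loop added above handles the streaming side of the same problem: it scans the last three bytes of the detokenized text for a UTF-8 lead byte (192/0xC0, 224/0xE0 and 240/0xF0 start 2-, 3- and 4-byte sequences) and, while continuation bytes are still missing, skips emitting a chunk so a partial character never reaches the caller. A standalone sketch of the same check, pulled out of the generator for clarity (`incomplete_utf8_tail` is a hypothetical helper name, not part of the library):

```python
def incomplete_utf8_tail(all_text: bytes) -> int:
    """Return how many continuation bytes are still missing at the end of all_text.

    Mirrors the lead-byte check in the diff: 192 (0b110xxxxx), 224 (0b1110xxxx)
    and 240 (0b11110xxx) start 2-, 3- and 4-byte UTF-8 sequences, and a lead
    byte found k positions from the end is incomplete when the sequence length
    exceeds k.
    """
    multibyte_fix = 0
    for k, char in enumerate(all_text[-3:]):
        k = 3 - k  # position of this byte counted from the end of the buffer
        for num, pattern in [(2, 192), (3, 224), (4, 240)]:
            # Bitwise AND check: is this the lead byte of an incomplete sequence?
            if num > k and pattern & char == pattern:
                multibyte_fix = num - k
    return multibyte_fix


print(incomplete_utf8_tail("abc".encode("utf-8")))             # 0: nothing pending
print(incomplete_utf8_tail(b"ab" + "😀".encode("utf-8")[:2]))  # 2: half of a 4-byte character
```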

tests/test_llama.py

35 additions, 1 deletion

@@ -93,4 +93,38 @@ def test_llama_pickle():
 
     text = b"Hello World"
 
-    assert llama.detokenize(llama.tokenize(text)) == text
+    assert llama.detokenize(llama.tokenize(text)) == text
+
+def test_utf8(monkeypatch):
+    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
+
+    ## Set up mock function
+    def mock_eval(*args, **kwargs):
+        return 0
+
+    monkeypatch.setattr("llama_cpp.llama_cpp.llama_eval", mock_eval)
+
+    output_text = "😀"
+    output_tokens = llama.tokenize(output_text.encode("utf-8"))
+    token_eos = llama.token_eos()
+    n = 0
+
+    def mock_sample(*args, **kwargs):
+        nonlocal n
+        if n < len(output_tokens):
+            n += 1
+            return output_tokens[n - 1]
+        else:
+            return token_eos
+
+    monkeypatch.setattr("llama_cpp.llama_cpp.llama_sample_top_p_top_k", mock_sample)
+
+    ## Test basic completion with utf8 multibyte
+    n = 0 # reset
+    completion = llama.create_completion("", max_tokens=4)
+    assert completion["choices"][0]["text"] == output_text
+
+    ## Test basic completion with incomplete utf8 multibyte
+    n = 0 # reset
+    completion = llama.create_completion("", max_tokens=1)
+    assert completion["choices"][0]["text"] == ""
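
The test never loads real weights: the model is created with `vocab_only=True`, `llama_eval` is stubbed out to a no-op, and `llama_sample_top_p_top_k` is replaced by a closure that replays the tokens of "😀" and then EOS. With `max_tokens=1` only part of the emoji's bytes can be produced, so the expected completion text is the empty string rather than a decode error. A generic sketch of the same pytest `monkeypatch` pattern, using `time.time` as a stand-in patch target (this helper and test are illustrative, not part of the test suite):

```python
import time


def make_scripted(values, sentinel):
    """Return a stub that replays `values` once, then `sentinel` forever,
    the same shape as the mock_sample closure in the diff above."""
    state = {"i": 0}

    def stub(*args, **kwargs):
        if state["i"] < len(values):
            state["i"] += 1
            return values[state["i"] - 1]
        return sentinel

    return stub


def test_scripted_stub(monkeypatch):
    # Patch by dotted path, the same way the llama test patches
    # "llama_cpp.llama_cpp.llama_sample_top_p_top_k"; pytest restores it afterwards.
    monkeypatch.setattr("time.time", make_scripted([1.0, 2.0], sentinel=0.0))
    assert [time.time() for _ in range(4)] == [1.0, 2.0, 0.0, 0.0]
```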
