Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 8ac5946

Browse filesBrowse files
committed
Strip leading space when de-tokenizing.
1 parent c2d1dea commit 8ac5946
Copy full SHA for 8ac5946

File tree

Expand file treeCollapse file tree

2 files changed

+21
-9
lines changed
Filter options
Expand file treeCollapse file tree

2 files changed

+21
-9
lines changed

‎llama_cpp/llama.py

Copy file name to clipboardExpand all lines: llama_cpp/llama.py
+5-5Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -445,17 +445,17 @@ def detokenize(self, tokens: List[int]) -> bytes:
445445
"""
446446
assert self.ctx is not None
447447
output = b""
448-
buffer_size = 32
448+
buffer_size = 8
449449
buffer = (ctypes.c_char * buffer_size)()
450450
for token in tokens:
451-
if token == llama_cpp.llama_token_bos(self.ctx):
452-
continue
453451
n = llama_cpp.llama_token_to_str(
454452
self.ctx, llama_cpp.llama_token(token), buffer, buffer_size
455453
)
456454
assert n <= buffer_size
457455
output += bytes(buffer[:n])
458-
return output
456+
# NOTE: Llama1 models automatically added a space at the start of the prompt
457+
# this line removes a leading space if the first token is a beginning of sentence token
458+
return output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() else output
459459

460460
def set_cache(self, cache: Optional[BaseLlamaCache]):
461461
"""Set the cache.
@@ -886,7 +886,7 @@ def _create_completion(
886886
created: int = int(time.time())
887887
completion_tokens: List[int] = []
888888
# Add blank space to start of prompt to match OG llama tokenizer
889-
prompt_tokens: List[int] = self.tokenize(b" " + prompt.encode("utf-8"))
889+
prompt_tokens: List[int] = self.tokenize(prompt.encode("utf-8")) if prompt != "" else [self.token_bos()]
890890
text: bytes = b""
891891
returned_tokens: int = 0
892892
stop = (

‎tests/test_llama.py

Copy file name to clipboardExpand all lines: tests/test_llama.py
+16-4Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,32 @@
1+
import pytest
12
import llama_cpp
23

34
MODEL = "./vendor/llama.cpp/models/ggml-vocab-llama.gguf"
45

56

6-
def test_llama():
7-
llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
7+
def test_llama_cpp_tokenization():
8+
llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, verbose=False)
89

910
assert llama
1011
assert llama.ctx is not None
1112

1213
text = b"Hello World"
1314

14-
assert llama.detokenize(llama.tokenize(text)) == text
15+
tokens = llama.tokenize(text)
16+
assert tokens[0] == llama.token_bos()
17+
assert tokens == [1, 15043, 2787]
18+
detokenized = llama.detokenize(tokens)
19+
assert detokenized == text
20+
21+
tokens = llama.tokenize(text, add_bos=False)
22+
assert tokens[0] != llama.token_bos()
23+
assert tokens == [15043, 2787]
24+
25+
detokenized = llama.detokenize(tokens)
26+
assert detokenized != text
1527

1628

17-
# @pytest.mark.skip(reason="need to update sample mocking")
29+
@pytest.mark.skip(reason="bug in tokenization where leading space is always inserted even if not after eos")
1830
def test_llama_patch(monkeypatch):
1931
llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
2032
n_vocab = llama_cpp.llama_n_vocab(llama.ctx)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.