Commit 6e1a73b

Merge branch 'main' into patch-1
2 parents: 4100bde + 4887973

4 files changed: 28 additions, 33 deletions

‎llama_cpp/llama.py

3 additions, 3 deletions
@@ -445,17 +445,17 @@ def detokenize(self, tokens: List[int]) -> bytes:
         """
         assert self.model is not None
         output = b""
-        size = 16
+        size = 32
         buffer = (ctypes.c_char * size)()
         for token in tokens:
-            n = llama_cpp.llama_token_to_str_with_model(
+            n = llama_cpp.llama_token_to_piece_with_model(
                 self.model, llama_cpp.llama_token(token), buffer, size
             )
             assert n <= size
             output += bytes(buffer[:n])
         # NOTE: Llama1 models automatically added a space at the start of the prompt
         # this line removes a leading space if the first token is a beginning of sentence token
-        return output
+        return output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() else output
 
     def set_cache(self, cache: Optional[BaseLlamaCache]):
         """Set the cache.

‎llama_cpp/llama_cpp.py

21 additions, 26 deletions
@@ -973,48 +973,43 @@ def llama_tokenize_with_model(
 _lib.llama_tokenize_with_model.restype = c_int
 
 
-# // Token Id -> String. Uses the vocabulary in the provided context
-# // Does not write null terminator to the buffer
-# LLAMA_API int llama_token_to_str(
+# // Token Id -> Piece.
+# // Uses the vocabulary in the provided context.
+# // Does not write null terminator to the buffer.
+# // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
+# LLAMA_API int llama_token_to_piece(
 #           const struct llama_context * ctx,
-#                  llama_token   token,
-#                          char * buf,
-#                          int   length);
-def llama_token_to_str(
+#                        llama_token   token,
+#                               char * buf,
+#                               int    length);
+def llama_token_to_piece(
     ctx: llama_context_p, token: llama_token, buf: bytes, length: c_int
 ) -> int:
-    return _lib.llama_token_to_str(ctx, token, buf, length)
+    return _lib.llama_token_to_piece(ctx, token, buf, length)
 
 
-_lib.llama_tokenize_with_model.argtypes = [
-    llama_model_p,
-    c_char_p,
-    llama_token_p,
-    c_int,
-    c_bool,
-]
-_lib.llama_tokenize_with_model.restype = c_int
+_lib.llama_token_to_piece.argtypes = [llama_context_p, llama_token, c_char_p, c_int]
+_lib.llama_token_to_piece.restype = c_int
 
 
-# LLAMA_API int llama_token_to_str_with_model(
-#                          const struct llama_model * model,
-#                                        llama_token   token,
-#                                               char * buf,
-#                                               int   length);
-def llama_token_to_str_with_model(
+# LLAMA_API int llama_token_to_piece_with_model(
+#                          const struct llama_model * model,
+#                                        llama_token   token,
+#                                               char * buf,
+#                                               int   length);
+def llama_token_to_piece_with_model(
     model: llama_model_p, token: llama_token, buf: bytes, length: c_int
 ) -> int:
-    return _lib.llama_token_to_str_with_model(model, token, buf, length)
+    return _lib.llama_token_to_piece_with_model(model, token, buf, length)
 
 
-_lib.llama_token_to_str_with_model.argtypes = [
+_lib.llama_token_to_piece_with_model.argtypes = [
     llama_model_p,
     llama_token,
     c_char_p,
     c_int,
 ]
-_lib.llama_token_to_str_with_model.restype = c_int
-
+_lib.llama_token_to_piece_with_model.restype = c_int
 
 # //
 # // Grammar
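For callers that use the ctypes layer directly, the rename means llama_token_to_str and llama_token_to_str_with_model are gone and the *_to_piece variants take their place with the same argument layout (the hunk also drops a duplicated llama_tokenize_with_model argtypes/restype block). A hedged sketch of driving the model-level binding by hand, mirroring what Llama.detokenize() does internally; the llama object and its .model handle are assumed to exist, as in the sketch above:

    import ctypes
    import llama_cpp

    size = 32
    buffer = (ctypes.c_char * size)()   # output buffer; no null terminator is written
    token = llama.token_bos()           # any valid token id

    n = llama_cpp.llama_token_to_piece_with_model(
        llama.model, llama_cpp.llama_token(token), buffer, size
    )
    piece = bytes(buffer[:n])           # raw UTF-8 bytes of the decoded piece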

‎tests/test_llama.py

3 additions, 3 deletions
@@ -14,16 +14,16 @@ def test_llama_cpp_tokenization():
 
     tokens = llama.tokenize(text)
     assert tokens[0] == llama.token_bos()
-    assert tokens == [1, 10994, 2787]
+    assert tokens == [1, 15043, 2787]
     detokenized = llama.detokenize(tokens)
     assert detokenized == text
 
     tokens = llama.tokenize(text, add_bos=False)
     assert tokens[0] != llama.token_bos()
-    assert tokens == [10994, 2787]
+    assert tokens == [15043, 2787]
 
     detokenized = llama.detokenize(tokens)
-    assert detokenized == text
+    assert detokenized != text
 
 
 @pytest.mark.skip(reason="bug in tokenization where leading space is always inserted even if not after eos")
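The updated expectations follow from the detokenize() change above: with a leading BOS the round trip is exact, but without one the first piece still carries the tokenizer's leading-space marker and nothing strips it, so the last assertion flips to !=. Roughly (token ids taken from the test; the exact detokenized bytes are an assumption):

    text = b"Hello World"
    tokens = llama.tokenize(text, add_bos=False)   # [15043, 2787] per the test
    detok = llama.detokenize(tokens)               # likely b" Hello World"
    assert detok != text
    assert detok.endswith(text)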

‎vendor/llama.cpp

Submodule reference updated (1 addition, 1 deletion)

0 commit comments
