File tree Expand file tree Collapse file tree 3 files changed +5
-5
lines changed
Filter options
Expand file tree Collapse file tree 3 files changed +5
-5
lines changed
Original file line number Diff line number Diff line change @@ -455,7 +455,7 @@ def detokenize(self, tokens: List[int]) -> bytes:
455
455
output += bytes(buffer[:n])
456
456
# NOTE: Llama1 models automatically added a space at the start of the prompt
457
457
# this line removes a leading space if the first token is a beginning of sentence token
458
- return output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() else output
458
+ return output
459
459
460
460
def set_cache (self , cache : Optional [BaseLlamaCache ]):
461
461
"""Set the cache.
Original file line number Diff line number Diff line change @@ -14,16 +14,16 @@ def test_llama_cpp_tokenization():
14
14
15
15
tokens = llama.tokenize(text)
16
16
assert tokens[0] == llama.token_bos()
17
- assert tokens == [1, 15043, 2787]
17
+ assert tokens == [1, 10994, 2787]
18
18
detokenized = llama.detokenize(tokens)
19
19
assert detokenized == text
20
20
21
21
tokens = llama.tokenize(text, add_bos=False)
22
22
assert tokens[0] != llama.token_bos()
23
- assert tokens == [15043, 2787]
23
+ assert tokens == [10994, 2787]
24
24
25
25
detokenized = llama.detokenize(tokens)
26
- assert detokenized != text
26
+ assert detokenized == text
27
27
28
28
29
29
@pytest.mark.skip(reason="bug in tokenization where leading space is always inserted even if not after eos")
You can’t perform that action at this time.
0 commit comments