@@ -2,7 +2,6 @@
 
 import os
 import sys
-import abc
 import uuid
 import time
 import multiprocessing
@@ -15,7 +14,6 @@
     Iterator,
     Deque,
     Callable,
-    Any,
 )
 from collections import deque
 
@@ -31,6 +29,10 @@
     LlamaDiskCache,  # type: ignore
     LlamaRAMCache,  # type: ignore
 )
+from .llama_tokenizer import (
+    BaseLlamaTokenizer,
+    LlamaTokenizer
+)
 
 import llama_cpp.llama_cpp as llama_cpp
 import llama_cpp.llama_chat_format as llama_chat_format
@@ -1747,69 +1749,6 @@ def longest_token_prefix(a: Sequence[int], b: Sequence[int]):
     return longest_prefix
 
 
-class BaseLlamaTokenizer(abc.ABC):
-    @abc.abstractmethod
-    def tokenize(self, text: bytes, add_bos: bool = True, special: bool = True) -> List[int]:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def detokenize(self, tokens: List[int], prev_tokens: Optional[List[int]] = None) -> bytes:
-        raise NotImplementedError
-
-
-class LlamaTokenizer(BaseLlamaTokenizer):
-    def __init__(self, llama: Llama):
-        self.llama = llama
-        self._model = llama._model  # type: ignore
-
-    def tokenize(self, text: bytes, add_bos: bool = True, special: bool = True) -> List[int]:
-        return self._model.tokenize(text, add_bos=add_bos, special=special)
-
-    def detokenize(self, tokens: List[int], prev_tokens: Optional[List[int]] = None) -> bytes:
-        if prev_tokens is not None:
-            return self._model.detokenize(tokens[len(prev_tokens):])
-        else:
-            return self._model.detokenize(tokens)
-
-    def encode(self, text: str, add_bos: bool = True, special: bool = True) -> List[int]:
-        return self.tokenize(
-            text.encode("utf-8", errors="ignore"), add_bos=add_bos, special=special
-        )
-
-    def decode(self, tokens: List[int]) -> str:
-        return self.detokenize(tokens).decode("utf-8", errors="ignore")
-
-    @classmethod
-    def from_ggml_file(cls, path: str) -> "LlamaTokenizer":
-        return cls(Llama(model_path=path, vocab_only=True))
-
-
-class LlamaHFTokenizer(BaseLlamaTokenizer):
-    def __init__(self, hf_tokenizer: Any):
-        self.hf_tokenizer = hf_tokenizer
-
-    def tokenize(self, text: bytes, add_bos: bool = True, special: bool = True) -> List[int]:
-        return self.hf_tokenizer.encode(text.decode("utf-8", errors="ignore"), add_special_tokens=special)
-
-    def detokenize(self, tokens: List[int], prev_tokens: Optional[List[int]] = None) -> bytes:
-        if prev_tokens is not None:
-            text = self.hf_tokenizer.decode(tokens).encode("utf-8", errors="ignore")
-            prev_text = self.hf_tokenizer.decode(prev_tokens).encode("utf-8", errors="ignore")
-            return text[len(prev_text):]
-        else:
-            return self.hf_tokenizer.decode(tokens).encode("utf-8", errors="ignore")
-
-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path: str) -> "LlamaHFTokenizer":
-        try:
-            from transformers import AutoTokenizer
-        except ImportError:
-            raise ImportError(
-                "The `transformers` library is required to use the `HFTokenizer`."
-                "You can install it with `pip install transformers`."
-            )
-        hf_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=pretrained_model_name_or_path)
-        return cls(hf_tokenizer)
 
 
 class LlamaState:
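For reference, a minimal usage sketch of the tokenizer API this commit moves into `llama_cpp/llama_tokenizer.py`, exercising only the methods visible in the diff above. The model path and the HF model id are hypothetical placeholders, and `LlamaHFTokenizer` is assumed to live in the new module alongside the two classes re-imported into `llama.py`:

```python
# Sketch only: the tokenizer classes shown in the diff above.
# "./model.gguf" is a hypothetical local model path.
from llama_cpp.llama_tokenizer import LlamaTokenizer

# from_ggml_file loads the model with vocab_only=True, so only the
# vocabulary is read; no weights are needed just to tokenize.
tokenizer = LlamaTokenizer.from_ggml_file("./model.gguf")

tokens = tokenizer.encode("Hello, world!")  # str -> List[int], adds BOS by default
text = tokenizer.decode(tokens)             # List[int] -> str

# Incremental detokenization: with prev_tokens given, detokenize returns
# only the bytes contributed by the new tokens (useful for streaming).
suffix = tokenizer.detokenize(tokens, prev_tokens=tokens[:-1])

# HF-backed variant (requires `pip install transformers`); model id is a
# placeholder:
# from llama_cpp.llama_tokenizer import LlamaHFTokenizer
# hf_tokenizer = LlamaHFTokenizer.from_pretrained("some-org/some-model")
```

Both implementations satisfy the same `BaseLlamaTokenizer` interface, so either can be passed wherever the abstract `tokenize`/`detokenize` pair is expected.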