2 files changed: +8 −4 lines changed
@@ -199,14 +199,18 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # enum llama_vocab_type {
 #     LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab
-#     LLAMA_VOCAB_TYPE_SPM  = 1, // SentencePiece
-#     LLAMA_VOCAB_TYPE_BPE  = 2, // Byte Pair Encoding
-#     LLAMA_VOCAB_TYPE_WPM  = 3, // WordPiece
+#     LLAMA_VOCAB_TYPE_SPM  = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback
+#     LLAMA_VOCAB_TYPE_BPE  = 2, // GPT-2 tokenizer based on byte-level BPE
+#     LLAMA_VOCAB_TYPE_WPM  = 3, // BERT tokenizer based on WordPiece
 # };
 LLAMA_VOCAB_TYPE_NONE = 0
+"""For models without vocab"""
 LLAMA_VOCAB_TYPE_SPM = 1
+"""LLaMA tokenizer based on byte-level BPE with byte fallback"""
 LLAMA_VOCAB_TYPE_BPE = 2
+"""GPT-2 tokenizer based on byte-level BPE"""
 LLAMA_VOCAB_TYPE_WPM = 3
+"""BERT tokenizer based on WordPiece"""

 # // note: these values should be synchronized with ggml_rope
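As context for the change, below is a minimal sketch (not part of the diff) of how downstream code might use the newly documented constants. It assumes the constants are re-exported at the llama_cpp package level; the VOCAB_TYPE_DESCRIPTIONS table and describe_vocab_type helper are hypothetical names introduced only for illustration.

# Sketch: map the vocab-type constants to the tokenizer families named in the
# new docstrings. The integer values mirror the llama.cpp enum shown above.
import llama_cpp  # assumes the constants are re-exported by the package

VOCAB_TYPE_DESCRIPTIONS = {
    llama_cpp.LLAMA_VOCAB_TYPE_NONE: "no vocab",
    llama_cpp.LLAMA_VOCAB_TYPE_SPM: "LLaMA tokenizer (byte-level BPE with byte fallback)",
    llama_cpp.LLAMA_VOCAB_TYPE_BPE: "GPT-2 tokenizer (byte-level BPE)",
    llama_cpp.LLAMA_VOCAB_TYPE_WPM: "BERT tokenizer (WordPiece)",
}

def describe_vocab_type(vocab_type: int) -> str:
    # Return a human-readable description for a llama_vocab_type value,
    # falling back to the raw integer for values this table does not know.
    return VOCAB_TYPE_DESCRIPTIONS.get(vocab_type, f"unknown vocab type {vocab_type}")

# Example: prints "LLaMA tokenizer (byte-level BPE with byte fallback)"
print(describe_vocab_type(llama_cpp.LLAMA_VOCAB_TYPE_SPM))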