@@ -2271,6 +2271,20 @@ def llama_token_eos(model: llama_model_p, /) -> int:
     ...
 
 
+# LLAMA_API llama_token llama_token_cls(const struct llama_model * model); // classification
+@ctypes_function("llama_token_cls", [llama_model_p_ctypes], llama_token)
+def llama_token_cls(model: llama_model_p, /) -> int:
+    """classification"""
+    ...
+
+
+# LLAMA_API llama_token llama_token_sep(const struct llama_model * model); // sentence separator
+@ctypes_function("llama_token_sep", [llama_model_p_ctypes], llama_token)
+def llama_token_sep(model: llama_model_p, /) -> int:
+    """sentence separator"""
+    ...
+
+
 # LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line
 @ctypes_function("llama_token_nl", [llama_model_p_ctypes], llama_token)
 def llama_token_nl(model: llama_model_p, /) -> int:
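Illustrative usage (not part of the commit): a minimal sketch of reading the new special-token ids through the low-level ctypes bindings. The GGUF path and the `model` handle are hypothetical, and it assumes a llama-cpp-python build where llama_backend_init() takes no arguments.

import llama_cpp

llama_cpp.llama_backend_init()
params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"model.gguf", params)  # hypothetical path

# The new helpers mirror llama_token_bos/llama_token_eos; models that do not
# define a CLS or SEP token may report -1 here.
cls_id = llama_cpp.llama_token_cls(model)  # classification token id
sep_id = llama_cpp.llama_token_sep(model)  # sentence-separator token id
print(cls_id, sep_id)

llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()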
@@ -2326,16 +2340,16 @@ def llama_token_eot(model: llama_model_p, /) -> int: ...
 # /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
 # /// @return Returns the number of tokens on success, no more than n_tokens_max
 # /// @return Returns a negative number on failure - the number of tokens that would have been returned
-# /// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
-# ///                Does not insert a leading space.
+# /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
+# ///                      as plaintext. Does not insert a leading space.
 # LLAMA_API int32_t llama_tokenize(
 #         const struct llama_model * model,
 #                       const char * text,
 #                          int32_t   text_len,
 #                      llama_token * tokens,
 #                          int32_t   n_tokens_max,
-#                             bool   add_bos,
-#                             bool   special);
+#                             bool   add_special,
+#                             bool   parse_special);
 @ctypes_function(
     "llama_tokenize",
     [
@@ -2355,8 +2369,8 @@ def llama_tokenize(
     text_len: Union[ctypes.c_int, int],
     tokens: CtypesArray[llama_token],
     n_tokens_max: Union[ctypes.c_int, int],
-    add_bos: Union[ctypes.c_bool, bool],
-    special: Union[ctypes.c_bool, bool],
+    add_special: Union[ctypes.c_bool, bool],
+    parse_special: Union[ctypes.c_bool, bool],
     /,
 ) -> int:
     """Convert the provided text into tokens.
@@ -2367,9 +2381,8 @@ def llama_tokenize(
         text_len: The length of the text.
         tokens: The tokens pointer must be large enough to hold the resulting tokens.
         n_max_tokens: The maximum number of tokens to return.
-        add_bos: Whether to add a beginning-of-sentence token.
-        special: Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
-            Does not insert a leading space.
+        add_special: Allow to add BOS and EOS tokens if the model is configured to do so.
+        parse_special: Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.
 
     Returns:
         Returns the number of tokens on success, no more than n_tokens_max
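Illustrative usage (not part of the commit): a minimal sketch of calling llama_tokenize with the renamed add_special / parse_special flags, assuming llama_cpp is imported and `model` was loaded as in the sketch above.

text = b"Hello world"
n_tokens_max = 64
tokens = (llama_cpp.llama_token * n_tokens_max)()  # ctypes output buffer

n = llama_cpp.llama_tokenize(
    model,
    text,
    len(text),
    tokens,
    n_tokens_max,
    True,   # add_special: let the model add BOS/EOS if it is configured to do so
    False,  # parse_special: leave special/control token text as plaintext
)
if n < 0:
    # A negative return means the buffer was too small; -n is the required token count.
    raise RuntimeError(f"token buffer too small, need {-n}")
token_ids = list(tokens[:n])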