256
256
# // note: these values should be synchronized with ggml_rope
257
257
# // TODO: maybe move this enum to ggml.h (ggml_rope_type)
258
258
# enum llama_rope_type {
259
- # LLAMA_ROPE_TYPE_NONE = -1,
260
- # LLAMA_ROPE_TYPE_NORM = 0,
261
- # LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
259
+ # LLAMA_ROPE_TYPE_NONE = -1,
260
+ # LLAMA_ROPE_TYPE_NORM = 0,
261
+ # LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
262
+ # LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE,
263
+ # LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION,
262
264
# };
263
265
LLAMA_ROPE_TYPE_NONE = - 1
264
266
LLAMA_ROPE_TYPE_NORM = 0
265
267
LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX = 2
268
+ LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE = 8
269
+ LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION = 24
266
270
267
271
268
272
# enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
@@ -1265,6 +1269,7 @@ def llama_rope_freq_scale_train(model: llama_model_p, /) -> float:
1265
1269
# // Functions to access the model's GGUF metadata scalar values
1266
1270
# // - The functions return the length of the string on success, or -1 on failure
1267
1271
# // - The output string is always null-terminated and cleared on failure
1272
+ # // - When retrieving a string, an extra byte must be allocated to account for the null terminator
1268
1273
# // - GGUF array values are not supported by these functions
1269
1274
1270
1275
@@ -1378,18 +1383,6 @@ def llama_model_n_params(model: llama_model_p, /) -> int:
1378
1383
...
1379
1384
1380
1385
1381
- # // Get a llama model tensor
1382
- # LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name);
1383
- @ctypes_function (
1384
- "llama_get_model_tensor" , [llama_model_p_ctypes , ctypes .c_char_p ], ctypes .c_void_p
1385
- )
1386
- def llama_get_model_tensor (
1387
- model : llama_model_p , name : Union [ctypes .c_char_p , bytes ], /
1388
- ) -> ctypes .c_void_p :
1389
- """Get a llama model tensor"""
1390
- ...
1391
-
1392
-
1393
1386
# // Returns true if the model contains an encoder that requires llama_encode() call
1394
1387
# LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
1395
1388
@ctypes_function ("llama_model_has_encoder" , [llama_model_p_ctypes ], ctypes .c_bool )
@@ -3336,41 +3329,22 @@ def llama_sampler_init_grammar(
3336
3329
...
3337
3330
3338
3331
3332
+ # /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
3339
3333
# LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
3340
- # int32_t n_vocab, // llama_n_vocab()
3341
- # llama_token special_eos_id, // llama_token_eos()
3342
- # llama_token linefeed_id, // llama_token_nl()
3343
- # int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size)
3344
- # float penalty_repeat, // 1.0 = disabled
3345
- # float penalty_freq, // 0.0 = disabled
3346
- # float penalty_present, // 0.0 = disabled
3347
- # bool penalize_nl, // consider newlines as a repeatable token
3348
- # bool ignore_eos); // ignore the end-of-sequence token
3334
+ # int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size)
3335
+ # float penalty_repeat, // 1.0 = disabled
3336
+ # float penalty_freq, // 0.0 = disabled
3337
+ # float penalty_present); // 0.0 = disabled
3349
3338
@ctypes_function (
3350
3339
"llama_sampler_init_penalties" ,
3351
- [
3352
- ctypes .c_int32 ,
3353
- llama_token ,
3354
- llama_token ,
3355
- ctypes .c_int32 ,
3356
- ctypes .c_float ,
3357
- ctypes .c_float ,
3358
- ctypes .c_float ,
3359
- ctypes .c_bool ,
3360
- ctypes .c_bool ,
3361
- ],
3340
+ [ctypes .c_int32 , ctypes .c_float , ctypes .c_float , ctypes .c_float ],
3362
3341
llama_sampler_p_ctypes ,
3363
3342
)
3364
3343
def llama_sampler_init_penalties (
3365
- n_vocab : int ,
3366
- special_eos_id : int ,
3367
- linefeed_id : int ,
3368
3344
penalty_last_n : int ,
3369
3345
penalty_repeat : float ,
3370
3346
penalty_freq : float ,
3371
3347
penalty_present : float ,
3372
- penalize_nl : bool ,
3373
- ignore_eos : bool ,
3374
3348
/ ,
3375
3349
) -> llama_sampler_p :
3376
3350
...
0 commit comments