@@ -82,7 +82,7 @@ def rope_freq_scale_train(self) -> float:
82
82
def desc(self) -> str:
    """Return the human-readable model description reported by llama.cpp."""
    assert self.model is not None
    # llama_model_desc writes a NUL-terminated string into the buffer;
    # .value stops at the first NUL, so the fixed size only needs to be
    # "large enough".
    buf_size = 1024
    buf = ctypes.create_string_buffer(buf_size)
    llama_cpp.llama_model_desc(self.model, buf, buf_size)
    return buf.value.decode("utf-8")
def size (self ) -> int :
@@ -184,7 +184,7 @@ def tokenize(self, text: bytes, add_bos: bool, special: bool):
184
184
def token_to_piece(self, token: int) -> bytes:
    """Convert a single token id to the raw bytes of its text piece.

    Fix over the previous version: `bytes(buf)` returned the whole
    32-byte buffer, so short pieces came back with trailing NUL padding
    (and pieces longer than 32 bytes were silently truncated).  We now
    use the return value of llama_token_to_piece, which is the number of
    bytes written, or a negative value whose magnitude is the required
    buffer size when the buffer is too small.
    """
    assert self.model is not None
    size = 32
    buf = ctypes.create_string_buffer(size)
    n = llama_cpp.llama_token_to_piece(self.model, token, buf, size)
    if n < 0:
        # Buffer too small: -n is the required size — retry once.
        size = -n
        buf = ctypes.create_string_buffer(size)
        n = llama_cpp.llama_token_to_piece(self.model, token, buf, size)
        assert n >= 0, f"llama_token_to_piece failed for token {token}"
    # Slicing a ctypes char array yields exactly the bytes written.
    return bytes(buf[:n])
def detokenize (self , tokens : List [int ]) -> bytes :
@@ -349,7 +349,7 @@ def sample_repetition_penalties(
349
349
assert self .ctx is not None
350
350
llama_cpp .llama_sample_repetition_penalties (
351
351
self .ctx ,
352
- ctypes .byref (candidates .candidates ), # type: ignore
352
+ llama_cpp .byref (candidates .candidates ),
353
353
last_tokens_data ,
354
354
penalty_last_n ,
355
355
penalty_repeat ,
@@ -367,7 +367,7 @@ def sample_classifier_free_guidance(
367
367
assert guidance_ctx .ctx is not None
368
368
llama_cpp .llama_sample_classifier_free_guidance (
369
369
self .ctx ,
370
- ctypes .byref (candidates .candidates ), # type: ignore
370
+ llama_cpp .byref (candidates .candidates ),
371
371
guidance_ctx .ctx ,
372
372
scale ,
373
373
)
@@ -376,55 +376,55 @@ def sample_softmax(self, candidates: "_LlamaTokenDataArray"):
376
376
assert self .ctx is not None
377
377
llama_cpp .llama_sample_softmax (
378
378
self .ctx ,
379
- ctypes .byref (candidates .candidates ), # type: ignore
379
+ llama_cpp .byref (candidates .candidates ),
380
380
)
381
381
382
382
def sample_top_k(self, candidates: "_LlamaTokenDataArray", k: int, min_keep: int):
    """Keep only the *k* most probable candidates (in place), retaining at
    least *min_keep* entries."""
    assert self.ctx is not None
    llama_cpp.llama_sample_top_k(
        self.ctx,
        llama_cpp.byref(candidates.candidates),
        k,
        min_keep,
    )
def sample_top_p(self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int):
    """Apply nucleus (top-p) filtering to the candidates in place,
    retaining at least *min_keep* entries."""
    assert self.ctx is not None
    llama_cpp.llama_sample_top_p(
        self.ctx,
        llama_cpp.byref(candidates.candidates),
        p,
        min_keep,
    )
def sample_min_p(self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int):
    """Apply min-p filtering to the candidates in place, retaining at
    least *min_keep* entries."""
    assert self.ctx is not None
    llama_cpp.llama_sample_min_p(
        self.ctx,
        llama_cpp.byref(candidates.candidates),
        p,
        min_keep,
    )
def sample_tail_free(
    self, candidates: "_LlamaTokenDataArray", z: float, min_keep: int
):
    """Apply tail-free sampling with parameter *z* to the candidates in
    place, retaining at least *min_keep* entries."""
    assert self.ctx is not None
    llama_cpp.llama_sample_tail_free(
        self.ctx,
        llama_cpp.byref(candidates.candidates),
        z,
        min_keep,
    )
def sample_typical(
    self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int
):
    """Apply locally-typical sampling with parameter *p* to the candidates
    in place, retaining at least *min_keep* entries."""
    assert self.ctx is not None
    llama_cpp.llama_sample_typical(
        self.ctx,
        llama_cpp.byref(candidates.candidates),
        p,
        min_keep,
    )
def sample_temp(self, candidates: "_LlamaTokenDataArray", temp: float):
    """Rescale the candidate logits by temperature *temp* in place."""
    assert self.ctx is not None
    llama_cpp.llama_sample_temp(
        self.ctx,
        llama_cpp.byref(candidates.candidates),
        temp,
    )
def sample_grammar(self, candidates: "_LlamaTokenDataArray", grammar: LlamaGrammar):
    """Mask the candidates in place so only tokens permitted by *grammar*
    remain viable."""
    assert self.ctx is not None
    assert grammar.grammar is not None
    llama_cpp.llama_sample_grammar(
        self.ctx,
        llama_cpp.byref(candidates.candidates),
        grammar.grammar,
    )
@@ -434,25 +434,25 @@ def sample_token_mirostat(
434
434
tau : float ,
435
435
eta : float ,
436
436
m : int ,
437
- mu : ctypes . _Pointer [ctypes .c_float ], # type: ignore
437
+ mu : llama_cpp . CtypesPointerOrRef [ctypes .c_float ],
438
438
) -> int :
439
439
assert self .ctx is not None
440
440
return llama_cpp .llama_sample_token_mirostat (
441
441
self .ctx ,
442
- ctypes .byref (candidates .candidates ), # type: ignore
442
+ llama_cpp .byref (candidates .candidates ),
443
443
tau ,
444
444
eta ,
445
445
m ,
446
446
mu ,
447
447
)
448
448
449
449
def sample_token_mirostat_v2 (
450
- self , candidates : "_LlamaTokenDataArray" , tau : float , eta : float , mu : ctypes . _Pointer [ctypes .c_float ] # type: ignore
450
+ self , candidates : "_LlamaTokenDataArray" , tau : float , eta : float , mu : llama_cpp . CtypesPointerOrRef [ctypes .c_float ]
451
451
) -> int :
452
452
assert self .ctx is not None
453
453
return llama_cpp .llama_sample_token_mirostat_v2 (
454
454
self .ctx ,
455
- ctypes .byref (candidates .candidates ), # type: ignore
455
+ llama_cpp .byref (candidates .candidates ),
456
456
tau ,
457
457
eta ,
458
458
mu ,
@@ -462,14 +462,14 @@ def sample_token_greedy(self, candidates: "_LlamaTokenDataArray") -> int:
462
462
assert self .ctx is not None
463
463
return llama_cpp .llama_sample_token_greedy (
464
464
self .ctx ,
465
- ctypes .byref (candidates .candidates ), # type: ignore
465
+ llama_cpp .byref (candidates .candidates ),
466
466
)
467
467
468
468
def sample_token(self, candidates: "_LlamaTokenDataArray") -> int:
    """Draw one token id from the candidate distribution and return it."""
    assert self.ctx is not None
    return llama_cpp.llama_sample_token(
        self.ctx,
        llama_cpp.byref(candidates.candidates),
    )
# Grammar
@@ -566,7 +566,7 @@ def __init__(self, *, n_vocab: int):
566
566
size = self .n_vocab ,
567
567
sorted = False ,
568
568
)
569
- self .default_candidates_data_id = np .arange (self .n_vocab , dtype = np .intc )
569
+ self .default_candidates_data_id = np .arange (self .n_vocab , dtype = np .intc ) # type: ignore
570
570
self .default_candidates_data_p = np .zeros (self .n_vocab , dtype = np .single )
571
571
572
572
def copy_logits (self , logits : npt .NDArray [np .single ]):
@@ -754,7 +754,7 @@ def sample(
754
754
ctx_main .sample_repetition_penalties (
755
755
token_data_array ,
756
756
# TODO: Only create this once
757
- (llama_cpp .llama_token * len (self .prev ))(* self .prev ), # type: ignore
757
+ (llama_cpp .llama_token * len (self .prev ))(* self .prev ),
758
758
self .params .penalty_last_n ,
759
759
self .params .penalty_repeat ,
760
760
self .params .penalty_freq ,
0 commit comments