@@ -228,7 +228,7 @@ def __init__(
         rope_freq_scale: float = 1.0,
         n_gqa: Optional[int] = None,  # (TEMPORARY) must be 8 for llama2 70b
         rms_norm_eps: Optional[float] = None,  # (TEMPORARY)
-        mul_mat_q: Optional[bool] = None,  # (TEMPORARY)
+        mul_mat_q: Optional[bool] = None,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
@@ -290,11 +290,6 @@ def __init__(
         self.params.rope_freq_base = rope_freq_base
         self.params.rope_freq_scale = rope_freq_scale
 
-        if n_gqa is not None:
-            self.params.n_gqa = n_gqa
-
-        if rms_norm_eps is not None:
-            self.params.rms_norm_eps = rms_norm_eps
 
         if mul_mat_q is not None:
             self.params.mul_mat_q = mul_mat_q
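
Note: a minimal usage sketch of the kept mul_mat_q flag (the model path below is a placeholder, not part of this PR). The flag stays optional, so the llama.cpp default is used unless the caller sets it explicitly:

    from llama_cpp import Llama

    # mul_mat_q is only forwarded to the context params when it is not None,
    # so omitting it keeps the llama.cpp default behaviour.
    llm = Llama(model_path="./model.bin", mul_mat_q=True)  # placeholder path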
@@ -453,6 +448,8 @@ def detokenize(self, tokens: List[int]) -> bytes:
         buffer_size = 32
         buffer = (ctypes.c_char * buffer_size)()
         for token in tokens:
+            if token == llama_cpp.llama_token_bos(self.ctx):
+                continue
             n = llama_cpp.llama_token_to_str(
                 self.ctx, llama_cpp.llama_token(token), buffer, buffer_size
             )
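
Note: a small round-trip sketch of the detokenize change (hypothetical prompt and placeholder model path). tokenize() prepends a BOS token by default; with the guard above, detokenize() now skips that token instead of rendering it:

    from llama_cpp import Llama

    llm = Llama(model_path="./model.bin", verbose=False)  # placeholder path
    tokens = llm.tokenize(b"Hello, world")  # BOS token is prepended by default
    text = llm.detokenize(tokens)           # BOS is skipped, only the text pieces are returned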
@@ -1585,13 +1582,7 @@ def __getstate__(self):
1585
1582
lora_base = self .lora_base ,
1586
1583
lora_path = self .lora_path ,
1587
1584
tensor_split = self .tensor_split ,
1588
- ### TEMPORARY ###
1589
- n_gqa = self .params .n_gqa ,
1590
- rms_norm_eps = self .params .rms_norm_eps ,
1591
- ### TEMPORARY ###
1592
- ### DEPRECATED ###
1593
- n_parts = self .n_parts ,
1594
- ### DEPRECATED ###
1585
+ mul_mat_q = self .params .mul_mat_q ,
1595
1586
)
1596
1587
1597
1588
def __setstate__ (self , state ):
@@ -1613,14 +1604,8 @@ def __setstate__(self, state):
             lora_base=state["lora_base"],
             lora_path=state["lora_path"],
             tensor_split=state["tensor_split"],
+            mul_mat_q=state["mul_mat_q"],
             verbose=state["verbose"],
-            ### TEMPORARY ###
-            n_gqa=state["n_gqa"],
-            rms_norm_eps=state["rms_norm_eps"],
-            ### TEMPORARY ###
-            ### DEPRECATED ###
-            n_parts=state["n_parts"],
-            ### DEPRECATED ###
         )
 
     def save_state(self) -> LlamaState:
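
Note: a pickling sketch under the new state keys (placeholder model path). __getstate__ now carries mul_mat_q instead of the removed n_gqa / rms_norm_eps / n_parts entries, so the flag survives a pickle round trip:

    import pickle
    from llama_cpp import Llama

    llm = Llama(model_path="./model.bin", mul_mat_q=True)  # placeholder path
    restored = pickle.loads(pickle.dumps(llm))  # __setstate__ re-creates the model with mul_mat_q preserved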