@@ -224,7 +224,7 @@ def __init__(
224
224
rope_freq_base : float = 10000.0 ,
225
225
rope_freq_scale : float = 1.0 ,
226
226
n_gqa : Optional [int ] = None , # (TEMPORARY) must be 8 for llama2 70b
227
- rms_norm_eps : Optional [float ] = None , # (TEMPORARY)
227
+ rms_norm_eps : Optional [float ] = None , # (TEMPORARY)
228
228
verbose : bool = True ,
229
229
):
230
230
"""Load a llama.cpp model from `model_path`.
@@ -277,7 +277,9 @@ def __init__(
277
277
278
278
if self .tensor_split is not None :
279
279
FloatArray = (ctypes .c_float * len (self .tensor_split ))(* self .tensor_split )
280
- self ._p_tensor_split = ctypes .POINTER (ctypes .c_float )(FloatArray ) # keep a reference to the array so it is not gc'd
280
+ self ._p_tensor_split = ctypes .POINTER (ctypes .c_float )(
281
+ FloatArray
282
+ ) # keep a reference to the array so it is not gc'd
281
283
self .params .tensor_split = self ._p_tensor_split
282
284
283
285
self .params .rope_freq_base = rope_freq_base
@@ -959,9 +961,7 @@ def _create_completion(
959
961
for token in remaining_tokens :
960
962
token_end_position += len (self .detokenize ([token ]))
961
963
# Check if stop sequence is in the token
962
- if token_end_position >= (
963
- remaining_length - first_stop_position
964
- ):
964
+ if token_end_position >= (remaining_length - first_stop_position ):
965
965
break
966
966
logprobs_or_none : Optional [CompletionLogprobs ] = None
967
967
if logprobs is not None :
0 commit comments