Commit 8ab098e

Re-order Llama class params

1 parent e4f9db3
1 file changed, 7 insertions(+), 6 deletions(-): llama_cpp/llama.py
diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -205,8 +205,6 @@ def __init__(
         model_path: str,
         # NOTE: These parameters are likely to change in the future.
         n_ctx: int = 512,
-        rope_freq_base: float = 10000.0,
-        rope_freq_scale: float = 1.0,
         n_parts: int = -1,
         n_gpu_layers: int = 0,
         seed: int = 1337,
@@ -223,15 +221,15 @@ def __init__(
         lora_path: Optional[str] = None,
         low_vram: bool = False,
         tensor_split: Optional[List[float]] = None,
+        rope_freq_base: float = 10000.0,
+        rope_freq_scale: float = 1.0,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
 
         Args:
             model_path: Path to the model.
             n_ctx: Maximum context size.
-            rope_freq_base: RoPE base frequency.
-            rope_freq_scale: RoPE frequency scale.
             n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
             seed: Random seed. -1 for random.
             f16_kv: Use half-precision for key/value cache.
@@ -246,6 +244,8 @@ def __init__(
             lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
             lora_path: Path to a LoRA file to apply to the model.
             tensor_split: List of floats to split the model across multiple GPUs. If None, the model is not split.
+            rope_freq_base: Base frequency for rope sampling.
+            rope_freq_scale: Scale factor for rope sampling.
             verbose: Print verbose output to stderr.
 
         Raises:
@@ -260,8 +260,6 @@ def __init__(
 
         self.params = llama_cpp.llama_context_default_params()
         self.params.n_ctx = n_ctx
-        self.params.rope_freq_base = rope_freq_base
-        self.params.rope_freq_scale = rope_freq_scale
         self.params.n_gpu_layers = n_gpu_layers
         self.params.seed = seed
         self.params.f16_kv = f16_kv
@@ -281,6 +279,9 @@ def __init__(
             self._c_tensor_split = FloatArray(*tensor_split) # keep a reference to the array so it is not gc'd
             self.params.tensor_split = self._c_tensor_split
 
+        self.params.rope_freq_base = rope_freq_base
+        self.params.rope_freq_scale = rope_freq_scale
+
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
 
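Since `rope_freq_base` and `rope_freq_scale` now sit near the end of the signature (after `tensor_split`) rather than right after `n_ctx`, any caller passing them positionally would silently bind the wrong parameters; keyword callers are unaffected. A minimal sketch of a keyword-style call against the reordered signature (the model path and the scale value are placeholders, not from this commit):

from llama_cpp import Llama

# Keyword arguments are order-independent, so this call behaves the same
# before and after the reorder; only positional callers could break.
llm = Llama(
    model_path="./models/7B/ggml-model.bin",  # placeholder path
    n_ctx=2048,
    rope_freq_base=10000.0,  # default from the signature above
    rope_freq_scale=0.5,     # e.g. scale rotary frequency for longer contexts
)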

0 commit comments