Commit df9362e

Update llama.cpp

1 parent 3af7b21 · commit df9362e
File tree

4 files changed: +8 −8 lines

    llama_cpp/llama.py
    llama_cpp/llama_cpp.py
    llama_cpp/server/app.py
    vendor/llama.cpp
llama_cpp/llama.py
+1 −1 (1 addition, 1 deletion)
@@ -233,7 +233,7 @@ def __init__(
         rope_scaling_type: Optional[int] = llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED,
         rope_freq_base: float = 0.0,
         rope_freq_scale: float = 0.0,
-        yarn_ext_factor: float = float("nan"),
+        yarn_ext_factor: float = -1.0,
         yarn_attn_factor: float = 1.0,
         yarn_beta_fast: float = 32.0,
         yarn_beta_slow: float = 1.0,
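
This hunk swaps the "unspecified" sentinel for `yarn_ext_factor` from `float("nan")` to `-1.0`. A NaN sentinel is awkward in practice: `nan != nan`, so equality checks against the default fail, and NaN does not round-trip through JSON. A minimal sketch of how the parameter is passed, assuming llama.cpp's convention that a negative value means "not set, take it from the model" (the model path below is a placeholder, not from the commit):

    from llama_cpp import Llama

    # Sketch only: leaving yarn_ext_factor at the new -1.0 default signals
    # "unspecified"; the other YaRN values shown are the defaults from the
    # diff above.
    llm = Llama(
        model_path="./models/example.gguf",  # hypothetical path
        yarn_ext_factor=-1.0,   # default after this commit; was float("nan")
        yarn_attn_factor=1.0,
        yarn_beta_fast=32.0,
        yarn_beta_slow=1.0,
    )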

llama_cpp/llama_cpp.py
+5 −5 (5 additions, 5 deletions)

The five replaced comment lines in this hunk read identically to the originals; the change is to whitespace alignment within the comments, which this text view does not preserve.
@@ -315,11 +315,11 @@ class llama_model_params(Structure):
 
 
 # struct llama_context_params {
-#     uint32_t seed;              // RNG seed, -1 for random
-#     uint32_t n_ctx;             // text context, 0 = from model
-#     uint32_t n_batch;           // prompt processing maximum batch size
-#     uint32_t n_threads;         // number of threads to use for generation
-#     uint32_t n_threads_batch;   // number of threads to use for batch processing
+#     uint32_t seed;              // RNG seed, -1 for random
+#     uint32_t n_ctx;             // text context, 0 = from model
+#     uint32_t n_batch;           // prompt processing maximum batch size
+#     uint32_t n_threads;         // number of threads to use for generation
+#     uint32_t n_threads_batch;   // number of threads to use for batch processing
 #     int8_t rope_scaling_type;   // RoPE scaling type, from `enum llama_rope_scaling_type`
 
 #     // ref: https://github.com/ggerganov/llama.cpp/pull/2054
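
The comment block documents the C struct that the ctypes binding wraps. As an illustration only (not the project's actual class, which carries many more fields), a binding for just the members listed in the comment could look like:

    from ctypes import Structure, c_uint32, c_int8

    # Illustrative sketch: mapping the commented C struct onto a ctypes
    # Structure, abbreviated to the fields shown above.
    class llama_context_params_sketch(Structure):
        _fields_ = [
            ("seed", c_uint32),             # RNG seed, -1 for random
            ("n_ctx", c_uint32),            # text context, 0 = from model
            ("n_batch", c_uint32),          # prompt processing max batch size
            ("n_threads", c_uint32),        # threads for generation
            ("n_threads_batch", c_uint32),  # threads for batch processing
            ("rope_scaling_type", c_int8),  # from `enum llama_rope_scaling_type`
        ]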

llama_cpp/server/app.py
+1 −1 (1 addition, 1 deletion)
@@ -93,7 +93,7 @@ class Settings(BaseSettings):
         default=0.0, description="RoPE frequency scaling factor"
     )
     yarn_ext_factor: float = Field(
-        default=float("nan")
+        default=-1.0
     )
     yarn_attn_factor: float = Field(
        default=1.0
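
The server applies the same default through a pydantic settings field, where a finite sentinel matters most: settings values are loaded from the environment and echoed back as JSON, and `float("nan")` breaks both. A hedged sketch assuming pydantic-settings-style env loading (the `YarnSettings` class name is invented for illustration):

    from pydantic import Field
    from pydantic_settings import BaseSettings

    class YarnSettings(BaseSettings):
        yarn_ext_factor: float = Field(default=-1.0)

    # Overridable from the environment, e.g. YARN_EXT_FACTOR=0.5.
    # Unlike float("nan"), -1.0 compares cleanly and serializes to valid JSON.
    print(YarnSettings().yarn_ext_factor)  # -1.0 unless overridden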

vendor/llama.cpp

Submodule pointer updated to a newer upstream llama.cpp commit (the new revision is not shown in this view).
0 commit comments