Commit df9362e

Update llama.cpp

1 parent 3af7b21 · commit df9362e
File tree

4 files changed: +8 −8 lines

    llama_cpp/llama.py
    llama_cpp/llama_cpp.py
    llama_cpp/server/app.py
    vendor/llama.cpp
llama_cpp/llama.py
+1 −1 (1 addition, 1 deletion)
@@ -233,7 +233,7 @@ def __init__(
         rope_scaling_type: Optional[int] = llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED,
         rope_freq_base: float = 0.0,
         rope_freq_scale: float = 0.0,
-        yarn_ext_factor: float = float("nan"),
+        yarn_ext_factor: float = -1.0,
         yarn_attn_factor: float = 1.0,
         yarn_beta_fast: float = 32.0,
         yarn_beta_slow: float = 1.0,
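
This hunk swaps the "unspecified" sentinel for `yarn_ext_factor` from `float("nan")` to `-1.0`. A NaN sentinel is awkward in practice: `nan != nan`, so equality checks against the default fail, and NaN does not round-trip through JSON. A minimal sketch of how the parameter is passed, assuming llama.cpp's convention that a negative value means "not set, take it from the model" (the model path below is a placeholder, not from the commit):

    from llama_cpp import Llama

    # Sketch only: leaving yarn_ext_factor at the new -1.0 default signals
    # "unspecified"; the other YaRN values shown are the defaults from the
    # diff above.
    llm = Llama(
        model_path="./models/example.gguf",  # hypothetical path
        yarn_ext_factor=-1.0,   # default after this commit; was float("nan")
        yarn_attn_factor=1.0,
        yarn_beta_fast=32.0,
        yarn_beta_slow=1.0,
    )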

llama_cpp/llama_cpp.py
+5 −5 (5 additions, 5 deletions)

The five replaced comment lines in this hunk read identically to the originals; the change is to whitespace alignment within the comments, which this text view does not preserve.
@@ -315,11 +315,11 @@ class llama_model_params(Structure):
 
 
 # struct llama_context_params {
-#     uint32_t seed;              // RNG seed, -1 for random
-#     uint32_t n_ctx;             // text context, 0 = from model
-#     uint32_t n_batch;           // prompt processing maximum batch size
-#     uint32_t n_threads;         // number of threads to use for generation
-#     uint32_t n_threads_batch;   // number of threads to use for batch processing
+#     uint32_t seed;              // RNG seed, -1 for random
+#     uint32_t n_ctx;             // text context, 0 = from model
+#     uint32_t n_batch;           // prompt processing maximum batch size
+#     uint32_t n_threads;         // number of threads to use for generation
+#     uint32_t n_threads_batch;   // number of threads to use for batch processing
 #     int8_t rope_scaling_type;   // RoPE scaling type, from `enum llama_rope_scaling_type`
 
 #     // ref: https://github.com/ggerganov/llama.cpp/pull/2054
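
The comment block documents the C struct that the ctypes binding wraps. As an illustration only (not the project's actual class, which carries many more fields), a binding for just the members listed in the comment could look like:

    from ctypes import Structure, c_uint32, c_int8

    # Illustrative sketch: mapping the commented C struct onto a ctypes
    # Structure, abbreviated to the fields shown above.
    class llama_context_params_sketch(Structure):
        _fields_ = [
            ("seed", c_uint32),             # RNG seed, -1 for random
            ("n_ctx", c_uint32),            # text context, 0 = from model
            ("n_batch", c_uint32),          # prompt processing max batch size
            ("n_threads", c_uint32),        # threads for generation
            ("n_threads_batch", c_uint32),  # threads for batch processing
            ("rope_scaling_type", c_int8),  # from `enum llama_rope_scaling_type`
        ]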

llama_cpp/server/app.py
+1 −1 (1 addition, 1 deletion)
@@ -93,7 +93,7 @@ class Settings(BaseSettings):
         default=0.0, description="RoPE frequency scaling factor"
     )
     yarn_ext_factor: float = Field(
-        default=float("nan")
+        default=-1.0
     )
     yarn_attn_factor: float = Field(
        default=1.0
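
The server applies the same default through a pydantic settings field, where a finite sentinel matters most: settings values are loaded from the environment and echoed back as JSON, and `float("nan")` breaks both. A hedged sketch assuming pydantic-settings-style env loading (the `YarnSettings` class name is invented for illustration):

    from pydantic import Field
    from pydantic_settings import BaseSettings

    class YarnSettings(BaseSettings):
        yarn_ext_factor: float = Field(default=-1.0)

    # Overridable from the environment, e.g. YARN_EXT_FACTOR=0.5.
    # Unlike float("nan"), -1.0 compares cleanly and serializes to valid JSON.
    print(YarnSettings().yarn_ext_factor)  # -1.0 unless overridden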

vendor/llama.cpp

Submodule pointer updated to a newer upstream llama.cpp commit (the new revision is not shown in this view).
0 commit comments