Commit fea33c9 (parent: 4d574bd)

feat: Update llama.cpp
2 files changed (+6 −1 lines)

llama_cpp/llama_cpp.py (5 additions, 0 deletions)
@@ -264,6 +264,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 # LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
 
 # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 # };
@@ -295,6 +296,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 LLAMA_FTYPE_MOSTLY_IQ3_M = 27
 LLAMA_FTYPE_MOSTLY_IQ2_S = 28
 LLAMA_FTYPE_MOSTLY_IQ2_M = 29
+LLAMA_FTYPE_MOSTLY_IQ4_XS = 30
 LLAMA_FTYPE_GUESSED = 1024
 
 # enum llama_rope_scaling_type {
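
The two hunks above expose the new IQ4_XS quantization type (value 30, mirroring the upstream llama.cpp enum). As a rough sketch of how the new constant could be used, assuming llama_model_quantize and llama_model_quantize_default_params are bound one-to-one to the llama.cpp C API like the rest of llama_cpp.py; the file paths and thread count are hypothetical, and the exact binding signature may differ by version:

import ctypes

import llama_cpp

params = llama_cpp.llama_model_quantize_default_params()
params.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_IQ4_XS  # the constant added here (30)
params.nthread = 8  # illustrative value

ret = llama_cpp.llama_model_quantize(
    b"model-f16.gguf",     # hypothetical input path
    b"model-iq4_xs.gguf",  # hypothetical output path
    ctypes.byref(params),
)
if ret != 0:
    raise RuntimeError(f"quantization failed with status {ret}")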
@@ -548,6 +550,7 @@ class llama_model_params(ctypes.Structure):
 # float yarn_beta_fast; // YaRN low correction dim
 # float yarn_beta_slow; // YaRN high correction dim
 # uint32_t yarn_orig_ctx; // YaRN original context size
+# float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)
 
 # ggml_backend_sched_eval_callback cb_eval;
 # void * cb_eval_user_data;
@@ -580,6 +583,7 @@ class llama_context_params(ctypes.Structure):
         yarn_beta_fast (float): YaRN low correction dim
         yarn_beta_slow (float): YaRN high correction dim
         yarn_orig_ctx (int): YaRN original context size
+        defrag_thold (float): defragment the KV cache if holes/size > thold, < 0 disabled (default)
         cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
         cb_eval_user_data (ctypes.ctypes.c_void_p): user data for cb_eval
         type_k (int): data type for K cache
@@ -605,6 +609,7 @@ class llama_context_params(ctypes.Structure):
         ("yarn_beta_fast", ctypes.c_float),
         ("yarn_beta_slow", ctypes.c_float),
         ("yarn_orig_ctx", ctypes.c_uint32),
+        ("defrag_thold", ctypes.c_float),
         ("cb_eval", ggml_backend_sched_eval_callback),
         ("cb_eval_user_data", ctypes.c_void_p),
         ("type_k", ctypes.c_int),

vendor/llama.cpp (1 addition, 1 deletion: submodule commit pointer updated)