Commit f062a7f

feat: Update llama.cpp

1 parent cf1fdd8
3 files changed: +4 -12 lines changed

llama_cpp/llama.py (0 additions, 4 deletions)

@@ -86,7 +86,6 @@ def __init__(
         yarn_beta_fast: float = 32.0,
         yarn_beta_slow: float = 1.0,
         yarn_orig_ctx: int = 0,
-        mul_mat_q: bool = True,
         logits_all: bool = False,
         embedding: bool = False,
         offload_kqv: bool = True,
@@ -291,7 +290,6 @@ def __init__(
             yarn_beta_slow if yarn_beta_slow != 0.0 else 0
         )
         self.context_params.yarn_orig_ctx = yarn_orig_ctx if yarn_orig_ctx != 0 else 0
-        self.context_params.mul_mat_q = mul_mat_q
         self.context_params.logits_all = (
             logits_all if draft_model is None else True
         )  # Must be set to True for speculative decoding
@@ -1724,7 +1722,6 @@ def __getstate__(self):
             yarn_beta_fast=self.context_params.yarn_beta_fast,
             yarn_beta_slow=self.context_params.yarn_beta_slow,
             yarn_orig_ctx=self.context_params.yarn_orig_ctx,
-            mul_mat_q=self.context_params.mul_mat_q,
             logits_all=self.context_params.logits_all,
             embedding=self.context_params.embedding,
             # Sampling Params
@@ -1768,7 +1765,6 @@ def __setstate__(self, state):
             yarn_beta_fast=state["yarn_beta_fast"],
             yarn_beta_slow=state["yarn_beta_slow"],
             yarn_orig_ctx=state["yarn_orig_ctx"],
-            mul_mat_q=state["mul_mat_q"],
             logits_all=state["logits_all"],
             embedding=state["embedding"],
             # Sampling Params
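The net effect of these four hunks is that the deprecated mul_mat_q flag disappears from the Llama API surface: the constructor keyword, the assignment into context_params, and the pickle round-trip in __getstate__/__setstate__. A minimal caller-side sketch of the adjustment (the model path is a placeholder, not from this commit):

    from llama_cpp import Llama

    # Before this commit the keyword was accepted but deprecated
    # (upstream llama.cpp had already hardcoded it to true):
    # llm = Llama(model_path="./model.gguf", mul_mat_q=True)

    # After this commit, simply drop the keyword:
    llm = Llama(model_path="./model.gguf")

A state dict pickled before this commit still carries a mul_mat_q key; judging by the __setstate__ hunk, the new code simply no longer reads it, so old pickles are ignored rather than rejected.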

llama_cpp/llama_cpp.py (3 additions, 7 deletions)

@@ -559,9 +559,7 @@ class llama_model_params(ctypes.Structure):
 # enum ggml_type type_k; // data type for K cache
 # enum ggml_type type_v; // data type for V cache
 
-
 # // Keep the booleans together to avoid misalignment during copy-by-value.
-# bool mul_mat_q;   // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
 # bool logits_all;  // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 # bool embedding;   // embedding mode only
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
@@ -589,7 +587,6 @@ class llama_context_params(ctypes.Structure):
     cb_eval_user_data (ctypes.ctypes.c_void_p): user data for cb_eval
     type_k (int): data type for K cache
     type_v (int): data type for V cache
-    mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
     logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
     embedding (bool): embedding mode only
     offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
@@ -615,7 +612,6 @@ class llama_context_params(ctypes.Structure):
         ("cb_eval_user_data", ctypes.c_void_p),
         ("type_k", ctypes.c_int),
         ("type_v", ctypes.c_int),
-        ("mul_mat_q", ctypes.c_bool),
         ("logits_all", ctypes.c_bool),
         ("embedding", ctypes.c_bool),
         ("offload_kqv", ctypes.c_bool),
@@ -1519,11 +1515,11 @@ def llama_copy_state_data(
     ...
 
 
-# Set the state reading from the specified address
-# Returns the number of bytes read
+# // Set the state reading from the specified address
+# // Returns the number of bytes read
 # LLAMA_API size_t llama_set_state_data(
 #         struct llama_context * ctx,
-#         uint8_t * src);
+#         const uint8_t * src);
 @ctypes_function(
     "llama_set_state_data",
     [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8)],
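This last hunk only syncs the header comment with upstream: the source pointer gained a const qualifier, which changes nothing on the Python side because ctypes has no notion of const, so the binding keeps ctypes.POINTER(ctypes.c_uint8). A hedged usage sketch (restore_state is a hypothetical helper; it assumes ctx is a valid llama_context pointer and state_buf was produced earlier by llama_copy_state_data):

    import ctypes
    from llama_cpp import llama_cpp

    def restore_state(ctx, state_buf: bytes) -> int:
        # ctypes accepts an array instance where POINTER(c_uint8) is expected.
        src = (ctypes.c_uint8 * len(state_buf)).from_buffer_copy(state_buf)
        # Per the header comment above: returns the number of bytes read.
        return llama_cpp.llama_set_state_data(ctx, src)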

vendor/llama.cpp (1 addition, 1 deletion: submodule commit pointer updated)
