Commit 0e70984

committed
feat: Update llama.cpp
1 parent d5df431
2 files changed: 35 additions, 3 deletions

llama_cpp/llama_cpp.py: 34 additions, 2 deletions
@@ -148,6 +148,12 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]
     ctypes.c_bool, ctypes.c_void_p, ctypes.c_bool, ctypes.c_void_p
 )

+# // Abort callback
+# // If not NULL, called before ggml computation
+# // If it returns true, the computation is aborted
+# typedef bool (*ggml_abort_callback)(void * data);
+ggml_abort_callback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_void_p)
+
 # llama.h bindings

 _lib.llama_max_devices.argtypes = []
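As an aside for readers new to ctypes callback factories: CFUNCTYPE(restype, *argtypes) mirrors the C typedef directly, with the return type first. A minimal, self-contained sketch (illustrative only, not part of this commit) of wrapping a Python function as a ggml_abort_callback:

import ctypes

# Mirrors `typedef bool (*ggml_abort_callback)(void * data);`:
# the first argument to CFUNCTYPE is the return type, the rest are parameters.
ggml_abort_callback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_void_p)

@ggml_abort_callback
def never_abort(user_data):
    # `user_data` is the void* registered alongside the callback.
    return False  # returning True would abort the ggml computation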
@@ -560,10 +566,16 @@ class llama_model_params(ctypes.Structure):
 # enum ggml_type type_v; // data type for V cache

 # // Keep the booleans together to avoid misalignment during copy-by-value.
-# bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
+# bool logits_all; // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 # bool embedding; // embedding mode only
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
 # bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
+
+# // Abort callback
+# // if it returns true, execution of llama_decode() will be aborted
+# // currently works only with CPU execution
+# ggml_abort_callback abort_callback;
+# void * abort_callback_data;
 # };
 class llama_context_params(ctypes.Structure):
     """Parameters for llama_context
@@ -591,6 +603,8 @@ class llama_context_params(ctypes.Structure):
         embedding (bool): embedding mode only
         offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
         do_pooling (bool): whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
+        abort_callback (ggml_abort_callback): abort callback; if it returns true, execution of llama_decode() will be aborted
+        abort_callback_data (ctypes.c_void_p): data for abort_callback
     """

     _fields_ = [
@@ -616,6 +630,8 @@ class llama_context_params(ctypes.Structure):
         ("embedding", ctypes.c_bool),
         ("offload_kqv", ctypes.c_bool),
         ("do_pooling", ctypes.c_bool),
+        ("abort_callback", ggml_abort_callback),
+        ("abort_callback_data", ctypes.c_void_p),
     ]

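With the two new struct fields in place, a callback can be wired in at context-creation time. A hedged sketch, assuming `model` is an already-loaded llama_model_p and using the existing llama_context_default_params / llama_new_context_with_model bindings:

from llama_cpp import llama_cpp

@llama_cpp.ggml_abort_callback
def abort_cb(user_data):
    return False  # replace with a real cancellation check

params = llama_cpp.llama_context_default_params()
params.abort_callback = abort_cb
params.abort_callback_data = None  # no user data; passed as NULL
ctx = llama_cpp.llama_new_context_with_model(model, params)

The usual ctypes caveat applies: keep a Python reference to abort_cb alive for as long as the context may invoke it, or it can be garbage-collected out from under the C code.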
@@ -1703,8 +1719,24 @@ def llama_set_n_threads(
     """
     ...

+# // Set abort callback
+# LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);
+@ctypes_function(
+    "llama_set_abort_callback",
+    [llama_context_p_ctypes, ggml_abort_callback, ctypes.c_void_p],
+    None,
+)
+def llama_set_abort_callback(
+    ctx: llama_context_p,
+    abort_callback: Callable[[ctypes.c_void_p], bool],
+    abort_callback_data: ctypes.c_void_p,
+    /,
+):
+    """Set abort callback"""
+    ...
+

-# // Token logits obtained from the last call to llama_eval()
+# // Token logits obtained from the last call to llama_decode()
 # // The logits for the last token are stored in the last row
 # // Logits for which llama_batch.logits[i] == 0 are undefined
 # // Rows: n_tokens provided with llama_batch
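The new llama_set_abort_callback binding also allows swapping the callback on an existing context, for example to enforce a per-request time budget. An illustrative sketch; `ctx` and the 2-second deadline are assumptions, not part of this commit:

import time

from llama_cpp import llama_cpp

deadline = time.monotonic() + 2.0  # hypothetical time budget

@llama_cpp.ggml_abort_callback
def deadline_cb(user_data):
    # Returning True aborts the in-flight llama_decode()
    # (CPU execution only, per the comments in this diff).
    return time.monotonic() > deadline

# `ctx` is assumed to be an existing llama_context_p; as above, keep a
# reference to deadline_cb alive while it is registered.
llama_cpp.llama_set_abort_callback(ctx, deadline_cb, None)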

vendor/llama.cpp: 1 addition, 1 deletion (submodule commit updated)