Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 89cce50

Browse files
committed
Update llama.cpp
1 parent b8fc1c7 commit 89cce50
Copy full SHA for 89cce50

File tree

Expand file tree / Collapse file tree

2 files changed

+14
-1
lines changed
Filter options
Expand file tree / Collapse file tree

2 files changed

+14
-1
lines changed

‎llama_cpp/llama_cpp.py

Copy file name to clipboard. Expand all lines: llama_cpp/llama_cpp.py
+13 −0 — Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ def _load_shared_library(lib_base_name: str):
9191
c_uint8_p = POINTER(c_uint8)
9292
c_size_t_p = POINTER(c_size_t)
9393

94+
# from ggml-backend.h
95+
# typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
96+
ggml_backend_sched_eval_callback = ctypes.CFUNCTYPE(
97+
c_bool, c_void_p, c_bool, c_void_p
98+
)
99+
94100
# llama.h bindings
95101

96102
_lib.llama_max_devices.argtypes = []
@@ -448,6 +454,9 @@ class llama_model_params(Structure):
448454
# float yarn_beta_slow; // YaRN high correction dim
449455
# uint32_t yarn_orig_ctx; // YaRN original context size
450456

457+
# ggml_backend_sched_eval_callback cb_eval;
458+
# void * cb_eval_user_data;
459+
451460
# enum ggml_type type_k; // data type for K cache
452461
# enum ggml_type type_v; // data type for V cache
453462

@@ -475,6 +484,8 @@ class llama_context_params(Structure):
475484
yarn_beta_fast (float): YaRN low correction dim
476485
yarn_beta_slow (float): YaRN high correction dim
477486
yarn_orig_ctx (int): YaRN original context size
487+
cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
488+
cb_eval_user_data (ctypes.c_void_p): user data for cb_eval
478489
type_k (int): data type for K cache
479490
type_v (int): data type for V cache
480491
mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
@@ -497,6 +508,8 @@ class llama_context_params(Structure):
497508
("yarn_beta_fast", c_float),
498509
("yarn_beta_slow", c_float),
499510
("yarn_orig_ctx", c_uint32),
511+
("cb_eval", ggml_backend_sched_eval_callback),
512+
("cb_eval_user_data", c_void_p),
500513
("type_k", c_int),
501514
("type_v", c_int),
502515
("mul_mat_q", c_bool),

‎vendor/llama.cpp

Copy file name to clipboard

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.