Commit c67f786

Update llama.cpp

Parent: e34f441

2 files changed: +28 -6 lines

llama_cpp/llama_cpp.py

(27 additions, 5 deletions)
@@ -290,13 +290,14 @@ def llama_mlock_supported() -> bool:
 
 # // TODO: not great API - very likely to change
 # // Initialize the llama + ggml backend
+# // If numa is true, use NUMA optimizations
 # // Call once at the start of the program
-# LLAMA_API void llama_init_backend();
-def llama_init_backend():
-    return _lib.llama_init_backend()
+# LLAMA_API void llama_init_backend(bool numa);
+def llama_init_backend(numa: c_bool):
+    return _lib.llama_init_backend(numa)
 
 
-_lib.llama_init_backend.argtypes = []
+_lib.llama_init_backend.argtypes = [c_bool]
 _lib.llama_init_backend.restype = None
 
 
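A minimal usage sketch of the updated binding (illustrative only; it mirrors the call the module itself now makes at import time, shown in the last hunk of this file):

    from ctypes import c_bool
    from llama_cpp.llama_cpp import llama_init_backend

    # One positional argument, matching argtypes = [c_bool]:
    # c_bool(False) leaves NUMA optimizations off, c_bool(True) requests them.
    llama_init_backend(c_bool(False))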

@@ -565,6 +566,27 @@ def llama_eval(
 _lib.llama_eval.restype = c_int
 
 
+# // Same as llama_eval, but use float matrix input directly.
+# LLAMA_API int llama_eval_embd(
+#     struct llama_context * ctx,
+#     const float * embd,
+#     int n_tokens,
+#     int n_past,
+#     int n_threads);
+def llama_eval_embd(
+    ctx: llama_context_p,
+    embd,  # type: Array[c_float]
+    n_tokens: c_int,
+    n_past: c_int,
+    n_threads: c_int,
+) -> int:
+    return _lib.llama_eval_embd(ctx, embd, n_tokens, n_past, n_threads)
+
+
+_lib.llama_eval_embd.argtypes = [llama_context_p, c_float_p, c_int, c_int, c_int]
+_lib.llama_eval_embd.restype = c_int
+
+
 # Convert the provided text into tokens.
 # The tokens pointer must be large enough to hold the resulting tokens.
 # Returns the number of tokens on success, no more than n_max_tokens
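A hedged usage sketch for the new llama_eval_embd binding. It assumes `ctx` is an already-created llama_context_p, that llama_n_embd(ctx) (defined elsewhere in this module) reports the model's embedding width, and that the buffer holds n_tokens * n_embd contiguous floats; the n_past and n_threads values are illustrative.

    from ctypes import c_float
    from llama_cpp.llama_cpp import llama_eval_embd, llama_n_embd

    # Assumption: `ctx` (a llama_context_p) was created elsewhere.
    n_tokens = 1
    n_embd = llama_n_embd(ctx)                  # embedding width of the loaded model
    embd = (c_float * (n_tokens * n_embd))()    # placeholder values, zero-filled

    # Arguments: ctx, embd, n_tokens, n_past, n_threads.
    if llama_eval_embd(ctx, embd, n_tokens, 0, 4) != 0:
        raise RuntimeError("llama_eval_embd failed")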
@@ -998,5 +1020,5 @@ def llama_print_system_info() -> bytes:
 _llama_initialized = False
 
 if not _llama_initialized:
-    llama_init_backend()
+    llama_init_backend(c_bool(False))
     _llama_initialized = True
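Because the import-time call above hard-codes numa=False, a program that wants NUMA optimizations would have to invoke the binding itself. A hedged sketch follows, under the assumption that re-initializing the backend after the import-time call (and before any model or context is created) actually takes effect; the C API comment only says to call it once at program start.

    from ctypes import c_bool
    import llama_cpp.llama_cpp as llama_cpp  # import already ran llama_init_backend(c_bool(False))

    # Assumption: a second initialization with numa=True is honored.
    llama_cpp.llama_init_backend(c_bool(True))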

vendor/llama.cpp

(Submodule pointer update: 1 addition, 1 deletion)
