Commit f4090a0 (1 parent: c999325)

Add NUMA support. Low-level API users must now explicitly call llama_backend_init at the start of their programs.
File tree

5 files changed (+20, -9 lines changed)

‎README.md

1 addition & 0 deletions
@@ -180,6 +180,7 @@ Below is a short example demonstrating how to use the low-level API to tokenize
 ```python
 >>> import llama_cpp
 >>> import ctypes
+>>> llama_cpp.llama_backend_init(numa=False) # Must be called once at the start of each program
 >>> params = llama_cpp.llama_context_default_params()
 # use bytes for char * params
 >>> model = llama_cpp.llama_load_model_from_file(b"./models/7b/ggml-model.bin", params)

‎examples/low_level_api/low_level_api_llama_cpp.py

2 additions & 0 deletions
@@ -4,6 +4,8 @@
 
 import llama_cpp
 
+llama_cpp.llama_backend_init(numa=False)
+
 N_THREADS = multiprocessing.cpu_count()
 MODEL_PATH = os.environ.get('MODEL', "../models/7B/ggml-model.bin")

‎llama_cpp/llama.py

13 additions & 0 deletions
@@ -209,6 +209,8 @@ def __call__(
 class Llama:
     """High-level Python wrapper for a llama.cpp model."""
 
+    __backend_initialized = False
+
     def __init__(
         self,
         model_path: str,
@@ -234,6 +236,7 @@ def __init__(
         last_n_tokens_size: int = 64,
         lora_base: Optional[str] = None,
         lora_path: Optional[str] = None,
+        numa: bool = False,
         verbose: bool = True,
         **kwargs  # type: ignore
     ):
@@ -261,6 +264,7 @@ def __init__(
             last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
             lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
             lora_path: Path to a LoRA file to apply to the model.
+            numa: Enable NUMA support. (NOTE: The initial value of this parameter is used for the remainder of the program as this value is set in llama_backend_init)
             verbose: Print verbose output to stderr.
             kwargs: Unused keyword arguments (for additional backwards compatibility).
@@ -272,6 +276,15 @@ def __init__(
         """
 
         self.verbose = verbose
+
+        if not Llama.__backend_initialized:
+            if self.verbose:
+                llama_cpp.llama_backend_init(numa)
+            else:
+                with suppress_stdout_stderr():
+                    llama_cpp.llama_backend_init(numa)
+            Llama.__backend_initialized = True
+
         self.model_path = model_path
 
         self.params = llama_cpp.llama_context_default_params()
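For users of the high-level wrapper, the backend is now initialized lazily, once per process, on the first `Llama` construction, using whatever `numa` value that first instance was given. A minimal usage sketch (the model path is purely illustrative):

```python
from llama_cpp import Llama

# Hypothetical model path for illustration; point this at a real GGML model file.
llm = Llama(model_path="./models/7b/ggml-model.bin", numa=True)

# Because llama_backend_init runs only once per process, the numa value of the
# first Llama instance applies for the remainder of the program.
```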

‎llama_cpp/llama_cpp.py

0 additions & 9 deletions
@@ -1524,12 +1524,3 @@ def llama_dump_timing_info_yaml(stream: ctypes.c_void_p, ctx: llama_context_p):
 _lib.llama_dump_timing_info_yaml.argtypes = [ctypes.c_void_p, llama_context_p]
 _lib.llama_dump_timing_info_yaml.restype = None
 
-
-###################################################################################################
-
-
-_llama_initialized = False
-
-if not _llama_initialized:
-    llama_backend_init(False)
-    _llama_initialized = True
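With the import-time auto-initialization removed above, programs that use the low-level ctypes bindings directly must call `llama_backend_init` themselves before any other llama.cpp call, as the updated README and example show. A minimal sketch (the model path is illustrative):

```python
import llama_cpp

# Must be called once at the start of the program, before any other llama.cpp call.
llama_cpp.llama_backend_init(numa=False)

params = llama_cpp.llama_context_default_params()
model = llama_cpp.llama_load_model_from_file(b"./models/7b/ggml-model.bin", params)
```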

‎llama_cpp/server/app.py

4 additions & 0 deletions
@@ -98,6 +98,10 @@ class Settings(BaseSettings):
         default=None,
         description="Path to a LoRA file to apply to the model.",
     )
+    numa: bool = Field(
+        default=False,
+        description="Enable NUMA support.",
+    )
     cache: bool = Field(
         default=False,
         description="Use a cache to reduce processing times for evaluated prompts.",

0 commit comments