Commit 71e3e4c

Update llama.cpp

1 parent 2b37d8e · commit 71e3e4c

2 files changed: +35 -8 lines changed

llama_cpp/llama_cpp.py: 34 additions & 7 deletions
@@ -98,7 +98,7 @@ def _load_shared_library(lib_base_name: str):
 # llama.h bindings

 _lib.llama_max_devices.argtypes = []
-_lib.llama_max_devices.restype = ctypes.c_int32
+_lib.llama_max_devices.restype = ctypes.c_size_t

 LLAMA_MAX_DEVICES = _lib.llama_max_devices()

@@ -390,7 +390,7 @@ class llama_model_kv_override(Structure):
 # // LLAMA_SPLIT_LAYER: ignored
 # int32_t main_gpu;

-# // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
+# // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
 # const float * tensor_split;

 # // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
@@ -417,7 +417,7 @@ class llama_model_params(Structure):
         n_gpu_layers (int): number of layers to store in VRAM
         split_mode (int): how to split the model across multiple GPUs
         main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results LLAMA_SPLIT_LAYER: ignored
-        tensor_split (ctypes.Array[ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
+        tensor_split (ctypes.Array[ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
         progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
         progress_callback_user_data (ctypes.c_void_p): context pointer passed to the progress callback
         kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
@@ -760,16 +760,43 @@ def llama_time_us() -> int:
 _lib.llama_time_us.restype = ctypes.c_int64


-# LLAMA_API int32_t llama_max_devices(void);
+# LLAMA_API size_t llama_max_devices(void);
 def llama_max_devices() -> int:
     return _lib.llama_max_devices()


 _lib.llama_max_devices.argtypes = []
-_lib.llama_max_devices.restype = ctypes.c_int32
+_lib.llama_max_devices.restype = ctypes.c_size_t


-# LLAMA_API bool llama_mmap_supported (void);
+# LLAMA_API bool llama_supports_mmap (void);
+def llama_supports_mmap() -> bool:
+    return _lib.llama_supports_mmap()
+
+
+_lib.llama_supports_mmap.argtypes = []
+_lib.llama_supports_mmap.restype = c_bool
+
+
+# LLAMA_API bool llama_supports_mlock (void);
+def llama_supports_mlock() -> bool:
+    return _lib.llama_supports_mlock()
+
+
+_lib.llama_supports_mlock.argtypes = []
+_lib.llama_supports_mlock.restype = c_bool
+
+
+# LLAMA_API bool llama_supports_gpu_offload(void);
+def llama_supports_gpu_offload() -> bool:
+    return _lib.llama_supports_gpu_offload()
+
+
+_lib.llama_supports_gpu_offload.argtypes = []
+_lib.llama_supports_gpu_offload.restype = c_bool
+
+
+# LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
 def llama_mmap_supported() -> bool:
     return _lib.llama_mmap_supported()

@@ -778,7 +805,7 @@ def llama_mmap_supported() -> bool:
 _lib.llama_mmap_supported.restype = c_bool


-# LLAMA_API bool llama_mlock_supported(void);
+# LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
 def llama_mlock_supported() -> bool:
     return _lib.llama_mlock_supported()

vendor/llama.cpp: 1 addition & 1 deletion