Commit 9f1e565

Update llama.cpp

1 parent 213cc5c · commit 9f1e565

2 files changed: +40 -9 lines changed

llama_cpp/llama_cpp.py

39 additions & 8 deletions
@@ -1,9 +1,21 @@
 import sys
 import os
 import ctypes
-from ctypes import c_int, c_float, c_char_p, c_void_p, c_bool, POINTER, Structure, Array, c_uint8, c_size_t
+from ctypes import (
+    c_int,
+    c_float,
+    c_char_p,
+    c_void_p,
+    c_bool,
+    POINTER,
+    Structure,
+    Array,
+    c_uint8,
+    c_size_t,
+)
 import pathlib

+
 # Load the library
 def _load_shared_library(lib_base_name):
     # Determine the file extension based on the platform
@@ -22,10 +34,10 @@ def _load_shared_library(lib_base_name):
     # for llamacpp) and "llama" (default name for this repo)
     _lib_paths = [
         _base_path / f"lib{lib_base_name}{lib_ext}",
-        _base_path / f"{lib_base_name}{lib_ext}"
+        _base_path / f"{lib_base_name}{lib_ext}",
     ]

-    if ("LLAMA_CPP_LIB" in os.environ):
+    if "LLAMA_CPP_LIB" in os.environ:
         lib_base_name = os.environ["LLAMA_CPP_LIB"]
         _lib = pathlib.Path(lib_base_name)
         _base_path = _lib.parent.resolve()
@@ -43,7 +55,10 @@ def _load_shared_library(lib_base_name):
             except Exception as e:
                 raise RuntimeError(f"Failed to load shared library '{_lib_path}': {e}")

-    raise FileNotFoundError(f"Shared library with base name '{lib_base_name}' not found")
+    raise FileNotFoundError(
+        f"Shared library with base name '{lib_base_name}' not found"
+    )
+

 # Specify the base name of the shared library to load
 _lib_base_name = "llama"
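
The loader above walks a list of candidate paths and also honors the LLAMA_CPP_LIB environment variable, which the code treats as a path to the shared library. A usage sketch (not part of the commit; the library path is hypothetical), noting that the variable must be set before the module is imported:

# Usage sketch (not from the commit): point the bindings at a custom libllama
# build via LLAMA_CPP_LIB; the path below is hypothetical.
import os

os.environ["LLAMA_CPP_LIB"] = "/opt/llama.cpp/libllama.so"  # set before importing

from llama_cpp import llama_cpp  # module-level _load_shared_library("llama") runs here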
@@ -95,6 +110,10 @@ class llama_context_params(Structure):

 llama_context_params_p = POINTER(llama_context_params)

+LLAMA_FTYPE_ALL_F32 = ctypes.c_int(0)
+LLAMA_FTYPE_MOSTLY_F16 = ctypes.c_int(1)  # except 1d tensors
+LLAMA_FTYPE_MOSTLY_Q4_0 = ctypes.c_int(2)  # except 1d tensors
+LLAMA_FTYPE_MOSTLY_Q4_1 = ctypes.c_int(3)  # except 1d tensors

 # Functions
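
The four LLAMA_FTYPE_* constants added here mirror the llama.cpp ftype enumeration and are the natural values to pass as the itype argument of llama_model_quantize, which appears later in this diff. A usage sketch (not part of the commit; the model paths are hypothetical):

# Usage sketch (not from the commit): quantize an f16 GGML model to Q4_0.
from llama_cpp import llama_cpp

ret = llama_cpp.llama_model_quantize(
    b"./models/7B/ggml-model-f16.bin",   # fname_inp (bytes path, hypothetical)
    b"./models/7B/ggml-model-q4_0.bin",  # fname_out (bytes path, hypothetical)
    llama_cpp.LLAMA_FTYPE_MOSTLY_Q4_0,   # itype: Q4_0 for all but 1d tensors
)
assert ret == 0  # the binding documents 0 as success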

@@ -106,18 +125,23 @@ def llama_context_default_params() -> llama_context_params:
 _lib.llama_context_default_params.argtypes = []
 _lib.llama_context_default_params.restype = llama_context_params

+
 def llama_mmap_supported() -> c_bool:
     return _lib.llama_mmap_supported()

+
 _lib.llama_mmap_supported.argtypes = []
 _lib.llama_mmap_supported.restype = c_bool

+
 def llama_mlock_supported() -> c_bool:
     return _lib.llama_mlock_supported()

+
 _lib.llama_mlock_supported.argtypes = []
 _lib.llama_mlock_supported.restype = c_bool

+
 # Various functions for loading a ggml llama model.
 # Allocate (almost) all memory needed for the model.
 # Return NULL on failure
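
llama_mmap_supported and llama_mlock_supported are thin feature probes returning c_bool. A usage sketch (not part of the commit):

# Usage sketch (not from the commit): query platform support before deciding
# whether to request mmap-based loading or mlock-ed memory for a context.
from llama_cpp import llama_cpp

print("mmap supported: ", bool(llama_cpp.llama_mmap_supported()))
print("mlock supported:", bool(llama_cpp.llama_mlock_supported()))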
@@ -142,42 +166,49 @@ def llama_free(ctx: llama_context_p):

 # TODO: not great API - very likely to change
 # Returns 0 on success
-def llama_model_quantize(
-    fname_inp: bytes, fname_out: bytes, itype: c_int
-) -> c_int:
+def llama_model_quantize(fname_inp: bytes, fname_out: bytes, itype: c_int) -> c_int:
     return _lib.llama_model_quantize(fname_inp, fname_out, itype)


 _lib.llama_model_quantize.argtypes = [c_char_p, c_char_p, c_int]
 _lib.llama_model_quantize.restype = c_int

+
 # Returns the KV cache that will contain the context for the
 # ongoing prediction with the model.
 def llama_get_kv_cache(ctx: llama_context_p):
     return _lib.llama_get_kv_cache(ctx)

+
 _lib.llama_get_kv_cache.argtypes = [llama_context_p]
 _lib.llama_get_kv_cache.restype = POINTER(c_uint8)

+
 # Returns the size of the KV cache
 def llama_get_kv_cache_size(ctx: llama_context_p) -> c_size_t:
     return _lib.llama_get_kv_cache_size(ctx)

+
 _lib.llama_get_kv_cache_size.argtypes = [llama_context_p]
 _lib.llama_get_kv_cache_size.restype = c_size_t

+
 # Returns the number of tokens in the KV cache
 def llama_get_kv_cache_token_count(ctx: llama_context_p) -> c_int:
     return _lib.llama_get_kv_cache_token_count(ctx)

+
 _lib.llama_get_kv_cache_token_count.argtypes = [llama_context_p]
 _lib.llama_get_kv_cache_token_count.restype = c_int


 # Sets the KV cache containing the current context for the model
-def llama_set_kv_cache(ctx: llama_context_p, kv_cache, n_size: c_size_t, n_token_count: c_int):
+def llama_set_kv_cache(
+    ctx: llama_context_p, kv_cache, n_size: c_size_t, n_token_count: c_int
+):
     return _lib.llama_set_kv_cache(ctx, kv_cache, n_size, n_token_count)

+
 _lib.llama_set_kv_cache.argtypes = [llama_context_p, POINTER(c_uint8), c_size_t, c_int]
 _lib.llama_set_kv_cache.restype = None
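
Together, llama_get_kv_cache, llama_get_kv_cache_size, llama_get_kv_cache_token_count and llama_set_kv_cache let a caller snapshot and later restore the context state. A minimal sketch (not part of the commit), assuming ctx is an already-initialized llama_context_p obtained elsewhere:

# Usage sketch (not from the commit): snapshot and restore the KV cache of an
# existing context `ctx` (assumed to be a valid llama_context_p).
import ctypes
from llama_cpp import llama_cpp

def snapshot_kv_cache(ctx):
    size = llama_cpp.llama_get_kv_cache_size(ctx)              # cache size in bytes
    n_tokens = llama_cpp.llama_get_kv_cache_token_count(ctx)   # tokens currently cached
    src = llama_cpp.llama_get_kv_cache(ctx)                    # POINTER(c_uint8) into the cache
    buf = (ctypes.c_uint8 * size)()                            # owned copy of the raw bytes
    ctypes.memmove(buf, src, size)
    return buf, size, n_tokens

def restore_kv_cache(ctx, buf, size, n_tokens):
    llama_cpp.llama_set_kv_cache(ctx, buf, size, n_tokens)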

vendor/llama.cpp


0 commit comments
