Commit e99caed

Update llama.cpp
1 parent 643b73e

File tree: 2 files changed (+38 −3 lines changed)

llama_cpp/llama_cpp.py (+37 −2: 37 additions, 2 deletions)

@@ -172,7 +172,9 @@ def llama_free(ctx: llama_context_p):
 # TODO: not great API - very likely to change
 # Returns 0 on success
 # nthread - how many threads to use. If <=0, will use std::thread::hardware_concurrency(), else the number given
-def llama_model_quantize(fname_inp: bytes, fname_out: bytes, ftype: c_int, nthread: c_int) -> c_int:
+def llama_model_quantize(
+    fname_inp: bytes, fname_out: bytes, ftype: c_int, nthread: c_int
+) -> c_int:
     return _lib.llama_model_quantize(fname_inp, fname_out, ftype, nthread)


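For orientation, a minimal usage sketch of the reformatted binding. The model paths are hypothetical, and the ftype value of 2 is an assumption (it corresponds to LLAMA_FTYPE_MOSTLY_Q4_0 in llama.cpp's enum of this era); neither is part of this diff:

import ctypes
import llama_cpp

# Quantize an f16 GGML model down to 4-bit. Passing nthread <= 0 lets
# llama.cpp fall back to std::thread::hardware_concurrency(), per the
# comment in the binding above.
ret = llama_cpp.llama_model_quantize(
    b"./models/7B/ggml-model-f16.bin",   # fname_inp (hypothetical path)
    b"./models/7B/ggml-model-q4_0.bin",  # fname_out (hypothetical path)
    ctypes.c_int(2),                     # ftype: assumed q4_0 enum value
    ctypes.c_int(0),                     # nthread: <= 0 means auto-detect
)
assert ret == 0  # returns 0 on success
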

@@ -187,7 +189,10 @@ def llama_model_quantize(fname_inp: bytes, fname_out: bytes, ftype: c_int, nthre
 # will be applied on top of the previous one
 # Returns 0 on success
 def llama_apply_lora_from_file(
-    ctx: llama_context_p, path_lora: ctypes.c_char_p, path_base_model: ctypes.c_char_p, n_threads: c_int
+    ctx: llama_context_p,
+    path_lora: ctypes.c_char_p,
+    path_base_model: ctypes.c_char_p,
+    n_threads: c_int,
 ) -> c_int:
     return _lib.llama_apply_lora_from_file(ctx, path_lora, path_base_model, n_threads)

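A similar sketch for the reshaped LoRA binding; ctx is assumed to be a llama_context_p created elsewhere (e.g. via llama_init_from_file, which this diff does not show), and both paths are placeholders:

import ctypes
import llama_cpp

# Apply a LoRA adapter on top of the loaded model. path_base_model names
# the base model to read layers from; ctypes maps None to NULL if no
# separate base model is supplied.
ret = llama_cpp.llama_apply_lora_from_file(
    ctx,                                # llama_context_p from initialization
    b"./lora/ggml-adapter-model.bin",   # path_lora (hypothetical path)
    b"./models/7B/ggml-model-f16.bin",  # path_base_model (hypothetical path)
    ctypes.c_int(4),                    # n_threads
)
assert ret == 0  # returns 0 on success
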
@@ -235,6 +240,36 @@ def llama_set_kv_cache(
 _lib.llama_set_kv_cache.restype = None


+# Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
+def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
+    return _lib.llama_get_state_size(ctx)
+
+
+_lib.llama_get_state_size.argtypes = [llama_context_p]
+_lib.llama_get_state_size.restype = c_size_t
+
+
+# Copies the state to the specified destination address.
+# Destination needs to have allocated enough memory.
+# Returns the number of bytes copied
+def llama_copy_state_data(ctx: llama_context_p, dest) -> c_size_t:
+    return _lib.llama_copy_state_data(ctx, dest)
+
+
+_lib.llama_copy_state_data.argtypes = [llama_context_p, POINTER(c_uint8)]
+_lib.llama_copy_state_data.restype = c_size_t
+
+
+# Set the state reading from the specified address
+# Returns the number of bytes read
+def llama_set_state_data(ctx: llama_context_p, src) -> c_size_t:
+    return _lib.llama_set_state_data(ctx, src)
+
+
+_lib.llama_set_state_data.argtypes = [llama_context_p, POINTER(c_uint8)]
+_lib.llama_set_state_data.restype = c_size_t
+
+
 # Run the llama inference to obtain the logits and probabilities for the next token.
 # tokens + n_tokens is the provided batch of new tokens to process
 # n_past is the number of tokens to use from previous eval calls
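Together, the three new functions form a snapshot/restore pair for a context. A minimal sketch, assuming ctx is an initialized llama_context_p and relying on ctypes' rule that a (c_uint8 * n) array instance is accepted where POINTER(c_uint8) is declared:

import ctypes
import llama_cpp

# Ask how many bytes the full state occupies (rng, logits, embedding, kv_cache).
n_state = llama_cpp.llama_get_state_size(ctx)

# Allocate a buffer of that size and snapshot the state into it.
buf = (ctypes.c_uint8 * n_state)()
n_copied = llama_cpp.llama_copy_state_data(ctx, buf)
assert n_copied <= n_state  # returns the number of bytes actually copied

# ... evaluate more tokens, then rewind the context to the snapshot:
n_read = llama_cpp.llama_set_state_data(ctx, buf)

Because the buffer is plain bytes, the same pattern extends naturally to persisting a session to disk and reloading it later.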

vendor/llama.cpp (+1 −1: submodule commit updated)