Commit dccb148

feat: Update llama.cpp

Parent: a14b49d
2 files changed, 66 insertions(+), 27 deletions(-)

llama_cpp/llama_cpp.py (65 additions, 26 deletions)
```diff
@@ -233,9 +233,6 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # define LLAMA_DEFAULT_SEED 0xFFFFFFFF
 LLAMA_DEFAULT_SEED = 0xFFFFFFFF
 
-# define LLAMA_MAX_RNG_STATE (64*1024)
-LLAMA_MAX_RNG_STATE = 64 * 1024
-
 # define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 LLAMA_FILE_MAGIC_GGLA = 0x67676C61
```

```diff
@@ -247,13 +244,13 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 
 # define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN
-# define LLAMA_SESSION_VERSION 7
-LLAMA_SESSION_VERSION = 7
+# define LLAMA_SESSION_VERSION 8
+LLAMA_SESSION_VERSION = 8
 
 # define LLAMA_STATE_SEQ_MAGIC LLAMA_FILE_MAGIC_GGSQ
 LLAMA_STATE_SEQ_MAGIC = LLAMA_FILE_MAGIC_GGSQ
-# define LLAMA_STATE_SEQ_VERSION 1
-LLAMA_STATE_SEQ_VERSION = 1
+# define LLAMA_STATE_SEQ_VERSION 2
+LLAMA_STATE_SEQ_VERSION = 2
 
 # struct llama_model;
 llama_model_p = NewType("llama_model_p", int)
```
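Bumping LLAMA_SESSION_VERSION from 7 to 8 (and LLAMA_STATE_SEQ_VERSION from 1 to 2) means state files written by older builds will be rejected on load. A minimal pre-flight sketch, assuming the session file begins with the uint32 magic followed by the uint32 version (little-endian on typical platforms, per llama.cpp's state save format); `session.bin` is a hypothetical path:

```python
import struct

# 'ggsn' magic from llama.h; version must now match 8.
LLAMA_SESSION_MAGIC = 0x6767736E
LLAMA_SESSION_VERSION = 8

def session_header(path: str) -> tuple[int, int]:
    """Read (magic, version) from the first 8 bytes of a llama.cpp session file.

    Assumes two little-endian uint32 values lead the file.
    """
    with open(path, "rb") as f:
        return struct.unpack("<II", f.read(8))

magic, version = session_header("session.bin")  # hypothetical path
if (magic, version) != (LLAMA_SESSION_MAGIC, LLAMA_SESSION_VERSION):
    raise RuntimeError(f"incompatible session file: magic={magic:#x} version={version}")
```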
```diff
@@ -1583,7 +1580,7 @@ def llama_lora_adapter_set(
     ...
 
 
-# // Remove a LoRA adapter from given context
+# // Remove a specific LoRA adapter from given context
 # // Return -1 if the adapter is not present in the context
 # LLAMA_API int32_t llama_lora_adapter_remove(
 #         struct llama_context * ctx,
```
```diff
@@ -1601,6 +1598,19 @@ def llama_lora_adapter_remove(
     ...
 
 
+# // Remove all LoRA adapters from given context
+# LLAMA_API void llama_lora_adapter_clear(
+#         struct llama_context * ctx);
+@ctypes_function(
+    "llama_lora_adapter_clear",
+    [llama_context_p_ctypes],
+    None,
+)
+def llama_lora_adapter_clear(ctx: llama_context_p, /):
+    """Remove all LoRA adapters from given context"""
+    ...
+
+
 # // Manually free a LoRA adapter
 # // Note: loaded adapters will be free when the associated model is deleted
 # LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
```
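For orientation, a hedged usage sketch of the new binding from the low-level ctypes API; the GGUF paths are hypothetical placeholders, and the surrounding calls are the module's existing low-level bindings:

```python
import llama_cpp

llama_cpp.llama_backend_init()

# Hypothetical placeholder paths; substitute real GGUF files.
model = llama_cpp.llama_load_model_from_file(
    b"model.gguf", llama_cpp.llama_model_default_params()
)
ctx = llama_cpp.llama_new_context_with_model(
    model, llama_cpp.llama_context_default_params()
)

# Attach a LoRA adapter, then detach everything in one call.
adapter = llama_cpp.llama_lora_adapter_init(model, b"adapter.gguf")
llama_cpp.llama_lora_adapter_set(ctx, adapter, 1.0)
llama_cpp.llama_lora_adapter_clear(ctx)  # new: removes all adapters from ctx

llama_cpp.llama_free(ctx)
llama_cpp.llama_free_model(model)
```

Previously the only way to detach adapters was one at a time via `llama_lora_adapter_remove`; the clear call drops them all without needing to track handles.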
```diff
@@ -1992,17 +2002,17 @@ def llama_kv_cache_update(ctx: llama_context_p, /):
 # //
 
 
-# Returns the maximum size in bytes of the state (rng, logits, embedding
-# and kv_cache) - will often be smaller after compacting tokens
-# LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx);
+# // Returns the *actual* size in bytes of the state
+# // (rng, logits, embedding and kv_cache)
+# // Only use when saving the state, not when restoring it, otherwise the size may be too small.
+# LLAMA_API size_t llama_state_get_size(struct llama_context * ctx);
 @ctypes_function("llama_state_get_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_state_get_size(ctx: llama_context_p, /) -> int:
-    """Returns the maximum size in bytes of the state (rng, logits, embedding
-    and kv_cache) - will often be smaller after compacting tokens"""
+    """Returns the *actual* size in bytes of the state (rng, logits, embedding and kv_cache) - will often be smaller after compacting tokens"""
     ...
 
 
-# LLAMA_API DEPRECATED(size_t llama_get_state_size(const struct llama_context * ctx),
+# LLAMA_API DEPRECATED(size_t llama_get_state_size(struct llama_context * ctx),
 #     "use llama_state_get_size instead");
 @ctypes_function("llama_get_state_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_get_state_size(ctx: llama_context_p, /) -> int:
```
```diff
@@ -2011,22 +2021,27 @@ def llama_get_state_size(ctx: llama_context_p, /) -> int:
     ...
 
 
-# Copies the state to the specified destination address.
-# Destination needs to have allocated enough memory.
-# Returns the number of bytes copied
+# // Copies the state to the specified destination address.
+# // Destination needs to have allocated enough memory.
+# // Returns the number of bytes copied
 # LLAMA_API size_t llama_state_get_data(
 #         struct llama_context * ctx,
-#         uint8_t * dst);
+#         uint8_t * dst,
+#         size_t   size);
 @ctypes_function(
     "llama_state_get_data",
     [
         llama_context_p_ctypes,
         ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
     ],
     ctypes.c_size_t,
 )
 def llama_state_get_data(
-    ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], /
+    ctx: llama_context_p,
+    dst: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    /,
 ) -> int:
     """Copies the state to the specified destination address.
     Destination needs to have allocated enough memory.
```
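With the new trailing `size` argument the caller states the capacity of `dst` explicitly, rather than the library trusting the buffer to be large enough. A sketch of the save path, assuming `ctx` is a live context created as in the earlier sketch:

```python
import ctypes
import llama_cpp

# Size to allocate; per the updated comment, valid when saving, not restoring.
state_size = llama_cpp.llama_state_get_size(ctx)
state_buf = (ctypes.c_uint8 * state_size)()

# New third argument: the capacity of state_buf.
# Returns the number of bytes actually written, which may be smaller.
n_saved = llama_cpp.llama_state_get_data(ctx, state_buf, state_size)
assert n_saved <= state_size
```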
```diff
@@ -2059,14 +2074,18 @@ def llama_copy_state_data(
 # // Returns the number of bytes read
 # LLAMA_API size_t llama_state_set_data(
 #         struct llama_context * ctx,
-#         const uint8_t * src);
+#         const uint8_t * src,
+#         size_t   size);
 @ctypes_function(
     "llama_state_set_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8)],
+    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), ctypes.c_size_t],
     ctypes.c_size_t,
 )
 def llama_state_set_data(
-    ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], /
+    ctx: llama_context_p,
+    src: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    /,
 ) -> int:
     """Set the state reading from the specified address
     Returns the number of bytes read"""
```
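The restore side takes the same explicit length. Continuing the sketch above, the `n_saved` valid bytes in `state_buf` are replayed into the context:

```python
# New third argument: number of valid bytes in state_buf.
n_restored = llama_cpp.llama_state_set_data(ctx, state_buf, n_saved)
assert n_restored == n_saved
```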
```diff
@@ -2216,14 +2235,24 @@ def llama_state_seq_get_size(ctx: llama_context_p, seq_id: llama_seq_id, /) -> i
 # LLAMA_API size_t llama_state_seq_get_data(
 #         struct llama_context * ctx,
 #         uint8_t * dst,
+#         size_t   size,
 #         llama_seq_id seq_id);
 @ctypes_function(
     "llama_state_seq_get_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id],
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
+        llama_seq_id,
+    ],
     ctypes.c_size_t,
 )
 def llama_state_seq_get_data(
-    ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], seq_id: llama_seq_id, /
+    ctx: llama_context_p,
+    dst: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    seq_id: llama_seq_id,
+    /,
 ) -> int:
     """Copy the KV cache of a single sequence into the specified buffer"""
     ...
```
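The per-sequence variants gain the same `size` parameter, inserted before the sequence id. A sketch of snapshotting one sequence's KV cache, again assuming a live `ctx`; `seq_id = 0` is a hypothetical source sequence:

```python
import ctypes
import llama_cpp

seq_id = 0  # hypothetical source sequence
seq_size = llama_cpp.llama_state_seq_get_size(ctx, seq_id)
seq_buf = (ctypes.c_uint8 * seq_size)()

# size (the capacity of seq_buf) now precedes seq_id in the argument list.
n_copied = llama_cpp.llama_state_seq_get_data(ctx, seq_buf, seq_size, seq_id)
```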
```diff
@@ -2236,14 +2265,24 @@ def llama_state_seq_get_data(
 # LLAMA_API size_t llama_state_seq_set_data(
 #         struct llama_context * ctx,
 #         const uint8_t * src,
+#         size_t   size,
 #         llama_seq_id dest_seq_id);
 @ctypes_function(
     "llama_state_seq_set_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id],
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
+        llama_seq_id,
+    ],
     ctypes.c_size_t,
 )
 def llama_state_seq_set_data(
-    ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], dest_seq_id: llama_seq_id, /
+    ctx: llama_context_p,
+    src: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    dest_seq_id: llama_seq_id,
+    /,
 ) -> int:
     """Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence"""
     ...
```
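And the matching restore, which can replay the snapshot into a different sequence slot; continuing the sketch, `dest_seq_id = 1` is a hypothetical destination:

```python
dest_seq_id = 1  # hypothetical destination sequence
# size (the number of valid bytes in seq_buf) again precedes the sequence id.
n_set = llama_cpp.llama_state_seq_set_data(ctx, seq_buf, n_copied, dest_seq_id)
```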

vendor/llama.cpp (1 addition, 1 deletion: submodule pointer update)