Commit 8d298b4
feat: Update llama.cpp
1 parent 6eb2523

2 files changed: +78 -9 lines changed

‎llama_cpp/llama_cpp.py

77 additions & 8 deletions
@@ -581,6 +581,7 @@ class llama_model_params(ctypes.Structure):
 # bool embeddings; // if true, extract embeddings (together with logits)
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU

+
 # // Abort callback
 # // if it returns true, execution of llama_decode() will be aborted
 # // currently works only with CPU execution
@@ -1006,6 +1007,11 @@ def llama_n_ctx_train(model: llama_model_p, /) -> int: ...
 def llama_n_embd(model: llama_model_p, /) -> int: ...


+# LLAMA_API int32_t llama_n_layer (const struct llama_model * model);
+@ctypes_function("llama_n_layer", [llama_model_p_ctypes], ctypes.c_int32)
+def llama_n_layer(model: llama_model_p, /) -> int: ...
+
+
 # // Get the model's RoPE frequency scaling factor
 # LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
 @ctypes_function("llama_rope_freq_scale_train", [llama_model_p_ctypes], ctypes.c_float)
@@ -1166,12 +1172,18 @@ def llama_model_quantize(
     ...


+# // Apply a LoRA adapter to a loaded model
+# // path_base_model is the path to a higher quality model to use as a base for
+# // the layers modified by the adapter. Can be NULL to use the current loaded model.
+# // The model needs to be reloaded before applying a new adapter, otherwise the adapter
+# // will be applied on top of the previous one
+# // Returns 0 on success
 # LLAMA_API int32_t llama_model_apply_lora_from_file(
 #     const struct llama_model * model,
-#     const char * path_lora,
-#     float scale,
-#     const char * path_base_model,
-#     int32_t n_threads);
+#         const char * path_lora,
+#         float scale,
+#         const char * path_base_model,
+#         int32_t n_threads);
 @ctypes_function(
     "llama_model_apply_lora_from_file",
     [
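
For reference, a hedged sketch of calling llama_model_apply_lora_from_file as documented above (not part of the diff; the paths and thread count are placeholders, and the model is loaded with the existing helpers):

import llama_cpp.llama_cpp as llama_cpp

llama_cpp.llama_backend_init()
model = llama_cpp.llama_load_model_from_file(
    b"model.gguf", llama_cpp.llama_model_default_params()  # placeholder path
)
ret = llama_cpp.llama_model_apply_lora_from_file(
    model,
    b"lora-adapter.bin",  # path_lora (placeholder)
    1.0,                  # scale
    None,                 # path_base_model: NULL -> use the currently loaded model
    4,                    # n_threads
)
if ret != 0:
    raise RuntimeError("applying the LoRA adapter failed")
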
@@ -1190,7 +1202,57 @@ def llama_model_apply_lora_from_file(
     path_base_model: Union[ctypes.c_char_p, bytes, None],
     n_threads: Union[ctypes.c_int32, int],
     /,
-) -> int: ...
+) -> int:
+    """Apply a LoRA adapter to a loaded model
+    path_base_model is the path to a higher quality model to use as a base for
+    the layers modified by the adapter. Can be NULL to use the current loaded model.
+    The model needs to be reloaded before applying a new adapter, otherwise the adapter
+    will be applied on top of the previous one
+    Returns 0 on success"""
+    ...
+
+
+# // Apply a loaded control vector to a llama_context, or if data is NULL, clear
+# // the currently loaded vector.
+# // n_embd should be the size of a single layer's control, and data should point
+# // to an n_embd x n_layers buffer starting from layer 1.
+# // il_start and il_end are the layer range the vector should apply to (both inclusive)
+# // See llama_control_vector_load in common to load a control vector.
+# LLAMA_API int32_t llama_control_vector_apply(
+#     struct llama_context * lctx,
+#     const float * data,
+#     size_t len,
+#     int32_t n_embd,
+#     int32_t il_start,
+#     int32_t il_end);
+@ctypes_function(
+    "llama_control_vector_apply",
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_float),
+        ctypes.c_size_t,
+        ctypes.c_int32,
+        ctypes.c_int32,
+        ctypes.c_int32,
+    ],
+    ctypes.c_int32,
+)
+def llama_control_vector_apply(
+    lctx: llama_context_p,
+    data: CtypesPointerOrRef[ctypes.c_float],
+    len: int,
+    n_embd: int,
+    il_start: int,
+    il_end: int,
+    /,
+) -> int:
+    """Apply a loaded control vector to a llama_context, or if data is NULL, clear
+    the currently loaded vector.
+    n_embd should be the size of a single layer's control, and data should point
+    to an n_embd x n_layers buffer starting from layer 1.
+    il_start and il_end are the layer range the vector should apply to (both inclusive)
+    See llama_control_vector_load in common to load a control vector."""
+    ...


 # //
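
For reference, a hedged sketch of the new llama_control_vector_apply binding (not part of the diff; the model path is a placeholder and the zero-filled buffer stands in for a real control vector):

import ctypes
import llama_cpp.llama_cpp as llama_cpp

llama_cpp.llama_backend_init()
model = llama_cpp.llama_load_model_from_file(
    b"model.gguf", llama_cpp.llama_model_default_params()  # placeholder path
)
ctx = llama_cpp.llama_new_context_with_model(
    model, llama_cpp.llama_context_default_params()
)

n_embd = llama_cpp.llama_n_embd(model)
n_layer = llama_cpp.llama_n_layer(model)  # new binding from this commit
# data should point to an n_embd x n_layers buffer starting from layer 1.
data = (ctypes.c_float * (n_embd * n_layer))()  # zero-filled placeholder vector

ret = llama_cpp.llama_control_vector_apply(
    ctx, data, len(data), n_embd, 1, n_layer  # apply to layers 1..n_layer (inclusive)
)
assert ret == 0

# Passing NULL (None) for data clears the currently loaded control vector.
llama_cpp.llama_control_vector_apply(ctx, None, 0, n_embd, 1, n_layer)
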
@@ -1205,6 +1267,12 @@ def llama_model_apply_lora_from_file(
 #     llama_pos pos;
 # };
 class llama_kv_cache_view_cell(ctypes.Structure):
+    """Information associated with an individual cell in the KV cache view.
+
+    Attributes:
+        pos (llama_pos): The position for this cell. Takes KV cache shifts into account.
+            May be negative if the cell is not populated."""
+
     _fields_ = [("pos", llama_pos)]


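For reference, llama_kv_cache_view_cell is a plain ctypes structure, so the documented pos field can be exercised directly (a trivial sketch, not part of the diff):

from llama_cpp.llama_cpp import llama_kv_cache_view_cell

cell = llama_kv_cache_view_cell(pos=42)
print(cell.pos)   # 42 - position of the cell, with KV cache shifts applied
empty = llama_kv_cache_view_cell(pos=-1)
print(empty.pos)  # -1 - a negative pos means the cell is not populated
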
@@ -1985,7 +2053,7 @@ def llama_tokenize(
     /,
 ) -> int:
     """Convert the provided text into tokens.
-
+
     Args:
         model: The model to use for tokenization.
         text: The text to tokenize.
@@ -1995,10 +2063,11 @@ def llama_tokenize(
         add_bos: Whether to add a beginning-of-sentence token.
         special: Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
                  Does not insert a leading space.
-
+
     Returns:
         Returns the number of tokens on success, no more than n_tokens_max
-        Returns a negative number on failure - the number of tokens that would have been returned"""
+        Returns a negative number on failure - the number of tokens that would have been returned
+    """
     ...


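For reference, a hedged sketch of the return convention documented in the docstring above (not part of the diff; model is assumed to be a handle obtained from llama_load_model_from_file, and the text is a placeholder):

import llama_cpp.llama_cpp as llama_cpp

text = b"Hello, world!"
n_tokens_max = 8
tokens = (llama_cpp.llama_token * n_tokens_max)()

n = llama_cpp.llama_tokenize(
    model, text, len(text), tokens, n_tokens_max, True, False  # add_bos=True, special=False
)
if n < 0:
    # Negative return: -n is the number of tokens that would have been produced,
    # so grow the buffer and tokenize again.
    n_tokens_max = -n
    tokens = (llama_cpp.llama_token * n_tokens_max)()
    n = llama_cpp.llama_tokenize(
        model, text, len(text), tokens, n_tokens_max, True, False
    )
print(list(tokens[:n]))
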
‎vendor/llama.cpp

1 addition & 1 deletion