Commit 7e20e34
feat: Update llama.cpp

1 parent 01bddd6
2 files changed: +35 −1 lines changed
llama_cpp/llama_cpp.py

34 additions & 0 deletions
@@ -1444,6 +1444,24 @@ def llama_get_model_tensor(
     ...


+# // Returns true if the model contains an encoder that requires llama_encode() call
+# LLAMA_API bool llama_model_has_encoder(const struct llama_model * model);
+@ctypes_function("llama_model_has_encoder", [llama_model_p_ctypes], ctypes.c_bool)
+def llama_model_has_encoder(model: llama_model_p, /) -> bool:
+    """Returns true if the model contains an encoder that requires llama_encode() call"""
+    ...
+
+
+# // For encoder-decoder models, this function returns id of the token that must be provided
+# // to the decoder to start generating output sequence. For other models, it returns -1.
+# LLAMA_API llama_token llama_model_decoder_start_token(const struct llama_model * model);
+@ctypes_function("llama_model_decoder_start_token", [llama_model_p_ctypes], ctypes.c_int32)
+def llama_model_decoder_start_token(model: llama_model_p, /) -> int:
+    """For encoder-decoder models, this function returns id of the token that must be provided
+    to the decoder to start generating output sequence. For other models, it returns -1."""
+    ...
+
+
 # // Returns 0 on success
 # LLAMA_API uint32_t llama_model_quantize(
 #         const char * fname_inp,
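The two bindings above let callers detect an encoder-decoder model (for example a T5-style model) and find out which token must seed the decoder. A minimal sketch of how they might be used from the low-level API follows; the model path is hypothetical, and the surrounding llama_backend_init / llama_load_model_from_file / llama_token_bos calls are pre-existing bindings in this module, not part of this commit.

import llama_cpp

llama_cpp.llama_backend_init()
params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"/path/to/model.gguf", params)  # hypothetical path

if llama_cpp.llama_model_has_encoder(model):
    # Encoder-decoder model: generation must be seeded with the decoder start token.
    start_token = llama_cpp.llama_model_decoder_start_token(model)
    if start_token == -1:
        # The model does not define a dedicated start token; fall back to BOS.
        start_token = llama_cpp.llama_token_bos(model)
else:
    # Decoder-only model: no llama_encode() call is required.
    start_token = llama_cpp.llama_token_bos(model)

llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()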
@@ -2271,6 +2289,22 @@ def llama_batch_free(batch: llama_batch, /):
     ...


+# // Processes a batch of tokens with the encoder part of the encoder-decoder model.
+# // Stores the encoder output internally for later use by the decoder cross-attention layers.
+# // 0 - success
+# // < 0 - error
+# LLAMA_API int32_t llama_encode(
+#         struct llama_context * ctx,
+#         struct llama_batch batch);
+@ctypes_function("llama_encode", [llama_context_p_ctypes, llama_batch], ctypes.c_int32)
+def llama_encode(ctx: llama_context_p, batch: llama_batch, /) -> int:
+    """Processes a batch of tokens with the encoder part of the encoder-decoder model.
+    Stores the encoder output internally for later use by the decoder cross-attention layers.
+    0 - success
+    < 0 - error"""
+    ...
+
+
 # // Positive return values does not mean a fatal error, but rather a warning.
 # // 0 - success
 # // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
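A rough sketch of how llama_encode() could slot into an encoder-decoder generation flow is below. It assumes a model and context already created through the low-level bindings, and that llama_batch_get_one still takes (tokens, n_tokens, pos_0, seq_id) as it does in this version of the module; the helper name encode_prompt and the error handling are illustrative only.

import llama_cpp

def encode_prompt(model, ctx, text: bytes) -> int:
    """Tokenize `text`, run the encoder, and return the token that should seed the decoder."""
    # Tokenize the prompt with the model's vocabulary.
    max_tokens = len(text) + 8
    tokens = (llama_cpp.llama_token * max_tokens)()
    n_tokens = llama_cpp.llama_tokenize(
        model, text, len(text), tokens, max_tokens, True, True
    )
    if n_tokens < 0:
        raise RuntimeError("token buffer too small")

    # Run the encoder half of the model; the output is cached inside the context
    # for the decoder's cross-attention layers.
    batch = llama_cpp.llama_batch_get_one(tokens, n_tokens, 0, 0)
    if llama_cpp.llama_encode(ctx, batch) != 0:
        raise RuntimeError("llama_encode failed")

    # Decoding starts from the model's dedicated start token (BOS if undefined).
    start = llama_cpp.llama_model_decoder_start_token(model)
    if start == -1:
        start = llama_cpp.llama_token_bos(model)
    return start

After this call, decoding would proceed with llama_decode() much as for decoder-only models, except that the first token fed to the decoder is the returned start token.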

vendor/llama.cpp

1 addition & 1 deletion (llama.cpp submodule pointer updated; target commit not shown on this page)
