Commit 57e70bb

feat: Update llama.cpp
1 parent: 01c7607

2 files changed: +13 −5 lines changed

llama_cpp/llama_cpp.py (12 additions, 4 deletions)
@@ -220,6 +220,7 @@
 # LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
 # LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
 # LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
+# LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -247,6 +248,7 @@
 LLAMA_VOCAB_PRE_TYPE_BLOOM = 23
 LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24
 LLAMA_VOCAB_PRE_TYPE_EXAONE = 25
+LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26


 # // note: these values should be synchronized with ggml_rope
@@ -404,12 +406,14 @@
 # LLAMA_POOLING_TYPE_MEAN = 1,
 # LLAMA_POOLING_TYPE_CLS = 2,
 # LLAMA_POOLING_TYPE_LAST = 3,
+# LLAMA_POOLING_TYPE_RANK = 4, // used by reranking models to attach the classification head to the graph
 # };
 LLAMA_POOLING_TYPE_UNSPECIFIED = -1
 LLAMA_POOLING_TYPE_NONE = 0
 LLAMA_POOLING_TYPE_MEAN = 1
 LLAMA_POOLING_TYPE_CLS = 2
 LLAMA_POOLING_TYPE_LAST = 3
+LLAMA_POOLING_TYPE_RANK = 4

 # enum llama_attention_type {
 # LLAMA_ATTENTION_TYPE_UNSPECIFIED = -1,
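
The new LLAMA_POOLING_TYPE_RANK constant mirrors upstream's rank pooling, which reranking models use to attach a classification head to the graph (per the upstream comment). Below is a minimal sketch of requesting it through these low-level bindings; the model path ./reranker.gguf is a placeholder, and the tokenize/decode step is elided:

import llama_cpp

llama_cpp.llama_backend_init()

model_params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./reranker.gguf", model_params)  # placeholder path

ctx_params = llama_cpp.llama_context_default_params()
ctx_params.embeddings = True  # pooled outputs require embeddings to be enabled
ctx_params.pooling_type = llama_cpp.LLAMA_POOLING_TYPE_RANK  # new constant from this commit
ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

# ... tokenize "query + document" and llama_decode() it as sequence 0 (elided) ...

# Under rank pooling, llama_get_embeddings_seq() returns float[1]: the rank score.
score_ptr = llama_cpp.llama_get_embeddings_seq(ctx, 0)
if score_ptr:
    print("relevance score:", score_ptr[0])
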
@@ -420,10 +424,11 @@
 LLAMA_ATTENTION_TYPE_CAUSAL = 0
 LLAMA_ATTENTION_TYPE_NON_CAUSAL = 1

+
 # enum llama_split_mode {
-#    LLAMA_SPLIT_MODE_NONE    = 0, // single GPU
-#    LLAMA_SPLIT_MODE_LAYER   = 1, // split layers and KV across GPUs
-#    LLAMA_SPLIT_MODE_ROW     = 2, // split rows across GPUs
+#    LLAMA_SPLIT_MODE_NONE  = 0, // single GPU
+#    LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs
+#    LLAMA_SPLIT_MODE_ROW   = 2, // split rows across GPUs
 # };
 LLAMA_SPLIT_MODE_NONE = 0
 LLAMA_SPLIT_MODE_LAYER = 1
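
This hunk appears to only realign the upstream comments, but for context, the constants beneath it are what callers pass in llama_model_params to control multi-GPU placement. A hedged sketch (placeholder model path; the n_gpu_layers value is illustrative):

import llama_cpp

params = llama_cpp.llama_model_default_params()
params.split_mode = llama_cpp.LLAMA_SPLIT_MODE_LAYER  # split layers and KV across GPUs
params.n_gpu_layers = 99  # offload as many layers as fit; illustrative value
model = llama_cpp.llama_load_model_from_file(b"./model.gguf", params)  # placeholder path
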
@@ -2520,7 +2525,8 @@ def llama_get_embeddings_ith(

 # // Get the embeddings for a sequence id
 # // Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
-# // shape: [n_embd] (1-dimensional)
+# // when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[1] with the rank of the sequence
+# // otherwise: float[n_embd] (1-dimensional)
 # LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id);
 @ctypes_function(
     "llama_get_embeddings_seq",
@@ -2672,6 +2678,8 @@ def llama_token_eot(model: llama_model_p, /) -> int:
 # //
 # // Tokenization
 # //
+# // The API is thread-safe.
+# //


 # /// @details Convert the provided text into tokens.
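
Since upstream now documents the tokenization API as thread-safe, tokenizing on a thread pool should be sound. A hedged sketch (placeholder model path; fixed 512-token buffer for brevity):

from concurrent.futures import ThreadPoolExecutor
import llama_cpp

llama_cpp.llama_backend_init()
params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./model.gguf", params)  # placeholder path

def tokenize(text: bytes) -> list:
    buf = (llama_cpp.llama_token * 512)()  # fixed-size token buffer for this sketch
    n = llama_cpp.llama_tokenize(model, text, len(text), buf, len(buf), True, False)
    if n < 0:  # a negative return means the buffer was too small
        raise ValueError(f"buffer too small, need {-n} tokens")
    return list(buf[:n])

with ThreadPoolExecutor(max_workers=4) as pool:
    print(list(pool.map(tokenize, [b"hello world", b"llama.cpp", b"reranking"])))
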

vendor/llama.cpp (1 addition, 1 deletion): submodule pointer updated