Commit 04959f1

feat: Update llama_cpp.py bindings
1 parent 35c980e · commit 04959f1

1 file changed: 12 additions, 1 deletion

llama_cpp/llama_cpp.py
@@ -468,11 +468,13 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]
 # LLAMA_POOLING_TYPE_NONE = 0,
 # LLAMA_POOLING_TYPE_MEAN = 1,
 # LLAMA_POOLING_TYPE_CLS = 2,
+# LLAMA_POOLING_TYPE_LAST = 3,
 # };
 LLAMA_POOLING_TYPE_UNSPECIFIED = -1
 LLAMA_POOLING_TYPE_NONE = 0
 LLAMA_POOLING_TYPE_MEAN = 1
 LLAMA_POOLING_TYPE_CLS = 2
+LLAMA_POOLING_TYPE_LAST = 3

 # enum llama_split_mode {
 # LLAMA_SPLIT_MODE_NONE = 0, // single GPU

@@ -761,7 +763,6 @@ class llama_model_params(ctypes.Structure):

 # enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
 # enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
-# // (ignored if no pooling layer)

 # // ref: https://github.com/ggerganov/llama.cpp/pull/2054
 # float rope_freq_base; // RoPE base frequency, 0 = from model

@@ -2316,6 +2317,16 @@ def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
     ...


+# // Set whether the model is in embeddings model or not
+# // If true, embeddings will be returned but logits will not
+# LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
+@ctypes_function("llama_set_embeddings", [llama_context_p_ctypes, ctypes.c_bool], None)
+def llama_set_embeddings(ctx: llama_context_p, embeddings: bool, /):
+    """Set whether the model is in embeddings model or not
+    If true, embeddings will be returned but logits will not"""
+    ...
+
+
 # // Set whether to use causal attention or not
 # // If set to true, the model will only attend to the past tokens
 # LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
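
For context, a minimal usage sketch of the new binding from the low-level API. This is not part of the commit; the model path is hypothetical, and the embeddings/pooling_type fields on llama_context_params are taken from the surrounding bindings as they stood around this change.

# A minimal sketch (not from this commit). Assumes the low-level llama_cpp
# bindings shown above and a local GGUF model path (hypothetical).
import llama_cpp

llama_cpp.llama_backend_init()

model_params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"/path/to/model.gguf", model_params)

ctx_params = llama_cpp.llama_context_default_params()
ctx_params.embeddings = True  # start the context in embeddings mode
ctx_params.pooling_type = llama_cpp.LLAMA_POOLING_TYPE_LAST  # new constant from this commit
ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

# The new binding: toggle the context back to returning logits at runtime,
# without rebuilding it.
llama_cpp.llama_set_embeddings(ctx, False)

llama_cpp.llama_free(ctx)
llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()

Both additions are thin pass-throughs to llama.cpp: LLAMA_POOLING_TYPE_LAST mirrors the new upstream enum value, and llama_set_embeddings forwards directly to the C function of the same name.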
