Commit 901fe02

feat: Update llama.cpp

1 parent b64fa4e
File tree

2 files changed: +21 -11 lines changed

llama_cpp/llama_cpp.py
+20 -10 (20 additions, 10 deletions)
@@ -175,8 +175,8 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 
 # define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN
-# define LLAMA_SESSION_VERSION 4
-LLAMA_SESSION_VERSION = 4
+# define LLAMA_SESSION_VERSION 5
+LLAMA_SESSION_VERSION = 5
 
 
 # struct llama_model;
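
Bumping LLAMA_SESSION_VERSION from 4 to 5 invalidates state files written by earlier builds. A minimal sketch of a guard a downstream cache might add; REQUIRED_SESSION_VERSION and the regeneration step are hypothetical bookkeeping, only llama_cpp.LLAMA_SESSION_VERSION comes from the bindings:

import llama_cpp

REQUIRED_SESSION_VERSION = 5  # hypothetical: version our saved session files were written with

if llama_cpp.LLAMA_SESSION_VERSION != REQUIRED_SESSION_VERSION:
    # Hypothetical cache policy: old session files will fail to load after
    # the bump, so drop them and regenerate instead of attempting a load.
    print("session format changed; regenerate saved sessions")
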
@@ -274,6 +274,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 # LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ1_M = 31, // except 1d tensors
 
 # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 # };
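
Only the mirrored C comment is visible in this hunk, so selecting the new quant type from Python is sketched here with the raw enum value 31 (a matching Python-level constant is assumed to land alongside, but is not part of this diff). Paths are hypothetical, and IQ1-class quants generally expect an importance matrix via params.imatrix to produce usable output:

import ctypes
import llama_cpp

params = llama_cpp.llama_model_quantize_default_params()
params.ftype = 31  # LLAMA_FTYPE_MOSTLY_IQ1_M in the upstream enum
rc = llama_cpp.llama_model_quantize(
    b"model-f16.gguf",    # hypothetical input path
    b"model-iq1_m.gguf",  # hypothetical output path
    ctypes.byref(params),
)
assert rc == 0  # non-zero return means quantization failed
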
@@ -677,6 +678,7 @@ class llama_context_params(ctypes.Structure):
 # bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
 # bool pure; // quantize all tensors to the default type
 # void * imatrix; // pointer to importance matrix data
+# void * kv_overrides; // pointer to vector containing overrides
 # } llama_model_quantize_params;
 class llama_model_quantize_params(ctypes.Structure):
     """Parameters for llama_model_quantize
@@ -691,6 +693,7 @@ class llama_model_quantize_params(ctypes.Structure):
         only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         pure (bool): quantize all tensors to the default type
         imatrix (ctypes.c_void_p): pointer to importance matrix data
+        kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
     """
 
     _fields_ = [
@@ -703,6 +706,7 @@ class llama_model_quantize_params(ctypes.Structure):
         ("only_copy", ctypes.c_bool),
         ("pure", ctypes.c_bool),
         ("imatrix", ctypes.c_void_p),
+        ("kv_overrides", ctypes.c_void_p),
     ]
 
 
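The upstream comment calls kv_overrides a "pointer to vector containing overrides": on the C++ side it is interpreted as a pointer to a vector of llama_model_kv_override entries, which plain ctypes cannot construct, so a pure-Python caller would normally leave the field NULL. A sketch of how the struct is populated (values are illustrative defaults, not a recommendation):

import llama_cpp

params = llama_cpp.llama_model_quantize_default_params()
params.imatrix = None       # c_void_p: NULL, no importance matrix supplied
params.kv_overrides = None  # c_void_p: NULL; a non-NULL value only makes sense
                            # when native code owns the overrides vector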

@@ -1838,9 +1842,9 @@ def llama_synchronize(ctx: llama_context_p, /):
 
 
 # // Token logits obtained from the last call to llama_decode()
-# // The logits for the last token are stored in the last row
-# // Logits for which llama_batch.logits[i] == 0 are undefined
-# // Rows: n_tokens provided with llama_batch
+# // The logits for which llama_batch.logits[i] != 0 are stored contiguously
+# // in the order they have appeared in the batch.
+# // Rows: number of tokens for which llama_batch.logits[i] != 0
 # // Cols: n_vocab
 # LLAMA_API float * llama_get_logits(struct llama_context * ctx);
 @ctypes_function(
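
This is a behavioral change, not just a comment fix: rows are no longer indexed by batch position but packed in the order logits were requested (the next hunk gives llama_get_logits_ith() the matching treatment). A sketch under the new layout, assuming `model`, `ctx`, and `n_tokens` come from surrounding decode code and only the last token's llama_batch.logits flag was set, so exactly one row exists and it is row 0:

import llama_cpp

n_vocab = llama_cpp.llama_n_vocab(model)
logits = llama_cpp.llama_get_logits(ctx)      # float* over the packed rows
last = [logits[j] for j in range(n_vocab)]    # row 0 == the one requested token

# llama_get_logits_ith(ctx, i) resolves i through the context's output_ids
# table and now returns NULL for tokens that produced no output, so guard it:
i = n_tokens - 1                              # the one position that set logits[i]
row = llama_cpp.llama_get_logits_ith(ctx, i)
if not row:                                   # NULL ctypes pointer is falsy
    raise ValueError(f"token {i} did not request logits in this batch")
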
@@ -1859,7 +1863,8 @@ def llama_get_logits(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_float]:
 
 
 # // Logits for the ith token. Equivalent to:
-# // llama_get_logits(ctx) + i*n_vocab
+# // llama_get_logits(ctx) + ctx->output_ids[i]*n_vocab
+# // returns NULL for invalid ids.
 # LLAMA_API float * llama_get_logits_ith(struct llama_context * ctx, int32_t i);
 @ctypes_function(
     "llama_get_logits_ith",
@@ -1874,8 +1879,12 @@ def llama_get_logits_ith(
     ...
 
 
-# // Get all output token embeddings
-# // shape: [n_tokens*n_embd] (1-dimensional)
+# // Get all output token embeddings.
+# // when pooling_type == LLAMA_POOLING_TYPE_NONE or when using a generative model,
+# // the embeddings for which llama_batch.logits[i] != 0 are stored contiguously
+# // in the order they have appeared in the batch.
+# // shape: [n_outputs*n_embd]
+# // Otherwise, returns NULL.
 # LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
 @ctypes_function(
     "llama_get_embeddings", [llama_context_p_ctypes], ctypes.POINTER(ctypes.c_float)
@@ -1886,9 +1895,10 @@ def llama_get_embeddings(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_float]:
     ...
 
 
-# // Get the embeddings for the ith token
-# // llama_get_embeddings(ctx) + i*n_embd
+# // Get the embeddings for the ith token. Equivalent to:
+# // llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd
 # // shape: [n_embd] (1-dimensional)
+# // returns NULL for invalid ids.
 # LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i);
 @ctypes_function(
     "llama_get_embeddings_ith",

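The embeddings accessors in the last two hunks get the matching treatment: llama_get_embeddings() packs one n_embd-sized row per requested token when pooling is LLAMA_POOLING_TYPE_NONE and returns NULL otherwise, and llama_get_embeddings_ith() returns NULL for invalid ids instead of pointing into undefined memory. A sketch, assuming `model` and a `ctx` decoded with embeddings enabled are in scope:

import llama_cpp

n_embd = llama_cpp.llama_n_embd(model)

embd = llama_cpp.llama_get_embeddings(ctx)
if embd:                        # non-NULL only for unpooled per-token output
    first_row = embd[0:n_embd]  # embedding of the first token that requested output

row0 = llama_cpp.llama_get_embeddings_ith(ctx, 0)  # NULL if token 0 produced no output
vec = row0[0:n_embd] if row0 else None
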
vendor/llama.cpp
+1 -1 (submodule reference updated to the new llama.cpp commit)