Commit 454c9bb

feat: Update llama.cpp

Parent: 2d89964

2 files changed: 18 additions and 5 deletions

‎llama_cpp/llama_cpp.py

17 additions and 4 deletions
@@ -300,6 +300,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 # LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
 # LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
 # LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
+# LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -315,6 +316,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
 LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11
 LLAMA_VOCAB_PRE_TYPE_OLMO = 12
 LLAMA_VOCAB_PRE_TYPE_DBRX = 13
+LLAMA_VOCAB_PRE_TYPE_SMAUG = 14


 # // note: these values should be synchronized with ggml_rope
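These pre-tokenizer type constants are plain module-level integers that mirror the `llama_vocab_pre_type` enum in llama.h. A minimal sketch (assuming only the installed `llama_cpp` module, nothing else from this commit) that checks the new constant and builds a value-to-name map for logging:

```python
import llama_cpp

# The bindings mirror the llama.h enum as plain ints; SMAUG was added as 14.
assert llama_cpp.LLAMA_VOCAB_PRE_TYPE_SMAUG == 14

# Value -> name map over the mirrored constants, handy when logging which
# pre-tokenizer a converted model reports.
PRE_TYPE_NAMES = {
    value: name
    for name, value in vars(llama_cpp).items()
    if name.startswith("LLAMA_VOCAB_PRE_TYPE_") and isinstance(value, int)
}
print(PRE_TYPE_NAMES[14])  # -> "LLAMA_VOCAB_PRE_TYPE_SMAUG"
```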
@@ -718,6 +720,8 @@ class llama_model_params(ctypes.Structure):
 ]


+# // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
+# // https://github.com/ggerganov/llama.cpp/pull/7544
 # struct llama_context_params {
 #     uint32_t seed;    // RNG seed, -1 for random
 #     uint32_t n_ctx;   // text context, 0 = from model
@@ -744,15 +748,14 @@ class llama_model_params(ctypes.Structure):
 #     ggml_backend_sched_eval_callback cb_eval;
 #     void * cb_eval_user_data;

-#     enum ggml_type type_k; // data type for K cache
-#     enum ggml_type type_v; // data type for V cache
+#     enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+#     enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]

 #     // Keep the booleans together to avoid misalignment during copy-by-value.
 #     bool logits_all;  // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 #     bool embeddings;  // if true, extract embeddings (together with logits)
 #     bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-#     bool flash_attn;  // whether to use flash attention
-
+#     bool flash_attn;  // whether to use flash attention [EXPERIMENTAL]

 #     // Abort callback
 #     // if it returns true, execution of llama_decode() will be aborted
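The fields now marked `[EXPERIMENTAL]` are exposed on `llama_context_params` in these bindings. A minimal sketch of where they would be set before creating a context; the quantized cache types chosen here are illustrative placeholders, not defaults from this commit:

```python
import llama_cpp

# Start from the library defaults and override the experimental fields.
params = llama_cpp.llama_context_default_params()
params.n_ctx = 4096
params.flash_attn = True                  # [EXPERIMENTAL] flash attention
params.type_k = llama_cpp.GGML_TYPE_Q8_0  # [EXPERIMENTAL] K-cache data type
params.type_v = llama_cpp.GGML_TYPE_Q8_0  # [EXPERIMENTAL] V-cache data type

# `params` would then be passed to llama_new_context_with_model() together
# with a model loaded via llama_load_model_from_file().
print(params.flash_attn, params.type_k, params.type_v)
```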
@@ -2454,6 +2457,16 @@ def llama_token_is_eog(model: llama_model_p, token: Union[llama_token, int], /)
     ...


+# // Identify if Token Id is a control token or a render-able token
+# LLAMA_API bool llama_token_is_control(const struct llama_model * model, llama_token token);
+@ctypes_function(
+    "llama_token_is_control", [llama_model_p_ctypes, llama_token], ctypes.c_bool
+)
+def llama_token_is_control(model: llama_model_p, token: Union[llama_token, int], /) -> bool:
+    """Identify if Token Id is a control token or a render-able token"""
+    ...
+
+
 # // Special tokens
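Once a model is loaded through the low-level API, the new binding can be used to separate control tokens from renderable ones. A short sketch assuming an arbitrary GGUF file (the path is a placeholder, not part of this commit):

```python
import llama_cpp

llama_cpp.llama_backend_init()
model = llama_cpp.llama_load_model_from_file(
    b"/path/to/model.gguf",  # placeholder path
    llama_cpp.llama_model_default_params(),
)

# Scan the vocabulary and collect the ids of control (non-renderable) tokens.
n_vocab = llama_cpp.llama_n_vocab(model)
control_ids = [t for t in range(n_vocab) if llama_cpp.llama_token_is_control(model, t)]
print(f"{len(control_ids)} control tokens out of {n_vocab}")

llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()
```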

‎vendor/llama.cpp

Submodule commit updated (1 addition, 1 deletion).