Commit 4c6514d

feat: Update llama.cpp

1 parent 99f2ebf
3 files changed: 23 additions, 14 deletions

CMakeLists.txt

1 addition, 1 deletion
@@ -143,7 +143,7 @@ if (LLAMA_BUILD)
     endif()

     # Building llava
-    add_subdirectory(vendor/llama.cpp/examples/llava)
+    add_subdirectory(vendor/llama.cpp/tools/mtmd)
     set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")

     if (WIN32)

llama_cpp/llama_cpp.py

21 additions, 12 deletions
@@ -235,6 +235,8 @@
 #     LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
 #     LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
 #     LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
+#     LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
+#     LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -252,7 +254,7 @@
 LLAMA_VOCAB_PRE_TYPE_DBRX = 13
 LLAMA_VOCAB_PRE_TYPE_SMAUG = 14
 LLAMA_VOCAB_PRE_TYPE_PORO = 15
-LLAMA_VOCAV_PRE_TYPE_CHATGLM3 = 16
+LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16
 LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17
 LLAMA_VOCAB_PRE_TYPE_VIKING = 18
 LLAMA_VOCAB_PRE_TYPE_JAIS = 19
@@ -269,6 +271,8 @@
 LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30
 LLAMA_VOCAB_PRE_TYPE_TRILLION = 31
 LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32
+LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33
+LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34


 # // note: these values should be synchronized with ggml_rope
@@ -891,17 +895,18 @@ class llama_context_params(ctypes.Structure):

 # // model quantization parameters
 # typedef struct llama_model_quantize_params {
-#     int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
-#     enum llama_ftype ftype; // quantize to this llama_ftype
-#     enum ggml_type output_tensor_type; // output tensor type
-#     enum ggml_type token_embedding_type; // token embeddings tensor type
-#     bool allow_requantize; // allow quantizing non-f32/f16 tensors
-#     bool quantize_output_tensor; // quantize output.weight
-#     bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
-#     bool pure; // quantize all tensors to the default type
-#     bool keep_split; // quantize to the same number of shards
-#     void * imatrix; // pointer to importance matrix data
-#     void * kv_overrides; // pointer to vector containing overrides
+#     int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
+#     enum llama_ftype ftype; // quantize to this llama_ftype
+#     enum ggml_type output_tensor_type; // output tensor type
+#     enum ggml_type token_embedding_type; // token embeddings tensor type
+#     bool allow_requantize; // allow quantizing non-f32/f16 tensors
+#     bool quantize_output_tensor; // quantize output.weight
+#     bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
+#     bool pure; // quantize all tensors to the default type
+#     bool keep_split; // quantize to the same number of shards
+#     void * imatrix; // pointer to importance matrix data
+#     void * kv_overrides; // pointer to vector containing overrides
+#     void * tensor_types; // pointer to vector containing tensor types
 # } llama_model_quantize_params;
 class llama_model_quantize_params(ctypes.Structure):
     """Parameters for llama_model_quantize
@@ -918,6 +923,7 @@ class llama_model_quantize_params(ctypes.Structure):
         keep_split (bool): quantize to the same number of shards
         imatrix (ctypes.c_void_p): pointer to importance matrix data
         kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
+        tensor_types (ctypes.c_void_p): pointer to vector containing tensor types
     """

     if TYPE_CHECKING:
@@ -932,6 +938,7 @@ class llama_model_quantize_params(ctypes.Structure):
         keep_split: bool
         imatrix: ctypes.c_void_p
         kv_overrides: ctypes.c_void_p
+        tensor_types: ctypes.c_void_p

     _fields_ = [
         ("nthread", ctypes.c_int32),
@@ -945,6 +952,7 @@ class llama_model_quantize_params(ctypes.Structure):
         ("keep_split", ctypes.c_bool),
         ("imatrix", ctypes.c_void_p),
         ("kv_overrides", ctypes.c_void_p),
+        ("tensor_types", ctypes.c_void_p),
     ]

@@ -3812,6 +3820,7 @@ def llama_sampler_init_softmax() -> llama_sampler_p:


 # /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
+# /// Setting k <= 0 makes this a noop
 # LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);
 @ctypes_function("llama_sampler_init_top_k", [ctypes.c_int32], llama_sampler_p_ctypes)
 def llama_sampler_init_top_k(k: int) -> llama_sampler_p:
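The added comment documents that k <= 0 turns top-k into a no-op. As a hedged sketch (again, not part of this commit) of how this binding is typically used with the low-level sampler API:

    import llama_cpp

    # Build a sampler chain: top-k followed by greedy selection.
    # With k <= 0 the top-k step would pass logits through unchanged (a no-op).
    chain = llama_cpp.llama_sampler_chain_init(
        llama_cpp.llama_sampler_chain_default_params()
    )
    llama_cpp.llama_sampler_chain_add(chain, llama_cpp.llama_sampler_init_top_k(40))
    llama_cpp.llama_sampler_chain_add(chain, llama_cpp.llama_sampler_init_greedy())

    # ... sample against a live context with llama_sampler_sample(chain, ctx, -1) ...

    llama_cpp.llama_sampler_free(chain)  # also frees the samplers added to the chain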

vendor/llama.cpp

Submodule pointer updated (1 addition, 1 deletion).

0 commit comments