Commit a6457ba

Merge branch 'main' of https://github.com/abetlen/llama-cpp-python into main

2 parents: af3ed50 + 165b4dc
File tree: 7 files changed (+51 -24 lines)

.github/workflows/build-and-release.yaml (+2 -2)

@@ -29,7 +29,7 @@ jobs:
           python -m pip install -e .[all]
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.18.0
+        uses: pypa/cibuildwheel@v2.18.1
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
@@ -56,7 +56,7 @@ jobs:
           platforms: linux/arm64
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.18.0
+        uses: pypa/cibuildwheel@v2.18.1
         env:
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_REPAIR_WHEEL_COMMAND: ""

Makefile (+7 -1)

@@ -13,7 +13,13 @@ build:
 	python3 -m pip install --verbose -e .
 
 build.debug:
-	CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug" python3 -m pip install --verbose --config-settings=cmake.verbose=true --config-settings=logging.level=INFO --config-settings=install.strip=false --editable .
+	python3 -m pip install \
+		--verbose \
+		--config-settings=cmake.verbose=true \
+		--config-settings=logging.level=INFO \
+		--config-settings=install.strip=false \
+		--config-settings=cmake.args="-DCMAKE_BUILD_TYPE=Debug;-DCMAKE_C_FLAGS='-ggdb -O0';-DCMAKE_CXX_FLAGS='-ggdb -O0'" \
+		--editable .
 
 build.cuda:
 	CMAKE_ARGS="-DLLAMA_CUDA=on" python3 -m pip install --verbose -e .
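
The reworked build.debug recipe routes the debug flags through scikit-build-core's cmake.args config setting (semicolon-separated entries), so the extension is compiled unoptimized (-O0) with gdb-friendly symbols (-ggdb). A minimal sketch of exercising such a build from Python, assuming the package was installed via `make build.debug`; the faulthandler usage is illustrative and not part of this commit:

    # Run under a debugger (e.g. `gdb --args python3 debug_smoke.py`) or let
    # faulthandler print a native traceback if the unoptimized extension crashes.
    import faulthandler

    faulthandler.enable()

    import llama_cpp  # loads the freshly built debug extension

    print("llama_cpp version:", llama_cpp.__version__)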

llama_cpp/llama.py (+8 -5)

@@ -6,6 +6,7 @@
 import time
 import json
 import ctypes
+import typing
 import fnmatch
 import multiprocessing
 
@@ -249,24 +250,26 @@ def __init__(
                 self._kv_overrides_array[i].key = k.encode("utf-8")
                 if isinstance(v, bool):
                     self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_TYPE_BOOL
-                    self._kv_overrides_array[i].value.bool_value = v
+                    self._kv_overrides_array[i].value.val_bool = v
                 elif isinstance(v, int):
                     self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_TYPE_INT
-                    self._kv_overrides_array[i].value.int_value = v
+                    self._kv_overrides_array[i].value.val_i64 = v
                 elif isinstance(v, float):
                     self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_TYPE_FLOAT
-                    self._kv_overrides_array[i].value.float_value = v
+                    self._kv_overrides_array[i].value.val_f64 = v
                 elif isinstance(v, str):  # type: ignore
                     v_bytes = v.encode("utf-8")
                     if len(v_bytes) > 128:  # TODO: Make this a constant
                         raise ValueError(f"Value for {k} is too long: {v}")
                     v_bytes = v_bytes.ljust(128, b"\0")
                     self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_TYPE_STR
                     # copy min(v_bytes, 128) to str_value
+                    address = typing.cast(int, ctypes.addressof(self._kv_overrides_array[i].value) + llama_cpp.llama_model_kv_override_value.val_str.offset)
+                    buffer_start = ctypes.cast(address, ctypes.POINTER(ctypes.c_char))
                     ctypes.memmove(
-                        self._kv_overrides_array[i].value.str_value,
+                        buffer_start,
                         v_bytes,
-                        min(len(v_bytes), 128),
+                        128,
                     )
                 else:
                     raise ValueError(f"Unknown value type for {k}: {v}")
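
The string branch now writes the zero-padded bytes through a raw pointer computed from the union's base address plus the val_str field's byte offset, instead of assigning to the member attribute. A self-contained sketch of that ctypes technique; the OverrideValue union below is a stand-in mirroring llama_model_kv_override_value, not the library's own class:

    import ctypes

    class OverrideValue(ctypes.Union):
        # Stand-in mirroring llama_cpp.llama_model_kv_override_value
        _fields_ = [
            ("val_i64", ctypes.c_int64),
            ("val_f64", ctypes.c_double),
            ("val_bool", ctypes.c_bool),
            ("val_str", ctypes.c_char * 128),
        ]

    value = OverrideValue()
    v_bytes = "llama3".encode("utf-8").ljust(128, b"\0")  # zero-pad to the fixed width

    # Every ctypes field descriptor exposes .offset, the member's byte offset
    # (always 0 for union members; the same arithmetic works for struct fields).
    address = ctypes.addressof(value) + OverrideValue.val_str.offset
    buffer_start = ctypes.cast(address, ctypes.POINTER(ctypes.c_char))
    ctypes.memmove(buffer_start, v_bytes, 128)  # copy all 128 padded bytes

    assert value.val_str == b"llama3"  # c_char arrays read back as NUL-terminated bytes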

llama_cpp/llama_chat_format.py (+5 -2)

@@ -3098,7 +3098,7 @@ class NanoLlavaChatHandler(Llava15ChatHandler):
         "{% endif %}"
     )
 
-class Llama3VisionAlpha(Llava15ChatHandler):
+class Llama3VisionAlphaChatHandler(Llava15ChatHandler):
     # question = "<image>" + q
 
     # prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
@@ -3159,6 +3159,10 @@ class Llama3VisionAlpha(Llava15ChatHandler):
         "{% endif %}"
     )
 
+# alias
+Llama3VisionAlpha = Llama3VisionAlphaChatHandler
+
+
 @register_chat_completion_handler("chatml-function-calling")
 def chatml_function_calling(
     llama: llama.Llama,
@@ -3193,7 +3197,6 @@ def chatml_function_calling(
     llama_types.CreateChatCompletionResponse,
     Iterator[llama_types.CreateChatCompletionStreamResponse],
 ]:
-    print(logprobs)
     function_calling_template = (
         "{% for message in messages %}"
         "<|im_start|>{{ message.role }}\n"

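The rename stays backward compatible because the old name is rebound to the renamed class at module level, so both import paths resolve to the same object. A quick check, assuming a build containing this commit is installed:

    from llama_cpp.llama_chat_format import (
        Llama3VisionAlpha,
        Llama3VisionAlphaChatHandler,
    )

    # The alias is the identical class object, not a copy or a subclass.
    assert Llama3VisionAlpha is Llama3VisionAlphaChatHandler
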
llama_cpp/llama_cpp.py (+25 -12)

@@ -300,6 +300,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
 # LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
 # LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
+# LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -315,6 +316,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11
 LLAMA_VOCAB_PRE_TYPE_OLMO = 12
 LLAMA_VOCAB_PRE_TYPE_DBRX = 13
+LLAMA_VOCAB_PRE_TYPE_SMAUG = 14
 
 
 # // note: these values should be synchronized with ggml_rope
@@ -611,17 +613,17 @@ class llama_batch(ctypes.Structure):
 # };
 class llama_model_kv_override_value(ctypes.Union):
     _fields_ = [
-        ("int_value", ctypes.c_int64),
-        ("float_value", ctypes.c_double),
-        ("bool_value", ctypes.c_bool),
-        ("str_value", ctypes.c_char * 128),
+        ("val_i64", ctypes.c_int64),
+        ("val_f64", ctypes.c_double),
+        ("val_bool", ctypes.c_bool),
+        ("val_str", ctypes.c_char * 128),
     ]
 
     if TYPE_CHECKING:
-        int_value: int
-        float_value: float
-        bool_value: bool
-        str_value: bytes
+        val_i64: int
+        val_f64: float
+        val_bool: bool
+        val_str: bytes
 
 
 class llama_model_kv_override(ctypes.Structure):
@@ -718,6 +720,8 @@ class llama_model_params(ctypes.Structure):
     ]
 
 
+# // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
+# // https://github.com/ggerganov/llama.cpp/pull/7544
 # struct llama_context_params {
 # uint32_t seed; // RNG seed, -1 for random
 # uint32_t n_ctx; // text context, 0 = from model
@@ -744,15 +748,14 @@ class llama_model_params(ctypes.Structure):
 # ggml_backend_sched_eval_callback cb_eval;
 # void * cb_eval_user_data;
 
-# enum ggml_type type_k; // data type for K cache
-# enum ggml_type type_v; // data type for V cache
+# enum ggml_type type_k; // data type for K cache [EXPERIMENTAL]
+# enum ggml_type type_v; // data type for V cache [EXPERIMENTAL]
 
 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool logits_all; // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 # bool embeddings; // if true, extract embeddings (together with logits)
 # bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
-# bool flash_attn; // whether to use flash attention
-
+# bool flash_attn; // whether to use flash attention [EXPERIMENTAL]
 
 # // Abort callback
 # // if it returns true, execution of llama_decode() will be aborted
@@ -2454,6 +2457,16 @@ def llama_token_is_eog(model: llama_model_p, token: Union[llama_token, int], /)
     ...
 
 
+# // Identify if Token Id is a control token or a render-able token
+# LLAMA_API bool llama_token_is_control(const struct llama_model * model, llama_token token);
+@ctypes_function(
+    "llama_token_is_control", [llama_model_p_ctypes, llama_token], ctypes.c_bool
+)
+def llama_token_is_control(model: llama_model_p, token: Union[llama_token, int], /) -> bool:
+    """Identify if Token Id is a control token or a render-able token"""
+    ...
+
+
 # // Special tokens
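The new llama_token_is_control binding makes it possible, for example, to enumerate a vocabulary's control tokens through the low-level API. A hedged sketch with a hypothetical GGUF path; the surrounding calls (llama_backend_init, llama_model_default_params, llama_load_model_from_file, llama_n_vocab, llama_token_get_text, llama_free_model) are pre-existing bindings in this module:

    import llama_cpp

    llama_cpp.llama_backend_init()
    params = llama_cpp.llama_model_default_params()
    params.vocab_only = True  # loading vocab/metadata is enough for this inspection
    model = llama_cpp.llama_load_model_from_file(
        b"/path/to/model.gguf", params  # hypothetical path
    )

    # Print every token id the model marks as a control (non-renderable) token.
    for token in range(llama_cpp.llama_n_vocab(model)):
        if llama_cpp.llama_token_is_control(model, token):
            print(token, llama_cpp.llama_token_get_text(model, token))

    llama_cpp.llama_free_model(model)
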
llama_cpp/server/model.py (+3 -1)

@@ -183,7 +183,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
             num_pred_tokens=settings.draft_model_num_pred_tokens
         )
 
-    kv_overrides: Optional[Dict[str, Union[bool, int, float]]] = None
+    kv_overrides: Optional[Dict[str, Union[bool, int, float, str]]] = None
     if settings.kv_overrides is not None:
         assert isinstance(settings.kv_overrides, list)
         kv_overrides = {}
@@ -197,6 +197,8 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 kv_overrides[key] = int(value)
             elif value_type == "float":
                 kv_overrides[key] = float(value)
+            elif value_type == "str":
+                kv_overrides[key] = value
             else:
                 raise ValueError(f"Unknown value type {value_type}")
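
With the str branch in place, string-typed overrides parsed from the server's kv_overrides setting flow through to llama_cpp.Llama, whose kv_overrides parameter accepts them after the llama.py change above. A sketch of the equivalent direct usage; the model path and metadata keys are illustrative, not prescribed by the commit:

    from llama_cpp import Llama

    llm = Llama(
        model_path="/path/to/model.gguf",  # hypothetical
        kv_overrides={
            "tokenizer.ggml.pre": "llama3",  # str override (newly supported)
            "example.bool.key": True,        # -> LLAMA_KV_OVERRIDE_TYPE_BOOL
            "example.int.key": 1,            # -> LLAMA_KV_OVERRIDE_TYPE_INT
        },
    )

Per the llama.py change above, string values are capped at 128 bytes (UTF-8 encoded, zero-padded); longer values raise ValueError.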

vendor/llama.cpp (+1 -1: submodule pointer updated)
