Commit d70f8e2

Merge branch 'main' into functionary-stream

2 parents: 23177a3 + f9b7221

7 files changed: 94 additions, 8 deletions

CHANGELOG.md (9 additions, 1 deletion)
```diff
@@ -7,9 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.69]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@6ecf3189e00a1e8e737a78b6d10e1d7006e050a2
+- feat: Add llama-3-vision-alpha chat format by @abetlen in 31b1d95a6c19f5b615a3286069f181a415f872e8
+- fix: Change default verbose value of verbose in image chat format handlers to True to match Llama by @abetlen in 4f01c452b6c738dc56eacac3758119b12c57ea94
+- fix: Suppress all logs when verbose=False, use hardcoded fileno's to work in colab notebooks by @abetlen in f116175a5a7c84569c88cad231855c1e6e59ff6e
+- fix: UTF-8 handling with grammars by @jsoma in #1415
+
 ## [0.2.68]
 
-- feat: Update llama.cpp to ggerganov/llama.cpp@
+- feat: Update llama.cpp to ggerganov/llama.cpp@77e15bec6217a39be59b9cc83d6b9afb6b0d8167
 - feat: Add option to enable flash_attn to Lllama params and ModelSettings by @abetlen in 22d77eefd2edaf0148f53374d0cac74d0e25d06e
 - fix(ci): Fix build-and-release.yaml by @Smartappli in #1413
```

llama_cpp/__init__.py (1 addition, 1 deletion)
```diff
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.68"
+__version__ = "0.2.69"
```

llama_cpp/llama.py (6 additions, 1 deletion)
```diff
@@ -262,7 +262,12 @@ def __init__(
                     raise ValueError(f"Value for {k} is too long: {v}")
                 v_bytes = v_bytes.ljust(128, b"\0")
                 self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_TYPE_STR
-                self._kv_overrides_array[i].value.str_value[:128] = v_bytes
+                # copy min(v_bytes, 128) to str_value
+                ctypes.memmove(
+                    self._kv_overrides_array[i].value.str_value,
+                    v_bytes,
+                    min(len(v_bytes), 128),
+                )
             else:
                 raise ValueError(f"Unknown value type for {k}: {v}")
```
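The replaced slice assignment only works when the source is exactly the size of the destination; `ctypes.memmove` instead copies a clamped byte count straight into the fixed-size field. A standalone sketch of the same pattern, using a plain 128-byte ctypes buffer as a stand-in for `value.str_value`:

```python
import ctypes

# Stand-in for the fixed-size str_value field (c_char * 128).
buf = (ctypes.c_char * 128)()

v_bytes = "llama-3-vision-alpha".encode("utf-8").ljust(128, b"\0")

# Copy at most 128 bytes directly into the buffer; unlike slice
# assignment, memmove does not require the source length to match
# the destination exactly.
ctypes.memmove(buf, v_bytes, min(len(v_bytes), 128))

assert buf.raw.rstrip(b"\0") == b"llama-3-vision-alpha"
```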

llama_cpp/llama_chat_format.py (62 additions, 2 deletions)
```diff
@@ -2475,7 +2475,7 @@ def generate_streaming(tools, functions, function_call, prompt):
 
 
 class Llava15ChatHandler:
-    DEFAULT_SYSTEM_MESSAGE = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
+    DEFAULT_SYSTEM_MESSAGE: Optional[str] = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
 
     CHAT_FORMAT = (
         "{% for message in messages %}"
@@ -2598,7 +2598,7 @@ def __call__(
         assert self.clip_ctx is not None
 
         system_prompt = _get_system_message(messages)
-        if system_prompt == "":
+        if system_prompt == "" and self.DEFAULT_SYSTEM_MESSAGE is not None:
             messages = [llama_types.ChatCompletionRequestSystemMessage(role="system", content=self.DEFAULT_SYSTEM_MESSAGE)] + messages
 
         image_urls = self.get_image_urls(messages)
@@ -3081,6 +3081,66 @@ class NanoLlavaChatHandler(Llava15ChatHandler):
     "{% endif %}"
 )
 
+class Llama3VisionAlpha(Llava15ChatHandler):
+    # question = "<image>" + q
+
+    # prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+    DEFAULT_SYSTEM_MESSAGE = None
+
+    CHAT_FORMAT = (
+        "{% for message in messages %}"
+
+        "<|start_header_id|>"
+
+        "{% if message.role == 'user' %}"
+
+        "user<|end_header_id|>\n\n"
+
+        "{% if message.content is iterable %}"
+
+        # <image>
+        "{% for content in message.content %}"
+        "{% if content.type == 'image_url' %}"
+        "{% if content.image_url is string %}"
+        "{{ content.image_url }}"
+        "{% endif %}"
+        "{% if content.image_url is mapping %}"
+        "{{ content.image_url.url }}"
+        "{% endif %}"
+        "{% endif %}"
+        "{% endfor %}"
+
+        # Question:
+        "{% for content in message.content %}"
+        "{% if content.type == 'text' %}"
+        "{{ content.text }}"
+        "{% endif %}"
+        "{% endfor %}"
+
+        "{% endif %}"
+
+        # Question:
+        "{% if message.content is string %}"
+        "{{ message.content }}"
+        "{% endif %}"
+
+        "{% endif %}"
+
+        # Answer:
+        "{% if message.role == 'assistant' %}"
+        "assistant<|end_header_id|>\n\n"
+        "{{ message.content }}"
+        "{% endif %}"
+
+        "<|eot_id|>"
+
+        "{% endfor %}"
+
+        # Generation prompt
+        "{% if add_generation_prompt %}"
+        "<|start_header_id|>assistant<|end_header_id|>\n\n"
+        "{% endif %}"
+    )
 
 @register_chat_completion_handler("chatml-function-calling")
 def chatml_function_calling(
```
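Because `Llama3VisionAlpha` sets `DEFAULT_SYSTEM_MESSAGE = None`, the new guard in `Llava15ChatHandler.__call__` skips injecting a default system message for this format. A minimal usage sketch following the existing Llava15ChatHandler pattern (the model and projector paths below are placeholders):

```python
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llama3VisionAlpha

# Placeholder paths: a llama-3-vision-alpha GGUF and its CLIP projector.
chat_handler = Llama3VisionAlpha(clip_model_path="mmproj.gguf")
llm = Llama(
    model_path="llama-3-vision-alpha.Q4_K_M.gguf",
    chat_handler=chat_handler,
    n_ctx=4096,  # leave room for the image embedding tokens
)
response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
)
print(response["choices"][0]["message"]["content"])
```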

llama_cpp/server/model.py (15 additions, 0 deletions)
```diff
@@ -140,6 +140,20 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
         chat_handler = llama_cpp.llama_chat_format.NanoLlavaChatHandler(
             clip_model_path=settings.clip_model_path, verbose=settings.verbose
         )
+    elif settings.chat_format == "llama-3-vision-alpha":
+        assert settings.clip_model_path is not None, "clip model not found"
+        if settings.hf_model_repo_id is not None:
+            chat_handler = (
+                llama_cpp.llama_chat_format.Llama3VisionAlpha.from_pretrained(
+                    repo_id=settings.hf_model_repo_id,
+                    filename=settings.clip_model_path,
+                    verbose=settings.verbose,
+                )
+            )
+        else:
+            chat_handler = llama_cpp.llama_chat_format.Llama3VisionAlpha(
+                clip_model_path=settings.clip_model_path, verbose=settings.verbose
+            )
     elif settings.chat_format == "hf-autotokenizer":
         assert (
             settings.hf_pretrained_model_name_or_path is not None
@@ -228,6 +242,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
         logits_all=settings.logits_all,
         embedding=settings.embedding,
         offload_kqv=settings.offload_kqv,
+        flash_attn=settings.flash_attn,
         # Sampling Params
         last_n_tokens_size=settings.last_n_tokens_size,
         # LoRA Params
```
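A sketch of exercising the new branch through the server loader directly; field names follow the diff above, the paths are placeholders, and `flash_attn` is the new passthrough from the second hunk:

```python
from llama_cpp.server.settings import ModelSettings
from llama_cpp.server.model import load_llama_from_model_settings

settings = ModelSettings(
    model="llama-3-vision-alpha.Q4_K_M.gguf",  # placeholder path
    chat_format="llama-3-vision-alpha",
    clip_model_path="mmproj.gguf",  # required: the assert above enforces this
    flash_attn=True,  # now forwarded to the Llama constructor
)
llm = load_llama_from_model_settings(settings)
```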

llama_cpp/server/types.py (0 additions, 2 deletions)
```diff
@@ -18,8 +18,6 @@
 
 temperature_field = Field(
     default=0.8,
-    ge=0.0,
-    le=2.0,
     description="Adjust the randomness of the generated text.\n\n"
     + "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.",
 )
```
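Removing `ge`/`le` drops request-time validation, so temperature values above 2.0 now reach the sampler instead of being rejected by pydantic before the request is served. A minimal sketch of the behavioral difference (class names are illustrative):

```python
from pydantic import BaseModel, Field, ValidationError

class Bounded(BaseModel):
    temperature: float = Field(default=0.8, ge=0.0, le=2.0)

class Unbounded(BaseModel):
    temperature: float = Field(default=0.8)

try:
    Bounded(temperature=2.5)
except ValidationError:
    print("2.5 rejected under the old bounds")

print(Unbounded(temperature=2.5).temperature)  # 2.5 is now accepted
```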

vendor/llama.cpp (submodule)

Submodule updated to ggerganov/llama.cpp@6ecf3189e00a1e8e737a78b6d10e1d7006e050a2, matching the CHANGELOG entry above.