Commit 76aafa6

phiharri and abetlen authored
Implement GGUF metadata KV overrides (abetlen#1011)
* Implement GGUF metadata overrides
* whitespace fix
* Fix kv overrides.
* Fix pointer and pickle
* Match llama.cpp kv_overrides cli argument

Co-authored-by: Andrei <abetlen@gmail.com>
1 parent 7eff42c commit 76aafa6

3 files changed: +55 −1 lines changed

llama_cpp/llama.py

32 additions, 0 deletions
@@ -735,6 +735,7 @@ def __init__(
         vocab_only: bool = False,
         use_mmap: bool = True,
         use_mlock: bool = False,
+        kv_overrides: Optional[Dict[str, Union[bool, int, float]]] = None,
         # Context Params
         seed: int = llama_cpp.LLAMA_DEFAULT_SEED,
         n_ctx: int = 512,
@@ -803,6 +804,7 @@ def __init__(
             vocab_only: Only load the vocabulary no weights.
             use_mmap: Use mmap if possible.
             use_mlock: Force the system to keep the model in RAM.
+            kv_overrides: Key-value overrides for the model.
             seed: RNG seed, -1 for random
             n_ctx: Text context, 0 = from model
             n_batch: Prompt processing maximum batch size
@@ -866,6 +868,34 @@ def __init__(
         self.model_params.use_mmap = use_mmap if lora_path is None else False
         self.model_params.use_mlock = use_mlock

+        self.kv_overrides = kv_overrides
+        if kv_overrides is not None:
+            n_overrides = len(kv_overrides)
+            self._kv_overrides_array = (llama_cpp.llama_model_kv_override * (n_overrides + 1))()
+            self._kv_overrides_array_keys = []
+
+            for i, (k, v) in enumerate(kv_overrides.items()):
+                key_buf = ctypes.create_string_buffer(k.encode("utf-8"))
+                self._kv_overrides_array_keys.append(key_buf)
+                self._kv_overrides_array[i].key = key_buf
+                if isinstance(v, bool):  # checked before int: bool subclasses int
+                    self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_BOOL
+                    self._kv_overrides_array[i].value.bool_value = v
+                elif isinstance(v, int):
+                    self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_INT
+                    self._kv_overrides_array[i].value.int_value = v
+                elif isinstance(v, float):
+                    self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_FLOAT
+                    self._kv_overrides_array[i].value.float_value = v
+                else:
+                    raise ValueError(f"Unknown value type for {k}: {v}")
+
+            self._kv_overrides_array_sentinel_key = b"\0"
+
+            # null array sentinel
+            self._kv_overrides_array[n_overrides].key = self._kv_overrides_array_sentinel_key
+            self.model_params.kv_overrides = self._kv_overrides_array
+
         self.n_batch = min(n_ctx, n_batch)  # ???
         self.n_threads = n_threads or max(multiprocessing.cpu_count() // 2, 1)
         self.n_threads_batch = n_threads_batch or max(
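
Aside (not part of the commit): the hunk above relies on a sentinel-terminated ctypes array, allocated with one extra slot whose empty key tells the C side where the overrides end. A minimal self-contained sketch of the pattern, using a simplified stand-in struct rather than the real llama_model_kv_override layout:

# Sketch of a sentinel-terminated ctypes array; `Item` is a simplified
# stand-in for llama_model_kv_override, not the real binding.
import ctypes

class Item(ctypes.Structure):
    _fields_ = [("key", ctypes.c_char_p)]

n = 2
items = (Item * (n + 1))()  # note the trailing (): instantiate, don't just build the array type
items[0].key = b"first.key"
items[1].key = b"second.key"
items[n].key = b"\0"  # empty key marks end-of-array for the consumer

# A consumer walks the array until it hits the empty key:
i = 0
while items[i].key not in (None, b"", b"\0"):
    print(items[i].key.decode())
    i += 1
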
@@ -2148,6 +2178,7 @@ def __getstate__(self):
             vocab_only=self.model_params.vocab_only,
             use_mmap=self.model_params.use_mmap,
             use_mlock=self.model_params.use_mlock,
+            kv_overrides=self.kv_overrides,
             # Context Params
             seed=self.context_params.seed,
             n_ctx=self.context_params.n_ctx,
@@ -2190,6 +2221,7 @@ def __setstate__(self, state):
             vocab_only=state["vocab_only"],
             use_mmap=state["use_mmap"],
             use_mlock=state["use_mlock"],
+            kv_overrides=state["kv_overrides"],
             # Context Params
             seed=state["seed"],
             n_ctx=state["n_ctx"],
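
Usage note (not part of the commit): with this change, overrides can be passed straight to the constructor. A minimal sketch, assuming a local GGUF model; the path and the override keys below are illustrative placeholders:

# Hypothetical path and keys, shown only to illustrate the type-to-tag mapping:
# bool -> LLAMA_KV_OVERRIDE_BOOL, int -> LLAMA_KV_OVERRIDE_INT, float -> LLAMA_KV_OVERRIDE_FLOAT.
from llama_cpp import Llama

llm = Llama(
    model_path="./models/example.gguf",         # placeholder path
    kv_overrides={
        "tokenizer.ggml.add_bos_token": False,  # bool override
        "llama.context_length": 4096,           # int override
    },
)
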

llama_cpp/server/model.py

19 additions, 1 deletion
@@ -1,6 +1,6 @@
 from __future__ import annotations

-from typing import Optional, Union, List
+from typing import Dict, Optional, Union, List

 import llama_cpp

@@ -71,6 +71,23 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
         chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(
             clip_model_path=settings.clip_model_path, verbose=settings.verbose
         )
+
+    kv_overrides: Optional[Dict[str, Union[bool, int, float]]] = None
+    if settings.kv_overrides is not None:
+        assert isinstance(settings.kv_overrides, list)
+        kv_overrides = {}
+        for kv in settings.kv_overrides:
+            key, value = kv.split("=")
+            if ":" in value:
+                value_type, value = value.split(":")
+                if value_type == "bool":
+                    kv_overrides[key] = value.lower() in ["true", "1"]
+                elif value_type == "int":
+                    kv_overrides[key] = int(value)
+                elif value_type == "float":
+                    kv_overrides[key] = float(value)
+                else:
+                    raise ValueError(f"Unknown value type {value_type}")

     _model = llama_cpp.Llama(
         model_path=settings.model,
@@ -81,6 +98,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
         vocab_only=settings.vocab_only,
         use_mmap=settings.use_mmap,
         use_mlock=settings.use_mlock,
+        kv_overrides=kv_overrides,
         # Context Params
         seed=settings.seed,
         n_ctx=settings.n_ctx,
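
To make the key=type:value format concrete, here is the parsing logic above re-run standalone (not part of the commit; the sample keys are illustrative):

# Standalone run of the key=type:value parsing added above.
sample = [
    "tokenizer.ggml.add_bos_token=bool:false",
    "llama.context_length=int:4096",
    "example.scale=float:0.5",
]

kv_overrides = {}
for kv in sample:
    key, value = kv.split("=")
    if ":" in value:
        value_type, value = value.split(":")
        if value_type == "bool":
            kv_overrides[key] = value.lower() in ["true", "1"]
        elif value_type == "int":
            kv_overrides[key] = int(value)
        elif value_type == "float":
            kv_overrides[key] = float(value)
        else:
            raise ValueError(f"Unknown value type {value_type}")

print(kv_overrides)
# {'tokenizer.ggml.add_bos_token': False, 'llama.context_length': 4096, 'example.scale': 0.5}
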

llama_cpp/server/settings.py

4 additions, 0 deletions
@@ -48,6 +48,10 @@ class ModelSettings(BaseSettings):
         default=llama_cpp.llama_mlock_supported(),
         description="Use mlock.",
     )
+    kv_overrides: Optional[List[str]] = Field(
+        default=None,
+        description="List of model kv overrides in the format key=type:value where type is one of (bool, int, float). Valid true values are (true, TRUE, 1), otherwise false.",
+    )
     # Context Params
     seed: int = Field(
         default=llama_cpp.LLAMA_DEFAULT_SEED, description="Random seed. -1 for random."
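
Putting the three files together, a minimal end-to-end sketch (not part of the commit; the model path is a placeholder, while the field and function names come from the diffs above):

from llama_cpp.server.settings import ModelSettings
from llama_cpp.server.model import load_llama_from_model_settings

# Placeholder model path; kv_overrides uses the key=type:value format from ModelSettings.
settings = ModelSettings(
    model="./models/example.gguf",
    kv_overrides=["llama.context_length=int:4096"],
)
llm = load_llama_from_model_settings(settings)
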
