Commit e377987

Update llama.cpp

1 parent: 92b0013

3 files changed: +16 −16 lines changed

CHANGELOG.md

4 additions & 0 deletions

@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

+### Added
+
+- (llama.cpp) Fix struct misalignment bug
+
## [0.1.64]

### Added

llama_cpp/llama_cpp.py

11 additions & 15 deletions

@@ -150,47 +150,43 @@ class llama_token_data_array(Structure):


# struct llama_context_params {
+#     int seed; // RNG seed, -1 for random
#     int n_ctx; // text context
#     int n_batch; // prompt processing batch size
#     int n_gpu_layers; // number of layers to store in VRAM
#     int main_gpu; // the GPU that is used for scratch and small tensors
#     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-#     bool low_vram; // if true, reduce VRAM usage at the cost of performance
-#     int seed; // RNG seed, -1 for random
+#     // called with a progress value between 0 and 1, pass NULL to disable
+#     llama_progress_callback progress_callback;
+#     // context pointer passed to the progress callback
+#     void * progress_callback_user_data;

+#     // Keep the booleans together to avoid misalignment during copy-by-value.
+#     bool low_vram; // if true, reduce VRAM usage at the cost of performance
#     bool f16_kv; // use fp16 for KV cache
#     bool logits_all; // the llama_eval() call computes all logits, not just the last one
#     bool vocab_only; // only load the vocabulary, no weights
#     bool use_mmap; // use mmap if possible
#     bool use_mlock; // force system to keep model in RAM
#     bool embedding; // embedding mode only
-
-
-#     // called with a progress value between 0 and 1, pass NULL to disable
-#     llama_progress_callback progress_callback;
-#     // context pointer passed to the progress callback
-#     void * progress_callback_user_data;
# };
class llama_context_params(Structure):
    _fields_ = [
+        ("seed", c_int),
        ("n_ctx", c_int),
        ("n_batch", c_int),
        ("n_gpu_layers", c_int),
        ("main_gpu", c_int),
        ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("progress_callback", llama_progress_callback),
+        ("progress_callback_user_data", c_void_p),
        ("low_vram", c_bool),
-        ("seed", c_int),
        ("f16_kv", c_bool),
-        (
-            "logits_all",
-            c_bool,
-        ),
+        ("logits_all", c_bool),
        ("vocab_only", c_bool),
        ("use_mmap", c_bool),
        ("use_mlock", c_bool),
        ("embedding", c_bool),
-        ("progress_callback", llama_progress_callback),
-        ("progress_callback_user_data", c_void_p),
    ]
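The reorder in _fields_ above is the substance of the fix: ctypes derives a Structure's memory layout purely from the order and types declared in _fields_, so when upstream llama.cpp moved seed and the progress-callback members, the Python declaration had to move in lockstep or later fields would be read at the wrong offsets. A minimal, self-contained sketch of that failure mode (the two classes and their two-field layout are illustrative only, not part of these bindings):

import ctypes

class CSideParams(ctypes.Structure):
    # Layout as the C library defines it in this sketch: seed first, then n_ctx.
    _fields_ = [("seed", ctypes.c_int), ("n_ctx", ctypes.c_int)]

class StaleBinding(ctypes.Structure):
    # Same field names, but the stale order a binding would have before this commit.
    _fields_ = [("n_ctx", ctypes.c_int), ("seed", ctypes.c_int)]

c_side = CSideParams(seed=-1, n_ctx=512)
raw = bytes(c_side)                         # the raw bytes the C side would pass by value
stale = StaleBinding.from_buffer_copy(raw)  # reinterpreted with the mismatched layout
print(stale.n_ctx, stale.seed)              # prints "-1 512": values land in the wrong fields

With mixed int, pointer, and bool fields, as in llama_context_params, a mismatch does not just swap values; it can also shift alignment padding so that trailing booleans such as use_mmap or embedding read garbage. A hedged usage sketch against the bindings themselves (assuming the llama_context_default_params binding exported by this module, which returns the struct by value from C):

import llama_cpp

params = llama_cpp.llama_context_default_params()  # struct comes back from C by value
params.seed = 1337
params.n_ctx = 2048
params.use_mmap = True
# With the Python field order matching the C header, these flags round-trip intact.
print(params.n_ctx, params.low_vram, params.embedding)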
vendor/llama.cpp

Submodule reference updated (+1 −1); the new upstream llama.cpp revision is not shown in this view.
0 commit comments
