Commit e377987

Update llama.cpp

1 parent: 92b0013

3 files changed: +16 −16 lines changed

CHANGELOG.md

4 additions & 0 deletions

@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

+### Added
+
+- (llama.cpp) Fix struct misalignment bug
+
## [0.1.64]

### Added

llama_cpp/llama_cpp.py

11 additions & 15 deletions

@@ -150,47 +150,43 @@ class llama_token_data_array(Structure):


# struct llama_context_params {
+#     int seed; // RNG seed, -1 for random
#     int n_ctx; // text context
#     int n_batch; // prompt processing batch size
#     int n_gpu_layers; // number of layers to store in VRAM
#     int main_gpu; // the GPU that is used for scratch and small tensors
#     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-#     bool low_vram; // if true, reduce VRAM usage at the cost of performance
-#     int seed; // RNG seed, -1 for random
+#     // called with a progress value between 0 and 1, pass NULL to disable
+#     llama_progress_callback progress_callback;
+#     // context pointer passed to the progress callback
+#     void * progress_callback_user_data;

+#     // Keep the booleans together to avoid misalignment during copy-by-value.
+#     bool low_vram; // if true, reduce VRAM usage at the cost of performance
#     bool f16_kv; // use fp16 for KV cache
#     bool logits_all; // the llama_eval() call computes all logits, not just the last one
#     bool vocab_only; // only load the vocabulary, no weights
#     bool use_mmap; // use mmap if possible
#     bool use_mlock; // force system to keep model in RAM
#     bool embedding; // embedding mode only
-
-
-#     // called with a progress value between 0 and 1, pass NULL to disable
-#     llama_progress_callback progress_callback;
-#     // context pointer passed to the progress callback
-#     void * progress_callback_user_data;
# };
class llama_context_params(Structure):
    _fields_ = [
+        ("seed", c_int),
        ("n_ctx", c_int),
        ("n_batch", c_int),
        ("n_gpu_layers", c_int),
        ("main_gpu", c_int),
        ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("progress_callback", llama_progress_callback),
+        ("progress_callback_user_data", c_void_p),
        ("low_vram", c_bool),
-        ("seed", c_int),
        ("f16_kv", c_bool),
-        (
-            "logits_all",
-            c_bool,
-        ),
+        ("logits_all", c_bool),
        ("vocab_only", c_bool),
        ("use_mmap", c_bool),
        ("use_mlock", c_bool),
        ("embedding", c_bool),
-        ("progress_callback", llama_progress_callback),
-        ("progress_callback_user_data", c_void_p),
    ]
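The reorder in _fields_ above is the substance of the fix: ctypes derives a Structure's memory layout purely from the order and types declared in _fields_, so when upstream llama.cpp moved seed and the progress-callback members, the Python declaration had to move in lockstep or later fields would be read at the wrong offsets. A minimal, self-contained sketch of that failure mode (the two classes and their two-field layout are illustrative only, not part of these bindings):

import ctypes

class CSideParams(ctypes.Structure):
    # Layout as the C library defines it in this sketch: seed first, then n_ctx.
    _fields_ = [("seed", ctypes.c_int), ("n_ctx", ctypes.c_int)]

class StaleBinding(ctypes.Structure):
    # Same field names, but the stale order a binding would have before this commit.
    _fields_ = [("n_ctx", ctypes.c_int), ("seed", ctypes.c_int)]

c_side = CSideParams(seed=-1, n_ctx=512)
raw = bytes(c_side)                         # the raw bytes the C side would pass by value
stale = StaleBinding.from_buffer_copy(raw)  # reinterpreted with the mismatched layout
print(stale.n_ctx, stale.seed)              # prints "-1 512": values land in the wrong fields

With mixed int, pointer, and bool fields, as in llama_context_params, a mismatch does not just swap values; it can also shift alignment padding so that trailing booleans such as use_mmap or embedding read garbage. A hedged usage sketch against the bindings themselves (assuming the llama_context_default_params binding exported by this module, which returns the struct by value from C):

import llama_cpp

params = llama_cpp.llama_context_default_params()  # struct comes back from C by value
params.seed = 1337
params.n_ctx = 2048
params.use_mmap = True
# With the Python field order matching the C header, these flags round-trip intact.
print(params.n_ctx, params.low_vram, params.embedding)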
vendor/llama.cpp

Submodule reference updated (+1 −1); the new upstream llama.cpp revision is not shown in this view.
0 commit comments
