Commit b83728a

Update llama.cpp
1 parent a4fe3fe commit b83728a

File tree

2 files changed: +5 -9 lines changed

llama_cpp/llama_cpp.py
+4 -8 (4 additions & 8 deletions)
@@ -164,7 +164,7 @@ class llama_token_data_array(Structure):
 # int32_t n_batch; // prompt processing batch size
 # int32_t n_gpu_layers; // number of layers to store in VRAM
 # int32_t main_gpu; // the GPU that is used for scratch and small tensors
-# float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+# const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)

 # // ref: https://github.com/ggerganov/llama.cpp/pull/2054
 # float rope_freq_base; // RoPE base frequency
@@ -192,7 +192,7 @@ class llama_context_params(Structure):
         ("n_batch", c_int32),
         ("n_gpu_layers", c_int32),
         ("main_gpu", c_int32),
-        ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("tensor_split", POINTER(c_float)),
         ("rope_freq_base", c_float),
         ("rope_freq_scale", c_float),
         ("progress_callback", llama_progress_callback),
@@ -933,22 +933,19 @@ def llama_sample_frequency_and_presence_penalties(
 # /// @param candidates A vector of `llama_token_data` containing the candidate tokens, the logits must be directly extracted from the original generation context without being sorted.
 # /// @params guidance_ctx A separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context.
 # /// @params scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance.
-# /// @params smooth_factor Smooth factor between guidance logits and original logits. 1.0f means only use guidance logits. 0.0f means only original logits.
 # LLAMA_API void llama_sample_classifier_free_guidance(
 #     struct llama_context * ctx,
 #     llama_token_data_array * candidates,
 #     struct llama_context * guidance_ctx,
-#     float scale,
-#     float smooth_factor);
+#     float scale);
 def llama_sample_classifier_free_guidance(
     ctx: llama_context_p,
     candidates,  # type: _Pointer[llama_token_data_array]
     guidance_ctx: llama_context_p,
     scale: c_float,
-    smooth_factor: c_float,
 ):
     return _lib.llama_sample_classifier_free_guidance(
-        ctx, candidates, guidance_ctx, scale, smooth_factor
+        ctx, candidates, guidance_ctx, scale
     )

@@ -957,7 +954,6 @@ def llama_sample_classifier_free_guidance(
     llama_token_data_array_p,
     llama_context_p,
     c_float,
-    c_float,
 ]
 _lib.llama_sample_classifier_free_guidance.restype = None
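
Call sites of the classifier-free-guidance sampler drop the former fifth argument. A hedged sketch of the updated call, assuming the two contexts and the candidates array are created elsewhere; the 1.5 scale is illustrative only:

import llama_cpp

def sample_with_cfg(ctx, candidates, guidance_ctx, scale=1.5):
    # Mutates the candidate logits in place; smooth_factor is no longer passed.
    llama_cpp.llama_sample_classifier_free_guidance(
        ctx, candidates, guidance_ctx, scale
    )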

vendor/llama.cpp
+1 -1 (submodule commit updated)
0 commit comments
