We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 153a004 commit cb79171 Copy full SHA for cb79171
llama_cpp/llama.py
@@ -281,7 +281,7 @@ def __init__(
281
)
282
self.context_params.yarn_orig_ctx = yarn_orig_ctx if yarn_orig_ctx != 0 else 0
283
self.context_params.mul_mat_q = mul_mat_q
284
- self.context_params.logits_all = logits_all
+ self.context_params.logits_all = logits_all if draft_model is None else True # Must be set to True for speculative decoding
285
self.context_params.embedding = embedding
286
self.context_params.offload_kqv = offload_kqv
287
0 commit comments