From 3530fc26b67905fecbad4fd61062e04d8280b41e Mon Sep 17 00:00:00 2001
From: Rich Dougherty
Date: Sat, 2 Nov 2024 16:30:48 +1300
Subject: [PATCH] docs: Remove ref to llama_eval in llama_cpp.py docs

The llama_eval function was removed in 0.2.14 (0d37ce5) but the docs
still reference it. Updated to match the llama.h docs from upstream.
---
 llama_cpp/llama_cpp.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 457c6dddb..4ab3c54ed 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -764,7 +764,7 @@ class llama_context_params(ctypes.Structure):
         cb_eval_user_data (ctypes.ctypes.c_void_p): user data for cb_eval
         type_k (int): data type for K cache
         type_v (int): data type for V cache
-        logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
+        logits_all (bool): the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         embeddings (bool): if true, extract embeddings (together with logits)
         offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
         flash_attn (bool): whether to use flash attention
@@ -2453,10 +2453,10 @@ def llama_synchronize(ctx: llama_context_p, /):
     "llama_get_logits", [llama_context_p_ctypes], ctypes.POINTER(ctypes.c_float)
 )
 def llama_get_logits(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_float]:
-    """Token logits obtained from the last call to llama_eval()
-    The logits for the last token are stored in the last row
-    Logits for which llama_batch.logits[i] == 0 are undefined
-    Rows: n_tokens provided with llama_batch
+    """Token logits obtained from the last call to llama_decode()
+    The logits for which llama_batch.logits[i] != 0 are stored contiguously
+    in the order they have appeared in the batch.
+    Rows: number of tokens for which llama_batch.logits[i] != 0
     Cols: n_vocab

     Returns:
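
Reviewer note: the new docstring describes a packed layout, where llama_get_logits() returns a flat float buffer holding one n_vocab-wide row for each batch entry whose llama_batch.logits[i] was nonzero, in batch order. A minimal sketch of reading that buffer through the low-level bindings, assuming ctx and model are an already-initialized context and model, llama_decode() has been called, and n_rows batch entries requested logits; read_logits is an illustrative helper, not a library function:

    import llama_cpp

    def read_logits(ctx, model, n_rows):
        # Columns per row: the model's vocabulary size.
        n_vocab = llama_cpp.llama_n_vocab(model)
        # Flat float* holding the requested rows contiguously, in batch order.
        logits = llama_cpp.llama_get_logits(ctx)
        # Row r belongs to the r-th batch entry with llama_batch.logits[i] != 0;
        # the last requested token's logits sit in row n_rows - 1.
        return [
            [logits[r * n_vocab + c] for c in range(n_vocab)]
            for r in range(n_rows)
        ]

One consequence of the packed layout: a caller that requests logits only for the final token of the batch reads row 0, since that is the only row stored.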