Commit 7fc7bc3

Remove usage of eval_tokens for cache check

1 parent fe331ec · commit 7fc7bc3
1 file changed: 2 additions (+) and 2 deletions (-)

llama_cpp/llama.py

@@ -735,10 +735,10 @@ def _create_completion(
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
-                    cache_item.eval_tokens, prompt_tokens
+                    cache_item.input_ids.tolist(), prompt_tokens
                 )
                 eval_prefix_len = Llama.longest_token_prefix(
-                    self.eval_tokens, prompt_tokens
+                    self._input_ids.tolist(), prompt_tokens
                 )
                 if cache_prefix_len > eval_prefix_len:
                     self.load_state(cache_item)
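The check restores a cached state only when it covers more of the new prompt than the tokens the model has already evaluated, so a shorter cached match never throws away work already done. Below is a minimal, self-contained sketch of that comparison; the body of longest_token_prefix is assumed (it just counts matching leading tokens) and the token ids and variable names are made up for illustration, so this is not the library's exact implementation.

```python
from typing import Sequence


def longest_token_prefix(a: Sequence[int], b: Sequence[int]) -> int:
    """Count how many leading tokens a and b share (assumed behaviour of
    Llama.longest_token_prefix in llama_cpp/llama.py)."""
    n = 0
    for x, y in zip(a, b):
        if x != y:
            break
        n += 1
    return n


# Hypothetical token ids, for illustration only.
prompt_tokens = [1, 15043, 29892, 920, 526, 366]  # new prompt
cached_tokens = [1, 15043, 29892, 920]            # tokens behind a cached state
current_tokens = [1, 15043]                       # tokens the model has already evaluated

cache_prefix_len = longest_token_prefix(cached_tokens, prompt_tokens)  # 4
eval_prefix_len = longest_token_prefix(current_tokens, prompt_tokens)  # 2

# Mirrors the diff above: only load the cached state if it would skip more
# re-evaluation than the current state already does.
if cache_prefix_len > eval_prefix_len:
    print(f"cache hit: reuse {cache_prefix_len} tokens instead of {eval_prefix_len}")
```

The commit feeds this same comparison from input_ids / _input_ids, which are presumably array-backed (hence the .tolist() calls to get plain Python lists), instead of the removed eval_tokens attribute.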
