1 parent 52350cc commit d9b38e3
llama_cpp/llama.py
```diff
@@ -1019,12 +1019,11 @@ def eval(self, tokens: Sequence[int]):
         """
         assert self._ctx.ctx is not None
         assert self._batch.batch is not None
-        n_ctx = self._n_ctx
+        self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
         for i in range(0, len(tokens), self.n_batch):
             batch = tokens[i : min(len(tokens), i + self.n_batch)]
-            n_past = min(n_ctx - len(batch), self.n_tokens)
+            n_past = self.n_tokens
             n_tokens = len(batch)
-            self._ctx.kv_cache_seq_rm(-1, n_past, -1)
             self._batch.set_batch(
                 batch=batch, n_past=n_past, logits_all=self.context_params.logits_all
             )
```
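The change hoists the KV-cache truncation out of the per-batch loop: the cache is trimmed once to `self.n_tokens` before evaluation, and `n_past` now simply tracks the number of tokens already evaluated. Previously, `n_past` was clamped to `n_ctx - len(batch)` inside the loop, and the per-batch `kv_cache_seq_rm(-1, n_past, -1)` call could therefore discard cache entries for tokens that had already been processed. For context, here is a minimal usage sketch of the patched `eval` path; it assumes the public llama-cpp-python API (`Llama`, `Llama.tokenize`, `Llama.eval`, `Llama.n_tokens`), and the model path is a placeholder.

```python
# Sketch of exercising the patched eval path; "./model.gguf" is a
# placeholder path, and n_ctx=512 is an arbitrary example context size.
from llama_cpp import Llama

llm = Llama(model_path="./model.gguf", n_ctx=512)

# First call: the KV cache is empty, so kv_cache_seq_rm(-1, 0, -1)
# removes nothing and each batch starts from n_past = 0.
llm.eval(llm.tokenize(b"Hello, world"))

# Subsequent call: the cache is truncated once to llm.n_tokens up
# front, and every batch continues from n_past = llm.n_tokens rather
# than a value clamped by the context size.
llm.eval(llm.tokenize(b"!"))
```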