Commit 0da655b

Temporarily disable cache until save state bug is fixed.
1 parent: be0403d

File tree: 2 files changed (+7 / -3 lines changed)

‎CHANGELOG.md

+2 lines changed: 2 additions & 0 deletions
@@ -12,7 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - (build-system) Migrate from scikit-build to scikit-build-core
 
 ### Fixed
+
 - Truncate max_tokens in create_completion so requested tokens doesn't exceed context size.
+- Temporarily disable cache for completion requests
 
 ## [v0.1.59]
 
‎llama_cpp/llama.py

+5 / -3 lines changed: 5 additions & 3 deletions
@@ -831,7 +831,9 @@ def _create_completion(
                 "logprobs is not supported for models created with logits_all=False"
             )
 
-        if self.cache:
+        # Temporarily disable usage of the cache
+        # See: https://github.com/abetlen/llama-cpp-python/issues/348#issuecomment-1583072408
+        if self.cache and False:
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
@@ -1069,14 +1071,14 @@ def _create_completion(
                     }
                 ],
             }
-            if self.cache:
+            if self.cache and False:
                 if self.verbose:
                     print("Llama._create_completion: cache save", file=sys.stderr)
                 self.cache[prompt_tokens + completion_tokens] = self.save_state()
                 print("Llama._create_completion: cache saved", file=sys.stderr)
             return
 
-        if self.cache:
+        if self.cache and False:
             if self.verbose:
                 print("Llama._create_completion: cache save", file=sys.stderr)
             self.cache[prompt_tokens + completion_tokens] = self.save_state()

0 commit comments
