rubra-ai
diff --git a/‎examples/lookup/lookup.cpp
Copy file name to clipboardExpand all lines: examples/lookup/lookup.cpp
+29Lines changed: 29 additions & 0 deletions b/‎examples/lookup/lookup.cpp
Copy file name to clipboardExpand all lines: examples/lookup/lookup.cpp
+29Lines changed: 29 additions & 0 deletions
@@ -74,6 +74,35 @@ int main(int argc, char ** argv){
         t_draft_flat_us += ggml_time_us() - t_start_draft_us;
     }
 
+    llama_ngram_cache ngram_cache_context;
+    llama_ngram_cache ngram_cache_dynamic;
+    llama_ngram_cache ngram_cache_static;
+    int64_t t_draft_flat_us = 0;
+    int64_t t_draft_us = 0;
+
+    { // scoped so that t_start_draft_us does not leak into the rest of main
+        // Fill up context ngram cache with tokens from user input:
+        const int64_t t_start_draft_us = ggml_time_us();
+        llama_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, inp, inp.size(), false);
+
+        if (!params.lookup_cache_static.empty()) { // a static cache path was supplied
+            try {
+                ngram_cache_static = llama_ngram_cache_load(params.lookup_cache_static);
+            } catch (std::ifstream::failure const &) {
+                fprintf(stderr, "error: failed to open static lookup cache: %s\n", params.lookup_cache_static.c_str());
+                exit(1); // a requested static cache that cannot be loaded is treated as fatal
+            }
+        }
+
+        if (!params.lookup_cache_dynamic.empty()) { // a dynamic cache path was supplied
+            try {
+                ngram_cache_dynamic = llama_ngram_cache_load(params.lookup_cache_dynamic);
+            } catch (std::ifstream::failure const &) {} // if the file does not exist it will simply be created at the end of the program
+        }
+
+        t_draft_flat_us += ggml_time_us() - t_start_draft_us; // charge the cache setup time to the flat draft timer
+    }
+
     const int max_context_size     = llama_n_ctx(ctx);
     const int max_tokens_list_size = max_context_size - 4;