Commit 20c965a

Merge branch 'abetlen:main' into main
2 parents c9ec9c8 + fc19cc7

6 files changed: +41 additions, -4 deletions

CHANGELOG.md

9 additions, 0 deletions
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.88]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@fc4ca27b25464a11b3b86c9dbb5b6ed6065965c2
+- fix: only print 'cache saved' in verbose mode by @lsorber in #1668
+- fix: Added back from_file method to LlamaGrammar by @ExtReMLapin in #1673
+- fix: grammar prints on each call by @abetlen in 0998ea0deea076a547d54bd598d6b413b588ee2b
+- feat: Enable recursive search of HFFS.ls when using from_pretrained by @benHeidabetlen in #1656
+- feat: Add more detailed log for prefix-match by @xu-song in #1659
+
 ## [0.2.87]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@be55695eff44784a141a863f273661a6bce63dfc
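
The "recursive search of HFFS.ls" entry above concerns Llama.from_pretrained, which downloads a GGUF file from the Hugging Face Hub by matching a filename pattern against the repository listing. A minimal usage sketch, assuming a placeholder repo id and filename pattern (neither appears in this commit):

# Sketch only: repo_id and filename are placeholders, not taken from this commit.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="someuser/some-model-GGUF",  # placeholder Hugging Face repo
    filename="*Q4_K_M.gguf",             # glob matched against files in the repo
    verbose=False,
)
print(llm("Q: What is the capital of France? A:", max_tokens=16)["choices"][0]["text"])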

llama_cpp/__init__.py

1 addition, 1 deletion
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.87"
+__version__ = "0.2.88"

llama_cpp/llama.py

2 additions, 1 deletion
@@ -1528,7 +1528,8 @@ def logit_bias_processor(
                 if self.verbose:
                     print("Llama._create_completion: cache save", file=sys.stderr)
                 self.cache[prompt_tokens + completion_tokens] = self.save_state()
-                print("Llama._create_completion: cache saved", file=sys.stderr)
+                if self.verbose:
+                    print("Llama._create_completion: cache saved", file=sys.stderr)
             return
 
         if self.cache:
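
The change above gates the second "cache saved" message behind self.verbose, so it is only printed when the Llama instance was constructed with verbose=True. A minimal sketch of the flag and cache interacting, assuming a placeholder model path:

# Sketch only: the model path is a placeholder. With verbose=False, neither the
# "cache save" nor the "cache saved" message from the diff is printed to stderr.
from llama_cpp import Llama, LlamaCache

llm = Llama(model_path="./models/model.gguf", verbose=False)
llm.set_cache(LlamaCache())  # enables the self.cache branch shown in the diff

out = llm("Q: Name three primary colors. A:", max_tokens=24)
print(out["choices"][0]["text"])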

llama_cpp/llama_cpp.py

8 additions, 0 deletions
@@ -1505,6 +1505,14 @@ def llama_model_has_encoder(model: llama_model_p, /) -> bool:
     ...
 
 
+# // Returns true if the model contains a decoder that requires llama_decode() call
+# LLAMA_API bool llama_model_has_decoder(const struct llama_model * model);
+@ctypes_function("llama_model_has_decoder", [llama_model_p_ctypes], ctypes.c_bool)
+def llama_model_has_decoder(model: llama_model_p, /) -> bool:
+    """Returns true if the model contains a decoder that requires llama_decode() call"""
+    ...
+
+
 # // For encoder-decoder models, this function returns id of the token that must be provided
 # // to the decoder to start generating output sequence. For other models, it returns -1.
 # LLAMA_API llama_token llama_model_decoder_start_token(const struct llama_model * model);
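
The new binding mirrors the existing llama_model_has_encoder wrapper: a @ctypes_function declaration that forwards to the shared library and returns a C bool. A sketch of calling both through the low-level API, assuming a placeholder model path:

# Sketch only: the model path is a placeholder; error handling is omitted.
import llama_cpp

llama_cpp.llama_backend_init()
params = llama_cpp.llama_model_default_params()
model = llama_cpp.llama_load_model_from_file(b"./models/model.gguf", params)

print("has encoder:", llama_cpp.llama_model_has_encoder(model))
print("has decoder:", llama_cpp.llama_model_has_decoder(model))

llama_cpp.llama_free_model(model)
llama_cpp.llama_backend_free()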

llama_cpp/llama_grammar.py

20 additions, 1 deletion
@@ -1,6 +1,7 @@
 """Python implementation of llama grammar parser directly translated from C++ source file in vendor/llama.cpp/common/grammar-parser.cpp."""
 
 # flake8: noqa
+from pathlib import Path
 import sys
 import ctypes
 import enum
@@ -890,8 +891,26 @@ def reset(self):
     @classmethod
     def from_string(cls, grammar: str, verbose: bool = True) -> "LlamaGrammar":
         parsed_grammar = parse(grammar)
-        print_grammar(file=sys.stdout, state=parsed_grammar)
+        if verbose:
+            print_grammar(file=sys.stdout, state=parsed_grammar)
         return cls(parsed_grammar)
+
+    @classmethod
+    def from_file(cls, file: Union[str, Path], verbose: bool = True) -> "LlamaGrammar":
+        try:
+            with open(file) as f:
+                grammar = f.read()
+        except Exception as err:
+            raise Exception(
+                f"{cls.from_file.__name__}: error reading grammar file: {err}"
+            )
+
+        if grammar:
+            return cls.from_string(grammar, verbose=verbose)
+
+        raise ValueError(
+            f"{cls.from_file.__name__}: error parsing grammar file: params_grammer is empty"
+        )
 
     @classmethod
     def from_json_schema(cls, json_schema: str, verbose: bool = True) -> "LlamaGrammar":
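
The restored from_file classmethod reads a GBNF grammar from disk and delegates to from_string, which now only prints the parsed grammar when verbose=True. A usage sketch, assuming placeholder grammar and model paths:

# Sketch only: the grammar file and model path are placeholders.
from llama_cpp import Llama
from llama_cpp.llama_grammar import LlamaGrammar

grammar = LlamaGrammar.from_file("./grammars/json.gbnf", verbose=False)
llm = Llama(model_path="./models/model.gguf", verbose=False)

out = llm('A JSON object describing a cat named "Tom": ', max_tokens=128, grammar=grammar)
print(out["choices"][0]["text"])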

vendor/llama.cpp

Submodule pointer updated (the llama.cpp bump to ggerganov/llama.cpp@fc4ca27b25464a11b3b86c9dbb5b6ed6065965c2 noted in the changelog above).
