Commit bfb42b7

Merge branch 'main' of github.com:abetlen/llama-cpp-python into main

2 parents: c5de5d3 + 5e39a85

3 files changed: +7 −6 lines

.github/workflows/build-and-release.yaml (+2 −2)

@@ -29,7 +29,7 @@ jobs:
           python -m pip install -e .[all]
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.2
+        uses: pypa/cibuildwheel@v2.20.0
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
@@ -56,7 +56,7 @@ jobs:
           platforms: linux/arm64
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.2
+        uses: pypa/cibuildwheel@v2.20.0
         env:
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/build-wheels-metal.yaml (+1 −1)

@@ -30,7 +30,7 @@ jobs:
           python -m pip install -e .[all]
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.2
+        uses: pypa/cibuildwheel@v2.20.0
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""

llama_cpp/llama.py (+4 −3)

@@ -777,11 +777,12 @@ def generate(
                 else:
                     break
             if longest_prefix > 0:
-                if self.verbose:
-                    print("Llama.generate: prefix-match hit", file=sys.stderr)
                 reset = False
                 tokens = tokens[longest_prefix:]
                 self.n_tokens = longest_prefix
+                if self.verbose:
+                    print(f"Llama.generate: {longest_prefix} prefix-match hit, "
+                          f"remaining {len(tokens)} prompt tokens to eval", file=sys.stderr)
 
         # Reset the model state
         if reset:
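The hunk above moves the verbose log after the prefix bookkeeping, so it can report both the length of the prefix match and how many prompt tokens still need evaluation. A minimal standalone sketch of the prefix-match idea (hypothetical helper and example token values, not the library's actual internals):

```python
def longest_common_prefix(cached, prompt):
    """Count leading tokens shared by the cached context and the new prompt."""
    n = 0
    for a, b in zip(cached, prompt):
        if a != b:
            break
        n += 1
    return n

# Example: the first 3 tokens are already in the KV cache.
cached_tokens = [1, 15043, 29892, 920]
prompt_tokens = [1, 15043, 29892, 825, 526]

longest_prefix = longest_common_prefix(cached_tokens, prompt_tokens)
remaining = prompt_tokens[longest_prefix:]  # only these tokens need evaluation
print(f"Llama.generate: {longest_prefix} prefix-match hit, "
      f"remaining {len(remaining)} prompt tokens to eval")
```

Logging after the slice is what lets the message include the remaining-token count, since `tokens` has already been trimmed to the unevaluated suffix at that point.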
@@ -2159,7 +2160,7 @@ def from_pretrained(
         files = [
             file["name"] if isinstance(file, dict) else file
-            for file in hffs.ls(repo_id)
+            for file in hffs.ls(repo_id, recursive=True)
         ]
 
         # split each file into repo_id, subfolder, filename
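The trailing context line notes that each listed entry is then split into repo_id, subfolder, and filename. A hedged sketch of such a split (hypothetical helper and repo name, not the library's actual code), assuming the recursive listing returns full paths of the form `repo_id/subfolder/file`:

```python
def split_listing(repo_id, path):
    """Split a recursively listed path into (repo_id, subfolder, filename).

    Hypothetical helper illustrating the comment in the diff; the real
    implementation in llama_cpp/llama.py may differ.
    """
    rel = path[len(repo_id):].lstrip("/") if path.startswith(repo_id) else path
    subfolder, _, filename = rel.rpartition("/")
    return repo_id, subfolder, filename

# Nested file found only because of recursive=True:
print(split_listing("abetlen/test-repo", "abetlen/test-repo/quantized/model-q4.gguf"))
# Top-level file: subfolder comes back empty.
print(split_listing("abetlen/test-repo", "abetlen/test-repo/model-f16.gguf"))
```

Without `recursive=True`, a flat `ls` would miss files in subfolders entirely, which is the gap this hunk closes.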
