th-neu
diff --git a/‎README.md
Copy file name to clipboardExpand all lines: README.md
+7-1Lines changed: 7 additions & 1 deletion b/‎README.md
Copy file name to clipboardExpand all lines: README.md
+7-1Lines changed: 7 additions & 1 deletion
diff --git a/‎llama_cpp/llama_cpp.py
Copy file name to clipboardExpand all lines: llama_cpp/llama_cpp.py
+3-1Lines changed: 3 additions & 1 deletion b/‎llama_cpp/llama_cpp.py
Copy file name to clipboardExpand all lines: llama_cpp/llama_cpp.py
+3-1Lines changed: 3 additions & 1 deletion
diff --git a/‎poetry.lock
Copy file name to clipboardExpand all lines: poetry.lock
+4-4Lines changed: 4 additions & 4 deletions b/‎poetry.lock
Copy file name to clipboardExpand all lines: poetry.lock
+4-4Lines changed: 4 additions & 4 deletions
diff --git a/‎pyproject.toml
Copy file name to clipboardExpand all lines: pyproject.toml
+1-1Lines changed: 1 addition & 1 deletion b/‎pyproject.toml
Copy file name to clipboardExpand all lines: pyproject.toml
+1-1Lines changed: 1 addition & 1 deletion
diff --git a/‎vendor/llama.cpp
Copy file name to clipboard b/‎vendor/llama.cpp
Copy file name to clipboard
@@ -26,6 +26,12 @@ pip install llama-cpp-python
 The above command will attempt to install the package and build `llama.cpp` from source.
 This is the recommended installation method as it ensures that `llama.cpp` is built with the available optimizations for your system.
 
+Note: If you are using an Apple Silicon (M1) Mac, make sure you have installed a version of Python that supports the arm64 architecture. For example:
+```
+wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
+bash Miniforge3-MacOSX-arm64.sh
+```
+Otherwise, the installation will build the x86 version of `llama.cpp`, which will be 10x slower on an Apple Silicon (M1) Mac.
 
 ### Installation with OpenBLAS / cuBLAS / CLBlast
 
@@ -120,7 +126,7 @@ Below is a short example demonstrating how to use the low-level API to tokenize
 >>> ctx = llama_cpp.llama_init_from_file(b"./models/7b/ggml-model.bin", params)
 >>> max_tokens = params.n_ctx
 # use ctypes arrays for array params
->>> tokens = (llama_cppp.llama_token * int(max_tokens))()
+>>> tokens = (llama_cpp.llama_token * int(max_tokens))()
 >>> n_tokens = llama_cpp.llama_tokenize(ctx, b"Q: Name the planets in the solar system? A: ", tokens, max_tokens, add_bos=llama_cpp.c_bool(True))
 >>> llama_cpp.llama_free(ctx)
 ```
 
@@ -44,15 +44,17 @@ def _load_shared_library(lib_base_name: str):
         _base_path = _lib.parent.resolve()
         _lib_paths = [_lib.resolve()]
 
+    cdll_args = dict() # type: ignore
     # Add the library directory to the DLL search path on Windows (if needed)
     if sys.platform == "win32" and sys.version_info >= (3, 8):
         os.add_dll_directory(str(_base_path))
+        cdll_args["winmode"] = 0
 
     # Try to load the shared library, handling potential errors
     for _lib_path in _lib_paths:
         if _lib_path.exists():
             try:
-                return ctypes.CDLL(str(_lib_path))
+                return ctypes.CDLL(str(_lib_path), **cdll_args)
             except Exception as e:
                 raise RuntimeError(f"Failed to load shared library '{_lib_path}': {e}")
 
 
@@ -22,7 +22,7 @@ black = "^23.3.0"
 twine = "^4.0.2"
 mkdocs = "^1.4.3"
 mkdocstrings = {extras = ["python"], version = "^0.21.2"}
-mkdocs-material = "^9.1.11"
+mkdocs-material = "^9.1.12"
 pytest = "^7.3.1"
 httpx = "^0.24.0"