Commit c283edd

Set n_batch to the default value and reduce the thread count

Change the batch size to the llama.cpp default of 8. I've seen issues in llama.cpp where batch size affects the quality of generations (it shouldn't, but in case that's still a problem, this falls back to the default). Also set the auto-determined number of threads to half the system's core count: ggml will sometimes lock cores at 100% while doing nothing. That is being addressed upstream, but in the meantime it can make for a bad experience if a user's cores are pegged at 100%.
1 parent b9b6dfd · commit c283edd
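
As a side note (not part of the commit): `os.cpu_count()` can return `None` when the core count can't be determined, which the old `os.cpu_count() or 1` guarded against but which `int(os.cpu_count() / 2)` would trip over with a `TypeError`. A minimal None-safe sketch of the same half-the-cores heuristic, with an illustrative helper name:

import os

def default_n_threads() -> int:
    # os.cpu_count() may return None when the count is undeterminable,
    # so fall back to 1 before halving (illustrative helper, not in the commit).
    cpus = os.cpu_count() or 1
    # Use half the logical cores but never fewer than one thread; this mirrors
    # the commit's intent of leaving headroom while ggml's busy-spin is fixed.
    return max(1, cpus // 2)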

2 files changed (+5 -5 lines)

examples/high_level_api/fastapi_server.py

3 additions & 3 deletions
@@ -27,10 +27,10 @@
 class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
-    n_batch: int = 2048
-    n_threads: int = os.cpu_count() or 1
+    n_batch: int = 8
+    n_threads: int = int(os.cpu_count() / 2) or 1
     f16_kv: bool = True
-    use_mlock: bool = True
+    use_mlock: bool = False  # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...
     embedding: bool = True
     last_n_tokens_size: int = 64

llama_cpp/server/__main__.py

2 additions & 2 deletions
@@ -28,9 +28,9 @@ class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
     n_batch: int = 8
-    n_threads: int = os.cpu_count() or 1
+    n_threads: int = int(os.cpu_count() / 2) or 1
     f16_kv: bool = True
-    use_mlock: bool = True
+    use_mlock: bool = False  # This causes a silent failure on platforms that don't support mlock (e.g. Windows) took forever to figure out...
     embedding: bool = True
     last_n_tokens_size: int = 64
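
For reference, `Settings` extends pydantic's `BaseSettings` (v1-era API), so the new defaults can still be overridden per-run through environment variables. A minimal sketch under that assumption, with arbitrarily chosen override values and `model` omitted to keep it standalone:

import os
from pydantic import BaseSettings

class Settings(BaseSettings):
    # Same defaults as the commit, written with a None-safe halving.
    n_ctx: int = 2048
    n_batch: int = 8
    n_threads: int = max(1, (os.cpu_count() or 1) // 2)

# BaseSettings reads environment variables case-insensitively in pydantic v1,
# so exported N_THREADS / N_BATCH take precedence over the class defaults.
os.environ["N_THREADS"] = "4"
os.environ["N_BATCH"] = "16"

print(Settings().n_threads, Settings().n_batch)  # -> 4 16, from the environment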
