Commit 53ebcc8

feat(server): Provide ability to dynamically allocate all threads if desired using -1 (abetlen#1364)
1 parent: 507c1da

1 file changed: +13 -3 lines changed

llama_cpp/server/settings.py (13 additions & 3 deletions)
@@ -3,7 +3,7 @@
 import multiprocessing
 
 from typing import Optional, List, Literal, Union
-from pydantic import Field
+from pydantic import Field, root_validator
 from pydantic_settings import BaseSettings
 
 import llama_cpp
@@ -67,12 +67,12 @@ class ModelSettings(BaseSettings):
     n_threads: int = Field(
         default=max(multiprocessing.cpu_count() // 2, 1),
         ge=1,
-        description="The number of threads to use.",
+        description="The number of threads to use. Use -1 for max cpu threads",
     )
     n_threads_batch: int = Field(
         default=max(multiprocessing.cpu_count(), 1),
         ge=0,
-        description="The number of threads to use when batch processing.",
+        description="The number of threads to use when batch processing. Use -1 for max cpu threads",
     )
     rope_scaling_type: int = Field(
         default=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
@@ -173,6 +173,16 @@ class ModelSettings(BaseSettings):
         default=True, description="Whether to print debug information."
     )
 
+    @root_validator(pre=True) # pre=True to ensure this runs before any other validation
+    def set_dynamic_defaults(cls, values):
+        # If n_threads or n_threads_batch is -1, set it to multiprocessing.cpu_count()
+        cpu_count = multiprocessing.cpu_count()
+        if values.get('n_threads', 0) == -1:
+            values['n_threads'] = cpu_count
+        if values.get('n_threads_batch', 0) == -1:
+            values['n_threads_batch'] = cpu_count
+        return values
+
 
 class ServerSettings(BaseSettings):
     """Server settings used to configure the FastAPI and Uvicorn server."""

0 commit comments
