Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit d9bce17

Browse files
committed
Update server params
1 parent 3720c73 commit d9bce17
Copy full SHA for d9bce17

File tree

Expand file tree / Collapse file tree

1 file changed

+9
-7
lines changed
Filter options
Expand file tree / Collapse file tree

1 file changed

+9
-7
lines changed

‎llama_cpp/server/app.py

Copy file name to clipboard | Expand all lines: llama_cpp/server/app.py
+9 -7 | Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
import numpy.typing as npt
2828

2929

30+
# Disable warning for model and model_alias settings
3031
BaseSettings.model_config['protected_namespaces'] = ()
3132

3233

@@ -58,14 +59,10 @@ class Settings(BaseSettings):
5859
description="Split layers across multiple GPUs in proportion.",
5960
)
6061
rope_freq_base: float = Field(
61-
default=10000, ge=1, description="RoPE base frequency"
62+
default=0.0, description="RoPE base frequency"
6263
)
6364
rope_freq_scale: float = Field(
64-
default=1.0, description="RoPE frequency scaling factor"
65-
)
66-
low_vram: bool = Field(
67-
default=False,
68-
description="Whether to use less VRAM. This will reduce performance.",
65+
default=0.0, description="RoPE frequency scaling factor"
6966
)
7067
mul_mat_q: bool = Field(
7168
default=True, description="if true, use experimental mul_mat_q kernels"
@@ -106,6 +103,10 @@ class Settings(BaseSettings):
106103
default=False,
107104
description="Enable NUMA support.",
108105
)
106+
chat_format: str = Field(
107+
default="llama-2",
108+
description="Chat format to use.",
109+
)
109110
cache: bool = Field(
110111
default=False,
111112
description="Use a cache to reduce processing times for evaluated prompts.",
@@ -349,7 +350,6 @@ def create_app(settings: Optional[Settings] = None):
349350
tensor_split=settings.tensor_split,
350351
rope_freq_base=settings.rope_freq_base,
351352
rope_freq_scale=settings.rope_freq_scale,
352-
low_vram=settings.low_vram,
353353
mul_mat_q=settings.mul_mat_q,
354354
f16_kv=settings.f16_kv,
355355
logits_all=settings.logits_all,
@@ -361,6 +361,8 @@ def create_app(settings: Optional[Settings] = None):
361361
last_n_tokens_size=settings.last_n_tokens_size,
362362
lora_base=settings.lora_base,
363363
lora_path=settings.lora_path,
364+
numa=settings.numa,
365+
chat_format=settings.chat_format,
364366
verbose=settings.verbose,
365367
)
366368
if settings.cache:

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.