We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0e94a70 commit 14da46f — Copy full SHA for 14da46f
llama_cpp/server/app.py
@@ -45,6 +45,10 @@ class Settings(BaseSettings):
45
default=False,
46
description="Use a cache to reduce processing times for evaluated prompts.",
47
)
48
+ cache_size: int = Field(
49
+ default=2 << 30,
50
+ description="The size of the cache in bytes. Only used if cache is True.",
51
+ )
52
vocab_only: bool = Field(
53
default=False, description="Whether to only return the vocabulary."
54
@@ -89,7 +93,7 @@ def create_app(settings: Optional[Settings] = None):
89
93
verbose=settings.verbose,
90
94
91
95
if settings.cache:
92
- cache = llama_cpp.LlamaCache()
96
+ cache = llama_cpp.LlamaCache(capacity_bytes=settings.cache_size)
97
llama.set_cache(cache)
98
return app
99
0 commit comments