3 files changed, with 3 additions and 1 deletion.
llama_cpp/llama.py
@@ -238,6 +238,7 @@ def __init__(
             n_ctx: Maximum context size.
             n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
             seed: Random seed. -1 for random.
+            n_gpu_layers: Number of layers to offload to GPU (-ngl). If -1, all layers are offloaded.
             f16_kv: Use half-precision for key/value cache.
             logits_all: Return logits for all tokens, not just the last token.
             vocab_only: Only load the vocabulary, no weights.
@@ -266,7 +267,7 @@ def __init__(
         self.params = llama_cpp.llama_context_default_params()
         self.params.n_ctx = n_ctx
-        self.params.n_gpu_layers = n_gpu_layers
+        self.params.n_gpu_layers = 0x7FFFFFFF if n_gpu_layers == -1 else n_gpu_layers  # 0x7FFFFFFF is INT32 max, will be auto-set to all layers
         self.params.seed = seed
         self.params.f16_kv = f16_kv
         self.params.logits_all = logits_all
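With this change a caller can pass n_gpu_layers=-1 to offload every layer without knowing the model's layer count up front; llama.cpp clamps the INT32 max sentinel down to the actual number of layers. A minimal usage sketch (the model path, prompt, and other arguments are illustrative, not part of this diff):

from llama_cpp import Llama

# Illustrative model path; substitute any local GGML model file.
llm = Llama(
    model_path="./models/7B/ggml-model.bin",
    n_ctx=512,
    n_gpu_layers=-1,  # mapped to 0x7FFFFFFF internally, i.e. offload all layers
)

output = llm("Q: Name the planets in the solar system. A:", max_tokens=48)
print(output["choices"][0]["text"])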
setup.py
@@ -15,6 +15,7 @@
     author_email="abetlen@gmail.com",
     license="MIT",
     package_dir={"llama_cpp": "llama_cpp", "llama_cpp.server": "llama_cpp/server"},
+    package_data={"llama_cpp": ["py.typed"]},
     packages=["llama_cpp", "llama_cpp.server"],
     install_requires=["typing-extensions>=4.5.0", "numpy>=1.20.0", "diskcache>=5.6.1"],
     extras_require={
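The py.typed marker (PEP 561) declares that llama_cpp ships inline type annotations, so type checkers such as mypy will check calls against them rather than treating the package as untyped; listing it in package_data ensures the marker file is actually included in built distributions. A sketch of the effect in downstream code (the helper function here is illustrative):

# downstream.py -- once py.typed is installed alongside the package,
# mypy resolves llama_cpp's own annotations here instead of
# reporting the module as installed but missing library stubs.
from llama_cpp import Llama

def load_model(path: str) -> Llama:
    return Llama(model_path=path)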