Commit 9f21f54

Author: Shouyi Wang (committed)

Add tensor split

1 parent 99f064e · commit 9f21f54
2 files changed: +17 -1 lines changed

llama_cpp/llama.py: 12 additions & 1 deletion
@@ -19,14 +19,14 @@
 from collections import deque, OrderedDict
 
 import diskcache
+import ctypes
 
 from . import llama_cpp
 from .llama_types import *
 
 import numpy as np
 import numpy.typing as npt
 
-
 class BaseLlamaCache(ABC):
     """Base cache class for a llama.cpp model."""
@@ -207,6 +207,7 @@ def __init__(
         n_ctx: int = 512,
         n_parts: int = -1,
         n_gpu_layers: int = 0,
+        tensor_split: list[float] = None,
         seed: int = 1337,
         f16_kv: bool = True,
         logits_all: bool = False,
@@ -248,12 +249,20 @@ def __init__(
         Returns:
             A Llama instance.
         """
+        if tensor_split is None:
+            tensor_split = [0.0] * llama_cpp.LLAMA_MAX_DEVICES.value
+
+        #Type conversion and expand the list to the length of LLAMA_MAX_DEVICES
+        FloatArray = ctypes.c_float * llama_cpp.LLAMA_MAX_DEVICES.value
+        c_tensor_split = FloatArray(*tensor_split)
+
         self.verbose = verbose
         self.model_path = model_path
 
         self.params = llama_cpp.llama_context_default_params()
         self.params.n_ctx = n_ctx
         self.params.n_gpu_layers = n_gpu_layers
+        self.params.tensor_split = c_tensor_split
         self.params.seed = seed
         self.params.f16_kv = f16_kv
         self.params.logits_all = logits_all
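The conversion in this hunk uses the standard ctypes array pattern: multiplying ctypes.c_float by a length produces a fixed-size array type, and constructing it with fewer initializers than its length zero-fills the remaining slots. A minimal standalone sketch, assuming a hypothetical device count of 4 in place of llama_cpp.LLAMA_MAX_DEVICES.value:

    import ctypes

    MAX_DEVICES = 4  # hypothetical stand-in for llama_cpp.LLAMA_MAX_DEVICES.value
    FloatArray = ctypes.c_float * MAX_DEVICES   # fixed-size array type of 4 c_floats
    c_tensor_split = FloatArray(*[0.75, 0.25])  # unfilled slots default to 0.0
    print(list(c_tensor_split))                 # [0.75, 0.25, 0.0, 0.0]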
@@ -1494,6 +1503,7 @@ def __getstate__(self):
             model_path=self.model_path,
             n_ctx=self.params.n_ctx,
             n_gpu_layers=self.params.n_gpu_layers,
+            tensor_split=self.params.tensor_split,
             seed=self.params.seed,
             f16_kv=self.params.f16_kv,
             logits_all=self.params.logits_all,
@@ -1518,6 +1528,7 @@ def __setstate__(self, state):
             n_ctx=state["n_ctx"],
             n_parts=state["n_parts"],
             n_gpu_layers=state["n_gpu_layers"],
+            tensor_split=state["tensor_split"],
             seed=state["seed"],
             f16_kv=state["f16_kv"],
             logits_all=state["logits_all"],
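With both halves of the change in place, a caller can split tensors across multiple GPUs by proportion. A usage sketch (the model path, layer count, and split ratios below are illustrative, not taken from this commit):

    from llama_cpp import Llama

    llm = Llama(
        model_path="./models/7B/ggml-model.bin",  # illustrative path
        n_gpu_layers=40,             # offload layers to the GPUs
        tensor_split=[0.75, 0.25],   # 3:1 proportion across device 0 and device 1
    )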

llama_cpp/server/app.py: 5 additions & 0 deletions
@@ -31,6 +31,10 @@ class Settings(BaseSettings):
         ge=0,
         description="The number of layers to put on the GPU. The rest will be on the CPU.",
     )
+    tensor_split: List[float] = Field(
+        default=None,
+        description="Split layers across multiple GPUs in proportion.",
+    )
     seed: int = Field(
         default=1337, description="Random seed. -1 for random."
     )
@@ -117,6 +121,7 @@ def create_app(settings: Optional[Settings] = None):
     llama = llama_cpp.Llama(
         model_path=settings.model,
         n_gpu_layers=settings.n_gpu_layers,
+        tensor_split=settings.tensor_split,
         seed=settings.seed,
         f16_kv=settings.f16_kv,
         use_mlock=settings.use_mlock,
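Since Settings is a pydantic BaseSettings model, the new field can also be supplied when building the server app programmatically. A sketch under the same illustrative assumptions as above:

    from llama_cpp.server.app import Settings, create_app

    settings = Settings(
        model="./models/7B/ggml-model.bin",  # illustrative path
        n_gpu_layers=40,
        tensor_split=[0.75, 0.25],
    )
    app = create_app(settings)  # FastAPI app backed by the configured Llama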
