Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 0a7c980

Browse files
authored
gguf : track writer state, free unneeded tensors, cleanup (#3871)
1 parent 413503d commit 0a7c980
Copy full SHA for 0a7c980

File tree

Expand file tree / Collapse file tree

2 files changed

+54
-30
lines changed
Filter options
Expand file treeCollapse file tree

2 files changed

+54
-30
lines changed

‎gguf-py/gguf/gguf.py

Copy file name to clipboardExpand all lines: gguf-py/gguf/gguf.py
Lines changed: 53 additions & 29 deletions (+53 / −29)
Original file line numberDiff line numberDiff line change
@@ -646,18 +646,17 @@ def get_type(val):
646646
sys.exit()
647647

648648

649+
class WriterState(Enum):
    """Stages a GGUF output file moves through, in order.

    The writer starts EMPTY; each write_*_to_file step advances the
    state so out-of-order writes can be rejected with a clear error.
    """
    EMPTY = auto()    # nothing written yet
    HEADER = auto()   # magic/version/counts written
    KV_DATA = auto()  # key-value metadata written
    TI_DATA = auto()  # tensor-info records written
654+
655+
649656
class GGUFWriter:
650657
fout: BufferedWriter
651-
arch: str
652-
offset_tensor = 0
653-
data_alignment = GGUF_DEFAULT_ALIGNMENT
654-
kv_data = b""
655-
kv_data_count = 0
656-
ti_data = b""
657-
ti_data_count = 0
658-
use_temp_file: bool
659-
temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
660-
tensors: list[tuple[np.ndarray[Any, Any], int]]
658+
temp_file: tempfile.SpooledTemporaryFile[bytes] | None
659+
tensors: list[np.ndarray[Any, Any]]
661660

662661
@property
663662
def pack_prefix(self):
@@ -683,27 +682,47 @@ def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True
683682
GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
684683
GGUFValueType.BOOL: "?" ,
685684
}
686-
self.add_architecture()
685+
self.offset_tensor = 0
686+
self.data_alignment = GGUF_DEFAULT_ALIGNMENT
687+
self.kv_data = b""
688+
self.kv_data_count = 0
689+
self.ti_data = b""
690+
self.ti_data_count = 0
687691
self.use_temp_file = use_temp_file
692+
self.temp_file = None
688693
self.tensors = []
689694
endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
690695
print(f"This gguf file is for {endianess_str} only")
696+
self.state = WriterState.EMPTY
697+
698+
self.add_architecture()
691699

692700
def write_header_to_file(self):
    """Write the GGUF file header: magic, version, tensor count, KV count.

    Raises ValueError unless the writer is still EMPTY; on success the
    writer state advances to HEADER.
    """
    if self.state is not WriterState.EMPTY:
        raise ValueError(f'Expected output file to be empty, got {self.state}')

    out = self.fout
    # Magic is always little-endian; the remaining fields follow the
    # file's configured endianness via pack_prefix.
    out.write(struct.pack("<I", GGUF_MAGIC))
    for fmt, value in (
        (f"{self.pack_prefix}I", GGUF_VERSION),
        (f"{self.pack_prefix}Q", self.ti_data_count),
        (f"{self.pack_prefix}Q", self.kv_data_count),
    ):
        out.write(struct.pack(fmt, value))
    self.flush()
    self.state = WriterState.HEADER
699710

700711
def write_kv_data_to_file(self):
    """Flush the accumulated key-value metadata bytes to the file.

    Must follow write_header_to_file; advances the state to KV_DATA.
    """
    if self.state is not WriterState.HEADER:
        raise ValueError(f'Expected output file to contain the header, got {self.state}')

    self.fout.write(self.kv_data)
    self.flush()
    self.state = WriterState.KV_DATA
703718

704719
def write_ti_data_to_file(self):
    """Flush the accumulated tensor-info bytes to the file.

    Must follow write_kv_data_to_file; advances the state to TI_DATA.
    """
    if self.state is not WriterState.KV_DATA:
        raise ValueError(f'Expected output file to contain KV data, got {self.state}')

    self.fout.write(self.ti_data)
    self.flush()
    self.state = WriterState.TI_DATA
707726

708727
def add_key(self, key: str):
    """Append *key* to the KV section as a string, without a value-type tag.

    Thin wrapper over add_val; the value (and its type tag) is expected
    to be written by a following add_val call.
    """
    self.add_val(key, GGUFValueType.STRING, add_vtype=False)
@@ -796,6 +815,9 @@ def ggml_pad(x: int, n: int) -> int:
796815
return ((x + n - 1) // n) * n
797816

798817
def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32], tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None):
818+
if self.state is not WriterState.EMPTY:
819+
raise ValueError(f'Expected output file to be empty, got {self.state}')
820+
799821
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
800822

801823
encoded_name = name.encode("utf8")
@@ -825,23 +847,22 @@ def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequenc
825847
shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
826848
self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
827849

828-
pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes
829-
830-
if self.temp_file is None:
831-
self.tensors.append((tensor, pad))
850+
if self.temp_file is None:
851+
self.tensors.append(tensor)
832852
return
833853

834854
tensor.tofile(self.temp_file)
855+
self.write_padding(self.temp_file, tensor.nbytes)
835856

836-
if pad != 0:
837-
self.temp_file.write(bytes([0] * pad))
838-
839-
def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
857+
def write_padding(self, fp: IO[bytes], n: int, align: int | None = None):
    """Write zero bytes to *fp* so that a region of *n* bytes is padded
    up to the next multiple of *align* (defaults to self.data_alignment).

    Writes nothing when *n* is already aligned.
    """
    alignment = self.data_alignment if align is None else align
    pad = GGUFWriter.ggml_pad(n, alignment) - n
    if pad != 0:
        fp.write(bytes([0] * pad))
843861

844862
def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
863+
if self.state is not WriterState.TI_DATA:
864+
raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
865+
845866
if self.endianess==GGUFEndian.BIG:
846867
tensor.byteswap(inplace=True)
847868
self.write_padding(self.fout, self.fout.tell())
@@ -854,10 +875,13 @@ def write_tensors_to_file(self):
854875
self.write_padding(self.fout, self.fout.tell())
855876

856877
if self.temp_file is None:
857-
for (currtensor, currpad) in self.tensors:
858-
currtensor.tofile(self.fout)
859-
if currpad != 0:
860-
self.fout.write(bytes([0] * currpad))
878+
while True:
879+
try:
880+
tensor = self.tensors.pop(0)
881+
except IndexError:
882+
break
883+
tensor.tofile(self.fout)
884+
self.write_padding(self.fout, tensor.nbytes)
861885
return
862886

863887
self.temp_file.seek(0)
@@ -1002,11 +1026,8 @@ def add_pad_token_id(self, id: int):
10021026

10031027

10041028
class SpecialVocab:
1005-
load_merges: bool = False
1006-
merges: list[str] = []
1007-
special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
1008-
special_token_ids: dict[str, int] = {}
1009-
n_vocab: int | None = None
1029+
merges: list[str]
1030+
special_token_ids: dict[str, int]
10101031

10111032
def __init__(
10121033
self, path: str | os.PathLike[str], load_merges: bool = False,
@@ -1016,8 +1037,11 @@ def __init__(
10161037
self.special_token_ids = {}
10171038
self.n_vocab = n_vocab
10181039
self.load_merges = load_merges
1040+
self.merges = []
10191041
if special_token_types is not None:
10201042
self.special_token_types = special_token_types
1043+
else:
1044+
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
10211045
self._load(Path(path))
10221046

10231047
def _load(self, path: Path) -> None:

‎gguf-py/pyproject.toml

Copy file name to clipboardExpand all lines: gguf-py/pyproject.toml
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "gguf"
3-
version = "0.4.5"
3+
version = "0.4.6"
44
description = "Write ML models in GGUF for GGML"
55
authors = ["GGML <ggml@ggml.ai>"]
66
packages = [

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.