Commit 840e7bf (1 parent: a8bd14d)

convert.py: Outfile default name change and additional metadata support

1 file changed: convert.py (+142, -25 lines)
@@ -23,7 +23,7 @@
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from dataclasses import dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, ClassVar, IO, Iterable, Literal, Protocol, TypeVar, runtime_checkable
+from typing import TYPE_CHECKING, Any, Callable, ClassVar, IO, Iterable, Literal, Protocol, TypeVar, runtime_checkable, Optional
 
 import numpy as np
 from sentencepiece import SentencePieceProcessor
@@ -337,10 +337,46 @@ def load(model_plus: ModelPlus) -> Params:
         return params
 
 
+@dataclass
+class Metadata:
+    name: Optional[str] = None
+    author: Optional[str] = None
+    version: Optional[str] = None
+    url: Optional[str] = None
+    description: Optional[str] = None
+    licence: Optional[str] = None
+    source_url: Optional[str] = None
+    source_hf_repo: Optional[str] = None
+
+    @staticmethod
+    def load(metadata_path: Optional[Path]) -> "Metadata":
+        if metadata_path is None or not metadata_path.exists():
+            return Metadata()
+
+        with open(metadata_path, 'r') as file:
+            data = json.load(file)
+
+        # Create a new Metadata instance
+        metadata = Metadata()
+
+        # Assign values to Metadata attributes if they exist in the JSON file
+        metadata.name = data.get("general.name")
+        metadata.author = data.get("general.author")
+        metadata.version = data.get("general.version")
+        metadata.url = data.get("general.url")
+        metadata.description = data.get("general.description")
+        metadata.licence = data.get("general.license")
+        metadata.source_url = data.get("general.source_url")
+        metadata.source_hf_repo = data.get("general.source_hf_repo")
+
+        return metadata
+
+
 #
 # vocab
 #
+
 @runtime_checkable
 class BaseVocab(Protocol):
     tokenizer_model: ClassVar[str]
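As a usage sketch (not part of the diff): the metadata file consumed by `Metadata.load()` is flat JSON keyed by the `general.*` names queried above. The file path and values below are invented placeholders, and the snippet assumes it runs with convert.py's definitions in scope:

    import json
    from pathlib import Path

    metadata_path = Path("metadata.json")          # hypothetical location
    metadata_path.write_text(json.dumps({
        "general.name":           "TinyLLaMA",
        "general.author":         "Example Author",
        "general.version":        "v1.0",
        "general.license":        "apache-2.0",
        "general.source_hf_repo": "example/tinyllama",
    }))

    metadata = Metadata.load(metadata_path)        # Metadata as defined in the hunk above
    assert metadata.name == "TinyLLaMA"
    assert metadata.licence == "apache-2.0"        # note the "licence" field spelling
    assert metadata.url is None                    # absent keys simply stay None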
@@ -1053,21 +1089,41 @@ class OutputFile:
     def __init__(self, fname_out: Path, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
         self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
 
-    def add_meta_arch(self, params: Params) -> None:
+    def add_meta_model(self, params: Params, metadata: Metadata) -> None:
+        # Metadata About The Model And Its Provenance
         name = "LLaMA"
-
-        # TODO: better logic to determine model name
-        if params.n_ctx == 4096:
-            name = "LLaMA v2"
+        if metadata is not None and metadata.name is not None:
+            name = metadata.name
         elif params.path_model is not None:
-            name = str(params.path_model.parent).split('/')[-1]
-
-        self.gguf.add_name                (name)
-        self.gguf.add_vocab_size          (params.n_vocab)
-        self.gguf.add_context_length      (params.n_ctx)
-        self.gguf.add_embedding_length    (params.n_embd)
-        self.gguf.add_block_count         (params.n_layer)
-        self.gguf.add_feed_forward_length (params.n_ff)
+            name = str(params.path_model.parent).split("/")[-1]
+        elif params.n_ctx == 4096:
+            # Heuristic detection of LLaMA v2 model
+            name = "LLaMA v2"
+
+        self.gguf.add_name(name)
+
+        if metadata is not None:
+            if metadata.author is not None:
+                self.gguf.add_author(metadata.author)
+            if metadata.version is not None:
+                self.gguf.add_version(metadata.version)
+            if metadata.url is not None:
+                self.gguf.add_url(metadata.url)
+            if metadata.description is not None:
+                self.gguf.add_description(metadata.description)
+            if metadata.licence is not None:
+                self.gguf.add_licence(metadata.licence)
+            if metadata.source_url is not None:
+                self.gguf.add_source_url(metadata.source_url)
+            if metadata.source_hf_repo is not None:
+                self.gguf.add_source_hf_repo(metadata.source_hf_repo)
+
+    def add_meta_arch(self, params: Params) -> None:
+        # Metadata About The Neural Architecture Itself
+        self.gguf.add_context_length(params.n_ctx)
+        self.gguf.add_embedding_length(params.n_embd)
+        self.gguf.add_block_count(params.n_layer)
+        self.gguf.add_feed_forward_length(params.n_ff)
         self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
         self.gguf.add_head_count          (params.n_head)
         self.gguf.add_head_count_kv       (params.n_head_kv)
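The naming precedence changes here: an explicit `metadata.name` now wins, the model's directory name comes second, and the `n_ctx == 4096` heuristic is demoted to a last resort. A standalone sketch of that chain (a hypothetical helper, not in the diff; the real method writes into the GGUF writer rather than returning a string):

    from pathlib import Path
    from typing import Optional

    def resolve_model_name(metadata_name: Optional[str], path_model: Optional[Path], n_ctx: int) -> str:
        # Mirrors the chain in add_meta_model(): explicit metadata first,
        # then the model's parent directory name, then the context-length heuristic.
        if metadata_name is not None:
            return metadata_name
        if path_model is not None:
            return str(path_model.parent).split("/")[-1]
        if n_ctx == 4096:
            return "LLaMA v2"
        return "LLaMA"

    # Even a 4096-context model keeps its explicit name now:
    assert resolve_model_name("Mistral-7B-v0.1", None, 4096) == "Mistral-7B-v0.1"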
@@ -1170,13 +1226,14 @@ def close(self) -> None:
     @staticmethod
     def write_vocab_only(
         fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
-        endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
+        endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata = None,
     ) -> None:
         check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
         # meta data
+        of.add_meta_model(params, metadata)
         of.add_meta_arch(params)
         of.add_meta_vocab(vocab)
         of.add_meta_special_vocab(svocab)
@@ -1203,12 +1260,14 @@ def write_all(
         fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
         concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
         pad_vocab: bool = False,
+        metadata: Metadata = None,
     ) -> None:
         check_vocab_size(params, vocab, pad_vocab=pad_vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
 
         # meta data
+        of.add_meta_model(params, metadata)
         of.add_meta_arch(params)
         if isinstance(vocab, Vocab):
             of.add_meta_vocab(vocab)
@@ -1244,6 +1303,37 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
     raise ValueError(f"Unexpected combination of types: {name_to_type}")
 
 
+def model_parameter_count(model: LazyModel) -> int:
+    total_model_parameters = 0
+    for i, (name, lazy_tensor) in enumerate(model.items()):
+        sum_weights_in_tensor = 1
+        for dim in lazy_tensor.shape:
+            sum_weights_in_tensor *= dim
+        total_model_parameters += sum_weights_in_tensor
+    return total_model_parameters
+
+
+def model_parameter_count_rounded_notation(model_params_count: int) -> str:
+    if model_params_count > 1e12:
+        # Trillions Of Parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9:
+        # Billions Of Parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6:
+        # Millions Of Parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands Of Parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+
+    return f"{round(scaled_model_params)}{scale_suffix}"
+
+
 def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
     return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
             for (name, tensor) in model.items()}
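A quick sanity check of the rounding helper (not part of the diff; the counts are illustrative approximations for the models named in the comments):

    assert model_parameter_count_rounded_notation(7_241_732_096)  == "7B"    # ~7B model
    assert model_parameter_count_rounded_notation(46_702_792_704) == "47B"   # ~8x7B MoE total
    assert model_parameter_count_rounded_notation(124_439_808)    == "124M"  # ~GPT-2-small scale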
@@ -1423,13 +1513,30 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
         return vocab, special_vocab
 
 
-def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
-    namestr = {
-        GGMLFileType.AllF32:    "f32",
-        GGMLFileType.MostlyF16: "f16",
-        GGMLFileType.MostlyQ8_0:"q8_0",
+def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
+    quantization = {
+        GGMLFileType.AllF32:    "F32",
+        GGMLFileType.MostlyF16: "F16",
+        GGMLFileType.MostlyQ8_0: "Q8_0",
     }[file_type]
-    ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
+
+    parameters = model_parameter_count_rounded_notation(model_params_count)
+
+    expert_count = ""
+    if params.n_experts is not None:
+        expert_count = f"{params.n_experts}x"
+
+    version = ""
+    if metadata is not None and metadata.version is not None:
+        version = f"-{metadata.version}"
+
+    name = "ggml-model"
+    if metadata is not None and metadata.name is not None:
+        name = metadata.name
+    elif params.path_model is not None:
+        name = params.path_model.name
+
+    ret = model_paths[0].parent / f"{name}{version}-{expert_count}{parameters}-{quantization}.gguf"
     if ret in model_paths:
         sys.stderr.write(
            f"Error: Default output path ({ret}) would overwrite the input. "
@@ -1466,8 +1573,12 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
     parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
     parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
+    parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file")
 
     args = parser.parse_args(args_in)
+
+    metadata = Metadata.load(args.metadata)
+
     if args.no_vocab and args.vocab_only:
         raise ValueError("--vocab-only does not make sense with --no-vocab")
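End-to-end, the new flag could be exercised like this (a hedged sketch, not part of the diff; the model directory is invented, and `q8_0` is one of the existing `--outtype` choices):

    # Hypothetical programmatic invocation via main()'s args_in parameter
    main(args_in=["models/mistral-7b", "--outtype", "q8_0", "--metadata", "metadata.json"])
    # CLI equivalent: python convert.py models/mistral-7b --outtype q8_0 --metadata metadata.json

`Metadata.load(args.metadata)` runs right after argument parsing, and the result threads through `write_all()`/`write_vocab_only()` and `default_outfile()` below.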
@@ -1481,6 +1592,9 @@ def main(args_in: list[str] | None = None) -> None:
     else:
         model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
 
+    model_params_count = model_parameter_count(model_plus.model)
+    print(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
+
     if args.dump:
         do_dump_model(model_plus)
         return
@@ -1520,27 +1634,30 @@ def main(args_in: list[str] | None = None) -> None:
             raise ValueError("need --outfile if using --vocab-only")
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
-                                    endianess=endianess, pad_vocab=args.pad_vocab)
+                                    endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
         print(f"Wrote {outfile}")
         return
 
     if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
         vocab = model_plus.vocab
 
     print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
+    special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
+                                      load_merges = True,
+                                      n_vocab = vocab.vocab_size)
 
+    print(f"Special vocab info: {special_vocab}")
     model = model_plus.model
     model = convert_model_names(model, params, args.skip_unknown)
     ftype = pick_output_type(model, args.outtype)
     model = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype)
+    outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)
 
     params.ftype = ftype
     print(f"Writing {outfile}, format {ftype}")
 
     OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
-                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
+                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
     print(f"Wrote {outfile}")