Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit da064a8

Browse files
committed
convert.py: Outfile default name change and additional metadata support
1 parent a307375 commit da064a8
Copy full SHA for da064a8

File tree

Expand file treeCollapse file tree

4 files changed

+143
-24
lines changed
Filter options
Expand file treeCollapse file tree

4 files changed

+143
-24
lines changed

‎convert.py

Copy file name to clipboardExpand all lines: convert.py
+134-24Lines changed: 134 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,39 @@ def load(model_plus: ModelPlus) -> Params:
336336

337337
return params
338338

339+
@dataclass
class Metadata:
    """Optional model metadata (name, authorship, provenance) supplied by the
    user as a JSON file and written into the GGUF header by ``add_meta_model``."""
    name: Optional[str] = None
    author: Optional[str] = None
    version: Optional[str] = None
    url: Optional[str] = None
    description: Optional[str] = None
    licence: Optional[str] = None
    source_url: Optional[str] = None
    source_hf_repo: Optional[str] = None

    @staticmethod
    def load(metadata_path: Path) -> "Metadata":
        """Load metadata from a JSON file keyed by GGUF key names
        (e.g. ``"general.name"``).

        Returns an empty Metadata instance when the path is None or does
        not exist, so callers never have to special-case a missing file.
        """
        if metadata_path is None or not metadata_path.exists():
            return Metadata()

        with open(metadata_path, 'r') as file:
            data = json.load(file)

        # Create a new Metadata instance
        metadata = Metadata()

        # Assigning values to Metadata attributes if they exist in the JSON file
        metadata.name = data.get("general.name")
        metadata.author = data.get("general.author")
        metadata.version = data.get("general.version")
        metadata.url = data.get("general.url")
        metadata.description = data.get("general.description")
        # BUG FIX: the dataclass field is spelled "licence"; the original
        # assigned to "metadata.license", which created a stray attribute and
        # left Metadata.licence unset (so add_meta_model never emitted it).
        metadata.licence = data.get("general.license")
        metadata.source_url = data.get("general.source_url")
        metadata.source_hf_repo = data.get("general.source_hf_repo")

        return metadata
339372

340373
#
341374
# vocab
@@ -1053,21 +1086,41 @@ class OutputFile:
10531086
def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
10541087
self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
10551088

1056-
def add_meta_arch(self, params: Params) -> None:
1089+
def add_meta_model(self, params: Params, metadata: Metadata) -> None:
    """Write model-level metadata (name, authorship, provenance) to the GGUF file."""
    # Resolve the model name: explicit metadata wins, then the model's
    # parent directory name, then a context-length heuristic.
    name = "LLaMA"
    if metadata is not None and metadata.name is not None:
        name = metadata.name
    elif params.path_model is not None:
        name = str(params.path_model.parent).split("/")[-1]
    elif params.n_ctx == 4096:
        # Heuristic detection of LLaMA v2 model
        name = "LLaMA v2"

    self.gguf.add_name(name)

    if metadata is not None:
        # Emit each optional provenance field only when it was provided.
        optional_fields = (
            (metadata.author,         self.gguf.add_author),
            (metadata.version,        self.gguf.add_version),
            (metadata.url,            self.gguf.add_url),
            (metadata.description,    self.gguf.add_description),
            (metadata.licence,        self.gguf.add_licence),
            (metadata.source_url,     self.gguf.add_source_url),
            (metadata.source_hf_repo, self.gguf.add_source_hf_repo),
        )
        for value, write in optional_fields:
            if value is not None:
                write(value)
1117+
1118+
def add_meta_arch(self, params: Params) -> None:
1119+
# Metadata About The Neural Architecture Itself
1120+
self.gguf.add_context_length(params.n_ctx)
1121+
self.gguf.add_embedding_length(params.n_embd)
1122+
self.gguf.add_block_count(params.n_layer)
1123+
self.gguf.add_feed_forward_length(params.n_ff)
10711124
self.gguf.add_rope_dimension_count(params.n_embd // params.n_head)
10721125
self.gguf.add_head_count (params.n_head)
10731126
self.gguf.add_head_count_kv (params.n_head_kv)
@@ -1170,13 +1223,14 @@ def close(self) -> None:
11701223
@staticmethod
11711224
def write_vocab_only(
11721225
fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
1173-
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
1226+
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata = None,
11741227
) -> None:
11751228
check_vocab_size(params, vocab, pad_vocab=pad_vocab)
11761229

11771230
of = OutputFile(fname_out, endianess=endianess)
11781231

11791232
# meta data
1233+
of.add_meta_model(params, metadata)
11801234
of.add_meta_arch(params)
11811235
of.add_meta_vocab(vocab)
11821236
of.add_meta_special_vocab(svocab)
@@ -1203,12 +1257,14 @@ def write_all(
12031257
fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
12041258
concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
12051259
pad_vocab: bool = False,
1260+
metadata: Metadata = None,
12061261
) -> None:
12071262
check_vocab_size(params, vocab, pad_vocab=pad_vocab)
12081263

12091264
of = OutputFile(fname_out, endianess=endianess)
12101265

12111266
# meta data
1267+
of.add_meta_model(params, metadata)
12121268
of.add_meta_arch(params)
12131269
if isinstance(vocab, Vocab):
12141270
of.add_meta_vocab(vocab)
@@ -1244,6 +1300,37 @@ def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileT
12441300
raise ValueError(f"Unexpected combination of types: {name_to_type}")
12451301

12461302

1303+
def model_parameter_count(model: LazyModel) -> int:
    """Return the total number of parameters (weights) across all tensors in *model*.

    Each tensor contributes the product of its shape dimensions; a 0-d tensor
    counts as a single scalar parameter.
    """
    total_model_parameters = 0
    # FIX: the original iterated enumerate(model.items()) and never used the
    # index or the name — iterate the tensors directly.
    for lazy_tensor in model.values():
        params_in_tensor = 1
        for dim in lazy_tensor.shape:
            params_in_tensor *= dim
        total_model_parameters += params_in_tensor
    return total_model_parameters
1311+
1312+
1313+
def model_parameter_count_rounded_notation(model_params_count: int) -> str:
    """Return a human-readable parameter count, e.g. 7_000_000_000 -> '7B'.

    Uses T/B/M suffixes for counts above 1e12/1e9/1e6 respectively and falls
    back to a K (thousands) suffix for anything smaller.
    """
    # (threshold, multiplier, suffix) — multiplier mirrors the original's
    # "* 1e-N" scaling so rounding behaves identically.
    scales = (
        (1e12, 1e-12, "T"),  # Trillions Of Parameters
        (1e9,  1e-9,  "B"),  # Billions Of Parameters
        (1e6,  1e-6,  "M"),  # Millions Of Parameters
    )
    for threshold, multiplier, suffix in scales:
        if model_params_count > threshold:
            return f"{round(model_params_count * multiplier)}{suffix}"
    # Thousands Of Parameters
    return f"{round(model_params_count * 1e-3)}K"
1332+
1333+
12471334
def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyModel:
12481335
return {name: tensor.astype(output_type.type_for_tensor(name, tensor))
12491336
for (name, tensor) in model.items()}
@@ -1423,13 +1510,26 @@ def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) ->
14231510
return vocab, special_vocab
14241511

14251512

1426-
def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
1427-
namestr = {
1428-
GGMLFileType.AllF32: "f32",
1429-
GGMLFileType.MostlyF16: "f16",
1430-
GGMLFileType.MostlyQ8_0:"q8_0",
1513+
def default_outfile(model_paths: list[Path], file_type: GGMLFileType, params: Params, model_params_count: int, metadata: Metadata) -> Path:
1514+
quantization = {
1515+
GGMLFileType.AllF32: "F32",
1516+
GGMLFileType.MostlyF16: "F16",
1517+
GGMLFileType.MostlyQ8_0: "Q8_0",
14311518
}[file_type]
1432-
ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
1519+
1520+
parameters = model_parameter_count_rounded_notation(model_params_count)
1521+
1522+
version = ""
1523+
if metadata is not None and metadata.version is not None:
1524+
version = f"-{metadata.version}"
1525+
1526+
name = "ggml-model"
1527+
if metadata is not None and metadata.name is not None:
1528+
name = metadata.name
1529+
elif params.path_model is not None:
1530+
name = params.path_model.name
1531+
1532+
ret = model_paths[0].parent / f"{name}{version}-{parameters}-{quantization}.gguf"
14331533
if ret in model_paths:
14341534
sys.stderr.write(
14351535
f"Error: Default output path ({ret}) would overwrite the input. "
@@ -1466,8 +1566,12 @@ def main(args_in: list[str] | None = None) -> None:
14661566
parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
14671567
parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
14681568
parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
1569+
parser.add_argument("--metadata", type=Path, help="Specify the path for a metadata file")
14691570

14701571
args = parser.parse_args(args_in)
1572+
1573+
metadata = Metadata.load(args.metadata)
1574+
14711575
if args.no_vocab and args.vocab_only:
14721576
raise ValueError("--vocab-only does not make sense with --no-vocab")
14731577

@@ -1481,6 +1585,9 @@ def main(args_in: list[str] | None = None) -> None:
14811585
else:
14821586
model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
14831587

1588+
model_params_count = model_parameter_count(model_plus.model)
1589+
print(f"model parameters count : {model_params_count} ({model_parameter_count_rounded_notation(model_params_count)})")
1590+
14841591
if args.dump:
14851592
do_dump_model(model_plus)
14861593
return
@@ -1520,27 +1627,30 @@ def main(args_in: list[str] | None = None) -> None:
15201627
raise ValueError("need --outfile if using --vocab-only")
15211628
outfile = args.outfile
15221629
OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
1523-
endianess=endianess, pad_vocab=args.pad_vocab)
1630+
endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
15241631
print(f"Wrote {outfile}")
15251632
return
15261633

15271634
if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
15281635
vocab = model_plus.vocab
15291636

15301637
print(f"Vocab info: {vocab}")
1531-
print(f"Special vocab info: {special_vocab}")
1638+
special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
1639+
load_merges = True,
1640+
n_vocab = vocab.vocab_size)
15321641

1642+
print(f"Special vocab info: {special_vocab}")
15331643
model = model_plus.model
15341644
model = convert_model_names(model, params, args.skip_unknown)
15351645
ftype = pick_output_type(model, args.outtype)
15361646
model = convert_to_output_type(model, ftype)
1537-
outfile = args.outfile or default_outfile(model_plus.paths, ftype)
1647+
outfile = args.outfile or default_outfile(model_plus.paths, ftype, params, model_params_count, metadata)
15381648

15391649
params.ftype = ftype
15401650
print(f"Writing {outfile}, format {ftype}")
15411651

15421652
OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
1543-
concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
1653+
concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab, metadata=metadata)
15441654
print(f"Wrote {outfile}")
15451655

15461656

‎gguf-py/gguf/constants.py

Copy file name to clipboardExpand all lines: gguf-py/gguf/constants.py
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class General:
2424
ALIGNMENT = "general.alignment"
2525
NAME = "general.name"
2626
AUTHOR = "general.author"
27+
VERSION = "general.version"
2728
URL = "general.url"
2829
DESCRIPTION = "general.description"
2930
LICENSE = "general.license"

‎gguf-py/gguf/gguf_writer.py

Copy file name to clipboardExpand all lines: gguf-py/gguf/gguf_writer.py
+6Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,9 @@ def add_architecture(self) -> None:
296296
def add_author(self, author: str) -> None:
297297
self.add_string(Keys.General.AUTHOR, author)
298298

299+
def add_version(self, version: str) -> None:
    """Write the model version string under the ``general.version`` metadata key."""
    self.add_string(Keys.General.VERSION, version)
301+
299302
def add_tensor_data_layout(self, layout: str) -> None:
300303
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
301304

@@ -305,6 +308,9 @@ def add_url(self, url: str) -> None:
305308
def add_description(self, description: str) -> None:
306309
self.add_string(Keys.General.DESCRIPTION, description)
307310

311+
def add_licence(self, licence: str) -> None:
    """Write the model licence string under the ``general.license`` metadata key."""
    self.add_string(Keys.General.LICENSE, licence)
313+
308314
def add_source_url(self, url: str) -> None:
309315
self.add_string(Keys.General.SOURCE_URL, url)
310316

‎llama.cpp

Copy file name to clipboardExpand all lines: llama.cpp
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ enum llm_kv {
261261
LLM_KV_GENERAL_ALIGNMENT,
262262
LLM_KV_GENERAL_NAME,
263263
LLM_KV_GENERAL_AUTHOR,
264+
LLM_KV_GENERAL_VERSION,
264265
LLM_KV_GENERAL_URL,
265266
LLM_KV_GENERAL_DESCRIPTION,
266267
LLM_KV_GENERAL_LICENSE,
@@ -330,6 +331,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
330331
{ LLM_KV_GENERAL_ALIGNMENT, "general.alignment" },
331332
{ LLM_KV_GENERAL_NAME, "general.name" },
332333
{ LLM_KV_GENERAL_AUTHOR, "general.author" },
334+
{ LLM_KV_GENERAL_VERSION, "general.version" },
333335
{ LLM_KV_GENERAL_URL, "general.url" },
334336
{ LLM_KV_GENERAL_DESCRIPTION, "general.description" },
335337
{ LLM_KV_GENERAL_LICENSE, "general.license" },

0 commit comments

Comments
0 (0)
Morty Proxy: this is a proxied and sanitized view of the page; visit the original site.