Commit 48edda3

convert : update Falcon script for new HF config (#3448)

Also adds Falcon-180B support. Closes #3049

Co-authored-by: jb <jonathan.t.barnard@gmail.com>

1 parent: 45eba93
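
For context: the change is driven by the rename from the old RW-style Falcon config to the current Hugging Face one. A minimal sketch of the field mapping, taken from the diff below (the dict itself is illustrative and not part of the script):

# Illustrative summary of the config renames handled by this commit
# (old RW-style key -> new Hugging Face Falcon key):
OLD_TO_NEW_HPARAM_KEYS = {
    "n_layer":   "num_hidden_layers",
    "n_head":    "num_attention_heads",
    "n_head_kv": "num_kv_heads",
}

# The expected architecture string changes as well:
OLD_ARCHITECTURE = "RWForCausalLM"
NEW_ARCHITECTURE = "FalconForCausalLM"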

1 file changed: convert-falcon-hf-to-gguf.py (79 additions, 64 deletions)

@@ -4,6 +4,7 @@
 from __future__ import annotations
 
 import argparse
+import contextlib
 import json
 import os
 import struct
@@ -20,10 +21,10 @@
 import gguf
 
 
-def count_model_parts(dir_model: Path) -> int:
+def count_model_parts(dir_model: Path, prefix: str) -> int:
     num_parts = 0
     for filename in os.listdir(dir_model):
-        if filename.startswith("pytorch_model-"):
+        if filename.startswith(prefix):
            num_parts += 1
 
    if num_parts > 0:
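
With the new prefix argument, the same helper counts either safetensors or PyTorch shards. A quick sketch of what the two prefixes match, using hypothetical shard names that follow the patterns generated later in the script:

# Hypothetical contents of a two-shard checkpoint directory:
shards = [
    "model-00001-of-00002.safetensors",
    "model-00002-of-00002.safetensors",
    "config.json",
]

# The two prefixes the converter now probes for:
print(sum(name.startswith("model-00") for name in shards))        # 2 -> safetensors shards found
print(sum(name.startswith("pytorch_model-") for name in shards))  # 0 -> would fall back to .bin shards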
@@ -77,30 +78,36 @@ def parse_args() -> argparse.Namespace:
 with open(dir_model / "config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 
-if hparams["architectures"][0] != "RWForCausalLM":
+if hparams["architectures"][0] != "FalconForCausalLM":
     print("Model architecture not supported: " + hparams["architectures"][0])
 
     sys.exit(1)
 
 # get number of model parts
-num_parts = count_model_parts(dir_model)
+num_parts = count_model_parts(dir_model, "model-00")
+if num_parts:
+    is_safetensors = True
+    from safetensors import safe_open
+else:
+    is_safetensors = False
+    num_parts = count_model_parts(dir_model, "pytorch_model-")
 
 ARCH=gguf.MODEL_ARCH.FALCON
 gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
 print("gguf: get model metadata")
 
-block_count = hparams["n_layer"]
+block_count = hparams["num_hidden_layers"]
 
 gguf_writer.add_name("Falcon")
 gguf_writer.add_context_length(2048) # not in config.json
 gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
 gguf_writer.add_embedding_length(hparams["hidden_size"])
 gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
 gguf_writer.add_block_count(block_count)
-gguf_writer.add_head_count(hparams["n_head"])
-if "n_head_kv" in hparams:
-    gguf_writer.add_head_count_kv(hparams["n_head_kv"])
+gguf_writer.add_head_count(hparams["num_attention_heads"])
+if "num_kv_heads" in hparams:
+    gguf_writer.add_head_count_kv(hparams["num_kv_heads"])
 else:
     gguf_writer.add_head_count_kv(1)
 gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
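
The head-count logic keeps the old fallback: configs without num_kv_heads (the multi-query, Falcon-7B-style case) are written with a KV head count of 1, while grouped-query models carry the value in their config. A standalone sketch of that decision, using made-up hyperparameter values rather than a real config.json:

# Made-up hparams for illustration only; real values come from config.json.
hparams_mqa = {"hidden_size": 4544, "num_attention_heads": 71}                       # no num_kv_heads
hparams_gqa = {"hidden_size": 8192, "num_attention_heads": 128, "num_kv_heads": 8}   # grouped-query

def kv_head_count(hparams: dict) -> int:
    # same fallback as the converter: default to 1 KV head (multi-query attention)
    return hparams["num_kv_heads"] if "num_kv_heads" in hparams else 1

print(kv_head_count(hparams_mqa))  # 1
print(kv_head_count(hparams_gqa))  # 8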
@@ -146,8 +153,8 @@ def parse_args() -> argparse.Namespace:
 tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
 
 # params for qkv transform
-n_head = hparams["n_head"]
-n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
+n_head = hparams["num_attention_heads"]
+n_head_kv = hparams["num_kv_heads"] if "num_kv_heads" in hparams else 1
 
 head_dim = hparams["hidden_size"] // n_head
 
@@ -156,6 +163,10 @@ def parse_args() -> argparse.Namespace:
 
 if num_parts == 0:
     part_names = iter(("pytorch_model.bin",))
+elif is_safetensors:
+    part_names = (
+        f"model-{n:05}-of-{num_parts:05}.safetensors" for n in range(1, num_parts + 1)
+    )
 else:
     part_names = (
         f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
@@ -165,60 +176,64 @@ def parse_args() -> argparse.Namespace:
     if args.vocab_only:
         break
     print("gguf: loading model part '" + part_name + "'")
-    model_part = torch.load(dir_model / part_name, map_location="cpu")
-
-    for name in model_part.keys():
-        data = model_part[name]
-
-        old_dtype = data.dtype
-
-        # convert any unsupported data types to float32
-        if data.dtype != torch.float16 and data.dtype != torch.float32:
-            data = data.to(torch.float32)
-
-        # QKV tensor transform
-        # The original query_key_value tensor contains n_head_kv "kv groups",
-        # each consisting of n_head/n_head_kv query weights followed by one key
-        # and one value weight (shared by all query heads in the kv group).
-        # This layout makes it a big pain to work with in GGML.
-        # So we rearrange them here,, so that we have n_head query weights
-        # followed by n_head_kv key weights followed by n_head_kv value weights,
-        # in contiguous fashion.
-        # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-ggml.py
-
-        if "query_key_value" in name:
-            qkv = data.view(n_head_kv, n_head // n_head_kv + 2, head_dim, head_dim * n_head)
-            q = qkv[:, :-2 ].reshape(n_head * head_dim, head_dim * n_head)
-            k = qkv[:, [-2]].reshape(n_head_kv * head_dim, head_dim * n_head)
-            v = qkv[:, [-1]].reshape(n_head_kv * head_dim, head_dim * n_head)
-            data = torch.cat((q,k,v)).reshape_as(data)
-
-        data = data.squeeze().numpy()
-
-        # map tensor names
-        new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
-        if new_name is None:
-            print("Can not map tensor '" + name + "'")
-            sys.exit()
-
-        n_dims = len(data.shape)
-        data_dtype = data.dtype
-
-        # if f32 desired, convert any float16 to float32
-        if ftype == 0 and data_dtype == np.float16:
-            data = data.astype(np.float32)
-
-        # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
-        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
-            data = data.astype(np.float32)
-
-        # if f16 desired, convert any float32 2-dim weight tensors to float16
-        if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
-            data = data.astype(np.float16)
-
-        print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
-
-        gguf_writer.add_tensor(new_name, data)
+    if is_safetensors:
+        ctx = safe_open(dir_model / part_name, framework="pt", device="cpu")
+    else:
+        ctx = contextlib.nullcontext(torch.load(dir_model / part_name, map_location="cpu"))
+
+    with ctx as model_part:
+        for name in model_part.keys():
+            data = model_part.get_tensor(name) if is_safetensors else model_part[name]
+
+            old_dtype = data.dtype
+
+            # convert any unsupported data types to float32
+            if data.dtype != torch.float16 and data.dtype != torch.float32:
+                data = data.to(torch.float32)
+
+            # QKV tensor transform
+            # The original query_key_value tensor contains n_head_kv "kv groups",
+            # each consisting of n_head/n_head_kv query weights followed by one key
+            # and one value weight (shared by all query heads in the kv group).
+            # This layout makes it a big pain to work with in GGML.
+            # So we rearrange them here,, so that we have n_head query weights
+            # followed by n_head_kv key weights followed by n_head_kv value weights,
+            # in contiguous fashion.
+            # ref: https://github.com/jploski/ggml/blob/falcon40b/examples/falcon/convert-hf-to-ggml.py
+
+            if "query_key_value" in name:
+                qkv = data.view(n_head_kv, n_head // n_head_kv + 2, head_dim, head_dim * n_head)
+                q = qkv[:, :-2 ].reshape(n_head * head_dim, head_dim * n_head)
+                k = qkv[:, [-2]].reshape(n_head_kv * head_dim, head_dim * n_head)
+                v = qkv[:, [-1]].reshape(n_head_kv * head_dim, head_dim * n_head)
+                data = torch.cat((q,k,v)).reshape_as(data)
+
+            data = data.squeeze().numpy()
+
+            # map tensor names
+            new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
+            if new_name is None:
+                print("Can not map tensor '" + name + "'")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # if f32 desired, convert any float16 to float32
+            if ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
+            if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+                data = data.astype(np.float16)
+
+            print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
+
+            gguf_writer.add_tensor(new_name, data)
 
 
 print("gguf: write header")
