Commit 108e53c

llama : add support for GPT2, Bloom and CodeShell tied word embeddings (ggml-org#12456)
* Add support for GPT2, Bloom and CodeShell tied word embeddings
* Deduplicate tied word embeddings weights
* Workaround for incorrect weight map: it appears transformer.wte.weight is in the weight map even though the weights are not there; remove it if output weights are encountered first
* check++
* fatfingers--
1 parent a686171 commit 108e53c
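For context, tied word embeddings mean the output projection (lm_head) reuses the token-embedding matrix instead of storing a second copy, which is why the converter no longer needs to duplicate it in the GGUF file. A minimal PyTorch sketch of the tying pattern (hypothetical class and sizes, not code from this repository):

import torch
import torch.nn as nn

class TinyTiedLM(nn.Module):
    # Illustration only: the LM head shares its weight tensor with the token embedding,
    # so writing both to a model file would store the same matrix twice.
    def __init__(self, n_vocab: int = 100, n_embd: int = 16):
        super().__init__()
        self.wte = nn.Embedding(n_vocab, n_embd)                # token embeddings ("transformer.wte.weight")
        self.lm_head = nn.Linear(n_embd, n_vocab, bias=False)   # output projection ("lm_head.weight")
        self.lm_head.weight = self.wte.weight                   # tie: one underlying tensor

    def forward(self, tokens: torch.Tensor) -> torch.Tensor:
        return self.lm_head(self.wte(tokens))

model = TinyTiedLM()
# Same storage, so a converter only needs to serialize the embedding once.
assert model.lm_head.weight.data_ptr() == model.wte.weight.data_ptr()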

2 files changed: 34 additions, 24 deletions

‎convert_hf_to_gguf.py

16 additions, 21 deletions

@@ -180,7 +180,8 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
             extra = sorted(tensor_names_from_parts.difference(self.tensor_names))
             missing_files = sorted(set(weight_map[n] for n in missing if n in weight_map))
             if len(extra) == 0 and len(missing_files) > 0:
-                raise ValueError(f"Missing or incomplete model files: {missing_files}")
+                raise ValueError(f"Missing or incomplete model files: {missing_files}\n"
+                                 f"Missing tensors: {missing}")
             else:
                 raise ValueError("Mismatch between weight map and model parts for tensor names:\n"
                                  f"Missing tensors: {missing}\n"

@@ -1099,13 +1100,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         tensors.append((self.map_tensor_name(name), data_torch))
 
-        if name == "word_embeddings.weight":
-            assert self.tensor_names is not None
-
-            # TODO: tie them at runtime, don't duplicate in the model file
-            if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
         return tensors
 
 
@@ -2423,10 +2417,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         tensors.append((new_name, data_torch))
 
-        # note: GPT2 output is tied to (same as) wte in original model
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
         return tensors
 
 
@@ -2756,21 +2746,26 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)
 
+    _has_tok_embd = False
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
 
-        new_name = self.map_tensor_name(name)
-
-        tensors: list[tuple[str, Tensor]] = [(new_name, data_torch)]
+        output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
+        tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)
 
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            assert self.tensor_names is not None
+        new_name = self.map_tensor_name(name)
 
-            if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
-                # copy tok_embd.weight to output.weight
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
+        # assuming token_embd.weight is seen before output.weight
+        if not self._has_tok_embd and new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
+            # even though the tensor file(s) does not contain the word embeddings they are still in the weight map
+            if self.tensor_names and "transformer.wte.weight" in self.tensor_names:
+                logger.debug(f"{tok_embd_name} not found before {output_name}, assuming they are tied")
+                self.tensor_names.remove("transformer.wte.weight")
+        elif new_name == tok_embd_name:
+            self._has_tok_embd = True
 
-        return tensors
+        return [(new_name, data_torch)]
 
 
 @Model.register("InternLM2ForCausalLM")
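With the change above, GPT2 and Bloom conversions write only token_embd.weight and leave the output head to be reconstructed at load time. A quick way to spot-check a converted file, sketched with the gguf-py GGUFReader (hypothetical file name; assumes the reader exposes a tensors list with name fields):

from gguf import GGUFReader  # gguf-py, the package used by convert_hf_to_gguf.py

reader = GGUFReader("gpt2-f16.gguf")  # hypothetical path to a converted model
names = {t.name for t in reader.tensors}

print("token_embd.weight" in names)  # expected: True
print("output.weight" in names)      # expected: False once the tied head is deduplicated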

‎src/llama-model.cpp

18 additions, 3 deletions

@@ -2020,7 +2020,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     // output
                     output_norm   = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
                     output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, 0);
-                    output        = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0);
+                    output        = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                    // if output is NULL, init from the input tok embed
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }
 
                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];

@@ -2381,7 +2386,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     // output
                     output_norm   = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
                     output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, 0);
-                    output        = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0);
+                    output        = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                    // if output is NULL, init from the input tok embed
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }
 
                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];

@@ -2407,7 +2417,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 } break;
             case LLM_ARCH_CODESHELL:
                 {
-                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
+                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                    // if tok embd is NULL, init from output
+                    if (tok_embd == NULL) {
+                        tok_embd = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }
 
                     // output
                     output_norm   = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
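The loader-side pattern is the same in all three hunks: request the tensor with TENSOR_NOT_REQUIRED and, if it comes back NULL, duplicate the tied counterpart (output from token_embd for GPT2 and Bloom, token_embd from output for CodeShell). A small Python sketch of that fallback, with a plain dict standing in for the model file (illustration only, not the llama.cpp API):

def resolve_output(tensors: dict[str, object]) -> object:
    # Prefer a dedicated output head; fall back to the tied token embedding.
    output = tensors.get("output.weight")      # optional: may be absent from the file
    if output is None:
        output = tensors["token_embd.weight"]  # tied: reuse the embedding matrix
    return output

# A deduplicated GGUF stores the shared matrix only once.
tied = {"token_embd.weight": [0.1, 0.2, 0.3]}
assert resolve_output(tied) is tied["token_embd.weight"]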
