@@ -180,7 +180,8 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
             extra = sorted(tensor_names_from_parts.difference(self.tensor_names))
             missing_files = sorted(set(weight_map[n] for n in missing if n in weight_map))
             if len(extra) == 0 and len(missing_files) > 0:
-                raise ValueError(f"Missing or incomplete model files: {missing_files}")
+                raise ValueError(f"Missing or incomplete model files: {missing_files}\n"
+                                 f"Missing tensors: {missing}")
             else:
                 raise ValueError("Mismatch between weight map and model parts for tensor names:\n"
                                  f"Missing tensors: {missing}\n"
@@ -1099,13 +1100,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:

         tensors.append((self.map_tensor_name(name), data_torch))

-        if name == "word_embeddings.weight":
-            assert self.tensor_names is not None
-
-            # TODO: tie them at runtime, don't duplicate in the model file
-            if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
         return tensors


@@ -2423,10 +2417,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:

         tensors.append((new_name, data_torch))

-        # note: GPT2 output is tied to (same as) wte in original model
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
-
         return tensors


@@ -2756,21 +2746,26 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)

+    _has_tok_embd = False
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused

-        new_name = self.map_tensor_name(name)
-
-        tensors: list[tuple[str, Tensor]] = [(new_name, data_torch)]
+        output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
+        tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)

-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            assert self.tensor_names is not None
+        new_name = self.map_tensor_name(name)

-            if all(s not in self.tensor_names for s in ("lm_head.weight", "output.weight")):
-                # copy tok_embd.weight to output.weight
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch))
+        # assuming token_embd.weight is seen before output.weight
+        if not self._has_tok_embd and new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
+            # even though the tensor file(s) does not contain the word embeddings they are still in the weight map
+            if self.tensor_names and "transformer.wte.weight" in self.tensor_names:
+                logger.debug(f"{tok_embd_name} not found before {output_name}, assuming they are tied")
+                self.tensor_names.remove("transformer.wte.weight")
+        elif new_name == tok_embd_name:
+            self._has_tok_embd = True

-        return tensors
+        return [(new_name, data_torch)]



@Model.register("InternLM2ForCausalLM")
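
For context on the last hunk: the change replaces "duplicate the embedding into output.weight at conversion time" with "detect the tie and let the runtime share the tensor". Below is a minimal, self-contained sketch of that detection pattern only; it is not part of the patch, and TiedEmbeddingTracker, observe, and expected_names are hypothetical names used purely for illustration.

# Illustrative sketch (assumed names), mirroring the tied-embedding detection above.
class TiedEmbeddingTracker:
    """Track whether a dedicated output tensor exists or is tied to the token embedding."""

    def __init__(self, expected_names: set[str], hf_embd_name: str = "transformer.wte.weight"):
        self.expected_names = expected_names  # names the weight map says should exist
        self.hf_embd_name = hf_embd_name      # HF-side name of the (possibly tied) embedding
        self.has_tok_embd = False             # set once the embedding tensor is actually seen

    def observe(self, mapped_name: str, tok_embd_name: str, output_name: str) -> None:
        # If the output tensor shows up before any token embedding has been seen,
        # assume the two are tied and drop the embedding from the expected set so a
        # later completeness check (like the one in get_tensors()) still passes.
        if not self.has_tok_embd and mapped_name == output_name:
            self.expected_names.discard(self.hf_embd_name)
        elif mapped_name == tok_embd_name:
            self.has_tok_embd = True


# Usage with made-up tensor names:
tracker = TiedEmbeddingTracker({"transformer.wte.weight", "lm_head.weight"})
tracker.observe("output.weight", "token_embd.weight", "output.weight")
assert "transformer.wte.weight" not in tracker.expected_names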