Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 5e1c3ae

Browse files
authored
convert : fix nomic-bert-moe mask token (#13757)
1 parent c496fe0 commit 5e1c3ae
Copy full SHA for 5e1c3ae

File tree

Expand file tree / Collapse file tree

2 files changed

+13
-2
lines changed
Filter options
Expand file tree / Collapse file tree

2 files changed

+13
-2
lines changed

‎convert_hf_to_gguf.py

Copy file name to clipboard · Expand all lines: convert_hf_to_gguf.py
+6 lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3889,6 +3889,12 @@ def _xlmroberta_set_vocab(self) -> None:
38893889
SentencePieceTokenTypes.UNKNOWN,
38903890
] + toktypes[3:-1]
38913891

3892+
if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
3893+
# Add mask token missing from sentencepiece.bpe.model
3894+
tokens[250001] = b'<mask>'
3895+
scores[250001] = 0.0
3896+
toktypes[250001] = SentencePieceTokenTypes.CONTROL
3897+
38923898
self.gguf_writer.add_tokenizer_model("t5")
38933899
self.gguf_writer.add_tokenizer_pre("default")
38943900
self.gguf_writer.add_token_list(tokens)

‎src/llama-vocab.cpp

Copy file name to clipboard · Expand all lines: src/llama-vocab.cpp
+7 −2 lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2080,9 +2080,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
20802080

20812081
std::string model_name;
20822082
std::string tokenizer_pre;
2083+
std::string general_arch;
20832084

20842085
ml.get_key(LLM_KV_GENERAL_NAME, model_name, false);
20852086
ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
2087+
ml.get_key(LLM_KV_GENERAL_ARCHITECTURE, general_arch, false);
20862088

20872089
// model name to lowercase
20882090
std::transform(model_name.begin(), model_name.end(), model_name.begin(),
@@ -2091,8 +2093,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
20912093
}
20922094
);
20932095

2094-
// set attributes by model/tokenizer name
2095-
if (_contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})) {
2096+
// set attributes by model/tokenizer/architecture name
2097+
if (false
2098+
|| _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
2099+
|| _contains_any(general_arch, {"nomic-bert-moe"})
2100+
) {
20962101
_set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
20972102
} else if (_contains_any(model_name, {"phi-3", "phi3"})) {
20982103
for (auto id : cache_special_tokens) {

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.