Commit d3286d6

tokenize: fix double BOS token
1 parent 858f6b7 commit d3286d6

21 files changed (+78 -58)
‎common/common.cpp

+15 -3

@@ -2343,15 +2343,17 @@ std::vector<llama_token> llama_tokenize(
   const struct llama_context * ctx,
         const std::string & text,
         bool   add_special,
-        bool   parse_special) {
-    return llama_tokenize(llama_get_model(ctx), text, add_special, parse_special);
+        bool   parse_special,
+        bool   fix_double_bos) {
+    return llama_tokenize(llama_get_model(ctx), text, add_special, parse_special, fix_double_bos);
 }
 
 std::vector<llama_token> llama_tokenize(
   const struct llama_model * model,
         const std::string & text,
         bool   add_special,
-        bool   parse_special) {
+        bool   parse_special,
+        bool   fix_double_bos) {
     // upper limit for the number of tokens
     int n_tokens = text.length() + 2 * add_special;
     std::vector<llama_token> result(n_tokens);
@@ -2363,9 +2365,19 @@ std::vector<llama_token> llama_tokenize(
     } else {
         result.resize(n_tokens);
     }
+    if (fix_double_bos) {
+        llama_fix_double_bos(model, result);
+    }
     return result;
 }
 
+void llama_fix_double_bos(const struct llama_model * model, std::vector<llama_token> & prompt) {
+    const llama_token bos = llama_token_bos(model);
+    if (prompt.size() >= 2 && prompt[0] == bos && prompt[1] == bos) {
+        prompt.erase(prompt.begin(), prompt.begin() + 1);
+    }
+}
+
 std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
     std::vector<char> result(8, 0);
     const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
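For illustration only (not part of the commit), a minimal standalone sketch of the de-duplication rule implemented above, using plain integer token IDs as a stand-in for a loaded model's vocabulary:

#include <cassert>
#include <vector>

// hypothetical BOS id; a real caller would use llama_token_bos(model)
static const int BOS = 1;

// mirrors llama_fix_double_bos: drop one token when the prompt starts with two BOS tokens
static void fix_double_bos(std::vector<int> & prompt) {
    if (prompt.size() >= 2 && prompt[0] == BOS && prompt[1] == BOS) {
        prompt.erase(prompt.begin(), prompt.begin() + 1);
    }
}

int main() {
    // as if add_special prepended a BOS while the prompt text already began with one
    std::vector<int> tokens = { BOS, BOS, 15043, 3186 };
    fix_double_bos(tokens);
    assert(tokens.size() == 3 && tokens[0] == BOS); // a single BOS remains
    return 0;
}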

‎common/common.h

+7 -2

@@ -238,13 +238,18 @@ std::vector<llama_token> llama_tokenize(
   const struct llama_context * ctx,
         const std::string & text,
         bool   add_special,
-        bool   parse_special = false);
+        bool   parse_special = false,
+        bool   fix_double_bos = false);
 
 std::vector<llama_token> llama_tokenize(
   const struct llama_model * model,
         const std::string & text,
         bool   add_special,
-        bool   parse_special = false);
+        bool   parse_special = false,
+        bool   fix_double_bos = false);
+
+// if the first and the second token in the prompt are both BOS, remove the first token
+void llama_fix_double_bos(const struct llama_model * model, std::vector<llama_token> & prompt);
 
 // tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
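A short caller-side sketch (not part of the commit; assumes a llama_context * ctx created from a loaded model, and a hypothetical prompt string) of when the new flag matters: with parse_special enabled, a prompt that already starts with the BOS piece gets a second BOS prepended by add_special, and fix_double_bos collapses the pair.

// prompt text already contains the BOS piece, e.g. "<s>Hello world"
// add_special = true, parse_special = true, fix_double_bos = true
std::vector<llama_token> toks = ::llama_tokenize(ctx, "<s>Hello world", true, true, true);
// without the last argument the result would begin with two BOS tokens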

‎examples/batched/batched.cpp

+1 -1

@@ -71,7 +71,7 @@ int main(int argc, char ** argv) {
     // tokenize the prompt
 
     std::vector<llama_token> tokens_list;
-    tokens_list = ::llama_tokenize(model, params.prompt, true);
+    tokens_list = ::llama_tokenize(model, params.prompt, true, true, true);
 
     const int n_kv_req = tokens_list.size() + (n_len - tokens_list.size())*n_parallel;

‎examples/beam-search/beam-search.cpp

+1 -1

@@ -137,7 +137,7 @@ int main(int argc, char ** argv)
     // Tokenize the prompt :
     //---------------------------------
 
-    std::vector<llama_token> tokens_list = llama_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> tokens_list = llama_tokenize(ctx, params.prompt, true, true, true);
 
     const size_t max_context_size     = llama_n_ctx( ctx );
     const size_t max_tokens_list_size = max_context_size - 4 ;

‎examples/embedding/embedding.cpp

+1 -1

@@ -114,7 +114,7 @@ int main(int argc, char ** argv) {
     // tokenize the prompts and trim
     std::vector<std::vector<int32_t>> inputs;
     for (const auto & prompt : prompts) {
-        auto inp = ::llama_tokenize(ctx, prompt, true, false);
+        auto inp = ::llama_tokenize(ctx, prompt, true, false, true);
         if (inp.size() > n_batch) {
             fprintf(stderr, "%s: error: number of tokens in input line (%lld) exceeds batch size (%lld), increase batch size and re-run\n",
                     __func__, (long long int) inp.size(), (long long int) n_batch);

‎examples/imatrix/imatrix.cpp

+1 -1

@@ -378,7 +378,7 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params, bool
     auto tim1 = std::chrono::high_resolution_clock::now();
     fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
 
-    std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, true);
+    std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, true, true, true);
 
     auto tim2 = std::chrono::high_resolution_clock::now();
     fprintf(stderr, "%s: tokenization took %g ms\n",__func__,1e-3*std::chrono::duration_cast<std::chrono::microseconds>(tim2-tim1).count());

‎examples/infill/infill.cpp

+7 -7

@@ -248,8 +248,8 @@ int main(int argc, char ** argv) {
         suff_rm_leading_spc = false;
     }
     std::vector<llama_token> embd_inp;
-    std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false);
-    std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false);
+    std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true, false);
+    std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true, false);
     const int space_token = 29871;
     if (suff_rm_leading_spc && inp_sfx[0] == space_token) {
         inp_sfx.erase(inp_sfx.begin());
@@ -280,10 +280,10 @@ int main(int argc, char ** argv) {
     if (ctx_guidance) {
         LOG("cfg_negative_prompt: \"%s\"\n", log_tostr(sparams.cfg_negative_prompt));
 
-        guidance_inp = ::llama_tokenize(ctx_guidance, sparams.cfg_negative_prompt, true);
+        guidance_inp = ::llama_tokenize(ctx_guidance, sparams.cfg_negative_prompt, true, true, true);
         LOG("guidance_inp tokenized: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx_guidance, guidance_inp).c_str());
 
-        std::vector<llama_token> original_inp = ::llama_tokenize(ctx, params.prompt, true);
+        std::vector<llama_token> original_inp = ::llama_tokenize(ctx, params.prompt, true, true, true);
         LOG("original_inp tokenized: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, original_inp).c_str());
 
         original_prompt_len = original_inp.size();
@@ -630,8 +630,8 @@ int main(int argc, char ** argv) {
                 suff_rm_leading_spc = false;
             }
             // tokenize new prefix and suffix
-            std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false);
-            std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false);
+            std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true, false);
+            std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true, false);
             if (suff_rm_leading_spc && inp_sfx[0] == space_token) {
                 inp_sfx.erase(inp_sfx.begin());
             }
@@ -703,7 +703,7 @@ int main(int argc, char ** argv) {
 
                 const size_t original_size = embd_inp.size();
 
-                const auto line_inp = ::llama_tokenize(ctx, buffer, false);
+                const auto line_inp = ::llama_tokenize(ctx, buffer, false, true, false);
                 LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
 
                 embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());

‎examples/llava/llava-cli.cpp

+4 -4

@@ -35,7 +35,7 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
 
 static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
     std::string str2 = str;
-    std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true);
+    std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true, add_bos);
     eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
     return true;
 }
@@ -156,14 +156,14 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         user_prompt = prompt.substr(image_pos + std::string("<image>").length());
         LOG_TEE("system_prompt: %s\n", system_prompt.c_str());
         if (params->verbose_prompt) {
-            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
+            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
                 LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
         }
         LOG_TEE("user_prompt: %s\n", user_prompt.c_str());
         if (params->verbose_prompt) {
-            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
                 LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }
@@ -173,7 +173,7 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
         system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
         user_prompt = prompt + "\nASSISTANT:";
         if (params->verbose_prompt) {
-            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+            auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true, true);
             for (int i = 0; i < (int) tmp.size(); i++) {
                 LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
             }

‎examples/lookahead/lookahead.cpp

+1 -1

@@ -67,7 +67,7 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> inp;
     std::vector<llama_token> all;
 
-    inp = ::llama_tokenize(ctx, params.prompt, true, true);
+    inp = ::llama_tokenize(ctx, params.prompt, true, true, true);
     all = inp;
 
     const int max_context_size = llama_n_ctx(ctx);

‎examples/lookup/lookup-create.cpp

+1 -1

@@ -29,7 +29,7 @@ int main(int argc, char ** argv){
 
     // tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::llama_tokenize(ctx, params.prompt, true, true);
+    inp = ::llama_tokenize(ctx, params.prompt, true, true, true);
     fprintf(stderr, "%s: tokenization done\n", __func__);

‎examples/lookup/lookup-stats.cpp

+1 -1

@@ -34,7 +34,7 @@ int main(int argc, char ** argv){
 
     // tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::llama_tokenize(ctx, params.prompt, true, true);
+    inp = ::llama_tokenize(ctx, params.prompt, true, true, true);
 
     llama_ngram_cache ngram_cache_context;
     llama_ngram_cache ngram_cache_dynamic;

‎examples/lookup/lookup.cpp

+1 -1

@@ -42,7 +42,7 @@ int main(int argc, char ** argv){
 
     // tokenize the prompt
     std::vector<llama_token> inp;
-    inp = ::llama_tokenize(ctx, params.prompt, true, true);
+    inp = ::llama_tokenize(ctx, params.prompt, true, true, true);
 
     llama_ngram_cache ngram_cache_context;
     llama_ngram_cache ngram_cache_dynamic;

‎examples/main/main.cpp

+15 -15

@@ -255,7 +255,7 @@ int main(int argc, char ** argv) {
             if (params.chatml) {
                 params.prompt = "<|im_start|>system\n" + params.prompt + "<|im_end|>";
             }
-            embd_inp = ::llama_tokenize(ctx, params.prompt, true, true);
+            embd_inp = ::llama_tokenize(ctx, params.prompt, true, true, true);
         } else {
             LOG("use session tokens\n");
             embd_inp = session_tokens;
@@ -277,10 +277,10 @@ int main(int argc, char ** argv) {
     if (ctx_guidance) {
         LOG("cfg_negative_prompt: \"%s\"\n", log_tostr(sparams.cfg_negative_prompt));
 
-        guidance_inp = ::llama_tokenize(ctx_guidance, sparams.cfg_negative_prompt, true, true);
+        guidance_inp = ::llama_tokenize(ctx_guidance, sparams.cfg_negative_prompt, true, true, true);
         LOG("guidance_inp tokenized: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx_guidance, guidance_inp).c_str());
 
-        std::vector<llama_token> original_inp = ::llama_tokenize(ctx, params.prompt, true, true);
+        std::vector<llama_token> original_inp = ::llama_tokenize(ctx, params.prompt, true, true, true);
         LOG("original_inp tokenized: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, original_inp).c_str());
 
         original_prompt_len = original_inp.size();
@@ -339,15 +339,15 @@ int main(int argc, char ** argv) {
     }
 
     // prefix & suffix for instruct mode
-    const auto inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true, true);
-    const auto inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false, true);
+    const auto inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true, true, false);
+    const auto inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false, true, false);
 
     LOG("inp_pfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, inp_pfx).c_str());
     LOG("inp_sfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, inp_sfx).c_str());
 
     // chatml prefix & suffix
-    const auto cml_pfx = ::llama_tokenize(ctx, "\n<|im_start|>user\n", true, true);
-    const auto cml_sfx = ::llama_tokenize(ctx, "<|im_end|>\n<|im_start|>assistant\n", false, true);
+    const auto cml_pfx = ::llama_tokenize(ctx, "\n<|im_start|>user\n", true, true, false);
+    const auto cml_sfx = ::llama_tokenize(ctx, "<|im_end|>\n<|im_start|>assistant\n", false, true, false);
 
     LOG("cml_pfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, cml_pfx).c_str());
     LOG("cml_sfx: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, cml_sfx).c_str());
@@ -418,7 +418,7 @@ int main(int argc, char ** argv) {
         for (const auto & antiprompt : params.antiprompt) {
             LOG_TEE("Reverse prompt: '%s'\n", antiprompt.c_str());
             if (params.verbose_prompt) {
-                auto tmp = ::llama_tokenize(ctx, antiprompt, false, true);
+                auto tmp = ::llama_tokenize(ctx, antiprompt, false, true, false);
                 for (int i = 0; i < (int) tmp.size(); i++) {
                     LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str());
                 }
@@ -433,7 +433,7 @@ int main(int argc, char ** argv) {
         if (!params.input_prefix.empty()) {
             LOG_TEE("Input prefix: '%s'\n", params.input_prefix.c_str());
             if (params.verbose_prompt) {
-                auto tmp = ::llama_tokenize(ctx, params.input_prefix, true, true);
+                auto tmp = ::llama_tokenize(ctx, params.input_prefix, true, true, true);
                 for (int i = 0; i < (int) tmp.size(); i++) {
                     LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str());
                 }
@@ -443,7 +443,7 @@ int main(int argc, char ** argv) {
         if (!params.input_suffix.empty()) {
            LOG_TEE("Input suffix: '%s'\n", params.input_suffix.c_str());
            if (params.verbose_prompt) {
-               auto tmp = ::llama_tokenize(ctx, params.input_suffix, false, true);
+               auto tmp = ::llama_tokenize(ctx, params.input_suffix, false, true, false);
                for (int i = 0; i < (int) tmp.size(); i++) {
                    LOG_TEE("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str());
                }
@@ -516,7 +516,7 @@ int main(int argc, char ** argv) {
 
     antiprompt_ids.reserve(params.antiprompt.size());
     for (const std::string & antiprompt : params.antiprompt) {
-        antiprompt_ids.emplace_back(::llama_tokenize(ctx, antiprompt, false, true));
+        antiprompt_ids.emplace_back(::llama_tokenize(ctx, antiprompt, false, true, false));
     }
 
     struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
@@ -801,7 +801,7 @@ int main(int argc, char ** argv) {
     if (params.interactive) {
         if (!params.antiprompt.empty()) {
             // tokenize and inject first reverse prompt
-            const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false, true);
+            const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false, true, false);
             embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
             is_antiprompt = true;
         }
@@ -875,9 +875,9 @@ int main(int argc, char ** argv) {
                     process_escapes(buffer);
                 }
 
-                const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
-                const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
-                const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
+                const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true, false);
+                const auto line_inp = ::llama_tokenize(ctx, buffer, false, false, false);
+                const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true, false);
 
                 LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());

‎examples/parallel/parallel.cpp

+2 -2

@@ -164,7 +164,7 @@ int main(int argc, char ** argv) {
     }
 
     std::vector<llama_token> tokens_system;
-    tokens_system = ::llama_tokenize(ctx, k_system, true);
+    tokens_system = ::llama_tokenize(ctx, k_system, true, true, true);
     const int32_t n_tokens_system = tokens_system.size();
 
     llama_seq_id g_seq_id = 0;
@@ -256,7 +256,7 @@ int main(int argc, char ** argv) {
 
             // do not prepend BOS because we have a system prompt!
             std::vector<llama_token> tokens_prompt;
-            tokens_prompt = ::llama_tokenize(ctx, client.prompt, false);
+            tokens_prompt = ::llama_tokenize(ctx, client.prompt, false, true, false);
 
             for (size_t i = 0; i < tokens_prompt.size(); ++i) {
                 llama_batch_add(batch, tokens_prompt[i], i + n_tokens_system, { client.id + 1 }, false);

‎examples/passkey/passkey.cpp

+2 -2

@@ -108,10 +108,10 @@ int main(int argc, char ** argv) {
 
     // tokenize the prompt
     std::vector<llama_token> tokens_list;
-    tokens_list = ::llama_tokenize(ctx, params.prompt, true);
+    tokens_list = ::llama_tokenize(ctx, params.prompt, true, true, true);
 
     // tokenize the prefix and use it as a sink
-    const int n_tokens_prefix = ::llama_tokenize(ctx, prompt_prefix, true).size();
+    const int n_tokens_prefix = ::llama_tokenize(ctx, prompt_prefix, true, true, true).size();
 
     const int n_tokens_all = tokens_list.size();

0 commit comments
