Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit eed6128

Browse files
author
Mug
committed
Don't detect off tokens, detect off detokenized UTF-8
1 parent 3a98747 commit eed6128
Copy full SHA for eed6128

File tree

Expand file tree / Collapse file tree

1 file changed

+11
-7
lines changed
Filter options
Expand file treeCollapse file tree

1 file changed

+11
-7
lines changed

‎llama_cpp/llama.py

Copy file name to clipboard | Expand all lines: llama_cpp/llama.py
+11 -7 | Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -459,12 +459,6 @@ def _create_completion(
459459
finish_reason = "stop"
460460
break
461461

462-
# Contains multi-byte UTF8
463-
for num,pattern in [(2, 192), (3, 224), (4, 240)]:
464-
# Bitwise AND check
465-
if (pattern & token == pattern):
466-
multibyte_fix = num - 1
467-
468462
if self.cache and len(completion_tokens) == 0:
469463
if prompt_tokens not in self.cache:
470464
if self.verbose:
@@ -473,12 +467,22 @@ def _create_completion(
473467

474468
completion_tokens.append(token)
475469

470+
all_text = self.detokenize(completion_tokens)
471+
472+
# Contains multi-byte UTF8
473+
for k,char in enumerate(all_text[-3:]):
474+
k = 3 - k
475+
char = int.from_bytes(char, "big")
476+
for num,pattern in [(2, 192), (3, 224), (4, 240)]:
477+
# Bitwise AND check
478+
if (num > k and pattern & char == pattern):
479+
multibyte_fix = num - k
480+
476481
# Stop incomplete bytes from passing
477482
if (multibyte_fix > 0):
478483
multibyte_fix -= 1
479484
continue
480485

481-
all_text = self.detokenize(completion_tokens)
482486
any_stop = [s for s in stop_sequences if s in all_text]
483487
if len(any_stop) > 0:
484488
first_stop = any_stop[0]

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.