Commit 5f81400

Also ignore errors on input prompts

Author: Mug
Parent: 3c130f0

3 files changed: +5 lines, -5 lines
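The change is identical in all three files: each str.encode(...) call that feeds the tokenizer now passes errors="ignore", so characters that cannot be encoded (for example a lone surrogate pasted into an interactive prompt) are silently dropped instead of raising UnicodeEncodeError. A minimal standard-library sketch of that behavior (the sample string below is illustrative, not taken from the commit):

# Illustration only: how errors="ignore" changes the encoding step.
prompt = "hello \ud800world"          # contains a lone surrogate, not encodable as UTF-8

try:
    prompt.encode("utf-8")            # previous behavior: raises
except UnicodeEncodeError as exc:
    print("strict encode failed:", exc.reason)

print(prompt.encode("utf-8", errors="ignore"))   # new behavior: b'hello world'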

‎examples/low_level_api/low_level_api_chat_cpp.py

1 addition, 1 deletion (+1 -1)

@@ -201,7 +201,7 @@ def __init__(self, params: GptParams) -> None:
     # tokenize a prompt
     def _tokenize(self, prompt, bos=True):
         _arr = (llama_cpp.llama_token * (len(prompt) + 1))()
-        _n = llama_cpp.llama_tokenize(self.ctx, prompt.encode("utf8"), _arr, len(_arr), bos)
+        _n = llama_cpp.llama_tokenize(self.ctx, prompt.encode("utf8", errors="ignore"), _arr, len(_arr), bos)
         return _arr[:_n]

     def set_color(self, c):

‎llama_cpp/llama.py

3 additions, 3 deletions (+3 -3)

@@ -358,7 +358,7 @@ def create_embedding(self, input: str) -> Embedding:
         if self.verbose:
             llama_cpp.llama_reset_timings(self.ctx)

-        tokens = self.tokenize(input.encode("utf-8"))
+        tokens = self.tokenize(input.encode("utf-8", errors="ignore"))
         self.reset()
         self.eval(tokens)
         n_tokens = len(tokens)
@@ -416,7 +416,7 @@ def _create_completion(
         completion_tokens: List[llama_cpp.llama_token] = []
         # Add blank space to start of prompt to match OG llama tokenizer
         prompt_tokens: List[llama_cpp.llama_token] = self.tokenize(
-            b" " + prompt.encode("utf-8")
+            b" " + prompt.encode("utf-8", errors="ignore")
         )
         text: bytes = b""
         returned_characters: int = 0
@@ -431,7 +431,7 @@
             )

         if stop != []:
-            stop_sequences = [s.encode("utf-8") for s in stop]
+            stop_sequences = [s.encode("utf-8", errors="ignore") for s in stop]
         else:
             stop_sequences = []

‎tests/test_llama.py

1 addition, 1 deletion (+1 -1)

@@ -24,7 +24,7 @@ def mock_eval(*args, **kwargs):
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_eval", mock_eval)

     output_text = " jumps over the lazy dog."
-    output_tokens = llama.tokenize(output_text.encode("utf-8"))
+    output_tokens = llama.tokenize(output_text.encode("utf-8", errors="ignore"))
     token_eos = llama.token_eos()
     n = 0
