Commit 5b982d0

fix: use both eos and bos tokens as stop sequences for hf-tokenizer-config chat format.
1 parent 2ce0b8a commit 5b982d0

File tree: 1 file changed, +12 −8 lines changed

llama_cpp/llama_chat_format.py (12 additions, 8 deletions)
@@ -379,7 +379,8 @@ def hf_autotokenizer_to_chat_completion_handler(
 
 
 def hf_tokenizer_config_to_chat_formatter(
-    tokenizer_config: Dict[str, Any]
+    tokenizer_config: Dict[str, Any],
+    add_generation_prompt: bool = True,
 ) -> ChatFormatter:
     assert isinstance(tokenizer_config, dict)
 
@@ -401,31 +402,34 @@ def hf_tokenizer_config_to_chat_formatter(
         lstrip_blocks=True,
     ).from_string(chat_template)
 
-    def format_autotokenizer(
+    def format_tokenizer_config(
         messages: List[llama_types.ChatCompletionRequestMessage],
         **kwargs: Any,
     ) -> ChatFormatterResponse:
         # TODO: veryify this is correct
         # Add a blank assistant message to the end of the messages to prompt the model to generate a response
-        prompt = env.render(
-            messages=[
+        if add_generation_prompt:
+            messages = [
                 *messages,
                 llama_types.ChatCompletionRequestAssistantMessage(
                     role="assistant", content=""
                 ),
-            ],
+            ]
+        prompt = env.render(
+            messages=messages,
             bos_token=bos_token,
             eos_token=eos_token,
         )
-        return ChatFormatterResponse(prompt=prompt, stop=eos_token)
+        return ChatFormatterResponse(prompt=prompt, stop=[eos_token, bos_token])
 
-    return format_autotokenizer
+    return format_tokenizer_config
 
 
 def hf_tokenizer_config_to_chat_completion_handler(
     tokenizer_config: Dict[str, Any],
+    add_generation_prompt: bool = True,
 ) -> LlamaChatCompletionHandler:
-    chat_formatter = hf_tokenizer_config_to_chat_formatter(tokenizer_config)
+    chat_formatter = hf_tokenizer_config_to_chat_formatter(tokenizer_config, add_generation_prompt=add_generation_prompt)
     return chat_formatter_to_chat_completion_handler(chat_formatter)
 
 
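For context, a minimal sketch of how the changed handler might be wired up. The tokenizer_config.json path, model path, and messages below are illustrative assumptions rather than part of this commit, and attaching the handler via Llama(chat_handler=...) is assumed from the surrounding llama-cpp-python API.

import json

from llama_cpp import Llama
from llama_cpp.llama_chat_format import hf_tokenizer_config_to_chat_completion_handler

# Load a Hugging Face tokenizer_config.json; the formatter expects it to
# contain chat_template, bos_token, and eos_token (path is a placeholder).
with open("tokenizer_config.json") as f:
    tokenizer_config = json.load(f)

# add_generation_prompt=True (the new keyword) appends a blank assistant
# message so the rendered prompt ends where the model should start replying.
chat_handler = hf_tokenizer_config_to_chat_completion_handler(
    tokenizer_config,
    add_generation_prompt=True,
)

# Attach the handler to a model; per this commit, generation now stops on
# either the eos or the bos token.
llm = Llama(model_path="model.gguf", chat_handler=chat_handler)  # placeholder path
result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
)
print(result["choices"][0]["message"]["content"])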

0 commit comments
