Commit a86bfdf

bugfix: truncate completion max_tokens to fit context length by default
1 parent 6f70cc4

1 file changed: +6 -10 lines changed

llama_cpp/llama.py (6 additions, 10 deletions)

@@ -824,19 +824,15 @@ def _create_completion(
         if self.verbose:
             llama_cpp.llama_reset_timings(self.ctx)
 
-        if max_tokens <= 0:
-            # Unlimited, depending on n_ctx.
-            if len(prompt_tokens) >= int(llama_cpp.llama_n_ctx(self.ctx)):
-                raise ValueError(
-                    f"Requested tokens exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
-                )
-            else:
-                max_tokens = int(llama_cpp.llama_n_ctx(self.ctx)) - len(prompt_tokens)
-        elif len(prompt_tokens) + max_tokens > int(llama_cpp.llama_n_ctx(self.ctx)):
+        if len(prompt_tokens) >= llama_cpp.llama_n_ctx(self.ctx):
             raise ValueError(
-                f"Requested tokens ({len(prompt_tokens)}) exceed context window of {self._n_ctx}"
+                f"Requested tokens exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
             )
 
+        if max_tokens <= 0:
+            # Unlimited, depending on n_ctx.
+            max_tokens = llama_cpp.llama_n_ctx(self.ctx) - len(prompt_tokens)
+
         # Truncate max_tokens if requested tokens would exceed the context window
         max_tokens = (
             max_tokens
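
The effect of the change, in isolation: an oversized max_tokens is now truncated to fit the context window instead of raising. Below is a minimal standalone sketch of that control flow, not the library code itself. clamp_max_tokens is a hypothetical helper, n_ctx and the prompt length are plain ints rather than a live llama context, and the final truncation is assumed to behave like min() since the hunk above ends mid-expression.

# Hypothetical standalone sketch of the clamping behavior after this commit;
# not part of llama_cpp. The truncation step is assumed to act like min(),
# since the diff hunk is cut off mid-expression.

def clamp_max_tokens(max_tokens: int, n_prompt: int, n_ctx: int) -> int:
    # The prompt alone must still fit inside the context window.
    if n_prompt >= n_ctx:
        raise ValueError(f"Requested tokens exceed context window of {n_ctx}")
    # max_tokens <= 0 means "unlimited": fill whatever context remains.
    if max_tokens <= 0:
        max_tokens = n_ctx - n_prompt
    # New in this commit: truncate instead of raising when the request
    # would overflow the context window.
    return min(max_tokens, n_ctx - n_prompt)

assert clamp_max_tokens(-1, 100, 512) == 412    # unlimited fills the window
assert clamp_max_tokens(1000, 100, 512) == 412  # oversized request truncated
assert clamp_max_tokens(16, 100, 512) == 16     # small request unchanged

Before this commit, the second case (an explicit max_tokens that overflows the window) raised a ValueError; the new default truncates it to the remaining context instead.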
