Commit 90e1021

Add unlimited max_tokens
1 parent a5554a2 commit 90e1021

llama_cpp/llama.py (12 additions & 4 deletions)
```diff
@@ -317,7 +317,15 @@ def _create_completion(
         if self.verbose:
             llama_cpp.llama_reset_timings(self.ctx)
 
-        if len(prompt_tokens) + max_tokens > int(llama_cpp.llama_n_ctx(self.ctx)):
+        if max_tokens <= 0:
+            # Unlimited, depending on n_ctx.
+            if len(prompt_tokens) >= int(llama_cpp.llama_n_ctx(self.ctx)):
+                raise ValueError(
+                    f"Requested tokens exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
+                )
+            else:
+                max_tokens = int(llama_cpp.llama_n_ctx(self.ctx)) - len(prompt_tokens)
+        elif len(prompt_tokens) + max_tokens > int(llama_cpp.llama_n_ctx(self.ctx)):
             raise ValueError(
                 f"Requested tokens exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
             )
```
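In effect, a non-positive `max_tokens` now means "generate into whatever context remains after the prompt," while a positive value is still validated against the context window as before. A minimal sketch of that clamping logic in isolation (the `resolve_max_tokens` helper and its arguments are hypothetical stand-ins for the values `_create_completion` reads from the llama.cpp context):

```python
def resolve_max_tokens(max_tokens: int, n_prompt: int, n_ctx: int) -> int:
    """Hypothetical stand-alone mirror of the commit's clamping logic."""
    if max_tokens <= 0:
        # "Unlimited": fill the rest of the context window.
        if n_prompt >= n_ctx:
            raise ValueError(f"Requested tokens exceed context window of {n_ctx}")
        return n_ctx - n_prompt
    if n_prompt + max_tokens > n_ctx:
        raise ValueError(f"Requested tokens exceed context window of {n_ctx}")
    return max_tokens


# With a 512-token context and a 100-token prompt:
assert resolve_max_tokens(0, n_prompt=100, n_ctx=512) == 412   # unlimited -> remainder
assert resolve_max_tokens(16, n_prompt=100, n_ctx=512) == 16   # explicit cap unchanged
```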
```diff
@@ -455,7 +463,7 @@ def create_completion(
         Args:
             prompt: The prompt to generate text from.
             suffix: A suffix to append to the generated text. If None, no suffix is appended.
-            max_tokens: The maximum number of tokens to generate.
+            max_tokens: The maximum number of tokens to generate. If max_tokens <= 0, the maximum number of tokens to generate is unlimited and depends on n_ctx.
             temperature: The temperature to use for sampling.
             top_p: The top-p value to use for sampling.
             logprobs: The number of logprobs to return. If None, no logprobs are returned.
@@ -510,7 +518,7 @@ def __call__(
         Args:
             prompt: The prompt to generate text from.
             suffix: A suffix to append to the generated text. If None, no suffix is appended.
-            max_tokens: The maximum number of tokens to generate.
+            max_tokens: The maximum number of tokens to generate. If max_tokens <= 0, the maximum number of tokens to generate is unlimited and depends on n_ctx.
             temperature: The temperature to use for sampling.
             top_p: The top-p value to use for sampling.
             logprobs: The number of logprobs to return. If None, no logprobs are returned.
@@ -619,7 +627,7 @@ def create_chat_completion(
             top_k: The top-k value to use for sampling.
             stream: Whether to stream the results.
             stop: A list of strings to stop generation when encountered.
-            max_tokens: The maximum number of tokens to generate.
+            max_tokens: The maximum number of tokens to generate. If max_tokens <= 0, the maximum number of tokens to generate is unlimited and depends on n_ctx.
             repeat_penalty: The penalty to apply to repeated tokens.
 
         Returns:
```
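With this change, callers can pass a non-positive `max_tokens` to let generation run until the context window fills or a stop sequence is hit. A hedged usage sketch against the library's high-level API (the model path and prompt are placeholders):

```python
from llama_cpp import Llama

llm = Llama(model_path="./models/7B/ggml-model.bin")  # placeholder path

# max_tokens=0 now requests "as many tokens as the remaining context allows".
output = llm(
    "Q: Name the planets in the solar system. A: ",
    max_tokens=0,
    stop=["Q:"],
)
print(output["choices"][0]["text"])
```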
