Commit e7962d2

Fix: default max_tokens matches openai api (16 for completion, max length for chat completion)

1 parent 8207280

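The new defaults mirror the OpenAI API: the completions endpoint caps output at 16 tokens when max_tokens is omitted, while chat completions may generate up to the remaining context length. A minimal sketch of the resulting behavior, assuming llama-cpp-python at this commit and a hypothetical local model path:

from llama_cpp import Llama

llm = Llama(model_path="./models/model.gguf")  # hypothetical path

# max_tokens omitted: defaults to 16, as in OpenAI's completions API.
out = llm.create_completion(prompt="Q: Name the planets in the solar system. A:")

# max_tokens omitted: defaults to None, i.e. generate until EOS or until
# the context window (n_ctx) is exhausted, as in OpenAI's chat API.
chat = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Name the planets in the solar system."}]
)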
File tree

2 files changed: 9 additions, 7 deletions

llama_cpp/llama.py (6 additions, 6 deletions)
@@ -1296,7 +1296,7 @@ def _create_completion(
         self,
         prompt: Union[str, List[int]],
         suffix: Optional[str] = None,
-        max_tokens: int = 16,
+        max_tokens: Optional[int] = 16,
         temperature: float = 0.8,
         top_p: float = 0.95,
         logprobs: Optional[int] = None,
@@ -1350,7 +1350,7 @@ def _create_completion(
                 f"Requested tokens ({len(prompt_tokens)}) exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}"
             )
 
-        if max_tokens <= 0:
+        if max_tokens is None or max_tokens <= 0:
             # Unlimited, depending on n_ctx.
             max_tokens = self._n_ctx - len(prompt_tokens)

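The added None branch makes the "unlimited" path explicit. A standalone sketch of the same resolution logic, with n_ctx and prompt_tokens passed in as stand-ins for the instance state:

from typing import List, Optional

def resolve_max_tokens(
    max_tokens: Optional[int], n_ctx: int, prompt_tokens: List[int]
) -> int:
    # None or a non-positive value means "unlimited", bounded by the context window.
    if max_tokens is None or max_tokens <= 0:
        return n_ctx - len(prompt_tokens)
    return max_tokens

assert resolve_max_tokens(None, 2048, [1] * 10) == 2038  # chat-completion default
assert resolve_max_tokens(16, 2048, [1] * 10) == 16      # completion default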
@@ -1762,7 +1762,7 @@ def create_completion(
         self,
         prompt: Union[str, List[int]],
         suffix: Optional[str] = None,
-        max_tokens: int = 128,
+        max_tokens: Optional[int] = 16,
         temperature: float = 0.8,
         top_p: float = 0.95,
         logprobs: Optional[int] = None,
@@ -1788,7 +1788,7 @@ def create_completion(
         Args:
             prompt: The prompt to generate text from.
             suffix: A suffix to append to the generated text. If None, no suffix is appended.
-            max_tokens: The maximum number of tokens to generate. If max_tokens <= 0, the maximum number of tokens to generate is unlimited and depends on n_ctx.
+            max_tokens: The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx.
             temperature: The temperature to use for sampling.
             top_p: The top-p value to use for sampling.
             logprobs: The number of logprobs to return. If None, no logprobs are returned.
@@ -1921,7 +1921,7 @@ def create_chat_completion(
         stop: Optional[Union[str, List[str]]] = [],
         seed: Optional[int] = None,
         response_format: Optional[ChatCompletionRequestResponseFormat] = None,
-        max_tokens: int = 256,
+        max_tokens: Optional[int] = None,
         presence_penalty: float = 0.0,
         frequency_penalty: float = 0.0,
         repeat_penalty: float = 1.1,
@@ -1944,7 +1944,7 @@ def create_chat_completion(
             top_k: The top-k value to use for sampling.
             stream: Whether to stream the results.
             stop: A list of strings to stop generation when encountered.
-            max_tokens: The maximum number of tokens to generate. If max_tokens <= 0, the maximum number of tokens to generate is unlimited and depends on n_ctx.
+            max_tokens: The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx.
             repeat_penalty: The penalty to apply to repeated tokens.
 
         Returns:
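For chat completion the default changes from 256 to None, so a call without max_tokens can now run until EOS or the context limit; to bound the response, pass max_tokens explicitly (a hedged usage sketch, again reusing the llm instance from above):

chat = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Summarize llama.cpp in one sentence."}],
    max_tokens=64,  # explicit cap; omit to allow up to the context window
)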

llama_cpp/server/app.py (3 additions, 1 deletion)
@@ -783,7 +783,9 @@ class CreateChatCompletionRequest(BaseModel):
         default=None,
         description="A tool to apply to the generated completions.",
     )  # TODO: verify
-    max_tokens: int = max_tokens_field
+    max_tokens: Optional[int] = Field(
+        default=None, description="The maximum number of tokens to generate. Defaults to inf"
+    )
     temperature: float = temperature_field
     top_p: float = top_p_field
     stop: Optional[List[str]] = stop_field
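On the server side the request model now treats max_tokens as optional, so clients that omit the field get None, which the completion code above interprets as "up to the context window". A minimal sketch of how such an Optional pydantic field behaves (pydantic is the library app.py already uses; the model name here is an illustrative stand-in for CreateChatCompletionRequest):

from typing import Optional
from pydantic import BaseModel, Field

class ChatRequestSketch(BaseModel):  # illustrative stand-in
    max_tokens: Optional[int] = Field(
        default=None,
        description="The maximum number of tokens to generate. Defaults to inf",
    )

assert ChatRequestSketch().max_tokens is None             # omitted -> None -> "unlimited"
assert ChatRequestSketch(max_tokens=64).max_tokens == 64  # explicit cap passes through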
