Commit 727d60c

misc: Format
1 parent 0d37ce5 commit 727d60c

5 files changed, +44 −39 lines changed

‎llama_cpp/server/app.py

+26 −24 lines changed

@@ -199,8 +199,8 @@ async def authenticate(
 @router.post(
     "/v1/completions",
     summary="Completion",
-    dependencies=[Depends(authenticate)],
-    response_model= Union[
+    dependencies=[Depends(authenticate)],
+    response_model=Union[
         llama_cpp.CreateCompletionResponse,
         str,
     ],
@@ -211,19 +211,19 @@ async def authenticate(
                 "application/json": {
                     "schema": {
                         "anyOf": [
-                            {"$ref": "#/components/schemas/CreateCompletionResponse"}
+                            {"$ref": "#/components/schemas/CreateCompletionResponse"}
                         ],
                         "title": "Completion response, when stream=False",
                     }
                 },
-                "text/event-stream":{
-                    "schema": {
-                        "type": "string",
-                        "title": "Server Side Streaming response, when stream=True. " +
-                        "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", # noqa: E501
-                        "example": """data: {... see CreateCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]"""
+                "text/event-stream": {
+                    "schema": {
+                        "type": "string",
+                        "title": "Server Side Streaming response, when stream=True. "
+                        + "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format",  # noqa: E501
+                        "example": """data: {... see CreateCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]""",
                     }
-                }
+                },
             },
         }
     },
@@ -290,7 +290,7 @@ def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:
                 inner_send_chan=send_chan,
                 iterator=iterator(),
             ),
-            sep='\n',
+            sep="\n",
         )
     else:
         return iterator_or_completion
@@ -310,30 +310,32 @@ async def create_embedding(
 
 
 @router.post(
-    "/v1/chat/completions", summary="Chat", dependencies=[Depends(authenticate)],
-    response_model= Union[
-        llama_cpp.ChatCompletion, str
-    ],
+    "/v1/chat/completions",
+    summary="Chat",
+    dependencies=[Depends(authenticate)],
+    response_model=Union[llama_cpp.ChatCompletion, str],
     responses={
         "200": {
             "description": "Successful Response",
             "content": {
                 "application/json": {
                     "schema": {
                         "anyOf": [
-                            {"$ref": "#/components/schemas/CreateChatCompletionResponse"}
+                            {
+                                "$ref": "#/components/schemas/CreateChatCompletionResponse"
+                            }
                         ],
                         "title": "Completion response, when stream=False",
                     }
                 },
-                "text/event-stream":{
-                    "schema": {
-                        "type": "string",
-                        "title": "Server Side Streaming response, when stream=True" +
-                        "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", # noqa: E501
-                        "example": """data: {... see CreateChatCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]"""
+                "text/event-stream": {
+                    "schema": {
+                        "type": "string",
+                        "title": "Server Side Streaming response, when stream=True"
+                        + "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format",  # noqa: E501
+                        "example": """data: {... see CreateChatCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]""",
                     }
-                }
+                },
             },
         }
     },
@@ -383,7 +385,7 @@ def iterator() -> Iterator[llama_cpp.ChatCompletionChunk]:
                 inner_send_chan=send_chan,
                 iterator=iterator(),
             ),
-            sep='\n',
+            sep="\n",
         )
     else:
         return iterator_or_completion
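
For context on what these reformatted `responses` blocks document: the same 200 response is declared with two content types, a JSON body when stream=False and a Server-Sent Events stream when stream=True. Below is a minimal, self-contained sketch of that OpenAPI pattern in FastAPI; it is not the project's actual handler, and the route path, model name, and example payload are illustrative only.

# Hedged sketch: documenting one POST route with both a JSON and a
# text/event-stream 200 response, mirroring the pattern shown above.
from typing import Union

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class DemoCompletionResponse(BaseModel):
    # Illustrative stand-in for llama_cpp.CreateCompletionResponse.
    id: str
    text: str


@app.post(
    "/v1/demo/completions",
    summary="Demo Completion",
    response_model=Union[DemoCompletionResponse, str],
    responses={
        "200": {
            "description": "Successful Response",
            "content": {
                "application/json": {
                    "schema": {
                        "anyOf": [
                            {"$ref": "#/components/schemas/DemoCompletionResponse"}
                        ],
                        "title": "Completion response, when stream=False",
                    }
                },
                "text/event-stream": {
                    "schema": {
                        "type": "string",
                        "title": "Server-sent events stream, when stream=True",
                        "example": 'data: {"id": "demo", "text": "..."}\n\ndata: [DONE]',
                    }
                },
            },
        }
    },
)
async def demo_completion(stream: bool = False):
    # The real endpoint branches on `stream` and returns an SSE response when
    # streaming; only the non-streaming path is sketched here.
    return DemoCompletionResponse(id="demo", text="...")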

‎llama_cpp/server/errors.py

+2 −2 lines changed

@@ -22,6 +22,7 @@
     CreateChatCompletionRequest,
 )
 
+
 class ErrorResponse(TypedDict):
     """OpenAI style error response"""
 
@@ -75,7 +76,7 @@ def context_length_exceeded(
                 (completion_tokens or 0) + prompt_tokens,
                 prompt_tokens,
                 completion_tokens,
-            ), # type: ignore
+            ),  # type: ignore
             type="invalid_request_error",
             param="messages",
             code="context_length_exceeded",
@@ -207,4 +208,3 @@ async def custom_route_handler(request: Request) -> Response:
                 )
 
         return custom_route_handler
-
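
The `ErrorResponse` TypedDict touched in the first hunk describes an OpenAI-style error payload; its likely fields can be read off the keyword arguments visible in the `context_length_exceeded` hunk (a formatted message plus type, param, and code). A hedged sketch of that shape follows, using an illustrative stand-in class rather than the library's own definition.

# Hedged sketch of the OpenAI-style error payload shape; the field set is
# inferred from the context_length_exceeded hunk above, not copied from the
# library, so treat it as an assumption.
from typing import Optional, TypedDict


class DemoErrorResponse(TypedDict):
    message: str
    type: str
    param: Optional[str]
    code: Optional[str]


example_error: DemoErrorResponse = {
    "message": "This model's maximum context length was exceeded.",
    "type": "invalid_request_error",
    "param": "messages",
    "code": "context_length_exceeded",
}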

‎llama_cpp/server/model.py

+11 −8 lines changed

@@ -88,15 +88,15 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
         assert (
             settings.hf_tokenizer_config_path is not None
         ), "hf_tokenizer_config_path must be set for hf-tokenizer-config"
-        chat_handler = (
-            llama_cpp.llama_chat_format.hf_tokenizer_config_to_chat_completion_handler(
-                json.load(open(settings.hf_tokenizer_config_path))
-            )
+        chat_handler = llama_cpp.llama_chat_format.hf_tokenizer_config_to_chat_completion_handler(
+            json.load(open(settings.hf_tokenizer_config_path))
         )
 
     tokenizer: Optional[llama_cpp.BaseLlamaTokenizer] = None
     if settings.hf_pretrained_model_name_or_path is not None:
-        tokenizer = llama_tokenizer.LlamaHFTokenizer.from_pretrained(settings.hf_pretrained_model_name_or_path)
+        tokenizer = llama_tokenizer.LlamaHFTokenizer.from_pretrained(
+            settings.hf_pretrained_model_name_or_path
+        )
 
     draft_model = None
     if settings.draft_model is not None:
@@ -120,17 +120,20 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 kv_overrides[key] = float(value)
             else:
                 raise ValueError(f"Unknown value type {value_type}")
-
+
     import functools
 
     kwargs = {}
 
     if settings.hf_model_repo_id is not None:
-        create_fn = functools.partial(llama_cpp.Llama.from_pretrained, repo_id=settings.hf_model_repo_id, filename=settings.model)
+        create_fn = functools.partial(
+            llama_cpp.Llama.from_pretrained,
+            repo_id=settings.hf_model_repo_id,
+            filename=settings.model,
+        )
     else:
         create_fn = llama_cpp.Llama
         kwargs["model_path"] = settings.model
-
 
     _model = create_fn(
         **kwargs,
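
The last hunk reformats a small dispatch: `functools.partial` pre-binds `Llama.from_pretrained` when a Hugging Face repo id is configured, otherwise the plain `Llama` constructor is used with a local `model_path`, and the remaining keyword arguments are applied in one place. A short sketch of the same pattern follows, assuming `llama_cpp` is installed; the helper name `build_llama` and the usage paths are hypothetical.

# Hedged sketch of the constructor-dispatch pattern shown above.
import functools
from typing import Optional

import llama_cpp


def build_llama(
    model: str, hf_model_repo_id: Optional[str] = None, **extra
) -> llama_cpp.Llama:
    kwargs = dict(extra)
    if hf_model_repo_id is not None:
        # `model` names the GGUF file (or pattern) inside the Hub repo.
        create_fn = functools.partial(
            llama_cpp.Llama.from_pretrained,
            repo_id=hf_model_repo_id,
            filename=model,
        )
    else:
        # `model` is a local path to a GGUF file.
        create_fn = llama_cpp.Llama
        kwargs["model_path"] = model
    return create_fn(**kwargs)


# Usage (paths and repo names here are placeholders):
# llm = build_llama("models/llama-2-7b.Q4_K_M.gguf", n_ctx=2048)
# llm = build_llama("*q4_k_m.gguf", hf_model_repo_id="TheBloke/Llama-2-7B-GGUF")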

‎llama_cpp/server/settings.py

+4 −4 lines changed

@@ -74,7 +74,9 @@ class ModelSettings(BaseSettings):
         ge=0,
         description="The number of threads to use when batch processing.",
     )
-    rope_scaling_type: int = Field(default=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED)
+    rope_scaling_type: int = Field(
+        default=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
+    )
     rope_freq_base: float = Field(default=0.0, description="RoPE base frequency")
     rope_freq_scale: float = Field(
         default=0.0, description="RoPE frequency scaling factor"
@@ -193,6 +195,4 @@ class Settings(ServerSettings, ModelSettings):
 class ConfigFileSettings(ServerSettings):
     """Configuration file format settings."""
 
-    models: List[ModelSettings] = Field(
-        default=[], description="Model configs"
-    )
+    models: List[ModelSettings] = Field(default=[], description="Model configs")
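
Since `ConfigFileSettings.models` is a list of `ModelSettings`, a config file is essentially a JSON document with a `models` array. Below is a hedged sketch of parsing one; it assumes pydantic v2 (`model_validate_json`) and that `host` and `n_ctx` exist on the settings classes, which this diff does not show.

# Hedged sketch: parsing a multi-model server config into ConfigFileSettings.
import json

from llama_cpp.server.settings import ConfigFileSettings

raw = json.dumps(
    {
        "host": "0.0.0.0",  # assumed ServerSettings field
        "models": [
            {"model": "models/mistral-7b.Q4_K_M.gguf"},
            {"model": "models/llama-2-7b.Q4_K_M.gguf", "n_ctx": 4096},  # n_ctx assumed
        ],
    }
)

config = ConfigFileSettings.model_validate_json(raw)
for m in config.models:
    print(m.model)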

‎llama_cpp/server/types.py

+1 −1 lines changed

@@ -110,7 +110,7 @@ class CreateCompletionRequest(BaseModel):
         default=None,
         description="A suffix to append to the generated text. If None, no suffix is appended. Useful for chatbots.",
     )
-    max_tokens: Optional[int] = Field(
+    max_tokens: Optional[int] = Field(
         default=16, ge=0, description="The maximum number of tokens to generate."
     )
     temperature: float = temperature_field
