Commit 073b7e4

fix: added missing exit_stack.close() to /v1/chat/completions (abetlen#1796)

* fix: added missing exit_stack.close() to /v1/chat/completions
* fix: added missing exit_stack.close() to /v1/completions

1 parent 77a12a3 commit 073b7e4
2 files changed: +20 −9 lines changed
llama_cpp/server/app.py (+16 −7: 16 additions & 7 deletions)
@@ -314,10 +314,14 @@ async def create_completion(
     else:
         kwargs["logits_processor"].extend(_min_tokens_logits_processor)
 
-    iterator_or_completion: Union[
-        llama_cpp.CreateCompletionResponse,
-        Iterator[llama_cpp.CreateCompletionStreamResponse],
-    ] = await run_in_threadpool(llama, **kwargs)
+    try:
+        iterator_or_completion: Union[
+            llama_cpp.CreateCompletionResponse,
+            Iterator[llama_cpp.CreateCompletionStreamResponse],
+        ] = await run_in_threadpool(llama, **kwargs)
+    except Exception as err:
+        exit_stack.close()
+        raise err
 
     if isinstance(iterator_or_completion, Iterator):
         # EAFP: It's easier to ask for forgiveness than permission
@@ -344,6 +348,7 @@ def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:
             ping_message_factory=_ping_message_factory,
         )
     else:
+        exit_stack.close()
         return iterator_or_completion
 
 
@@ -508,9 +513,13 @@ async def create_chat_completion(
     else:
         kwargs["logits_processor"].extend(_min_tokens_logits_processor)
 
-    iterator_or_completion: Union[
-        llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
-    ] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
+    try:
+        iterator_or_completion: Union[
+            llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk]
+        ] = await run_in_threadpool(llama.create_chat_completion, **kwargs)
+    except Exception as err:
+        exit_stack.close()
+        raise err
 
     if isinstance(iterator_or_completion, Iterator):
         # EAFP: It's easier to ask for forgiveness than permission
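For context, the cleanup pattern this commit restores can be sketched roughly as follows: the handler enters resources on a contextlib.ExitStack, and that stack must be closed on every non-streaming exit path, including the path where the threadpool call raises before a streaming iterator ever takes ownership of the cleanup. This is a minimal, self-contained sketch with hypothetical names (acquire_resources, handle_request), not the project's actual handler code.

```python
# Minimal sketch of the ExitStack cleanup pattern, using a synchronous
# stand-in for the real handler. `acquire_resources` and `handle_request`
# are hypothetical names, not llama-cpp-python APIs.
import io
from contextlib import ExitStack
from typing import Dict, Iterator, Union


def acquire_resources(exit_stack: ExitStack) -> io.StringIO:
    # Placeholder for entering a context-managed model/lock on the stack.
    return exit_stack.enter_context(io.StringIO("fake-model"))


def handle_request(stream: bool) -> Union[Dict[str, object], Iterator[Dict[str, object]]]:
    exit_stack = ExitStack()
    model = acquire_resources(exit_stack)
    try:
        completion = {"model": model.getvalue(), "stream": stream}
    except Exception:
        # Without this close(), a failure here would leak everything still
        # registered on the stack (the gap this commit closes).
        exit_stack.close()
        raise
    if stream:
        def iterator() -> Iterator[Dict[str, object]]:
            try:
                yield completion
            finally:
                # Streaming path: the iterator owns the cleanup.
                exit_stack.close()
        return iterator()
    # Non-streaming path: release resources before returning the response.
    exit_stack.close()
    return completion
```

The key point the diff makes is that only the streaming branch can defer cleanup to its iterator; every other branch, including the exception path, has to close the stack itself.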

llama_cpp/server/errors.py (+4 −2: 4 additions & 2 deletions)
@@ -134,8 +134,6 @@ def error_message_wrapper(
     ] = None,
 ) -> Tuple[int, ErrorResponse]:
     """Wraps error message in OpenAI style error response"""
-    print(f"Exception: {str(error)}", file=sys.stderr)
-    traceback.print_exc(file=sys.stderr)
     if body is not None and isinstance(
         body,
         (
@@ -149,6 +147,10 @@ def error_message_wrapper(
         if match is not None:
             return callback(body, match)
 
+    # Only print the trace on unexpected exceptions
+    print(f"Exception: {str(error)}", file=sys.stderr)
+    traceback.print_exc(file=sys.stderr)
+
     # Wrap other errors as internal server error
     return 500, ErrorResponse(
         message=str(error),
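The errors.py change moves the stderr logging below the pattern-matched handlers, so known request errors are returned without noise and only unexpected exceptions print a traceback before being wrapped as a 500. A rough sketch of that control flow, with a simplified signature and response shape (the real wrapper also inspects the request body and returns an ErrorResponse), might look like this:

```python
# Simplified sketch of the "only log unexpected exceptions" flow; the handler
# mapping and dict response are illustrative, not the project's actual API.
import sys
import traceback
from typing import Callable, Dict, Tuple, Type


def wrap_error(
    error: Exception,
    handlers: Dict[Type[Exception], Callable[[Exception], Tuple[int, dict]]],
) -> Tuple[int, dict]:
    handler = handlers.get(type(error))
    if handler is not None:
        # Expected/known error: map it to a response without printing a trace.
        return handler(error)

    # Only print the trace on unexpected exceptions.
    print(f"Exception: {str(error)}", file=sys.stderr)
    traceback.print_exception(type(error), error, error.__traceback__, file=sys.stderr)
    return 500, {"message": str(error), "type": "internal_server_error"}
```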

0 commit comments
