Commit b8e1ba9

Merge pull request #10 from ZenHubHQ/feature/#2277
Uses logger with formatted output across function calls
2 parents 4d60fdf + 8bb7ee5 commit b8e1ba9

6 files changed: +62 −19 lines

‎llama_cpp/_logger.py

39 additions, 2 deletions
@@ -1,6 +1,7 @@
 import sys
 import ctypes
 import logging
+import logging.config

 import llama_cpp

@@ -17,8 +18,38 @@
     5: logging.DEBUG,
 }

-logger = logging.getLogger("llama-cpp-python")
+UVICORN_LOGGING_CONFIG = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "formatters": {
+        "standard": {"format": "%(asctime)s [%(levelname)s] %(message)s"},
+    },
+    "handlers": {
+        "default": {
+            "level": "INFO",
+            "formatter": "standard",
+            "class": "logging.StreamHandler",
+            "stream": "ext://sys.stdout",  # Default is stderr
+        },
+    },
+    "loggers": {
+        "uvicorn.error": {
+            "level": "DEBUG",
+            "handlers": ["default"],
+        },
+        "uvicorn.access": {
+            "level": "DEBUG",
+            "handlers": ["default"],
+        },
+    },
+}

+# Set up llama-cpp-python logger matching the format of uvicorn logger
+logger = logging.getLogger("llama-cpp-python")
+handler = logging.StreamHandler()
+formatter = logging.Formatter("%(asctime)s - [%(levelname)s] - %(message)s")
+handler.setFormatter(formatter)
+logger.addHandler(handler)


 @llama_cpp.llama_log_callback
 def llama_log_callback(
@@ -27,7 +58,13 @@ def llama_log_callback(
     user_data: ctypes.c_void_p,
 ):
     if logger.level <= GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level]:
-        print(text.decode("utf-8"), end="", flush=True, file=sys.stderr)
+        _text = text.decode("utf-8")
+        if _text.endswith("\n"):
+            _text = _text[:-1]
+
+        # Skip if the message only contains "."
+        if not _text == ".":
+            logger.log(GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level], _text)


 llama_cpp.llama_log_set(llama_log_callback, ctypes.c_void_p(0))
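
For reference, a minimal standalone sketch of the handler/formatter wiring this file now performs at import time. It is not part of the commit; the setLevel call and the sample message are added only for illustration:

import logging

# Mirror of the module-level setup above; the INFO threshold and the sample
# message are illustrative additions, not part of the commit.
logger = logging.getLogger("llama-cpp-python")
handler = logging.StreamHandler()
formatter = logging.Formatter("%(asctime)s - [%(levelname)s] - %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

logger.info("Model metadata loaded")
# Prints roughly: 2024-06-01 12:00:00,000 - [INFO] - Model metadata loaded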

‎llama_cpp/llama.py

12 additions, 11 deletions
@@ -66,7 +66,7 @@
     _LlamaSamplingContext,  # type: ignore
     _normalize_embedding,  # type: ignore
 )
-from ._logger import set_verbose
+from ._logger import set_verbose, logger
 from ._utils import suppress_stdout_stderr


@@ -403,7 +403,7 @@ def __init__(
         )

         if self.verbose:
-            print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
+            logger.info(f'System info: {llama_cpp.llama_print_system_info().decode("utf-8")}')

         self.chat_format = chat_format
         self.chat_handler = chat_handler
@@ -434,10 +434,10 @@ def __init__(
         except Exception as e:
             self.metadata = {}
             if self.verbose:
-                print(f"Failed to load metadata: {e}", file=sys.stderr)
+                logger.error(f"Failed to load metadata: {e}")

         if self.verbose:
-            print(f"Model metadata: {self.metadata}", file=sys.stderr)
+            logger.info(f"Model metadata: {self.metadata}")

         eos_token_id = self.token_eos()
         bos_token_id = self.token_bos()
@@ -452,7 +452,7 @@ def __init__(
             template_choices["chat_template.default"] = self.metadata["tokenizer.chat_template"]

         if self.verbose and template_choices:
-            print(f"Available chat formats from metadata: {', '.join(template_choices.keys())}", file=sys.stderr)
+            logger.info(f"Available chat formats from metadata: {', '.join(template_choices.keys())}")

         for name, template in template_choices.items():
             self._chat_handlers[name] = llama_chat_format.Jinja2ChatFormatter(
@@ -474,19 +474,19 @@ def __init__(
             if chat_format is not None:
                 self.chat_format = chat_format
                 if self.verbose:
-                    print(f"Guessed chat format: {chat_format}", file=sys.stderr)
+                    logger.info(f"Guessed chat format: {chat_format}")
             else:
                 if self.verbose:
-                    print(f"Using gguf chat template: {template_choices['chat_template.default']}", file=sys.stderr)
-                    print(f"Using chat eos_token: {eos_token}", file=sys.stderr)
-                    print(f"Using chat bos_token: {bos_token}", file=sys.stderr)
+                    logger.info(f"Using gguf chat template: {template_choices['chat_template.default']}")
+                    logger.info(f"Using chat eos_token: {eos_token}")
+                    logger.info(f"Using chat bos_token: {bos_token}")

                 self.chat_format = "chat_template.default"

         if self.chat_format is None and self.chat_handler is None:
             self.chat_format = "llama-2"
             if self.verbose:
-                print(f"Using fallback chat format: {self.chat_format}", file=sys.stderr)
+                logger.info(f"Using fallback chat format: {self.chat_format}")

     @property
     def ctx(self) -> llama_cpp.llama_context_p:
@@ -728,7 +728,8 @@ def generate(
                     break
            if longest_prefix > 0:
                if self.verbose:
-                    print("Llama.generate: prefix-match hit", file=sys.stderr)
+                    # print("Llama.generate: prefix-match hit", file=sys.stderr)
+                    logger.info("Llama.generate: prefix-match hit")
                reset = False
                tokens = tokens[longest_prefix:]
                self.n_tokens = longest_prefix
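
Since llama.py now routes its verbose output through the shared llama-cpp-python logger rather than print, callers can tune or silence it with the standard logging API. A minimal sketch (the WARNING threshold is only an example, not something this commit sets):

import logging

# Keep warnings and errors from llama-cpp-python but drop the INFO-level
# chatter (chat format guesses, metadata dumps, prefix-match hits, etc.).
logging.getLogger("llama-cpp-python").setLevel(logging.WARNING)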

‎llama_cpp/server/__main__.py

3 additions, 1 deletion
@@ -37,6 +37,7 @@
     ConfigFileSettings,
 )
 from llama_cpp.server.cli import add_args_from_model, parse_model_from_args
+from llama_cpp._logger import logger, UVICORN_LOGGING_CONFIG


 def main():
@@ -75,7 +76,7 @@ def main():
         server_settings = parse_model_from_args(ServerSettings, args)
         model_settings = [parse_model_from_args(ModelSettings, args)]
     except Exception as e:
-        print(e, file=sys.stderr)
+        logger.error(e)
         parser.print_help()
         sys.exit(1)
     assert server_settings is not None
@@ -90,6 +91,7 @@ def main():
         port=int(os.getenv("PORT", server_settings.port)),
         ssl_keyfile=server_settings.ssl_keyfile,
         ssl_certfile=server_settings.ssl_certfile,
+        log_config=UVICORN_LOGGING_CONFIG
     )
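
UVICORN_LOGGING_CONFIG follows the standard logging.config dictConfig schema, and uvicorn.run() accepts such a mapping through its log_config parameter. A minimal sketch of the same wiring outside the server entry point, with a placeholder FastAPI app standing in for the real server application:

import uvicorn
from fastapi import FastAPI

from llama_cpp._logger import UVICORN_LOGGING_CONFIG

app = FastAPI()  # placeholder ASGI app for the sketch

if __name__ == "__main__":
    # uvicorn applies the dictConfig mapping to its own loggers, so
    # uvicorn.error and uvicorn.access use the "standard" formatter
    # defined in llama_cpp/_logger.py.
    uvicorn.run(app, host="127.0.0.1", port=8000, log_config=UVICORN_LOGGING_CONFIG)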

‎llama_cpp/server/app.py

3 additions, 2 deletions
@@ -51,6 +51,7 @@
 from llama_cpp.server.errors import RouteErrorHandler
 from llama_cpp._utils import monitor_task_queue
 from llama_cpp.llama_metrics import MetricsExporter
+from llama_cpp._logger import logger


 router = APIRouter(route_class=RouteErrorHandler)
@@ -211,9 +212,9 @@ async def get_event_publisher(
                 raise anyio.get_cancelled_exc_class()()
             await inner_send_chan.send(dict(data="[DONE]"))
         except anyio.get_cancelled_exc_class() as e:
-            print("disconnected")
+            logger.warning(f"Disconnected from client {request.client}")
             with anyio.move_on_after(1, shield=True):
-                print(f"Disconnected from client (via refresh/close) {request.client}")
+                logger.error(f"Disconnected from client (via refresh/close) {request.client}")
                raise e
         finally:
             if on_complete:

‎llama_cpp/server/errors.py

2 additions, 1 deletion
@@ -21,6 +21,7 @@
     CreateEmbeddingRequest,
     CreateChatCompletionRequest,
 )
+from llama_cpp._logger import logger


 class ErrorResponse(TypedDict):
@@ -134,7 +135,7 @@ def error_message_wrapper(
     ] = None,
 ) -> Tuple[int, ErrorResponse]:
     """Wraps error message in OpenAI style error response"""
-    print(f"Exception: {str(error)}", file=sys.stderr)
+    logger.error(f"Exception: {str(error)}")
     traceback.print_exc(file=sys.stderr)
     if body is not None and isinstance(
         body,
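
One thing the hunk above leaves unchanged is traceback.print_exc, which still writes the traceback straight to stderr. If the traceback should also pass through the shared handler, the standard-library route is exc_info=True; the sketch below is a possible follow-up, not something this commit does:

import logging

logger = logging.getLogger("llama-cpp-python")

try:
    raise ValueError("example failure")  # placeholder error for the sketch
except ValueError as error:
    # exc_info=True appends the active traceback to the log record, so it is
    # rendered by the logger's handler instead of going directly to stderr.
    logger.error(f"Exception: {error}", exc_info=True)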

‎llama_cpp/server/model.py

3 additions, 2 deletions
@@ -9,6 +9,7 @@
 import llama_cpp.llama_tokenizer as llama_tokenizer

 from llama_cpp.server.settings import ModelSettings
+from llama_cpp._logger import logger


 class LlamaProxy:
@@ -272,11 +273,11 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
     if settings.cache:
         if settings.cache_type == "disk":
             if settings.verbose:
-                print(f"Using disk cache with size {settings.cache_size}")
+                logger.info(f"Using disk cache with size {settings.cache_size}")
             cache = llama_cpp.LlamaDiskCache(capacity_bytes=settings.cache_size)
         else:
             if settings.verbose:
-                print(f"Using ram cache with size {settings.cache_size}")
+                logger.info(f"Using ram cache with size {settings.cache_size}")
             cache = llama_cpp.LlamaRAMCache(capacity_bytes=settings.cache_size)
         _model.set_cache(cache)
     return _model
