Commit 690c563

Merge branch 'main' of github.com:abetlen/llama_cpp_python into main

2 parents: c0fc0a1 + 8e44a32
6 files changed: 54 additions, 17 deletions

README.md (4 additions, 2 deletions)
@@ -207,7 +207,8 @@ The gguf-converted files for this model can be found here: [functionary-7b-v1](h
 messages = [
     {
         "role": "system",
-        "content": "A chat between a curious user and an artificial intelligence assitant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant callse functions with appropriate input when necessary"
+        "content": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary"
+
     },
     {
         "role": "user",
@@ -265,7 +266,8 @@ Then you'll need to use a custom chat handler to load the clip model and process
 >>> llm = Llama(
   model_path="./path/to/llava/llama-model.gguf",
   chat_handler=chat_handler,
-  n_ctx=2048 # n_ctx should be increased to accomodate the image embedding
+  n_ctx=2048, # n_ctx should be increased to accomodate the image embedding
+  logits_all=True,# needed to make llava work
 )
 >>> llm.create_chat_completion(
     messages = [
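
The second hunk above touches the README's LLaVA example; assembled without diff markers, the updated snippet would look roughly like this. The model and clip projector paths are placeholders, and Llava15ChatHandler is the handler that section of the README already uses.

from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

# Placeholder paths; point these at your own LLaVA GGUF model and clip projector.
chat_handler = Llava15ChatHandler(clip_model_path="./path/to/llava/mmproj.bin")
llm = Llama(
    model_path="./path/to/llava/llama-model.gguf",
    chat_handler=chat_handler,
    n_ctx=2048,       # n_ctx should be increased to accommodate the image embedding
    logits_all=True,  # needed to make llava work
)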

llama_cpp/_utils.py (16 additions, 10 deletions)
@@ -17,14 +17,18 @@ def __enter__(self):
         if self.disable:
             return self
 
+        # Check if sys.stdout and sys.stderr have fileno method
+        if not hasattr(self.sys.stdout, 'fileno') or not hasattr(self.sys.stderr, 'fileno'):
+            return self  # Return the instance without making changes
+
         self.outnull_file = self.open(self.os.devnull, "w")
         self.errnull_file = self.open(self.os.devnull, "w")
 
         self.old_stdout_fileno_undup = self.sys.stdout.fileno()
         self.old_stderr_fileno_undup = self.sys.stderr.fileno()
 
-        self.old_stdout_fileno = self.os.dup(self.sys.stdout.fileno())
-        self.old_stderr_fileno = self.os.dup(self.sys.stderr.fileno())
+        self.old_stdout_fileno = self.os.dup(self.old_stdout_fileno_undup)
+        self.old_stderr_fileno = self.os.dup(self.old_stderr_fileno_undup)
 
         self.old_stdout = self.sys.stdout
         self.old_stderr = self.sys.stderr
@@ -40,14 +44,16 @@ def __exit__(self, *_):
         if self.disable:
             return
 
-        self.sys.stdout = self.old_stdout
-        self.sys.stderr = self.old_stderr
+        # Check if sys.stdout and sys.stderr have fileno method
+        if hasattr(self.sys.stdout, 'fileno') and hasattr(self.sys.stderr, 'fileno'):
+            self.sys.stdout = self.old_stdout
+            self.sys.stderr = self.old_stderr
 
-        self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
-        self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
+            self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
+            self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
 
-        self.os.close(self.old_stdout_fileno)
-        self.os.close(self.old_stderr_fileno)
+            self.os.close(self.old_stdout_fileno)
+            self.os.close(self.old_stderr_fileno)
 
-        self.outnull_file.close()
-        self.errnull_file.close()
+            self.outnull_file.close()
+            self.errnull_file.close()
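
Why the new hasattr guard: some embedding hosts replace sys.stdout/sys.stderr with objects that have no fileno() at all, and the old code then crashed on os.dup(). A minimal standalone illustration, with a hypothetical _NoFilenoStream standing in for such a host:

import sys

# Hypothetical stand-in for a host application that swaps sys.stdout for a
# minimal file-like object that has no fileno() method at all.
class _NoFilenoStream:
    def write(self, text):
        pass

    def flush(self):
        pass

stream = _NoFilenoStream()
print(hasattr(stream, "fileno"))      # False -> the patched __enter__ returns early
print(hasattr(sys.stdout, "fileno"))  # True on a normal console, so redirection proceeds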

llama_cpp/llama.py (8 additions, 4 deletions)
@@ -2280,10 +2280,14 @@ def token_nl(self) -> int:
         return self._model.token_nl()
 
     @staticmethod
-    def logits_to_logprobs(logits: List[float]) -> List[float]:
-        exps = [math.exp(float(x)) for x in logits]
-        sum_exps = sum(exps)
-        return [math.log(x / sum_exps) for x in exps]
+    def logits_to_logprobs(logits: npt.NDArray[np.single]) -> npt.NDArray[np.single]:
+        maximum = np.max(logits)
+        tmp = np.subtract(logits, maximum, dtype=np.single)
+        np.exp(tmp, out=tmp)
+        normalizer = 1.0 / np.sum(tmp)
+        np.multiply(normalizer, tmp, out=tmp)
+        np.log(tmp, out=tmp)
+        return tmp
 
     @staticmethod
     def longest_token_prefix(a: Sequence[int], b: Sequence[int]):
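
The replacement is the standard numerically stable log-softmax: shift by max(logits) before exponentiating so exp() cannot overflow, then normalize and take the log. A self-contained sketch of the same steps, with made-up logits, shows the difference against the naive formula:

import numpy as np

def logits_to_logprobs(logits: np.ndarray) -> np.ndarray:
    # Same steps as the patched method: shift by the max, exponentiate in place,
    # normalize, then take the log.
    maximum = np.max(logits)
    tmp = np.subtract(logits, maximum, dtype=np.single)
    np.exp(tmp, out=tmp)
    normalizer = 1.0 / np.sum(tmp)
    np.multiply(normalizer, tmp, out=tmp)
    np.log(tmp, out=tmp)
    return tmp

logits = np.array([1000.0, 999.0, 998.0], dtype=np.single)
with np.errstate(over="ignore", invalid="ignore"):
    naive = np.log(np.exp(logits) / np.sum(np.exp(logits)))  # exp overflows -> nan
stable = logits_to_logprobs(logits)                          # finite: ~[-0.41, -1.41, -2.41]
print(naive, stable)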

llama_cpp/llama_chat_format.py (17 additions, 0 deletions)
@@ -637,6 +637,23 @@ def format_zephyr(
     _prompt = _format_chatml(system_message, _messages, _sep)
     return ChatFormatterResponse(prompt=_prompt, stop=_sep)
 
+
+@register_chat_format("pygmalion")
+def format_pygmalion(
+    messages: List[llama_types.ChatCompletionRequestMessage],
+    **kwargs: Any,
+) -> ChatFormatterResponse:
+    system_template = """<|system|>{system_message}"""
+    system_message = _get_system_message(messages)
+    system_message = system_template.format(system_message=system_message)
+    _roles = dict(user="<|user|>", assistant="<|model|>")
+    _sep = "\n"
+    _messages = _map_roles(messages, _roles)
+    _messages.append((_roles["assistant"], None))
+    _prompt = _format_chatml(system_message, _messages, _sep)
+    return ChatFormatterResponse(prompt=_prompt, stop=_sep)
+
+
 @register_chat_format("chatml")
 def format_chatml(
     messages: List[llama_types.ChatCompletionRequestMessage],
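
For reference, a registered format is selected by name when constructing a model. A hedged usage sketch of the new "pygmalion" format (the model path is a placeholder; any Pygmalion-style GGUF model would do):

from llama_cpp import Llama

# Placeholder model path; substitute a local Pygmalion/Metharme-style GGUF file.
llm = Llama(model_path="./models/pygmalion-2-7b.Q4_K_M.gguf", chat_format="pygmalion")

response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "Enter roleplay mode. You are a friendly assistant."},
        {"role": "user", "content": "Hello!"},
    ],
)
print(response["choices"][0]["message"]["content"])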

llama_cpp/server/__main__.py (2 additions, 1 deletion)
@@ -96,5 +96,6 @@ def parse_bool_arg(arg):
     app = create_app(settings=settings)
 
     uvicorn.run(
-        app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port))
+        app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port)),
+        ssl_keyfile=settings.ssl_keyfile, ssl_certfile=settings.ssl_certfile
     )
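
Combined with the new Settings fields in llama_cpp/server/app.py below, this lets the server run over HTTPS. A sketch of launching it programmatically with the same arguments (model and certificate paths are placeholders):

import os
import uvicorn
from llama_cpp.server.app import create_app, Settings

# Placeholder paths for the model and a self-signed key/cert pair.
settings = Settings(
    model="./models/llama-2-7b-chat.Q4_K_M.gguf",
    ssl_keyfile="./certs/key.pem",
    ssl_certfile="./certs/cert.pem",
)
app = create_app(settings=settings)

uvicorn.run(
    app,
    host=os.getenv("HOST", settings.host),
    port=int(os.getenv("PORT", settings.port)),
    ssl_keyfile=settings.ssl_keyfile,
    ssl_certfile=settings.ssl_certfile,
)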

llama_cpp/server/app.py (7 additions, 0 deletions)
@@ -150,6 +150,13 @@ class Settings(BaseSettings):
     # Server Params
     host: str = Field(default="localhost", description="Listen address")
     port: int = Field(default=8000, description="Listen port")
+    # SSL Params
+    ssl_keyfile: Optional[str] = Field(
+        default=None, description="SSL key file for HTTPS"
+    )
+    ssl_certfile: Optional[str] = Field(
+        default=None, description="SSL certificate file for HTTPS"
+    )
     interrupt_requests: bool = Field(
         default=True,
         description="Whether to interrupt requests when a new request is received.",
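
Once the server is started with ssl_keyfile and ssl_certfile set, clients reach it over TLS. A minimal client-side sketch (the URL, prompt, and certificate path are placeholders; httpx is just one choice of HTTP client):

import httpx

# Trust the self-signed certificate explicitly instead of disabling verification.
with httpx.Client(verify="./certs/cert.pem", timeout=60.0) as client:
    response = client.post(
        "https://localhost:8000/v1/completions",
        json={"prompt": "Q: Name the planets in the solar system? A: ", "max_tokens": 32},
    )
print(response.json())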
