Commit 4f0ec65

fix: chat API logprobs format (abetlen#1788)

* fix: chat API logprobs format
* Fix optional properties

1 parent: d610477
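Before this change, the chat completion handlers passed the text-completion logprobs object (parallel "tokens" / "token_logprobs" / "top_logprobs" arrays) straight through to the chat response; OpenAI's chat schema instead nests one entry per token under "content". A rough before/after sketch of the two shapes (token values and probabilities are invented, for illustration only):

```python
# Old shape: text-completion logprobs, previously returned as-is by the chat API.
text_completion_logprobs = {
    "tokens": ["Hello", "!"],
    "token_logprobs": [-0.1, -0.5],
    "top_logprobs": [{"Hello": -0.1, "Hi": -2.3}, {"!": -0.5}],
}

# New shape: OpenAI-style chat logprobs, what the chat API returns after this fix.
chat_logprobs = {
    "content": [
        {
            "token": "Hello",
            "logprob": -0.1,
            "bytes": None,
            "top_logprobs": [
                {"token": "Hello", "logprob": -0.1, "bytes": None},
                {"token": "Hi", "logprob": -2.3, "bytes": None},
            ],
        },
        {
            "token": "!",
            "logprob": -0.5,
            "bytes": None,
            "top_logprobs": [{"token": "!", "logprob": -0.5, "bytes": None}],
        },
    ],
    "refusal": None,
}
```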

File tree

3 files changed: +59 / -18 lines

Makefile

+1 -1 (1 addition & 1 deletion)
@@ -62,7 +62,7 @@ docker:
 	docker build -t llama-cpp-python:latest -f docker/simple/Dockerfile .
 
 run-server:
-	uvicorn --factory llama.server:app --host ${HOST} --port ${PORT}
+	python llama_cpp/server --model ${MODEL}
 
 clean:
 	- cd vendor/llama.cpp && make clean
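Usage note (not part of the diff): with this change, `make run-server MODEL=path/to/model.gguf` launches the bundled OpenAI-compatible server directly, roughly equivalent to `python -m llama_cpp.server --model path/to/model.gguf`; the old recipe pointed uvicorn at `llama.server:app`, which does not match the `llama_cpp` package name.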

llama_cpp/llama_chat_format.py

+38 -15 (38 additions & 15 deletions)
@@ -259,6 +259,31 @@ def to_chat_handler(self) -> LlamaChatCompletionHandler:
         return chat_formatter_to_chat_completion_handler(self)
 
 
+def _convert_text_completion_logprobs_to_chat(
+    logprobs: Optional[llama_types.CompletionLogprobs],
+) -> llama_types.ChatCompletionLogprobs:
+    if logprobs is None:
+        return None
+
+    return {
+        "content": [
+            {
+                "token": token,
+                "bytes": None,
+                "logprob": logprob,
+                "top_logprobs": [
+                    {
+                        "token": top_token,
+                        "logprob": top_logprob,
+                        "bytes": None,
+                    }
+                    for top_token, top_logprob in top_logprobs.items()
+                ],
+            } for (token, logprob, top_logprobs) in zip(logprobs["tokens"], logprobs["token_logprobs"], logprobs["top_logprobs"])
+        ],
+        "refusal": None,
+    }
+
 def _convert_text_completion_to_chat(
     completion: llama_types.Completion,
 ) -> llama_types.ChatCompletion:
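The added helper regroups the parallel text-completion arrays into per-token chat entries. A minimal sketch of calling it (not part of the diff; assumes llama-cpp-python is installed so the module imports, and uses invented probabilities):

```python
from llama_cpp.llama_chat_format import _convert_text_completion_logprobs_to_chat

# Hypothetical text-completion logprobs for a single generated token.
completion_logprobs = {
    "tokens": [" Paris"],
    "token_logprobs": [-0.02],
    "top_logprobs": [{" Paris": -0.02, " London": -4.1}],
}

chat_logprobs = _convert_text_completion_logprobs_to_chat(completion_logprobs)
# chat_logprobs["content"][0]["token"]        -> " Paris"
# chat_logprobs["content"][0]["top_logprobs"] -> one entry per candidate token
print(chat_logprobs)
```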
@@ -275,7 +300,7 @@ def _convert_text_completion_to_chat(
                     "role": "assistant",
                     "content": completion["choices"][0]["text"],
                 },
-                "logprobs": completion["choices"][0]["logprobs"],
+                "logprobs": _convert_text_completion_logprobs_to_chat(completion["choices"][0]["logprobs"]),
                 "finish_reason": completion["choices"][0]["finish_reason"],
             }
         ],
@@ -319,7 +344,7 @@ def _convert_text_completion_chunks_to_chat(
                     if chunk["choices"][0]["finish_reason"] is None
                     else {}
                 ),
-                "logprobs": chunk["choices"][0]["logprobs"],
+                "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                 "finish_reason": chunk["choices"][0]["finish_reason"],
             }
         ],
@@ -382,7 +407,7 @@ def _convert_completion_to_chat_function(
                         }
                     ],
                 },
-                "logprobs": completion["choices"][0]["logprobs"],
+                "logprobs": _convert_text_completion_logprobs_to_chat(completion["choices"][0]["logprobs"]),
                 "finish_reason": "tool_calls",
             }
         ],
@@ -435,7 +460,7 @@ def _stream_response_to_function_stream(
                 {
                     "index": 0,
                     "finish_reason": None,
-                    "logprobs": chunk["choices"][0]["logprobs"],
+                    "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                     "delta": {
                         "role": None,
                         "content": None,
@@ -472,7 +497,7 @@ def _stream_response_to_function_stream(
                 {
                     "index": 0,
                     "finish_reason": None,
-                    "logprobs": chunk["choices"][0]["logprobs"],
+                    "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                     "delta": {
                         "role": None,
                         "content": None,
@@ -1716,7 +1741,7 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
                         }
                     ],
                 },
-                "logprobs": completion["choices"][0]["logprobs"],
+                "logprobs": _convert_text_completion_logprobs_to_chat(completion["choices"][0]["logprobs"]),
                 "finish_reason": "tool_calls",
             }
         ],
@@ -2128,7 +2153,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": None,
                             "content": None,
@@ -2230,7 +2255,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": "assistant",
                             "content": None,
@@ -2268,9 +2293,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": chunk["choices"][0][
-                            "logprobs"
-                        ],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": "assistant",
                             "content": buffer.pop(0),
@@ -2293,7 +2316,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": "assistant",
                             "content": (
@@ -2379,7 +2402,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": chunk["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(chunk["choices"][0]["logprobs"]),
                         "delta": {
                             "role": None,
                             "content": None,
@@ -2613,7 +2636,7 @@ def generate_streaming(tools, functions, function_call, prompt):
                 choices=[
                     {
                         "index": 0,
-                        "logprobs": completion["choices"][0]["logprobs"],
+                        "logprobs": _convert_text_completion_logprobs_to_chat(completion["choices"][0]["logprobs"]),
                         "message": {
                             "role": "assistant",
                             "content": None if content == "" else content,
@@ -3745,7 +3768,7 @@ def chatml_function_calling(
                 {
                     "finish_reason": "tool_calls",
                     "index": 0,
-                    "logprobs": completion["choices"][0]["logprobs"],
+                    "logprobs": _convert_text_completion_logprobs_to_chat(completion["choices"][0]["logprobs"]),
                     "message": {
                         "role": "assistant",
                         "content": None,

llama_cpp/llama_types.py

+20 -2 (20 additions & 2 deletions)
@@ -82,10 +82,28 @@ class ChatCompletionFunction(TypedDict):
     parameters: Dict[str, JsonType]  # TODO: make this more specific
 
 
+class ChatCompletionTopLogprobToken(TypedDict):
+    token: str
+    logprob: float
+    bytes: Optional[List[int]]
+
+
+class ChatCompletionLogprobToken(ChatCompletionTopLogprobToken):
+    token: str
+    logprob: float
+    bytes: Optional[List[int]]
+    top_logprobs: List[ChatCompletionTopLogprobToken]
+
+
+class ChatCompletionLogprobs(TypedDict):
+    content: Optional[List[ChatCompletionLogprobToken]]
+    refusal: Optional[List[ChatCompletionLogprobToken]]
+
+
 class ChatCompletionResponseChoice(TypedDict):
     index: int
     message: "ChatCompletionResponseMessage"
-    logprobs: Optional[CompletionLogprobs]
+    logprobs: Optional[ChatCompletionLogprobs]
     finish_reason: Optional[str]
 
 
@@ -134,7 +152,7 @@ class ChatCompletionStreamResponseChoice(TypedDict):
         ChatCompletionStreamResponseDelta, ChatCompletionStreamResponseDeltaEmpty
     ]
     finish_reason: Optional[Literal["stop", "length", "tool_calls", "function_call"]]
-    logprobs: NotRequired[Optional[CompletionLogprobs]]
+    logprobs: NotRequired[Optional[ChatCompletionLogprobs]]
 
 
 class CreateChatCompletionStreamResponse(TypedDict):
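The new TypedDicts in llama_types.py mirror the nesting used by the converter above. A small sketch of how they compose (invented values; assumes llama-cpp-python is installed so the types import):

```python
from llama_cpp.llama_types import (
    ChatCompletionLogprobToken,
    ChatCompletionLogprobs,
    ChatCompletionTopLogprobToken,
)

# One candidate token considered at a position.
top: ChatCompletionTopLogprobToken = {"token": "Hi", "logprob": -0.3, "bytes": None}

# The sampled token at that position, with its candidates attached.
tok: ChatCompletionLogprobToken = {
    "token": "Hi",
    "logprob": -0.3,
    "bytes": None,
    "top_logprobs": [top],
}

# The choice-level logprobs object returned by the chat API.
logprobs: ChatCompletionLogprobs = {"content": [tok], "refusal": None}
```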
