Commit e1cd61e

1 parent b1e9962 commit e1cd61e
2 files changed: +24 -7 lines changed

llama_cpp/llama.py (8 additions, 3 deletions)
@@ -872,7 +872,7 @@ def _completion_response(text: str, finish_reason: Literal["stop", "length"], lo
                 break

             if stream:
-                remaining_tokens = completion_tokens[returned_tokens:]
+                remaining_tokens = completion_tokens[returned_tokens:-1]
                 remaining_text = self.detokenize(remaining_tokens)
                 remaining_length = len(remaining_text)

@@ -1030,9 +1030,14 @@ def _completion_response(text: str, finish_reason: Literal["stop", "length"], lo
                         break
                     returned_tokens += 1
                     yield _completion_stream_response(
-                        text=last_text[: len(last_text) - (token_end_position - end)].decode("utf-8", errors="ignore"), logprobs_or_none=logprobs_or_none
+                        text=last_text[: len(last_text) - (token_end_position - end)].decode("utf-8", errors="ignore"), logprobs_or_none=logprobs_or_none, finish_reason=finish_reason
                     )
-                    break
+                    if self.cache:
+                        if self.verbose:
+                            print("Llama._create_completion: cache save", file=sys.stderr)
+                        self.cache[prompt_tokens + completion_tokens] = self.save_state()
+                        print("Llama._create_completion: cache saved", file=sys.stderr)
+                    return
                 returned_tokens += 1
                 yield _completion_stream_response(
                     text=self.detokenize([token]).decode("utf-8", errors="ignore"), logprobs_or_none=logprobs_or_none
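
The net effect of this hunk: the final streamed chunk now carries its finish_reason, and when a cache is configured the model state is saved under prompt_tokens + completion_tokens before the generator returns instead of breaking out silently. A rough caller-side sketch, not part of the commit, assuming the public llama-cpp-python API (Llama, LlamaCache, create_completion); the model path and prompt below are placeholders:

    # Sketch, not part of the commit: observing the new stream behaviour from the caller.
    from llama_cpp import Llama, LlamaCache

    llm = Llama(model_path="./model.bin", verbose=True)       # placeholder path
    llm.set_cache(LlamaCache())  # with this patch, state is saved when the stream finishes

    for chunk in llm.create_completion("Q: Name a planet. A:", max_tokens=16, stream=True):
        choice = chunk["choices"][0]
        print(choice["text"], end="", flush=True)
        if choice["finish_reason"] is not None:
            # the final chunk now reports why generation stopped ("stop" or "length")
            print(f"\n[finish_reason: {choice['finish_reason']}]")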

llama_cpp/llama_chat_format.py (16 additions, 4 deletions)
@@ -260,13 +260,25 @@ def _convert_text_completion_chunks_to_chat(
                     "index": 0,
                     "delta": {
                         "content": chunk["choices"][0]["text"],
-                    }
-                    if chunk["choices"][0]["finish_reason"] is None
-                    else {},
-                    "finish_reason": chunk["choices"][0]["finish_reason"],
+                    },
+                    "finish_reason": None,
                 }
             ],
         }
+        if chunk["choices"][0]["finish_reason"] is not None:
+            yield {
+                "id": "chat" + chunk["id"],
+                "model": chunk["model"],
+                "created": chunk["created"],
+                "object": "chat.completion.chunk",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {},
+                        "finish_reason": chunk["choices"][0]["finish_reason"],
+                    }
+                ],
+            }


 def _convert_completion_to_chat(
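
With this change the chat conversion emits content deltas that always report finish_reason=None, followed by one terminal chunk whose delta is empty and whose finish_reason is set, which mirrors the OpenAI streaming shape. A minimal consumer sketch, not part of the commit, assuming a configured Llama instance `llm` and a `messages` chat history (both placeholders) and the create_chat_completion streaming API:

    # Sketch, not part of the commit: how a caller sees the reworked chat chunk stream.
    for chunk in llm.create_chat_completion(messages=messages, stream=True):
        choice = chunk["choices"][0]
        if choice["finish_reason"] is None:
            # content deltas: finish_reason is always None here after this change
            print(choice["delta"].get("content", ""), end="", flush=True)
        else:
            # terminal chunk: empty delta, finish_reason set ("stop" or "length")
            print(f"\n[finish_reason: {choice['finish_reason']}]")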
