Commit 1f1abfd

Update llama_types to match OpenAI v1 API

Parent: 7c3009e
4 files changed: +145 −53 lines

llama_cpp/llama.py

+5 −5 (5 additions, 5 deletions)
@@ -1231,7 +1231,7 @@ def create_embedding(
         else:
             inputs = input
 
-        data: List[EmbeddingData] = []
+        data: List[Embedding] = []
         total_tokens = 0
         for index, input in enumerate(inputs):
             tokens = self.tokenize(input.encode("utf-8"), special=True)
@@ -1297,7 +1297,7 @@ def _create_completion(
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-    ) -> Union[Iterator[Completion], Iterator[CompletionChunk]]:
+    ) -> Union[Iterator[CreateCompletionResponse], Iterator[CreateCompletionStreamResponse]]:
         assert self._ctx is not None
         assert suffix is None or suffix.__class__ is str
 
@@ -1753,7 +1753,7 @@ def create_completion(
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-    ) -> Union[Completion, Iterator[CompletionChunk]]:
+    ) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]:
         """Generate text from a prompt.
 
         Args:
@@ -1800,7 +1800,7 @@ def create_completion(
             grammar=grammar,
         )
         if stream:
-            chunks: Iterator[CompletionChunk] = completion_or_chunks
+            chunks: Iterator[CreateCompletionStreamResponse] = completion_or_chunks
             return chunks
         completion: Completion = next(completion_or_chunks)  # type: ignore
         return completion
@@ -1828,7 +1828,7 @@ def __call__(
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         logits_processor: Optional[LogitsProcessorList] = None,
         grammar: Optional[LlamaGrammar] = None,
-    ) -> Union[Completion, Iterator[CompletionChunk]]:
+    ) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]:
         """Generate text from a prompt.
 
         Args:
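
These renames only touch type annotations, so call sites keep working. A minimal usage sketch of the updated signatures, assuming a local GGUF model; the model path and prompt below are placeholders and not part of this commit:

from typing import Iterator

from llama_cpp import Llama
from llama_cpp.llama_types import CreateCompletionResponse, CreateCompletionStreamResponse

# Hypothetical model path; any local GGUF model works here.
llm = Llama(model_path="./models/7B/model.gguf")

# With stream=True the call yields CreateCompletionStreamResponse chunks;
# with stream=False it returns a single CreateCompletionResponse dict instead.
chunks: Iterator[CreateCompletionStreamResponse] = llm(
    "Q: Name the planets in the solar system. A:",
    max_tokens=32,
    stream=True,
)  # type: ignore

for chunk in chunks:
    print(chunk["choices"][0]["text"], end="", flush=True)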

llama_cpp/llama_chat_format.py

+39 −7 (39 additions, 7 deletions)
@@ -199,7 +199,7 @@ def _convert_text_completion_to_chat(
 
 
 def _convert_text_completion_chunks_to_chat(
-    chunks: Iterator[llama_types.CompletionChunk],
+    chunks: Iterator[llama_types.CreateCompletionStreamResponse],
 ) -> Iterator[llama_types.ChatCompletionChunk]:
     for i, chunk in enumerate(chunks):
         if i == 0:
@@ -239,12 +239,12 @@ def _convert_text_completion_chunks_to_chat(
 
 def _convert_completion_to_chat(
     completion_or_chunks: Union[
-        llama_types.Completion, Iterator[llama_types.CompletionChunk]
+        llama_types.CreateCompletionResponse, Iterator[llama_types.CreateCompletionStreamResponse]
     ],
     stream: bool = False,
-) -> Union[llama_types.ChatCompletion, Iterator[llama_types.ChatCompletionChunk]]:
+) -> Union[llama_types.CreateChatCompletionResponse, Iterator[llama_types.ChatCompletionChunk]]:
     if stream:
-        chunks: Iterator[llama_types.CompletionChunk] = completion_or_chunks  # type: ignore
+        chunks: Iterator[llama_types.CreateCompletionStreamResponse] = completion_or_chunks  # type: ignore
         return _convert_text_completion_chunks_to_chat(chunks)
     else:
         completion: llama_types.Completion = completion_or_chunks  # type: ignore
@@ -613,13 +613,13 @@ def prepare_messages_for_inference(
         all_messages: List[llama_types.ChatCompletionRequestMessage] = []
         if functions is not None:
             all_messages.append(
-                llama_types.ChatCompletionRequestMessage(
+                llama_types.ChatCompletionRequestSystemMessage(
                     role="system", content=generate_schema_from_functions(functions)
                 )
             )
 
         all_messages.append(
-            llama_types.ChatCompletionRequestMessage(
+            llama_types.ChatCompletionRequestSystemMessage(
                 role="system", content=SYSTEM_MESSAGE
             )
         )
@@ -636,7 +636,7 @@ def prepare_messages_for_inference(
             all_messages.append(message)
 
         all_messages.append(
-            llama_types.ChatCompletionRequestMessage(role="assistant", content=None)
+            llama_types.ChatCompletionRequestAssistantMessage(role="assistant", content=None)
         )
 
     def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
@@ -734,3 +734,35 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
         ],
         usage=completion["usage"],
     )
+
+
+@register_chat_completion_handler("llava-1.5")
+def lava_1_5_chat_handler(
+    llama: llama.Llama,
+    messages: List[llama_types.ChatCompletionRequestMessage],
+    functions: Optional[List[llama_types.ChatCompletionFunction]] = None,
+    function_call: Optional[Union[str, llama_types.ChatCompletionFunctionCall]] = None,
+    temperature: float = 0.2,
+    top_p: float = 0.95,
+    top_k: int = 40,
+    stream: bool = False,
+    stop: Optional[Union[str, List[str]]] = [],
+    max_tokens: int = 256,
+    presence_penalty: float = 0.0,
+    frequency_penalty: float = 0.0,
+    repeat_penalty: float = 1.1,
+    tfs_z: float = 1.0,
+    mirostat_mode: int = 0,
+    mirostat_tau: float = 5.0,
+    mirostat_eta: float = 0.1,
+    model: Optional[str] = None,
+    logits_processor: Optional[llama.LogitsProcessorList] = None,
+    grammar: Optional[llama.LlamaGrammar] = None,
+) -> Union[llama_types.ChatCompletion, Iterator[llama_types.ChatCompletionChunk]]:
+    # convert messages into a list of strings and images objects
+    # for each item in list
+    # if string, process it and append to prompt
+    # if image, evaluate it and add empty string to prompt (for now)
+    # generate completion
+    items = []
+    current_prompt = ""

llama_cpp/llama_types.py

+99 −39 (99 additions, 39 deletions)
@@ -1,5 +1,7 @@
 """Types and request signatures for OpenAI compatibility
 
+NOTE: These types may change to match the OpenAI OpenAPI specification.
+
 Based on the OpenAI OpenAPI specification:
 https://github.com/openai/openai-openapi/blob/master/openapi.yaml
 
@@ -19,9 +21,6 @@ class Embedding(TypedDict):
     embedding: List[float]
 
 
-EmbeddingData = Embedding
-
-
 class CreateEmbeddingResponse(TypedDict):
     object: Literal["list"]
     model: str
@@ -57,9 +56,6 @@ class CreateCompletionStreamResponse(TypedDict):
     choices: List[CompletionChoice]
 
 
-CompletionChunk = CreateCompletionStreamResponse
-
-
 class CreateCompletionResponse(TypedDict):
     id: str
     object: Literal["text_completion"]
@@ -69,9 +65,6 @@ class CreateCompletionResponse(TypedDict):
     usage: CompletionUsage
 
 
-Completion = CreateCompletionResponse
-
-
 class ChatCompletionFunctionCall(TypedDict):
     name: str
     arguments: str
@@ -100,73 +93,58 @@ class ChatCompletionResponseMessage(TypedDict):
     function_call: NotRequired[ChatCompletionFunctionCall]
 
 
-ChatCompletionMessage = ChatCompletionResponseMessage
-
-
 class ChatCompletionResponseFunction(TypedDict):
     name: str
     description: NotRequired[str]
     parameters: Dict[str, Any]  # TODO: make this more specific
 
 
-ChatCompletionFunction = ChatCompletionResponseFunction
-
-
 class ChatCompletionResponseChoice(TypedDict):
     index: int
-    message: ChatCompletionMessage
+    message: "ChatCompletionMessage"
     finish_reason: Optional[str]
 
 
-ChatCompletionChoice = ChatCompletionResponseChoice
-
-
 class CreateChatCompletionResponse(TypedDict):
     id: str
     object: Literal["chat.completion"]
     created: int
     model: str
-    choices: List[ChatCompletionChoice]
+    choices: List["ChatCompletionChoice"]
     usage: CompletionUsage
 
 
-ChatCompletion = CreateChatCompletionResponse
+class ChatCompletionMessageToolCallChunk(TypedDict):
+    index: int
+    id: NotRequired[str]
+    type: Literal["function"]
+    function: ChatCompletionFunctionCall
 
 
 class ChatCompletionStreamResponseDeltaEmpty(TypedDict):
     pass
 
 
-ChatCompletionChunkDeltaEmpty = ChatCompletionStreamResponseDeltaEmpty
-
-
 class ChatCompletionStreamResponseDelta(TypedDict):
-    role: NotRequired[Literal["assistant"]]
     content: NotRequired[str]
     function_call: NotRequired[ChatCompletionFunctionCall]
-
-
-ChatCompletionChunkDelta = ChatCompletionStreamResponseDelta
+    tool_calls: NotRequired[List[ChatCompletionMessageToolCallChunk]]
+    role: NotRequired[Literal["system", "user", "assistant", "tool"]]
 
 
 class ChatCompletionStreamResponseChoice(TypedDict):
     index: int
-    delta: Union[ChatCompletionChunkDelta, ChatCompletionChunkDeltaEmpty]
+    delta: Union["ChatCompletionChunkDelta", "ChatCompletionChunkDeltaEmpty"]
     finish_reason: Optional[Literal["stop", "length", "function_call"]]
 
 
-ChatCompletionChunkChoice = ChatCompletionStreamResponseChoice
-
-
 class ChatCompletionStreamResponse(TypedDict):
     id: str
     model: str
     object: Literal["chat.completion.chunk"]
     created: int
-    choices: List[ChatCompletionChunkChoice]
-
+    choices: List["ChatCompletionChunkChoice"]
 
-ChatCompletionChunk = ChatCompletionStreamResponse
 
 JsonType = Union[None, int, str, bool, List["JsonType"], Dict[str, "JsonType"]]
 
@@ -181,8 +159,90 @@ class ChatCompletionFunctionCallOption(TypedDict):
     name: str
 
 
-class ChatCompletionRequestMessage(TypedDict):
-    role: Literal["assistant", "user", "system", "function"]
+class ChatCompletionRequestMessageContentPartText(TypedDict):
+    type: Literal["text"]
+    text: str
+
+
+class ChatCompletionRequestMessageContentPartImageImageUrl(TypedDict):
+    url: str
+    detail: NotRequired[Literal["auto", "low", "high"]]
+
+
+class ChatCompletionRequestMessageContentPartImage(TypedDict):
+    type: Literal["image_url"]
+    image_url: ChatCompletionRequestMessageContentPartImageImageUrl
+
+
+ChatCompletionRequestMessageContentPart = Union[
+    ChatCompletionRequestMessageContentPartText,
+    ChatCompletionRequestMessageContentPartImage,
+]
+
+
+class ChatCompletionRequestSystemMessage(TypedDict):
+    role: Literal["system"]
     content: Optional[str]
-    name: NotRequired[str]
-    function_call: NotRequired[ChatCompletionFunctionCall]
+
+
+class ChatCompletionRequestUserMessage(TypedDict):
+    role: Literal["user"]
+    content: Optional[Union[str, List[ChatCompletionRequestMessageContentPart]]]
+
+
+class ChatCompletionMessageToolCallFunction(TypedDict):
+    name: str
+    arguments: str
+
+
+class ChatCompletionMessageToolCall(TypedDict):
+    id: str
+    type: Literal["function"]
+    function: ChatCompletionMessageToolCallFunction
+
+
+ChatCompletionMessageToolCalls = List[ChatCompletionMessageToolCall]
+
+
+class ChatCompletionRequestAssistantMessage(TypedDict):
+    role: Literal["assistant"]
+    content: Optional[str]
+    tool_calls: NotRequired[ChatCompletionMessageToolCalls]
+    function_call: NotRequired[ChatCompletionFunctionCall]  # DEPRECATED
+
+
+class ChatCompletionRequestToolMessage(TypedDict):
+    role: Literal["tool"]
+    content: Optional[str]
+    tool_call_id: str
+
+
+class ChatCompletionRequestFunctionMessage(TypedDict):
+    role: Literal["function"]
+    content: Optional[str]
+    name: str
+
+
+ChatCompletionRequestMessage = Union[
+    ChatCompletionRequestSystemMessage,
+    ChatCompletionRequestUserMessage,
+    ChatCompletionRequestAssistantMessage,
+    ChatCompletionRequestUserMessage,
+    ChatCompletionRequestToolMessage,
+    ChatCompletionRequestFunctionMessage,
+]
+
+# NOTE: The following type names are not part of the OpenAI OpenAPI specification
+# and will be removed in a future major release.
+
+EmbeddingData = Embedding
+CompletionChunk = CreateCompletionStreamResponse
+Completion = CreateCompletionResponse
+ChatCompletionMessage = ChatCompletionResponseMessage
+ChatCompletionChoice = ChatCompletionResponseChoice
+ChatCompletion = CreateChatCompletionResponse
+ChatCompletionChunkDeltaEmpty = ChatCompletionStreamResponseDeltaEmpty
+ChatCompletionChunkChoice = ChatCompletionStreamResponseChoice
+ChatCompletionChunkDelta = ChatCompletionStreamResponseDelta
+ChatCompletionChunk = ChatCompletionStreamResponse
+ChatCompletionFunction = ChatCompletionResponseFunction
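
The old names are kept at the bottom of the module as plain aliases, so existing imports continue to resolve while new code can move to the OpenAI-style names. A quick illustration of that equivalence (illustrative only, not part of the diff):

import llama_cpp.llama_types as llama_types

# The deprecated aliases point at the same objects as the new response types.
assert llama_types.Completion is llama_types.CreateCompletionResponse
assert llama_types.CompletionChunk is llama_types.CreateCompletionStreamResponse
assert llama_types.ChatCompletion is llama_types.CreateChatCompletionResponse

# ChatCompletionRequestMessage is now a Union of per-role TypedDicts, so a
# plain dict with the right keys still satisfies the annotation.
message: llama_types.ChatCompletionRequestMessage = {"role": "user", "content": "Hi"}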

llama_cpp/server/app.py

+2 −2 (2 additions, 2 deletions)
@@ -688,7 +688,7 @@ async def create_completion(
         kwargs["grammar"] = llama_cpp.LlamaGrammar.from_string(body.grammar)
 
     iterator_or_completion: Union[
-        llama_cpp.Completion, Iterator[llama_cpp.CompletionChunk]
+        llama_cpp.CreateCompletionResponse, Iterator[llama_cpp.CreateCompletionStreamResponse]
     ] = await run_in_threadpool(llama, **kwargs)
 
     if isinstance(iterator_or_completion, Iterator):
@@ -697,7 +697,7 @@
 
         # If no exception was raised from first_response, we can assume that
         # the iterator is valid and we can use it to stream the response.
-        def iterator() -> Iterator[llama_cpp.CompletionChunk]:
+        def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:
             yield first_response
             yield from iterator_or_completion
 