Commit 919fca9

Merge branch 'main' of https://github.com/abetlen/llama-cpp-python into main

2 parents: d02a9cf + c139f8b
2 files changed: 105 additions & 2 deletions

‎llama_cpp/server/app.py

69 additions & 2 deletions

@@ -41,6 +41,11 @@
     CreateEmbeddingRequest,
     CreateChatCompletionRequest,
     ModelList,
+    TokenizeInputRequest,
+    TokenizeInputResponse,
+    TokenizeInputCountResponse,
+    DetokenizeInputRequest,
+    DetokenizeInputResponse,
 )
 from llama_cpp.server.errors import RouteErrorHandler

@@ -196,6 +201,9 @@ async def authenticate(
     )


+openai_v1_tag = "OpenAI V1"
+
+
 @router.post(
     "/v1/completions",
     summary="Completion",
@@ -227,11 +235,13 @@ async def authenticate(
             },
         }
     },
+    tags=[openai_v1_tag],
 )
 @router.post(
     "/v1/engines/copilot-codex/completions",
     include_in_schema=False,
     dependencies=[Depends(authenticate)],
+    tags=[openai_v1_tag],
 )
 async def create_completion(
     request: Request,
@@ -297,7 +307,10 @@ def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]:


 @router.post(
-    "/v1/embeddings", summary="Embedding", dependencies=[Depends(authenticate)]
+    "/v1/embeddings",
+    summary="Embedding",
+    dependencies=[Depends(authenticate)],
+    tags=[openai_v1_tag],
 )
 async def create_embedding(
     request: CreateEmbeddingRequest,
@@ -339,6 +352,7 @@ async def create_embedding(
             },
         }
     },
+    tags=[openai_v1_tag],
 )
 async def create_chat_completion(
     request: Request,
@@ -391,7 +405,12 @@ def iterator() -> Iterator[llama_cpp.ChatCompletionChunk]:
     return iterator_or_completion


-@router.get("/v1/models", summary="Models", dependencies=[Depends(authenticate)])
+@router.get(
+    "/v1/models",
+    summary="Models",
+    dependencies=[Depends(authenticate)],
+    tags=[openai_v1_tag],
+)
 async def get_models(
     llama_proxy: LlamaProxy = Depends(get_llama_proxy),
 ) -> ModelList:
@@ -407,3 +426,51 @@ async def get_models(
             for model_alias in llama_proxy
         ],
     }
+
+
+extras_tag = "Extras"
+
+
+@router.post(
+    "/extras/tokenize",
+    summary="Tokenize",
+    dependencies=[Depends(authenticate)],
+    tags=[extras_tag],
+)
+async def tokenize(
+    body: TokenizeInputRequest,
+    llama_proxy: LlamaProxy = Depends(get_llama_proxy),
+) -> TokenizeInputResponse:
+    tokens = llama_proxy(body.model).tokenize(body.input.encode("utf-8"), special=True)
+
+    return {"tokens": tokens}
+
+
+@router.post(
+    "/extras/tokenize/count",
+    summary="Tokenize Count",
+    dependencies=[Depends(authenticate)],
+    tags=[extras_tag],
+)
+async def count_query_tokens(
+    body: TokenizeInputRequest,
+    llama_proxy: LlamaProxy = Depends(get_llama_proxy),
+) -> TokenizeInputCountResponse:
+    tokens = llama_proxy(body.model).tokenize(body.input.encode("utf-8"), special=True)
+
+    return {"count": len(tokens)}
+
+
+@router.post(
+    "/extras/detokenize",
+    summary="Detokenize",
+    dependencies=[Depends(authenticate)],
+    tags=[extras_tag],
+)
+async def detokenize(
+    body: DetokenizeInputRequest,
+    llama_proxy: LlamaProxy = Depends(get_llama_proxy),
+) -> DetokenizeInputResponse:
+    text = llama_proxy(body.model).detokenize(body.tokens).decode("utf-8")
+
+    return {"text": text}
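
The three new /extras routes expose the loaded model's tokenizer over HTTP alongside the existing OpenAI-compatible routes. The sketch below shows how a client could exercise them; it assumes a llama-cpp-python server already running at http://localhost:8000 with no API key required, and it omits the optional "model" field so the server falls back to its default model alias. The base URL and those assumptions are illustrative, not part of this commit.

# Hypothetical client for the new /extras endpoints (not part of this commit).
# Assumes a llama-cpp-python server at http://localhost:8000 with auth disabled.
import requests

BASE_URL = "http://localhost:8000"  # illustrative local server
text = "How many tokens in this query?"

# POST /extras/tokenize -> {"tokens": [...]}
tokens = requests.post(
    f"{BASE_URL}/extras/tokenize", json={"input": text}
).json()["tokens"]

# POST /extras/tokenize/count -> {"count": N}
# Same request body as /extras/tokenize; only the length is returned.
count = requests.post(
    f"{BASE_URL}/extras/tokenize/count", json={"input": text}
).json()["count"]

# POST /extras/detokenize -> {"text": "..."}; maps the token IDs back to text.
detok = requests.post(
    f"{BASE_URL}/extras/detokenize", json={"tokens": tokens}
).json()["text"]

print(count, tokens[:5], detok)

Because /extras/tokenize and /extras/tokenize/count share TokenizeInputRequest, the count route accepts exactly the same body and simply returns len(tokens) instead of the full list.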

‎llama_cpp/server/types.py

36 additions & 0 deletions

@@ -264,3 +264,39 @@ class ModelData(TypedDict):
 class ModelList(TypedDict):
     object: Literal["list"]
     data: List[ModelData]
+
+
+class TokenizeInputRequest(BaseModel):
+    model: Optional[str] = model_field
+    input: Optional[str] = Field(description="The input to tokenize.")
+
+    model_config = {
+        "json_schema_extra": {"examples": [{"input": "How many tokens in this query?"}]}
+    }
+
+
+class TokenizeInputResponse(BaseModel):
+    tokens: List[int] = Field(description="A list of tokens.")
+
+    model_config = {"json_schema_extra": {"example": {"tokens": [123, 321, 222]}}}
+
+
+class TokenizeInputCountResponse(BaseModel):
+    count: int = Field(description="The number of tokens in the input.")
+
+    model_config = {"json_schema_extra": {"example": {"count": 5}}}
+
+
+class DetokenizeInputRequest(BaseModel):
+    model: Optional[str] = model_field
+    tokens: List[int] = Field(description="A list of tokens to detokenize.")
+
+    model_config = {"json_schema_extra": {"example": [{"tokens": [123, 321, 222]}]}}
+
+
+class DetokenizeInputResponse(BaseModel):
+    text: str = Field(description="The detokenized text.")
+
+    model_config = {
+        "json_schema_extra": {"example": {"text": "How many tokens in this query?"}}
+    }
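
For reference, the sketch below shows how these Pydantic models are exercised at runtime: FastAPI validates the posted JSON against the request model before the route handler runs, and serializes the handler's returned dict against the response model named in the return annotation. The classes here are trimmed copies for a standalone demo only (the optional model alias field and the shared model_field default are omitted); the canonical definitions are the ones added in this diff.

# Standalone demo of the new request/response models (Pydantic v2 API).
# Trimmed copies for illustration only; the real definitions live in
# llama_cpp/server/types.py as shown in the diff above.
from typing import List, Optional

from pydantic import BaseModel, Field


class TokenizeInputRequest(BaseModel):
    input: Optional[str] = Field(description="The input to tokenize.")

    model_config = {
        "json_schema_extra": {"examples": [{"input": "How many tokens in this query?"}]}
    }


class TokenizeInputResponse(BaseModel):
    tokens: List[int] = Field(description="A list of tokens.")


# Roughly what FastAPI does with the incoming request body:
req = TokenizeInputRequest.model_validate({"input": "How many tokens in this query?"})

# Roughly what FastAPI does with the handler's returned dict:
resp = TokenizeInputResponse.model_validate({"tokens": [123, 321, 222]})

print(req.input)
print(resp.model_dump())  # {'tokens': [123, 321, 222]}

The json_schema_extra entries surface as example payloads in the server's generated OpenAPI schema, which is also where the new "OpenAI V1" and "Extras" tags group the routes in the interactive docs.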
