llama-cpp-python/examples/batch-processing/server.py at main · msadr-git/llama-cpp-python

31 lines (18 loc) · 755 Bytes

"""llama-cpp-python server from scratch in a single file.
"""

# import llama_cpp

# path = b"../../models/Qwen1.5-0.5B-Chat-GGUF/qwen1_5-0_5b-chat-q8_0.gguf"

# model_params = llama_cpp.llama_model_default_params()
# model = llama_cpp.llama_load_model_from_file(path, model_params)

# if model is None:
#     raise RuntimeError(f"Failed to load model from file: {path}")

# ctx_params = llama_cpp.llama_context_default_params()
# ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

# if ctx is None:
#     raise RuntimeError("Failed to create context")

from fastapi import FastAPI

# Application object; the route decorator further down in this file
# registers its handler on this instance.
app = FastAPI()

# NOTE(review): `types` is not referenced anywhere in the visible code --
# presumably intended for typing the chat-completions request/response
# later; confirm before removing or relying on it.
import openai.types.chat as types

# Placeholder handler for POST /v1/chat/completions: it accepts no
# parameters and answers every request with the same fixed payload.
# Documented with comments (not a docstring or return annotation) so the
# function object the framework inspects stays exactly as it was.
@app.post("/v1/chat/completions")
def create_chat_completions():
    placeholder_payload = {"message": "Hello World"}
    return placeholder_payload

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Expand file tree

Search code, repositories, users, issues, pull requests...

FilesExpand file tree

server.py

Latest commit

History

server.py

File metadata and controls

Expand file tree