Commit c1ae815

fix(misc): Format

1 parent b77e507 · commit c1ae815
35 files changed, +2270 -1332 lines
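The hunks below all follow Black's default style: double quotes, no spaces around keyword `=`, magic trailing commas, long calls exploded one argument per line, and two blank lines around top-level definitions. The commit message does not name a tool, so Black is an assumption; a minimal sketch reproducing one hunk with Black's programmatic API:

# Assumes Black produced this commit (the message "fix(misc): Format"
# does not say which formatter). black.format_str() is Black's public
# programmatic entry point; "before" is copied from the removed lines of
# the high_level_api_infill.py hunk below.
import black

before = (
    "output = llm.create_completion(\n"
    "    temperature = 0.0,\n"
    "    repeat_penalty = 1.0,\n"
    "    prompt = args.prompt,\n"
    "    suffix = args.suffix,\n"
    ")\n"
)

after = black.format_str(before, mode=black.Mode())
print(after)
# The spaces around "=" in keyword arguments are dropped, and the magic
# trailing comma after "suffix" keeps the call exploded one argument per
# line -- matching the "+" side of that hunk.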

examples/batch-processing/server.py

1 addition & 0 deletions

@@ -25,6 +25,7 @@
 
 import openai.types.chat as types
 
+
 @app.post("/v1/chat/completions")
 def create_chat_completions():
     return {"message": "Hello World"}

examples/gradio_chat/local.py

15 additions & 7 deletions

@@ -6,25 +6,26 @@
 llama = llama_cpp.Llama.from_pretrained(
     repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
     filename="*q8_0.gguf",
-    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
-    verbose=False
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
+        "Qwen/Qwen1.5-0.5B"
+    ),
+    verbose=False,
 )
 
 model = "gpt-3.5-turbo"
 
+
 def predict(message, history):
     messages = []
 
     for user_message, assistant_message in history:
         messages.append({"role": "user", "content": user_message})
         messages.append({"role": "assistant", "content": assistant_message})
-
+
     messages.append({"role": "user", "content": message})
 
     response = llama.create_chat_completion_openai_v1(
-        model=model,
-        messages=messages,
-        stream=True
+        model=model, messages=messages, stream=True
     )
 
     text = ""

@@ -52,7 +53,14 @@ def predict(message, history):
 """
 
 with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
-    gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"])
+    gr.ChatInterface(
+        predict,
+        fill_height=True,
+        examples=[
+            "What is the capital of France?",
+            "Who was the first person on the moon?",
+        ],
+    )
 
 
 if __name__ == "__main__":

‎examples/gradio_chat/server.py

Copy file name to clipboardExpand all lines: examples/gradio_chat/server.py
+12-9Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,22 @@
22

33
from openai import OpenAI
44

5-
client = OpenAI(
6-
base_url="http://localhost:8000/v1",
7-
api_key="llama.cpp"
8-
)
5+
client = OpenAI(base_url="http://localhost:8000/v1", api_key="llama.cpp")
96

107
model = "gpt-3.5-turbo"
118

9+
1210
def predict(message, history):
1311
messages = []
1412

1513
for user_message, assistant_message in history:
1614
messages.append({"role": "user", "content": user_message})
1715
messages.append({"role": "assistant", "content": assistant_message})
18-
16+
1917
messages.append({"role": "user", "content": message})
2018

2119
response = client.chat.completions.create(
22-
model=model,
23-
messages=messages,
24-
stream=True
20+
model=model, messages=messages, stream=True
2521
)
2622

2723
text = ""
@@ -49,7 +45,14 @@ def predict(message, history):
4945
"""
5046

5147
with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
52-
gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"])
48+
gr.ChatInterface(
49+
predict,
50+
fill_height=True,
51+
examples=[
52+
"What is the capital of France?",
53+
"Who was the first person on the moon?",
54+
],
55+
)
5356

5457

5558
if __name__ == "__main__":

examples/hf_pull/main.py

9 additions & 12 deletions

@@ -5,29 +5,26 @@
 llama = llama_cpp.Llama.from_pretrained(
     repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
     filename="*q8_0.gguf",
-    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
-    verbose=False
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
+        "Qwen/Qwen1.5-0.5B"
+    ),
+    verbose=False,
 )
 
 response = llama.create_chat_completion(
-    messages=[
-        {
-            "role": "user",
-            "content": "What is the capital of France?"
-        }
-    ],
+    messages=[{"role": "user", "content": "What is the capital of France?"}],
     response_format={
         "type": "json_object",
         "schema": {
             "type": "object",
             "properties": {
                 "country": {"type": "string"},
-                "capital": {"type": "string"}
+                "capital": {"type": "string"},
             },
             "required": ["country", "capital"],
-        }
+        },
     },
-    stream=True
+    stream=True,
 )
 
 for chunk in response:

@@ -36,4 +33,4 @@
         continue
     print(delta["content"], end="", flush=True)
 
-print()
\ No newline at end of file
+print()

examples/high_level_api/fastapi_server.py

1 addition & 0 deletions

@@ -24,6 +24,7 @@
 To actually see the implementation of the server, see llama_cpp/server/app.py
 
 """
+
 import os
 import uvicorn
 

examples/high_level_api/high_level_api_infill.py

12 additions & 8 deletions

@@ -6,16 +6,16 @@
 parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 parser.add_argument("-p", "--prompt", type=str, default="def add(")
 parser.add_argument("-s", "--suffix", type=str, default="\n    return sum\n\n")
-parser.add_argument("-i", "--spm-infill", action='store_true')
+parser.add_argument("-i", "--spm-infill", action="store_true")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model, n_gpu_layers=-1, spm_infill=args.spm_infill)
 
 output = llm.create_completion(
-    temperature = 0.0,
-    repeat_penalty = 1.0,
-    prompt = args.prompt,
-    suffix = args.suffix,
+    temperature=0.0,
+    repeat_penalty=1.0,
+    prompt=args.prompt,
+    suffix=args.suffix,
 )
 
 # Models sometimes repeat suffix in response, attempt to filter that

@@ -25,9 +25,13 @@
 unwanted_response_length = len(unwanted_response_suffix)
 
 filtered = False
-if unwanted_response_suffix and response_stripped[-unwanted_response_length:] == unwanted_response_suffix:
+if (
+    unwanted_response_suffix
+    and response_stripped[-unwanted_response_length:] == unwanted_response_suffix
+):
     response = response_stripped[:-unwanted_response_length]
     filtered = True
 
-print(f"Fill-in-Middle completion{' (filtered)' if filtered else ''}:\n\n{args.prompt}\033[32m{response}\033[{'33' if filtered else '0'}m{args.suffix}\033[0m")
-
+print(
+    f"Fill-in-Middle completion{' (filtered)' if filtered else ''}:\n\n{args.prompt}\033[32m{response}\033[{'33' if filtered else '0'}m{args.suffix}\033[0m"
+)

examples/low_level_api/Chat.py

28 additions & 24 deletions

@@ -3,10 +3,12 @@
 from common import GptParams
 from low_level_api_chat_cpp import LLaMAInteract
 
+
 def env_or_def(env, default):
- if (env in os.environ):
-  return os.environ[env]
- return default
+    if env in os.environ:
+        return os.environ[env]
+    return default
+
 
 AI_NAME = env_or_def("AI_NAME", "ChatLLaMa")
 MODEL = env_or_def("MODEL", "./models/llama-13B/ggml-model.bin")

@@ -15,10 +17,10 @@ def env_or_def(env, default):
 N_THREAD = int(env_or_def("N_THREAD", "8"))
 
 today = datetime.datetime.today()
-DATE_YEAR=today.strftime("%Y")
-DATE_TIME=today.strftime("%H:%M")
+DATE_YEAR = today.strftime("%Y")
+DATE_TIME = today.strftime("%H:%M")
 
-prompt=f"""Text transcript of a never ending dialog, where {USER_NAME} interacts with an AI assistant named {AI_NAME}.
+prompt = f"""Text transcript of a never ending dialog, where {USER_NAME} interacts with an AI assistant named {AI_NAME}.
 {AI_NAME} is helpful, kind, honest, friendly, good at writing and never fails to answer {USER_NAME}'s requests immediately and with details and precision.
 There are no annotations like (30 seconds passed...) or (to himself), just what {USER_NAME} and {AI_NAME} say aloud to each other.
 The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long.

@@ -45,27 +47,29 @@ def env_or_def(env, default):
 {AI_NAME}: Blue.
 {USER_NAME}: What time is it?
 {AI_NAME}: It is {DATE_TIME}.
-{USER_NAME}:""" + " ".join(sys.argv[1:])
+{USER_NAME}:""" + " ".join(
+    sys.argv[1:]
+)
 
 print("Loading model...")
 params = GptParams(
- n_ctx=2048,
- temp=0.7,
- top_k=40,
- top_p=0.5,
- repeat_last_n=256,
- n_batch=1024,
- repeat_penalty=1.17647,
- model=MODEL,
- n_threads=N_THREAD,
- n_predict=N_PREDICTS,
- use_color=True,
- interactive=True,
- antiprompt=[f"{USER_NAME}:"],
- input_prefix=" ",
- input_suffix=f"{AI_NAME}:",
- prompt=prompt,
+    n_ctx=2048,
+    temp=0.7,
+    top_k=40,
+    top_p=0.5,
+    repeat_last_n=256,
+    n_batch=1024,
+    repeat_penalty=1.17647,
+    model=MODEL,
+    n_threads=N_THREAD,
+    n_predict=N_PREDICTS,
+    use_color=True,
+    interactive=True,
+    antiprompt=[f"{USER_NAME}:"],
+    input_prefix=" ",
+    input_suffix=f"{AI_NAME}:",
+    prompt=prompt,
 )
 
 with LLaMAInteract(params) as m:
- m.interact()
+    m.interact()

examples/low_level_api/Miku.py

25 additions & 21 deletions

@@ -3,18 +3,20 @@
 from common import GptParams
 from low_level_api_chat_cpp import LLaMAInteract
 
+
 def env_or_def(env, default):
- if (env in os.environ):
-  return os.environ[env]
- return default
+    if env in os.environ:
+        return os.environ[env]
+    return default
+
 
 AI_NAME = env_or_def("AI_NAME", "Miku")
 MODEL = env_or_def("MODEL", "./models/llama-13B/ggml-model.bin")
 USER_NAME = env_or_def("USER_NAME", "Anon")
 N_PREDICTS = int(env_or_def("N_PREDICTS", "4096"))
 N_THREAD = int(env_or_def("N_THREAD", "0"))
 
-prompt=f"""This is a transcript of a 1000 page, never ending conversation between {USER_NAME} and the cute and helpful AI assistant {AI_NAME}. {AI_NAME} is a girl who is an AI running on the users computer.
+prompt = f"""This is a transcript of a 1000 page, never ending conversation between {USER_NAME} and the cute and helpful AI assistant {AI_NAME}. {AI_NAME} is a girl who is an AI running on the users computer.
 {AI_NAME} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next.
 {AI_NAME} is always coherent and makes sense, but if she isn't sure if what she is saying is correct she will ask the user for help.
 {AI_NAME} is a very helpful AI and will help the user with anything they need, she is also very friendly and will try to make the user feel better if they are sad.

@@ -32,28 +34,30 @@ def env_or_def(env, default):
 {AI_NAME}: /think It sounds like {USER_NAME} is happy to have me as their assistant! I'm so happy too! ^_^ Glad that whole emotion thing didn't scare him off!
 {AI_NAME}: /think I wonder what {USER_NAME} likes to do in his free time? I should ask him about that!
 {AI_NAME}: What do you like to do in your free time? ^_^
-{USER_NAME}:""" + " ".join(sys.argv[1:])
+{USER_NAME}:""" + " ".join(
+    sys.argv[1:]
+)
 
 print("Loading model...")
 params = GptParams(
- n_batch=1024,
- n_ctx=2048,
- n_keep=-1,
- repeat_last_n=256,
- repeat_penalty=1.17647,
- temp=0.7,
- top_k=40,
- top_p=0.5,
- model=MODEL,
- n_predict=N_PREDICTS,
- use_color=True,
- interactive=True,
- antiprompt=[f"{USER_NAME}:"],
- prompt=prompt,
+    n_batch=1024,
+    n_ctx=2048,
+    n_keep=-1,
+    repeat_last_n=256,
+    repeat_penalty=1.17647,
+    temp=0.7,
+    top_k=40,
+    top_p=0.5,
+    model=MODEL,
+    n_predict=N_PREDICTS,
+    use_color=True,
+    interactive=True,
+    antiprompt=[f"{USER_NAME}:"],
+    prompt=prompt,
 )
 
 if N_THREAD > 0:
- params.n_threads = N_THREAD
+    params.n_threads = N_THREAD
 
 with LLaMAInteract(params) as m:
- m.interact()
+    m.interact()

0 commit comments