File tree Expand file tree Collapse file tree 2 files changed +115
-0
lines changed
Filter options
Expand file tree Collapse file tree 2 files changed +115
-0
lines changed
Original file line number Diff line number Diff line change
1
import llama_cpp
import llama_cpp.llama_tokenizer

import gradio as gr

# Download (or reuse from cache) the quantized Qwen chat model from the
# Hugging Face Hub.  The HF tokenizer is substituted for the one embedded
# in the GGUF file — presumably so tokenization matches the original
# Qwen tokenizer exactly; TODO confirm.
llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",  # glob pattern: selects the q8_0 quantization file
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
    verbose=False
)

# Model name passed through the OpenAI-compatible call below; the local
# llama.cpp backend serves the model loaded above regardless of this value.
model = "gpt-3.5-turbo"
14
+
15
def predict(message, history):
    """Stream a chat completion for *message*, replaying the prior turns.

    *history* is a sequence of (user, assistant) text pairs; yields the
    accumulated reply text after each streamed chunk so the UI updates live.
    """
    # Flatten the paired history into the OpenAI chat-message format.
    messages = [
        {"role": role, "content": content}
        for user_turn, assistant_turn in history
        for role, content in (("user", user_turn), ("assistant", assistant_turn))
    ]
    messages.append({"role": "user", "content": message})

    stream = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        stream=True
    )

    # Accumulate deltas; yield the running text on every chunk (even empty
    # ones) so the frontend re-renders continuously.
    accumulated = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:
            accumulated += delta
        yield accumulated
36
+
37
+
38
# Client-side snippet run by Gradio on page load: forces the dark theme by
# appending ?__theme=dark to the URL and reloading when it is not present.
js = """function () {
  gradioURL = window.location.href
  if (!gradioURL.endsWith('?__theme=dark')) {
    window.location.replace(gradioURL + '?__theme=dark');
  }
}"""

# Custom CSS: hide the Gradio footer and provide a full-height rule.
# NOTE(review): "full-height" matches an element/tag name, not a class
# (no leading dot) — confirm this selector is intended.
css = """
footer {
  visibility: hidden;
}
full-height {
  height: 100%;
}
"""
53
+
54
# Assemble the UI: soft theme, dark-mode JS shim, custom CSS, and a chat
# interface that stretches to fill the viewport height.
with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
    gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"])


if __name__ == "__main__":
    # Start the local Gradio server (blocking call).
    demo.launch()
Original file line number Diff line number Diff line change
1
import gradio as gr

from openai import OpenAI

# Point the OpenAI SDK at a local llama.cpp server exposing the
# OpenAI-compatible REST API.  The api_key is a placeholder — the local
# server does not validate it, but the SDK requires a non-empty value.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="llama.cpp"
)

# Model name forwarded to the server; the llama.cpp server serves whatever
# model it loaded regardless of this value — TODO confirm server routing.
model = "gpt-3.5-turbo"
11
+
12
def predict(message, history):
    """Stream a chat reply for *message*, replaying (user, assistant) history.

    Yields the growing reply text after each streamed chunk so the chat UI
    renders tokens as they arrive.
    """
    conversation = []
    # Rebuild the alternating user/assistant transcript expected by the API.
    for prior_user, prior_assistant in history:
        conversation.extend((
            {"role": "user", "content": prior_user},
            {"role": "assistant", "content": prior_assistant},
        ))
    conversation.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model=model,
        messages=conversation,
        stream=True
    )

    # Fold each delta into the partial reply; yield on every chunk (even
    # empty deltas) to keep the frontend refreshing.
    partial = ""
    for chunk in stream:
        piece = chunk.choices[0].delta.content
        if piece:
            partial += piece
        yield partial
33
+
34
+
35
# Client-side snippet run by Gradio on page load: forces the dark theme by
# appending ?__theme=dark to the URL and reloading when it is not present.
js = """function () {
  gradioURL = window.location.href
  if (!gradioURL.endsWith('?__theme=dark')) {
    window.location.replace(gradioURL + '?__theme=dark');
  }
}"""

# Custom CSS: hide the Gradio footer and provide a full-height rule.
# NOTE(review): "full-height" matches an element/tag name, not a class
# (no leading dot) — confirm this selector is intended.
css = """
footer {
  visibility: hidden;
}
full-height {
  height: 100%;
}
"""
50
+
51
# Assemble the UI: soft theme, dark-mode JS shim, custom CSS, and a chat
# interface that stretches to fill the viewport height.
with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
    gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"])


if __name__ == "__main__":
    # Start the local Gradio server (blocking call).
    demo.launch()
You can’t perform that action at this time.
0 commit comments