@@ -153,7 +153,9 @@ def mock_kv_cache_seq_add(
 
 def test_llama_patch(mock_llama):
     n_ctx = 128
+    ai_service = "testing"
     llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, n_ctx=n_ctx)
+
     n_vocab = llama_cpp.llama_n_vocab(llama._model.model)
     assert n_vocab == 32000
 
@@ -163,32 +165,32 @@ def test_llama_patch(mock_llama):
 
     ## Test basic completion from bos until eos
     mock_llama(llama, all_text)
-    completion = llama.create_completion("", max_tokens=36)
+    completion = llama.create_completion("", max_tokens=36, ai_service=ai_service)
     assert completion["choices"][0]["text"] == all_text
     assert completion["choices"][0]["finish_reason"] == "stop"
 
     ## Test basic completion until eos
     mock_llama(llama, all_text)
-    completion = llama.create_completion(text, max_tokens=20)
+    completion = llama.create_completion(text, max_tokens=20, ai_service=ai_service)
     assert completion["choices"][0]["text"] == output_text
     assert completion["choices"][0]["finish_reason"] == "stop"
 
     ## Test streaming completion until eos
     mock_llama(llama, all_text)
-    chunks = list(llama.create_completion(text, max_tokens=20, stream=True))
+    chunks = list(llama.create_completion(text, max_tokens=20, stream=True, ai_service=ai_service))
     assert "".join(chunk["choices"][0]["text"] for chunk in chunks) == output_text
     assert chunks[-1]["choices"][0]["finish_reason"] == "stop"
 
     ## Test basic completion until stop sequence
     mock_llama(llama, all_text)
-    completion = llama.create_completion(text, max_tokens=20, stop=["lazy"])
+    completion = llama.create_completion(text, max_tokens=20, stop=["lazy"], ai_service=ai_service)
     assert completion["choices"][0]["text"] == " jumps over the "
     assert completion["choices"][0]["finish_reason"] == "stop"
 
     ## Test streaming completion until stop sequence
     mock_llama(llama, all_text)
     chunks = list(
-        llama.create_completion(text, max_tokens=20, stream=True, stop=["lazy"])
+        llama.create_completion(text, max_tokens=20, stream=True, stop=["lazy"], ai_service=ai_service)
     )
     assert (
         "".join(chunk["choices"][0]["text"] for chunk in chunks) == " jumps over the "
@@ -197,13 +199,13 @@ def test_llama_patch(mock_llama):
 
     ## Test basic completion until length
     mock_llama(llama, all_text)
-    completion = llama.create_completion(text, max_tokens=2)
+    completion = llama.create_completion(text, max_tokens=2, ai_service=ai_service)
     assert completion["choices"][0]["text"] == " jumps"
     assert completion["choices"][0]["finish_reason"] == "length"
 
     ## Test streaming completion until length
     mock_llama(llama, all_text)
-    chunks = list(llama.create_completion(text, max_tokens=2, stream=True))
+    chunks = list(llama.create_completion(text, max_tokens=2, stream=True, ai_service=ai_service))
     assert "".join(chunk["choices"][0]["text"] for chunk in chunks) == " jumps"
     assert chunks[-1]["choices"][0]["finish_reason"] == "length"
 
@@ -230,15 +232,16 @@ def test_utf8(mock_llama):
     llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, logits_all=True)
 
     output_text = "😀"
+    ai_service = "testing"
 
     ## Test basic completion with utf8 multibyte
     mock_llama(llama, output_text)
-    completion = llama.create_completion("", max_tokens=4)
+    completion = llama.create_completion("", max_tokens=4, ai_service=ai_service)
     assert completion["choices"][0]["text"] == output_text
 
     ## Test basic completion with incomplete utf8 multibyte
     mock_llama(llama, output_text)
-    completion = llama.create_completion("", max_tokens=1)
+    completion = llama.create_completion("", max_tokens=1, ai_service=ai_service)
     assert completion["choices"][0]["text"] == ""
 
 