Commit d0e0d1a

JSON: [key] -> .at(key), assert() -> GGML_ASSERT
1 parent 3855416 commit d0e0d1a
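
For readers unfamiliar with the two mechanisms this commit leans on, here is a minimal standalone sketch (not part of the diff; the file name and main() harness are illustrative only): nlohmann::json's at() throws on a missing key instead of silently inserting one the way the non-const operator[] does, and the library routes its internal assertions through JSON_ASSERT, which falls back to assert() unless the macro is defined before json.hpp is included.

    // sketch.cpp -- illustrative only; assumes ggml.h and json.hpp are on the include path
    #include "ggml.h"
    // Change JSON_ASSERT from assert() to GGML_ASSERT (must come before json.hpp):
    #define JSON_ASSERT GGML_ASSERT
    #include "json.hpp"

    #include <cstdio>
    #include <string>

    using json = nlohmann::json;

    int main() {
        json metadata = {{"url", "https://example.com/model.gguf"}};

        // operator[] on a non-const object default-inserts a null value for a
        // missing key, so a typo in the key name can go unnoticed; on a const
        // object a missing key hits JSON_ASSERT (here GGML_ASSERT) instead.
        std::string url = metadata["url"].get<std::string>();
        fprintf(stderr, "url: %s\n", url.c_str());

        // at() never inserts: a missing key throws json::out_of_range in both
        // const and non-const contexts, so bad keys fail loudly.
        try {
            std::string etag = metadata.at("etag").get<std::string>();
        } catch (const json::out_of_range & e) {
            fprintf(stderr, "missing key: %s\n", e.what());
        }
        return 0;
    }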

File tree

4 files changed, +84 -74 lines changed

common/common.cpp

8 additions & 6 deletions
@@ -1,4 +1,6 @@
 #include "common.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 #include "json-schema-to-grammar.h"
 #include "llama.h"
@@ -1964,18 +1966,18 @@ static bool llama_download_file(const std::string & url, const std::string & pat
     try {
         metadata_in >> metadata;
         fprintf(stderr, "%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
-        if (metadata.contains("url") && metadata["url"].is_string()) {
-            auto previous_url = metadata["url"].get<std::string>();
+        if (metadata.contains("url") && metadata.at("url").is_string()) {
+            auto previous_url = metadata.at("url").get<std::string>();
             if (previous_url != url) {
                 fprintf(stderr, "%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
                 return false;
             }
         }
-        if (metadata.contains("etag") && metadata["etag"].is_string()) {
-            etag = metadata["etag"];
+        if (metadata.contains("etag") && metadata.at("etag").is_string()) {
+            etag = metadata.at("etag");
         }
-        if (metadata.contains("lastModified") && metadata["lastModified"].is_string()) {
-            last_modified = metadata["lastModified"];
+        if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
+            last_modified = metadata.at("lastModified");
         }
     } catch (const nlohmann::json::exception & e) {
         fprintf(stderr, "%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());

common/json-schema-to-grammar.h

4 additions & 0 deletions
@@ -1,4 +1,8 @@
 #pragma once
+
+#include "ggml.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 
 std::string json_schema_to_grammar(const nlohmann::ordered_json& schema);
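
An editorial note on the header change above (not part of the diff): json.hpp only honors a custom JSON_ASSERT if the macro is already defined when the header is first included in a translation unit; otherwise it falls back to plain assert(). Adding the include of ggml.h (for GGML_ASSERT) and the define to common/json-schema-to-grammar.h, directly above its own #include "json.hpp", means every file that reaches json.hpp through this header picks up the override automatically. The assumed ordering rule:

    // Order matters: define the hook, then include the library.
    #include "ggml.h"                    // provides GGML_ASSERT
    #define JSON_ASSERT GGML_ASSERT
    #include "json.hpp"                  // library assertions now route to GGML_ASSERT

    // Reversed order would keep the default:
    // #include "json.hpp"               // falls back to #define JSON_ASSERT(x) assert(x)
    // #define JSON_ASSERT GGML_ASSERT   // too late, json.hpp is already configured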

examples/server/server.cpp

52 additions & 50 deletions
@@ -12,6 +12,8 @@
 // increase max payload length to allow use of larger context size
 #define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
 #include "httplib.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 
 // auto generated files (update with ./deps.sh)
@@ -745,7 +747,7 @@ struct server_context {
        }
 
        default_generation_settings_for_props = get_formated_generation(slots.front());
-       default_generation_settings_for_props["seed"] = -1;
+       default_generation_settings_for_props.at("seed") = -1;
 
        // the update_slots() logic will always submit a maximum of n_batch tokens
        // note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)
@@ -859,7 +861,7 @@ struct server_context {
        slot.sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
 
        // process "json_schema" and "grammar"
-       if (data.contains("json_schema") && !data["json_schema"].is_null() && data.contains("grammar") && !data["grammar"].is_null()) {
+       if (data.contains("json_schema") && !data.at("json_schema").is_null() && data.contains("grammar") && !data.at("grammar").is_null()) {
            send_error(task, "Either \"json_schema\" or \"grammar\" can be specified, but not both", ERROR_TYPE_INVALID_REQUEST);
            return false;
        } else if (data.contains("json_schema") && !data.contains("grammar")) {
@@ -1343,12 +1345,12 @@ struct server_context {
            }
            slot.n_sent_token_probs = probs_stop_pos;
 
-           res.data["completion_probabilities"] = probs_vector_to_json(ctx, probs_output);
+           res.data.at("completion_probabilities") = probs_vector_to_json(ctx, probs_output);
        }
 
        if (slot.oaicompat) {
-           res.data["oaicompat_token_ctr"] = slot.n_decoded;
-           res.data["model"] = slot.oaicompat_model;
+           res.data.at("oaicompat_token_ctr") = slot.n_decoded;
+           res.data.at("model") = slot.oaicompat_model;
        }
 
        queue_results.send(res);
@@ -1393,12 +1395,12 @@ struct server_context {
                    slot.generated_token_probs.end());
            }
 
-           res.data["completion_probabilities"] = probs_vector_to_json(ctx, probs);
+           res.data.at("completion_probabilities") = probs_vector_to_json(ctx, probs);
        }
 
        if (slot.oaicompat) {
-           res.data["oaicompat_token_ctr"] = slot.n_decoded;
-           res.data["model"] = slot.oaicompat_model;
+           res.data.at("oaicompat_token_ctr") = slot.n_decoded;
+           res.data.at("model") = slot.oaicompat_model;
        }
 
        queue_results.send(res);
@@ -1512,7 +1514,7 @@ struct server_context {
        // add subtasks
        for (int i = 0; i < prompt_count; i++) {
            json subtask_data = multiprompt_task.data;
-           subtask_data["prompt"] = subtask_data["prompt"][i];
+           subtask_data.at("prompt") = subtask_data.at("prompt")[i];
 
            // subtasks inherit everything else (infill mode, embedding mode, etc.)
            request_completion(subtask_ids[i], id_multi, subtask_data, multiprompt_task.infill, multiprompt_task.embedding);
@@ -1532,7 +1534,7 @@ struct server_context {
        }
 
        if (task.data.contains("system_prompt")) {
-           system_prompt_set(task.data["system_prompt"]);
+           system_prompt_set(task.data.at("system_prompt"));
 
            for (server_slot & slot : slots) {
                slot.n_past = 0;
@@ -1575,11 +1577,11 @@ struct server_context {
 
        for (server_slot & slot : slots) {
            json slot_data = get_formated_generation(slot);
-           slot_data["id"] = slot.id;
-           slot_data["id_task"] = slot.id_task;
-           slot_data["state"] = slot.state;
-           slot_data["prompt"] = slot.prompt;
-           slot_data["next_token"] = {
+           slot_data.at("id") = slot.id;
+           slot_data.at("id_task") = slot.id_task;
+           slot_data.at("state") = slot.state;
+           slot_data.at("prompt") = slot.prompt;
+           slot_data.at("next_token") = {
                {"has_next_token", slot.has_next_token},
                {"n_remain", slot.n_remaining},
                {"n_decoded", slot.n_decoded},
@@ -1589,7 +1591,7 @@ struct server_context {
                {"stopping_word", slot.stopping_word},
            };
 
-           if (slot_data["state"] == SLOT_STATE_IDLE) {
+           if (slot_data.at("state") == SLOT_STATE_IDLE) {
                n_idle_slots++;
            } else {
                n_processing_slots++;
@@ -1644,7 +1646,7 @@ struct server_context {
            } break;
        case SERVER_TASK_TYPE_SLOT_SAVE:
            {
-               int id_slot = task.data["id_slot"];
+               int id_slot = task.data.at("id_slot");
                server_slot * slot = get_slot(id_slot);
                if (slot == nullptr) {
                    send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -1654,8 +1656,8 @@ struct server_context {
                const size_t token_count = slot->cache_tokens.size();
                const int64_t t_start = ggml_time_us();
 
-               std::string filename = task.data["filename"];
-               std::string filepath = task.data["filepath"];
+               std::string filename = task.data.at("filename");
+               std::string filepath = task.data.at("filepath");
 
                const size_t nwrite = llama_state_seq_save_file(ctx, filepath.c_str(), slot->id + 1, slot->cache_tokens.data(), token_count);
 
@@ -1679,7 +1681,7 @@ struct server_context {
            } break;
        case SERVER_TASK_TYPE_SLOT_RESTORE:
            {
-               int id_slot = task.data["id_slot"];
+               int id_slot = task.data.at("id_slot");
                server_slot * slot = get_slot(id_slot);
                if (slot == nullptr) {
                    send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -1688,8 +1690,8 @@ struct server_context {
 
                const int64_t t_start = ggml_time_us();
 
-               std::string filename = task.data["filename"];
-               std::string filepath = task.data["filepath"];
+               std::string filename = task.data.at("filename");
+               std::string filepath = task.data.at("filepath");
 
                slot->cache_tokens.resize(slot->n_ctx);
                size_t token_count = 0;
@@ -1721,7 +1723,7 @@ struct server_context {
            } break;
        case SERVER_TASK_TYPE_SLOT_ERASE:
            {
-               int id_slot = task.data["id_slot"];
+               int id_slot = task.data.at("id_slot");
                server_slot * slot = get_slot(id_slot);
                if (slot == nullptr) {
                    send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -3136,8 +3138,8 @@ int main(int argc, char ** argv) {
        server_task_result result = ctx_server.queue_results.recv(task.id);
        ctx_server.queue_results.remove_waiting_task_id(task.id);
 
-       const int n_idle_slots = result.data["idle"];
-       const int n_processing_slots = result.data["processing"];
+       const int n_idle_slots = result.data.at("idle");
+       const int n_processing_slots = result.data.at("processing");
 
        json health = {
            {"status", "ok"},
@@ -3147,11 +3149,11 @@ int main(int argc, char ** argv) {
 
        res.status = 200; // HTTP OK
        if (sparams.slots_endpoint && req.has_param("include_slots")) {
-           health["slots"] = result.data["slots"];
+           health.at("slots") = result.data.at("slots");
        }
 
        if (n_idle_slots == 0) {
-           health["status"] = "no slot available";
+           health.at("status") = "no slot available";
            if (req.has_param("fail_on_no_slot")) {
                res.status = 503; // HTTP Service Unavailable
            }
@@ -3191,7 +3193,7 @@ int main(int argc, char ** argv) {
        server_task_result result = ctx_server.queue_results.recv(task.id);
        ctx_server.queue_results.remove_waiting_task_id(task.id);
 
-       res.set_content(result.data["slots"].dump(), "application/json");
+       res.set_content(result.data.at("slots").dump(), "application/json");
        res.status = 200; // HTTP OK
    };
 
@@ -3218,32 +3220,32 @@ int main(int argc, char ** argv) {
 
        json data = result.data;
 
-       const uint64_t n_prompt_tokens_processed = data["n_prompt_tokens_processed"];
-       const uint64_t t_prompt_processing = data["t_prompt_processing"];
+       const uint64_t n_prompt_tokens_processed = data.at("n_prompt_tokens_processed");
+       const uint64_t t_prompt_processing = data.at("t_prompt_processing");
 
-       const uint64_t n_tokens_predicted = data["n_tokens_predicted"];
-       const uint64_t t_tokens_generation = data["t_tokens_generation"];
+       const uint64_t n_tokens_predicted = data.at("n_tokens_predicted");
+       const uint64_t t_tokens_generation = data.at("t_tokens_generation");
 
-       const int32_t kv_cache_used_cells = data["kv_cache_used_cells"];
+       const int32_t kv_cache_used_cells = data.at("kv_cache_used_cells");
 
        // metrics definition: https://prometheus.io/docs/practices/naming/#metric-names
        json all_metrics_def = json {
            {"counter", {{
                {"name", "prompt_tokens_total"},
                {"help", "Number of prompt tokens processed."},
-               {"value", (uint64_t) data["n_prompt_tokens_processed_total"]}
+               {"value", (uint64_t) data.at("n_prompt_tokens_processed_total")}
            }, {
                {"name", "prompt_seconds_total"},
                {"help", "Prompt process time"},
-               {"value", (uint64_t) data["t_prompt_processing_total"] / 1.e3}
+               {"value", (uint64_t) data.at("t_prompt_processing_total") / 1.e3}
            }, {
                {"name", "tokens_predicted_total"},
                {"help", "Number of generation tokens processed."},
-               {"value", (uint64_t) data["n_tokens_predicted_total"]}
+               {"value", (uint64_t) data.at("n_tokens_predicted_total")}
            }, {
                {"name", "tokens_predicted_seconds_total"},
                {"help", "Predict process time"},
-               {"value", (uint64_t) data["t_tokens_generation_total"] / 1.e3}
+               {"value", (uint64_t) data.at("t_tokens_generation_total") / 1.e3}
            }}},
            {"gauge", {{
                {"name", "prompt_tokens_seconds"},
@@ -3260,15 +3262,15 @@ int main(int argc, char ** argv) {
            },{
                {"name", "kv_cache_tokens"},
                {"help", "KV-cache tokens."},
-               {"value", (uint64_t) data["kv_cache_tokens_count"]}
+               {"value", (uint64_t) data.at("kv_cache_tokens_count")}
            },{
                {"name", "requests_processing"},
                {"help", "Number of request processing."},
-               {"value", (uint64_t) data["processing"]}
+               {"value", (uint64_t) data.at("processing")}
            },{
                {"name", "requests_deferred"},
                {"help", "Number of request deferred."},
-               {"value", (uint64_t) data["deferred"]}
+               {"value", (uint64_t) data.at("deferred")}
            }}}
        };
 
@@ -3279,8 +3281,8 @@ int main(int argc, char ** argv) {
            const auto & metrics_def = el.value();
 
            for (const auto & metric_def : metrics_def) {
-               const std::string name = metric_def["name"];
-               const std::string help = metric_def["help"];
+               const std::string name = metric_def.at("name");
+               const std::string help = metric_def.at("help");
 
                auto value = json_value(metric_def, "value", 0.);
                prometheus << "# HELP llamacpp:" << name << " " << help << "\n"
@@ -3289,7 +3291,7 @@ int main(int argc, char ** argv) {
            }
        }
 
-       const int64_t t_start = data["t_start"];
+       const int64_t t_start = data.at("t_start");
        res.set_header("Process-Start-Time-Unix", std::to_string(t_start));
 
        res.set_content(prometheus.str(), "text/plain; version=0.0.4");
@@ -3298,7 +3300,7 @@ int main(int argc, char ** argv) {
 
    const auto handle_slots_save = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
        json request_data = json::parse(req.body);
-       std::string filename = request_data["filename"];
+       std::string filename = request_data.at("filename");
        if (!validate_file_name(filename)) {
            res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
            return;
@@ -3328,7 +3330,7 @@ int main(int argc, char ** argv) {
 
    const auto handle_slots_restore = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
        json request_data = json::parse(req.body);
-       std::string filename = request_data["filename"];
+       std::string filename = request_data.at("filename");
        if (!validate_file_name(filename)) {
            res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
            return;
@@ -3647,7 +3649,7 @@ int main(int argc, char ** argv) {
 
        std::vector<llama_token> tokens;
        if (body.count("content") != 0) {
-           tokens = ctx_server.tokenize(body["content"], false);
+           tokens = ctx_server.tokenize(body.at("content"), false);
        }
        const json data = format_tokenizer_response(tokens);
        return res.set_content(data.dump(), "application/json; charset=utf-8");
@@ -3659,7 +3661,7 @@ int main(int argc, char ** argv) {
 
        std::string content;
        if (body.count("tokens") != 0) {
-           const std::vector<llama_token> tokens = body["tokens"];
+           const std::vector<llama_token> tokens = body.at("tokens");
            content = tokens_to_str(ctx_server.ctx, tokens.cbegin(), tokens.cend());
        }
 
@@ -3682,10 +3684,10 @@ int main(int argc, char ** argv) {
        json prompt;
        if (body.count("input") != 0) {
            is_openai = true;
-           prompt = body["input"];
+           prompt = body.at("input");
        } else if (body.count("content") != 0) {
            // with "content", we only support single prompt
-           prompt = std::vector<std::string>{body["content"]};
+           prompt = std::vector<std::string>{body.at("content")};
        } else {
            res_error(res, format_error_response("\"input\" or \"content\" must be provided", ERROR_TYPE_INVALID_REQUEST));
            return;
@@ -3704,7 +3706,7 @@ int main(int argc, char ** argv) {
        if (!result.error) {
            if (result.data.count("results")) {
                // result for multi-task
-               responses = result.data["results"];
+               responses = result.data.at("results");
            } else {
                // result for single task
                responses = std::vector<json>{result.data};

0 commit comments
