Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 9e0ecfb

Browse filesBrowse files
authored
server : clarify /slots endpoint, add is_processing (#10162)
* server : clarify /slots endpoint, add is_processing * fix tests
1 parent 6a066b9 commit 9e0ecfb
Copy full SHA for 9e0ecfb

File tree

Expand file treeCollapse file tree

3 files changed

+18
-19
lines changed
Filter options
Expand file treeCollapse file tree

3 files changed

+18
-19
lines changed

‎examples/server/README.md

Copy file name to clipboardExpand all lines: examples/server/README.md
+5-6Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,10 @@ Given a ChatML-formatted json description in `messages`, it returns the predicte
692692

693693
### GET `/slots`: Returns the current processing state of each slot
694694

695-
This endpoint can be disabled with `--no-slots`
695+
> [!WARNING]
696+
> This endpoint is intended for debugging and may be modified in future versions. For security reasons, we strongly advise against enabling it in production environments.
697+
698+
This endpoint is disabled by default and can be enabled with `--slots`.
696699

697700
If the query param `?fail_on_no_slot=1` is set, this endpoint will respond with status code 503 if there are no available slots.
698701

@@ -709,6 +712,7 @@ Example:
709712
"grammar": "",
710713
"id": 0,
711714
"ignore_eos": false,
715+
"is_processing": false,
712716
"logit_bias": [],
713717
"min_p": 0.05000000074505806,
714718
"mirostat": 0,
@@ -741,7 +745,6 @@ Example:
741745
"temperature"
742746
],
743747
"seed": 42,
744-
"state": 1,
745748
"stop": [
746749
"\n"
747750
],
@@ -755,10 +758,6 @@ Example:
755758
]
756759
```
757760

758-
Possible values for `slot[i].state` are:
759-
- `0`: SLOT_STATE_IDLE
760-
- `1`: SLOT_STATE_PROCESSING
761-
762761
### GET `/metrics`: Prometheus compatible metrics exporter
763762

764763
This endpoint is only accessible if `--metrics` is set.

‎examples/server/server.cpp

Copy file name to clipboardExpand all lines: examples/server/server.cpp
+8-8Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,11 +1566,11 @@ struct server_context {
15661566

15671567
for (server_slot & slot : slots) {
15681568
json slot_data = get_formated_generation(slot);
1569-
slot_data["id"] = slot.id;
1570-
slot_data["id_task"] = slot.id_task;
1571-
slot_data["state"] = slot.state;
1572-
slot_data["prompt"] = common_detokenize(ctx, slot.prompt_tokens);
1573-
slot_data["next_token"] = {
1569+
slot_data["id"] = slot.id;
1570+
slot_data["id_task"] = slot.id_task;
1571+
slot_data["is_processing"] = slot.is_processing();
1572+
slot_data["prompt"] = common_detokenize(ctx, slot.prompt_tokens);
1573+
slot_data["next_token"] = {
15741574
{"has_next_token", slot.has_next_token},
15751575
{"has_new_line", slot.has_new_line},
15761576
{"n_remain", slot.n_remaining},
@@ -1581,10 +1581,10 @@ struct server_context {
15811581
{"stopping_word", slot.stopping_word},
15821582
};
15831583

1584-
if (slot_data["state"] == SLOT_STATE_IDLE) {
1585-
n_idle_slots++;
1586-
} else {
1584+
if (slot.is_processing()) {
15871585
n_processing_slots++;
1586+
} else {
1587+
n_idle_slots++;
15881588
}
15891589

15901590
slots_data.push_back(slot_data);

‎examples/server/tests/features/steps/steps.py

Copy file name to clipboardExpand all lines: examples/server/tests/features/steps/steps.py
+5-5Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -260,13 +260,13 @@ async def step_wait_for_server_status(context, expecting_status: Literal['health
260260
async def step_all_slots_status(context, expected_slot_status_string: Literal['idle', 'busy'] | str):
261261
match expected_slot_status_string:
262262
case 'idle':
263-
expected_slot_status = 0
263+
expected_slot_status = False
264264
case 'busy':
265-
expected_slot_status = 1
265+
expected_slot_status = True
266266
case _:
267267
assert False, "unknown status"
268268

269-
expected_slots = [{'id': slot_id, 'state': expected_slot_status}
269+
expected_slots = [{'id': slot_id, 'is_processing': expected_slot_status}
270270
for slot_id in range(context.n_slots)]
271271
await request_slots_status(context, expected_slots)
272272

@@ -1354,8 +1354,8 @@ async def wait_for_slots_status(context,
13541354
if status_code == 503 and status_code == expected_http_status_code:
13551355
return
13561356
if status_code == 200 and status_code == expected_http_status_code:
1357-
n_slots_idle = sum(1 if slot["state"] == 0 else 0 for slot in slots)
1358-
n_slots_processing = sum(1 if slot["state"] != 0 else 0 for slot in slots)
1357+
n_slots_idle = sum(1 if not slot["is_processing"] else 0 for slot in slots)
1358+
n_slots_processing = sum(1 if slot["is_processing"] else 0 for slot in slots)
13591359
if ((slots_idle is None or slots_idle == n_slots_idle)
13601360
and (slots_processing is None or slots_processing == n_slots_processing)):
13611361
return

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.