@@ -1497,6 +1497,7 @@ struct server_queue {
1497
1497
// queues
1498
1498
lock_free::linked_list<server_task> queue_tasks;
1499
1499
lock_free::linked_list<server_task> queue_tasks_deferred;
1500
+ std::atomic<int > n_queue_tasks_deferred = 0 ;
1500
1501
1501
1502
lock_free::hash_map<int , int > cancel_tasks = {10000 };
1502
1503
@@ -1543,6 +1544,7 @@ struct server_queue {
1543
1544
// Park a task on the deferred list, bump the deferred-task counter,
// then signal one thread waiting on condition_tasks.
void defer(server_task task) {
    QUE_DBG(" defer task, id = %d\n ", task.id);

    // Hand ownership of the task over to the deferred list.
    queue_tasks_deferred.insertHead(std::move(task));

    // Atomic counter kept alongside the list; incremented after the insert.
    n_queue_tasks_deferred.fetch_add(1);

    condition_tasks.notify_one();
}
1548
1550
@@ -1565,9 +1567,10 @@ struct server_queue {
1565
1567
// Call when the state of a slot changes; moves one task from the deferred queue to the main queue
1566
1568
void pop_deferred_task () {
1567
1569
if (!queue_tasks_deferred.empty ()) {
1568
- queue_tasks_deferred.sweepOnce ([&](server_task & task) {
1570
+ queue_tasks_deferred.sweepOnce ([&](server_task && task) {
1569
1571
queue_tasks.insertHead (std::move (task));
1570
1572
});
1573
+ n_queue_tasks_deferred--;
1571
1574
}
1572
1575
condition_tasks.notify_one ();
1573
1576
}
@@ -1599,7 +1602,7 @@ struct server_queue {
1599
1602
if (queue_tasks.empty ()) {
1600
1603
break ;
1601
1604
}
1602
- queue_tasks.sweepOnce ([&](server_task & task) {
1605
+ queue_tasks.sweepOnce ([&](server_task && task) {
1603
1606
QUE_DBG (" processing task, id = %d\n " , task.id );
1604
1607
if (cancel_tasks.erase (task.id ) > 0 ) {
1605
1608
QUE_DBG (" task id = %d is canceled\n " , task.id );
@@ -1620,6 +1623,7 @@ struct server_queue {
1620
1623
return ;
1621
1624
}
1622
1625
if (queue_tasks.empty ()) {
1626
+ std::unique_lock<std::mutex> lock (mutex_tasks);
1623
1627
condition_tasks.wait (lock, [&]{
1624
1628
return (!queue_tasks.empty () || !running);
1625
1629
});
@@ -2595,7 +2599,7 @@ struct server_context {
2595
2599
res->slots_data = std::move (slots_data);
2596
2600
res->n_idle_slots = n_idle_slots;
2597
2601
res->n_processing_slots = n_processing_slots;
2598
- res->n_tasks_deferred = queue_tasks.queue_tasks_deferred . size () ;
2602
+ res->n_tasks_deferred = queue_tasks.n_queue_tasks_deferred ;
2599
2603
res->t_start = metrics.t_start ;
2600
2604
2601
2605
res->kv_cache_tokens_count = llama_get_kv_cache_token_count (ctx);
0 commit comments