Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit f8a676e

Browse filesBrowse files
santigimenoMyles Borins
authored andcommitted
cluster: fix race condition setting suicide prop
There is no guarantee that the `suicide` property of a worker in the master process is going to be set when the `disconnect` and `exit` events are emitted. To fix it, wait for the ACK of the suicide message from the master before disconnecting the worker. Also, there's no need to send the suicide message from the worker if the disconnection has been initiated in the master. Add `test-cluster-disconnect-suicide-race` that forks a lot of workers to consistently reproduce the issue this patch tries to solve. Modify `test-regress-GH-3238` so it checks both the `kill` and `disconnect` cases. Also take into account that the `disconnect` event may be received after the `exit` event. PR-URL: #4349 Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: Rich Trott <rtrott@gmail.com>
1 parent 48c2783 commit f8a676e
Copy full SHA for f8a676e

File tree

Expand file treeCollapse file tree

3 files changed

+75
-28
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

3 files changed

+75
-28
lines changed
Open diff view settings
Collapse file

‎lib/cluster.js‎

Copy file name to clipboardExpand all lines: lib/cluster.js
+31-17Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ function masterInit() {
428428
else if (message.act === 'listening')
429429
listening(worker, message);
430430
else if (message.act === 'suicide')
431-
worker.suicide = true;
431+
suicide(worker, message);
432432
else if (message.act === 'close')
433433
close(worker, message);
434434
}
@@ -439,6 +439,11 @@ function masterInit() {
439439
cluster.emit('online', worker);
440440
}
441441

442+
function suicide(worker, message) {
443+
worker.suicide = true;
444+
send(worker, { ack: message.seq });
445+
}
446+
442447
function queryServer(worker, message) {
443448
var args = [message.address,
444449
message.port,
@@ -532,7 +537,7 @@ function workerInit() {
532537
if (message.act === 'newconn')
533538
onconnection(message, handle);
534539
else if (message.act === 'disconnect')
535-
worker.disconnect();
540+
_disconnect.call(worker, true);
536541
}
537542
};
538543

@@ -653,14 +658,36 @@ function workerInit() {
653658
}
654659

655660
Worker.prototype.disconnect = function() {
661+
_disconnect.call(this);
662+
};
663+
664+
Worker.prototype.destroy = function() {
665+
this.suicide = true;
666+
if (!this.isConnected()) process.exit(0);
667+
var exit = process.exit.bind(null, 0);
668+
send({ act: 'suicide' }, () => process.disconnect());
669+
process.once('disconnect', exit);
670+
};
671+
672+
function send(message, cb) {
673+
sendHelper(process, message, null, cb);
674+
}
675+
676+
function _disconnect(masterInitiated) {
656677
this.suicide = true;
657678
let waitingCount = 1;
658679

659680
function checkWaitingCount() {
660681
waitingCount--;
661682
if (waitingCount === 0) {
662-
send({ act: 'suicide' });
663-
process.disconnect();
683+
// If disconnect is worker initiated, wait for ack to be sure suicide
684+
// is properly set in the master, otherwise, if it's master initiated
685+
// there's no need to send the suicide message
686+
if (masterInitiated) {
687+
process.disconnect();
688+
} else {
689+
send({ act: 'suicide' }, () => process.disconnect());
690+
}
664691
}
665692
}
666693

@@ -672,19 +699,6 @@ function workerInit() {
672699
}
673700

674701
checkWaitingCount();
675-
};
676-
677-
Worker.prototype.destroy = function() {
678-
this.suicide = true;
679-
if (!this.isConnected()) process.exit(0);
680-
var exit = process.exit.bind(null, 0);
681-
send({ act: 'suicide' }, exit);
682-
process.once('disconnect', exit);
683-
process.disconnect();
684-
};
685-
686-
function send(message, cb) {
687-
sendHelper(process, message, null, cb);
688702
}
689703
}
690704

Collapse file

‎test/parallel/test-regress-GH-3238.js‎

Copy file name to clipboardExpand all lines: test/parallel/test-regress-GH-3238.js
+12-11Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,19 @@ const assert = require('assert');
44
const cluster = require('cluster');
55

66
if (cluster.isMaster) {
7-
const worker = cluster.fork();
8-
let disconnected = false;
7+
function forkWorker(action) {
8+
const worker = cluster.fork({ action });
9+
worker.on('disconnect', common.mustCall(() => {
10+
assert.strictEqual(worker.suicide, true);
11+
}));
912

10-
worker.on('disconnect', common.mustCall(function() {
11-
assert.strictEqual(worker.suicide, true);
12-
disconnected = true;
13-
}));
13+
worker.on('exit', common.mustCall(() => {
14+
assert.strictEqual(worker.suicide, true);
15+
}));
16+
}
1417

15-
worker.on('exit', common.mustCall(function() {
16-
assert.strictEqual(worker.suicide, true);
17-
assert.strictEqual(disconnected, true);
18-
}));
18+
forkWorker('disconnect');
19+
forkWorker('kill');
1920
} else {
20-
cluster.worker.disconnect();
21+
cluster.worker[process.env.action]();
2122
}
Collapse file
+32Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
'use strict';
2+
const common = require('../common');
3+
const assert = require('assert');
4+
const cluster = require('cluster');
5+
const os = require('os');
6+
7+
if (cluster.isMaster) {
8+
function forkWorker(action) {
9+
const worker = cluster.fork({ action });
10+
worker.on('disconnect', common.mustCall(() => {
11+
assert.strictEqual(worker.suicide, true);
12+
}));
13+
14+
worker.on('exit', common.mustCall(() => {
15+
assert.strictEqual(worker.suicide, true);
16+
}));
17+
}
18+
19+
const cpus = os.cpus().length;
20+
const tries = cpus > 8 ? 64 : cpus * 8;
21+
22+
cluster.on('exit', common.mustCall((worker, code) => {
23+
assert.strictEqual(code, 0, 'worker exited with error');
24+
}, tries * 2));
25+
26+
for (let i = 0; i < tries; ++i) {
27+
forkWorker('disconnect');
28+
forkWorker('kill');
29+
}
30+
} else {
31+
cluster.worker[process.env.action]();
32+
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.