Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 8fd75fb

Browse files
jasnell authored and gibfahn committed
util: graduate TextEncoder/TextDecoder, tests
Add tests ported from Web Platform Tests. Graduate TextEncoder / TextDecoder from experimental. PR-URL: #15743 Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: Refael Ackermann <refack@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com> Reviewed-By: Timothy Gu <timothygu99@gmail.com>
1 parent f00ba6b commit 8fd75fb
Copy full SHA for 8fd75fb
Expand file treeCollapse file tree

12 files changed

+563
-28
lines changed
Open diff view settings
Collapse file

‎doc/api/util.md‎

Copy file name to clipboardExpand all lines: doc/api/util.md
-4Lines changed: 0 additions & 4 deletions
  • Display the source diff
  • Display the rich diff
Original file line numberDiff line numberDiff line change
@@ -551,8 +551,6 @@ see [Custom promisified functions][].
551551
added: v8.3.0
552552
-->
553553

554-
> Stability: 1 - Experimental
555-
556554
An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API.
557555

558556
```js
@@ -690,8 +688,6 @@ mark.
690688
added: v8.3.0
691689
-->
692690

693-
> Stability: 1 - Experimental
694-
695691
An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All
696692
instances of `TextEncoder` only support UTF-8 encoding.
697693

Collapse file

‎lib/internal/encoding.js‎

Copy file name to clipboardExpand all lines: lib/internal/encoding.js
-20Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,6 @@ const kEncoding = Symbol('encoding');
1010
const kDecoder = Symbol('decoder');
1111
const kEncoder = Symbol('encoder');
1212

13-
let warned = false;
14-
const experimental =
15-
'The WHATWG Encoding Standard implementation is an experimental API. It ' +
16-
'should not yet be used in production applications.';
17-
1813
const {
1914
getConstructorOf,
2015
customInspectSymbol: inspect
@@ -289,11 +284,6 @@ function getEncodingFromLabel(label) {
289284

290285
class TextEncoder {
291286
constructor() {
292-
if (!warned) {
293-
warned = true;
294-
process.emitWarning(experimental, 'ExperimentalWarning');
295-
}
296-
297287
this[kEncoder] = true;
298288
}
299289

@@ -353,11 +343,6 @@ function makeTextDecoderICU() {
353343

354344
class TextDecoder {
355345
constructor(encoding = 'utf-8', options = {}) {
356-
if (!warned) {
357-
warned = true;
358-
process.emitWarning(experimental, 'ExperimentalWarning');
359-
}
360-
361346
encoding = `${encoding}`;
362347
if (typeof options !== 'object')
363348
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
@@ -430,11 +415,6 @@ function makeTextDecoderJS() {
430415

431416
class TextDecoder {
432417
constructor(encoding = 'utf-8', options = {}) {
433-
if (!warned) {
434-
warned = true;
435-
process.emitWarning(experimental, 'ExperimentalWarning');
436-
}
437-
438418
encoding = `${encoding}`;
439419
if (typeof options !== 'object')
440420
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
Collapse file
+76Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
'use strict';
2+
3+
// From: https://github.com/w3c/web-platform-tests/blob/d74324b53c/encoding/textdecoder-fatal-streaming.html
4+
5+
const common = require('../common');
6+
7+
if (!common.hasIntl)
8+
common.skip('missing Intl');
9+
10+
const assert = require('assert');
11+
const {
12+
TextDecoder
13+
} = require('util');
14+
15+
16+
{
17+
[
18+
{ encoding: 'utf-8', sequence: [0xC0] },
19+
{ encoding: 'utf-16le', sequence: [0x00] },
20+
{ encoding: 'utf-16be', sequence: [0x00] }
21+
].forEach((testCase) => {
22+
// Build the input bytes directly from the test case's byte array. Wrapping
// the array in another array only works for one-byte sequences (the inner
// array is coerced to a number) and would yield 0 for longer sequences.
const data = new Uint8Array(testCase.sequence);
23+
common.expectsError(
24+
() => {
25+
const decoder = new TextDecoder(testCase.encoding, { fatal: true });
26+
decoder.decode(data);
27+
}, {
28+
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
29+
type: TypeError,
30+
message:
31+
`The encoded data was not valid for encoding ${testCase.encoding}`
32+
}
33+
);
34+
35+
assert.strictEqual(
36+
new TextDecoder(testCase.encoding).decode(data),
37+
'\uFFFD'
38+
);
39+
});
40+
}
41+
42+
{
43+
const decoder = new TextDecoder('utf-16le', { fatal: true });
44+
const odd = new Uint8Array([0x00]);
45+
const even = new Uint8Array([0x00, 0x00]);
46+
47+
assert.strictEqual(decoder.decode(odd, { stream: true }), '');
48+
assert.strictEqual(decoder.decode(odd), '\u0000');
49+
50+
common.expectsError(
51+
() => {
52+
decoder.decode(even, { stream: true });
53+
decoder.decode(odd);
54+
}, {
55+
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
56+
type: TypeError,
57+
message:
58+
'The encoded data was not valid for encoding utf-16le'
59+
}
60+
);
61+
62+
common.expectsError(
63+
() => {
64+
decoder.decode(odd, { stream: true });
65+
decoder.decode(even);
66+
}, {
67+
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
68+
type: TypeError,
69+
message:
70+
'The encoded data was not valid for encoding utf-16le'
71+
}
72+
);
73+
74+
assert.strictEqual(decoder.decode(even, { stream: true }), '\u0000');
75+
assert.strictEqual(decoder.decode(even), '\u0000');
76+
}
Collapse file

‎test/parallel/test-whatwg-encoding-internals.js‎

Copy file name to clipboardExpand all lines: test/parallel/test-whatwg-encoding-internals.js
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
'use strict';
33

44
require('../common');
5+
56
const assert = require('assert');
67
const { getEncodingFromLabel } = require('internal/encoding');
78

Collapse file
+56Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
'use strict';
2+
3+
// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/api-surrogates-utf8.html
4+
5+
require('../common');
6+
7+
const assert = require('assert');
8+
const {
9+
TextDecoder,
10+
TextEncoder
11+
} = require('util');
12+
13+
const badStrings = [
14+
{
15+
input: 'abc123',
16+
expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
17+
decoded: 'abc123',
18+
name: 'Sanity check'
19+
},
20+
{
21+
input: '\uD800',
22+
expected: [0xef, 0xbf, 0xbd],
23+
decoded: '\uFFFD',
24+
name: 'Surrogate half (low)'
25+
},
26+
{
27+
input: '\uDC00',
28+
expected: [0xef, 0xbf, 0xbd],
29+
decoded: '\uFFFD',
30+
name: 'Surrogate half (high)'
31+
},
32+
{
33+
input: 'abc\uD800123',
34+
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
35+
decoded: 'abc\uFFFD123',
36+
name: 'Surrogate half (low), in a string'
37+
},
38+
{
39+
input: 'abc\uDC00123',
40+
expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
41+
decoded: 'abc\uFFFD123',
42+
name: 'Surrogate half (high), in a string'
43+
},
44+
{
45+
input: '\uDC00\uD800',
46+
expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
47+
decoded: '\uFFFD\uFFFD',
48+
name: 'Wrong order'
49+
}
50+
];
51+
52+
badStrings.forEach((t) => {
53+
const encoded = new TextEncoder().encode(t.input);
54+
assert.deepStrictEqual([].slice.call(encoded), t.expected);
55+
assert.strictEqual(new TextDecoder('utf-8').decode(encoded), t.decoded);
56+
});
Collapse file
+93Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
'use strict';
2+
3+
// From: https://github.com/w3c/web-platform-tests/blob/39a67e2fff/encoding/textdecoder-fatal.html
4+
5+
const common = require('../common');
6+
7+
if (!common.hasIntl)
8+
common.skip('missing Intl');
9+
10+
const assert = require('assert');
11+
const {
12+
TextDecoder
13+
} = require('util');
14+
15+
const bad = [
16+
{ encoding: 'utf-8', input: [0xFF], name: 'invalid code' },
17+
{ encoding: 'utf-8', input: [0xC0], name: 'ends early' },
18+
{ encoding: 'utf-8', input: [0xE0], name: 'ends early 2' },
19+
{ encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' },
20+
{ encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' },
21+
{ encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' },
22+
{ encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' },
23+
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' },
24+
{ encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' },
25+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80],
26+
name: '> 0x10FFFF' },
27+
{ encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80],
28+
name: 'obsolete lead byte' },
29+
// Overlong encodings
30+
{ encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' },
31+
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x80],
32+
name: 'overlong U+0000 - 3 bytes' },
33+
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80],
34+
name: 'overlong U+0000 - 4 bytes' },
35+
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80],
36+
name: 'overlong U+0000 - 5 bytes' },
37+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80],
38+
name: 'overlong U+0000 - 6 bytes' },
39+
{ encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' },
40+
{ encoding: 'utf-8', input: [0xE0, 0x81, 0xBF],
41+
name: 'overlong U+007F - 3 bytes' },
42+
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF],
43+
name: 'overlong U+007F - 4 bytes' },
44+
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF],
45+
name: 'overlong U+007F - 5 bytes' },
46+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF],
47+
name: 'overlong U+007F - 6 bytes' },
48+
{ encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF],
49+
name: 'overlong U+07FF - 3 bytes' },
50+
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF],
51+
name: 'overlong U+07FF - 4 bytes' },
52+
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF],
53+
name: 'overlong U+07FF - 5 bytes' },
54+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF],
55+
name: 'overlong U+07FF - 6 bytes' },
56+
{ encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF],
57+
name: 'overlong U+FFFF - 4 bytes' },
58+
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF],
59+
name: 'overlong U+FFFF - 5 bytes' },
60+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF],
61+
name: 'overlong U+FFFF - 6 bytes' },
62+
{ encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF],
63+
name: 'overlong U+10FFFF - 5 bytes' },
64+
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF],
65+
name: 'overlong U+10FFFF - 6 bytes' },
66+
// UTF-16 surrogates encoded as code points in UTF-8
67+
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' },
68+
{ encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' },
69+
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80],
70+
name: 'surrogate pair' },
71+
{ encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' },
72+
// Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html
73+
// FIXME: Add legacy encoding cases
74+
];
75+
76+
bad.forEach((t) => {
77+
common.expectsError(
78+
() => {
79+
new TextDecoder(t.encoding, { fatal: true })
80+
.decode(new Uint8Array(t.input));
81+
}, {
82+
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
83+
type: TypeError
84+
}
85+
);
86+
});
87+
88+
{
89+
assert('fatal' in new TextDecoder());
90+
assert.strictEqual(typeof new TextDecoder().fatal, 'boolean');
91+
assert(!new TextDecoder().fatal);
92+
assert(new TextDecoder('utf-8', { fatal: true }).fatal);
93+
}
Collapse file
+50Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
'use strict';
2+
3+
// From: https://github.com/w3c/web-platform-tests/blob/7f567fa29c/encoding/textdecoder-ignorebom.html
4+
5+
const common = require('../common');
6+
7+
const assert = require('assert');
8+
const {
9+
TextDecoder
10+
} = require('util');
11+
12+
const cases = [
13+
{
14+
encoding: 'utf-8',
15+
bytes: [0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63],
16+
skipNoIntl: false
17+
},
18+
{
19+
encoding: 'utf-16le',
20+
bytes: [0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00],
21+
skipNoIntl: false
22+
},
23+
{
24+
encoding: 'utf-16be',
25+
bytes: [0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63],
26+
skipNoIntl: true
27+
}
28+
];
29+
30+
cases.forEach((testCase) => {
31+
if (testCase.skipNoIntl && !common.hasIntl) {
32+
console.log(`skipping ${testCase.encoding} because missing Intl`);
33+
return; // skipping
34+
}
35+
const BOM = '\uFEFF';
36+
let decoder = new TextDecoder(testCase.encoding, { ignoreBOM: true });
37+
const bytes = new Uint8Array(testCase.bytes);
38+
assert.strictEqual(decoder.decode(bytes), `${BOM}abc`);
39+
decoder = new TextDecoder(testCase.encoding, { ignoreBOM: false });
40+
assert.strictEqual(decoder.decode(bytes), 'abc');
41+
decoder = new TextDecoder(testCase.encoding);
42+
assert.strictEqual(decoder.decode(bytes), 'abc');
43+
});
44+
45+
{
46+
assert('ignoreBOM' in new TextDecoder());
47+
assert.strictEqual(typeof new TextDecoder().ignoreBOM, 'boolean');
48+
assert(!new TextDecoder().ignoreBOM);
49+
assert(new TextDecoder('utf-8', { ignoreBOM: true }).ignoreBOM);
50+
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.