Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit a782598

Browse files
ChALkeR authored and aduh95 committed
lib: unify ICU and no-ICU TextDecoder
PR-URL: #61409
Reviewed-By: Aviv Keller <me@aviv.sh>
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Gürgün Dayıoğlu <hey@gurgun.day>
Reviewed-By: Richard Lau <richard.lau@ibm.com>
1 parent 0ceb8ca commit a782598
Copy full SHA for a782598

2 files changed

+111 -162 (lines changed: 111 additions & 162 deletions)

File tree

Expand file tree / Collapse file tree
Open diff view settings
Filter options
Expand file tree / Collapse file tree
Open diff view settings
Collapse file

‎lib/internal/encoding.js‎

Copy file name to clipboard
Expand all lines: lib/internal/encoding.js
+84 -138 (lines changed: 84 additions & 138 deletions)
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ const {
2323
ERR_INVALID_THIS,
2424
ERR_NO_ICU,
2525
} = require('internal/errors').codes;
26-
const kMethod = Symbol('method');
26+
const kSingleByte = Symbol('single-byte');
2727
const kHandle = Symbol('handle');
2828
const kFlags = Symbol('flags');
2929
const kEncoding = Symbol('encoding');
@@ -53,6 +53,8 @@ const {
5353
validateObject,
5454
kValidateObjectAllowObjectsAndNull,
5555
} = require('internal/validators');
56+
57+
const { hasIntl } = internalBinding('config');
5658
const binding = internalBinding('encoding_binding');
5759
const {
5860
encodeInto,
@@ -406,166 +408,110 @@ function parseInput(input) {
406408
}
407409
}
408410

409-
const TextDecoder =
410-
internalBinding('config').hasIntl ?
411-
makeTextDecoderICU() :
412-
makeTextDecoderJS();
413-
414-
function makeTextDecoderICU() {
415-
const {
416-
decode: _decode,
417-
getConverter,
418-
} = internalBinding('icu');
419-
420-
class TextDecoder {
421-
constructor(encoding = 'utf-8', options = kEmptyObject) {
422-
encoding = `${encoding}`;
423-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
424-
425-
const enc = getEncodingFromLabel(encoding);
426-
if (enc === undefined)
427-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
428-
429-
let flags = 0;
430-
if (options !== null) {
431-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
432-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
433-
}
434-
435-
this[kDecoder] = true;
436-
this[kFlags] = flags;
437-
this[kEncoding] = enc;
438-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
439-
this[kFatal] = Boolean(options?.fatal);
440-
// Only support fast path for UTF-8.
441-
this[kUTF8FastPath] = enc === 'utf-8';
442-
this[kHandle] = undefined;
443-
this[kMethod] = undefined;
444-
445-
if (isSinglebyteEncoding(this.encoding)) {
446-
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
447-
} else if (!this[kUTF8FastPath]) {
448-
this.#prepareConverter();
449-
}
450-
}
451-
452-
#prepareConverter() {
453-
if (this[kHandle] !== undefined) return;
454-
let icuEncoding = this[kEncoding];
455-
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
456-
const handle = getConverter(icuEncoding, this[kFlags]);
457-
if (handle === undefined)
458-
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
459-
this[kHandle] = handle;
460-
}
461-
462-
decode(input = empty, options = kEmptyObject) {
463-
validateDecoder(this);
464-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
411+
let icuDecode, icuGetConverter;
412+
if (hasIntl) {
413+
;({
414+
decode: icuDecode,
415+
getConverter: icuGetConverter,
416+
} = internalBinding('icu'));
417+
}
465418

466-
if (this[kMethod]) return this[kMethod](parseInput(input));
419+
const kBOMSeen = Symbol('BOM seen');
467420

468-
this[kUTF8FastPath] &&= !(options?.stream);
421+
let StringDecoder;
422+
function lazyStringDecoder() {
423+
if (StringDecoder === undefined)
424+
({ StringDecoder } = require('string_decoder'));
425+
return StringDecoder;
426+
}
469427

470-
if (this[kUTF8FastPath]) {
471-
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
472-
}
428+
class TextDecoder {
429+
constructor(encoding = 'utf-8', options = kEmptyObject) {
430+
encoding = `${encoding}`;
431+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
473432

474-
this.#prepareConverter();
433+
const enc = getEncodingFromLabel(encoding);
434+
if (enc === undefined)
435+
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
475436

476-
let flags = 0;
477-
if (options !== null)
478-
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
437+
let flags = 0;
438+
if (options !== null) {
439+
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
440+
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
441+
}
479442

480-
return _decode(this[kHandle], input, flags, this.encoding);
443+
this[kDecoder] = true;
444+
this[kFlags] = flags;
445+
this[kEncoding] = enc;
446+
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
447+
this[kFatal] = Boolean(options?.fatal);
448+
this[kUTF8FastPath] = false;
449+
this[kHandle] = undefined;
450+
this[kSingleByte] = undefined; // Does not care about streaming or BOM
451+
452+
if (enc === 'utf-8') {
453+
this[kUTF8FastPath] = true;
454+
} else if (isSinglebyteEncoding(enc)) {
455+
this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
456+
} else {
457+
this.#prepareConverter(); // Need to throw early if we don't support the encoding
481458
}
482459
}
483460

484-
return TextDecoder;
485-
}
486-
487-
function makeTextDecoderJS() {
488-
let StringDecoder;
489-
function lazyStringDecoder() {
490-
if (StringDecoder === undefined)
491-
({ StringDecoder } = require('string_decoder'));
492-
return StringDecoder;
461+
#prepareConverter() {
462+
if (this[kHandle] !== undefined) return;
463+
if (hasIntl) {
464+
let icuEncoding = this[kEncoding];
465+
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
466+
const handle = icuGetConverter(icuEncoding, this[kFlags]);
467+
if (handle === undefined)
468+
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
469+
this[kHandle] = handle;
470+
} else if (this[kEncoding] === 'utf-8' || this[kEncoding] === 'utf-16le') {
471+
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
472+
this[kHandle] = new (lazyStringDecoder())(this[kEncoding]);
473+
this[kBOMSeen] = false;
474+
} else {
475+
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
476+
}
493477
}
494478

495-
const kBOMSeen = Symbol('BOM seen');
479+
decode(input = empty, options = kEmptyObject) {
480+
validateDecoder(this);
481+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
496482

497-
function hasConverter(encoding) {
498-
return encoding === 'utf-8' || encoding === 'utf-16le';
499-
}
483+
if (this[kSingleByte]) return this[kSingleByte](parseInput(input));
500484

501-
class TextDecoder {
502-
constructor(encoding = 'utf-8', options = kEmptyObject) {
503-
encoding = `${encoding}`;
504-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
485+
const stream = options?.stream;
486+
if (this[kUTF8FastPath]) {
487+
if (!stream) return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
488+
this[kUTF8FastPath] = false;
489+
}
505490

506-
const enc = getEncodingFromLabel(encoding);
507-
if (enc === undefined)
508-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
491+
this.#prepareConverter();
509492

510-
let flags = 0;
511-
if (options !== null) {
512-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
513-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
514-
}
515-
516-
this[kDecoder] = true;
517-
this[kFlags] = flags;
518-
this[kEncoding] = enc;
519-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
520-
this[kFatal] = Boolean(options?.fatal);
521-
this[kBOMSeen] = false;
522-
this[kMethod] = undefined;
523-
524-
if (isSinglebyteEncoding(enc)) {
525-
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
526-
} else {
527-
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
528-
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
529-
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
530-
this[kHandle] = new (lazyStringDecoder())(enc);
531-
}
493+
if (hasIntl) {
494+
const flags = stream ? 0 : CONVERTER_FLAGS_FLUSH;
495+
return icuDecode(this[kHandle], input, flags, this[kEncoding]);
532496
}
533497

534-
decode(input = empty, options = kEmptyObject) {
535-
validateDecoder(this);
536-
input = parseInput(input);
537-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
498+
input = parseInput(input);
538499

539-
if (this[kMethod]) return this[kMethod](input);
540-
541-
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
542-
this[kBOMSeen] = false;
543-
}
500+
let result = stream ? this[kHandle].write(input) : this[kHandle].end(input);
544501

545-
if (options !== null && options.stream) {
546-
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
547-
} else {
548-
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
502+
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
503+
// If the very first result in the stream is a BOM, and we are not
504+
// explicitly told to ignore it, then we discard it.
505+
if (result[0] === '\ufeff') {
506+
result = StringPrototypeSlice(result, 1);
549507
}
508+
this[kBOMSeen] = true;
509+
}
550510

551-
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
552-
this[kHandle].end(input) :
553-
this[kHandle].write(input);
554-
555-
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
556-
// If the very first result in the stream is a BOM, and we are not
557-
// explicitly told to ignore it, then we discard it.
558-
if (result[0] === '\ufeff') {
559-
result = StringPrototypeSlice(result, 1);
560-
}
561-
this[kBOMSeen] = true;
562-
}
511+
if (!stream) this[kBOMSeen] = false;
563512

564-
return result;
565-
}
513+
return result;
566514
}
567-
568-
return TextDecoder;
569515
}
570516

571517
// Mix in some shared properties.
Collapse file

‎test/parallel/test-whatwg-encoding-custom-textdecoder.js‎

Copy file name to clipboard
Expand all lines: test/parallel/test-whatwg-encoding-custom-textdecoder.js
+27 -24 (lines changed: 27 additions & 24 deletions)
Original file line number | Diff line number | Diff line change
@@ -67,32 +67,34 @@ assert(TextDecoder);
6767
}
6868

6969
// Test TextDecoder, UTF-8, fatal: true, ignoreBOM: false
70-
if (common.hasIntl) {
71-
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
72-
const dec = new TextDecoder(i, { fatal: true });
73-
assert.throws(() => dec.decode(buf.slice(0, 8)),
74-
{
75-
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
76-
name: 'TypeError',
77-
message: 'The encoded data was not valid ' +
78-
'for encoding utf-8'
79-
});
80-
});
81-
82-
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
83-
const dec = new TextDecoder(i, { fatal: true });
70+
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
71+
const dec = new TextDecoder(i, { fatal: true });
72+
assert.throws(() => dec.decode(buf.slice(0, 8)),
73+
{
74+
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
75+
name: 'TypeError',
76+
message: 'The encoded data was not valid ' +
77+
'for encoding utf-8'
78+
});
79+
});
80+
81+
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
82+
const dec = new TextDecoder(i, { fatal: true });
83+
if (common.hasIntl) {
8484
dec.decode(buf.slice(0, 8), { stream: true });
8585
dec.decode(buf.slice(8));
86-
});
87-
} else {
88-
assert.throws(
89-
() => new TextDecoder('utf-8', { fatal: true }),
90-
{
91-
code: 'ERR_NO_ICU',
92-
name: 'TypeError',
93-
message: '"fatal" option is not supported on Node.js compiled without ICU'
94-
});
95-
}
86+
} else {
87+
assert.throws(
88+
() => {
89+
dec.decode(buf.slice(0, 8), { stream: true });
90+
},
91+
{
92+
code: 'ERR_NO_ICU',
93+
name: 'TypeError',
94+
message: '"fatal" option is not supported on Node.js compiled without ICU'
95+
});
96+
}
97+
});
9698

9799
// Test TextDecoder, label undefined, options null
98100
{
@@ -132,6 +134,7 @@ if (common.hasIntl) {
132134
'}'
133135
);
134136
} else {
137+
dec.decode(Uint8Array.of(0), { stream: true });
135138
assert.strictEqual(
136139
util.inspect(dec, { showHidden: true }),
137140
'TextDecoder {\n' +

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.