Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 374eb41

Browse files
mertcanaltin authored and ruyadorno committed
util: add fast path for Latin1 decoding
PR-URL: #55275
Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com>
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Daniel Lemire <daniel@lemire.me>
1 parent 34c6882 commit 374eb41
Copy full SHA for 374eb41

File tree

Expand file treeCollapse file tree

5 files changed

+212
-2
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

5 files changed

+212
-2
lines changed
Open diff view settings
Collapse file

‎benchmark/util/text-decoder.js‎

Copy file name to clipboardExpand all lines: benchmark/util/text-decoder.js
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
const common = require('../common.js');
44

55
const bench = common.createBenchmark(main, {
6-
encoding: ['utf-8', 'latin1', 'iso-8859-3'],
6+
encoding: ['utf-8', 'windows-1252', 'iso-8859-3'],
77
ignoreBOM: [0, 1],
88
fatal: [0, 1],
99
len: [256, 1024 * 16, 1024 * 128],
Collapse file

‎lib/internal/encoding.js‎

Copy file name to clipboardExpand all lines: lib/internal/encoding.js
+9-1Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ const kDecoder = Symbol('decoder');
2929
const kEncoder = Symbol('encoder');
3030
const kFatal = Symbol('kFatal');
3131
const kUTF8FastPath = Symbol('kUTF8FastPath');
32+
const kLatin1FastPath = Symbol('kLatin1FastPath');
3233
const kIgnoreBOM = Symbol('kIgnoreBOM');
3334

3435
const {
@@ -55,6 +56,7 @@ const {
5556
encodeIntoResults,
5657
encodeUtf8String,
5758
decodeUTF8,
59+
decodeLatin1,
5860
} = binding;
5961

6062
const { Buffer } = require('buffer');
@@ -419,9 +421,10 @@ function makeTextDecoderICU() {
419421
this[kFatal] = Boolean(options?.fatal);
420422
// Only support fast path for UTF-8.
421423
this[kUTF8FastPath] = enc === 'utf-8';
424+
this[kLatin1FastPath] = enc === 'windows-1252';
422425
this[kHandle] = undefined;
423426

424-
if (!this[kUTF8FastPath]) {
427+
if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) {
425428
this.#prepareConverter();
426429
}
427430
}
@@ -438,11 +441,16 @@ function makeTextDecoderICU() {
438441
validateDecoder(this);
439442

440443
this[kUTF8FastPath] &&= !(options?.stream);
444+
this[kLatin1FastPath] &&= !(options?.stream);
441445

442446
if (this[kUTF8FastPath]) {
443447
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
444448
}
445449

450+
if (this[kLatin1FastPath]) {
451+
return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]);
452+
}
453+
446454
this.#prepareConverter();
447455

448456
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
Collapse file

‎src/encoding_binding.cc‎

Copy file name to clipboardExpand all lines: src/encoding_binding.cc
+46Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
33
#include "env-inl.h"
4+
#include "node_buffer.h"
45
#include "node_errors.h"
56
#include "node_external_reference.h"
67
#include "simdutf.h"
@@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
226227
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
227228
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
228229
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
230+
SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
229231
}
230232

231233
void BindingData::CreatePerContextProperties(Local<Object> target,
@@ -243,6 +245,50 @@ void BindingData::RegisterTimerExternalReferences(
243245
registry->Register(DecodeUTF8);
244246
registry->Register(ToASCII);
245247
registry->Register(ToUnicode);
248+
registry->Register(DecodeLatin1);
249+
}
250+
251+
void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
252+
Environment* env = Environment::GetCurrent(args);
253+
254+
CHECK_GE(args.Length(), 1);
255+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
256+
args[0]->IsArrayBufferView())) {
257+
return node::THROW_ERR_INVALID_ARG_TYPE(
258+
env->isolate(),
259+
"The \"input\" argument must be an instance of ArrayBuffer, "
260+
"SharedArrayBuffer, or ArrayBufferView.");
261+
}
262+
263+
bool ignore_bom = args[1]->IsTrue();
264+
bool has_fatal = args[2]->IsTrue();
265+
266+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
267+
const uint8_t* data = buffer.data();
268+
size_t length = buffer.length();
269+
270+
if (ignore_bom && length > 0 && data[0] == 0xFF) {
271+
data++;
272+
length--;
273+
}
274+
275+
if (length == 0) {
276+
return args.GetReturnValue().SetEmptyString();
277+
}
278+
279+
std::string result(length * 2, '\0');
280+
281+
size_t written = simdutf::convert_latin1_to_utf8(
282+
reinterpret_cast<const char*>(data), length, result.data());
283+
284+
if (has_fatal && written == 0) {
285+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
286+
env->isolate(), "The encoded data was not valid for encoding latin1");
287+
}
288+
289+
Local<Object> buffer_result =
290+
node::Buffer::Copy(env, result.c_str(), written).ToLocalChecked();
291+
args.GetReturnValue().Set(buffer_result);
246292
}
247293

248294
} // namespace encoding_binding
Collapse file

‎src/encoding_binding.h‎

Copy file name to clipboardExpand all lines: src/encoding_binding.h
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
3131
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
3232
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
3333
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
34+
static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);
3435

3536
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
3637
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);
Collapse file
+155Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
#include "encoding_binding.h"
2+
#include "env-inl.h"
3+
#include "gtest/gtest.h"
4+
#include "node_test_fixture.h"
5+
#include "v8.h"
6+
7+
namespace node {
8+
namespace encoding_binding {
9+
10+
// Test helper: invokes BindingData::DecodeLatin1 the way JavaScript would,
// passing args[0] as the input followed by the ignoreBOM and fatal flags.
//
// Returns true and stores the callback's return value in *result when the
// call completes without throwing. Returns false (leaving *result untouched)
// when the call throws or the wrapper function cannot be created.
bool RunDecodeLatin1(Environment* env,
                     Local<Value> args[],
                     bool ignore_bom,
                     bool has_fatal,
                     Local<Value>* result) {
  Isolate* isolate = env->isolate();
  Local<v8::Context> context = env->context();
  TryCatch try_catch(isolate);

  Local<Value> call_args[] = {args[0],
                              Boolean::New(isolate, ignore_bom),
                              Boolean::New(isolate, has_fatal)};

  // A FunctionCallbackInfo cannot be built by hand from an argument array, so
  // wrap the native callback in a v8::Function and call it through V8.
  Local<v8::Function> decode_fn;
  if (!v8::Function::New(context, BindingData::DecodeLatin1)
           .ToLocal(&decode_fn)) {
    return false;
  }

  Local<Value> returned;
  const bool completed =
      decode_fn->Call(context, v8::Undefined(isolate), 3, call_args)
          .ToLocal(&returned);

  if (!completed || try_catch.HasCaught()) {
    return false;
  }

  // Report the decoded value, not the (empty) pending exception.
  *result = returned;
  return true;
}
32+
33+
// Fixture for the DecodeLatin1 tests below; adds nothing on top of
// NodeTestFixture.
class EncodingBindingTest : public NodeTestFixture {};
34+
35+
// Plain Latin1 bytes, with both flags off, decode to the matching text.
TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  // Latin1 code points for "Á", "é" and "ó".
  const uint8_t input_bytes[] = {0xC1, 0xE9, 0xF3};
  const size_t input_size = sizeof(input_bytes);

  Local<ArrayBuffer> backing = ArrayBuffer::New(isolate, input_size);
  memcpy(backing->GetBackingStore()->Data(), input_bytes, input_size);

  Local<Uint8Array> view = Uint8Array::New(backing, 0, input_size);
  Local<Value> args[] = {view};

  Local<Value> decoded;
  EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &decoded));

  String::Utf8Value decoded_utf8(isolate, decoded);
  EXPECT_STREQ(*decoded_utf8, "Áéó");
}
53+
54+
// A zero-length view decodes successfully to the empty string.
TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  Local<ArrayBuffer> empty_backing = ArrayBuffer::New(isolate, 0);
  Local<Uint8Array> empty_view = Uint8Array::New(empty_backing, 0, 0);
  Local<Value> args[] = {empty_view};

  Local<Value> decoded;
  EXPECT_TRUE(RunDecodeLatin1(env, args, false, false, &decoded));

  String::Utf8Value decoded_utf8(isolate, decoded);
  EXPECT_STREQ(*decoded_utf8, "");
}
69+
70+
// Passing something that is not a buffer type (here a string) must make the
// call fail.
TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  Local<Value> not_a_buffer =
      String::NewFromUtf8Literal(isolate, "Invalid input");
  Local<Value> args[] = {not_a_buffer};

  Local<Value> ignored;
  EXPECT_FALSE(RunDecodeLatin1(env, args, false, false, &ignored));
}
80+
81+
// With ignoreBOM set, DecodeLatin1 only drops a single leading 0xFF byte.
// This input starts with 0xFE, so nothing is stripped and every byte,
// including the 0xFE 0xFF prefix, must appear in the decoded text.
TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOM) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3};
  Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(latin1_data));
  memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));

  Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(latin1_data));
  Local<Value> args[] = {array};

  Local<Value> result;
  EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &result));

  String::Utf8Value utf8_result(isolate, result);
  // 0xFE is "þ" and 0xFF is "ÿ" in Latin1.
  EXPECT_STREQ(*utf8_result, "þÿÁéó");
}
99+
100+
// Every byte value is a valid Latin1 character, so even in fatal mode a run
// of 0xFF bytes must decode successfully instead of throwing. With ignoreBOM
// off, no leading byte is dropped either.
TEST_F(EncodingBindingTest, DecodeLatin1_FatalInvalidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  const uint8_t invalid_data[] = {0xFF, 0xFF, 0xFF};
  Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(invalid_data));
  memcpy(ab->GetBackingStore()->Data(), invalid_data, sizeof(invalid_data));

  Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(invalid_data));
  Local<Value> args[] = {array};

  Local<Value> result;
  EXPECT_TRUE(RunDecodeLatin1(env, args, false, true, &result));

  String::Utf8Value utf8_result(isolate, result);
  // 0xFF is "ÿ" in Latin1.
  EXPECT_STREQ(*utf8_result, "ÿÿÿ");
}
115+
116+
// Both flags set: the input starts with 0xFE, not 0xFF, so DecodeLatin1
// strips nothing, and fatal mode does not reject any of these bytes. The
// whole input therefore appears in the decoded text.
TEST_F(EncodingBindingTest, DecodeLatin1_IgnoreBOMAndFatal) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  const uint8_t latin1_data[] = {0xFE, 0xFF, 0xC1, 0xE9, 0xF3};
  Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(latin1_data));
  memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));

  Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(latin1_data));
  Local<Value> args[] = {array};

  Local<Value> result;
  EXPECT_TRUE(RunDecodeLatin1(env, args, true, true, &result));

  String::Utf8Value utf8_result(isolate, result);
  // 0xFE is "þ" and 0xFF is "ÿ" in Latin1.
  EXPECT_STREQ(*utf8_result, "þÿÁéó");
}
134+
135+
// With ignoreBOM set, a single leading 0xFF byte is dropped before decoding,
// so only the three bytes after it show up in the result.
TEST_F(EncodingBindingTest, DecodeLatin1_BOMPresent) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  const uint8_t prefixed_bytes[] = {0xFF, 0xC1, 0xE9, 0xF3};
  const size_t prefixed_size = sizeof(prefixed_bytes);

  Local<ArrayBuffer> backing = ArrayBuffer::New(isolate, prefixed_size);
  memcpy(backing->GetBackingStore()->Data(), prefixed_bytes, prefixed_size);

  Local<Uint8Array> view = Uint8Array::New(backing, 0, prefixed_size);
  Local<Value> args[] = {view};

  Local<Value> decoded;
  EXPECT_TRUE(RunDecodeLatin1(env, args, true, false, &decoded));

  String::Utf8Value decoded_utf8(isolate, decoded);
  EXPECT_STREQ(*decoded_utf8, "Áéó");
}
153+
154+
} // namespace encoding_binding
155+
} // namespace node

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.