Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit fab72b1

Browse filesBrowse files
addaleaxjuanarbol
authored andcommitted
src: use simdutf for converting externalized builtins to UTF-16
Remove the dependency on ICU for this part, as well as the hacky way of converting embedder main sources to UTF-8 via V8 APIs. Allow `UnionBytes` to own the memory its pointing to in order to simplify the code on the `BuiltinLoader` side. PR-URL: #46119 Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com> Reviewed-By: Darshan Sen <raisinten@gmail.com>
1 parent f3dc432 commit fab72b1
Copy full SHA for fab72b1

File tree

Expand file treeCollapse file tree

10 files changed

+106
-105
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

10 files changed

+106
-105
lines changed
Open diff view settings
Collapse file

‎configure.py‎

Copy file name to clipboardExpand all lines: configure.py
+1-5Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2077,11 +2077,7 @@ def make_bin_override():
20772077
for builtin in shareable_builtins:
20782078
builtin_id = 'node_shared_builtin_' + builtin.replace('/', '_') + '_path'
20792079
if getattr(options, builtin_id):
2080-
if options.with_intl == 'none':
2081-
option_name = '--shared-builtin-' + builtin + '-path'
2082-
error(option_name + ' is incompatible with --with-intl=none' )
2083-
else:
2084-
output['defines'] += [builtin_id.upper() + '=' + getattr(options, builtin_id)]
2080+
output['defines'] += [builtin_id.upper() + '=' + getattr(options, builtin_id)]
20852081
else:
20862082
output['variables']['node_builtin_shareable_builtins'] += [shareable_builtins[builtin]]
20872083

Collapse file

‎node.gyp‎

Copy file name to clipboardExpand all lines: node.gyp
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,6 +1211,7 @@
12111211
'node_dtrace_header',
12121212
'node_dtrace_ustack',
12131213
'node_dtrace_provider',
1214+
'deps/simdutf/simdutf.gyp:simdutf',
12141215
],
12151216

12161217
'includes': [
Collapse file

‎src/api/environment.cc‎

Copy file name to clipboardExpand all lines: src/api/environment.cc
+2-13Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -464,21 +464,10 @@ MaybeLocal<Value> LoadEnvironment(
464464
Environment* env,
465465
const char* main_script_source_utf8) {
466466
CHECK_NOT_NULL(main_script_source_utf8);
467-
Isolate* isolate = env->isolate();
468467
return LoadEnvironment(
469-
env,
470-
[&](const StartExecutionCallbackInfo& info) -> MaybeLocal<Value> {
471-
// This is a slightly hacky way to convert UTF-8 to UTF-16.
472-
Local<String> str =
473-
String::NewFromUtf8(isolate,
474-
main_script_source_utf8).ToLocalChecked();
475-
auto main_utf16 = std::make_unique<String::Value>(isolate, str);
476-
477-
// TODO(addaleax): Avoid having a global table for all scripts.
468+
env, [&](const StartExecutionCallbackInfo& info) -> MaybeLocal<Value> {
478469
std::string name = "embedder_main_" + std::to_string(env->thread_id());
479-
builtins::BuiltinLoader::Add(
480-
name.c_str(), UnionBytes(**main_utf16, main_utf16->length()));
481-
env->set_main_utf16(std::move(main_utf16));
470+
builtins::BuiltinLoader::Add(name.c_str(), main_script_source_utf8);
482471
Realm* realm = env->principal_realm();
483472

484473
// Arguments must match the parameters specified in
Collapse file

‎src/env-inl.h‎

Copy file name to clipboardExpand all lines: src/env-inl.h
-5Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -799,11 +799,6 @@ void Environment::RemoveCleanupHook(CleanupQueue::Callback fn, void* arg) {
799799
cleanup_queue_.Remove(fn, arg);
800800
}
801801

802-
void Environment::set_main_utf16(std::unique_ptr<v8::String::Value> str) {
803-
CHECK(!main_utf16_);
804-
main_utf16_ = std::move(str);
805-
}
806-
807802
void Environment::set_process_exit_handler(
808803
std::function<void(Environment*, int)>&& handler) {
809804
process_exit_handler_ = std::move(handler);
Collapse file

‎src/env.h‎

Copy file name to clipboardExpand all lines: src/env.h
-6Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,6 @@ class Environment : public MemoryRetainer {
950950

951951
#endif // HAVE_INSPECTOR
952952

953-
inline void set_main_utf16(std::unique_ptr<v8::String::Value>);
954953
inline void set_process_exit_handler(
955954
std::function<void(Environment*, int)>&& handler);
956955

@@ -1122,11 +1121,6 @@ class Environment : public MemoryRetainer {
11221121

11231122
std::unique_ptr<Realm> principal_realm_ = nullptr;
11241123

1125-
// Keeps the main script source alive is one was passed to LoadEnvironment().
1126-
// We should probably find a way to just use plain `v8::String`s created from
1127-
// the source passed to LoadEnvironment() directly instead.
1128-
std::unique_ptr<v8::String::Value> main_utf16_;
1129-
11301124
// Used by allocate_managed_buffer() and release_managed_buffer() to keep
11311125
// track of the BackingStore for a given pointer.
11321126
std::unordered_map<char*, std::unique_ptr<v8::BackingStore>>
Collapse file

‎src/node_builtins.cc‎

Copy file name to clipboardExpand all lines: src/node_builtins.cc
+14-12Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "env-inl.h"
44
#include "node_external_reference.h"
55
#include "node_internals.h"
6+
#include "simdutf.h"
67
#include "util-inl.h"
78

89
namespace node {
@@ -32,7 +33,6 @@ BuiltinLoader BuiltinLoader::instance_;
3233

3334
BuiltinLoader::BuiltinLoader() : config_(GetConfig()), has_code_cache_(false) {
3435
LoadJavaScriptSource();
35-
#if defined(NODE_HAVE_I18N_SUPPORT)
3636
#ifdef NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_LEXER_PATH
3737
AddExternalizedBuiltin(
3838
"internal/deps/cjs-module-lexer/lexer",
@@ -49,7 +49,6 @@ BuiltinLoader::BuiltinLoader() : config_(GetConfig()), has_code_cache_(false) {
4949
AddExternalizedBuiltin("internal/deps/undici/undici",
5050
STRINGIFY(NODE_SHARED_BUILTIN_UNDICI_UNDICI_PATH));
5151
#endif // NODE_SHARED_BUILTIN_UNDICI_UNDICI_PATH
52-
#endif // NODE_HAVE_I18N_SUPPORT
5352
}
5453

5554
BuiltinLoader* BuiltinLoader::GetInstance() {
@@ -237,7 +236,6 @@ MaybeLocal<String> BuiltinLoader::LoadBuiltinSource(Isolate* isolate,
237236
#endif // NODE_BUILTIN_MODULES_PATH
238237
}
239238

240-
#if defined(NODE_HAVE_I18N_SUPPORT)
241239
void BuiltinLoader::AddExternalizedBuiltin(const char* id,
242240
const char* filename) {
243241
std::string source;
@@ -249,16 +247,20 @@ void BuiltinLoader::AddExternalizedBuiltin(const char* id,
249247
return;
250248
}
251249

252-
icu::UnicodeString utf16 = icu::UnicodeString::fromUTF8(
253-
icu::StringPiece(source.data(), source.length()));
254-
auto source_utf16 = std::make_unique<icu::UnicodeString>(utf16);
255-
Add(id,
256-
UnionBytes(reinterpret_cast<const uint16_t*>((*source_utf16).getBuffer()),
257-
utf16.length()));
258-
// keep source bytes for builtin alive while BuiltinLoader exists
259-
GetInstance()->externalized_source_bytes_.push_back(std::move(source_utf16));
250+
Add(id, source);
251+
}
252+
253+
bool BuiltinLoader::Add(const char* id, std::string_view utf8source) {
254+
size_t expected_u16_length =
255+
simdutf::utf16_length_from_utf8(utf8source.data(), utf8source.length());
256+
auto out = std::make_shared<std::vector<uint16_t>>(expected_u16_length);
257+
size_t u16_length = simdutf::convert_utf8_to_utf16le(
258+
utf8source.data(),
259+
utf8source.length(),
260+
reinterpret_cast<char16_t*>(out->data()));
261+
out->resize(u16_length);
262+
return Add(id, UnionBytes(out));
260263
}
261-
#endif // NODE_HAVE_I18N_SUPPORT
262264

263265
// Returns Local<Function> of the compiled module if return_code_cache
264266
// is false (we are only compiling the function).
Collapse file

‎src/node_builtins.h‎

Copy file name to clipboardExpand all lines: src/node_builtins.h
+1-8Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@
88
#include <memory>
99
#include <set>
1010
#include <string>
11-
#if defined(NODE_HAVE_I18N_SUPPORT)
12-
#include <unicode/unistr.h>
13-
#endif // NODE_HAVE_I18N_SUPPORT
1411
#include <vector>
1512
#include "node_mutex.h"
1613
#include "node_union_bytes.h"
@@ -59,6 +56,7 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
5956
static v8::Local<v8::String> GetConfigString(v8::Isolate* isolate);
6057
static bool Exists(const char* id);
6158
static bool Add(const char* id, const UnionBytes& source);
59+
static bool Add(const char* id, std::string_view utf8source);
6260

6361
static bool CompileAllBuiltins(v8::Local<v8::Context> context);
6462
static void RefreshCodeCache(const std::vector<CodeCacheInfo>& in);
@@ -124,18 +122,13 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
124122
static void HasCachedBuiltins(
125123
const v8::FunctionCallbackInfo<v8::Value>& args);
126124

127-
#if defined(NODE_HAVE_I18N_SUPPORT)
128125
static void AddExternalizedBuiltin(const char* id, const char* filename);
129-
#endif // NODE_HAVE_I18N_SUPPORT
130126

131127
static BuiltinLoader instance_;
132128
BuiltinCategories builtin_categories_;
133129
BuiltinSourceMap source_;
134130
BuiltinCodeCacheMap code_cache_;
135131
UnionBytes config_;
136-
#if defined(NODE_HAVE_I18N_SUPPORT)
137-
std::list<std::unique_ptr<icu::UnicodeString>> externalized_source_bytes_;
138-
#endif // NODE_HAVE_I18N_SUPPORT
139132

140133
// Used to synchronize access to the code cache map
141134
Mutex code_cache_mutex_;
Collapse file

‎src/node_union_bytes.h‎

Copy file name to clipboardExpand all lines: src/node_union_bytes.h
+9-55Lines changed: 9 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -11,57 +11,20 @@
1111

1212
namespace node {
1313

14-
class NonOwningExternalOneByteResource
15-
: public v8::String::ExternalOneByteStringResource {
16-
public:
17-
explicit NonOwningExternalOneByteResource(const uint8_t* data, size_t length)
18-
: data_(data), length_(length) {}
19-
~NonOwningExternalOneByteResource() override = default;
20-
21-
const char* data() const override {
22-
return reinterpret_cast<const char*>(data_);
23-
}
24-
size_t length() const override { return length_; }
25-
26-
NonOwningExternalOneByteResource(const NonOwningExternalOneByteResource&) =
27-
delete;
28-
NonOwningExternalOneByteResource& operator=(
29-
const NonOwningExternalOneByteResource&) = delete;
30-
31-
private:
32-
const uint8_t* data_;
33-
size_t length_;
34-
};
35-
36-
class NonOwningExternalTwoByteResource
37-
: public v8::String::ExternalStringResource {
38-
public:
39-
explicit NonOwningExternalTwoByteResource(const uint16_t* data, size_t length)
40-
: data_(data), length_(length) {}
41-
~NonOwningExternalTwoByteResource() override = default;
42-
43-
const uint16_t* data() const override { return data_; }
44-
size_t length() const override { return length_; }
45-
46-
NonOwningExternalTwoByteResource(const NonOwningExternalTwoByteResource&) =
47-
delete;
48-
NonOwningExternalTwoByteResource& operator=(
49-
const NonOwningExternalTwoByteResource&) = delete;
50-
51-
private:
52-
const uint16_t* data_;
53-
size_t length_;
54-
};
55-
5614
// Similar to a v8::String, but it's independent from Isolates
5715
// and can be materialized in Isolates as external Strings
58-
// via ToStringChecked. The data pointers are owned by the caller.
16+
// via ToStringChecked.
5917
class UnionBytes {
6018
public:
6119
UnionBytes(const uint16_t* data, size_t length)
6220
: one_bytes_(nullptr), two_bytes_(data), length_(length) {}
6321
UnionBytes(const uint8_t* data, size_t length)
6422
: one_bytes_(data), two_bytes_(nullptr), length_(length) {}
23+
template <typename T> // T = uint8_t or uint16_t
24+
explicit UnionBytes(std::shared_ptr<std::vector</*const*/ T>> data)
25+
: UnionBytes(data->data(), data->size()) {
26+
owning_ptr_ = data;
27+
}
6528

6629
UnionBytes(const UnionBytes&) = default;
6730
UnionBytes& operator=(const UnionBytes&) = default;
@@ -77,23 +40,14 @@ class UnionBytes {
7740
CHECK_NOT_NULL(one_bytes_);
7841
return one_bytes_;
7942
}
80-
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) const {
81-
if (is_one_byte()) {
82-
NonOwningExternalOneByteResource* source =
83-
new NonOwningExternalOneByteResource(one_bytes_data(), length_);
84-
return v8::String::NewExternalOneByte(isolate, source).ToLocalChecked();
85-
} else {
86-
NonOwningExternalTwoByteResource* source =
87-
new NonOwningExternalTwoByteResource(two_bytes_data(), length_);
88-
return v8::String::NewExternalTwoByte(isolate, source).ToLocalChecked();
89-
}
90-
}
91-
size_t length() { return length_; }
43+
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) const;
44+
size_t length() const { return length_; }
9245

9346
private:
9447
const uint8_t* one_bytes_;
9548
const uint16_t* two_bytes_;
9649
size_t length_;
50+
std::shared_ptr<void> owning_ptr_;
9751
};
9852

9953
} // namespace node
Collapse file

‎src/util.cc‎

Copy file name to clipboardExpand all lines: src/util.cc
+62Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,4 +475,66 @@ void SetConstructorFunction(Local<v8::Context> context,
475475
that->Set(context, name, tmpl->GetFunction(context).ToLocalChecked()).Check();
476476
}
477477

478+
namespace {
479+
480+
class NonOwningExternalOneByteResource
481+
: public v8::String::ExternalOneByteStringResource {
482+
public:
483+
explicit NonOwningExternalOneByteResource(const UnionBytes& source)
484+
: source_(source) {}
485+
~NonOwningExternalOneByteResource() override = default;
486+
487+
const char* data() const override {
488+
return reinterpret_cast<const char*>(source_.one_bytes_data());
489+
}
490+
size_t length() const override { return source_.length(); }
491+
492+
NonOwningExternalOneByteResource(const NonOwningExternalOneByteResource&) =
493+
delete;
494+
NonOwningExternalOneByteResource& operator=(
495+
const NonOwningExternalOneByteResource&) = delete;
496+
497+
private:
498+
const UnionBytes source_;
499+
};
500+
501+
class NonOwningExternalTwoByteResource
502+
: public v8::String::ExternalStringResource {
503+
public:
504+
explicit NonOwningExternalTwoByteResource(const UnionBytes& source)
505+
: source_(source) {}
506+
~NonOwningExternalTwoByteResource() override = default;
507+
508+
const uint16_t* data() const override { return source_.two_bytes_data(); }
509+
size_t length() const override { return source_.length(); }
510+
511+
NonOwningExternalTwoByteResource(const NonOwningExternalTwoByteResource&) =
512+
delete;
513+
NonOwningExternalTwoByteResource& operator=(
514+
const NonOwningExternalTwoByteResource&) = delete;
515+
516+
private:
517+
const UnionBytes source_;
518+
};
519+
520+
} // anonymous namespace
521+
522+
Local<String> UnionBytes::ToStringChecked(Isolate* isolate) const {
523+
if (UNLIKELY(length() == 0)) {
524+
// V8 requires non-null data pointers for empty external strings,
525+
// but we don't guarantee that. Solve this by not creating an
526+
// external string at all in that case.
527+
return String::Empty(isolate);
528+
}
529+
if (is_one_byte()) {
530+
NonOwningExternalOneByteResource* source =
531+
new NonOwningExternalOneByteResource(*this);
532+
return String::NewExternalOneByte(isolate, source).ToLocalChecked();
533+
} else {
534+
NonOwningExternalTwoByteResource* source =
535+
new NonOwningExternalTwoByteResource(*this);
536+
return String::NewExternalTwoByte(isolate, source).ToLocalChecked();
537+
}
538+
}
539+
478540
} // namespace node
Collapse file

‎test/cctest/test_util.cc‎

Copy file name to clipboardExpand all lines: test/cctest/test_util.cc
+16-1Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
#include "util-inl.h"
21
#include "debug_utils-inl.h"
32
#include "env-inl.h"
43
#include "gtest/gtest.h"
4+
#include "simdutf.h"
5+
#include "util-inl.h"
56

67
using node::Calloc;
78
using node::Malloc;
@@ -298,3 +299,17 @@ TEST(UtilTest, SPrintF) {
298299
const std::string with_zero = std::string("a") + '\0' + 'b';
299300
EXPECT_EQ(SPrintF("%s", with_zero), with_zero);
300301
}
302+
303+
TEST(UtilTest, SimdutfEndiannessDoesNotMeanEndianness) {
304+
// In simdutf, "LE" does *not* refer to Little Endian, it refers
305+
// to 16-byte code units that are stored using *host* endianness.
306+
// This is weird and confusing naming, and so we add this assertion
307+
// here to verify that this is actually the case (so that CI tells
308+
// us if it changed, because for most people Little Endian is
309+
// host endianness, so locally everything would work fine).
310+
const char utf8source[] = "\xe7\x8c\xab";
311+
char16_t u16output;
312+
size_t u16len = simdutf::convert_utf8_to_utf16le(utf8source, 3, &u16output);
313+
EXPECT_EQ(u16len, 1u);
314+
EXPECT_EQ(u16output, 0x732B);
315+
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.