Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 28af831

Browse filesBrowse files
addaleaxRafaelGSS
authored andcommitted
src: use simdutf for converting externalized builtins to UTF-16
Remove the dependency on ICU for this part, as well as the hacky way of converting embedder main sources to UTF-8 via V8 APIs. Allow `UnionBytes` to own the memory its pointing to in order to simplify the code on the `BuiltinLoader` side. PR-URL: #46119 Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com> Reviewed-By: Darshan Sen <raisinten@gmail.com>
1 parent ac90e41 commit 28af831
Copy full SHA for 28af831

File tree

Expand file treeCollapse file tree

10 files changed

+106
-105
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

10 files changed

+106
-105
lines changed
Open diff view settings
Collapse file

‎configure.py‎

Copy file name to clipboardExpand all lines: configure.py
+1-5Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2024,11 +2024,7 @@ def make_bin_override():
20242024
for builtin in shareable_builtins:
20252025
builtin_id = 'node_shared_builtin_' + builtin.replace('/', '_') + '_path'
20262026
if getattr(options, builtin_id):
2027-
if options.with_intl == 'none':
2028-
option_name = '--shared-builtin-' + builtin + '-path'
2029-
error(option_name + ' is incompatible with --with-intl=none' )
2030-
else:
2031-
output['defines'] += [builtin_id.upper() + '=' + getattr(options, builtin_id)]
2027+
output['defines'] += [builtin_id.upper() + '=' + getattr(options, builtin_id)]
20322028
else:
20332029
output['variables']['node_builtin_shareable_builtins'] += [shareable_builtins[builtin]]
20342030

Collapse file

‎node.gyp‎

Copy file name to clipboardExpand all lines: node.gyp
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,7 @@
973973
'deps/googletest/googletest.gyp:gtest_main',
974974
'deps/histogram/histogram.gyp:histogram',
975975
'deps/uvwasi/uvwasi.gyp:uvwasi',
976+
'deps/simdutf/simdutf.gyp:simdutf',
976977
],
977978

978979
'includes': [
Collapse file

‎src/api/environment.cc‎

Copy file name to clipboardExpand all lines: src/api/environment.cc
+2-13Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -467,21 +467,10 @@ MaybeLocal<Value> LoadEnvironment(
467467
Environment* env,
468468
const char* main_script_source_utf8) {
469469
CHECK_NOT_NULL(main_script_source_utf8);
470-
Isolate* isolate = env->isolate();
471470
return LoadEnvironment(
472-
env,
473-
[&](const StartExecutionCallbackInfo& info) -> MaybeLocal<Value> {
474-
// This is a slightly hacky way to convert UTF-8 to UTF-16.
475-
Local<String> str =
476-
String::NewFromUtf8(isolate,
477-
main_script_source_utf8).ToLocalChecked();
478-
auto main_utf16 = std::make_unique<String::Value>(isolate, str);
479-
480-
// TODO(addaleax): Avoid having a global table for all scripts.
471+
env, [&](const StartExecutionCallbackInfo& info) -> MaybeLocal<Value> {
481472
std::string name = "embedder_main_" + std::to_string(env->thread_id());
482-
builtins::BuiltinLoader::Add(
483-
name.c_str(), UnionBytes(**main_utf16, main_utf16->length()));
484-
env->set_main_utf16(std::move(main_utf16));
473+
builtins::BuiltinLoader::Add(name.c_str(), main_script_source_utf8);
485474
Realm* realm = env->principal_realm();
486475

487476
return realm->ExecuteBootstrapper(name.c_str());
Collapse file

‎src/env-inl.h‎

Copy file name to clipboardExpand all lines: src/env-inl.h
-5Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -799,11 +799,6 @@ void Environment::RemoveCleanupHook(CleanupQueue::Callback fn, void* arg) {
799799
cleanup_queue_.Remove(fn, arg);
800800
}
801801

802-
void Environment::set_main_utf16(std::unique_ptr<v8::String::Value> str) {
803-
CHECK(!main_utf16_);
804-
main_utf16_ = std::move(str);
805-
}
806-
807802
void Environment::set_process_exit_handler(
808803
std::function<void(Environment*, ExitCode)>&& handler) {
809804
process_exit_handler_ = std::move(handler);
Collapse file

‎src/env.h‎

Copy file name to clipboardExpand all lines: src/env.h
-6Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -959,7 +959,6 @@ class Environment : public MemoryRetainer {
959959

960960
#endif // HAVE_INSPECTOR
961961

962-
inline void set_main_utf16(std::unique_ptr<v8::String::Value>);
963962
inline void set_process_exit_handler(
964963
std::function<void(Environment*, ExitCode)>&& handler);
965964

@@ -1134,11 +1133,6 @@ class Environment : public MemoryRetainer {
11341133

11351134
std::unique_ptr<Realm> principal_realm_ = nullptr;
11361135

1137-
// Keeps the main script source alive is one was passed to LoadEnvironment().
1138-
// We should probably find a way to just use plain `v8::String`s created from
1139-
// the source passed to LoadEnvironment() directly instead.
1140-
std::unique_ptr<v8::String::Value> main_utf16_;
1141-
11421136
// Used by allocate_managed_buffer() and release_managed_buffer() to keep
11431137
// track of the BackingStore for a given pointer.
11441138
std::unordered_map<char*, std::unique_ptr<v8::BackingStore>>
Collapse file

‎src/node_builtins.cc‎

Copy file name to clipboardExpand all lines: src/node_builtins.cc
+14-12Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "env-inl.h"
44
#include "node_external_reference.h"
55
#include "node_internals.h"
6+
#include "simdutf.h"
67
#include "util-inl.h"
78

89
namespace node {
@@ -35,7 +36,6 @@ BuiltinLoader BuiltinLoader::instance_;
3536

3637
BuiltinLoader::BuiltinLoader() : config_(GetConfig()), has_code_cache_(false) {
3738
LoadJavaScriptSource();
38-
#if defined(NODE_HAVE_I18N_SUPPORT)
3939
#ifdef NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_LEXER_PATH
4040
AddExternalizedBuiltin(
4141
"internal/deps/cjs-module-lexer/lexer",
@@ -52,7 +52,6 @@ BuiltinLoader::BuiltinLoader() : config_(GetConfig()), has_code_cache_(false) {
5252
AddExternalizedBuiltin("internal/deps/undici/undici",
5353
STRINGIFY(NODE_SHARED_BUILTIN_UNDICI_UNDICI_PATH));
5454
#endif // NODE_SHARED_BUILTIN_UNDICI_UNDICI_PATH
55-
#endif // NODE_HAVE_I18N_SUPPORT
5655
}
5756

5857
BuiltinLoader* BuiltinLoader::GetInstance() {
@@ -240,7 +239,6 @@ MaybeLocal<String> BuiltinLoader::LoadBuiltinSource(Isolate* isolate,
240239
#endif // NODE_BUILTIN_MODULES_PATH
241240
}
242241

243-
#if defined(NODE_HAVE_I18N_SUPPORT)
244242
void BuiltinLoader::AddExternalizedBuiltin(const char* id,
245243
const char* filename) {
246244
std::string source;
@@ -252,16 +250,20 @@ void BuiltinLoader::AddExternalizedBuiltin(const char* id,
252250
return;
253251
}
254252

255-
icu::UnicodeString utf16 = icu::UnicodeString::fromUTF8(
256-
icu::StringPiece(source.data(), source.length()));
257-
auto source_utf16 = std::make_unique<icu::UnicodeString>(utf16);
258-
Add(id,
259-
UnionBytes(reinterpret_cast<const uint16_t*>((*source_utf16).getBuffer()),
260-
utf16.length()));
261-
// keep source bytes for builtin alive while BuiltinLoader exists
262-
GetInstance()->externalized_source_bytes_.push_back(std::move(source_utf16));
253+
Add(id, source);
254+
}
255+
256+
bool BuiltinLoader::Add(const char* id, std::string_view utf8source) {
257+
size_t expected_u16_length =
258+
simdutf::utf16_length_from_utf8(utf8source.data(), utf8source.length());
259+
auto out = std::make_shared<std::vector<uint16_t>>(expected_u16_length);
260+
size_t u16_length = simdutf::convert_utf8_to_utf16le(
261+
utf8source.data(),
262+
utf8source.length(),
263+
reinterpret_cast<char16_t*>(out->data()));
264+
out->resize(u16_length);
265+
return Add(id, UnionBytes(out));
263266
}
264-
#endif // NODE_HAVE_I18N_SUPPORT
265267

266268
// Returns Local<Function> of the compiled module if return_code_cache
267269
// is false (we are only compiling the function).
Collapse file

‎src/node_builtins.h‎

Copy file name to clipboardExpand all lines: src/node_builtins.h
+1-8Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@
88
#include <memory>
99
#include <set>
1010
#include <string>
11-
#if defined(NODE_HAVE_I18N_SUPPORT)
12-
#include <unicode/unistr.h>
13-
#endif // NODE_HAVE_I18N_SUPPORT
1411
#include <vector>
1512
#include "node_mutex.h"
1613
#include "node_union_bytes.h"
@@ -71,6 +68,7 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
7168
static v8::Local<v8::String> GetConfigString(v8::Isolate* isolate);
7269
static bool Exists(const char* id);
7370
static bool Add(const char* id, const UnionBytes& source);
71+
static bool Add(const char* id, std::string_view utf8source);
7472

7573
static bool CompileAllBuiltins(v8::Local<v8::Context> context);
7674
static void RefreshCodeCache(const std::vector<CodeCacheInfo>& in);
@@ -136,18 +134,13 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
136134
static void HasCachedBuiltins(
137135
const v8::FunctionCallbackInfo<v8::Value>& args);
138136

139-
#if defined(NODE_HAVE_I18N_SUPPORT)
140137
static void AddExternalizedBuiltin(const char* id, const char* filename);
141-
#endif // NODE_HAVE_I18N_SUPPORT
142138

143139
static BuiltinLoader instance_;
144140
BuiltinCategories builtin_categories_;
145141
BuiltinSourceMap source_;
146142
BuiltinCodeCacheMap code_cache_;
147143
UnionBytes config_;
148-
#if defined(NODE_HAVE_I18N_SUPPORT)
149-
std::list<std::unique_ptr<icu::UnicodeString>> externalized_source_bytes_;
150-
#endif // NODE_HAVE_I18N_SUPPORT
151144

152145
// Used to synchronize access to the code cache map
153146
Mutex code_cache_mutex_;
Collapse file

‎src/node_union_bytes.h‎

Copy file name to clipboardExpand all lines: src/node_union_bytes.h
+9-55Lines changed: 9 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -11,57 +11,20 @@
1111

1212
namespace node {
1313

14-
class NonOwningExternalOneByteResource
15-
: public v8::String::ExternalOneByteStringResource {
16-
public:
17-
explicit NonOwningExternalOneByteResource(const uint8_t* data, size_t length)
18-
: data_(data), length_(length) {}
19-
~NonOwningExternalOneByteResource() override = default;
20-
21-
const char* data() const override {
22-
return reinterpret_cast<const char*>(data_);
23-
}
24-
size_t length() const override { return length_; }
25-
26-
NonOwningExternalOneByteResource(const NonOwningExternalOneByteResource&) =
27-
delete;
28-
NonOwningExternalOneByteResource& operator=(
29-
const NonOwningExternalOneByteResource&) = delete;
30-
31-
private:
32-
const uint8_t* data_;
33-
size_t length_;
34-
};
35-
36-
class NonOwningExternalTwoByteResource
37-
: public v8::String::ExternalStringResource {
38-
public:
39-
explicit NonOwningExternalTwoByteResource(const uint16_t* data, size_t length)
40-
: data_(data), length_(length) {}
41-
~NonOwningExternalTwoByteResource() override = default;
42-
43-
const uint16_t* data() const override { return data_; }
44-
size_t length() const override { return length_; }
45-
46-
NonOwningExternalTwoByteResource(const NonOwningExternalTwoByteResource&) =
47-
delete;
48-
NonOwningExternalTwoByteResource& operator=(
49-
const NonOwningExternalTwoByteResource&) = delete;
50-
51-
private:
52-
const uint16_t* data_;
53-
size_t length_;
54-
};
55-
5614
// Similar to a v8::String, but it's independent from Isolates
5715
// and can be materialized in Isolates as external Strings
58-
// via ToStringChecked. The data pointers are owned by the caller.
16+
// via ToStringChecked.
5917
class UnionBytes {
6018
public:
6119
UnionBytes(const uint16_t* data, size_t length)
6220
: one_bytes_(nullptr), two_bytes_(data), length_(length) {}
6321
UnionBytes(const uint8_t* data, size_t length)
6422
: one_bytes_(data), two_bytes_(nullptr), length_(length) {}
23+
template <typename T> // T = uint8_t or uint16_t
24+
explicit UnionBytes(std::shared_ptr<std::vector</*const*/ T>> data)
25+
: UnionBytes(data->data(), data->size()) {
26+
owning_ptr_ = data;
27+
}
6528

6629
UnionBytes(const UnionBytes&) = default;
6730
UnionBytes& operator=(const UnionBytes&) = default;
@@ -77,23 +40,14 @@ class UnionBytes {
7740
CHECK_NOT_NULL(one_bytes_);
7841
return one_bytes_;
7942
}
80-
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) const {
81-
if (is_one_byte()) {
82-
NonOwningExternalOneByteResource* source =
83-
new NonOwningExternalOneByteResource(one_bytes_data(), length_);
84-
return v8::String::NewExternalOneByte(isolate, source).ToLocalChecked();
85-
} else {
86-
NonOwningExternalTwoByteResource* source =
87-
new NonOwningExternalTwoByteResource(two_bytes_data(), length_);
88-
return v8::String::NewExternalTwoByte(isolate, source).ToLocalChecked();
89-
}
90-
}
91-
size_t length() { return length_; }
43+
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) const;
44+
size_t length() const { return length_; }
9245

9346
private:
9447
const uint8_t* one_bytes_;
9548
const uint16_t* two_bytes_;
9649
size_t length_;
50+
std::shared_ptr<void> owning_ptr_;
9751
};
9852

9953
} // namespace node
Collapse file

‎src/util.cc‎

Copy file name to clipboardExpand all lines: src/util.cc
+62Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,4 +515,66 @@ void SetConstructorFunction(Isolate* isolate,
515515
that->Set(name, tmpl);
516516
}
517517

518+
namespace {
519+
520+
class NonOwningExternalOneByteResource
521+
: public v8::String::ExternalOneByteStringResource {
522+
public:
523+
explicit NonOwningExternalOneByteResource(const UnionBytes& source)
524+
: source_(source) {}
525+
~NonOwningExternalOneByteResource() override = default;
526+
527+
const char* data() const override {
528+
return reinterpret_cast<const char*>(source_.one_bytes_data());
529+
}
530+
size_t length() const override { return source_.length(); }
531+
532+
NonOwningExternalOneByteResource(const NonOwningExternalOneByteResource&) =
533+
delete;
534+
NonOwningExternalOneByteResource& operator=(
535+
const NonOwningExternalOneByteResource&) = delete;
536+
537+
private:
538+
const UnionBytes source_;
539+
};
540+
541+
class NonOwningExternalTwoByteResource
542+
: public v8::String::ExternalStringResource {
543+
public:
544+
explicit NonOwningExternalTwoByteResource(const UnionBytes& source)
545+
: source_(source) {}
546+
~NonOwningExternalTwoByteResource() override = default;
547+
548+
const uint16_t* data() const override { return source_.two_bytes_data(); }
549+
size_t length() const override { return source_.length(); }
550+
551+
NonOwningExternalTwoByteResource(const NonOwningExternalTwoByteResource&) =
552+
delete;
553+
NonOwningExternalTwoByteResource& operator=(
554+
const NonOwningExternalTwoByteResource&) = delete;
555+
556+
private:
557+
const UnionBytes source_;
558+
};
559+
560+
} // anonymous namespace
561+
562+
Local<String> UnionBytes::ToStringChecked(Isolate* isolate) const {
563+
if (UNLIKELY(length() == 0)) {
564+
// V8 requires non-null data pointers for empty external strings,
565+
// but we don't guarantee that. Solve this by not creating an
566+
// external string at all in that case.
567+
return String::Empty(isolate);
568+
}
569+
if (is_one_byte()) {
570+
NonOwningExternalOneByteResource* source =
571+
new NonOwningExternalOneByteResource(*this);
572+
return String::NewExternalOneByte(isolate, source).ToLocalChecked();
573+
} else {
574+
NonOwningExternalTwoByteResource* source =
575+
new NonOwningExternalTwoByteResource(*this);
576+
return String::NewExternalTwoByte(isolate, source).ToLocalChecked();
577+
}
578+
}
579+
518580
} // namespace node
Collapse file

‎test/cctest/test_util.cc‎

Copy file name to clipboardExpand all lines: test/cctest/test_util.cc
+16-1Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
#include "util-inl.h"
21
#include "debug_utils-inl.h"
32
#include "env-inl.h"
43
#include "gtest/gtest.h"
4+
#include "simdutf.h"
5+
#include "util-inl.h"
56

67
using node::Calloc;
78
using node::Malloc;
@@ -298,3 +299,17 @@ TEST(UtilTest, SPrintF) {
298299
const std::string with_zero = std::string("a") + '\0' + 'b';
299300
EXPECT_EQ(SPrintF("%s", with_zero), with_zero);
300301
}
302+
303+
TEST(UtilTest, SimdutfEndiannessDoesNotMeanEndianness) {
304+
// In simdutf, "LE" does *not* refer to Little Endian, it refers
305+
// to 16-byte code units that are stored using *host* endianness.
306+
// This is weird and confusing naming, and so we add this assertion
307+
// here to verify that this is actually the case (so that CI tells
308+
// us if it changed, because for most people Little Endian is
309+
// host endianness, so locally everything would work fine).
310+
const char utf8source[] = "\xe7\x8c\xab";
311+
char16_t u16output;
312+
size_t u16len = simdutf::convert_utf8_to_utf16le(utf8source, 3, &u16output);
313+
EXPECT_EQ(u16len, 1u);
314+
EXPECT_EQ(u16output, 0x732B);
315+
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.