Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e62608b

Browse files
mertcanaltin authored and aduh95 committed
src: improve textEncoder encode performance with simdutf
PR-URL: #61496
Refs: cloudflare/workerd#5448
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Daniel Lemire <daniel@lemire.me>
1 parent 915d105 commit e62608b
Copy full SHA for e62608b

1 file changed

+84 -11 · Lines changed: 84 additions & 11 deletions

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎src/encoding_binding.cc‎

Copy file name to clipboard · Expand all lines: src/encoding_binding.cc
+84 -11 · Lines changed: 84 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -312,11 +312,12 @@ void BindingData::EncodeUtf8String(const FunctionCallbackInfo<Value>& args) {
312312
CHECK_GE(args.Length(), 1);
313313
CHECK(args[0]->IsString());
314314

315-
Local<String> str = args[0].As<String>();
316-
size_t length = str->Utf8LengthV2(isolate);
315+
Local<String> source = args[0].As<String>();
317316

318-
Local<ArrayBuffer> ab;
319-
{
317+
// For small strings, use the V8 path
318+
static constexpr int kSmallStringThreshold = 32;
319+
if (source->Length() <= kSmallStringThreshold) {
320+
size_t length = source->Utf8LengthV2(isolate);
320321
std::unique_ptr<BackingStore> bs = ArrayBuffer::NewBackingStore(
321322
isolate,
322323
length,
@@ -328,16 +329,88 @@ void BindingData::EncodeUtf8String(const FunctionCallbackInfo<Value>& args) {
328329
return;
329330
}
330331

331-
// We are certain that `data` is sufficiently large
332-
str->WriteUtf8V2(isolate,
333-
static_cast<char*>(bs->Data()),
334-
bs->MaxByteLength(),
335-
String::WriteFlags::kReplaceInvalidUtf8);
332+
source->WriteUtf8V2(isolate,
333+
static_cast<char*>(bs->Data()),
334+
bs->MaxByteLength(),
335+
String::WriteFlags::kReplaceInvalidUtf8);
336+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, std::move(bs));
337+
args.GetReturnValue().Set(Uint8Array::New(ab, 0, length));
338+
return;
339+
}
336340

337-
ab = ArrayBuffer::New(isolate, std::move(bs));
341+
size_t length = source->Length();
342+
size_t utf8_length = 0;
343+
bool is_one_byte = source->IsOneByte();
344+
345+
if (is_one_byte) {
346+
// One-byte string (Latin1) - copy to buffer first, then process
347+
MaybeStackBuffer<uint8_t, MAX_SIZE_FOR_STACK_ALLOC> latin1_buffer(length);
348+
source->WriteOneByteV2(isolate, 0, length, latin1_buffer.out());
349+
350+
auto data = reinterpret_cast<const char*>(latin1_buffer.out());
351+
352+
// Check if it's pure ASCII - if so, we can just copy
353+
simdutf::result result = simdutf::validate_ascii_with_errors(data, length);
354+
if (result.error == simdutf::SUCCESS) {
355+
// Pure ASCII - direct copy
356+
std::unique_ptr<BackingStore> bs = ArrayBuffer::NewBackingStore(
357+
isolate, length, BackingStoreInitializationMode::kUninitialized);
358+
CHECK(bs);
359+
memcpy(bs->Data(), data, length);
360+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, std::move(bs));
361+
args.GetReturnValue().Set(Uint8Array::New(ab, 0, length));
362+
return;
363+
}
364+
365+
// Latin1 with non-ASCII characters - need conversion
366+
utf8_length = simdutf::utf8_length_from_latin1(data, length);
367+
std::unique_ptr<BackingStore> bs = ArrayBuffer::NewBackingStore(
368+
isolate, utf8_length, BackingStoreInitializationMode::kUninitialized);
369+
CHECK(bs);
370+
[[maybe_unused]] size_t written = simdutf::convert_latin1_to_utf8(
371+
data, length, static_cast<char*>(bs->Data()));
372+
DCHECK_EQ(written, utf8_length);
373+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, std::move(bs));
374+
args.GetReturnValue().Set(Uint8Array::New(ab, 0, utf8_length));
375+
return;
376+
}
377+
378+
// Two-byte string (UTF-16) - copy to buffer first
379+
MaybeStackBuffer<uint16_t, MAX_SIZE_FOR_STACK_ALLOC> utf16_buffer(length);
380+
source->WriteV2(isolate, 0, length, utf16_buffer.out());
381+
382+
auto data = reinterpret_cast<char16_t*>(utf16_buffer.out());
383+
384+
// Check for unpaired surrogates
385+
simdutf::result validation_result =
386+
simdutf::validate_utf16_with_errors(data, length);
387+
388+
if (validation_result.error == simdutf::SUCCESS) {
389+
// Valid UTF-16 - use the fast path
390+
utf8_length = simdutf::utf8_length_from_utf16(data, length);
391+
std::unique_ptr<BackingStore> bs = ArrayBuffer::NewBackingStore(
392+
isolate, utf8_length, BackingStoreInitializationMode::kUninitialized);
393+
CHECK(bs);
394+
[[maybe_unused]] size_t written = simdutf::convert_utf16_to_utf8(
395+
data, length, static_cast<char*>(bs->Data()));
396+
DCHECK_EQ(written, utf8_length);
397+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, std::move(bs));
398+
args.GetReturnValue().Set(Uint8Array::New(ab, 0, utf8_length));
399+
return;
338400
}
339401

340-
args.GetReturnValue().Set(Uint8Array::New(ab, 0, length));
402+
// Invalid UTF-16 with unpaired surrogates - convert to well-formed in place
403+
simdutf::to_well_formed_utf16(data, length, data);
404+
405+
utf8_length = simdutf::utf8_length_from_utf16(data, length);
406+
std::unique_ptr<BackingStore> bs = ArrayBuffer::NewBackingStore(
407+
isolate, utf8_length, BackingStoreInitializationMode::kUninitialized);
408+
CHECK(bs);
409+
[[maybe_unused]] size_t written = simdutf::convert_utf16_to_utf8(
410+
data, length, static_cast<char*>(bs->Data()));
411+
DCHECK_EQ(written, utf8_length);
412+
Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, std::move(bs));
413+
args.GetReturnValue().Set(Uint8Array::New(ab, 0, utf8_length));
341414
}
342415

343416
// Convert the input into an encoded string

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.