Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 16ee02f

Browse filesBrowse files
anonrigjuanarbol
authored andcommitted
buffer: add buffer.isUtf8 for utf8 validation
PR-URL: #45947 Reviewed-By: Robert Nagy <ronagy@icloud.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Luigi Pinca <luigipinca@gmail.com> Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Anna Henningsen <anna@addaleax.net>
1 parent 4063cdc commit 16ee02f
Copy full SHA for 16ee02f

File tree

Expand file treeCollapse file tree

7 files changed

+131
-1
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

7 files changed

+131
-1
lines changed
Open diff view settings
Collapse file

‎doc/api/buffer.md‎

Copy file name to clipboardExpand all lines: doc/api/buffer.md
+11Lines changed: 11 additions & 0 deletions
  • Display the source diff
  • Display the rich diff
Original file line numberDiff line numberDiff line change
@@ -5126,6 +5126,17 @@ For code running using Node.js APIs, converting between base64-encoded strings
51265126
and binary data should be performed using `Buffer.from(str, 'base64')` and
51275127
`buf.toString('base64')`.**
51285128

5129+
### `buffer.isUtf8(input)`
5130+
5131+
<!-- YAML
5132+
added: REPLACEME
5133+
-->
5134+
5135+
* `input` {Buffer | ArrayBuffer | TypedArray} The input to validate.
5136+
* Returns: {boolean} Returns `true` if and only if the input is valid UTF-8.
5137+
5138+
This function returns `true` if `input` contains only valid UTF-8-encoded data, including the case in which `input` is empty. It throws if `input` is a detached array buffer.
5139+
51295140
### `buffer.INSPECT_MAX_BYTES`
51305141

51315142
<!-- YAML
Collapse file

‎lib/buffer.js‎

Copy file name to clipboardExpand all lines: lib/buffer.js
+12-1Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ const {
5757
compareOffset,
5858
createFromString,
5959
fill: bindingFill,
60+
isUtf8: bindingIsUtf8,
6061
indexOfBuffer,
6162
indexOfNumber,
6263
indexOfString,
@@ -83,7 +84,8 @@ const {
8384
const {
8485
isAnyArrayBuffer,
8586
isArrayBufferView,
86-
isUint8Array
87+
isUint8Array,
88+
isTypedArray,
8789
} = require('internal/util/types');
8890
const {
8991
inspect: utilInspect
@@ -1322,13 +1324,22 @@ function atob(input) {
13221324
return Buffer.from(input, 'base64').toString('latin1');
13231325
}
13241326

1327+
/**
 * Checks whether `input` holds valid UTF-8 by delegating to the native
 * `isUtf8` binding.
 * @param {Buffer | ArrayBuffer | TypedArray} input The bytes to validate.
 * @returns {boolean} The result reported by the native binding.
 * @throws {ERR_INVALID_ARG_TYPE} If `input` is neither a TypedArray nor an
 *   array buffer.
 */
function isUtf8(input) {
  if (isTypedArray(input) || isAnyArrayBuffer(input)) {
    return bindingIsUtf8(input);
  }

  // Name every accepted type: array buffers pass the guard above, so the
  // error message must not suggest that only TypedArray/Buffer are allowed.
  throw new ERR_INVALID_ARG_TYPE(
    'input', ['ArrayBuffer', 'Buffer', 'TypedArray'], input);
}
1334+
13251335
module.exports = {
13261336
Blob,
13271337
File,
13281338
resolveObjectURL,
13291339
Buffer,
13301340
SlowBuffer,
13311341
transcode,
1342+
isUtf8,
13321343

13331344
// Legacy
13341345
kMaxLength,
Collapse file

‎src/node_buffer.cc‎

Copy file name to clipboardExpand all lines: src/node_buffer.cc
+18Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,20 @@ static void EncodeInto(const FunctionCallbackInfo<Value>& args) {
12231223
results[1] = written;
12241224
}
12251225

1226+
static void IsUtf8(const FunctionCallbackInfo<Value>& args) {
1227+
Environment* env = Environment::GetCurrent(args);
1228+
CHECK_EQ(args.Length(), 1);
1229+
CHECK(args[0]->IsTypedArray() || args[0]->IsArrayBuffer() ||
1230+
args[0]->IsSharedArrayBuffer());
1231+
ArrayBufferViewContents<char> abv(args[0]);
1232+
1233+
if (abv.WasDetached()) {
1234+
return node::THROW_ERR_INVALID_STATE(
1235+
env, "Cannot validate on a detached buffer");
1236+
}
1237+
1238+
args.GetReturnValue().Set(simdutf::validate_utf8(abv.data(), abv.length()));
1239+
}
12261240

12271241
void SetBufferPrototype(const FunctionCallbackInfo<Value>& args) {
12281242
Environment* env = Environment::GetCurrent(args);
@@ -1358,6 +1372,8 @@ void Initialize(Local<Object> target,
13581372
SetMethod(context, target, "encodeInto", EncodeInto);
13591373
SetMethodNoSideEffect(context, target, "encodeUtf8String", EncodeUtf8String);
13601374

1375+
SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8);
1376+
13611377
target
13621378
->Set(context,
13631379
FIXED_ONE_BYTE_STRING(isolate, "kMaxLength"),
@@ -1413,6 +1429,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
14131429
registry->Register(EncodeInto);
14141430
registry->Register(EncodeUtf8String);
14151431

1432+
registry->Register(IsUtf8);
1433+
14161434
registry->Register(StringSlice<ASCII>);
14171435
registry->Register(StringSlice<BASE64>);
14181436
registry->Register(StringSlice<BASE64URL>);
Collapse file

‎src/node_errors.h‎

Copy file name to clipboardExpand all lines: src/node_errors.h
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ void OOMErrorHandler(const char* location, bool is_heap_oom);
6868
V(ERR_INVALID_ARG_TYPE, TypeError) \
6969
V(ERR_INVALID_OBJECT_DEFINE_PROPERTY, TypeError) \
7070
V(ERR_INVALID_MODULE, Error) \
71+
V(ERR_INVALID_STATE, Error) \
7172
V(ERR_INVALID_THIS, TypeError) \
7273
V(ERR_INVALID_TRANSFER_OBJECT, TypeError) \
7374
V(ERR_MEMORY_ALLOCATION_FAILED, Error) \
Collapse file

‎src/util-inl.h‎

Copy file name to clipboardExpand all lines: src/util-inl.h
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,7 @@ void ArrayBufferViewContents<T, S>::ReadValue(v8::Local<v8::Value> buf) {
555555
auto ab = buf.As<v8::ArrayBuffer>();
556556
length_ = ab->ByteLength();
557557
data_ = static_cast<T*>(ab->Data());
558+
was_detached_ = ab->WasDetached();
558559
} else {
559560
CHECK(buf->IsSharedArrayBuffer());
560561
auto sab = buf.As<v8::SharedArrayBuffer>();
Collapse file

‎src/util.h‎

Copy file name to clipboardExpand all lines: src/util.h
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,7 @@ class ArrayBufferViewContents {
510510
inline void Read(v8::Local<v8::ArrayBufferView> abv);
511511
inline void ReadValue(v8::Local<v8::Value> buf);
512512

513+
inline bool WasDetached() const { return was_detached_; }
513514
inline const T* data() const { return data_; }
514515
inline size_t length() const { return length_; }
515516

@@ -524,6 +525,7 @@ class ArrayBufferViewContents {
524525
T stack_storage_[kStackStorageSize];
525526
T* data_ = nullptr;
526527
size_t length_ = 0;
528+
bool was_detached_ = false;
527529
};
528530

529531
class Utf8Value : public MaybeStackBuffer<char> {
Collapse file
+86Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
'use strict';
2+
3+
require('../common');
4+
const assert = require('assert');
5+
const { isUtf8, Buffer } = require('buffer');
6+
const { TextEncoder } = require('util');
7+
8+
const encoder = new TextEncoder();
9+
10+
// Well-formed inputs, including a multi-byte character and the empty buffer,
// must all be reported as valid UTF-8.
for (const validInput of [
  encoder.encode('hello'),
  encoder.encode('ğ'),
  Buffer.from([]),
]) {
  assert.strictEqual(isUtf8(validInput), true);
}
13+
14+
// Taken from test/fixtures/wpt/encoding/textdecoder-fatal.any.js
15+
// Every byte sequence below is malformed UTF-8 and must be rejected.
for (const bytes of [
  [0xFF], // 'invalid code'
  [0xC0], // 'ends early'
  [0xE0], // 'ends early 2'
  [0xC0, 0x00], // 'invalid trail'
  [0xC0, 0xC0], // 'invalid trail 2'
  [0xE0, 0x00], // 'invalid trail 3'
  [0xE0, 0xC0], // 'invalid trail 4'
  [0xE0, 0x80, 0x00], // 'invalid trail 5'
  [0xE0, 0x80, 0xC0], // 'invalid trail 6'
  [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], // '> 0x10FFFF'
  [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], // 'obsolete lead byte'

  // Overlong encodings
  [0xC0, 0x80], // 'overlong U+0000 - 2 bytes'
  [0xE0, 0x80, 0x80], // 'overlong U+0000 - 3 bytes'
  [0xF0, 0x80, 0x80, 0x80], // 'overlong U+0000 - 4 bytes'
  [0xF8, 0x80, 0x80, 0x80, 0x80], // 'overlong U+0000 - 5 bytes'
  [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], // 'overlong U+0000 - 6 bytes'

  [0xC1, 0xBF], // 'overlong U+007F - 2 bytes'
  [0xE0, 0x81, 0xBF], // 'overlong U+007F - 3 bytes'
  [0xF0, 0x80, 0x81, 0xBF], // 'overlong U+007F - 4 bytes'
  [0xF8, 0x80, 0x80, 0x81, 0xBF], // 'overlong U+007F - 5 bytes'
  [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], // 'overlong U+007F - 6 bytes'

  [0xE0, 0x9F, 0xBF], // 'overlong U+07FF - 3 bytes'
  [0xF0, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 4 bytes'
  [0xF8, 0x80, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 5 bytes'
  [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 6 bytes'

  [0xF0, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 4 bytes'
  [0xF8, 0x80, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 5 bytes'
  [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 6 bytes'

  [0xF8, 0x84, 0x8F, 0xBF, 0xBF], // 'overlong U+10FFFF - 5 bytes'
  [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], // 'overlong U+10FFFF - 6 bytes'

  // UTF-16 surrogates encoded as code points in UTF-8
  [0xED, 0xA0, 0x80], // 'lead surrogate'
  [0xED, 0xB0, 0x80], // 'trail surrogate'
  [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], // 'surrogate pair'
]) {
  assert.strictEqual(isUtf8(Buffer.from(bytes)), false);
}
60+
61+
// Values that are neither typed arrays nor array buffers must be rejected
// with ERR_INVALID_ARG_TYPE.
for (const badInput of [null, undefined, 'hello', true, false]) {
  assert.throws(
    () => isUtf8(badInput),
    { code: 'ERR_INVALID_ARG_TYPE' },
  );
}
75+
76+
{
  // Transferring the ArrayBuffer via structuredClone() detaches it; validating
  // the detached buffer must throw ERR_INVALID_STATE.
  const detached = new ArrayBuffer(1024);
  structuredClone(detached, { transfer: [detached] });

  assert.throws(() => isUtf8(detached), { code: 'ERR_INVALID_STATE' });
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.