Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e399360

Browse files
nodejs-github-bot authored and targos committed
deps: update simdutf to 5.2.6
PR-URL: #52727
Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com>
Reviewed-By: Marco Ippolito <marcoippolito54@gmail.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
Reviewed-By: Antoine du Hamel <duhamelantoine1995@gmail.com>
1 parent 9102255 commit e399360
Copy full SHA for e399360

File tree

Expand file treeCollapse file tree

2 files changed

+112
-51
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

2 files changed

+112
-51
lines changed
Open diff view settings
Collapse file

‎deps/simdutf/simdutf.cpp‎

Copy file name to clipboard | Expand all lines: deps/simdutf/simdutf.cpp
+95 -39 | Lines changed: 95 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2024-04-11 09:56:55 -0400. Do not edit! */
1+
/* auto-generated on 2024-04-24 01:28:18 -0400. Do not edit! */
22
/* begin file src/simdutf.cpp */
33
#include "simdutf.h"
44
// We include base64_tables once.
@@ -5999,8 +5999,8 @@ static const implementation* get_single_implementation() {
59995999
*/
60006000
class detect_best_supported_implementation_on_first_use final : public implementation {
60016001
public:
6002-
const std::string &name() const noexcept final { return set_best()->name(); }
6003-
const std::string &description() const noexcept final { return set_best()->description(); }
6002+
std::string name() const noexcept final { return set_best()->name(); }
6003+
std::string description() const noexcept final { return set_best()->description(); }
60046004
uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
60056005

60066006
simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept override {
@@ -6333,6 +6333,8 @@ class detect_best_supported_implementation_on_first_use final : public implement
63336333
const implementation *set_best() const noexcept;
63346334
};
63356335

6336+
static_assert(std::is_trivially_destructible<detect_best_supported_implementation_on_first_use>::value, "detect_best_supported_implementation_on_first_use should be trivially destructible");
6337+
63366338
static const std::initializer_list<const implementation *>& get_available_implementation_pointers() {
63376339
static const std::initializer_list<const implementation *> available_implementation_pointers {
63386340
#if SIMDUTF_IMPLEMENTATION_ICELAKE
@@ -6695,7 +6697,11 @@ class unsupported_implementation final : public implementation {
66956697
unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {}
66966698
};
66976699

6698-
const unsupported_implementation unsupported_singleton{};
6700+
const unsupported_implementation* get_unsupported_singleton() {
6701+
static const unsupported_implementation unsupported_singleton{};
6702+
return &unsupported_singleton;
6703+
}
6704+
static_assert(std::is_trivially_destructible<unsupported_implementation>::value, "unsupported_singleton should be trivially destructible");
66996705

67006706
size_t available_implementation_list::size() const noexcept {
67016707
return internal::get_available_implementation_pointers().size();
@@ -6713,7 +6719,7 @@ const implementation *available_implementation_list::detect_best_supported() con
67136719
uint32_t required_instruction_sets = impl->required_instruction_sets();
67146720
if ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets) { return impl; }
67156721
}
6716-
return &unsupported_singleton; // this should never happen?
6722+
return get_unsupported_singleton(); // this should never happen?
67176723
}
67186724

67196725
const implementation *detect_best_supported_implementation_on_first_use::set_best() const noexcept {
@@ -6728,7 +6734,7 @@ const implementation *detect_best_supported_implementation_on_first_use::set_bes
67286734
return get_active_implementation() = force_implementation;
67296735
} else {
67306736
// Note: abort() and stderr usage within the library is forbidden.
6731-
return get_active_implementation() = &unsupported_singleton;
6737+
return get_active_implementation() = get_unsupported_singleton();
67326738
}
67336739
}
67346740
return get_active_implementation() = get_available_implementations().detect_best_supported();
@@ -6747,8 +6753,8 @@ SIMDUTF_DLLIMPORTEXPORT const internal::available_implementation_list& get_avail
67476753
}
67486754

67496755
/**
6750-
* The active implementation.
6751-
*/
6756+
* The active implementation.
6757+
*/
67526758
SIMDUTF_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation() {
67536759
#if SIMDUTF_SINGLE_IMPLEMENTATION
67546760
// skip runtime detection
@@ -26119,7 +26125,7 @@ std::pair<result, char*> avx2_convert_utf16_to_utf8_with_errors(const char16_t*
2611926125
1. an input register contains no surrogates and each value
2612026126
is in range 0x0000 .. 0x07ff.
2612126127
2. an input register contains no surrogates and values are
26122-
is in range 0x0000 .. 0xffff.
26128+
in range 0x0000 .. 0xffff.
2612326129
3. an input register contains surrogates --- i.e. codepoints
2612426130
can have 16 or 32 bits.
2612526131

@@ -32395,6 +32401,8 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const cha
3239532401

3239632402
/* end file src/rvv/rvv_utf8_to.inl.cpp */
3239732403
/* begin file src/rvv/rvv_utf16_to.inl.cpp */
32404+
#include <cstdio>
32405+
3239832406
template<simdutf_ByteFlip bflip>
3239932407
simdutf_really_inline static result rvv_utf16_to_latin1_with_errors(const char16_t *src, size_t len, char *dst) {
3240032408
const char16_t *const beg = src;
@@ -32609,47 +32617,95 @@ simdutf_really_inline static result rvv_utf16_to_utf32_with_errors(const char16_
3260932617
const char16_t *const srcBeg = src;
3261032618
char32_t *const dstBeg = dst;
3261132619

32620+
constexpr const uint16_t ANY_SURROGATE_MASK = 0xf800;
32621+
constexpr const uint16_t ANY_SURROGATE_VALUE = 0xd800;
32622+
constexpr const uint16_t LO_SURROGATE_MASK = 0xfc00;
32623+
constexpr const uint16_t LO_SURROGATE_VALUE = 0xdc00;
32624+
constexpr const uint16_t HI_SURROGATE_MASK = 0xfc00;
32625+
constexpr const uint16_t HI_SURROGATE_VALUE = 0xd800;
32626+
3261232627
uint16_t last = 0;
32613-
for (size_t vl, vlOut; len > 0; len -= vl, src += vl, dst += vlOut, last = simdutf_byteflip<bflip>(src[-1])) {
32614-
vl = __riscv_vsetvl_e16m2(len);
32615-
vuint16m2_t v1 = __riscv_vle16_v_u16m2((uint16_t const*)src, vl);
32616-
v1 = simdutf_byteflip<bflip>(v1, vl);
32617-
vuint16m2_t v0 = __riscv_vslide1up_vx_u16m2(v1, last, vl);
32628+
while (len > 0) {
32629+
size_t vl = __riscv_vsetvl_e16m2(len);
32630+
vuint16m2_t v0 = __riscv_vle16_v_u16m2((uint16_t const*)src, vl);
32631+
v0 = simdutf_byteflip<bflip>(v0, vl);
32632+
32633+
{ // check fast-path
32634+
const vuint16m2_t v = __riscv_vand_vx_u16m2(v0, ANY_SURROGATE_MASK, vl);
32635+
const vbool8_t any_surrogate = __riscv_vmseq_vx_u16m2_b8(v, ANY_SURROGATE_VALUE, vl);
32636+
if (__riscv_vfirst_m_b8(any_surrogate, vl) < 0) {
32637+
/* no surrogates */
32638+
__riscv_vse32_v_u32m4((uint32_t*)dst, __riscv_vzext_vf2_u32m4(v0, vl), vl);
32639+
len -= vl;
32640+
src += vl;
32641+
dst += vl;
32642+
continue;
32643+
}
32644+
}
3261832645

32619-
vbool8_t surhi0 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, 0xFC00, vl), 0xD800, vl);
32620-
vbool8_t surlo1 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, 0xFC00, vl), 0xDC00, vl);
32646+
if ((simdutf_byteflip<bflip>(src[0]) & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) {
32647+
return result(error_code::SURROGATE, src - srcBeg);
32648+
}
3262132649

32622-
/* no surrogates */
32623-
if (__riscv_vfirst_m_b8(__riscv_vmor_mm_b8(surhi0, surlo1, vl), vl) < 0) {
32624-
vlOut = vl;
32625-
__riscv_vse32_v_u32m4((uint32_t*)dst, __riscv_vzext_vf2_u32m4(v1, vl), vl);
32626-
continue;
32650+
// decode surrogates
32651+
vuint16m2_t v1 = __riscv_vslide1down_vx_u16m2(v0, 0, vl);
32652+
vl = __riscv_vsetvl_e16m2(vl - 1);
32653+
if (vl == 0) {
32654+
return result(error_code::SURROGATE, src - srcBeg);
3262732655
}
3262832656

32629-
long idx = __riscv_vfirst_m_b8(__riscv_vmxor_mm_b8(surhi0, surlo1, vl), vl);
32630-
if (idx >= 0) {
32631-
last = idx > 0 ? simdutf_byteflip<bflip>(src[idx-1]) : last;
32632-
return result(error_code::SURROGATE, src - srcBeg + idx - (last - 0xD800u < 0x400u));
32657+
const vbool8_t surhi = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, HI_SURROGATE_MASK, vl), HI_SURROGATE_VALUE, vl);
32658+
const vbool8_t surlo = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, vl);
32659+
32660+
// compress everything but lo surrogates
32661+
const vbool8_t compress = __riscv_vmsne_vx_u16m2_b8(__riscv_vand_vx_u16m2(v0, LO_SURROGATE_MASK, vl), LO_SURROGATE_VALUE, vl);
32662+
32663+
{
32664+
const vbool8_t diff = __riscv_vmxor_mm_b8(surhi, surlo, vl);
32665+
const long idx = __riscv_vfirst_m_b8(diff, vl);
32666+
if (idx >= 0) {
32667+
return result(error_code::SURROGATE, src - srcBeg + idx + 1);
32668+
}
3263332669
}
3263432670

32635-
vbool8_t surhi1 = __riscv_vmseq_vx_u16m2_b8(__riscv_vand_vx_u16m2(v1, 0xFC00, vl), 0xD800, vl);
32636-
uint16_t next = vl < len ? simdutf_byteflip<bflip>(src[vl]) : 0;
32671+
last = simdutf_byteflip<bflip>(src[vl]);
32672+
vuint32m4_t utf32 = __riscv_vzext_vf2_u32m4(v0, vl);
32673+
32674+
// v0 = 110110yyyyyyyyyy (0xd800 + yyyyyyyyyy) --- hi surrogate
32675+
// v1 = 110111xxxxxxxxxx (0xdc00 + xxxxxxxxxx) --- lo surrogate
32676+
32677+
// t0 = u16( 0000_00yy_yyyy_yyyy)
32678+
const vuint32m4_t t0 = __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v0, 0x03ff, vl), vl);
32679+
// t1 = u32(0000_0000_0000_yyyy_yyyy_yy00_0000_0000)
32680+
const vuint32m4_t t1 = __riscv_vsll_vx_u32m4(t0, 10, vl);
32681+
32682+
// t2 = u32(0000_0000_0000_0000_0000_00xx_xxxx_xxxx)
32683+
const vuint32m4_t t2 = __riscv_vzext_vf2_u32m4(__riscv_vand_vx_u16m2(v1, 0x03ff, vl), vl);
32684+
32685+
// t3 = u32(0000_0000_0000_yyyy_yyyy_yyxx_xxxx_xxxx)
32686+
const vuint32m4_t t3 = __riscv_vor_vv_u32m4(t1, t2, vl);
32687+
32688+
// t4 = utf32 from surrogate pairs
32689+
const vuint32m4_t t4 = __riscv_vadd_vx_u32m4(t3, 0x10000, vl);
3263732690

32638-
vuint32m4_t wide = __riscv_vzext_vf2_u32m4(v1, vl);
32639-
vuint32m4_t slided = __riscv_vslide1down_vx_u32m4(wide, next, vl);
32640-
vuint32m4_t aligned = __riscv_vsll_vx_u32m4_mu(surhi1, wide, wide, 10, vl);
32641-
vuint32m4_t added = __riscv_vadd_vv_u32m4_mu(surhi1, aligned, aligned, slided, vl);
32642-
vuint32m4_t utf32 = __riscv_vadd_vx_u32m4_mu(surhi1, added, added, 0xFCA02400, vl);
32643-
vbool8_t m = __riscv_vmnot_m_b8(surlo1, vl);
32644-
vlOut = __riscv_vcpop_m_b8(m, vl);
32645-
vuint32m4_t comp = __riscv_vcompress_vm_u32m4(utf32, m, vl);
32691+
const vuint32m4_t result = __riscv_vmerge_vvm_u32m4(utf32, t4, surhi, vl);
32692+
32693+
const vuint32m4_t comp = __riscv_vcompress_vm_u32m4(result, compress, vl);
32694+
const size_t vlOut = __riscv_vcpop_m_b8(compress, vl);
3264632695
__riscv_vse32_v_u32m4((uint32_t*)dst, comp, vlOut);
32696+
32697+
len -= vl;
32698+
src += vl;
32699+
dst += vlOut;
32700+
32701+
if ((last & LO_SURROGATE_MASK) == LO_SURROGATE_VALUE) {
32702+
// last item is lo surrogate and got already consumed
32703+
len -= 1;
32704+
src += 1;
32705+
}
3264732706
}
3264832707

32649-
if (last - 0xD800u < 0x400u)
32650-
return result(error_code::SURROGATE, src - srcBeg - 1); /* end on high surrogate */
32651-
else
32652-
return result(error_code::SUCCESS, dst - dstBeg);
32708+
return result(error_code::SUCCESS, dst - dstBeg);
3265332709
}
3265432710

3265532711
simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t *src, size_t len, char32_t *dst) const noexcept {
Collapse file

‎deps/simdutf/simdutf.h‎

Copy file name to clipboard | Expand all lines: deps/simdutf/simdutf.h
+17 -12 | Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2024-04-11 09:56:55 -0400. Do not edit! */
1+
/* auto-generated on 2024-04-24 01:28:18 -0400. Do not edit! */
22
/* begin file include/simdutf.h */
33
#ifndef SIMDUTF_H
44
#define SIMDUTF_H
@@ -149,7 +149,7 @@
149149
#define SIMDUTF_HAS_RVV_TARGET_REGION 1
150150
#endif
151151

152-
#if __riscv_v_intrinsic >= 11000 && !(__GNUC__ == 13 && __GNUC_MINOR__ == 2 && __GNUC_PATCHLEVEL__ == 0)
152+
#if __riscv_v_intrinsic >= 11000
153153
#define SIMDUTF_HAS_RVV_INTRINSICS 1
154154
#endif
155155

@@ -594,7 +594,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
594594
#define SIMDUTF_SIMDUTF_VERSION_H
595595

596596
/** The version of simdutf being used (major.minor.revision) */
597-
#define SIMDUTF_VERSION "5.2.4"
597+
#define SIMDUTF_VERSION "5.2.6"
598598

599599
namespace simdutf {
600600
enum {
@@ -609,7 +609,7 @@ enum {
609609
/**
610610
* The revision (major.minor.REVISION) of simdutf being used.
611611
*/
612-
SIMDUTF_VERSION_REVISION = 4
612+
SIMDUTF_VERSION_REVISION = 6
613613
};
614614
} // namespace simdutf
615615

@@ -717,6 +717,7 @@ static inline uint32_t detect_supported_architectures() {
717717
#elif SIMDUTF_IS_RISCV64
718718

719719
#if defined(__linux__)
720+
720721
#include <unistd.h>
721722
// We define these ourselves, for backwards compatibility
722723
struct simdutf_riscv_hwprobe { int64_t key; uint64_t value; };
@@ -744,6 +745,10 @@ static inline uint32_t detect_supported_architectures() {
744745
if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)
745746
host_isa |= instruction_set::ZVBB;
746747
}
748+
#endif
749+
#if defined(RUN_IN_SPIKE_SIMULATOR)
750+
// The Proxy Kernel does not yet implement the hwprobe syscall
751+
host_isa |= instruction_set::RVV;
747752
#endif
748753
return host_isa;
749754
}
@@ -2454,7 +2459,7 @@ class implementation {
24542459
*
24552460
* @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
24562461
*/
2457-
virtual const std::string &name() const { return _name; }
2462+
virtual std::string name() const { return std::string(_name); }
24582463

24592464
/**
24602465
* The description of this implementation.
@@ -2464,7 +2469,7 @@ class implementation {
24642469
*
24652470
* @return the name of the implementation, e.g. "haswell", "westmere", "arm64"
24662471
*/
2467-
virtual const std::string &description() const { return _description; }
2472+
virtual std::string description() const { return std::string(_description); }
24682473

24692474
/**
24702475
* The instruction sets this implementation is compiled against
@@ -3602,27 +3607,27 @@ class implementation {
36023607
protected:
36033608
/** @private Construct an implementation with the given name and description. For subclasses. */
36043609
simdutf_really_inline implementation(
3605-
std::string name,
3606-
std::string description,
3610+
const char* name,
3611+
const char* description,
36073612
uint32_t required_instruction_sets
36083613
) :
36093614
_name(name),
36103615
_description(description),
36113616
_required_instruction_sets(required_instruction_sets)
36123617
{
36133618
}
3614-
virtual ~implementation()=default;
3615-
3619+
protected:
3620+
~implementation() = default;
36163621
private:
36173622
/**
36183623
* The name of this implementation.
36193624
*/
3620-
const std::string _name;
3625+
const char* _name;
36213626

36223627
/**
36233628
* The description of this implementation.
36243629
*/
3625-
const std::string _description;
3630+
const char* _description;
36263631

36273632
/**
36283633
* Instruction sets required for this implementation.

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.