Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 3154543

Browse filesBrowse files
nodejs-github-bot authored and targos committed
deps: update ada to 2.1.0
PR-URL: #47598 Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com> Reviewed-By: Filip Skokan <panva.ip@gmail.com> Reviewed-By: Matthew Aitken <maitken033380023@gmail.com> Reviewed-By: Tiancheng "Timothy" Gu <timothygu99@gmail.com> Reviewed-By: Tobias Nießen <tniessen@tnie.de> Reviewed-By: Rich Trott <rtrott@gmail.com>
1 parent d11ff4b commit 3154543
Copy full SHA for 3154543

File tree

Expand file treeCollapse file tree

2 files changed

+93
-19
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

2 files changed

+93
-19
lines changed
Open diff view settings
Collapse file

‎deps/ada/ada.cpp‎

Copy file name to clipboardExpand all lines: deps/ada/ada.cpp
+79-14Lines changed: 79 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2023-03-30 17:00:48 -0400. Do not edit! */
1+
/* auto-generated on 2023-04-17 12:20:41 -0400. Do not edit! */
22
/* begin file src/ada.cpp */
33
#include "ada.h"
44
/* begin file src/checkers.cpp */
@@ -2753,7 +2753,7 @@ bool ascii_has_upper_case(char* input, size_t length) {
27532753
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
27542754
uint64_t broadcast_80 = broadcast(0x80);
27552755
uint64_t broadcast_Ap = broadcast(128 - 'A');
2756-
uint64_t broadcast_Zp = broadcast(128 - 'Z');
2756+
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
27572757
size_t i = 0;
27582758

27592759
uint64_t runner{0};
@@ -2775,7 +2775,7 @@ void ascii_map(char* input, size_t length) {
27752775
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
27762776
uint64_t broadcast_80 = broadcast(0x80);
27772777
uint64_t broadcast_Ap = broadcast(128 - 'A');
2778-
uint64_t broadcast_Zp = broadcast(128 - 'Z');
2778+
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
27792779
size_t i = 0;
27802780

27812781
for (; i + 7 < length; i += 8) {
@@ -9845,7 +9845,7 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
98459845
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
98469846
uint64_t broadcast_80 = broadcast(0x80);
98479847
uint64_t broadcast_Ap = broadcast(128 - 'A');
9848-
uint64_t broadcast_Zp = broadcast(128 - 'Z');
9848+
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
98499849
uint64_t non_ascii = 0;
98509850
size_t i = 0;
98519851

@@ -9961,7 +9961,7 @@ ada_really_inline constexpr bool is_forbidden_domain_code_point(
99619961
}
99629962

99639963
ada_really_inline constexpr bool contains_forbidden_domain_code_point(
9964-
char* input, size_t length) noexcept {
9964+
const char* input, size_t length) noexcept {
99659965
size_t i = 0;
99669966
uint8_t accumulator{};
99679967
for (; i + 4 <= length; i += 4) {
@@ -9976,6 +9976,44 @@ ada_really_inline constexpr bool contains_forbidden_domain_code_point(
99769976
return accumulator;
99779977
}
99789978

9979+
/**
 * Lookup table with one entry per possible byte value (256 entries).
 *
 * Each entry is a small flag value:
 *   - 0: the byte is neither flagged as forbidden nor an upper case letter;
 *   - 1: the byte is flagged as a forbidden domain code point. In this table
 *        that covers 0x00-0x20, '#', '%', '/', ':', '<', '>', '?', '@', '[',
 *        the backslash, ']', '^', '|' and every byte from 0x7F to 0xFF;
 *   - 2: the byte is an upper case ASCII letter ('A'-'Z').
 *
 * Because the two kinds of flags use distinct bits, OR-ing the entries of
 * several bytes gives 0 (nothing flagged), 1 (forbidden only), 2 (upper case
 * only) or 3 (both).
 * @see https://url.spec.whatwg.org/#forbidden-domain-code-point
 */
constexpr static uint8_t is_forbidden_domain_code_point_table_or_upper[] = {
9980+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9981+
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
9982+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
9983+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0,
9984+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9985+
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9986+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9987+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9988+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9989+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9990+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
9991+
9992+
// The table must cover every uint8_t index, and both ends of the upper case
// range must carry the "upper case" flag value.
static_assert(sizeof(is_forbidden_domain_code_point_table_or_upper) == 256);
9993+
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('A')] == 2);
9994+
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('Z')] == 2);
9995+
9996+
/**
 * Scans `length` bytes starting at `input` and ORs together the
 * is_forbidden_domain_code_point_table_or_upper entry of every byte.
 *
 * @param input  pointer to the first byte to inspect
 * @param length number of bytes to inspect
 * @return the OR of all table entries, converted to the declared return type
 *
 * NOTE(review): the declared return type is bool, so the OR-ed flags are
 * converted to true/false on return and the difference between the table's
 * value 1 and value 2 is not visible to callers. Returning uint8_t instead
 * would also require changing the matching declaration in ada.h.
 */
ada_really_inline constexpr bool contains_forbidden_domain_code_point_or_upper(
    const char* input, size_t length) noexcept {
  const uint8_t* const table = is_forbidden_domain_code_point_table_or_upper;
  // Largest multiple of four that does not exceed length: the main loop
  // handles four bytes per iteration, the tail loop handles the remainder.
  const size_t unrolled_length = length - (length % 4);
  uint8_t flags = 0;
  size_t pos = 0;
  while (pos < unrolled_length) {
    flags = uint8_t(flags | table[uint8_t(input[pos])] |
                    table[uint8_t(input[pos + 1])] |
                    table[uint8_t(input[pos + 2])] |
                    table[uint8_t(input[pos + 3])]);
    pos += 4;
  }
  while (pos < length) {
    flags = uint8_t(flags | table[uint8_t(input[pos])]);
    ++pos;
  }
  return flags;
}
10016+
997910017
static_assert(unicode::is_forbidden_domain_code_point('%'));
998010018
static_assert(unicode::is_forbidden_domain_code_point('\x7f'));
998110019
static_assert(unicode::is_forbidden_domain_code_point('\0'));
@@ -13473,23 +13511,50 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
1347313511
// to ASCII with domain and false. The most common case is an ASCII input, in
1347413512
// which case we do not need to call the expensive 'to_ascii' if a few
1347513513
// conditions are met: no '%' and no 'xn-' subsequence.
13476-
std::string _buffer = std::string(input);
13477-
// This next function checks that the result is ascii, but we are going to
13478-
// check anyhow with is_forbidden.
13479-
// bool is_ascii =
13480-
unicode::to_lower_ascii(_buffer.data(), _buffer.size());
13481-
bool is_forbidden = unicode::contains_forbidden_domain_code_point(
13482-
_buffer.data(), _buffer.size());
13483-
if (is_forbidden == 0 && _buffer.find("xn-") == std::string_view::npos) {
13514+
13515+
// Often, the input does not contain any forbidden code points, and no upper
13516+
// case ASCII letter, then we can just copy it to the buffer. We want to
13517+
// optimize for such a common case.
13518+
uint8_t is_forbidden_or_upper =
13519+
unicode::contains_forbidden_domain_code_point_or_upper(input.data(),
13520+
input.size());
13521+
// Minor optimization opportunity:
13522+
// contains_forbidden_domain_code_point_or_upper could be extended to check for
13523+
// the presence of characters that cannot appear in the ipv4 address and we
13524+
// could also check whether x and n and - are present, and so we could skip
13525+
// some of the checks below. However, the gains are likely to be small, and
13526+
// the code would be more complex.
13527+
if (is_forbidden_or_upper == 0 &&
13528+
input.find("xn-") == std::string_view::npos) {
1348413529
// fast path
13485-
update_base_hostname(_buffer);
13530+
update_base_hostname(input);
1348613531
if (checkers::is_ipv4(get_hostname())) {
1348713532
ada_log("parse_host fast path ipv4");
1348813533
return parse_ipv4(get_hostname());
1348913534
}
1349013535
ada_log("parse_host fast path ", get_hostname());
1349113536
return true;
13537+
} else if (is_forbidden_or_upper == 2) {
13538+
// We have encountered at least one upper case ASCII letter, let us
13539+
// try to convert it to lower case. If there is no 'xn-' in the result,
13540+
// we can then use a secondary fast path.
13541+
std::string _buffer = std::string(input);
13542+
unicode::to_lower_ascii(_buffer.data(), _buffer.size());
13543+
if (input.find("xn-") == std::string_view::npos) {
13544+
// secondary fast path when input is not all lower case
13545+
update_base_hostname(input);
13546+
if (checkers::is_ipv4(get_hostname())) {
13547+
ada_log("parse_host fast path ipv4");
13548+
return parse_ipv4(get_hostname());
13549+
}
13550+
ada_log("parse_host fast path ", get_hostname());
13551+
return true;
13552+
}
1349213553
}
13554+
// We have encountered at least one forbidden code point or the input contains
13555+
// 'xn-' (case insensitive), so we need to call 'to_ascii' to perform the full
13556+
// conversion.
13557+
1349313558
ada_log("parse_host calling to_ascii");
1349413559
std::optional<std::string> host = std::string(get_hostname());
1349513560
is_valid = ada::unicode::to_ascii(host, input, input.find('%'));
Collapse file

‎deps/ada/ada.h‎

Copy file name to clipboardExpand all lines: deps/ada/ada.h
+14-5Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2023-03-30 17:00:48 -0400. Do not edit! */
1+
/* auto-generated on 2023-04-17 12:20:41 -0400. Do not edit! */
22
/* begin file include/ada.h */
33
/**
44
* @file ada.h
@@ -1418,11 +1418,20 @@ ada_really_inline constexpr bool is_forbidden_host_code_point(
14181418
const char c) noexcept;
14191419

14201420
/**
1421-
* Checks if the input is a forbidden domain code point.
1421+
* Checks if the input contains a forbidden domain code point.
14221422
* @see https://url.spec.whatwg.org/#forbidden-domain-code-point
14231423
*/
14241424
ada_really_inline constexpr bool contains_forbidden_domain_code_point(
1425-
char* input, size_t length) noexcept;
1425+
const char* input, size_t length) noexcept;
1426+
1427+
/**
1428+
* Checks if the input contains a forbidden domain code point in which case
1429+
* the first bit is set to 1. If the input contains an upper case ASCII letter,
1430+
* then the second bit is set to 1.
1431+
* @see https://url.spec.whatwg.org/#forbidden-domain-code-point
1432+
*/
1433+
ada_really_inline constexpr bool contains_forbidden_domain_code_point_or_upper(
1434+
const char* input, size_t length) noexcept;
14261435

14271436
/**
14281437
* Checks if the input is a forbidden domain code point.
@@ -6503,13 +6512,13 @@ inline std::ostream &operator<<(std::ostream &out,
65036512
#ifndef ADA_ADA_VERSION_H
65046513
#define ADA_ADA_VERSION_H
65056514

6506-
#define ADA_VERSION "2.0.0"
6515+
#define ADA_VERSION "2.1.0"
65076516

65086517
namespace ada {
65096518

65106519
enum {
65116520
ADA_VERSION_MAJOR = 2,
6512-
ADA_VERSION_MINOR = 0,
6521+
ADA_VERSION_MINOR = 1,
65136522
ADA_VERSION_REVISION = 0,
65146523
};
65156524

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.