Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 9239e7e

Browse files
committed
data: update to UCD 16
1 parent 7691e49 commit 9239e7e
Copy full SHA for 9239e7e

18 files changed

+2149
-625
lines changed

‎regex-automata/src/nfa/thompson/compiler.rs

Copy file name to clipboardExpand all lines: regex-automata/src/nfa/thompson/compiler.rs
+4-4Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -230,15 +230,15 @@ impl Config {
230230
/// # if cfg!(miri) { return Ok(()); } // miri takes too long
231231
/// use regex_automata::nfa::thompson::NFA;
232232
///
233-
/// // 300KB isn't enough!
233+
/// // 400KB isn't enough!
234234
/// NFA::compiler()
235-
/// .configure(NFA::config().nfa_size_limit(Some(300_000)))
235+
/// .configure(NFA::config().nfa_size_limit(Some(400_000)))
236236
/// .build(r"\w{20}")
237237
/// .unwrap_err();
238238
///
239-
/// // ... but 400KB probably is.
239+
/// // ... but 500KB probably is.
240240
/// let nfa = NFA::compiler()
241-
/// .configure(NFA::config().nfa_size_limit(Some(400_000)))
241+
/// .configure(NFA::config().nfa_size_limit(Some(500_000)))
242242
/// .build(r"\w{20}")?;
243243
///
244244
/// assert_eq!(nfa.pattern_len(), 1);

‎regex-automata/src/util/unicode_data/perl_word.rs

Copy file name to clipboardExpand all lines: regex-automata/src/util/unicode_data/perl_word.rs
+45-20Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
22
//
3-
// ucd-generate perl-word tmp/ucd-15.0.0/ --chars
3+
// ucd-generate perl-word ucd-16.0.0 --chars
44
//
5-
// Unicode version: 15.0.0.
5+
// Unicode version: 16.0.0.
66
//
7-
// ucd-generate 0.2.15 is available on crates.io.
7+
// ucd-generate 0.3.1 is available on crates.io.
88

99
pub const PERL_WORD: &'static [(char, char)] = &[
1010
('0', '9'),
@@ -59,7 +59,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
5959
('ࡠ', 'ࡪ'),
6060
('ࡰ', 'ࢇ'),
6161
('ࢉ', 'ࢎ'),
62-
('\u{898}', '\u{8e1}'),
62+
('\u{897}', '\u{8e1}'),
6363
('\u{8e3}', '\u{963}'),
6464
('०', '९'),
6565
('ॱ', 'ঃ'),
@@ -158,8 +158,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
158158
('ಪ', 'ಳ'),
159159
('ವ', 'ಹ'),
160160
('\u{cbc}', 'ೄ'),
161-
('\u{cc6}', 'ೈ'),
162-
('ೊ', '\u{ccd}'),
161+
('\u{cc6}', '\u{cc8}'),
162+
('\u{cca}', '\u{ccd}'),
163163
('\u{cd5}', '\u{cd6}'),
164164
('ೝ', 'ೞ'),
165165
('ೠ', '\u{ce3}'),
@@ -243,8 +243,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
243243
('ᚁ', 'ᚚ'),
244244
('ᚠ', 'ᛪ'),
245245
('ᛮ', 'ᛸ'),
246-
('ᜀ', '᜕'),
247-
('ᜟ', '᜴'),
246+
('ᜀ', '\u{1715}'),
247+
('ᜟ', '\u{1734}'),
248248
('ᝀ', '\u{1753}'),
249249
('ᝠ', 'ᝬ'),
250250
('ᝮ', 'ᝰ'),
@@ -276,11 +276,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[
276276
('\u{1b00}', 'ᭌ'),
277277
('᭐', '᭙'),
278278
('\u{1b6b}', '\u{1b73}'),
279-
('\u{1b80}', '᯳'),
279+
('\u{1b80}', '\u{1bf3}'),
280280
('ᰀ', '\u{1c37}'),
281281
('᱀', '᱉'),
282282
('ᱍ', 'ᱽ'),
283-
('ᲀ', '\u{1c88}'),
283+
('ᲀ', '\u{1c8a}'),
284284
('Ა', 'Ჺ'),
285285
('Ჽ', 'Ჿ'),
286286
('\u{1cd0}', '\u{1cd2}'),
@@ -367,10 +367,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[
367367
('ꙿ', '\u{a6f1}'),
368368
('ꜗ', 'ꜟ'),
369369
('Ꜣ', 'ꞈ'),
370-
('Ꞌ', '\u{a7ca}'),
370+
('Ꞌ', '\u{a7cd}'),
371371
('Ꟑ', 'ꟑ'),
372372
('ꟓ', 'ꟓ'),
373-
('ꟕ', '\u{a7d9}'),
373+
('ꟕ', '\u{a7dc}'),
374374
('ꟲ', 'ꠧ'),
375375
('\u{a82c}', '\u{a82c}'),
376376
('ꡀ', 'ꡳ'),
@@ -379,9 +379,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[
379379
('\u{a8e0}', 'ꣷ'),
380380
('ꣻ', 'ꣻ'),
381381
('ꣽ', '\u{a92d}'),
382-
('ꤰ', '꥓'),
382+
('ꤰ', '\u{a953}'),
383383
('ꥠ', 'ꥼ'),
384-
('\u{a980}', '꧀'),
384+
('\u{a980}', '\u{a9c0}'),
385385
('ꧏ', '꧙'),
386386
('ꧠ', 'ꧾ'),
387387
('ꨀ', '\u{aa36}'),
@@ -468,6 +468,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
468468
('𐖣', '𐖱'),
469469
('𐖳', '𐖹'),
470470
('𐖻', '𐖼'),
471+
('𐗀', '𐗳'),
471472
('𐘀', '𐜶'),
472473
('𐝀', '𐝕'),
473474
('𐝠', '𐝧'),
@@ -508,10 +509,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[
508509
('𐳀', '𐳲'),
509510
('𐴀', '\u{10d27}'),
510511
('𐴰', '𐴹'),
512+
('𐵀', '𐵥'),
513+
('\u{10d69}', '\u{10d6d}'),
514+
('𐵯', '𐶅'),
511515
('𐺀', '𐺩'),
512516
('\u{10eab}', '\u{10eac}'),
513517
('𐺰', '𐺱'),
514-
('\u{10efd}', '𐼜'),
518+
('𐻂', '𐻄'),
519+
('\u{10efc}', '𐼜'),
515520
('𐼧', '𐼧'),
516521
('𐼰', '\u{10f50}'),
517522
('𐽰', '\u{10f85}'),
@@ -551,12 +556,22 @@ pub const PERL_WORD: &'static [(char, char)] = &[
551556
('𑌵', '𑌹'),
552557
('\u{1133b}', '𑍄'),
553558
('𑍇', '𑍈'),
554-
('𑍋', '𑍍'),
559+
('𑍋', '\u{1134d}'),
555560
('𑍐', '𑍐'),
556561
('\u{11357}', '\u{11357}'),
557562
('𑍝', '𑍣'),
558563
('\u{11366}', '\u{1136c}'),
559564
('\u{11370}', '\u{11374}'),
565+
('𑎀', '𑎉'),
566+
('𑎋', '𑎋'),
567+
('𑎎', '𑎎'),
568+
('𑎐', '𑎵'),
569+
('𑎷', '\u{113c0}'),
570+
('\u{113c2}', '\u{113c2}'),
571+
('\u{113c5}', '\u{113c5}'),
572+
('\u{113c7}', '𑏊'),
573+
('𑏌', '𑏓'),
574+
('\u{113e1}', '\u{113e2}'),
560575
('𑐀', '𑑊'),
561576
('𑑐', '𑑙'),
562577
('\u{1145e}', '𑑡'),
@@ -571,6 +586,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
571586
('𑙐', '𑙙'),
572587
('𑚀', '𑚸'),
573588
('𑛀', '𑛉'),
589+
('𑛐', '𑛣'),
574590
('𑜀', '𑜚'),
575591
('\u{1171d}', '\u{1172b}'),
576592
('𑜰', '𑜹'),
@@ -594,6 +610,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
594610
('𑩐', '\u{11a99}'),
595611
('𑪝', '𑪝'),
596612
('𑪰', '𑫸'),
613+
('𑯀', '𑯠'),
614+
('𑯰', '𑯹'),
597615
('𑰀', '𑰈'),
598616
('𑰊', '\u{11c36}'),
599617
('\u{11c38}', '𑱀'),
@@ -618,15 +636,17 @@ pub const PERL_WORD: &'static [(char, char)] = &[
618636
('\u{11f00}', '𑼐'),
619637
('𑼒', '\u{11f3a}'),
620638
('𑼾', '\u{11f42}'),
621-
('𑽐', '𑽙'),
639+
('𑽐', '\u{11f5a}'),
622640
('𑾰', '𑾰'),
623641
('𒀀', '𒎙'),
624642
('𒐀', '𒑮'),
625643
('𒒀', '𒕃'),
626644
('𒾐', '𒿰'),
627645
('𓀀', '𓐯'),
628646
('\u{13440}', '\u{13455}'),
647+
('𓑠', '𔏺'),
629648
('𔐀', '𔙆'),
649+
('𖄀', '𖄹'),
630650
('𖠀', '𖨸'),
631651
('𖩀', '𖩞'),
632652
('𖩠', '𖩩'),
@@ -639,16 +659,18 @@ pub const PERL_WORD: &'static [(char, char)] = &[
639659
('𖭐', '𖭙'),
640660
('𖭣', '𖭷'),
641661
('𖭽', '𖮏'),
662+
('𖵀', '𖵬'),
663+
('𖵰', '𖵹'),
642664
('𖹀', '𖹿'),
643665
('𖼀', '𖽊'),
644666
('\u{16f4f}', '𖾇'),
645667
('\u{16f8f}', '𖾟'),
646668
('𖿠', '𖿡'),
647669
('𖿣', '\u{16fe4}'),
648-
('𖿰', '𖿱'),
670+
('\u{16ff0}', '\u{16ff1}'),
649671
('𗀀', '𘟷'),
650672
('𘠀', '𘳕'),
651-
('𘴀', '𘴈'),
673+
('𘳿', '𘴈'),
652674
('𚿰', '𚿳'),
653675
('𚿵', '𚿻'),
654676
('𚿽', '𚿾'),
@@ -663,10 +685,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[
663685
('𛲀', '𛲈'),
664686
('𛲐', '𛲙'),
665687
('\u{1bc9d}', '\u{1bc9e}'),
688+
('𜳰', '𜳹'),
666689
('\u{1cf00}', '\u{1cf2d}'),
667690
('\u{1cf30}', '\u{1cf46}'),
668691
('\u{1d165}', '\u{1d169}'),
669-
('𝅭', '\u{1d172}'),
692+
('\u{1d16d}', '\u{1d172}'),
670693
('\u{1d17b}', '\u{1d182}'),
671694
('\u{1d185}', '\u{1d18b}'),
672695
('\u{1d1aa}', '\u{1d1ad}'),
@@ -724,6 +747,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
724747
('𞊐', '\u{1e2ae}'),
725748
('𞋀', '𞋹'),
726749
('𞓐', '𞓹'),
750+
('𞗐', '𞗺'),
727751
('𞟠', '𞟦'),
728752
('𞟨', '𞟫'),
729753
('𞟭', '𞟮'),
@@ -774,6 +798,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
774798
('𫝀', '𫠝'),
775799
('𫠠', '𬺡'),
776800
('𬺰', '𮯠'),
801+
('𮯰', '𮹝'),
777802
('丽', '𪘀'),
778803
('𰀀', '𱍊'),
779804
('𱍐', '𲎯'),

‎regex-syntax/src/hir/translate.rs

Copy file name to clipboardExpand all lines: regex-syntax/src/hir/translate.rs
+21Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3143,10 +3143,31 @@ mod tests {
31433143
#[cfg(feature = "unicode-script")]
31443144
assert_eq!(
31453145
t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
3146+
// Class({
3147+
// '·'..='·',
3148+
// '\u{300}'..='\u{301}',
3149+
// '\u{304}'..='\u{304}',
3150+
// '\u{306}'..='\u{306}',
3151+
// '\u{308}'..='\u{308}',
3152+
// '\u{313}'..='\u{313}',
3153+
// '\u{342}'..='\u{342}',
3154+
// '\u{345}'..='\u{345}',
3155+
// 'ʹ'..='ʹ',
3156+
// '\u{1dc0}'..='\u{1dc1}',
3157+
// '⁝'..='⁝',
3158+
// })
31463159
hir_uclass(&[
3160+
('·', '·'),
3161+
('\u{0300}', '\u{0301}'),
3162+
('\u{0304}', '\u{0304}'),
3163+
('\u{0306}', '\u{0306}'),
3164+
('\u{0308}', '\u{0308}'),
3165+
('\u{0313}', '\u{0313}'),
31473166
('\u{0342}', '\u{0342}'),
31483167
('\u{0345}', '\u{0345}'),
3168+
('ʹ', 'ʹ'),
31493169
('\u{1DC0}', '\u{1DC1}'),
3170+
('⁝', '⁝'),
31503171
])
31513172
);
31523173
assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));

‎regex-syntax/src/unicode.rs

Copy file name to clipboardExpand all lines: regex-syntax/src/unicode.rs
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,8 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>, Error> {
675675
("V13_0", age::V13_0),
676676
("V14_0", age::V14_0),
677677
("V15_0", age::V15_0),
678+
("V15_1", age::V15_1),
679+
("V16_0", age::V16_0),
678680
];
679681
assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
680682

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.