Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit cfd1e78

Browse files
committed
U32 backed PyStr for non-unicode data
1 parent c073a61 commit cfd1e78
Copy full SHA for cfd1e78

File tree

1 file changed

+22
-0
lines changed
Filter options

1 file changed

+22
-0
lines changed

‎vm/src/builtins/str.rs

Copy file name to clipboard | Expand all lines: vm/src/builtins/str.rs
+22 | Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ use unicode_casing::CharExt;
4141
/// Payload-free tag for the storage kind of a `PyStr`.
///
/// Each variant has a same-named counterpart in `PyStrKindData`.
pub(crate) enum PyStrKind {
4242
Ascii,
4343
Utf8,
44+
// Data held as 4-byte units: the length computation for this kind is
// `self.bytes.len() / 4`, and `as_u32_slice` views the bytes as `&[u32]`.
U32,
4445
}
4546

4647
impl std::ops::BitOr for PyStrKind {
@@ -58,6 +59,7 @@ impl PyStrKind {
5859
match self {
5960
PyStrKind::Ascii => PyStrKindData::Ascii,
6061
PyStrKind::Utf8 => PyStrKindData::Utf8(Radium::new(usize::MAX)),
62+
PyStrKind::U32 => PyStrKindData::U32,
6163
}
6264
}
6365
}
@@ -67,13 +69,15 @@ enum PyStrKindData {
6769
Ascii,
6870
// uses usize::MAX as a sentinel for "uncomputed"
6971
Utf8(PyAtomic<usize>),
72+
U32,
7073
}
7174

7275
impl PyStrKindData {
7376
/// Returns the `PyStrKind` tag with the same name as this variant.
///
/// The atomic payload carried by `Utf8` is ignored.
fn kind(&self) -> PyStrKind {
7477
match self {
7578
PyStrKindData::Ascii => PyStrKind::Ascii,
7679
PyStrKindData::Utf8(_) => PyStrKind::Utf8,
80+
PyStrKindData::U32 => PyStrKind::U32,
7781
}
7882
}
7983
}
@@ -383,13 +387,24 @@ impl PyStr {
383387
}
384388
}
385389

390+
/// Reinterprets `self.bytes` as a slice of `u32` values.
///
/// # Panics
///
/// Panics if the string is not of kind `PyStrKind::U32`, if the byte length
/// is not a multiple of 4, or if `align_to` returns a non-empty prefix or
/// suffix.
// NOTE(review): no caller is visible in this view, which is presumably why
// the dead-code lint is silenced — confirm and drop the allow once it is used.
#[allow(dead_code)]
391+
fn as_u32_slice(&self) -> &[u32] {
392+
// Only U32-kind strings may be viewed as u32 units.
assert_eq!(self.kind.kind(), PyStrKind::U32);
393+
// A whole number of 4-byte units is required.
assert_eq!(self.bytes.len() % 4, 0);
394+
// SAFETY: every bit pattern is a valid `u32`, so viewing `u8` data as
// `u32` is sound; `align_to` puts any bytes that cannot be part of an
// aligned `u32` into `prefix`/`suffix` rather than the middle slice.
let (prefix, reslice, suffix) = unsafe { self.bytes.align_to::<u32>() };
395+
// NOTE(review): a byte buffer is not inherently 4-byte aligned, so this
// assertion can fire unless U32 buffers are allocated aligned — confirm.
assert_eq!(prefix.len(), 0);
396+
assert_eq!(suffix.len(), 0);
397+
reslice
398+
}
399+
386400
/// Returns `true` when `test` holds for every character of the string.
///
/// `Ascii` data is checked byte by byte, converting each byte with
/// `char::from`; `Utf8` data is checked over `self.as_str().chars()`.
///
/// # Panics
///
/// Panics via `unimplemented!()` when the string is of kind `PyStrKind::U32`.
fn char_all<F>(&self, test: F) -> bool
387401
where
388402
F: Fn(char) -> bool,
389403
{
390404
match self.kind.kind() {
391405
PyStrKind::Ascii => self.bytes.iter().all(|&x| test(char::from(x))),
392406
PyStrKind::Utf8 => self.as_str().chars().all(test),
407+
// Not supported yet for U32-backed strings.
PyStrKind::U32 => unimplemented!(),
393408
}
394409
}
395410
}
@@ -488,6 +503,7 @@ impl PyStr {
488503
usize::MAX => self._compute_char_len(),
489504
len => len,
490505
},
506+
PyStrKindData::U32 => self.bytes.len() / 4,
491507
}
492508
}
493509
#[cold]
@@ -512,6 +528,7 @@ impl PyStr {
512528
match self.kind {
513529
PyStrKindData::Ascii => true,
514530
PyStrKindData::Utf8(_) => false,
531+
PyStrKindData::U32 => false,
515532
}
516533
}
517534

@@ -558,6 +575,7 @@ impl PyStr {
558575
match self.kind.kind() {
559576
PyStrKind::Ascii => self.as_str().to_ascii_lowercase(),
560577
PyStrKind::Utf8 => self.as_str().to_lowercase(),
578+
PyStrKind::U32 => unimplemented!(),
561579
}
562580
}
563581

@@ -572,6 +590,7 @@ impl PyStr {
572590
match self.kind.kind() {
573591
PyStrKind::Ascii => self.as_str().to_ascii_uppercase(),
574592
PyStrKind::Utf8 => self.as_str().to_uppercase(),
593+
PyStrKind::U32 => unimplemented!(),
575594
}
576595
}
577596

@@ -624,6 +643,7 @@ impl PyStr {
624643
|v, s, n, vm| v.splitn(n, s).map(|s| vm.ctx.new_str(s).into()).collect(),
625644
|v, n, vm| v.py_split_whitespace(n, |s| vm.ctx.new_str(s).into()),
626645
),
646+
PyStrKind::U32 => unimplemented!(),
627647
}?;
628648
Ok(elements)
629649
}
@@ -907,6 +927,7 @@ impl PyStr {
907927
PyStrKind::Utf8 => self
908928
.as_str()
909929
.py_iscase(char::is_lowercase, char::is_uppercase),
930+
PyStrKind::U32 => unimplemented!(),
910931
}
911932
}
912933

@@ -918,6 +939,7 @@ impl PyStr {
918939
PyStrKind::Utf8 => self
919940
.as_str()
920941
.py_iscase(char::is_uppercase, char::is_lowercase),
942+
PyStrKind::U32 => unimplemented!(),
921943
}
922944
}
923945

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.