Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 4c20cb1

Browse files
committed
U32 backed PyStr for non-unicode data
1 parent def4a2b commit 4c20cb1
Copy full SHA for 4c20cb1

File tree

1 file changed

+21
-0
lines changed
Filter options

1 file changed

+21
-0
lines changed

‎vm/src/builtins/pystr.rs

Copy file name to clipboard | Expand all lines: vm/src/builtins/pystr.rs
+21 | Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ use unicode_casing::CharExt;
4141
/// Tag identifying how a `PyStr`'s backing bytes are to be interpreted.
pub(crate) enum PyStrKind {
4242
// Each backing byte is one character (`char_all` maps bytes with `char::from`).
Ascii,
4343
// Backing bytes are read through `as_str()` and iterated with `chars()`.
Utf8,
44+
// Backing bytes are viewed as `u32` values (see `as_u32_slice`); the commit
// title describes this as storage for non-unicode data.
// NOTE(review): presumably one code point per `u32` — confirm.
U32,
4445
}
4546

4647
impl std::ops::BitOr for PyStrKind {
@@ -58,6 +59,7 @@ impl PyStrKind {
5859
match self {
5960
PyStrKind::Ascii => PyStrKindData::Ascii,
6061
PyStrKind::Utf8 => PyStrKindData::Utf8(Radium::new(usize::MAX)),
62+
PyStrKind::U32 => PyStrKindData::U32,
6163
}
6264
}
6365
}
@@ -67,13 +69,15 @@ enum PyStrKindData {
6769
Ascii,
6870
// uses usize::MAX as a sentinel for "uncomputed"
6971
Utf8(PyAtomic<usize>),
72+
U32,
7073
}
7174

7275
impl PyStrKindData {
7376
/// Returns the payload-free `PyStrKind` tag matching this variant; any value
/// carried by `Utf8` is ignored.
fn kind(&self) -> PyStrKind {
7477
match self {
7578
PyStrKindData::Ascii => PyStrKind::Ascii,
7679
PyStrKindData::Utf8(_) => PyStrKind::Utf8,
80+
PyStrKindData::U32 => PyStrKind::U32,
7781
}
7882
}
7983
}
@@ -361,13 +365,23 @@ impl PyStr {
361365
}
362366
}
363367

368+
/// Reinterprets the string's backing bytes as a slice of `u32` values.
///
/// # Panics
///
/// Panics if the string's kind is not `PyStrKind::U32`, if the byte length is
/// not a multiple of 4, or if `align_to` reports a non-empty prefix or suffix.
fn as_u32_slice(&self) -> &[u32] {
369+
assert_eq!(self.kind.kind(), PyStrKind::U32);
370+
// A trailing partial word would end up in `suffix` below; reject it up front.
assert_eq!(self.bytes.len() % 4, 0);
371+
// SAFETY: every bit pattern is a valid `u32`, so viewing the aligned middle
// part of the byte buffer as `u32` values cannot produce an invalid value.
// NOTE(review): nothing visible here guarantees that the buffer is 4-byte
// aligned, and `align_to` does not promise a maximal middle slice — confirm
// the allocation's alignment, otherwise the two assertions below can fire.
let (prefix, reslice, suffix) = unsafe { self.bytes.as_slice().align_to::<u32>() };
372+
// The whole buffer must have landed in the middle slice.
assert_eq!(prefix.len(), 0);
373+
assert_eq!(suffix.len(), 0);
374+
reslice
375+
}
376+
364377
/// Returns `true` when `test` holds for every character of the string.
///
/// `Ascii` strings convert each backing byte with `char::from`; `Utf8` strings
/// iterate `self.as_str().chars()`.
///
/// # Panics
///
/// Panics for `PyStrKind::U32` strings, whose arm is still `unimplemented!()`.
fn char_all<F>(&self, test: F) -> bool
365378
where
366379
F: Fn(char) -> bool,
367380
{
368381
match self.kind.kind() {
369382
PyStrKind::Ascii => self.bytes.iter().all(|&x| test(char::from(x))),
370383
PyStrKind::Utf8 => self.as_str().chars().all(test),
384+
// Not yet supported for u32-backed strings.
PyStrKind::U32 => unimplemented!(),
371385
}
372386
}
373387
}
@@ -466,6 +480,7 @@ impl PyStr {
466480
usize::MAX => self._compute_char_len(),
467481
len => len,
468482
},
483+
PyStrKindData::U32 => self.bytes.len() / 4,
469484
}
470485
}
471486
#[cold]
@@ -490,6 +505,7 @@ impl PyStr {
490505
match self.kind {
491506
PyStrKindData::Ascii => true,
492507
PyStrKindData::Utf8(_) => false,
508+
PyStrKindData::U32 => false,
493509
}
494510
}
495511

@@ -531,6 +547,7 @@ impl PyStr {
531547
match self.kind.kind() {
532548
PyStrKind::Ascii => self.as_str().to_ascii_lowercase(),
533549
PyStrKind::Utf8 => self.as_str().to_lowercase(),
550+
PyStrKind::U32 => unimplemented!(),
534551
}
535552
}
536553

@@ -545,6 +562,7 @@ impl PyStr {
545562
match self.kind.kind() {
546563
PyStrKind::Ascii => self.as_str().to_ascii_uppercase(),
547564
PyStrKind::Utf8 => self.as_str().to_uppercase(),
565+
PyStrKind::U32 => unimplemented!(),
548566
}
549567
}
550568

@@ -597,6 +615,7 @@ impl PyStr {
597615
|v, s, n, vm| v.splitn(n, s).map(|s| vm.ctx.new_str(s).into()).collect(),
598616
|v, n, vm| v.py_split_whitespace(n, |s| vm.ctx.new_str(s).into()),
599617
),
618+
PyStrKind::U32 => unimplemented!(),
600619
}?;
601620
Ok(elements)
602621
}
@@ -880,6 +899,7 @@ impl PyStr {
880899
PyStrKind::Utf8 => self
881900
.as_str()
882901
.py_iscase(char::is_lowercase, char::is_uppercase),
902+
PyStrKind::U32 => unimplemented!(),
883903
}
884904
}
885905

@@ -891,6 +911,7 @@ impl PyStr {
891911
PyStrKind::Utf8 => self
892912
.as_str()
893913
.py_iscase(char::is_uppercase, char::is_lowercase),
914+
PyStrKind::U32 => unimplemented!(),
894915
}
895916
}
896917

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.