Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 64b6ee6

Browse files
committed
U32 backed PyStr for non-unicode data
1 parent e4ce0d5 commit 64b6ee6
Copy full SHA for 64b6ee6

File tree

1 file changed

+22
-0
lines changed
Filter options

1 file changed

+22
-0
lines changed

‎vm/src/builtins/pystr.rs

Copy file name to clipboard | Expand all lines: vm/src/builtins/pystr.rs
+22Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ use unicode_casing::CharExt;
4040
/// Tag describing how a `PyStr`'s byte buffer is to be interpreted.
pub(crate) enum PyStrKind {
4141
/// Buffer handled with the ASCII-specific paths (e.g. `to_ascii_lowercase`,
/// per-byte `char::from` in `char_all`).
Ascii,
4242
/// Buffer handled through `as_str()` and its `char`-based operations.
Utf8,
43+
/// Buffer viewed as `u32` units, four bytes each (see `as_u32_slice`).
/// NOTE(review): most operations visible here are still `unimplemented!()`
/// for this kind.
U32,
4344
}
4445

4546
impl std::ops::BitOr for PyStrKind {
@@ -57,6 +58,7 @@ impl PyStrKind {
5758
match self {
5859
PyStrKind::Ascii => PyStrKindData::Ascii,
5960
PyStrKind::Utf8 => PyStrKindData::Utf8(Radium::new(usize::MAX)),
61+
PyStrKind::U32 => PyStrKindData::U32,
6062
}
6163
}
6264
}
@@ -66,13 +68,15 @@ enum PyStrKindData {
6668
Ascii,
6769
// uses usize::MAX as a sentinel for "uncomputed"
6870
Utf8(PyAtomic<usize>),
71+
U32,
6972
}
7073

7174
impl PyStrKindData {
7275
/// Returns the plain `PyStrKind` tag matching this value.
///
/// Any payload carried by the variant (the atomic inside `Utf8`) is ignored.
fn kind(&self) -> PyStrKind {
7376
match self {
7477
PyStrKindData::Ascii => PyStrKind::Ascii,
7578
PyStrKindData::Utf8(_) => PyStrKind::Utf8,
79+
PyStrKindData::U32 => PyStrKind::U32,
7680
}
7781
}
7882
}
@@ -361,13 +365,24 @@ impl PyStr {
361365
}
362366
}
363367

368+
// NOTE(review): no caller is visible in this view, hence the lint allowance;
// presumably to be dropped once the `U32` match arms are implemented.
#[allow(dead_code)]
369+
/// Reinterprets the string's byte buffer as a slice of `u32` units.
///
/// # Panics
///
/// Panics if the string's kind is not `PyStrKind::U32`, if the buffer length
/// is not a multiple of four, or if the buffer cannot be viewed as `u32`
/// without a leftover prefix or suffix.
fn as_u32_slice(&self) -> &[u32] {
370+
assert_eq!(self.kind.kind(), PyStrKind::U32);
371+
assert_eq!(self.bytes.len() % 4, 0);
372+
// SAFETY: `align_to` is unsafe because it transmutes the element type; going
// from `u8` to `u32` is sound since every bit pattern is a valid `u32`.
// (assumes `self.bytes` holds `u8` elements — TODO confirm against the field)
// NOTE(review): `align_to` does not promise an empty prefix when the buffer
// is not 4-byte aligned, so the two asserts below can fire in that case.
let (prefix, reslice, suffix) = unsafe { self.bytes.as_slice().align_to::<u32>() };
373+
assert_eq!(prefix.len(), 0);
374+
assert_eq!(suffix.len(), 0);
375+
reslice
376+
}
377+
364378
/// Returns `true` when `test` holds for every character of the string.
///
/// `Ascii` strings convert each byte with `char::from`; `Utf8` strings iterate
/// `as_str().chars()`. Both use `Iterator::all`, which yields `true` for an
/// empty sequence.
///
/// # Panics
///
/// Panics for `PyStrKind::U32`, which is not implemented yet.
fn char_all<F>(&self, test: F) -> bool
365379
where
366380
F: Fn(char) -> bool,
367381
{
368382
match self.kind.kind() {
369383
PyStrKind::Ascii => self.bytes.iter().all(|&x| test(char::from(x))),
370384
PyStrKind::Utf8 => self.as_str().chars().all(test),
385+
// Placeholder: reaching this arm aborts with a panic.
PyStrKind::U32 => unimplemented!(),
371386
}
372387
}
373388
}
@@ -466,6 +481,7 @@ impl PyStr {
466481
usize::MAX => self._compute_char_len(),
467482
len => len,
468483
},
484+
PyStrKindData::U32 => self.bytes.len() / 4,
469485
}
470486
}
471487
#[cold]
@@ -490,6 +506,7 @@ impl PyStr {
490506
match self.kind {
491507
PyStrKindData::Ascii => true,
492508
PyStrKindData::Utf8(_) => false,
509+
PyStrKindData::U32 => false,
493510
}
494511
}
495512

@@ -531,6 +548,7 @@ impl PyStr {
531548
match self.kind.kind() {
532549
PyStrKind::Ascii => self.as_str().to_ascii_lowercase(),
533550
PyStrKind::Utf8 => self.as_str().to_lowercase(),
551+
PyStrKind::U32 => unimplemented!(),
534552
}
535553
}
536554

@@ -545,6 +563,7 @@ impl PyStr {
545563
match self.kind.kind() {
546564
PyStrKind::Ascii => self.as_str().to_ascii_uppercase(),
547565
PyStrKind::Utf8 => self.as_str().to_uppercase(),
566+
PyStrKind::U32 => unimplemented!(),
548567
}
549568
}
550569

@@ -597,6 +616,7 @@ impl PyStr {
597616
|v, s, n, vm| v.splitn(n, s).map(|s| vm.ctx.new_str(s).into()).collect(),
598617
|v, n, vm| v.py_split_whitespace(n, |s| vm.ctx.new_str(s).into()),
599618
),
619+
PyStrKind::U32 => unimplemented!(),
600620
}?;
601621
Ok(elements)
602622
}
@@ -880,6 +900,7 @@ impl PyStr {
880900
PyStrKind::Utf8 => self
881901
.as_str()
882902
.py_iscase(char::is_lowercase, char::is_uppercase),
903+
PyStrKind::U32 => unimplemented!(),
883904
}
884905
}
885906

@@ -891,6 +912,7 @@ impl PyStr {
891912
PyStrKind::Utf8 => self
892913
.as_str()
893914
.py_iscase(char::is_uppercase, char::is_lowercase),
915+
PyStrKind::U32 => unimplemented!(),
894916
}
895917
}
896918

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.