Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e27db2e

Browse files
committed
Use bstr for wtf8 pattern methods
1 parent d175479 commit e27db2e
Copy full SHA for e27db2e

File tree

3 files changed

+22
-34
lines changed
Filter options

3 files changed

+22
-34
lines changed

‎Cargo.lock

Copy file name to clipboard | Expand all lines: Cargo.lock
+1 | Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎common/Cargo.toml

Copy file name to clipboard | Expand all lines: common/Cargo.toml
+1 | Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ rustpython-format = { workspace = true }
1616

1717
ascii = { workspace = true }
1818
bitflags = { workspace = true }
19+
bstr = { workspace = true }
1920
cfg-if = { workspace = true }
2021
itertools = { workspace = true }
2122
libc = { workspace = true }

‎common/src/wtf8/mod.rs

Copy file name to clipboard | Expand all lines: common/src/wtf8/mod.rs
+20 -34 | Lines changed: 20 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,8 @@ use std::collections::TryReserveError;
4949
use std::string::String;
5050
use std::vec::Vec;
5151

52+
use bstr::ByteSlice;
53+
5254
mod core_char;
5355
mod core_str;
5456

@@ -864,49 +866,27 @@ impl Wtf8 {
864866
}
865867

866868
pub fn split(&self, pat: &Wtf8) -> impl Iterator<Item = &Self> {
867-
self.splitn(usize::MAX, pat)
869+
self.as_bytes()
870+
.split_str(pat)
871+
.map(|w| unsafe { Wtf8::from_bytes_unchecked(w) })
868872
}
869873

870874
pub fn splitn(&self, n: usize, pat: &Wtf8) -> impl Iterator<Item = &Self> {
871-
let (haystack, needle) = (self.as_bytes(), pat.as_bytes());
872-
if n == 0 {
873-
return None.into_iter().flatten();
874-
}
875-
let mut prev_idx = Some(0);
876-
let mut iter = memchr::memmem::find_iter(haystack, needle).take(n - 1);
877-
Some(std::iter::from_fn(move || {
878-
prev_idx.map(|prev| {
879-
let idx = iter.next();
880-
let chunk = &haystack[prev..idx.unwrap_or(haystack.len())];
881-
prev_idx = idx.map(|i| i + needle.len());
882-
unsafe { Wtf8::from_bytes_unchecked(chunk) }
883-
})
884-
}))
885-
.into_iter()
886-
.flatten()
875+
self.as_bytes()
876+
.splitn_str(n, pat)
877+
.map(|w| unsafe { Wtf8::from_bytes_unchecked(w) })
887878
}
888879

889880
pub fn rsplit(&self, pat: &Wtf8) -> impl Iterator<Item = &Self> {
890-
self.rsplitn(usize::MAX, pat)
881+
self.as_bytes()
882+
.rsplit_str(pat)
883+
.map(|w| unsafe { Wtf8::from_bytes_unchecked(w) })
891884
}
892885

893886
pub fn rsplitn(&self, n: usize, pat: &Wtf8) -> impl Iterator<Item = &Self> {
894-
let (haystack, needle) = (self.as_bytes(), pat.as_bytes());
895-
if n == 0 {
896-
return None.into_iter().flatten();
897-
}
898-
let mut prev_idx = Some(haystack.len());
899-
let mut iter = memchr::memmem::rfind_iter(haystack, needle).take(n - 1);
900-
Some(std::iter::from_fn(move || {
901-
prev_idx.map(|prev| {
902-
let idx = iter.next();
903-
let chunk = &haystack[idx.map_or(0, |i| i + needle.len())..prev];
904-
prev_idx = idx;
905-
unsafe { Wtf8::from_bytes_unchecked(chunk) }
906-
})
907-
}))
908-
.into_iter()
909-
.flatten()
887+
self.as_bytes()
888+
.rsplitn_str(n, pat)
889+
.map(|w| unsafe { Wtf8::from_bytes_unchecked(w) })
910890
}
911891

912892
pub fn trim_start_matches(&self, f: impl Fn(CodePoint) -> bool) -> &Self {
@@ -980,6 +960,12 @@ impl AsRef<Wtf8> for str {
980960
}
981961
}
982962

963+
impl AsRef<[u8]> for Wtf8 {
964+
fn as_ref(&self) -> &[u8] {
965+
self.as_bytes()
966+
}
967+
}
968+
983969
/// Returns a slice of the given string for the byte range \[`begin`..`end`).
984970
///
985971
/// # Panics

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.