rustpython_vm/builtins/
str.rs

1use super::{
2    int::{PyInt, PyIntRef},
3    iter::IterStatus::{self, Exhausted},
4    PositionIterInternal, PyBytesRef, PyDict, PyTupleRef, PyType, PyTypeRef,
5};
6use crate::{
7    anystr::{self, adjust_indices, AnyStr, AnyStrContainer, AnyStrWrapper},
8    atomic_func,
9    class::PyClassImpl,
10    common::str::{BorrowedStr, PyStrKind, PyStrKindData},
11    convert::{IntoPyException, ToPyException, ToPyObject, ToPyResult},
12    format::{format, format_map},
13    function::{ArgIterable, ArgSize, FuncArgs, OptionalArg, OptionalOption, PyComparisonValue},
14    intern::PyInterned,
15    object::{Traverse, TraverseFn},
16    protocol::{PyIterReturn, PyMappingMethods, PyNumberMethods, PySequenceMethods},
17    sequence::SequenceExt,
18    sliceable::{SequenceIndex, SliceableSequenceOp},
19    types::{
20        AsMapping, AsNumber, AsSequence, Comparable, Constructor, Hashable, IterNext, Iterable,
21        PyComparisonOp, Representable, SelfIter, Unconstructible,
22    },
23    AsObject, Context, Py, PyExact, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, PyResult,
24    TryFromBorrowedObject, VirtualMachine,
25};
26use ascii::{AsciiStr, AsciiString};
27use bstr::ByteSlice;
28use itertools::Itertools;
29use num_traits::ToPrimitive;
30use once_cell::sync::Lazy;
31use rustpython_common::{
32    ascii,
33    atomic::{self, PyAtomic, Radium},
34    hash,
35    lock::PyMutex,
36};
37use rustpython_format::{FormatSpec, FormatString, FromTemplate};
38use std::{char, fmt, ops::Range, string::ToString};
39use unic_ucd_bidi::BidiClass;
40use unic_ucd_category::GeneralCategory;
41use unic_ucd_ident::{is_xid_continue, is_xid_start};
42use unicode_casing::CharExt;
43
44impl<'a> TryFromBorrowedObject<'a> for String {
45    fn try_from_borrowed_object(vm: &VirtualMachine, obj: &'a PyObject) -> PyResult<Self> {
46        obj.try_value_with(|pystr: &PyStr| Ok(pystr.as_str().to_owned()), vm)
47    }
48}
49
50impl<'a> TryFromBorrowedObject<'a> for &'a str {
51    fn try_from_borrowed_object(vm: &VirtualMachine, obj: &'a PyObject) -> PyResult<Self> {
52        let pystr: &Py<PyStr> = TryFromBorrowedObject::try_from_borrowed_object(vm, obj)?;
53        Ok(pystr.as_str())
54    }
55}
56
// Payload type registered under the Python-visible name "str".
#[pyclass(module = false, name = "str")]
pub struct PyStr {
    // Raw contents; `as_str` reinterprets these bytes as UTF-8 without re-validating them.
    bytes: Box<[u8]>,
    // Tag telling ASCII-only contents apart from general UTF-8 (see `is_ascii`).
    kind: PyStrKindData,
    // Cached hash value; holds `hash::SENTINEL` until `_compute_hash` stores the real one.
    hash: PyAtomic<hash::PyHash>,
}
63
64impl fmt::Debug for PyStr {
65    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
66        f.debug_struct("PyStr")
67            .field("value", &self.as_str())
68            .field("kind", &self.kind)
69            .field("hash", &self.hash)
70            .finish()
71    }
72}
73
74impl AsRef<str> for PyStr {
75    fn as_ref(&self) -> &str {
76        self.as_str()
77    }
78}
79
impl AsRef<str> for Py<PyStr> {
    /// Exposes the string contents of the wrapped `PyStr` as a plain `&str`.
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
85
impl AsRef<str> for PyStrRef {
    /// Exposes the string contents behind the reference as a plain `&str`.
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
91
92impl<'a> From<&'a AsciiStr> for PyStr {
93    fn from(s: &'a AsciiStr) -> Self {
94        s.to_owned().into()
95    }
96}
97
98impl From<AsciiString> for PyStr {
99    fn from(s: AsciiString) -> Self {
100        unsafe { Self::new_ascii_unchecked(s.into()) }
101    }
102}
103
104impl<'a> From<&'a str> for PyStr {
105    fn from(s: &'a str) -> Self {
106        s.to_owned().into()
107    }
108}
109
110impl From<String> for PyStr {
111    fn from(s: String) -> Self {
112        s.into_boxed_str().into()
113    }
114}
115
116impl<'a> From<std::borrow::Cow<'a, str>> for PyStr {
117    fn from(s: std::borrow::Cow<'a, str>) -> Self {
118        s.into_owned().into()
119    }
120}
121
122impl From<Box<str>> for PyStr {
123    #[inline]
124    fn from(value: Box<str>) -> Self {
125        // doing the check is ~10x faster for ascii, and is actually only 2% slower worst case for
126        // non-ascii; see https://github.com/RustPython/RustPython/pull/2586#issuecomment-844611532
127        let is_ascii = value.is_ascii();
128        let bytes = value.into_boxed_bytes();
129        let kind = if is_ascii {
130            PyStrKind::Ascii
131        } else {
132            PyStrKind::Utf8
133        }
134        .new_data();
135        Self {
136            bytes,
137            kind,
138            hash: Radium::new(hash::SENTINEL),
139        }
140    }
141}
142
/// Shorthand for a `PyRef` holding a [`PyStr`].
pub type PyStrRef = PyRef<PyStr>;
144
145impl fmt::Display for PyStr {
146    #[inline]
147    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148        fmt::Display::fmt(self.as_str(), f)
149    }
150}
151
/// Conversion of string-like references into a borrowed `Py<PyStr>`.
///
/// The `Context` is passed so that implementations which have no `PyStr` at hand
/// (such as `&'static str`) can obtain one from it.
pub trait AsPyStr<'a>
where
    Self: 'a,
{
    /// Returns the `Py<PyStr>` corresponding to `self`.
    #[allow(clippy::wrong_self_convention)] // to implement on refs
    fn as_pystr(self, ctx: &Context) -> &'a Py<PyStr>;
}
159
impl<'a> AsPyStr<'a> for &'a Py<PyStr> {
    /// Identity conversion; the context is not consulted.
    #[inline]
    fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
        self
    }
}
166
impl<'a> AsPyStr<'a> for &'a PyStrRef {
    /// Borrows the `Py<PyStr>` behind the reference; the context is not consulted.
    #[inline]
    fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
        self
    }
}
173
impl AsPyStr<'static> for &'static str {
    /// Obtains the string object for this literal via `Context::intern_str`.
    #[inline]
    fn as_pystr(self, ctx: &Context) -> &'static Py<PyStr> {
        ctx.intern_str(self)
    }
}
180
impl<'a> AsPyStr<'a> for &'a PyStrInterned {
    /// Borrows the interned string as a `Py<PyStr>`; the context is not consulted.
    #[inline]
    fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
        self
    }
}
187
// Payload type registered under the Python-visible name "str_iterator".
#[pyclass(module = false, name = "str_iterator", traverse = "manual")]
#[derive(Debug)]
pub struct PyStrIterator {
    // `.0` is the position-based iterator state over the string (position counted in chars).
    // `.1` caches the byte offset of the next char for `next`; `usize::MAX` means the offset
    // is unknown and has to be recomputed from the char position.
    internal: PyMutex<(PositionIterInternal<PyStrRef>, usize)>,
}
193
unsafe impl Traverse for PyStrIterator {
    /// Forwards traversal to the inner iterator state (the first tuple element).
    fn traverse(&self, tracer: &mut TraverseFn) {
        // No need to worry about deadlock, for inner is a PyStr and can't make ref cycle
        self.internal.lock().0.traverse(tracer);
    }
}
200
impl PyPayload for PyStrIterator {
    /// Returns the type object stored as `str_iterator_type` in the context.
    fn class(ctx: &Context) -> &'static Py<PyType> {
        ctx.types.str_iterator_type
    }
}
206
207#[pyclass(with(Unconstructible, IterNext, Iterable))]
208impl PyStrIterator {
209    #[pymethod(magic)]
210    fn length_hint(&self) -> usize {
211        self.internal.lock().0.length_hint(|obj| obj.char_len())
212    }
213
214    #[pymethod(magic)]
215    fn setstate(&self, state: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
216        let mut internal = self.internal.lock();
217        internal.1 = usize::MAX;
218        internal
219            .0
220            .set_state(state, |obj, pos| pos.min(obj.char_len()), vm)
221    }
222
223    #[pymethod(magic)]
224    fn reduce(&self, vm: &VirtualMachine) -> PyTupleRef {
225        self.internal
226            .lock()
227            .0
228            .builtins_iter_reduce(|x| x.clone().into(), vm)
229    }
230}
// Marker impl: no methods are overridden here.
impl Unconstructible for PyStrIterator {}

// Marker impl: no methods are overridden here.
impl SelfIter for PyStrIterator {}
impl IterNext for PyStrIterator {
    /// Yields the next character of the string, or `StopIteration` once there is none left.
    fn next(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<PyIterReturn> {
        let mut internal = zelf.internal.lock();

        if let IterStatus::Active(s) = &internal.0.status {
            let value = s.as_str();

            if internal.1 == usize::MAX {
                // Byte offset unknown: find the char by walking `position` chars from the
                // start, then cache the byte offset just past it.
                if let Some((offset, ch)) = value.char_indices().nth(internal.0.position) {
                    internal.0.position += 1;
                    internal.1 = offset + ch.len_utf8();
                    return Ok(PyIterReturn::Return(ch.to_pyobject(vm)));
                }
            } else if let Some(value) = value.get(internal.1..) {
                // Byte offset known: read the char directly at the cached offset.
                if let Some(ch) = value.chars().next() {
                    internal.0.position += 1;
                    internal.1 += ch.len_utf8();
                    return Ok(PyIterReturn::Return(ch.to_pyobject(vm)));
                }
            }
            // Nothing was returned above, so there is no further char to yield.
            internal.0.status = Exhausted;
        }
        Ok(PyIterReturn::StopIteration(None))
    }
}
259
// Arguments accepted by the `str` constructor (see `Constructor for PyStr`).
#[derive(FromArgs)]
pub struct StrArgs {
    // Object to convert; when missing, the constructor produces an empty string.
    #[pyarg(any, optional)]
    object: OptionalArg<PyObjectRef>,
    // When present together with `object`, the object is decoded with this encoding.
    #[pyarg(any, optional)]
    encoding: OptionalArg<PyStrRef>,
    // Passed to the decoder as its `errors` argument; only used when `encoding` is present.
    #[pyarg(any, optional)]
    errors: OptionalArg<PyStrRef>,
}
269
270impl Constructor for PyStr {
271    type Args = StrArgs;
272
273    fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult {
274        let string: PyStrRef = match args.object {
275            OptionalArg::Present(input) => {
276                if let OptionalArg::Present(enc) = args.encoding {
277                    vm.state.codec_registry.decode_text(
278                        input,
279                        enc.as_str(),
280                        args.errors.into_option(),
281                        vm,
282                    )?
283                } else {
284                    input.str(vm)?
285                }
286            }
287            OptionalArg::Missing => {
288                PyStr::from(String::new()).into_ref_with_type(vm, cls.clone())?
289            }
290        };
291        if string.class().is(&cls) {
292            Ok(string.into())
293        } else {
294            PyStr::from(string.as_str())
295                .into_ref_with_type(vm, cls)
296                .map(Into::into)
297        }
298    }
299}
300
301impl PyStr {
302    /// # Safety: Given `bytes` must be valid data for given `kind`
303    pub(crate) unsafe fn new_str_unchecked(bytes: Vec<u8>, kind: PyStrKind) -> Self {
304        let s = Self {
305            bytes: bytes.into_boxed_slice(),
306            kind: kind.new_data(),
307            hash: Radium::new(hash::SENTINEL),
308        };
309        debug_assert!(matches!(s.kind, PyStrKindData::Ascii) || !s.as_str().is_ascii());
310        s
311    }
312
313    /// # Safety
314    /// Given `bytes` must be ascii
315    pub unsafe fn new_ascii_unchecked(bytes: Vec<u8>) -> Self {
316        Self::new_str_unchecked(bytes, PyStrKind::Ascii)
317    }
318
319    pub fn new_ref(zelf: impl Into<Self>, ctx: &Context) -> PyRef<Self> {
320        let zelf = zelf.into();
321        PyRef::new_ref(zelf, ctx.types.str_type.to_owned(), None)
322    }
323
324    fn new_substr(&self, s: String) -> Self {
325        let kind = if self.kind.kind() == PyStrKind::Ascii || s.is_ascii() {
326            PyStrKind::Ascii
327        } else {
328            PyStrKind::Utf8
329        };
330        unsafe {
331            // SAFETY: kind is properly decided for substring
332            Self::new_str_unchecked(s.into_bytes(), kind)
333        }
334    }
335
336    #[inline]
337    pub fn as_str(&self) -> &str {
338        unsafe {
339            // SAFETY: Both PyStrKind::{Ascii, Utf8} are valid utf8 string
340            std::str::from_utf8_unchecked(&self.bytes)
341        }
342    }
343
344    fn char_all<F>(&self, test: F) -> bool
345    where
346        F: Fn(char) -> bool,
347    {
348        match self.kind.kind() {
349            PyStrKind::Ascii => self.bytes.iter().all(|&x| test(char::from(x))),
350            PyStrKind::Utf8 => self.as_str().chars().all(test),
351        }
352    }
353
354    fn borrow(&self) -> &BorrowedStr {
355        unsafe { std::mem::transmute(self) }
356    }
357
358    fn repeat(zelf: PyRef<Self>, value: isize, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
359        if value == 0 && zelf.class().is(vm.ctx.types.str_type) {
360            // Special case: when some `str` is multiplied by `0`,
361            // returns the empty `str`.
362            return Ok(vm.ctx.empty_str.to_owned());
363        }
364        if (value == 1 || zelf.is_empty()) && zelf.class().is(vm.ctx.types.str_type) {
365            // Special case: when some `str` is multiplied by `1` or is the empty `str`,
366            // nothing really happens, we need to return an object itself
367            // with the same `id()` to be compatible with CPython.
368            // This only works for `str` itself, not its subclasses.
369            return Ok(zelf);
370        }
371        zelf.as_str()
372            .as_bytes()
373            .mul(vm, value)
374            .map(|x| Self::from(unsafe { String::from_utf8_unchecked(x) }).into_ref(&vm.ctx))
375    }
376}
377
378#[pyclass(
379    flags(BASETYPE),
380    with(
381        PyRef,
382        AsMapping,
383        AsNumber,
384        AsSequence,
385        Representable,
386        Hashable,
387        Comparable,
388        Iterable,
389        Constructor
390    )
391)]
392impl PyStr {
393    #[pymethod(magic)]
394    fn add(zelf: PyRef<Self>, other: PyObjectRef, vm: &VirtualMachine) -> PyResult {
395        if let Some(other) = other.payload::<PyStr>() {
396            let bytes = zelf.as_str().py_add(other.as_ref());
397            Ok(unsafe {
398                // SAFETY: `kind` is safely decided
399                let kind = zelf.kind.kind() | other.kind.kind();
400                Self::new_str_unchecked(bytes.into_bytes(), kind)
401            }
402            .to_pyobject(vm))
403        } else if let Some(radd) = vm.get_method(other.clone(), identifier!(vm, __radd__)) {
404            // hack to get around not distinguishing number add from seq concat
405            radd?.call((zelf,), vm)
406        } else {
407            Err(vm.new_type_error(format!(
408                "can only concatenate str (not \"{}\") to str",
409                other.class().name()
410            )))
411        }
412    }
413
414    fn _contains(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult<bool> {
415        if let Some(needle) = needle.payload::<Self>() {
416            Ok(self.as_str().contains(needle.as_str()))
417        } else {
418            Err(vm.new_type_error(format!(
419                "'in <string>' requires string as left operand, not {}",
420                needle.class().name()
421            )))
422        }
423    }
424
    // Python-facing `__contains__`; forwards to `_contains` with a borrowed needle.
    #[pymethod(magic)]
    fn contains(&self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult<bool> {
        self._contains(&needle, vm)
    }
429
430    fn _getitem(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult {
431        match SequenceIndex::try_from_borrowed_object(vm, needle, "str")? {
432            SequenceIndex::Int(i) => self.getitem_by_index(vm, i).map(|x| x.to_string()),
433            SequenceIndex::Slice(slice) => self.getitem_by_slice(vm, slice),
434        }
435        .map(|x| self.new_substr(x).into_ref(&vm.ctx).into())
436    }
437
    // Python-facing `__getitem__`; forwards to `_getitem` with a borrowed needle.
    #[pymethod(magic)]
    fn getitem(&self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult {
        self._getitem(&needle, vm)
    }
442
443    #[inline]
444    pub(crate) fn hash(&self, vm: &VirtualMachine) -> hash::PyHash {
445        match self.hash.load(atomic::Ordering::Relaxed) {
446            hash::SENTINEL => self._compute_hash(vm),
447            hash => hash,
448        }
449    }
450    #[cold]
451    fn _compute_hash(&self, vm: &VirtualMachine) -> hash::PyHash {
452        let hash_val = vm.state.hash_secret.hash_str(self.as_str());
453        debug_assert_ne!(hash_val, hash::SENTINEL);
454        // like with char_len, we don't need a cmpxchg loop, since it'll always be the same value
455        self.hash.store(hash_val, atomic::Ordering::Relaxed);
456        hash_val
457    }
458
    // Length of the stored contents in bytes (not characters).
    #[inline]
    pub fn byte_len(&self) -> usize {
        self.bytes.len()
    }
    // True when the string holds no bytes at all.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.bytes.is_empty()
    }
467
    // Python-facing `__len__`; the count is delegated to the `BorrowedStr` view.
    #[pymethod(name = "__len__")]
    #[inline]
    pub fn char_len(&self) -> usize {
        self.borrow().char_len()
    }
473
474    #[pymethod(name = "isascii")]
475    #[inline(always)]
476    pub fn is_ascii(&self) -> bool {
477        match self.kind {
478            PyStrKindData::Ascii => true,
479            PyStrKindData::Utf8(_) => false,
480        }
481    }
482
483    #[pymethod(magic)]
484    fn sizeof(&self) -> usize {
485        std::mem::size_of::<Self>() + self.byte_len() * std::mem::size_of::<u8>()
486    }
487
    // Registered as both `__rmul__` and `__mul__`; forwards to `repeat`.
    #[pymethod(name = "__rmul__")]
    #[pymethod(magic)]
    fn mul(zelf: PyRef<Self>, value: ArgSize, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
        Self::repeat(zelf, value.into(), vm)
    }
493
494    #[inline]
495    pub(crate) fn repr(&self, vm: &VirtualMachine) -> PyResult<String> {
496        use crate::literal::escape::UnicodeEscape;
497        let escape = UnicodeEscape::new_repr(self.as_str());
498        escape
499            .str_repr()
500            .to_string()
501            .ok_or_else(|| vm.new_overflow_error("string is too long to generate repr".to_owned()))
502    }
503
504    #[pymethod]
505    fn lower(&self) -> String {
506        match self.kind.kind() {
507            PyStrKind::Ascii => self.as_str().to_ascii_lowercase(),
508            PyStrKind::Utf8 => self.as_str().to_lowercase(),
509        }
510    }
511
    // casefold is much more aggressive than lower
    // The folding itself is delegated to `caseless::default_case_fold_str`.
    #[pymethod]
    fn casefold(&self) -> String {
        caseless::default_case_fold_str(self.as_str())
    }
517
518    #[pymethod]
519    fn upper(&self) -> String {
520        match self.kind.kind() {
521            PyStrKind::Ascii => self.as_str().to_ascii_uppercase(),
522            PyStrKind::Utf8 => self.as_str().to_uppercase(),
523        }
524    }
525
526    #[pymethod]
527    fn capitalize(&self) -> String {
528        let mut chars = self.as_str().chars();
529        if let Some(first_char) = chars.next() {
530            format!(
531                "{}{}",
532                first_char.to_uppercase(),
533                &chars.as_str().to_lowercase(),
534            )
535        } else {
536            "".to_owned()
537        }
538    }
539
    // Splits via `py_split`, which receives three strategies in this order: split on a
    // separator, split on a separator with a limit, and split on whitespace.
    #[pymethod]
    fn split(&self, args: SplitArgs, vm: &VirtualMachine) -> PyResult<Vec<PyObjectRef>> {
        let elements = match self.kind.kind() {
            // ASCII contents: work on raw bytes; every piece of an ASCII string is ASCII
            // too, so pieces are wrapped without re-checking.
            PyStrKind::Ascii => self.as_str().py_split(
                args,
                vm,
                |v, s, vm| {
                    v.as_bytes()
                        .split_str(s)
                        .map(|s| {
                            unsafe { PyStr::new_ascii_unchecked(s.to_owned()) }.to_pyobject(vm)
                        })
                        .collect()
                },
                |v, s, n, vm| {
                    v.as_bytes()
                        .splitn_str(n, s)
                        .map(|s| {
                            unsafe { PyStr::new_ascii_unchecked(s.to_owned()) }.to_pyobject(vm)
                        })
                        .collect()
                },
                |v, n, vm| {
                    v.as_bytes().py_split_whitespace(n, |s| {
                        unsafe { PyStr::new_ascii_unchecked(s.to_owned()) }.to_pyobject(vm)
                    })
                },
            ),
            // General UTF-8 contents: split on `&str` and build each piece with `new_str`.
            PyStrKind::Utf8 => self.as_str().py_split(
                args,
                vm,
                |v, s, vm| v.split(s).map(|s| vm.ctx.new_str(s).into()).collect(),
                |v, s, n, vm| v.splitn(n, s).map(|s| vm.ctx.new_str(s).into()).collect(),
                |v, n, vm| v.py_split_whitespace(n, |s| vm.ctx.new_str(s).into()),
            ),
        }?;
        Ok(elements)
    }
578
579    #[pymethod]
580    fn rsplit(&self, args: SplitArgs, vm: &VirtualMachine) -> PyResult<Vec<PyObjectRef>> {
581        let mut elements = self.as_str().py_split(
582            args,
583            vm,
584            |v, s, vm| v.rsplit(s).map(|s| vm.ctx.new_str(s).into()).collect(),
585            |v, s, n, vm| v.rsplitn(n, s).map(|s| vm.ctx.new_str(s).into()).collect(),
586            |v, n, vm| v.py_rsplit_whitespace(n, |s| vm.ctx.new_str(s).into()),
587        )?;
588        // Unlike Python rsplit, Rust rsplitn returns an iterator that
589        // starts from the end of the string.
590        elements.reverse();
591        Ok(elements)
592    }
593
594    #[pymethod]
595    fn strip(&self, chars: OptionalOption<PyStrRef>) -> String {
596        self.as_str()
597            .py_strip(
598                chars,
599                |s, chars| s.trim_matches(|c| chars.contains(c)),
600                |s| s.trim(),
601            )
602            .to_owned()
603    }
604
605    #[pymethod]
606    fn lstrip(
607        zelf: PyRef<Self>,
608        chars: OptionalOption<PyStrRef>,
609        vm: &VirtualMachine,
610    ) -> PyRef<Self> {
611        let s = zelf.as_str();
612        let stripped = s.py_strip(
613            chars,
614            |s, chars| s.trim_start_matches(|c| chars.contains(c)),
615            |s| s.trim_start(),
616        );
617        if s == stripped {
618            zelf
619        } else {
620            vm.ctx.new_str(stripped)
621        }
622    }
623
624    #[pymethod]
625    fn rstrip(
626        zelf: PyRef<Self>,
627        chars: OptionalOption<PyStrRef>,
628        vm: &VirtualMachine,
629    ) -> PyRef<Self> {
630        let s = zelf.as_str();
631        let stripped = s.py_strip(
632            chars,
633            |s, chars| s.trim_end_matches(|c| chars.contains(c)),
634            |s| s.trim_end(),
635        );
636        if s == stripped {
637            zelf
638        } else {
639            vm.ctx.new_str(stripped)
640        }
641    }
642
643    #[pymethod]
644    fn endswith(&self, options: anystr::StartsEndsWithArgs, vm: &VirtualMachine) -> PyResult<bool> {
645        let (affix, substr) =
646            match options.prepare(self.as_str(), self.len(), |s, r| s.get_chars(r)) {
647                Some(x) => x,
648                None => return Ok(false),
649            };
650        substr.py_startsendswith(
651            &affix,
652            "endswith",
653            "str",
654            |s, x: &Py<PyStr>| s.ends_with(x.as_str()),
655            vm,
656        )
657    }
658
659    #[pymethod]
660    fn startswith(
661        &self,
662        options: anystr::StartsEndsWithArgs,
663        vm: &VirtualMachine,
664    ) -> PyResult<bool> {
665        let (affix, substr) =
666            match options.prepare(self.as_str(), self.len(), |s, r| s.get_chars(r)) {
667                Some(x) => x,
668                None => return Ok(false),
669            };
670        substr.py_startsendswith(
671            &affix,
672            "startswith",
673            "str",
674            |s, x: &Py<PyStr>| s.starts_with(x.as_str()),
675            vm,
676        )
677    }
678
679    /// Return a str with the given prefix string removed if present.
680    ///
681    /// If the string starts with the prefix string, return string[len(prefix):]
682    /// Otherwise, return a copy of the original string.
683    #[pymethod]
684    fn removeprefix(&self, pref: PyStrRef) -> String {
685        self.as_str()
686            .py_removeprefix(pref.as_str(), pref.byte_len(), |s, p| s.starts_with(p))
687            .to_owned()
688    }
689
690    /// Return a str with the given suffix string removed if present.
691    ///
692    /// If the string ends with the suffix string, return string[:len(suffix)]
693    /// Otherwise, return a copy of the original string.
694    #[pymethod]
695    fn removesuffix(&self, suffix: PyStrRef) -> String {
696        self.as_str()
697            .py_removesuffix(suffix.as_str(), suffix.byte_len(), |s, p| s.ends_with(p))
698            .to_owned()
699    }
700
701    #[pymethod]
702    fn isalnum(&self) -> bool {
703        !self.bytes.is_empty() && self.char_all(char::is_alphanumeric)
704    }
705
706    #[pymethod]
707    fn isnumeric(&self) -> bool {
708        !self.bytes.is_empty() && self.char_all(char::is_numeric)
709    }
710
711    #[pymethod]
712    fn isdigit(&self) -> bool {
713        // python's isdigit also checks if exponents are digits, these are the unicode codepoints for exponents
714        let valid_codepoints: [u16; 10] = [
715            0x2070, 0x00B9, 0x00B2, 0x00B3, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079,
716        ];
717        let s = self.as_str();
718        !s.is_empty()
719            && s.chars()
720                .filter(|c| !c.is_ascii_digit())
721                .all(|c| valid_codepoints.contains(&(c as u16)))
722    }
723
724    #[pymethod]
725    fn isdecimal(&self) -> bool {
726        !self.bytes.is_empty()
727            && self.char_all(|c| GeneralCategory::of(c) == GeneralCategory::DecimalNumber)
728    }
729
730    #[pymethod(name = "__mod__")]
731    fn modulo(&self, values: PyObjectRef, vm: &VirtualMachine) -> PyResult<String> {
732        let formatted = self.as_str().py_cformat(values, vm)?;
733        Ok(formatted)
734    }
735
    // Always answers `NotImplemented`; the operand is ignored.
    #[pymethod(magic)]
    fn rmod(&self, _values: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.not_implemented()
    }
740
741    #[pymethod]
742    fn format(&self, args: FuncArgs, vm: &VirtualMachine) -> PyResult<String> {
743        let format_str = FormatString::from_str(self.as_str()).map_err(|e| e.to_pyexception(vm))?;
744        format(&format_str, &args, vm)
745    }
746
747    /// S.format_map(mapping) -> str
748    ///
749    /// Return a formatted version of S, using substitutions from mapping.
750    /// The substitutions are identified by braces ('{' and '}').
751    #[pymethod]
752    fn format_map(&self, mapping: PyObjectRef, vm: &VirtualMachine) -> PyResult<String> {
753        let format_string =
754            FormatString::from_str(self.as_str()).map_err(|err| err.to_pyexception(vm))?;
755        format_map(&format_string, &mapping, vm)
756    }
757
758    #[pymethod(name = "__format__")]
759    fn __format__(zelf: PyRef<Self>, spec: PyStrRef, vm: &VirtualMachine) -> PyResult<PyStrRef> {
760        let spec = spec.as_str();
761        if spec.is_empty() {
762            return if zelf.class().is(vm.ctx.types.str_type) {
763                Ok(zelf)
764            } else {
765                zelf.as_object().str(vm)
766            };
767        }
768
769        let s = FormatSpec::parse(spec)
770            .and_then(|format_spec| format_spec.format_string(zelf.borrow()))
771            .map_err(|err| err.into_pyexception(vm))?;
772        Ok(vm.ctx.new_str(s))
773    }
774
775    /// Return a titlecased version of the string where words start with an
776    /// uppercase character and the remaining characters are lowercase.
777    #[pymethod]
778    fn title(&self) -> String {
779        let mut title = String::with_capacity(self.bytes.len());
780        let mut previous_is_cased = false;
781        for c in self.as_str().chars() {
782            if c.is_lowercase() {
783                if !previous_is_cased {
784                    title.extend(c.to_titlecase());
785                } else {
786                    title.push(c);
787                }
788                previous_is_cased = true;
789            } else if c.is_uppercase() || c.is_titlecase() {
790                if previous_is_cased {
791                    title.extend(c.to_lowercase());
792                } else {
793                    title.push(c);
794                }
795                previous_is_cased = true;
796            } else {
797                previous_is_cased = false;
798                title.push(c);
799            }
800        }
801        title
802    }
803
804    #[pymethod]
805    fn swapcase(&self) -> String {
806        let mut swapped_str = String::with_capacity(self.bytes.len());
807        for c in self.as_str().chars() {
808            // to_uppercase returns an iterator, to_ascii_uppercase returns the char
809            if c.is_lowercase() {
810                swapped_str.push(c.to_ascii_uppercase());
811            } else if c.is_uppercase() {
812                swapped_str.push(c.to_ascii_lowercase());
813            } else {
814                swapped_str.push(c);
815            }
816        }
817        swapped_str
818    }
819
820    #[pymethod]
821    fn isalpha(&self) -> bool {
822        !self.bytes.is_empty() && self.char_all(char::is_alphabetic)
823    }
824
825    #[pymethod]
826    fn replace(&self, old: PyStrRef, new: PyStrRef, count: OptionalArg<isize>) -> String {
827        let s = self.as_str();
828        match count {
829            OptionalArg::Present(max_count) if max_count >= 0 => {
830                if max_count == 0 || (s.is_empty() && !old.is_empty()) {
831                    // nothing to do; return the original bytes
832                    s.to_owned()
833                } else if s.is_empty() && old.is_empty() {
834                    new.as_str().to_owned()
835                } else {
836                    s.replacen(old.as_str(), new.as_str(), max_count as usize)
837                }
838            }
839            _ => s.replace(old.as_str(), new.as_str()),
840        }
841    }
842
843    /// Return true if all characters in the string are printable or the string is empty,
844    /// false otherwise.  Nonprintable characters are those characters defined in the
845    /// Unicode character database as `Other` or `Separator`,
846    /// excepting the ASCII space (0x20) which is considered printable.
847    ///
848    /// All characters except those characters defined in the Unicode character
849    /// database as following categories are considered printable.
850    ///   * Cc (Other, Control)
851    ///   * Cf (Other, Format)
852    ///   * Cs (Other, Surrogate)
853    ///   * Co (Other, Private Use)
854    ///   * Cn (Other, Not Assigned)
855    ///   * Zl Separator, Line ('\u2028', LINE SEPARATOR)
856    ///   * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
857    ///   * Zs (Separator, Space) other than ASCII space('\x20').
858    #[pymethod]
859    fn isprintable(&self) -> bool {
860        self.char_all(|c| c == '\u{0020}' || rustpython_literal::char::is_printable(c))
861    }
862
863    #[pymethod]
864    fn isspace(&self) -> bool {
865        use unic_ucd_bidi::bidi_class::abbr_names::*;
866        !self.bytes.is_empty()
867            && self.char_all(|c| {
868                GeneralCategory::of(c) == GeneralCategory::SpaceSeparator
869                    || matches!(BidiClass::of(c), WS | B | S)
870            })
871    }
872
873    // Return true if all cased characters in the string are lowercase and there is at least one cased character, false otherwise.
874    #[pymethod]
875    fn islower(&self) -> bool {
876        match self.kind.kind() {
877            PyStrKind::Ascii => self.bytes.py_iscase(char::is_lowercase, char::is_uppercase),
878            PyStrKind::Utf8 => self
879                .as_str()
880                .py_iscase(char::is_lowercase, char::is_uppercase),
881        }
882    }
883
884    // Return true if all cased characters in the string are uppercase and there is at least one cased character, false otherwise.
885    #[pymethod]
886    fn isupper(&self) -> bool {
887        match self.kind.kind() {
888            PyStrKind::Ascii => self.bytes.py_iscase(char::is_uppercase, char::is_lowercase),
889            PyStrKind::Utf8 => self
890                .as_str()
891                .py_iscase(char::is_uppercase, char::is_lowercase),
892        }
893    }
894
    // Splits the string at line boundaries; terminators are kept only when
    // `args.keepends` is set.
    #[pymethod]
    fn splitlines(&self, args: anystr::SplitLinesArgs, vm: &VirtualMachine) -> Vec<PyObjectRef> {
        let into_wrapper = |s: &str| self.new_substr(s.to_owned()).to_pyobject(vm);
        let mut elements = Vec::new();
        // Byte offset at which the current (not yet emitted) line starts.
        let mut last_i = 0;
        let self_str = self.as_str();
        let mut enumerated = self_str.char_indices().peekable();
        while let Some((i, ch)) = enumerated.next() {
            // Byte length of the line terminator starting at `i`.
            let end_len = match ch {
                '\n' => 1,
                '\r' => {
                    // "\r\n" counts as a single two-byte terminator.
                    let is_rn = enumerated.peek().map_or(false, |(_, ch)| *ch == '\n');
                    if is_rn {
                        let _ = enumerated.next();
                        2
                    } else {
                        1
                    }
                }
                '\x0b' | '\x0c' | '\x1c' | '\x1d' | '\x1e' | '\u{0085}' | '\u{2028}'
                | '\u{2029}' => ch.len_utf8(),
                _ => {
                    // Not a line boundary: keep scanning.
                    continue;
                }
            };
            let range = if args.keepends {
                last_i..i + end_len
            } else {
                last_i..i
            };
            last_i = i + end_len;
            elements.push(into_wrapper(&self_str[range]));
        }
        // Trailing text without a terminator forms the final line.
        if last_i != self_str.len() {
            elements.push(into_wrapper(&self_str[last_i..]));
        }
        elements
    }
933
934    #[pymethod]
935    fn join(
936        zelf: PyRef<Self>,
937        iterable: ArgIterable<PyStrRef>,
938        vm: &VirtualMachine,
939    ) -> PyResult<PyStrRef> {
940        let iter = iterable.iter(vm)?;
941        let joined = match iter.exactly_one() {
942            Ok(first) => {
943                let first = first?;
944                if first.as_object().class().is(vm.ctx.types.str_type) {
945                    return Ok(first);
946                } else {
947                    first.as_str().to_owned()
948                }
949            }
950            Err(iter) => zelf.as_str().py_join(iter)?,
951        };
952        Ok(vm.ctx.new_str(joined))
953    }
954
955    // FIXME: two traversals of str is expensive
956    #[inline]
957    fn _to_char_idx(r: &str, byte_idx: usize) -> usize {
958        r[..byte_idx].chars().count()
959    }
960
961    #[inline]
962    fn _find<F>(&self, args: FindArgs, find: F) -> Option<usize>
963    where
964        F: Fn(&str, &str) -> Option<usize>,
965    {
966        let (sub, range) = args.get_value(self.len());
967        self.as_str().py_find(sub.as_str(), range, find)
968    }
969
970    #[pymethod]
971    fn find(&self, args: FindArgs) -> isize {
972        self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
973            .map_or(-1, |v| v as isize)
974    }
975
976    #[pymethod]
977    fn rfind(&self, args: FindArgs) -> isize {
978        self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
979            .map_or(-1, |v| v as isize)
980    }
981
982    #[pymethod]
983    fn index(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
984        self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
985            .ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
986    }
987
988    #[pymethod]
989    fn rindex(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
990        self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
991            .ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
992    }
993
994    #[pymethod]
995    fn partition(&self, sep: PyStrRef, vm: &VirtualMachine) -> PyResult {
996        let (front, has_mid, back) = self.as_str().py_partition(
997            sep.as_str(),
998            || self.as_str().splitn(2, sep.as_str()),
999            vm,
1000        )?;
1001        let partition = (
1002            self.new_substr(front),
1003            if has_mid {
1004                sep
1005            } else {
1006                vm.ctx.new_str(ascii!(""))
1007            },
1008            self.new_substr(back),
1009        );
1010        Ok(partition.to_pyobject(vm))
1011    }
1012
1013    #[pymethod]
1014    fn rpartition(&self, sep: PyStrRef, vm: &VirtualMachine) -> PyResult {
1015        let (back, has_mid, front) = self.as_str().py_partition(
1016            sep.as_str(),
1017            || self.as_str().rsplitn(2, sep.as_str()),
1018            vm,
1019        )?;
1020        Ok((
1021            self.new_substr(front),
1022            if has_mid {
1023                sep
1024            } else {
1025                vm.ctx.new_str(ascii!(""))
1026            },
1027            self.new_substr(back),
1028        )
1029            .to_pyobject(vm))
1030    }
1031
1032    /// Return `true` if the sequence is ASCII titlecase and the sequence is not
1033    /// empty, `false` otherwise.
1034    #[pymethod]
1035    fn istitle(&self) -> bool {
1036        if self.bytes.is_empty() {
1037            return false;
1038        }
1039
1040        let mut cased = false;
1041        let mut previous_is_cased = false;
1042        for c in self.as_str().chars() {
1043            if c.is_uppercase() || c.is_titlecase() {
1044                if previous_is_cased {
1045                    return false;
1046                }
1047                previous_is_cased = true;
1048                cased = true;
1049            } else if c.is_lowercase() {
1050                if !previous_is_cased {
1051                    return false;
1052                }
1053                previous_is_cased = true;
1054                cased = true;
1055            } else {
1056                previous_is_cased = false;
1057            }
1058        }
1059        cased
1060    }
1061
1062    #[pymethod]
1063    fn count(&self, args: FindArgs) -> usize {
1064        let (needle, range) = args.get_value(self.len());
1065        self.as_str()
1066            .py_count(needle.as_str(), range, |h, n| h.matches(n).count())
1067    }
1068
1069    #[pymethod]
1070    fn zfill(&self, width: isize) -> String {
1071        unsafe {
1072            // SAFETY: this is safe-guaranteed because the original self.as_str() is valid utf8
1073            String::from_utf8_unchecked(self.as_str().py_zfill(width))
1074        }
1075    }
1076
1077    #[inline]
1078    fn _pad(
1079        &self,
1080        width: isize,
1081        fillchar: OptionalArg<PyStrRef>,
1082        pad: fn(&str, usize, char, usize) -> String,
1083        vm: &VirtualMachine,
1084    ) -> PyResult<String> {
1085        let fillchar = fillchar.map_or(Ok(' '), |ref s| {
1086            s.as_str().chars().exactly_one().map_err(|_| {
1087                vm.new_type_error(
1088                    "The fill character must be exactly one character long".to_owned(),
1089                )
1090            })
1091        })?;
1092        Ok(if self.len() as isize >= width {
1093            String::from(self.as_str())
1094        } else {
1095            pad(self.as_str(), width as usize, fillchar, self.len())
1096        })
1097    }
1098
1099    #[pymethod]
1100    fn center(
1101        &self,
1102        width: isize,
1103        fillchar: OptionalArg<PyStrRef>,
1104        vm: &VirtualMachine,
1105    ) -> PyResult<String> {
1106        self._pad(width, fillchar, AnyStr::py_center, vm)
1107    }
1108
1109    #[pymethod]
1110    fn ljust(
1111        &self,
1112        width: isize,
1113        fillchar: OptionalArg<PyStrRef>,
1114        vm: &VirtualMachine,
1115    ) -> PyResult<String> {
1116        self._pad(width, fillchar, AnyStr::py_ljust, vm)
1117    }
1118
1119    #[pymethod]
1120    fn rjust(
1121        &self,
1122        width: isize,
1123        fillchar: OptionalArg<PyStrRef>,
1124        vm: &VirtualMachine,
1125    ) -> PyResult<String> {
1126        self._pad(width, fillchar, AnyStr::py_rjust, vm)
1127    }
1128
1129    #[pymethod]
1130    fn expandtabs(&self, args: anystr::ExpandTabsArgs) -> String {
1131        let tab_stop = args.tabsize();
1132        let mut expanded_str = String::with_capacity(self.byte_len());
1133        let mut tab_size = tab_stop;
1134        let mut col_count = 0usize;
1135        for ch in self.as_str().chars() {
1136            match ch {
1137                '\t' => {
1138                    let num_spaces = tab_size - col_count;
1139                    col_count += num_spaces;
1140                    let expand = " ".repeat(num_spaces);
1141                    expanded_str.push_str(&expand);
1142                }
1143                '\r' | '\n' => {
1144                    expanded_str.push(ch);
1145                    col_count = 0;
1146                    tab_size = 0;
1147                }
1148                _ => {
1149                    expanded_str.push(ch);
1150                    col_count += 1;
1151                }
1152            }
1153            if col_count >= tab_size {
1154                tab_size += tab_stop;
1155            }
1156        }
1157        expanded_str
1158    }
1159
1160    #[pymethod]
1161    fn isidentifier(&self) -> bool {
1162        let mut chars = self.as_str().chars();
1163        let is_identifier_start = chars.next().map_or(false, |c| c == '_' || is_xid_start(c));
1164        // a string is not an identifier if it has whitespace or starts with a number
1165        is_identifier_start && chars.all(is_xid_continue)
1166    }
1167
1168    // https://docs.python.org/3/library/stdtypes.html#str.translate
1169    #[pymethod]
1170    fn translate(&self, table: PyObjectRef, vm: &VirtualMachine) -> PyResult<String> {
1171        vm.get_method_or_type_error(table.clone(), identifier!(vm, __getitem__), || {
1172            format!("'{}' object is not subscriptable", table.class().name())
1173        })?;
1174
1175        let mut translated = String::new();
1176        for c in self.as_str().chars() {
1177            match table.get_item(&*(c as u32).to_pyobject(vm), vm) {
1178                Ok(value) => {
1179                    if let Some(text) = value.payload::<PyStr>() {
1180                        translated.push_str(text.as_str());
1181                    } else if let Some(bigint) = value.payload::<PyInt>() {
1182                        let ch = bigint
1183                            .as_bigint()
1184                            .to_u32()
1185                            .and_then(std::char::from_u32)
1186                            .ok_or_else(|| {
1187                                vm.new_value_error(
1188                                    "character mapping must be in range(0x110000)".to_owned(),
1189                                )
1190                            })?;
1191                        translated.push(ch);
1192                    } else if !vm.is_none(&value) {
1193                        return Err(vm.new_type_error(
1194                            "character mapping must return integer, None or str".to_owned(),
1195                        ));
1196                    }
1197                }
1198                _ => translated.push(c),
1199            }
1200        }
1201        Ok(translated)
1202    }
1203
1204    #[pystaticmethod]
1205    fn maketrans(
1206        dict_or_str: PyObjectRef,
1207        to_str: OptionalArg<PyStrRef>,
1208        none_str: OptionalArg<PyStrRef>,
1209        vm: &VirtualMachine,
1210    ) -> PyResult {
1211        let new_dict = vm.ctx.new_dict();
1212        if let OptionalArg::Present(to_str) = to_str {
1213            match dict_or_str.downcast::<PyStr>() {
1214                Ok(from_str) => {
1215                    if to_str.len() == from_str.len() {
1216                        for (c1, c2) in from_str.as_str().chars().zip(to_str.as_str().chars()) {
1217                            new_dict.set_item(
1218                                &*vm.new_pyobj(c1 as u32),
1219                                vm.new_pyobj(c2 as u32),
1220                                vm,
1221                            )?;
1222                        }
1223                        if let OptionalArg::Present(none_str) = none_str {
1224                            for c in none_str.as_str().chars() {
1225                                new_dict.set_item(&*vm.new_pyobj(c as u32), vm.ctx.none(), vm)?;
1226                            }
1227                        }
1228                        Ok(new_dict.to_pyobject(vm))
1229                    } else {
1230                        Err(vm.new_value_error(
1231                            "the first two maketrans arguments must have equal length".to_owned(),
1232                        ))
1233                    }
1234                }
1235                _ => Err(vm.new_type_error(
1236                    "first maketrans argument must be a string if there is a second argument"
1237                        .to_owned(),
1238                )),
1239            }
1240        } else {
1241            // dict_str must be a dict
1242            match dict_or_str.downcast::<PyDict>() {
1243                Ok(dict) => {
1244                    for (key, val) in dict {
1245                        // FIXME: ints are key-compatible
1246                        if let Some(num) = key.payload::<PyInt>() {
1247                            new_dict.set_item(
1248                                &*num.as_bigint().to_i32().to_pyobject(vm),
1249                                val,
1250                                vm,
1251                            )?;
1252                        } else if let Some(string) = key.payload::<PyStr>() {
1253                            if string.len() == 1 {
1254                                let num_value = string.as_str().chars().next().unwrap() as u32;
1255                                new_dict.set_item(&*num_value.to_pyobject(vm), val, vm)?;
1256                            } else {
1257                                return Err(vm.new_value_error(
1258                                    "string keys in translate table must be of length 1".to_owned(),
1259                                ));
1260                            }
1261                        }
1262                    }
1263                    Ok(new_dict.to_pyobject(vm))
1264                }
1265                _ => Err(vm.new_value_error(
1266                    "if you give only one argument to maketrans it must be a dict".to_owned(),
1267                )),
1268            }
1269        }
1270    }
1271
1272    #[pymethod]
1273    fn encode(zelf: PyRef<Self>, args: EncodeArgs, vm: &VirtualMachine) -> PyResult<PyBytesRef> {
1274        encode_string(zelf, args.encoding, args.errors, vm)
1275    }
1276
1277    #[pymethod(magic)]
1278    fn getnewargs(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyObjectRef {
1279        (zelf.as_str(),).to_pyobject(vm)
1280    }
1281}
1282
1283#[pyclass]
1284impl PyRef<PyStr> {
1285    #[pymethod(magic)]
1286    fn str(self, vm: &VirtualMachine) -> PyRefExact<PyStr> {
1287        self.into_exact_or(&vm.ctx, |zelf| unsafe {
1288            // Creating a copy with same kind is safe
1289            PyStr::new_str_unchecked(zelf.bytes.to_vec(), zelf.kind.kind()).into_exact_ref(&vm.ctx)
1290        })
1291    }
1292}
1293
1294impl PyStrRef {
1295    pub fn concat_in_place(&mut self, other: &str, vm: &VirtualMachine) {
1296        // TODO: call [A]Rc::get_mut on the str to try to mutate the data in place
1297        if other.is_empty() {
1298            return;
1299        }
1300        let mut s = String::with_capacity(self.byte_len() + other.len());
1301        s.push_str(self.as_ref());
1302        s.push_str(other);
1303        *self = PyStr::from(s).into_ref(&vm.ctx);
1304    }
1305}
1306
impl Representable for PyStr {
    /// Produces the `repr()` text by delegating to the string's own `repr`.
    #[inline]
    fn repr_str(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<String> {
        zelf.repr(vm)
    }
}
1313
impl Hashable for PyStr {
    /// Returns the string's hash by delegating to its own `hash` method; this
    /// wrapper itself never fails.
    #[inline]
    fn hash(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<hash::PyHash> {
        Ok(zelf.hash(vm))
    }
}
1320
1321impl Comparable for PyStr {
1322    fn cmp(
1323        zelf: &Py<Self>,
1324        other: &PyObject,
1325        op: PyComparisonOp,
1326        _vm: &VirtualMachine,
1327    ) -> PyResult<PyComparisonValue> {
1328        if let Some(res) = op.identical_optimization(zelf, other) {
1329            return Ok(res.into());
1330        }
1331        let other = class_or_notimplemented!(Self, other);
1332        Ok(op.eval_ord(zelf.as_str().cmp(other.as_str())).into())
1333    }
1334}
1335
1336impl Iterable for PyStr {
1337    fn iter(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult {
1338        Ok(PyStrIterator {
1339            internal: PyMutex::new((PositionIterInternal::new(zelf, 0), 0)),
1340        }
1341        .into_pyobject(vm))
1342    }
1343}
1344
impl AsMapping for PyStr {
    /// Mapping-protocol slots for `str`: only `length` and `subscript` are
    /// provided; every other slot stays not-implemented.
    fn as_mapping() -> &'static PyMappingMethods {
        // Built lazily on first access and shared afterwards.
        static AS_MAPPING: Lazy<PyMappingMethods> = Lazy::new(|| PyMappingMethods {
            length: atomic_func!(|mapping, _vm| Ok(PyStr::mapping_downcast(mapping).len())),
            subscript: atomic_func!(
                |mapping, needle, vm| PyStr::mapping_downcast(mapping)._getitem(needle, vm)
            ),
            ..PyMappingMethods::NOT_IMPLEMENTED
        });
        &AS_MAPPING
    }
}
1357
impl AsNumber for PyStr {
    /// Number-protocol slots for `str`: only `remainder` (the `%` operator)
    /// is provided.
    fn as_number() -> &'static PyNumberMethods {
        static AS_NUMBER: PyNumberMethods = PyNumberMethods {
            remainder: Some(|a, b, vm| {
                // Only handle `%` when the left operand is a str; otherwise
                // answer NotImplemented.
                if let Some(a) = a.downcast_ref::<PyStr>() {
                    a.modulo(b.to_owned(), vm).to_pyresult(vm)
                } else {
                    Ok(vm.ctx.not_implemented())
                }
            }),
            ..PyNumberMethods::NOT_IMPLEMENTED
        };
        &AS_NUMBER
    }
}
1373
impl AsSequence for PyStr {
    /// Sequence-protocol slots for `str`: length, concatenation, repetition,
    /// indexing and containment. Other slots stay not-implemented.
    fn as_sequence() -> &'static PySequenceMethods {
        // Built lazily on first access and shared afterwards.
        static AS_SEQUENCE: Lazy<PySequenceMethods> = Lazy::new(|| PySequenceMethods {
            length: atomic_func!(|seq, _vm| Ok(PyStr::sequence_downcast(seq).len())),
            concat: atomic_func!(|seq, other, vm| {
                let zelf = PyStr::sequence_downcast(seq);
                PyStr::add(zelf.to_owned(), other.to_owned(), vm)
            }),
            repeat: atomic_func!(|seq, n, vm| {
                let zelf = PyStr::sequence_downcast(seq);
                PyStr::repeat(zelf.to_owned(), n, vm).map(|x| x.into())
            }),
            item: atomic_func!(|seq, i, vm| {
                let zelf = PyStr::sequence_downcast(seq);
                // The indexed character is wrapped in a new one-character string.
                zelf.getitem_by_index(vm, i)
                    .map(|x| zelf.new_substr(x.to_string()).into_ref(&vm.ctx).into())
            }),
            contains: atomic_func!(
                |seq, needle, vm| PyStr::sequence_downcast(seq)._contains(needle, vm)
            ),
            ..PySequenceMethods::NOT_IMPLEMENTED
        });
        &AS_SEQUENCE
    }
}
1399
// Arguments accepted by `str.encode`; both may be omitted.
#[derive(FromArgs)]
struct EncodeArgs {
    // Codec name; `encode_string` substitutes the default encoding when absent.
    #[pyarg(any, default)]
    encoding: Option<PyStrRef>,
    // Error-handling scheme, forwarded unchanged to the codec registry.
    #[pyarg(any, default)]
    errors: Option<PyStrRef>,
}
1407
1408pub(crate) fn encode_string(
1409    s: PyStrRef,
1410    encoding: Option<PyStrRef>,
1411    errors: Option<PyStrRef>,
1412    vm: &VirtualMachine,
1413) -> PyResult<PyBytesRef> {
1414    let encoding = encoding
1415        .as_ref()
1416        .map_or(crate::codecs::DEFAULT_ENCODING, |s| s.as_str());
1417    vm.state.codec_registry.encode_text(s, encoding, errors, vm)
1418}
1419
impl PyPayload for PyStr {
    /// The Python type object backing this payload: the context's `str` type.
    fn class(ctx: &Context) -> &'static Py<PyType> {
        ctx.types.str_type
    }
}
1425
impl ToPyObject for String {
    /// Converts the owned string into a new Python `str` object.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self).into()
    }
}
1431
impl ToPyObject for char {
    /// Converts the character into a new one-character Python `str` object.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self.to_string()).into()
    }
}
1437
impl ToPyObject for &str {
    /// Converts the string slice into a new Python `str` object.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self).into()
    }
}
1443
impl ToPyObject for &String {
    /// Clones the borrowed string and converts the copy into a new Python
    /// `str` object.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self.clone()).into()
    }
}
1449
impl ToPyObject for &AsciiStr {
    /// Converts the borrowed ASCII string into a new Python `str` object.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self).into()
    }
}
1455
impl ToPyObject for AsciiString {
    /// Converts the owned ASCII string into a new Python `str` object.
    fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
        vm.ctx.new_str(self).into()
    }
}
1461
// Split arguments specialised to `str`: the generic `anystr::SplitArgs`
// instantiated with `PyStrRef`.
type SplitArgs = anystr::SplitArgs<PyStrRef>;
1463
// Positional arguments shared by find/rfind/index/rindex/count.
#[derive(FromArgs)]
pub struct FindArgs {
    // Substring to look for (required).
    #[pyarg(positional)]
    sub: PyStrRef,
    // Optional start of the search range.
    #[pyarg(positional, default)]
    start: Option<PyIntRef>,
    // Optional end of the search range.
    #[pyarg(positional, default)]
    end: Option<PyIntRef>,
}
1473
1474impl FindArgs {
1475    fn get_value(self, len: usize) -> (PyStrRef, std::ops::Range<usize>) {
1476        let range = adjust_indices(self.start, self.end, len);
1477        (self.sub, range)
1478    }
1479}
1480
/// Registers the methods of `PyStr` and `PyStrIterator` on their type objects
/// (`str_type` and `str_iterator_type`) in the given context.
pub fn init(ctx: &Context) {
    PyStr::extend_class(ctx, ctx.types.str_type);

    PyStrIterator::extend_class(ctx, ctx.types.str_iterator_type);
}
1486
1487impl SliceableSequenceOp for PyStr {
1488    type Item = char;
1489    type Sliced = String;
1490
1491    fn do_get(&self, index: usize) -> Self::Item {
1492        if self.is_ascii() {
1493            self.bytes[index] as char
1494        } else {
1495            self.as_str().chars().nth(index).unwrap()
1496        }
1497    }
1498
1499    fn do_slice(&self, range: Range<usize>) -> Self::Sliced {
1500        let value = self.as_str();
1501        if self.is_ascii() {
1502            value[range].to_owned()
1503        } else {
1504            rustpython_common::str::get_chars(value, range).to_owned()
1505        }
1506    }
1507
1508    fn do_slice_reverse(&self, range: Range<usize>) -> Self::Sliced {
1509        if self.is_ascii() {
1510            // this is an ascii string
1511            let mut v = self.bytes[range].to_vec();
1512            v.reverse();
1513            unsafe {
1514                // SAFETY: an ascii string is always utf8
1515                String::from_utf8_unchecked(v)
1516            }
1517        } else {
1518            let mut s = String::with_capacity(self.bytes.len());
1519            s.extend(
1520                self.as_str()
1521                    .chars()
1522                    .rev()
1523                    .skip(self.char_len() - range.end)
1524                    .take(range.end - range.start),
1525            );
1526            s
1527        }
1528    }
1529
1530    fn do_stepped_slice(&self, range: Range<usize>, step: usize) -> Self::Sliced {
1531        if self.is_ascii() {
1532            let v = self.bytes[range].iter().copied().step_by(step).collect();
1533            unsafe {
1534                // SAFETY: Any subset of ascii string is a valid utf8 string
1535                String::from_utf8_unchecked(v)
1536            }
1537        } else {
1538            let mut s = String::with_capacity(2 * ((range.len() / step) + 1));
1539            s.extend(
1540                self.as_str()
1541                    .chars()
1542                    .skip(range.start)
1543                    .take(range.end - range.start)
1544                    .step_by(step),
1545            );
1546            s
1547        }
1548    }
1549
1550    fn do_stepped_slice_reverse(&self, range: Range<usize>, step: usize) -> Self::Sliced {
1551        if self.is_ascii() {
1552            // this is an ascii string
1553            let v: Vec<u8> = self.bytes[range]
1554                .iter()
1555                .rev()
1556                .copied()
1557                .step_by(step)
1558                .collect();
1559            // TODO: from_utf8_unchecked?
1560            String::from_utf8(v).unwrap()
1561        } else {
1562            // not ascii, so the codepoints have to be at least 2 bytes each
1563            let mut s = String::with_capacity(2 * ((range.len() / step) + 1));
1564            s.extend(
1565                self.as_str()
1566                    .chars()
1567                    .rev()
1568                    .skip(self.char_len() - range.end)
1569                    .take(range.end - range.start)
1570                    .step_by(step),
1571            );
1572            s
1573        }
1574    }
1575
1576    fn empty() -> Self::Sliced {
1577        String::new()
1578    }
1579
1580    fn len(&self) -> usize {
1581        self.char_len()
1582    }
1583}
1584
impl AsRef<str> for PyRefExact<PyStr> {
    /// Borrows the string contents as `&str`.
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
1590
impl AsRef<str> for PyExact<PyStr> {
    /// Borrows the string contents as `&str`.
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
1596
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Interpreter;

    /// `title()` produces the expected title-cased text, including for Greek
    /// upper- and titlecase letters.
    #[test]
    fn str_title() {
        // Each entry is (expected output, input).
        let cases = [
            (" Hello ", " hello "),
            ("Hello ", "hello "),
            ("Hello ", "Hello "),
            ("Format This As Title String", "fOrMaT thIs aS titLe String"),
            ("Format,This-As*Title;String", "fOrMaT,thIs-aS*titLe;String"),
            ("Getint", "getInt"),
            ("Greek Ωppercases ...", "greek ωppercases ..."),
            ("Greek ῼitlecases ...", "greek ῳitlecases ..."),
        ];
        for (expected, input) in cases {
            assert_eq!(PyStr::from(input).title().as_str(), expected);
        }
    }

    /// `istitle()` accepts title-cased text and rejects everything else,
    /// including the empty string.
    #[test]
    fn str_istitle() {
        let accepted = [
            "A",
            "A Titlecased Line",
            "A\nTitlecased Line",
            "A Titlecased, Line",
            "Greek Ωppercases ...",
            "Greek ῼitlecases ...",
        ];
        for text in accepted {
            assert!(PyStr::from(text).istitle());
        }

        let rejected = [
            "",
            "a",
            "\n",
            "Not a capitalized String",
            "Not\ta Titlecase String",
            "Not--a Titlecase String",
            "NOT",
        ];
        for text in rejected {
            assert!(!PyStr::from(text).istitle());
        }
    }

    /// A table built by `maketrans` from a dict drives `translate`, and a
    /// non-subscriptable table is rejected with TypeError.
    #[test]
    fn str_maketrans_and_translate() {
        Interpreter::without_stdlib(Default::default()).enter(|vm| {
            let mapping = vm.ctx.new_dict();
            mapping
                .set_item("a", vm.ctx.new_str("🎅").into(), vm)
                .unwrap();
            mapping.set_item("b", vm.ctx.none(), vm).unwrap();
            mapping
                .set_item("c", vm.ctx.new_str(ascii!("xda")).into(), vm)
                .unwrap();
            let table =
                PyStr::maketrans(mapping.into(), OptionalArg::Missing, OptionalArg::Missing, vm)
                    .unwrap();
            let text = PyStr::from("abc");
            let output = text.translate(table, vm).unwrap();
            assert_eq!(output, "🎅xda".to_owned());
            let failure = text.translate(vm.ctx.new_int(3).into(), vm);
            assert_eq!("TypeError", &*failure.unwrap_err().class().name(),);
        })
    }
}
1670
impl AnyStrWrapper for PyStrRef {
    type Str = str;
    /// Borrows the wrapped Python string as `&str`.
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
1677
/// `String` is the growable container for `str`; every method forwards to the
/// inherent `String` method of the same name.
impl AnyStrContainer<str> for String {
    fn new() -> Self {
        String::new()
    }

    fn with_capacity(capacity: usize) -> Self {
        String::with_capacity(capacity)
    }

    fn push_str(&mut self, other: &str) {
        String::push_str(self, other)
    }
}
1691
1692impl AnyStr for str {
1693    type Char = char;
1694    type Container = String;
1695
1696    fn element_bytes_len(c: char) -> usize {
1697        c.len_utf8()
1698    }
1699
1700    fn to_container(&self) -> Self::Container {
1701        self.to_owned()
1702    }
1703
1704    fn as_bytes(&self) -> &[u8] {
1705        self.as_bytes()
1706    }
1707
1708    fn as_utf8_str(&self) -> Result<&str, std::str::Utf8Error> {
1709        Ok(self)
1710    }
1711
1712    fn chars(&self) -> impl Iterator<Item = char> {
1713        str::chars(self)
1714    }
1715
1716    fn elements(&self) -> impl Iterator<Item = char> {
1717        str::chars(self)
1718    }
1719
1720    fn get_bytes(&self, range: std::ops::Range<usize>) -> &Self {
1721        &self[range]
1722    }
1723
1724    fn get_chars(&self, range: std::ops::Range<usize>) -> &Self {
1725        rustpython_common::str::get_chars(self, range)
1726    }
1727
1728    fn is_empty(&self) -> bool {
1729        Self::is_empty(self)
1730    }
1731
1732    fn bytes_len(&self) -> usize {
1733        Self::len(self)
1734    }
1735
1736    fn py_split_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
1737    where
1738        F: Fn(&Self) -> PyObjectRef,
1739    {
1740        // CPython split_whitespace
1741        let mut splits = Vec::new();
1742        let mut last_offset = 0;
1743        let mut count = maxsplit;
1744        for (offset, _) in self.match_indices(|c: char| c.is_ascii_whitespace() || c == '\x0b') {
1745            if last_offset == offset {
1746                last_offset += 1;
1747                continue;
1748            }
1749            if count == 0 {
1750                break;
1751            }
1752            splits.push(convert(&self[last_offset..offset]));
1753            last_offset = offset + 1;
1754            count -= 1;
1755        }
1756        if last_offset != self.len() {
1757            splits.push(convert(&self[last_offset..]));
1758        }
1759        splits
1760    }
1761
1762    fn py_rsplit_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
1763    where
1764        F: Fn(&Self) -> PyObjectRef,
1765    {
1766        // CPython rsplit_whitespace
1767        let mut splits = Vec::new();
1768        let mut last_offset = self.len();
1769        let mut count = maxsplit;
1770        for (offset, _) in self.rmatch_indices(|c: char| c.is_ascii_whitespace() || c == '\x0b') {
1771            if last_offset == offset + 1 {
1772                last_offset -= 1;
1773                continue;
1774            }
1775            if count == 0 {
1776                break;
1777            }
1778            splits.push(convert(&self[offset + 1..last_offset]));
1779            last_offset = offset;
1780            count -= 1;
1781        }
1782        if last_offset != 0 {
1783            splits.push(convert(&self[..last_offset]));
1784        }
1785        splits
1786    }
1787}
1788
/// The unique reference of an interned `PyStr`.
/// Always intended to be used as a static reference.
pub type PyStrInterned = PyInterned<PyStr>;
1792
impl PyStrInterned {
    /// Returns an owned, exact-typed reference to this interned string.
    #[inline]
    pub fn to_exact(&'static self) -> PyRefExact<PyStr> {
        // NOTE(review): `new_unchecked` presumably requires the object to be an
        // exact `str` rather than a subclass instance -- confirm that interned
        // strings always satisfy this.
        unsafe { PyRefExact::new_unchecked(self.to_owned()) }
    }
}
1799
impl std::fmt::Display for PyStrInterned {
    /// Formats exactly like the underlying `&str`, honouring the formatter's
    /// options.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(self.as_str(), f)
    }
}
1805
impl AsRef<str> for PyStrInterned {
    /// Borrows the interned string contents as `&str`.
    #[inline(always)]
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
Morty Proxy This is a proxified and sanitized view of the page, visit original site.