1use super::{
2 int::{PyInt, PyIntRef},
3 iter::IterStatus::{self, Exhausted},
4 PositionIterInternal, PyBytesRef, PyDict, PyTupleRef, PyType, PyTypeRef,
5};
6use crate::{
7 anystr::{self, adjust_indices, AnyStr, AnyStrContainer, AnyStrWrapper},
8 atomic_func,
9 class::PyClassImpl,
10 common::str::{BorrowedStr, PyStrKind, PyStrKindData},
11 convert::{IntoPyException, ToPyException, ToPyObject, ToPyResult},
12 format::{format, format_map},
13 function::{ArgIterable, ArgSize, FuncArgs, OptionalArg, OptionalOption, PyComparisonValue},
14 intern::PyInterned,
15 object::{Traverse, TraverseFn},
16 protocol::{PyIterReturn, PyMappingMethods, PyNumberMethods, PySequenceMethods},
17 sequence::SequenceExt,
18 sliceable::{SequenceIndex, SliceableSequenceOp},
19 types::{
20 AsMapping, AsNumber, AsSequence, Comparable, Constructor, Hashable, IterNext, Iterable,
21 PyComparisonOp, Representable, SelfIter, Unconstructible,
22 },
23 AsObject, Context, Py, PyExact, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, PyResult,
24 TryFromBorrowedObject, VirtualMachine,
25};
26use ascii::{AsciiStr, AsciiString};
27use bstr::ByteSlice;
28use itertools::Itertools;
29use num_traits::ToPrimitive;
30use once_cell::sync::Lazy;
31use rustpython_common::{
32 ascii,
33 atomic::{self, PyAtomic, Radium},
34 hash,
35 lock::PyMutex,
36};
37use rustpython_format::{FormatSpec, FormatString, FromTemplate};
38use std::{char, fmt, ops::Range, string::ToString};
39use unic_ucd_bidi::BidiClass;
40use unic_ucd_category::GeneralCategory;
41use unic_ucd_ident::{is_xid_continue, is_xid_start};
42use unicode_casing::CharExt;
43
44impl<'a> TryFromBorrowedObject<'a> for String {
45 fn try_from_borrowed_object(vm: &VirtualMachine, obj: &'a PyObject) -> PyResult<Self> {
46 obj.try_value_with(|pystr: &PyStr| Ok(pystr.as_str().to_owned()), vm)
47 }
48}
49
50impl<'a> TryFromBorrowedObject<'a> for &'a str {
51 fn try_from_borrowed_object(vm: &VirtualMachine, obj: &'a PyObject) -> PyResult<Self> {
52 let pystr: &Py<PyStr> = TryFromBorrowedObject::try_from_borrowed_object(vm, obj)?;
53 Ok(pystr.as_str())
54 }
55}
56
57#[pyclass(module = false, name = "str")]
58pub struct PyStr {
59 bytes: Box<[u8]>,
60 kind: PyStrKindData,
61 hash: PyAtomic<hash::PyHash>,
62}
63
64impl fmt::Debug for PyStr {
65 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
66 f.debug_struct("PyStr")
67 .field("value", &self.as_str())
68 .field("kind", &self.kind)
69 .field("hash", &self.hash)
70 .finish()
71 }
72}
73
74impl AsRef<str> for PyStr {
75 fn as_ref(&self) -> &str {
76 self.as_str()
77 }
78}
79
80impl AsRef<str> for Py<PyStr> {
81 fn as_ref(&self) -> &str {
82 self.as_str()
83 }
84}
85
86impl AsRef<str> for PyStrRef {
87 fn as_ref(&self) -> &str {
88 self.as_str()
89 }
90}
91
92impl<'a> From<&'a AsciiStr> for PyStr {
93 fn from(s: &'a AsciiStr) -> Self {
94 s.to_owned().into()
95 }
96}
97
98impl From<AsciiString> for PyStr {
99 fn from(s: AsciiString) -> Self {
100 unsafe { Self::new_ascii_unchecked(s.into()) }
101 }
102}
103
104impl<'a> From<&'a str> for PyStr {
105 fn from(s: &'a str) -> Self {
106 s.to_owned().into()
107 }
108}
109
110impl From<String> for PyStr {
111 fn from(s: String) -> Self {
112 s.into_boxed_str().into()
113 }
114}
115
116impl<'a> From<std::borrow::Cow<'a, str>> for PyStr {
117 fn from(s: std::borrow::Cow<'a, str>) -> Self {
118 s.into_owned().into()
119 }
120}
121
122impl From<Box<str>> for PyStr {
123 #[inline]
124 fn from(value: Box<str>) -> Self {
125 let is_ascii = value.is_ascii();
128 let bytes = value.into_boxed_bytes();
129 let kind = if is_ascii {
130 PyStrKind::Ascii
131 } else {
132 PyStrKind::Utf8
133 }
134 .new_data();
135 Self {
136 bytes,
137 kind,
138 hash: Radium::new(hash::SENTINEL),
139 }
140 }
141}
142
143pub type PyStrRef = PyRef<PyStr>;
144
145impl fmt::Display for PyStr {
146 #[inline]
147 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148 fmt::Display::fmt(self.as_str(), f)
149 }
150}
151
152pub trait AsPyStr<'a>
153where
154 Self: 'a,
155{
156 #[allow(clippy::wrong_self_convention)] fn as_pystr(self, ctx: &Context) -> &'a Py<PyStr>;
158}
159
160impl<'a> AsPyStr<'a> for &'a Py<PyStr> {
161 #[inline]
162 fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
163 self
164 }
165}
166
167impl<'a> AsPyStr<'a> for &'a PyStrRef {
168 #[inline]
169 fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
170 self
171 }
172}
173
174impl AsPyStr<'static> for &'static str {
175 #[inline]
176 fn as_pystr(self, ctx: &Context) -> &'static Py<PyStr> {
177 ctx.intern_str(self)
178 }
179}
180
181impl<'a> AsPyStr<'a> for &'a PyStrInterned {
182 #[inline]
183 fn as_pystr(self, _ctx: &Context) -> &'a Py<PyStr> {
184 self
185 }
186}
187
188#[pyclass(module = false, name = "str_iterator", traverse = "manual")]
189#[derive(Debug)]
190pub struct PyStrIterator {
191 internal: PyMutex<(PositionIterInternal<PyStrRef>, usize)>,
192}
193
194unsafe impl Traverse for PyStrIterator {
195 fn traverse(&self, tracer: &mut TraverseFn) {
196 self.internal.lock().0.traverse(tracer);
198 }
199}
200
201impl PyPayload for PyStrIterator {
202 fn class(ctx: &Context) -> &'static Py<PyType> {
203 ctx.types.str_iterator_type
204 }
205}
206
207#[pyclass(with(Unconstructible, IterNext, Iterable))]
208impl PyStrIterator {
209 #[pymethod(magic)]
210 fn length_hint(&self) -> usize {
211 self.internal.lock().0.length_hint(|obj| obj.char_len())
212 }
213
214 #[pymethod(magic)]
215 fn setstate(&self, state: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
216 let mut internal = self.internal.lock();
217 internal.1 = usize::MAX;
218 internal
219 .0
220 .set_state(state, |obj, pos| pos.min(obj.char_len()), vm)
221 }
222
223 #[pymethod(magic)]
224 fn reduce(&self, vm: &VirtualMachine) -> PyTupleRef {
225 self.internal
226 .lock()
227 .0
228 .builtins_iter_reduce(|x| x.clone().into(), vm)
229 }
230}
231impl Unconstructible for PyStrIterator {}
232
233impl SelfIter for PyStrIterator {}
234impl IterNext for PyStrIterator {
235 fn next(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<PyIterReturn> {
236 let mut internal = zelf.internal.lock();
237
238 if let IterStatus::Active(s) = &internal.0.status {
239 let value = s.as_str();
240
241 if internal.1 == usize::MAX {
242 if let Some((offset, ch)) = value.char_indices().nth(internal.0.position) {
243 internal.0.position += 1;
244 internal.1 = offset + ch.len_utf8();
245 return Ok(PyIterReturn::Return(ch.to_pyobject(vm)));
246 }
247 } else if let Some(value) = value.get(internal.1..) {
248 if let Some(ch) = value.chars().next() {
249 internal.0.position += 1;
250 internal.1 += ch.len_utf8();
251 return Ok(PyIterReturn::Return(ch.to_pyobject(vm)));
252 }
253 }
254 internal.0.status = Exhausted;
255 }
256 Ok(PyIterReturn::StopIteration(None))
257 }
258}
259
260#[derive(FromArgs)]
261pub struct StrArgs {
262 #[pyarg(any, optional)]
263 object: OptionalArg<PyObjectRef>,
264 #[pyarg(any, optional)]
265 encoding: OptionalArg<PyStrRef>,
266 #[pyarg(any, optional)]
267 errors: OptionalArg<PyStrRef>,
268}
269
270impl Constructor for PyStr {
271 type Args = StrArgs;
272
273 fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult {
274 let string: PyStrRef = match args.object {
275 OptionalArg::Present(input) => {
276 if let OptionalArg::Present(enc) = args.encoding {
277 vm.state.codec_registry.decode_text(
278 input,
279 enc.as_str(),
280 args.errors.into_option(),
281 vm,
282 )?
283 } else {
284 input.str(vm)?
285 }
286 }
287 OptionalArg::Missing => {
288 PyStr::from(String::new()).into_ref_with_type(vm, cls.clone())?
289 }
290 };
291 if string.class().is(&cls) {
292 Ok(string.into())
293 } else {
294 PyStr::from(string.as_str())
295 .into_ref_with_type(vm, cls)
296 .map(Into::into)
297 }
298 }
299}
300
301impl PyStr {
302 pub(crate) unsafe fn new_str_unchecked(bytes: Vec<u8>, kind: PyStrKind) -> Self {
304 let s = Self {
305 bytes: bytes.into_boxed_slice(),
306 kind: kind.new_data(),
307 hash: Radium::new(hash::SENTINEL),
308 };
309 debug_assert!(matches!(s.kind, PyStrKindData::Ascii) || !s.as_str().is_ascii());
310 s
311 }
312
313 pub unsafe fn new_ascii_unchecked(bytes: Vec<u8>) -> Self {
316 Self::new_str_unchecked(bytes, PyStrKind::Ascii)
317 }
318
319 pub fn new_ref(zelf: impl Into<Self>, ctx: &Context) -> PyRef<Self> {
320 let zelf = zelf.into();
321 PyRef::new_ref(zelf, ctx.types.str_type.to_owned(), None)
322 }
323
324 fn new_substr(&self, s: String) -> Self {
325 let kind = if self.kind.kind() == PyStrKind::Ascii || s.is_ascii() {
326 PyStrKind::Ascii
327 } else {
328 PyStrKind::Utf8
329 };
330 unsafe {
331 Self::new_str_unchecked(s.into_bytes(), kind)
333 }
334 }
335
336 #[inline]
337 pub fn as_str(&self) -> &str {
338 unsafe {
339 std::str::from_utf8_unchecked(&self.bytes)
341 }
342 }
343
344 fn char_all<F>(&self, test: F) -> bool
345 where
346 F: Fn(char) -> bool,
347 {
348 match self.kind.kind() {
349 PyStrKind::Ascii => self.bytes.iter().all(|&x| test(char::from(x))),
350 PyStrKind::Utf8 => self.as_str().chars().all(test),
351 }
352 }
353
354 fn borrow(&self) -> &BorrowedStr {
355 unsafe { std::mem::transmute(self) }
356 }
357
358 fn repeat(zelf: PyRef<Self>, value: isize, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
359 if value == 0 && zelf.class().is(vm.ctx.types.str_type) {
360 return Ok(vm.ctx.empty_str.to_owned());
363 }
364 if (value == 1 || zelf.is_empty()) && zelf.class().is(vm.ctx.types.str_type) {
365 return Ok(zelf);
370 }
371 zelf.as_str()
372 .as_bytes()
373 .mul(vm, value)
374 .map(|x| Self::from(unsafe { String::from_utf8_unchecked(x) }).into_ref(&vm.ctx))
375 }
376}
377
378#[pyclass(
379 flags(BASETYPE),
380 with(
381 PyRef,
382 AsMapping,
383 AsNumber,
384 AsSequence,
385 Representable,
386 Hashable,
387 Comparable,
388 Iterable,
389 Constructor
390 )
391)]
392impl PyStr {
393 #[pymethod(magic)]
394 fn add(zelf: PyRef<Self>, other: PyObjectRef, vm: &VirtualMachine) -> PyResult {
395 if let Some(other) = other.payload::<PyStr>() {
396 let bytes = zelf.as_str().py_add(other.as_ref());
397 Ok(unsafe {
398 let kind = zelf.kind.kind() | other.kind.kind();
400 Self::new_str_unchecked(bytes.into_bytes(), kind)
401 }
402 .to_pyobject(vm))
403 } else if let Some(radd) = vm.get_method(other.clone(), identifier!(vm, __radd__)) {
404 radd?.call((zelf,), vm)
406 } else {
407 Err(vm.new_type_error(format!(
408 "can only concatenate str (not \"{}\") to str",
409 other.class().name()
410 )))
411 }
412 }
413
414 fn _contains(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult<bool> {
415 if let Some(needle) = needle.payload::<Self>() {
416 Ok(self.as_str().contains(needle.as_str()))
417 } else {
418 Err(vm.new_type_error(format!(
419 "'in <string>' requires string as left operand, not {}",
420 needle.class().name()
421 )))
422 }
423 }
424
425 #[pymethod(magic)]
426 fn contains(&self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult<bool> {
427 self._contains(&needle, vm)
428 }
429
430 fn _getitem(&self, needle: &PyObject, vm: &VirtualMachine) -> PyResult {
431 match SequenceIndex::try_from_borrowed_object(vm, needle, "str")? {
432 SequenceIndex::Int(i) => self.getitem_by_index(vm, i).map(|x| x.to_string()),
433 SequenceIndex::Slice(slice) => self.getitem_by_slice(vm, slice),
434 }
435 .map(|x| self.new_substr(x).into_ref(&vm.ctx).into())
436 }
437
438 #[pymethod(magic)]
439 fn getitem(&self, needle: PyObjectRef, vm: &VirtualMachine) -> PyResult {
440 self._getitem(&needle, vm)
441 }
442
443 #[inline]
444 pub(crate) fn hash(&self, vm: &VirtualMachine) -> hash::PyHash {
445 match self.hash.load(atomic::Ordering::Relaxed) {
446 hash::SENTINEL => self._compute_hash(vm),
447 hash => hash,
448 }
449 }
450 #[cold]
451 fn _compute_hash(&self, vm: &VirtualMachine) -> hash::PyHash {
452 let hash_val = vm.state.hash_secret.hash_str(self.as_str());
453 debug_assert_ne!(hash_val, hash::SENTINEL);
454 self.hash.store(hash_val, atomic::Ordering::Relaxed);
456 hash_val
457 }
458
459 #[inline]
460 pub fn byte_len(&self) -> usize {
461 self.bytes.len()
462 }
463 #[inline]
464 pub fn is_empty(&self) -> bool {
465 self.bytes.is_empty()
466 }
467
468 #[pymethod(name = "__len__")]
469 #[inline]
470 pub fn char_len(&self) -> usize {
471 self.borrow().char_len()
472 }
473
474 #[pymethod(name = "isascii")]
475 #[inline(always)]
476 pub fn is_ascii(&self) -> bool {
477 match self.kind {
478 PyStrKindData::Ascii => true,
479 PyStrKindData::Utf8(_) => false,
480 }
481 }
482
483 #[pymethod(magic)]
484 fn sizeof(&self) -> usize {
485 std::mem::size_of::<Self>() + self.byte_len() * std::mem::size_of::<u8>()
486 }
487
488 #[pymethod(name = "__rmul__")]
489 #[pymethod(magic)]
490 fn mul(zelf: PyRef<Self>, value: ArgSize, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
491 Self::repeat(zelf, value.into(), vm)
492 }
493
494 #[inline]
495 pub(crate) fn repr(&self, vm: &VirtualMachine) -> PyResult<String> {
496 use crate::literal::escape::UnicodeEscape;
497 let escape = UnicodeEscape::new_repr(self.as_str());
498 escape
499 .str_repr()
500 .to_string()
501 .ok_or_else(|| vm.new_overflow_error("string is too long to generate repr".to_owned()))
502 }
503
504 #[pymethod]
505 fn lower(&self) -> String {
506 match self.kind.kind() {
507 PyStrKind::Ascii => self.as_str().to_ascii_lowercase(),
508 PyStrKind::Utf8 => self.as_str().to_lowercase(),
509 }
510 }
511
512 #[pymethod]
514 fn casefold(&self) -> String {
515 caseless::default_case_fold_str(self.as_str())
516 }
517
518 #[pymethod]
519 fn upper(&self) -> String {
520 match self.kind.kind() {
521 PyStrKind::Ascii => self.as_str().to_ascii_uppercase(),
522 PyStrKind::Utf8 => self.as_str().to_uppercase(),
523 }
524 }
525
526 #[pymethod]
527 fn capitalize(&self) -> String {
528 let mut chars = self.as_str().chars();
529 if let Some(first_char) = chars.next() {
530 format!(
531 "{}{}",
532 first_char.to_uppercase(),
533 &chars.as_str().to_lowercase(),
534 )
535 } else {
536 "".to_owned()
537 }
538 }
539
540 #[pymethod]
541 fn split(&self, args: SplitArgs, vm: &VirtualMachine) -> PyResult<Vec<PyObjectRef>> {
542 let elements = match self.kind.kind() {
543 PyStrKind::Ascii => self.as_str().py_split(
544 args,
545 vm,
546 |v, s, vm| {
547 v.as_bytes()
548 .split_str(s)
549 .map(|s| {
550 unsafe { PyStr::new_ascii_unchecked(s.to_owned()) }.to_pyobject(vm)
551 })
552 .collect()
553 },
554 |v, s, n, vm| {
555 v.as_bytes()
556 .splitn_str(n, s)
557 .map(|s| {
558 unsafe { PyStr::new_ascii_unchecked(s.to_owned()) }.to_pyobject(vm)
559 })
560 .collect()
561 },
562 |v, n, vm| {
563 v.as_bytes().py_split_whitespace(n, |s| {
564 unsafe { PyStr::new_ascii_unchecked(s.to_owned()) }.to_pyobject(vm)
565 })
566 },
567 ),
568 PyStrKind::Utf8 => self.as_str().py_split(
569 args,
570 vm,
571 |v, s, vm| v.split(s).map(|s| vm.ctx.new_str(s).into()).collect(),
572 |v, s, n, vm| v.splitn(n, s).map(|s| vm.ctx.new_str(s).into()).collect(),
573 |v, n, vm| v.py_split_whitespace(n, |s| vm.ctx.new_str(s).into()),
574 ),
575 }?;
576 Ok(elements)
577 }
578
579 #[pymethod]
580 fn rsplit(&self, args: SplitArgs, vm: &VirtualMachine) -> PyResult<Vec<PyObjectRef>> {
581 let mut elements = self.as_str().py_split(
582 args,
583 vm,
584 |v, s, vm| v.rsplit(s).map(|s| vm.ctx.new_str(s).into()).collect(),
585 |v, s, n, vm| v.rsplitn(n, s).map(|s| vm.ctx.new_str(s).into()).collect(),
586 |v, n, vm| v.py_rsplit_whitespace(n, |s| vm.ctx.new_str(s).into()),
587 )?;
588 elements.reverse();
591 Ok(elements)
592 }
593
594 #[pymethod]
595 fn strip(&self, chars: OptionalOption<PyStrRef>) -> String {
596 self.as_str()
597 .py_strip(
598 chars,
599 |s, chars| s.trim_matches(|c| chars.contains(c)),
600 |s| s.trim(),
601 )
602 .to_owned()
603 }
604
605 #[pymethod]
606 fn lstrip(
607 zelf: PyRef<Self>,
608 chars: OptionalOption<PyStrRef>,
609 vm: &VirtualMachine,
610 ) -> PyRef<Self> {
611 let s = zelf.as_str();
612 let stripped = s.py_strip(
613 chars,
614 |s, chars| s.trim_start_matches(|c| chars.contains(c)),
615 |s| s.trim_start(),
616 );
617 if s == stripped {
618 zelf
619 } else {
620 vm.ctx.new_str(stripped)
621 }
622 }
623
624 #[pymethod]
625 fn rstrip(
626 zelf: PyRef<Self>,
627 chars: OptionalOption<PyStrRef>,
628 vm: &VirtualMachine,
629 ) -> PyRef<Self> {
630 let s = zelf.as_str();
631 let stripped = s.py_strip(
632 chars,
633 |s, chars| s.trim_end_matches(|c| chars.contains(c)),
634 |s| s.trim_end(),
635 );
636 if s == stripped {
637 zelf
638 } else {
639 vm.ctx.new_str(stripped)
640 }
641 }
642
643 #[pymethod]
644 fn endswith(&self, options: anystr::StartsEndsWithArgs, vm: &VirtualMachine) -> PyResult<bool> {
645 let (affix, substr) =
646 match options.prepare(self.as_str(), self.len(), |s, r| s.get_chars(r)) {
647 Some(x) => x,
648 None => return Ok(false),
649 };
650 substr.py_startsendswith(
651 &affix,
652 "endswith",
653 "str",
654 |s, x: &Py<PyStr>| s.ends_with(x.as_str()),
655 vm,
656 )
657 }
658
659 #[pymethod]
660 fn startswith(
661 &self,
662 options: anystr::StartsEndsWithArgs,
663 vm: &VirtualMachine,
664 ) -> PyResult<bool> {
665 let (affix, substr) =
666 match options.prepare(self.as_str(), self.len(), |s, r| s.get_chars(r)) {
667 Some(x) => x,
668 None => return Ok(false),
669 };
670 substr.py_startsendswith(
671 &affix,
672 "startswith",
673 "str",
674 |s, x: &Py<PyStr>| s.starts_with(x.as_str()),
675 vm,
676 )
677 }
678
679 #[pymethod]
684 fn removeprefix(&self, pref: PyStrRef) -> String {
685 self.as_str()
686 .py_removeprefix(pref.as_str(), pref.byte_len(), |s, p| s.starts_with(p))
687 .to_owned()
688 }
689
690 #[pymethod]
695 fn removesuffix(&self, suffix: PyStrRef) -> String {
696 self.as_str()
697 .py_removesuffix(suffix.as_str(), suffix.byte_len(), |s, p| s.ends_with(p))
698 .to_owned()
699 }
700
701 #[pymethod]
702 fn isalnum(&self) -> bool {
703 !self.bytes.is_empty() && self.char_all(char::is_alphanumeric)
704 }
705
706 #[pymethod]
707 fn isnumeric(&self) -> bool {
708 !self.bytes.is_empty() && self.char_all(char::is_numeric)
709 }
710
711 #[pymethod]
712 fn isdigit(&self) -> bool {
713 let valid_codepoints: [u16; 10] = [
715 0x2070, 0x00B9, 0x00B2, 0x00B3, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079,
716 ];
717 let s = self.as_str();
718 !s.is_empty()
719 && s.chars()
720 .filter(|c| !c.is_ascii_digit())
721 .all(|c| valid_codepoints.contains(&(c as u16)))
722 }
723
724 #[pymethod]
725 fn isdecimal(&self) -> bool {
726 !self.bytes.is_empty()
727 && self.char_all(|c| GeneralCategory::of(c) == GeneralCategory::DecimalNumber)
728 }
729
730 #[pymethod(name = "__mod__")]
731 fn modulo(&self, values: PyObjectRef, vm: &VirtualMachine) -> PyResult<String> {
732 let formatted = self.as_str().py_cformat(values, vm)?;
733 Ok(formatted)
734 }
735
736 #[pymethod(magic)]
737 fn rmod(&self, _values: PyObjectRef, vm: &VirtualMachine) -> PyObjectRef {
738 vm.ctx.not_implemented()
739 }
740
741 #[pymethod]
742 fn format(&self, args: FuncArgs, vm: &VirtualMachine) -> PyResult<String> {
743 let format_str = FormatString::from_str(self.as_str()).map_err(|e| e.to_pyexception(vm))?;
744 format(&format_str, &args, vm)
745 }
746
747 #[pymethod]
752 fn format_map(&self, mapping: PyObjectRef, vm: &VirtualMachine) -> PyResult<String> {
753 let format_string =
754 FormatString::from_str(self.as_str()).map_err(|err| err.to_pyexception(vm))?;
755 format_map(&format_string, &mapping, vm)
756 }
757
758 #[pymethod(name = "__format__")]
759 fn __format__(zelf: PyRef<Self>, spec: PyStrRef, vm: &VirtualMachine) -> PyResult<PyStrRef> {
760 let spec = spec.as_str();
761 if spec.is_empty() {
762 return if zelf.class().is(vm.ctx.types.str_type) {
763 Ok(zelf)
764 } else {
765 zelf.as_object().str(vm)
766 };
767 }
768
769 let s = FormatSpec::parse(spec)
770 .and_then(|format_spec| format_spec.format_string(zelf.borrow()))
771 .map_err(|err| err.into_pyexception(vm))?;
772 Ok(vm.ctx.new_str(s))
773 }
774
775 #[pymethod]
778 fn title(&self) -> String {
779 let mut title = String::with_capacity(self.bytes.len());
780 let mut previous_is_cased = false;
781 for c in self.as_str().chars() {
782 if c.is_lowercase() {
783 if !previous_is_cased {
784 title.extend(c.to_titlecase());
785 } else {
786 title.push(c);
787 }
788 previous_is_cased = true;
789 } else if c.is_uppercase() || c.is_titlecase() {
790 if previous_is_cased {
791 title.extend(c.to_lowercase());
792 } else {
793 title.push(c);
794 }
795 previous_is_cased = true;
796 } else {
797 previous_is_cased = false;
798 title.push(c);
799 }
800 }
801 title
802 }
803
804 #[pymethod]
805 fn swapcase(&self) -> String {
806 let mut swapped_str = String::with_capacity(self.bytes.len());
807 for c in self.as_str().chars() {
808 if c.is_lowercase() {
810 swapped_str.push(c.to_ascii_uppercase());
811 } else if c.is_uppercase() {
812 swapped_str.push(c.to_ascii_lowercase());
813 } else {
814 swapped_str.push(c);
815 }
816 }
817 swapped_str
818 }
819
820 #[pymethod]
821 fn isalpha(&self) -> bool {
822 !self.bytes.is_empty() && self.char_all(char::is_alphabetic)
823 }
824
825 #[pymethod]
826 fn replace(&self, old: PyStrRef, new: PyStrRef, count: OptionalArg<isize>) -> String {
827 let s = self.as_str();
828 match count {
829 OptionalArg::Present(max_count) if max_count >= 0 => {
830 if max_count == 0 || (s.is_empty() && !old.is_empty()) {
831 s.to_owned()
833 } else if s.is_empty() && old.is_empty() {
834 new.as_str().to_owned()
835 } else {
836 s.replacen(old.as_str(), new.as_str(), max_count as usize)
837 }
838 }
839 _ => s.replace(old.as_str(), new.as_str()),
840 }
841 }
842
843 #[pymethod]
859 fn isprintable(&self) -> bool {
860 self.char_all(|c| c == '\u{0020}' || rustpython_literal::char::is_printable(c))
861 }
862
863 #[pymethod]
864 fn isspace(&self) -> bool {
865 use unic_ucd_bidi::bidi_class::abbr_names::*;
866 !self.bytes.is_empty()
867 && self.char_all(|c| {
868 GeneralCategory::of(c) == GeneralCategory::SpaceSeparator
869 || matches!(BidiClass::of(c), WS | B | S)
870 })
871 }
872
873 #[pymethod]
875 fn islower(&self) -> bool {
876 match self.kind.kind() {
877 PyStrKind::Ascii => self.bytes.py_iscase(char::is_lowercase, char::is_uppercase),
878 PyStrKind::Utf8 => self
879 .as_str()
880 .py_iscase(char::is_lowercase, char::is_uppercase),
881 }
882 }
883
884 #[pymethod]
886 fn isupper(&self) -> bool {
887 match self.kind.kind() {
888 PyStrKind::Ascii => self.bytes.py_iscase(char::is_uppercase, char::is_lowercase),
889 PyStrKind::Utf8 => self
890 .as_str()
891 .py_iscase(char::is_uppercase, char::is_lowercase),
892 }
893 }
894
895 #[pymethod]
896 fn splitlines(&self, args: anystr::SplitLinesArgs, vm: &VirtualMachine) -> Vec<PyObjectRef> {
897 let into_wrapper = |s: &str| self.new_substr(s.to_owned()).to_pyobject(vm);
898 let mut elements = Vec::new();
899 let mut last_i = 0;
900 let self_str = self.as_str();
901 let mut enumerated = self_str.char_indices().peekable();
902 while let Some((i, ch)) = enumerated.next() {
903 let end_len = match ch {
904 '\n' => 1,
905 '\r' => {
906 let is_rn = enumerated.peek().map_or(false, |(_, ch)| *ch == '\n');
907 if is_rn {
908 let _ = enumerated.next();
909 2
910 } else {
911 1
912 }
913 }
914 '\x0b' | '\x0c' | '\x1c' | '\x1d' | '\x1e' | '\u{0085}' | '\u{2028}'
915 | '\u{2029}' => ch.len_utf8(),
916 _ => {
917 continue;
918 }
919 };
920 let range = if args.keepends {
921 last_i..i + end_len
922 } else {
923 last_i..i
924 };
925 last_i = i + end_len;
926 elements.push(into_wrapper(&self_str[range]));
927 }
928 if last_i != self_str.len() {
929 elements.push(into_wrapper(&self_str[last_i..]));
930 }
931 elements
932 }
933
934 #[pymethod]
935 fn join(
936 zelf: PyRef<Self>,
937 iterable: ArgIterable<PyStrRef>,
938 vm: &VirtualMachine,
939 ) -> PyResult<PyStrRef> {
940 let iter = iterable.iter(vm)?;
941 let joined = match iter.exactly_one() {
942 Ok(first) => {
943 let first = first?;
944 if first.as_object().class().is(vm.ctx.types.str_type) {
945 return Ok(first);
946 } else {
947 first.as_str().to_owned()
948 }
949 }
950 Err(iter) => zelf.as_str().py_join(iter)?,
951 };
952 Ok(vm.ctx.new_str(joined))
953 }
954
955 #[inline]
957 fn _to_char_idx(r: &str, byte_idx: usize) -> usize {
958 r[..byte_idx].chars().count()
959 }
960
961 #[inline]
962 fn _find<F>(&self, args: FindArgs, find: F) -> Option<usize>
963 where
964 F: Fn(&str, &str) -> Option<usize>,
965 {
966 let (sub, range) = args.get_value(self.len());
967 self.as_str().py_find(sub.as_str(), range, find)
968 }
969
970 #[pymethod]
971 fn find(&self, args: FindArgs) -> isize {
972 self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
973 .map_or(-1, |v| v as isize)
974 }
975
976 #[pymethod]
977 fn rfind(&self, args: FindArgs) -> isize {
978 self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
979 .map_or(-1, |v| v as isize)
980 }
981
982 #[pymethod]
983 fn index(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
984 self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
985 .ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
986 }
987
988 #[pymethod]
989 fn rindex(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
990 self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
991 .ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
992 }
993
994 #[pymethod]
995 fn partition(&self, sep: PyStrRef, vm: &VirtualMachine) -> PyResult {
996 let (front, has_mid, back) = self.as_str().py_partition(
997 sep.as_str(),
998 || self.as_str().splitn(2, sep.as_str()),
999 vm,
1000 )?;
1001 let partition = (
1002 self.new_substr(front),
1003 if has_mid {
1004 sep
1005 } else {
1006 vm.ctx.new_str(ascii!(""))
1007 },
1008 self.new_substr(back),
1009 );
1010 Ok(partition.to_pyobject(vm))
1011 }
1012
1013 #[pymethod]
1014 fn rpartition(&self, sep: PyStrRef, vm: &VirtualMachine) -> PyResult {
1015 let (back, has_mid, front) = self.as_str().py_partition(
1016 sep.as_str(),
1017 || self.as_str().rsplitn(2, sep.as_str()),
1018 vm,
1019 )?;
1020 Ok((
1021 self.new_substr(front),
1022 if has_mid {
1023 sep
1024 } else {
1025 vm.ctx.new_str(ascii!(""))
1026 },
1027 self.new_substr(back),
1028 )
1029 .to_pyobject(vm))
1030 }
1031
1032 #[pymethod]
1035 fn istitle(&self) -> bool {
1036 if self.bytes.is_empty() {
1037 return false;
1038 }
1039
1040 let mut cased = false;
1041 let mut previous_is_cased = false;
1042 for c in self.as_str().chars() {
1043 if c.is_uppercase() || c.is_titlecase() {
1044 if previous_is_cased {
1045 return false;
1046 }
1047 previous_is_cased = true;
1048 cased = true;
1049 } else if c.is_lowercase() {
1050 if !previous_is_cased {
1051 return false;
1052 }
1053 previous_is_cased = true;
1054 cased = true;
1055 } else {
1056 previous_is_cased = false;
1057 }
1058 }
1059 cased
1060 }
1061
1062 #[pymethod]
1063 fn count(&self, args: FindArgs) -> usize {
1064 let (needle, range) = args.get_value(self.len());
1065 self.as_str()
1066 .py_count(needle.as_str(), range, |h, n| h.matches(n).count())
1067 }
1068
1069 #[pymethod]
1070 fn zfill(&self, width: isize) -> String {
1071 unsafe {
1072 String::from_utf8_unchecked(self.as_str().py_zfill(width))
1074 }
1075 }
1076
1077 #[inline]
1078 fn _pad(
1079 &self,
1080 width: isize,
1081 fillchar: OptionalArg<PyStrRef>,
1082 pad: fn(&str, usize, char, usize) -> String,
1083 vm: &VirtualMachine,
1084 ) -> PyResult<String> {
1085 let fillchar = fillchar.map_or(Ok(' '), |ref s| {
1086 s.as_str().chars().exactly_one().map_err(|_| {
1087 vm.new_type_error(
1088 "The fill character must be exactly one character long".to_owned(),
1089 )
1090 })
1091 })?;
1092 Ok(if self.len() as isize >= width {
1093 String::from(self.as_str())
1094 } else {
1095 pad(self.as_str(), width as usize, fillchar, self.len())
1096 })
1097 }
1098
1099 #[pymethod]
1100 fn center(
1101 &self,
1102 width: isize,
1103 fillchar: OptionalArg<PyStrRef>,
1104 vm: &VirtualMachine,
1105 ) -> PyResult<String> {
1106 self._pad(width, fillchar, AnyStr::py_center, vm)
1107 }
1108
1109 #[pymethod]
1110 fn ljust(
1111 &self,
1112 width: isize,
1113 fillchar: OptionalArg<PyStrRef>,
1114 vm: &VirtualMachine,
1115 ) -> PyResult<String> {
1116 self._pad(width, fillchar, AnyStr::py_ljust, vm)
1117 }
1118
1119 #[pymethod]
1120 fn rjust(
1121 &self,
1122 width: isize,
1123 fillchar: OptionalArg<PyStrRef>,
1124 vm: &VirtualMachine,
1125 ) -> PyResult<String> {
1126 self._pad(width, fillchar, AnyStr::py_rjust, vm)
1127 }
1128
1129 #[pymethod]
1130 fn expandtabs(&self, args: anystr::ExpandTabsArgs) -> String {
1131 let tab_stop = args.tabsize();
1132 let mut expanded_str = String::with_capacity(self.byte_len());
1133 let mut tab_size = tab_stop;
1134 let mut col_count = 0usize;
1135 for ch in self.as_str().chars() {
1136 match ch {
1137 '\t' => {
1138 let num_spaces = tab_size - col_count;
1139 col_count += num_spaces;
1140 let expand = " ".repeat(num_spaces);
1141 expanded_str.push_str(&expand);
1142 }
1143 '\r' | '\n' => {
1144 expanded_str.push(ch);
1145 col_count = 0;
1146 tab_size = 0;
1147 }
1148 _ => {
1149 expanded_str.push(ch);
1150 col_count += 1;
1151 }
1152 }
1153 if col_count >= tab_size {
1154 tab_size += tab_stop;
1155 }
1156 }
1157 expanded_str
1158 }
1159
1160 #[pymethod]
1161 fn isidentifier(&self) -> bool {
1162 let mut chars = self.as_str().chars();
1163 let is_identifier_start = chars.next().map_or(false, |c| c == '_' || is_xid_start(c));
1164 is_identifier_start && chars.all(is_xid_continue)
1166 }
1167
1168 #[pymethod]
1170 fn translate(&self, table: PyObjectRef, vm: &VirtualMachine) -> PyResult<String> {
1171 vm.get_method_or_type_error(table.clone(), identifier!(vm, __getitem__), || {
1172 format!("'{}' object is not subscriptable", table.class().name())
1173 })?;
1174
1175 let mut translated = String::new();
1176 for c in self.as_str().chars() {
1177 match table.get_item(&*(c as u32).to_pyobject(vm), vm) {
1178 Ok(value) => {
1179 if let Some(text) = value.payload::<PyStr>() {
1180 translated.push_str(text.as_str());
1181 } else if let Some(bigint) = value.payload::<PyInt>() {
1182 let ch = bigint
1183 .as_bigint()
1184 .to_u32()
1185 .and_then(std::char::from_u32)
1186 .ok_or_else(|| {
1187 vm.new_value_error(
1188 "character mapping must be in range(0x110000)".to_owned(),
1189 )
1190 })?;
1191 translated.push(ch);
1192 } else if !vm.is_none(&value) {
1193 return Err(vm.new_type_error(
1194 "character mapping must return integer, None or str".to_owned(),
1195 ));
1196 }
1197 }
1198 _ => translated.push(c),
1199 }
1200 }
1201 Ok(translated)
1202 }
1203
1204 #[pystaticmethod]
1205 fn maketrans(
1206 dict_or_str: PyObjectRef,
1207 to_str: OptionalArg<PyStrRef>,
1208 none_str: OptionalArg<PyStrRef>,
1209 vm: &VirtualMachine,
1210 ) -> PyResult {
1211 let new_dict = vm.ctx.new_dict();
1212 if let OptionalArg::Present(to_str) = to_str {
1213 match dict_or_str.downcast::<PyStr>() {
1214 Ok(from_str) => {
1215 if to_str.len() == from_str.len() {
1216 for (c1, c2) in from_str.as_str().chars().zip(to_str.as_str().chars()) {
1217 new_dict.set_item(
1218 &*vm.new_pyobj(c1 as u32),
1219 vm.new_pyobj(c2 as u32),
1220 vm,
1221 )?;
1222 }
1223 if let OptionalArg::Present(none_str) = none_str {
1224 for c in none_str.as_str().chars() {
1225 new_dict.set_item(&*vm.new_pyobj(c as u32), vm.ctx.none(), vm)?;
1226 }
1227 }
1228 Ok(new_dict.to_pyobject(vm))
1229 } else {
1230 Err(vm.new_value_error(
1231 "the first two maketrans arguments must have equal length".to_owned(),
1232 ))
1233 }
1234 }
1235 _ => Err(vm.new_type_error(
1236 "first maketrans argument must be a string if there is a second argument"
1237 .to_owned(),
1238 )),
1239 }
1240 } else {
1241 match dict_or_str.downcast::<PyDict>() {
1243 Ok(dict) => {
1244 for (key, val) in dict {
1245 if let Some(num) = key.payload::<PyInt>() {
1247 new_dict.set_item(
1248 &*num.as_bigint().to_i32().to_pyobject(vm),
1249 val,
1250 vm,
1251 )?;
1252 } else if let Some(string) = key.payload::<PyStr>() {
1253 if string.len() == 1 {
1254 let num_value = string.as_str().chars().next().unwrap() as u32;
1255 new_dict.set_item(&*num_value.to_pyobject(vm), val, vm)?;
1256 } else {
1257 return Err(vm.new_value_error(
1258 "string keys in translate table must be of length 1".to_owned(),
1259 ));
1260 }
1261 }
1262 }
1263 Ok(new_dict.to_pyobject(vm))
1264 }
1265 _ => Err(vm.new_value_error(
1266 "if you give only one argument to maketrans it must be a dict".to_owned(),
1267 )),
1268 }
1269 }
1270 }
1271
1272 #[pymethod]
1273 fn encode(zelf: PyRef<Self>, args: EncodeArgs, vm: &VirtualMachine) -> PyResult<PyBytesRef> {
1274 encode_string(zelf, args.encoding, args.errors, vm)
1275 }
1276
1277 #[pymethod(magic)]
1278 fn getnewargs(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyObjectRef {
1279 (zelf.as_str(),).to_pyobject(vm)
1280 }
1281}
1282
1283#[pyclass]
1284impl PyRef<PyStr> {
1285 #[pymethod(magic)]
1286 fn str(self, vm: &VirtualMachine) -> PyRefExact<PyStr> {
1287 self.into_exact_or(&vm.ctx, |zelf| unsafe {
1288 PyStr::new_str_unchecked(zelf.bytes.to_vec(), zelf.kind.kind()).into_exact_ref(&vm.ctx)
1290 })
1291 }
1292}
1293
1294impl PyStrRef {
1295 pub fn concat_in_place(&mut self, other: &str, vm: &VirtualMachine) {
1296 if other.is_empty() {
1298 return;
1299 }
1300 let mut s = String::with_capacity(self.byte_len() + other.len());
1301 s.push_str(self.as_ref());
1302 s.push_str(other);
1303 *self = PyStr::from(s).into_ref(&vm.ctx);
1304 }
1305}
1306
1307impl Representable for PyStr {
1308 #[inline]
1309 fn repr_str(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<String> {
1310 zelf.repr(vm)
1311 }
1312}
1313
1314impl Hashable for PyStr {
1315 #[inline]
1316 fn hash(zelf: &Py<Self>, vm: &VirtualMachine) -> PyResult<hash::PyHash> {
1317 Ok(zelf.hash(vm))
1318 }
1319}
1320
1321impl Comparable for PyStr {
1322 fn cmp(
1323 zelf: &Py<Self>,
1324 other: &PyObject,
1325 op: PyComparisonOp,
1326 _vm: &VirtualMachine,
1327 ) -> PyResult<PyComparisonValue> {
1328 if let Some(res) = op.identical_optimization(zelf, other) {
1329 return Ok(res.into());
1330 }
1331 let other = class_or_notimplemented!(Self, other);
1332 Ok(op.eval_ord(zelf.as_str().cmp(other.as_str())).into())
1333 }
1334}
1335
1336impl Iterable for PyStr {
1337 fn iter(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult {
1338 Ok(PyStrIterator {
1339 internal: PyMutex::new((PositionIterInternal::new(zelf, 0), 0)),
1340 }
1341 .into_pyobject(vm))
1342 }
1343}
1344
1345impl AsMapping for PyStr {
1346 fn as_mapping() -> &'static PyMappingMethods {
1347 static AS_MAPPING: Lazy<PyMappingMethods> = Lazy::new(|| PyMappingMethods {
1348 length: atomic_func!(|mapping, _vm| Ok(PyStr::mapping_downcast(mapping).len())),
1349 subscript: atomic_func!(
1350 |mapping, needle, vm| PyStr::mapping_downcast(mapping)._getitem(needle, vm)
1351 ),
1352 ..PyMappingMethods::NOT_IMPLEMENTED
1353 });
1354 &AS_MAPPING
1355 }
1356}
1357
1358impl AsNumber for PyStr {
1359 fn as_number() -> &'static PyNumberMethods {
1360 static AS_NUMBER: PyNumberMethods = PyNumberMethods {
1361 remainder: Some(|a, b, vm| {
1362 if let Some(a) = a.downcast_ref::<PyStr>() {
1363 a.modulo(b.to_owned(), vm).to_pyresult(vm)
1364 } else {
1365 Ok(vm.ctx.not_implemented())
1366 }
1367 }),
1368 ..PyNumberMethods::NOT_IMPLEMENTED
1369 };
1370 &AS_NUMBER
1371 }
1372}
1373
1374impl AsSequence for PyStr {
1375 fn as_sequence() -> &'static PySequenceMethods {
1376 static AS_SEQUENCE: Lazy<PySequenceMethods> = Lazy::new(|| PySequenceMethods {
1377 length: atomic_func!(|seq, _vm| Ok(PyStr::sequence_downcast(seq).len())),
1378 concat: atomic_func!(|seq, other, vm| {
1379 let zelf = PyStr::sequence_downcast(seq);
1380 PyStr::add(zelf.to_owned(), other.to_owned(), vm)
1381 }),
1382 repeat: atomic_func!(|seq, n, vm| {
1383 let zelf = PyStr::sequence_downcast(seq);
1384 PyStr::repeat(zelf.to_owned(), n, vm).map(|x| x.into())
1385 }),
1386 item: atomic_func!(|seq, i, vm| {
1387 let zelf = PyStr::sequence_downcast(seq);
1388 zelf.getitem_by_index(vm, i)
1389 .map(|x| zelf.new_substr(x.to_string()).into_ref(&vm.ctx).into())
1390 }),
1391 contains: atomic_func!(
1392 |seq, needle, vm| PyStr::sequence_downcast(seq)._contains(needle, vm)
1393 ),
1394 ..PySequenceMethods::NOT_IMPLEMENTED
1395 });
1396 &AS_SEQUENCE
1397 }
1398}
1399
1400#[derive(FromArgs)]
1401struct EncodeArgs {
1402 #[pyarg(any, default)]
1403 encoding: Option<PyStrRef>,
1404 #[pyarg(any, default)]
1405 errors: Option<PyStrRef>,
1406}
1407
1408pub(crate) fn encode_string(
1409 s: PyStrRef,
1410 encoding: Option<PyStrRef>,
1411 errors: Option<PyStrRef>,
1412 vm: &VirtualMachine,
1413) -> PyResult<PyBytesRef> {
1414 let encoding = encoding
1415 .as_ref()
1416 .map_or(crate::codecs::DEFAULT_ENCODING, |s| s.as_str());
1417 vm.state.codec_registry.encode_text(s, encoding, errors, vm)
1418}
1419
1420impl PyPayload for PyStr {
1421 fn class(ctx: &Context) -> &'static Py<PyType> {
1422 ctx.types.str_type
1423 }
1424}
1425
1426impl ToPyObject for String {
1427 fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
1428 vm.ctx.new_str(self).into()
1429 }
1430}
1431
1432impl ToPyObject for char {
1433 fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
1434 vm.ctx.new_str(self.to_string()).into()
1435 }
1436}
1437
1438impl ToPyObject for &str {
1439 fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
1440 vm.ctx.new_str(self).into()
1441 }
1442}
1443
1444impl ToPyObject for &String {
1445 fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
1446 vm.ctx.new_str(self.clone()).into()
1447 }
1448}
1449
1450impl ToPyObject for &AsciiStr {
1451 fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
1452 vm.ctx.new_str(self).into()
1453 }
1454}
1455
1456impl ToPyObject for AsciiString {
1457 fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
1458 vm.ctx.new_str(self).into()
1459 }
1460}
1461
1462type SplitArgs = anystr::SplitArgs<PyStrRef>;
1463
1464#[derive(FromArgs)]
1465pub struct FindArgs {
1466 #[pyarg(positional)]
1467 sub: PyStrRef,
1468 #[pyarg(positional, default)]
1469 start: Option<PyIntRef>,
1470 #[pyarg(positional, default)]
1471 end: Option<PyIntRef>,
1472}
1473
1474impl FindArgs {
1475 fn get_value(self, len: usize) -> (PyStrRef, std::ops::Range<usize>) {
1476 let range = adjust_indices(self.start, self.end, len);
1477 (self.sub, range)
1478 }
1479}
1480
1481pub fn init(ctx: &Context) {
1482 PyStr::extend_class(ctx, ctx.types.str_type);
1483
1484 PyStrIterator::extend_class(ctx, ctx.types.str_iterator_type);
1485}
1486
1487impl SliceableSequenceOp for PyStr {
1488 type Item = char;
1489 type Sliced = String;
1490
1491 fn do_get(&self, index: usize) -> Self::Item {
1492 if self.is_ascii() {
1493 self.bytes[index] as char
1494 } else {
1495 self.as_str().chars().nth(index).unwrap()
1496 }
1497 }
1498
1499 fn do_slice(&self, range: Range<usize>) -> Self::Sliced {
1500 let value = self.as_str();
1501 if self.is_ascii() {
1502 value[range].to_owned()
1503 } else {
1504 rustpython_common::str::get_chars(value, range).to_owned()
1505 }
1506 }
1507
1508 fn do_slice_reverse(&self, range: Range<usize>) -> Self::Sliced {
1509 if self.is_ascii() {
1510 let mut v = self.bytes[range].to_vec();
1512 v.reverse();
1513 unsafe {
1514 String::from_utf8_unchecked(v)
1516 }
1517 } else {
1518 let mut s = String::with_capacity(self.bytes.len());
1519 s.extend(
1520 self.as_str()
1521 .chars()
1522 .rev()
1523 .skip(self.char_len() - range.end)
1524 .take(range.end - range.start),
1525 );
1526 s
1527 }
1528 }
1529
1530 fn do_stepped_slice(&self, range: Range<usize>, step: usize) -> Self::Sliced {
1531 if self.is_ascii() {
1532 let v = self.bytes[range].iter().copied().step_by(step).collect();
1533 unsafe {
1534 String::from_utf8_unchecked(v)
1536 }
1537 } else {
1538 let mut s = String::with_capacity(2 * ((range.len() / step) + 1));
1539 s.extend(
1540 self.as_str()
1541 .chars()
1542 .skip(range.start)
1543 .take(range.end - range.start)
1544 .step_by(step),
1545 );
1546 s
1547 }
1548 }
1549
1550 fn do_stepped_slice_reverse(&self, range: Range<usize>, step: usize) -> Self::Sliced {
1551 if self.is_ascii() {
1552 let v: Vec<u8> = self.bytes[range]
1554 .iter()
1555 .rev()
1556 .copied()
1557 .step_by(step)
1558 .collect();
1559 String::from_utf8(v).unwrap()
1561 } else {
1562 let mut s = String::with_capacity(2 * ((range.len() / step) + 1));
1564 s.extend(
1565 self.as_str()
1566 .chars()
1567 .rev()
1568 .skip(self.char_len() - range.end)
1569 .take(range.end - range.start)
1570 .step_by(step),
1571 );
1572 s
1573 }
1574 }
1575
1576 fn empty() -> Self::Sliced {
1577 String::new()
1578 }
1579
1580 fn len(&self) -> usize {
1581 self.char_len()
1582 }
1583}
1584
1585impl AsRef<str> for PyRefExact<PyStr> {
1586 fn as_ref(&self) -> &str {
1587 self.as_str()
1588 }
1589}
1590
1591impl AsRef<str> for PyExact<PyStr> {
1592 fn as_ref(&self) -> &str {
1593 self.as_str()
1594 }
1595}
1596
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Interpreter;

    /// `title()` must produce the expected title-cased text for each input.
    #[test]
    fn str_title() {
        // Each entry is (expected output, input).
        let cases = [
            (" Hello ", " hello "),
            ("Hello ", "hello "),
            ("Hello ", "Hello "),
            ("Format This As Title String", "fOrMaT thIs aS titLe String"),
            ("Format,This-As*Title;String", "fOrMaT,thIs-aS*titLe;String"),
            ("Getint", "getInt"),
            ("Greek Ωppercases ...", "greek ωppercases ..."),
            ("Greek ῼitlecases ...", "greek ῳitlecases ..."),
        ];
        for (expected, input) in cases {
            assert_eq!(PyStr::from(input).title().as_str(), expected);
        }
    }

    /// `istitle()` must accept title-cased strings and reject everything else.
    #[test]
    fn str_istitle() {
        let titlecased = [
            "A",
            "A Titlecased Line",
            "A\nTitlecased Line",
            "A Titlecased, Line",
            "Greek Ωppercases ...",
            "Greek ῼitlecases ...",
        ];
        for text in titlecased {
            assert!(PyStr::from(text).istitle());
        }

        let not_titlecased = [
            "",
            "a",
            "\n",
            "Not a capitalized String",
            "Not\ta Titlecase String",
            "Not--a Titlecase String",
            "NOT",
        ];
        for text in not_titlecased {
            assert!(!PyStr::from(text).istitle());
        }
    }

    /// A dict-built translation table replaces, deletes and expands characters, and a
    /// non-table argument to `translate` raises `TypeError`.
    #[test]
    fn str_maketrans_and_translate() {
        Interpreter::without_stdlib(Default::default()).enter(|vm| {
            // 'a' -> emoji, 'b' -> deleted, 'c' -> multi-character replacement.
            let mapping = vm.ctx.new_dict();
            mapping
                .set_item("a", vm.ctx.new_str("🎅").into(), vm)
                .unwrap();
            mapping.set_item("b", vm.ctx.none(), vm).unwrap();
            mapping
                .set_item("c", vm.ctx.new_str(ascii!("xda")).into(), vm)
                .unwrap();

            let table =
                PyStr::maketrans(mapping.into(), OptionalArg::Missing, OptionalArg::Missing, vm)
                    .unwrap();
            let text = PyStr::from("abc");
            let translated = text.translate(table, vm).unwrap();
            assert_eq!(translated, "🎅xda".to_owned());

            let failure = text.translate(vm.ctx.new_int(3).into(), vm);
            assert_eq!("TypeError", &*failure.unwrap_err().class().name(),);
        })
    }
}
1670
1671impl AnyStrWrapper for PyStrRef {
1672 type Str = str;
1673 fn as_ref(&self) -> &str {
1674 self.as_str()
1675 }
1676}
1677
1678impl AnyStrContainer<str> for String {
1679 fn new() -> Self {
1680 String::new()
1681 }
1682
1683 fn with_capacity(capacity: usize) -> Self {
1684 String::with_capacity(capacity)
1685 }
1686
1687 fn push_str(&mut self, other: &str) {
1688 String::push_str(self, other)
1689 }
1690}
1691
1692impl AnyStr for str {
1693 type Char = char;
1694 type Container = String;
1695
1696 fn element_bytes_len(c: char) -> usize {
1697 c.len_utf8()
1698 }
1699
1700 fn to_container(&self) -> Self::Container {
1701 self.to_owned()
1702 }
1703
1704 fn as_bytes(&self) -> &[u8] {
1705 self.as_bytes()
1706 }
1707
1708 fn as_utf8_str(&self) -> Result<&str, std::str::Utf8Error> {
1709 Ok(self)
1710 }
1711
1712 fn chars(&self) -> impl Iterator<Item = char> {
1713 str::chars(self)
1714 }
1715
1716 fn elements(&self) -> impl Iterator<Item = char> {
1717 str::chars(self)
1718 }
1719
1720 fn get_bytes(&self, range: std::ops::Range<usize>) -> &Self {
1721 &self[range]
1722 }
1723
1724 fn get_chars(&self, range: std::ops::Range<usize>) -> &Self {
1725 rustpython_common::str::get_chars(self, range)
1726 }
1727
1728 fn is_empty(&self) -> bool {
1729 Self::is_empty(self)
1730 }
1731
1732 fn bytes_len(&self) -> usize {
1733 Self::len(self)
1734 }
1735
1736 fn py_split_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
1737 where
1738 F: Fn(&Self) -> PyObjectRef,
1739 {
1740 let mut splits = Vec::new();
1742 let mut last_offset = 0;
1743 let mut count = maxsplit;
1744 for (offset, _) in self.match_indices(|c: char| c.is_ascii_whitespace() || c == '\x0b') {
1745 if last_offset == offset {
1746 last_offset += 1;
1747 continue;
1748 }
1749 if count == 0 {
1750 break;
1751 }
1752 splits.push(convert(&self[last_offset..offset]));
1753 last_offset = offset + 1;
1754 count -= 1;
1755 }
1756 if last_offset != self.len() {
1757 splits.push(convert(&self[last_offset..]));
1758 }
1759 splits
1760 }
1761
1762 fn py_rsplit_whitespace<F>(&self, maxsplit: isize, convert: F) -> Vec<PyObjectRef>
1763 where
1764 F: Fn(&Self) -> PyObjectRef,
1765 {
1766 let mut splits = Vec::new();
1768 let mut last_offset = self.len();
1769 let mut count = maxsplit;
1770 for (offset, _) in self.rmatch_indices(|c: char| c.is_ascii_whitespace() || c == '\x0b') {
1771 if last_offset == offset + 1 {
1772 last_offset -= 1;
1773 continue;
1774 }
1775 if count == 0 {
1776 break;
1777 }
1778 splits.push(convert(&self[offset + 1..last_offset]));
1779 last_offset = offset;
1780 count -= 1;
1781 }
1782 if last_offset != 0 {
1783 splits.push(convert(&self[..last_offset]));
1784 }
1785 splits
1786 }
1787}
1788
1789pub type PyStrInterned = PyInterned<PyStr>;
1792
1793impl PyStrInterned {
1794 #[inline]
1795 pub fn to_exact(&'static self) -> PyRefExact<PyStr> {
1796 unsafe { PyRefExact::new_unchecked(self.to_owned()) }
1797 }
1798}
1799
1800impl std::fmt::Display for PyStrInterned {
1801 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1802 std::fmt::Display::fmt(self.as_str(), f)
1803 }
1804}
1805
1806impl AsRef<str> for PyStrInterned {
1807 #[inline(always)]
1808 fn as_ref(&self) -> &str {
1809 self.as_str()
1810 }
1811}