Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 7f4582b

Browse files
committed
Make cformat wtf8-compatible
1 parent cace112 commit 7f4582b
Copy full SHA for 7f4582b

File tree

Expand file tree / Collapse file tree

6 files changed

+153
-34
lines changed
Filter options
Expand file tree / Collapse file tree

6 files changed

+153
-34
lines changed

‎Lib/test/test_codecs.py

Copy file name to clipboard | Expand all lines: Lib/test/test_codecs.py
-2 | Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1698,8 +1698,6 @@ def test_decode_invalid(self):
16981698

16991699

17001700
class NameprepTest(unittest.TestCase):
1701-
# TODO: RUSTPYTHON
1702-
@unittest.expectedFailure
17031701
def test_nameprep(self):
17041702
from encodings.idna import nameprep
17051703
for pos, (orig, prepped) in enumerate(nameprep_tests):

‎common/src/cformat.rs

Copy file name to clipboard | Expand all lines: common/src/cformat.rs
+39 -3 | Lines changed: 39 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,13 @@ use std::{
1111
str::FromStr,
1212
};
1313

14+
use crate::wtf8::{CodePoint, Wtf8, Wtf8Buf};
15+
1416
#[derive(Debug, PartialEq)]
1517
pub enum CFormatErrorType {
1618
UnmatchedKeyParentheses,
1719
MissingModuloSign,
18-
UnsupportedFormatChar(char),
20+
UnsupportedFormatChar(CodePoint),
1921
IncompleteFormat,
2022
IntTooBig,
2123
// Unimplemented,
@@ -39,7 +41,9 @@ impl fmt::Display for CFormatError {
3941
UnsupportedFormatChar(c) => write!(
4042
f,
4143
"unsupported format character '{}' ({:#x}) at index {}",
42-
c, c as u32, self.index
44+
c,
45+
c.to_u32(),
46+
self.index
4347
),
4448
IntTooBig => write!(f, "width/precision too big"),
4549
_ => write!(f, "unexpected error parsing format string"),
@@ -160,7 +164,7 @@ pub trait FormatBuf:
160164
fn concat(self, other: Self) -> Self;
161165
}
162166

163-
pub trait FormatChar: Copy + Into<char> + From<u8> {
167+
pub trait FormatChar: Copy + Into<CodePoint> + From<u8> {
164168
fn to_char_lossy(self) -> char;
165169
fn eq_char(self, c: char) -> bool;
166170
}
@@ -188,6 +192,29 @@ impl FormatChar for char {
188192
}
189193
}
190194

195+
impl FormatBuf for Wtf8Buf {
196+
type Char = CodePoint;
197+
fn chars(&self) -> impl Iterator<Item = Self::Char> {
198+
self.code_points()
199+
}
200+
fn len(&self) -> usize {
201+
(**self).len()
202+
}
203+
fn concat(mut self, other: Self) -> Self {
204+
self.extend([other]);
205+
self
206+
}
207+
}
208+
209+
impl FormatChar for CodePoint {
210+
fn to_char_lossy(self) -> char {
211+
self.to_char_lossy()
212+
}
213+
fn eq_char(self, c: char) -> bool {
214+
self == c
215+
}
216+
}
217+
191218
impl FormatBuf for Vec<u8> {
192219
type Char = u8;
193220
fn chars(&self) -> impl Iterator<Item = Self::Char> {
@@ -801,6 +828,15 @@ impl FromStr for CFormatString {
801828
}
802829
}
803830

831+
pub type CFormatWtf8 = CFormatStrOrBytes<Wtf8Buf>;
832+
833+
impl CFormatWtf8 {
834+
pub fn parse_from_wtf8(s: &Wtf8) -> Result<Self, CFormatError> {
835+
let mut iter = s.code_points().enumerate().peekable();
836+
Self::parse(&mut iter)
837+
}
838+
}
839+
804840
#[cfg(test)]
805841
mod tests {
806842
use super::*;

‎common/src/wtf8/mod.rs

Copy file name to clipboard | Expand all lines: common/src/wtf8/mod.rs
+30 -5 | Lines changed: 30 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,12 @@ impl fmt::Debug for CodePoint {
7575
}
7676
}
7777

78+
impl fmt::Display for CodePoint {
79+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80+
self.to_char_lossy().fmt(f)
81+
}
82+
}
83+
7884
impl CodePoint {
7985
/// Unsafely creates a new `CodePoint` without checking the value.
8086
///
@@ -109,13 +115,13 @@ impl CodePoint {
109115

110116
/// Returns the numeric value of the code point.
111117
#[inline]
112-
pub fn to_u32(&self) -> u32 {
118+
pub fn to_u32(self) -> u32 {
113119
self.value
114120
}
115121

116122
/// Returns the numeric value of the code point if it is a leading surrogate.
117123
#[inline]
118-
pub fn to_lead_surrogate(&self) -> Option<u16> {
124+
pub fn to_lead_surrogate(self) -> Option<u16> {
119125
match self.value {
120126
lead @ 0xD800..=0xDBFF => Some(lead as u16),
121127
_ => None,
@@ -124,7 +130,7 @@ impl CodePoint {
124130

125131
/// Returns the numeric value of the code point if it is a trailing surrogate.
126132
#[inline]
127-
pub fn to_trail_surrogate(&self) -> Option<u16> {
133+
pub fn to_trail_surrogate(self) -> Option<u16> {
128134
match self.value {
129135
trail @ 0xDC00..=0xDFFF => Some(trail as u16),
130136
_ => None,
@@ -135,7 +141,7 @@ impl CodePoint {
135141
///
136142
/// Returns `None` if the code point is a surrogate (from U+D800 to U+DFFF).
137143
#[inline]
138-
pub fn to_char(&self) -> Option<char> {
144+
pub fn to_char(self) -> Option<char> {
139145
match self.value {
140146
0xD800..=0xDFFF => None,
141147
_ => Some(unsafe { char::from_u32_unchecked(self.value) }),
@@ -147,7 +153,7 @@ impl CodePoint {
147153
/// Returns `'\u{FFFD}'` (the replacement character “�”)
148154
/// if the code point is a surrogate (from U+D800 to U+DFFF).
149155
#[inline]
150-
pub fn to_char_lossy(&self) -> char {
156+
pub fn to_char_lossy(self) -> char {
151157
self.to_char().unwrap_or('\u{FFFD}')
152158
}
153159

@@ -170,6 +176,12 @@ impl From<u16> for CodePoint {
170176
}
171177
}
172178

179+
impl From<u8> for CodePoint {
180+
fn from(value: u8) -> Self {
181+
char::from(value).into()
182+
}
183+
}
184+
173185
impl From<char> for CodePoint {
174186
fn from(value: char) -> Self {
175187
Self::from_char(value)
@@ -515,6 +527,13 @@ impl Extend<CodePoint> for Wtf8Buf {
515527
}
516528
}
517529

530+
impl<W: AsRef<Wtf8>> Extend<W> for Wtf8Buf {
531+
fn extend<T: IntoIterator<Item = W>>(&mut self, iter: T) {
532+
iter.into_iter()
533+
.for_each(move |w| self.push_wtf8(w.as_ref()));
534+
}
535+
}
536+
518537
impl<W: AsRef<Wtf8>> FromIterator<W> for Wtf8Buf {
519538
fn from_iter<T: IntoIterator<Item = W>>(iter: T) -> Self {
520539
let mut buf = Wtf8Buf::new();
@@ -541,6 +560,12 @@ impl From<&str> for Wtf8Buf {
541560
}
542561
}
543562

563+
impl From<ascii::AsciiString> for Wtf8Buf {
564+
fn from(s: ascii::AsciiString) -> Self {
565+
Wtf8Buf::from_string(s.into())
566+
}
567+
}
568+
544569
/// A borrowed slice of well-formed WTF-8 data.
545570
///
546571
/// Similar to `&str`, but can additionally contain surrogate code points

‎vm/src/builtins/str.rs

Copy file name to clipboard | Expand all lines: vm/src/builtins/str.rs
+2 -2 | Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -857,8 +857,8 @@ impl PyStr {
857857
}
858858

859859
#[pymethod(name = "__mod__")]
860-
fn modulo(&self, values: PyObjectRef, vm: &VirtualMachine) -> PyResult<String> {
861-
cformat_string(vm, self.as_str(), values)
860+
fn modulo(&self, values: PyObjectRef, vm: &VirtualMachine) -> PyResult<Wtf8Buf> {
861+
cformat_string(vm, self.as_wtf8(), values)
862862
}
863863

864864
#[pymethod(magic)]

‎vm/src/cformat.rs

Copy file name to clipboard | Expand all lines: vm/src/cformat.rs
+22 -20 | Lines changed: 22 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
//! as per the [Python Docs](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting).
33
44
use crate::common::cformat::*;
5+
use crate::common::wtf8::{CodePoint, Wtf8, Wtf8Buf};
56
use crate::{
67
AsObject, PyObjectRef, PyResult, TryFromBorrowedObject, TryFromObject, VirtualMachine,
78
builtins::{
@@ -125,13 +126,13 @@ fn spec_format_string(
125126
spec: &CFormatSpec,
126127
obj: PyObjectRef,
127128
idx: usize,
128-
) -> PyResult<String> {
129+
) -> PyResult<Wtf8Buf> {
129130
match &spec.format_type {
130131
CFormatType::String(conversion) => {
131132
let result = match conversion {
132133
CFormatConversion::Ascii => builtins::ascii(obj, vm)?.into(),
133-
CFormatConversion::Str => obj.str(vm)?.as_str().to_owned(),
134-
CFormatConversion::Repr => obj.repr(vm)?.as_str().to_owned(),
134+
CFormatConversion::Str => obj.str(vm)?.as_wtf8().to_owned(),
135+
CFormatConversion::Repr => obj.repr(vm)?.as_wtf8().to_owned(),
135136
CFormatConversion::Bytes => {
136137
// idx is the position of the %, we want the position of the b
137138
return Err(vm.new_value_error(format!(
@@ -146,16 +147,18 @@ fn spec_format_string(
146147
CNumberType::DecimalD | CNumberType::DecimalI | CNumberType::DecimalU => {
147148
match_class!(match &obj {
148149
ref i @ PyInt => {
149-
Ok(spec.format_number(i.as_bigint()))
150+
Ok(spec.format_number(i.as_bigint()).into())
150151
}
151152
ref f @ PyFloat => {
152-
Ok(spec.format_number(&try_f64_to_bigint(f.to_f64(), vm)?))
153+
Ok(spec
154+
.format_number(&try_f64_to_bigint(f.to_f64(), vm)?)
155+
.into())
153156
}
154157
obj => {
155158
if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
156159
let result = method?.call((), vm)?;
157160
if let Some(i) = result.payload::<PyInt>() {
158-
return Ok(spec.format_number(i.as_bigint()));
161+
return Ok(spec.format_number(i.as_bigint()).into());
159162
}
160163
}
161164
Err(vm.new_type_error(format!(
@@ -168,7 +171,7 @@ fn spec_format_string(
168171
}
169172
_ => {
170173
if let Some(i) = obj.payload::<PyInt>() {
171-
Ok(spec.format_number(i.as_bigint()))
174+
Ok(spec.format_number(i.as_bigint()).into())
172175
} else {
173176
Err(vm.new_type_error(format!(
174177
"%{} format: an integer is required, not {}",
@@ -180,21 +183,21 @@ fn spec_format_string(
180183
},
181184
CFormatType::Float(_) => {
182185
let value = ArgIntoFloat::try_from_object(vm, obj)?;
183-
Ok(spec.format_float(value.into()))
186+
Ok(spec.format_float(value.into()).into())
184187
}
185188
CFormatType::Character(CCharacterType::Character) => {
186189
if let Some(i) = obj.payload::<PyInt>() {
187190
let ch = i
188191
.as_bigint()
189192
.to_u32()
190-
.and_then(char::from_u32)
193+
.and_then(CodePoint::from_u32)
191194
.ok_or_else(|| {
192195
vm.new_overflow_error("%c arg not in range(0x110000)".to_owned())
193196
})?;
194197
return Ok(spec.format_char(ch));
195198
}
196199
if let Some(s) = obj.payload::<PyStr>() {
197-
if let Ok(ch) = s.as_str().chars().exactly_one() {
200+
if let Ok(ch) = s.as_wtf8().code_points().exactly_one() {
198201
return Ok(spec.format_char(ch));
199202
}
200203
}
@@ -374,17 +377,16 @@ pub(crate) fn cformat_bytes(
374377

375378
pub(crate) fn cformat_string(
376379
vm: &VirtualMachine,
377-
format_string: &str,
380+
format_string: &Wtf8,
378381
values_obj: PyObjectRef,
379-
) -> PyResult<String> {
380-
let format = format_string
381-
.parse::<CFormatString>()
382+
) -> PyResult<Wtf8Buf> {
383+
let format = CFormatWtf8::parse_from_wtf8(format_string)
382384
.map_err(|err| vm.new_value_error(err.to_string()))?;
383385
let (num_specifiers, mapping_required) = format
384386
.check_specifiers()
385387
.ok_or_else(|| specifier_error(vm))?;
386388

387-
let mut result = String::new();
389+
let mut result = Wtf8Buf::new();
388390

389391
let is_mapping = values_obj.class().has_attr(identifier!(vm, __getitem__))
390392
&& !values_obj.fast_isinstance(vm.ctx.types.tuple_type)
@@ -399,7 +401,7 @@ pub(crate) fn cformat_string(
399401
{
400402
for (_, part) in format.iter() {
401403
match part {
402-
CFormatPart::Literal(literal) => result.push_str(literal),
404+
CFormatPart::Literal(literal) => result.push_wtf8(literal),
403405
CFormatPart::Spec(_) => unreachable!(),
404406
}
405407
}
@@ -415,11 +417,11 @@ pub(crate) fn cformat_string(
415417
return if is_mapping {
416418
for (idx, part) in format {
417419
match part {
418-
CFormatPart::Literal(literal) => result.push_str(&literal),
420+
CFormatPart::Literal(literal) => result.push_wtf8(&literal),
419421
CFormatPart::Spec(CFormatSpecKeyed { mapping_key, spec }) => {
420422
let value = values_obj.get_item(&mapping_key.unwrap(), vm)?;
421423
let part_result = spec_format_string(vm, &spec, value, idx)?;
422-
result.push_str(&part_result);
424+
result.push_wtf8(&part_result);
423425
}
424426
}
425427
}
@@ -439,7 +441,7 @@ pub(crate) fn cformat_string(
439441

440442
for (idx, part) in format {
441443
match part {
442-
CFormatPart::Literal(literal) => result.push_str(&literal),
444+
CFormatPart::Literal(literal) => result.push_wtf8(&literal),
443445
CFormatPart::Spec(CFormatSpecKeyed { mut spec, .. }) => {
444446
try_update_quantity_from_tuple(
445447
vm,
@@ -456,7 +458,7 @@ pub(crate) fn cformat_string(
456458
}
457459
}?;
458460
let part_result = spec_format_string(vm, &spec, value, idx)?;
459-
result.push_str(&part_result);
461+
result.push_wtf8(&part_result);
460462
}
461463
}
462464
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.