Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Allow surrogates in str #5587

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions 3 Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions 2 Lib/test/string_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,8 +1066,6 @@ def test_hash(self):
hash(b)
self.assertEqual(hash(a), hash(b))

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_capitalize_nonascii(self):
# check that titlecased chars are lowered correctly
# \u1ffc is the titlecased char
Expand Down
1 change: 1 addition & 0 deletions 1 Lib/test/test_cmd_line_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,7 @@ def test_pep_409_verbiage(self):
self.assertTrue(text[1].startswith(' File '))
self.assertTrue(text[3].startswith('NameError'))

@unittest.expectedFailureIf(sys.platform == "linux", "TODO: RUSTPYTHON")
def test_non_ascii(self):
# Mac OS X denies the creation of a file with an invalid UTF-8 name.
# Windows allows creating a name with an arbitrary bytes name, but
Expand Down
2 changes: 0 additions & 2 deletions 2 Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1698,8 +1698,6 @@ def test_decode_invalid(self):


class NameprepTest(unittest.TestCase):
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_nameprep(self):
from encodings.idna import nameprep
for pos, (orig, prepped) in enumerate(nameprep_tests):
Expand Down
2 changes: 0 additions & 2 deletions 2 Lib/test/test_difflib.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,8 +373,6 @@ def test_byte_content(self):
check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_byte_filenames(self):
# somebody renamed a file from ISO-8859-2 to UTF-8
fna = b'\xb3odz.txt' # "łodz.txt"
Expand Down
2 changes: 2 additions & 0 deletions 2 Lib/test/test_import/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,6 +1305,8 @@ def exec_module(*args):
else:
importlib.SourceLoader.exec_module = old_exec_module

# TODO: RUSTPYTHON
@unittest.expectedFailure
@unittest.skipUnless(TESTFN_UNENCODABLE, 'need TESTFN_UNENCODABLE')
def test_unencodable_filename(self):
# Issue #11619: The Python parser and the import machinery must not
Expand Down
6 changes: 0 additions & 6 deletions 6 Lib/test/test_json/test_scanstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,4 @@ def test_overflow(self):


class TestPyScanstring(TestScanstring, PyTest): pass
# TODO: RUSTPYTHON
class TestPyScanstring(TestScanstring, PyTest):
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_bad_escapes(self):
super().test_bad_escapes()
class TestCScanstring(TestScanstring, CTest): pass
6 changes: 0 additions & 6 deletions 6 Lib/test/test_ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -1032,12 +1032,6 @@ class NtCommonTest(test_genericpath.CommonTest, unittest.TestCase):
pathmodule = ntpath
attributes = ['relpath']

# TODO: RUSTPYTHON
if sys.platform == "linux":
@unittest.expectedFailure
def test_nonascii_abspath(self):
super().test_nonascii_abspath()

# TODO: RUSTPYTHON
if sys.platform == "win32":
# TODO: RUSTPYTHON, ValueError: illegal environment variable name
Expand Down
3 changes: 1 addition & 2 deletions 3 Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,8 +854,6 @@ def test_string_boundaries(self):
# Can match around the whitespace.
self.assertEqual(len(re.findall(r"\B", " ")), 2)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_bigcharset(self):
self.assertEqual(re.match("([\u2222\u2223])",
"\u2222").group(1), "\u2222")
Expand Down Expand Up @@ -2233,6 +2231,7 @@ def test_bug_40736(self):
with self.assertRaisesRegex(TypeError, "got 'type'"):
re.search("x*", type)

@unittest.skip("TODO: RUSTPYTHON: flaky, improve perf")
@requires_resource('cpu')
def test_search_anchor_at_beginning(self):
s = 'x'*10**7
Expand Down
2 changes: 0 additions & 2 deletions 2 Lib/test/test_smtplib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1459,8 +1459,6 @@ def test_send_unicode_with_SMTPUTF8_via_low_level_API(self):
self.assertIn('SMTPUTF8', self.serv.last_mail_options)
self.assertEqual(self.serv.last_rcpt_options, [])

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_send_message_uses_smtputf8_if_addrs_non_ascii(self):
msg = EmailMessage()
msg['From'] = "Páolo <főo@bar.com>"
Expand Down
4 changes: 1 addition & 3 deletions 4 Lib/test/test_socket.py
Original file line number Diff line number Diff line change
Expand Up @@ -1578,7 +1578,7 @@ def test_getnameinfo(self):
# only IP addresses are allowed
self.assertRaises(OSError, socket.getnameinfo, ('mail.python.org',0), 0)

@unittest.expectedFailureIf(sys.platform != "darwin", "TODO: RUSTPYTHON; socket.gethostbyname_ex")
@unittest.skip("TODO: RUSTPYTHON: flaky on CI?")
@unittest.skipUnless(support.is_resource_enabled('network'),
'network is not enabled')
def test_idna(self):
Expand Down Expand Up @@ -5519,8 +5519,6 @@ def testBytesAddr(self):
self.addCleanup(os_helper.unlink, path)
self.assertEqual(self.sock.getsockname(), path)

# TODO: RUSTPYTHON, surrogateescape
@unittest.expectedFailure
def testSurrogateescapeBind(self):
# Test binding to a valid non-ASCII pathname, with the
# non-ASCII bytes supplied using surrogateescape encoding.
Expand Down
2 changes: 0 additions & 2 deletions 2 Lib/test/test_sqlite3/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,6 @@ def test_too_large_int(self):
row = self.cur.fetchone()
self.assertIsNone(row)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_string_with_surrogates(self):
for value in 0xd8ff, 0xdcff:
with self.assertRaises(UnicodeEncodeError):
Expand Down
2 changes: 0 additions & 2 deletions 2 Lib/test/test_ucn.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,6 @@ def test_cjk_unified_ideographs(self):
self.checkletter("CJK UNIFIED IDEOGRAPH-2B81D", "\U0002B81D")
self.checkletter("CJK UNIFIED IDEOGRAPH-3134A", "\U0003134A")

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_bmp_characters(self):
for code in range(0x10000):
char = chr(code)
Expand Down
2 changes: 0 additions & 2 deletions 2 Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,8 +721,6 @@ def test_isspace(self):
'\U0001F40D', '\U0001F46F']:
self.assertFalse(ch.isspace(), '{!a} is not space.'.format(ch))

# TODO: RUSTPYTHON
@unittest.expectedFailure
@support.requires_resource('cpu')
def test_isspace_invariant(self):
for codepoint in range(sys.maxunicode + 1):
Expand Down
6 changes: 0 additions & 6 deletions 6 Lib/test/test_unicodedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,6 @@ def test_function_checksum(self):
result = h.hexdigest()
self.assertEqual(result, self.expectedchecksum)

# TODO: RUSTPYTHON
@unittest.expectedFailure
@requires_resource('cpu')
def test_name_inverse_lookup(self):
for i in range(sys.maxunicode + 1):
Expand Down Expand Up @@ -326,8 +324,6 @@ def test_ucd_510(self):
self.assertTrue("\u1d79".upper()=='\ua77d')
self.assertTrue(".".upper()=='.')

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_bug_5828(self):
self.assertEqual("\u1d79".lower(), "\u1d79")
# Only U+0000 should have U+0000 as its upper/lower/titlecase variant
Expand All @@ -347,8 +343,6 @@ def test_bug_4971(self):
self.assertEqual("\u01c5".title(), "\u01c5")
self.assertEqual("\u01c6".title(), "\u01c5")

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_linebreak_7643(self):
for i in range(0x10000):
lines = (chr(i) + 'A').splitlines()
Expand Down
2 changes: 2 additions & 0 deletions 2 common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ rustpython-literal = { workspace = true }

ascii = { workspace = true }
bitflags = { workspace = true }
bstr = { workspace = true }
cfg-if = { workspace = true }
itertools = { workspace = true }
libc = { workspace = true }
malachite-bigint = { workspace = true }
malachite-q = { workspace = true }
malachite-base = { workspace = true }
memchr = { workspace = true }
num-complex = { workspace = true }
num-traits = { workspace = true }
once_cell = { workspace = true }
Expand Down
42 changes: 39 additions & 3 deletions 42 common/src/cformat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ use std::{
str::FromStr,
};

use crate::wtf8::{CodePoint, Wtf8, Wtf8Buf};

#[derive(Debug, PartialEq)]
pub enum CFormatErrorType {
UnmatchedKeyParentheses,
MissingModuloSign,
UnsupportedFormatChar(char),
UnsupportedFormatChar(CodePoint),
IncompleteFormat,
IntTooBig,
// Unimplemented,
Expand All @@ -39,7 +41,9 @@ impl fmt::Display for CFormatError {
UnsupportedFormatChar(c) => write!(
f,
"unsupported format character '{}' ({:#x}) at index {}",
c, c as u32, self.index
c,
c.to_u32(),
self.index
),
IntTooBig => write!(f, "width/precision too big"),
_ => write!(f, "unexpected error parsing format string"),
Expand Down Expand Up @@ -160,7 +164,7 @@ pub trait FormatBuf:
fn concat(self, other: Self) -> Self;
}

pub trait FormatChar: Copy + Into<char> + From<u8> {
pub trait FormatChar: Copy + Into<CodePoint> + From<u8> {
fn to_char_lossy(self) -> char;
fn eq_char(self, c: char) -> bool;
}
Expand Down Expand Up @@ -188,6 +192,29 @@ impl FormatChar for char {
}
}

impl FormatBuf for Wtf8Buf {
type Char = CodePoint;
fn chars(&self) -> impl Iterator<Item = Self::Char> {
self.code_points()
}
fn len(&self) -> usize {
(**self).len()
}
fn concat(mut self, other: Self) -> Self {
self.extend([other]);
self
}
}

impl FormatChar for CodePoint {
fn to_char_lossy(self) -> char {
self.to_char_lossy()
}
fn eq_char(self, c: char) -> bool {
self == c
}
}

impl FormatBuf for Vec<u8> {
type Char = u8;
fn chars(&self) -> impl Iterator<Item = Self::Char> {
Expand Down Expand Up @@ -801,6 +828,15 @@ impl FromStr for CFormatString {
}
}

pub type CFormatWtf8 = CFormatStrOrBytes<Wtf8Buf>;

impl CFormatWtf8 {
pub fn parse_from_wtf8(s: &Wtf8) -> Result<Self, CFormatError> {
let mut iter = s.code_points().enumerate().peekable();
Self::parse(&mut iter)
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.