Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Parse surrogates in string literals properly #5629

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions 3 Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions 6 Lib/test/test_codeccallbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,8 +536,6 @@ def test_badandgoodxmlcharrefreplaceexceptions(self):
("".join("&#%d;" % c for c in cs), 1 + len(s))
)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_badandgoodbackslashreplaceexceptions(self):
# "backslashreplace" complains about a non-exception passed in
self.assertRaises(
Expand Down Expand Up @@ -596,8 +594,6 @@ def test_badandgoodbackslashreplaceexceptions(self):
(r, 2)
)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_badandgoodnamereplaceexceptions(self):
# "namereplace" complains about a non-exception passed in
self.assertRaises(
Expand Down Expand Up @@ -644,8 +640,6 @@ def test_badandgoodnamereplaceexceptions(self):
(r, 1 + len(s))
)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_badandgoodsurrogateescapeexceptions(self):
surrogateescape_errors = codecs.lookup_error('surrogateescape')
# "surrogateescape" complains about a non-exception passed in
Expand Down
31 changes: 15 additions & 16 deletions 31 Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,11 @@ def test_bug691291(self):
with reader:
self.assertEqual(reader.read(), s1)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_incremental_surrogatepass(self):
super().test_incremental_surrogatepass()

class UTF16LETest(ReadTest, unittest.TestCase):
encoding = "utf-16-le"
ill_formed_sequence = b"\x80\xdc"
Expand Down Expand Up @@ -917,6 +922,11 @@ def test_nonbmp(self):
self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
"\U00010203")

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_incremental_surrogatepass(self):
super().test_incremental_surrogatepass()

class UTF16BETest(ReadTest, unittest.TestCase):
encoding = "utf-16-be"
ill_formed_sequence = b"\xdc\x80"
Expand Down Expand Up @@ -965,6 +975,11 @@ def test_nonbmp(self):
self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
"\U00010203")

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_incremental_surrogatepass(self):
super().test_incremental_surrogatepass()

class UTF8Test(ReadTest, unittest.TestCase):
encoding = "utf-8"
ill_formed_sequence = b"\xed\xb2\x80"
Expand Down Expand Up @@ -998,8 +1013,6 @@ def test_decoder_state(self):
self.check_state_handling_decode(self.encoding,
u, u.encode(self.encoding))

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_decode_error(self):
for data, error_handler, expected in (
(b'[\x80\xff]', 'ignore', '[]'),
Expand All @@ -1026,8 +1039,6 @@ def test_lone_surrogates(self):
exc = cm.exception
self.assertEqual(exc.object[exc.start:exc.end], '\uD800\uDFFF')

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_surrogatepass_handler(self):
self.assertEqual("abc\ud800def".encode(self.encoding, "surrogatepass"),
self.BOM + b"abc\xed\xa0\x80def")
Expand Down Expand Up @@ -2884,8 +2895,6 @@ def test_escape_encode(self):

class SurrogateEscapeTest(unittest.TestCase):

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_utf8(self):
# Bad byte
self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
Expand All @@ -2898,8 +2907,6 @@ def test_utf8(self):
self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "surrogateescape"),
b"\xed\xb0\x80")

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_ascii(self):
# bad byte
self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
Expand All @@ -2916,8 +2923,6 @@ def test_charmap(self):
self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
b"foo\xa5bar")

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_latin1(self):
# Issue6373
self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
Expand Down Expand Up @@ -3561,8 +3566,6 @@ class ASCIITest(unittest.TestCase):
def test_encode(self):
self.assertEqual('abc123'.encode('ascii'), b'abc123')

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_encode_error(self):
for data, error_handler, expected in (
('[\x80\xff\u20ac]', 'ignore', b'[]'),
Expand All @@ -3585,8 +3588,6 @@ def test_encode_surrogateescape_error(self):
def test_decode(self):
self.assertEqual(b'abc'.decode('ascii'), 'abc')

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_decode_error(self):
for data, error_handler, expected in (
(b'[\x80\xff]', 'ignore', '[]'),
Expand All @@ -3609,8 +3610,6 @@ def test_encode(self):
with self.subTest(data=data, expected=expected):
self.assertEqual(data.encode('latin1'), expected)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_encode_errors(self):
for data, error_handler, expected in (
('[\u20ac\udc80]', 'ignore', b'[]'),
Expand Down
2 changes: 0 additions & 2 deletions 2 Lib/test/test_json/test_scanstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,6 @@ def test_scanstring(self):
scanstring('["Bad value", truth]', 2, True),
('Bad value', 12))

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_surrogates(self):
scanstring = self.json.decoder.scanstring
def assertScan(given, expect):
Expand Down
2 changes: 0 additions & 2 deletions 2 Lib/test/test_regrtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,15 +945,13 @@ def test_leak(self):
""")
self.check_leak(code, 'file descriptors')

@unittest.expectedFailureIfWindows('TODO: RUSTPYTHON Windows')
def test_list_tests(self):
# test --list-tests
tests = [self.create_test() for i in range(5)]
output = self.run_tests('--list-tests', *tests)
self.assertEqual(output.rstrip().splitlines(),
tests)

@unittest.expectedFailureIfWindows('TODO: RUSTPYTHON Windows')
def test_list_cases(self):
# test --list-cases
code = textwrap.dedent("""
Expand Down
2 changes: 0 additions & 2 deletions 2 Lib/test/test_stringprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
from stringprep import *

class StringprepTests(unittest.TestCase):
# TODO: RUSTPYTHON
@unittest.expectedFailure
def test(self):
self.assertTrue(in_table_a1("\u0221"))
self.assertFalse(in_table_a1("\u0222"))
Expand Down
2 changes: 0 additions & 2 deletions 2 Lib/test/test_subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -1198,8 +1198,6 @@ def test_universal_newlines_communicate_encodings(self):
stdout, stderr = popen.communicate(input='')
self.assertEqual(stdout, '1\n2\n3\n4')

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_communicate_errors(self):
for errors, expected in [
('ignore', ''),
Expand Down
14 changes: 0 additions & 14 deletions 14 Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2086,11 +2086,6 @@ class UstarUnicodeTest(UnicodeTest, unittest.TestCase):

format = tarfile.USTAR_FORMAT

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_uname_unicode(self):
super().test_uname_unicode()

# Test whether the utf-8 encoded version of a filename exceeds the 100
# bytes name field limit (every occurrence of '\xff' will be expanded to 2
# bytes).
Expand Down Expand Up @@ -2170,13 +2165,6 @@ class GNUUnicodeTest(UnicodeTest, unittest.TestCase):

format = tarfile.GNU_FORMAT

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_uname_unicode(self):
super().test_uname_unicode()

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_bad_pax_header(self):
# Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
# without a hdrcharset=BINARY header.
Expand All @@ -2198,8 +2186,6 @@ class PAXUnicodeTest(UnicodeTest, unittest.TestCase):
# PAX_FORMAT ignores encoding in write mode.
test_unicode_filename_error = None

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_binary_header(self):
# Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field.
for encoding, name in (
Expand Down
8 changes: 0 additions & 8 deletions 8 Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,8 +608,6 @@ def test_bytes_comparison(self):
self.assertEqual('abc' == bytearray(b'abc'), False)
self.assertEqual('abc' != bytearray(b'abc'), True)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_comparison(self):
# Comparisons:
self.assertEqual('abc', 'abc')
Expand Down Expand Up @@ -830,8 +828,6 @@ def test_isidentifier_legacy(self):
warnings.simplefilter('ignore', DeprecationWarning)
self.assertTrue(_testcapi.unicode_legacy_string(u).isidentifier())

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_isprintable(self):
self.assertTrue("".isprintable())
self.assertTrue(" ".isprintable())
Expand All @@ -847,8 +843,6 @@ def test_isprintable(self):
self.assertTrue('\U0001F46F'.isprintable())
self.assertFalse('\U000E0020'.isprintable())

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_surrogates(self):
for s in ('a\uD800b\uDFFF', 'a\uDFFFb\uD800',
'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'):
Expand Down Expand Up @@ -1827,8 +1821,6 @@ def test_codecs_utf7(self):
'ill-formed sequence'):
b'+@'.decode('utf-7')

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_codecs_utf8(self):
self.assertEqual(''.encode('utf-8'), b'')
self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
Expand Down
4 changes: 0 additions & 4 deletions 4 Lib/test/test_userstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,13 @@ def __rmod__(self, other):
str3 = ustr3('TEST')
self.assertEqual(fmt2 % str3, 'value is TEST')

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_encode_default_args(self):
self.checkequal(b'hello', 'hello', 'encode')
# Check that encoding defaults to utf-8
self.checkequal(b'\xf0\xa3\x91\x96', '\U00023456', 'encode')
# Check that errors defaults to 'strict'
self.checkraises(UnicodeError, '\ud800', 'encode')

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_encode_explicit_none_args(self):
self.checkequal(b'hello', 'hello', 'encode', None, None)
# Check that encoding defaults to utf-8
Expand Down
1 change: 1 addition & 0 deletions 1 Lib/test/test_zipimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,7 @@ def testTraceback(self):

@unittest.skipIf(os_helper.TESTFN_UNENCODABLE is None,
"need an unencodable filename")
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
def testUnencodable(self):
filename = os_helper.TESTFN_UNENCODABLE + ".zip"
self.addCleanup(os_helper.unlink, filename)
Expand Down
2 changes: 1 addition & 1 deletion 2 common/src/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ pub mod errors {
let mut out = String::with_capacity(num_chars * 4);
for c in err_str.code_points() {
let c_u32 = c.to_u32();
if let Some(c_name) = unicode_names2::name(c.to_char_lossy()) {
if let Some(c_name) = c.to_char().and_then(unicode_names2::name) {
write!(out, "\\N{{{c_name}}}").unwrap();
} else if c_u32 >= 0x10000 {
write!(out, "\\U{c_u32:08x}").unwrap();
Expand Down
Loading
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.