RustPython · coolreader18 · Mar 27, 2025 · Mar 27, 2025 · Mar 27, 2025 · Mar 27, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
@@ -536,8 +536,6 @@ def test_badandgoodxmlcharrefreplaceexceptions(self):
            ("".join("&#%d;" % c for c in cs), 1 + len(s))
        )

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_badandgoodbackslashreplaceexceptions(self):
        # "backslashreplace" complains about a non-exception passed in
        self.assertRaises(
@@ -596,8 +594,6 @@ def test_badandgoodbackslashreplaceexceptions(self):
                    (r, 2)
                )

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_badandgoodnamereplaceexceptions(self):
        # "namereplace" complains about a non-exception passed in
        self.assertRaises(
@@ -644,8 +640,6 @@ def test_badandgoodnamereplaceexceptions(self):
                    (r, 1 + len(s))
                )

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_badandgoodsurrogateescapeexceptions(self):
        surrogateescape_errors = codecs.lookup_error('surrogateescape')
        # "surrogateescape" complains about a non-exception passed in

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -869,6 +869,11 @@ def test_bug691291(self):
        with reader:
            self.assertEqual(reader.read(), s1)

+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_incremental_surrogatepass(self):
+        super().test_incremental_surrogatepass()
+
 class UTF16LETest(ReadTest, unittest.TestCase):
    encoding = "utf-16-le"
    ill_formed_sequence = b"\x80\xdc"
@@ -917,6 +922,11 @@ def test_nonbmp(self):
        self.assertEqual(b'\x00\xd8\x03\xde'.decode(self.encoding),
                         "\U00010203")

+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_incremental_surrogatepass(self):
+        super().test_incremental_surrogatepass()
+
 class UTF16BETest(ReadTest, unittest.TestCase):
    encoding = "utf-16-be"
    ill_formed_sequence = b"\xdc\x80"
@@ -965,6 +975,11 @@ def test_nonbmp(self):
        self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
                         "\U00010203")

+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_incremental_surrogatepass(self):
+        super().test_incremental_surrogatepass()
+
 class UTF8Test(ReadTest, unittest.TestCase):
    encoding = "utf-8"
    ill_formed_sequence = b"\xed\xb2\x80"
@@ -998,8 +1013,6 @@ def test_decoder_state(self):
        self.check_state_handling_decode(self.encoding,
                                         u, u.encode(self.encoding))

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_decode_error(self):
        for data, error_handler, expected in (
            (b'[\x80\xff]', 'ignore', '[]'),
@@ -1026,8 +1039,6 @@ def test_lone_surrogates(self):
        exc = cm.exception
        self.assertEqual(exc.object[exc.start:exc.end], '\uD800\uDFFF')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_surrogatepass_handler(self):
        self.assertEqual("abc\ud800def".encode(self.encoding, "surrogatepass"),
                         self.BOM + b"abc\xed\xa0\x80def")
@@ -2884,8 +2895,6 @@ def test_escape_encode(self):

 class SurrogateEscapeTest(unittest.TestCase):

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_utf8(self):
        # Bad byte
        self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
@@ -2898,8 +2907,6 @@ def test_utf8(self):
        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "surrogateescape"),
                         b"\xed\xb0\x80")

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_ascii(self):
        # bad byte
        self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
@@ -2916,8 +2923,6 @@ def test_charmap(self):
        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
                         b"foo\xa5bar")

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_latin1(self):
        # Issue6373
        self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
@@ -3561,8 +3566,6 @@ class ASCIITest(unittest.TestCase):
    def test_encode(self):
        self.assertEqual('abc123'.encode('ascii'), b'abc123')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_encode_error(self):
        for data, error_handler, expected in (
            ('[\x80\xff\u20ac]', 'ignore', b'[]'),
@@ -3585,8 +3588,6 @@ def test_encode_surrogateescape_error(self):
    def test_decode(self):
        self.assertEqual(b'abc'.decode('ascii'), 'abc')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_decode_error(self):
        for data, error_handler, expected in (
            (b'[\x80\xff]', 'ignore', '[]'),
@@ -3609,8 +3610,6 @@ def test_encode(self):
            with self.subTest(data=data, expected=expected):
                self.assertEqual(data.encode('latin1'), expected)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_encode_errors(self):
        for data, error_handler, expected in (
            ('[\u20ac\udc80]', 'ignore', b'[]'),

diff --git a/Lib/test/test_json/test_scanstring.py b/Lib/test/test_json/test_scanstring.py
@@ -86,8 +86,6 @@ def test_scanstring(self):
            scanstring('["Bad value", truth]', 2, True),
            ('Bad value', 12))

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_surrogates(self):
        scanstring = self.json.decoder.scanstring
        def assertScan(given, expect):

diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py
@@ -945,15 +945,13 @@ def test_leak(self):
        """)
        self.check_leak(code, 'file descriptors')

-    @unittest.expectedFailureIfWindows('TODO: RUSTPYTHON Windows')
    def test_list_tests(self):
        # test --list-tests
        tests = [self.create_test() for i in range(5)]
        output = self.run_tests('--list-tests', *tests)
        self.assertEqual(output.rstrip().splitlines(),
                         tests)

-    @unittest.expectedFailureIfWindows('TODO: RUSTPYTHON Windows')
    def test_list_cases(self):
        # test --list-cases
        code = textwrap.dedent("""

diff --git a/Lib/test/test_stringprep.py b/Lib/test/test_stringprep.py
@@ -6,8 +6,6 @@
 from stringprep import *

 class StringprepTests(unittest.TestCase):
-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test(self):
        self.assertTrue(in_table_a1("\u0221"))
        self.assertFalse(in_table_a1("\u0222"))

diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py
@@ -1198,8 +1198,6 @@ def test_universal_newlines_communicate_encodings(self):
            stdout, stderr = popen.communicate(input='')
            self.assertEqual(stdout, '1\n2\n3\n4')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_communicate_errors(self):
        for errors, expected in [
            ('ignore', ''),

diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
@@ -2086,11 +2086,6 @@ class UstarUnicodeTest(UnicodeTest, unittest.TestCase):

    format = tarfile.USTAR_FORMAT

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
-    def test_uname_unicode(self):
-        super().test_uname_unicode()
-
    # Test whether the utf-8 encoded version of a filename exceeds the 100
    # bytes name field limit (every occurrence of '\xff' will be expanded to 2
    # bytes).
@@ -2170,13 +2165,6 @@ class GNUUnicodeTest(UnicodeTest, unittest.TestCase):

    format = tarfile.GNU_FORMAT

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
-    def test_uname_unicode(self):
-        super().test_uname_unicode()
-
-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_bad_pax_header(self):
        # Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
        # without a hdrcharset=BINARY header.
@@ -2198,8 +2186,6 @@ class PAXUnicodeTest(UnicodeTest, unittest.TestCase):
    # PAX_FORMAT ignores encoding in write mode.
    test_unicode_filename_error = None

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_binary_header(self):
        # Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field.
        for encoding, name in (

diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
@@ -608,8 +608,6 @@ def test_bytes_comparison(self):
            self.assertEqual('abc' == bytearray(b'abc'), False)
            self.assertEqual('abc' != bytearray(b'abc'), True)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_comparison(self):
        # Comparisons:
        self.assertEqual('abc', 'abc')
@@ -830,8 +828,6 @@ def test_isidentifier_legacy(self):
            warnings.simplefilter('ignore', DeprecationWarning)
            self.assertTrue(_testcapi.unicode_legacy_string(u).isidentifier())

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_isprintable(self):
        self.assertTrue("".isprintable())
        self.assertTrue(" ".isprintable())
@@ -847,8 +843,6 @@ def test_isprintable(self):
        self.assertTrue('\U0001F46F'.isprintable())
        self.assertFalse('\U000E0020'.isprintable())

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_surrogates(self):
        for s in ('a\uD800b\uDFFF', 'a\uDFFFb\uD800',
                  'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'):
@@ -1827,8 +1821,6 @@ def test_codecs_utf7(self):
                                    'ill-formed sequence'):
            b'+@'.decode('utf-7')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_codecs_utf8(self):
        self.assertEqual(''.encode('utf-8'), b'')
        self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')

diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py
@@ -53,17 +53,13 @@ def __rmod__(self, other):
        str3 = ustr3('TEST')
        self.assertEqual(fmt2 % str3, 'value is TEST')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_encode_default_args(self):
        self.checkequal(b'hello', 'hello', 'encode')
        # Check that encoding defaults to utf-8
        self.checkequal(b'\xf0\xa3\x91\x96', '\U00023456', 'encode')
        # Check that errors defaults to 'strict'
        self.checkraises(UnicodeError, '\ud800', 'encode')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
    def test_encode_explicit_none_args(self):
        self.checkequal(b'hello', 'hello', 'encode', None, None)
        # Check that encoding defaults to utf-8

diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py
@@ -730,6 +730,7 @@ def testTraceback(self):

    @unittest.skipIf(os_helper.TESTFN_UNENCODABLE is None,
                     "need an unencodable filename")
+    @unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
    def testUnencodable(self):
        filename = os_helper.TESTFN_UNENCODABLE + ".zip"
        self.addCleanup(os_helper.unlink, filename)

diff --git a/common/src/encodings.rs b/common/src/encodings.rs
@@ -401,7 +401,7 @@ pub mod errors {
            let mut out = String::with_capacity(num_chars * 4);
            for c in err_str.code_points() {
                let c_u32 = c.to_u32();
-                if let Some(c_name) = unicode_names2::name(c.to_char_lossy()) {
+                if let Some(c_name) = c.to_char().and_then(unicode_names2::name) {
                    write!(out, "\\N{{{c_name}}}").unwrap();
                } else if c_u32 >= 0x10000 {
                    write!(out, "\\U{c_u32:08x}").unwrap();