Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit d3b437c

Browse files
[3.12] gh-123378: fix a crash in UnicodeError.__str__ (GH-124935) (#125098)
gh-123378: fix a crash in `UnicodeError.__str__` (GH-124935)

(cherry picked from commit ba14dfa)

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
1 parent 8225737 commit d3b437c
Copy full SHA for d3b437c

File tree

Expand file tree / Collapse file tree

3 files changed

+93
-45
lines changed
Filter options
Expand file tree / Collapse file tree

3 files changed

+93
-45
lines changed

‎Lib/test/test_exceptions.py

Copy file name to clipboard | Expand all lines: Lib/test/test_exceptions.py
+24 | Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import weakref
99
import errno
1010
from codecs import BOM_UTF8
11+
from itertools import product
1112
from textwrap import dedent
1213

1314
from test.support import (captured_stderr, check_impl_detail,
@@ -1333,6 +1334,29 @@ def test_unicode_errors_no_object(self):
13331334
for klass in klasses:
13341335
self.assertEqual(str(klass.__new__(klass)), "")
13351336

1337+
def test_unicode_error_str_does_not_crash(self):
1338+
# Test that str(UnicodeError(...)) does not crash.
1339+
# See https://github.com/python/cpython/issues/123378.
1340+
1341+
for start, end, objlen in product(
1342+
range(-5, 5),
1343+
range(-5, 5),
1344+
range(7),
1345+
):
1346+
obj = 'a' * objlen
1347+
with self.subTest('encode', objlen=objlen, start=start, end=end):
1348+
exc = UnicodeEncodeError('utf-8', obj, start, end, '')
1349+
self.assertIsInstance(str(exc), str)
1350+
1351+
with self.subTest('translate', objlen=objlen, start=start, end=end):
1352+
exc = UnicodeTranslateError(obj, start, end, '')
1353+
self.assertIsInstance(str(exc), str)
1354+
1355+
encoded = obj.encode()
1356+
with self.subTest('decode', objlen=objlen, start=start, end=end):
1357+
exc = UnicodeDecodeError('utf-8', encoded, start, end, '')
1358+
self.assertIsInstance(str(exc), str)
1359+
13361360
@no_tracing
13371361
def test_badisinstance(self):
13381362
# Bug #2542: if issubclass(e, MyException) raises an exception,
+3 | Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Fix a crash in the :meth:`~object.__str__` method of :exc:`UnicodeError`
2+
objects when the :attr:`UnicodeError.start` and :attr:`UnicodeError.end`
3+
values are invalid or out-of-range. Patch by Bénédikt Tran.

‎Objects/exceptions.c

Copy file name to clipboard | Expand all lines: Objects/exceptions.c
+66 -45 | Lines changed: 66 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -2961,46 +2961,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
29612961
static PyObject *
29622962
UnicodeEncodeError_str(PyObject *self)
29632963
{
2964-
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
2964+
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
29652965
PyObject *result = NULL;
29662966
PyObject *reason_str = NULL;
29672967
PyObject *encoding_str = NULL;
29682968

2969-
if (!uself->object)
2969+
if (exc->object == NULL) {
29702970
/* Not properly initialized. */
29712971
return PyUnicode_FromString("");
2972+
}
29722973

29732974
/* Get reason and encoding as strings, which they might not be if
29742975
they've been modified after we were constructed. */
2975-
reason_str = PyObject_Str(uself->reason);
2976-
if (reason_str == NULL)
2976+
reason_str = PyObject_Str(exc->reason);
2977+
if (reason_str == NULL) {
29772978
goto done;
2978-
encoding_str = PyObject_Str(uself->encoding);
2979-
if (encoding_str == NULL)
2979+
}
2980+
encoding_str = PyObject_Str(exc->encoding);
2981+
if (encoding_str == NULL) {
29802982
goto done;
2983+
}
2984+
2985+
Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
2986+
Py_ssize_t start = exc->start, end = exc->end;
29812987

2982-
if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
2983-
Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
2988+
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
2989+
Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
29842990
const char *fmt;
2985-
if (badchar <= 0xff)
2991+
if (badchar <= 0xff) {
29862992
fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U";
2987-
else if (badchar <= 0xffff)
2993+
}
2994+
else if (badchar <= 0xffff) {
29882995
fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U";
2989-
else
2996+
}
2997+
else {
29902998
fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U";
2999+
}
29913000
result = PyUnicode_FromFormat(
29923001
fmt,
29933002
encoding_str,
29943003
(int)badchar,
2995-
uself->start,
3004+
start,
29963005
reason_str);
29973006
}
29983007
else {
29993008
result = PyUnicode_FromFormat(
30003009
"'%U' codec can't encode characters in position %zd-%zd: %U",
30013010
encoding_str,
3002-
uself->start,
3003-
uself->end-1,
3011+
start,
3012+
end - 1,
30043013
reason_str);
30053014
}
30063015
done:
@@ -3074,41 +3083,46 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
30743083
static PyObject *
30753084
UnicodeDecodeError_str(PyObject *self)
30763085
{
3077-
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
3086+
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
30783087
PyObject *result = NULL;
30793088
PyObject *reason_str = NULL;
30803089
PyObject *encoding_str = NULL;
30813090

3082-
if (!uself->object)
3091+
if (exc->object == NULL) {
30833092
/* Not properly initialized. */
30843093
return PyUnicode_FromString("");
3094+
}
30853095

30863096
/* Get reason and encoding as strings, which they might not be if
30873097
they've been modified after we were constructed. */
3088-
reason_str = PyObject_Str(uself->reason);
3089-
if (reason_str == NULL)
3098+
reason_str = PyObject_Str(exc->reason);
3099+
if (reason_str == NULL) {
30903100
goto done;
3091-
encoding_str = PyObject_Str(uself->encoding);
3092-
if (encoding_str == NULL)
3101+
}
3102+
encoding_str = PyObject_Str(exc->encoding);
3103+
if (encoding_str == NULL) {
30933104
goto done;
3105+
}
3106+
3107+
Py_ssize_t len = PyBytes_GET_SIZE(exc->object);
3108+
Py_ssize_t start = exc->start, end = exc->end;
30943109

3095-
if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) {
3096-
int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff);
3110+
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
3111+
int badbyte = (int)(PyBytes_AS_STRING(exc->object)[start] & 0xff);
30973112
result = PyUnicode_FromFormat(
30983113
"'%U' codec can't decode byte 0x%02x in position %zd: %U",
30993114
encoding_str,
3100-
byte,
3101-
uself->start,
3115+
badbyte,
3116+
start,
31023117
reason_str);
31033118
}
31043119
else {
31053120
result = PyUnicode_FromFormat(
31063121
"'%U' codec can't decode bytes in position %zd-%zd: %U",
31073122
encoding_str,
3108-
uself->start,
3109-
uself->end-1,
3110-
reason_str
3111-
);
3123+
start,
3124+
end - 1,
3125+
reason_str);
31123126
}
31133127
done:
31143128
Py_XDECREF(reason_str);
@@ -3171,42 +3185,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
31713185
static PyObject *
31723186
UnicodeTranslateError_str(PyObject *self)
31733187
{
3174-
PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
3188+
PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
31753189
PyObject *result = NULL;
31763190
PyObject *reason_str = NULL;
31773191

3178-
if (!uself->object)
3192+
if (exc->object == NULL) {
31793193
/* Not properly initialized. */
31803194
return PyUnicode_FromString("");
3195+
}
31813196

31823197
/* Get reason as a string, which it might not be if it's been
31833198
modified after we were constructed. */
3184-
reason_str = PyObject_Str(uself->reason);
3185-
if (reason_str == NULL)
3199+
reason_str = PyObject_Str(exc->reason);
3200+
if (reason_str == NULL) {
31863201
goto done;
3202+
}
3203+
3204+
Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
3205+
Py_ssize_t start = exc->start, end = exc->end;
31873206

3188-
if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
3189-
Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
3207+
if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
3208+
Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
31903209
const char *fmt;
3191-
if (badchar <= 0xff)
3210+
if (badchar <= 0xff) {
31923211
fmt = "can't translate character '\\x%02x' in position %zd: %U";
3193-
else if (badchar <= 0xffff)
3212+
}
3213+
else if (badchar <= 0xffff) {
31943214
fmt = "can't translate character '\\u%04x' in position %zd: %U";
3195-
else
3215+
}
3216+
else {
31963217
fmt = "can't translate character '\\U%08x' in position %zd: %U";
3218+
}
31973219
result = PyUnicode_FromFormat(
31983220
fmt,
31993221
(int)badchar,
3200-
uself->start,
3201-
reason_str
3202-
);
3203-
} else {
3222+
start,
3223+
reason_str);
3224+
}
3225+
else {
32043226
result = PyUnicode_FromFormat(
32053227
"can't translate characters in position %zd-%zd: %U",
3206-
uself->start,
3207-
uself->end-1,
3208-
reason_str
3209-
);
3228+
start,
3229+
end - 1,
3230+
reason_str);
32103231
}
32113232
done:
32123233
Py_XDECREF(reason_str);

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.