Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

[3.10] gh-99612: Fix PyUnicode_DecodeUTF8Stateful() for ASCII-only data (GH-99613) (GH-107224) #107230

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions 54 Lib/test/test_capi/test_codecs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import unittest
from test.support import import_helper

_testcapi = import_helper.import_module('_testcapi')


class CAPITest(unittest.TestCase):
    """Exercise the UTF-8 decoding wrappers exposed by ``_testcapi``."""

    def test_decodeutf8(self):
        """Test PyUnicode_DecodeUTF8()

        Covers round-tripping of valid text, rejection of malformed or
        truncated input, the 'replace' error handler, and an unknown
        error handler name.
        """
        decode = _testcapi.unicode_decodeutf8

        # Valid text must round-trip, with and without an explicit handler.
        for text in ('abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600'):
            encoded = text.encode('utf-8')
            self.assertEqual(decode(encoded), text)
            self.assertEqual(decode(encoded, 'strict'), text)

        # Malformed input, including a sequence cut short at the end of the
        # buffer, is an error for the non-stateful decoder.
        for malformed in (b'\x80', b'\xc0', b'\xff', b'a\xf0\x9f'):
            with self.assertRaises(UnicodeDecodeError):
                decode(malformed)

        self.assertEqual(decode(b'a\xf0\x9f', 'replace'), 'a\ufffd')
        self.assertEqual(decode(b'a\xf0\x9fb', 'replace'), 'a\ufffdb')

        # An unregistered error handler name is only looked up once an
        # error actually has to be handled.
        with self.assertRaises(LookupError):
            decode(b'a\x80', 'foo')
        # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and
        # negative size.

    def test_decodeutf8stateful(self):
        """Test PyUnicode_DecodeUTF8Stateful()

        The wrapper returns ``(decoded, consumed)``; each check below pins
        both the decoded text and the number of bytes reported as consumed.
        """
        decode_stateful = _testcapi.unicode_decodeutf8stateful

        # Fully valid input: everything is decoded and every byte consumed.
        for text in ('abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600'):
            encoded = text.encode('utf-8')
            whole_input = (text, len(encoded))
            self.assertEqual(decode_stateful(encoded), whole_input)
            self.assertEqual(decode_stateful(encoded, 'strict'), whole_input)

        for malformed in (b'\x80', b'\xc0', b'\xff'):
            with self.assertRaises(UnicodeDecodeError):
                decode_stateful(malformed)

        # A sequence that is merely incomplete at the end of the buffer is
        # left unconsumed instead of being reported as an error.
        self.assertEqual(decode_stateful(b'a\xf0\x9f'), ('a', 1))
        self.assertEqual(decode_stateful(b'a\xf0\x9f', 'replace'), ('a', 1))

        # The same bytes followed by more data are a genuine error.
        with self.assertRaises(UnicodeDecodeError):
            decode_stateful(b'a\xf0\x9fb')
        self.assertEqual(decode_stateful(b'a\xf0\x9fb', 'replace'),
                         ('a\ufffdb', 4))

        with self.assertRaises(LookupError):
            decode_stateful(b'a\x80', 'foo')
        # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and
        # negative size.
        # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of
        # "consumed".


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix :c:func:`PyUnicode_DecodeUTF8Stateful` for ASCII-only data:
``*consumed`` was not set.
37 changes: 36 additions & 1 deletion 37 Modules/_testcapimodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2112,6 +2112,40 @@ unicode_asutf8andsize(PyObject *self, PyObject *args)
return Py_BuildValue("(Nn)", result, utf8_len);
}

/* Test PyUnicode_DecodeUTF8()
 *
 * Python-level call: unicode_decodeutf8(data[, errors]).
 * The "y#" unit yields the byte buffer and its length; the optional "z"
 * unit yields the error handler name, which stays NULL when omitted.
 * Returns whatever PyUnicode_DecodeUTF8() returns, or NULL if argument
 * parsing fails.
 */
static PyObject *
unicode_decodeutf8(PyObject *self, PyObject *args)
{
    const char *errors = NULL;
    const char *buf;
    Py_ssize_t buflen;

    if (PyArg_ParseTuple(args, "y#|z", &buf, &buflen, &errors)) {
        /* Hand the decoder's result (or its error) straight back. */
        return PyUnicode_DecodeUTF8(buf, buflen, errors);
    }
    return NULL;
}

/* Test PyUnicode_DecodeUTF8Stateful()
 *
 * Python-level call: unicode_decodeutf8stateful(data[, errors]).
 * Arguments are parsed exactly as "y#|z": a byte buffer with its length
 * and an optional error handler name (NULL when omitted).
 * On success returns a 2-tuple (decoded, consumed); returns NULL if
 * argument parsing or decoding fails.
 */
static PyObject *
unicode_decodeutf8stateful(PyObject *self, PyObject *args)
{
    const char *errors = NULL;
    const char *buf;
    Py_ssize_t buflen;
    /* Start from a recognizable value; presumably this makes it visible
       in the returned tuple when the decoder never stores to *consumed. */
    Py_ssize_t consumed = 123456789;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "y#|z", &buf, &buflen, &errors)) {
        return NULL;
    }

    decoded = PyUnicode_DecodeUTF8Stateful(buf, buflen, errors, &consumed);

    /* "N" passes our reference to `decoded` on to the new tuple. */
    return (decoded != NULL)
        ? Py_BuildValue("(Nn)", decoded, consumed)
        : NULL;
}

static PyObject *
unicode_findchar(PyObject *self, PyObject *args)
{
Expand Down Expand Up @@ -5846,7 +5880,8 @@ static PyMethodDef TestMethods[] = {
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_decodeutf8", unicode_decodeutf8, METH_VARARGS},
{"unicode_decodeutf8stateful",unicode_decodeutf8stateful, METH_VARARGS}, {"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
#if USE_UNICODE_WCHAR_CACHE
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
Expand Down
3 changes: 3 additions & 0 deletions 3 Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5206,6 +5206,9 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
}
s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u));
if (s == end) {
if (consumed) {
*consumed = size;
}
return u;
}

Expand Down
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.