Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

gh-101765: Fix SystemError / segmentation fault in iter __reduce__ when internal access of builtins.__dict__ exhausts the iterator #101769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f256afe
Fix undefined behavior in listiter_reduce from _PyEval_GetBuiltin sid…
ionite34 Feb 10, 2023
6b4faad
Update comment to not mention `__eq__`
ionite34 Feb 10, 2023
a6d6211
Fix undefined behavior in iter_reduce and calliter_reduce
ionite34 Feb 10, 2023
4ccf427
Update listiter_reduce_general comment
ionite34 Feb 10, 2023
c2c9cfb
Fix undefined behavior in bytearrayiter_reduce from _PyEval_GetBuilti…
ionite34 Feb 10, 2023
e2989d9
Fix undefined behavior in striter_reduce from _PyEval_GetBuiltin side…
ionite34 Feb 10, 2023
71960a8
Fix undefined behavior in tupleiter_reduce from _PyEval_GetBuiltin si…
ionite34 Feb 10, 2023
efa0540
Move iter call in unicodeiter_reduce before `it` pointer access due t…
ionite34 Feb 10, 2023
c5abb14
Add iter reduce tests for issue #101765
ionite34 Feb 10, 2023
45522c6
Remove C++ reference in comments
ionite34 Feb 10, 2023
4f5fc19
Remove C++ reference in comments
ionite34 Feb 10, 2023
049a8dd
Move builtin declarations inside if blocks
ionite34 Feb 10, 2023
ef4f955
Move _PyEval_GetBuiltin before gi checks, add gi NULL check in ga_ite…
ionite34 Feb 10, 2023
7d4afb0
Update iter reduce mutating tests for generic alias
ionite34 Feb 10, 2023
8e4418d
📜🤖 Added by blurb_it.
blurb-it[bot] Feb 10, 2023
d8ced8e
Fix backticks format for news
ionite34 Feb 10, 2023
178b8ea
Refactor iter reduce builtins mutation tests
ionite34 Feb 10, 2023
49ba8c3
Update iter mutating builtins test to include reversed iterator for l…
ionite34 Feb 10, 2023
93854e1
Add comment in unicodeiter_reduce for moving iter call before it poin…
ionite34 Feb 10, 2023
98ec3c6
Change test `__builtins__` to builtins import
ionite34 Feb 10, 2023
e661495
Change NEWS blurb phrasing
ionite34 Feb 10, 2023
19ab9c6
Update iter reduce mutating builtins test comments and simplify logic
ionite34 Feb 10, 2023
9b664c2
Update comments to better reflect issue
ionite34 Feb 24, 2023
c67b11a
Clarify test comments
ionite34 Feb 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions 84 Lib/test/test_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from test.support import check_free_after_iterating, ALWAYS_EQ, NEVER_EQ
import pickle
import collections.abc
import functools
import contextlib
import builtins

# Test result of triple loop (too big to inline)
TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2),
Expand Down Expand Up @@ -91,6 +94,12 @@ def __call__(self):
raise IndexError # Emergency stop
return i

class EmptyIterClass:
def __len__(self):
return 0
def __getitem__(self, i):
raise StopIteration

# Main test suite

class TestCase(unittest.TestCase):
Expand Down Expand Up @@ -238,6 +247,81 @@ def test_mutating_seq_class_exhausted_iter(self):
self.assertEqual(list(empit), [5, 6])
self.assertEqual(list(a), [0, 1, 2, 3, 4, 5, 6])

def test_reduce_mutating_builtins_iter(self):
# This is a reproducer of issue #101765
# where iter `__reduce__` calls could lead to a segfault or SystemError
# depending on the order of C argument evaluation, which is undefined

# Backup builtins
builtins_dict = builtins.__dict__
orig = {"iter": iter, "reversed": reversed}

def run(builtin_name, item, init=None, sentinel=None):
if init is not None:
args = init if isinstance(init, tuple) else (init,)
carljm marked this conversation as resolved.
Show resolved Hide resolved
item = item(*args)
it = iter(item) if sentinel is None else iter(item, sentinel)

class CustomStr:
def __init__(self, name, iterator):
self.name = name
self.iterator = iterator
def __hash__(self):
return hash(self.name)
def __eq__(self, other):
# Here we exhaust our iterator, possibly changing
# its `it_seq` pointer to NULL
# The `__reduce__` call should correctly get
# the pointers after this call
list(self.iterator)
return other == self.name

# del is required here
# to avoid calling the last test case's custom __eq__
del builtins_dict[builtin_name]
builtins_dict[CustomStr(builtin_name, it)] = orig[builtin_name]

return it.__reduce__()

types = [
(EmptyIterClass, ()),
(bytes, 8),
(bytearray, 8),
(tuple, range(8)),
(lambda: 0, None, 0),
(tuple[int],) # GenericAlias
]

try:
run_iter = functools.partial(run, "iter")
# The returned value of `__reduce__` should not only be valid
# but also *empty*, as `it` was exhausted during `__eq__`
# i.e "xyz" returns (iter, ("",))
self.assertEqual(run_iter(str, "xyz"), (orig["iter"], ("",)))
self.assertEqual(run_iter(list, range(8)), (orig["iter"], ([],)))

# _PyEval_GetBuiltin is also called for `reversed` in a branch of
# listiter_reduce_general
self.assertEqual(
run("reversed", orig["reversed"], list(range(8))),
(iter, ([],))
)

for case in types:
self.assertEqual(run_iter(*case), (orig["iter"], ((),)))
finally:
# del is required here
# to avoid calling our custom __eq__
# we also need to supress KeyErrors in case
carljm marked this conversation as resolved.
Show resolved Hide resolved
# a failed test deletes the key without setting anything
with contextlib.suppress(KeyError):
del builtins_dict["iter"]
with contextlib.suppress(KeyError):
del builtins_dict["reversed"]
carljm marked this conversation as resolved.
Show resolved Hide resolved
# Restore original builtins
for key, func in orig.items():
builtins_dict[key] = func

# Test a new_style class with __iter__ but no next() method
def test_new_style_iter_class(self):
class IterClass(object):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix SystemError / segmentation fault in iter ``__reduce__`` when calling ``__builtins__.__dict__["iter"]`` mutates the iter object.
carljm marked this conversation as resolved.
Show resolved Hide resolved
12 changes: 9 additions & 3 deletions 12 Objects/bytearrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2391,11 +2391,17 @@ PyDoc_STRVAR(length_hint_doc,
static PyObject *
bytearrayiter_reduce(bytesiterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code.
* calls must be *before* access of `it` pointers,
* since C parameter eval order is undefined.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think parameter eval order isn't the issue and shouldn't be mentioned. The problem is that the _PyEval_GetBuiltin call invalidates the previous if check.

For example, this code would also be buggy:

    if (it->it_seq != NULL) {
        PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
        return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
    }

The parameter evaluation order makes things a bit more unpredictable, but the bug is that the if check is done too early, not that we're relying on evaluation order.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, yeah. I think this might be better?

    /* _PyEval_GetBuiltin can invoke arbitrary code,
     * call must be before access of iterator pointers.
     * see issue #101765 */

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated all comments to the above in 9b664c2

* see issue #101765 */

if (it->it_seq != NULL) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
return Py_BuildValue("N(())", iter);
}
}

Expand Down
12 changes: 9 additions & 3 deletions 12 Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3169,11 +3169,17 @@ PyDoc_STRVAR(length_hint_doc,
static PyObject *
striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code.
* calls must be *before* access of `it` pointers,
* since C parameter eval order is undefined.
* see issue #101765 */

if (it->it_seq != NULL) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
return Py_BuildValue("N(())", iter);
}
}

Expand Down
12 changes: 11 additions & 1 deletion 12 Objects/genericaliasobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -877,8 +877,18 @@ ga_iter_clear(PyObject *self) {
static PyObject *
ga_iter_reduce(PyObject *self, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
gaiterobject *gi = (gaiterobject *)self;
return Py_BuildValue("N(O)", _PyEval_GetBuiltin(&_Py_ID(iter)), gi->obj);

/* _PyEval_GetBuiltin can invoke arbitrary code.
* call must be *before* access of `gi` pointers,
* since C parameter eval order is undefined.
* see issue #101765 */

if (gi->obj)
return Py_BuildValue("N(O)", iter, gi->obj);
else
return Py_BuildValue("N(())", iter);
}

static PyMethodDef ga_iter_methods[] = {
Expand Down
24 changes: 18 additions & 6 deletions 24 Objects/iterobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,17 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(
static PyObject *
iter_reduce(seqiterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code.
* calls must be *before* access of `it` pointers,
* since C parameter eval order is undefined.
* see issue #101765 */

if (it->it_seq != NULL)
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
else
return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
return Py_BuildValue("N(())", iter);
}

PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
Expand Down Expand Up @@ -239,11 +245,17 @@ calliter_iternext(calliterobject *it)
static PyObject *
calliter_reduce(calliterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code.
* calls must be *before* access of `it` pointers,
* since C parameter eval order is undefined.
* see issue #101765 */

if (it->it_callable != NULL && it->it_sentinel != NULL)
return Py_BuildValue("N(OO)", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_callable, it->it_sentinel);
return Py_BuildValue("N(OO)", iter, it->it_callable, it->it_sentinel);
else
return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
return Py_BuildValue("N(())", iter);
}

static PyMethodDef calliter_methods[] = {
Expand Down
13 changes: 9 additions & 4 deletions 13 Objects/listobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3444,18 +3444,23 @@ listiter_reduce_general(void *_it, int forward)
{
PyObject *list;

/* _PyEval_GetBuiltin can invoke arbitrary code.
* calls must be *before* access of `_it` pointers,
* since C parameter eval order is undefined.
* see issue #101765 */

/* the objects are not the same, index is of different types! */
if (forward) {
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));
_PyListIterObject *it = (_PyListIterObject *)_it;
if (it->it_seq) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
}
} else {
PyObject *reversed = _PyEval_GetBuiltin(&_Py_ID(reversed));
ionite34 marked this conversation as resolved.
Show resolved Hide resolved
listreviterobject *it = (listreviterobject *)_it;
if (it->it_seq) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it_seq is NULL then reversed is leaked here, _PyEval_GetBuiltin returns a strong reference.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, but doesn't that mean the old code also leaked references? I'll prepare a PR to fix this now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh wait, the N code to Py_BuildValue steals a reference, so the previous code was right in terms of refcounting.

return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(reversed)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", reversed, it->it_seq, it->it_index);
}
}
/* empty iterator, create an empty list */
Expand Down
12 changes: 9 additions & 3 deletions 12 Objects/tupleobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1048,11 +1048,17 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(
static PyObject *
tupleiter_reduce(_PyTupleIterObject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code.
* calls must be *before* access of `it` pointers,
* since C parameter eval order is undefined.
* see issue #101765 */

if (it->it_seq)
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
else
return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
return Py_BuildValue("N(())", iter);
}

static PyObject *
Expand Down
12 changes: 9 additions & 3 deletions 12 Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -14784,14 +14784,20 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(
static PyObject *
unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
{
PyObject *iter = _PyEval_GetBuiltin(&_Py_ID(iter));

/* _PyEval_GetBuiltin can invoke arbitrary code.
* calls must be *before* access of `it` pointers,
* since C parameter eval order is undefined.
* see issue #101765 */

ionite34 marked this conversation as resolved.
Show resolved Hide resolved
if (it->it_seq != NULL) {
return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
it->it_seq, it->it_index);
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
PyObject *u = unicode_new_empty();
if (u == NULL)
return NULL;
return Py_BuildValue("N(N)", _PyEval_GetBuiltin(&_Py_ID(iter)), u);
return Py_BuildValue("N(N)", iter, u);
}
}

Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.