From d6d90d1d3b6b65541054bbe50a3836dc7bc794fb Mon Sep 17 00:00:00 2001
From: Nicholas Musolino
Date: Thu, 27 Jun 2019 15:46:00 -0400
Subject: [PATCH] bpo-26828: Add __length_hint__() to builtin map iterator

---
 Lib/test/test_builtin.py                      | 105 +++++++++++++++---
 .../2019-06-27-16-18-00.bpo-37435.LUdu4p.rst  |   2 +
 Python/bltinmodule.c                          |  34 ++++++
 3 files changed, 128 insertions(+), 13 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-06-27-16-18-00.bpo-37435.LUdu4p.rst

diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index 61155799c44a92a..b4169f0814aa5db 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -23,7 +23,7 @@
 from itertools import product
 from textwrap import dedent
 from types import AsyncGeneratorType, FunctionType
-from operator import neg
+from operator import neg, length_hint
 from test.support import (
     EnvironmentVarGuard, TESTFN, check_warnings, swap_attr, unlink,
     maybe_get_event_loop_policy)
@@ -35,6 +35,23 @@
     pty = signal = None
 
 
+class Iter(object):
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        raise StopIteration
+
+
+class SequenceWithoutALength:
+
+    def __getitem__(self, i):
+        if i == 5:
+            raise IndexError
+        else:
+            return i
+
 class Squares:
 
     def __init__(self, max):
@@ -932,6 +949,80 @@ def test_map_pickle(self):
             m2 = map(map_char, "Is this the real life?")
             self.check_iter_pickle(m1, list(m2), proto)
 
+    def test_map_length_hint(self):
+        def identity(x):
+            return x
+
+        it = map(pow, range(4), range(10))
+        self.assertEqual(4, length_hint(it))
+        next(it)
+        self.assertEqual(3, length_hint(it))
+        next(it)
+        self.assertEqual(2, length_hint(it))
+        self.assertEqual([pow(2, 2), pow(3, 3)], list(it))  # Consume the iterator.
+        self.assertEqual(0, length_hint(it))
+
+        types = [tuple, str, list, set, dict.fromkeys]
+        for typ in types:
+            with self.subTest(typ=typ):
+                self.assertEqual(0, length_hint(map(identity, typ('')), 8))
+                self.assertEqual(0, length_hint(map(identity, iter(typ(''))), 8))
+
+                self.assertEqual(3, length_hint(map(identity, typ('abc')), 8))
+                self.assertEqual(3, length_hint(map(identity, iter(typ('abc'))), 8))
+
+        class Hinted(Iter):
+            def __init__(self, hint_result):
+                self.hint_result = hint_result
+
+            def __length_hint__(self):
+                hint_result = self.hint_result
+                if isinstance(hint_result, type) and issubclass(hint_result, Exception):
+                    raise hint_result()
+                return hint_result
+
+        self.assertEqual(
+            3,
+            length_hint(map(identity, Hinted(3), Hinted(4), Hinted(5)))
+        )
+        self.assertEqual(
+            8,
+            length_hint(map(identity, Hinted(3), Iter()), 8)
+        )
+        self.assertEqual(
+            8,
+            length_hint(map(identity, Hinted(3), Hinted(NotImplemented)), 8)
+        )
+        self.assertEqual(
+            8,
+            length_hint(map(identity, SequenceWithoutALength()), 8)
+        )
+        self.assertEqual(
+            8,
+            length_hint(map(identity, Hinted(3), SequenceWithoutALength()), 8)
+        )
+        self.assertRaises(
+            ValueError, length_hint, map(identity, Hinted(ValueError))
+        )
+        self.assertRaises(
+            ValueError, length_hint, map(identity, Hinted(-5))
+        )
+        self.assertRaises(
+            OverflowError, length_hint, map(identity, Hinted(sys.maxsize + 1))
+        )
+        # According to PEP 424, when __length_hint__ raises TypeError,
+        # operator.length_hint() returns its default value.
+        self.assertEqual(
+            8, length_hint(map(identity, Hinted(3), Hinted(TypeError)), 8)
+        )
+        # This is probably not desirable.  It emerges because an inner
+        # evaluation of ``operator.length_hint(Hinted('z'))`` raises
+        # TypeError, which is then caught by an outer call to
+        # ``operator.length_hint``.
+        self.assertEqual(
+            8, length_hint(map(identity, Hinted(3), Hinted('z')), 8)
+        )
+
     def test_max(self):
         self.assertEqual(max('123123'), '3')
         self.assertEqual(max(1, 2, 3), 3)
@@ -1048,12 +1139,6 @@ def test_next(self):
         self.assertRaises(StopIteration, next, it)
         self.assertEqual(next(it, 42), 42)
 
-        class Iter(object):
-            def __iter__(self):
-                return self
-            def __next__(self):
-                raise StopIteration
-
         it = iter(Iter())
         self.assertEqual(next(it, 42), 42)
         self.assertRaises(StopIteration, next, it)
@@ -1450,12 +1535,6 @@ class G:
         # Make sure zip doesn't try to allocate a billion elements for the
         # result list when one of its arguments doesn't say how long it is.
         # A MemoryError is the most likely failure mode.
-        class SequenceWithoutALength:
-            def __getitem__(self, i):
-                if i == 5:
-                    raise IndexError
-                else:
-                    return i
         self.assertEqual(
             list(zip(SequenceWithoutALength(), range(2**30))),
             list(enumerate(range(5)))
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-06-27-16-18-00.bpo-37435.LUdu4p.rst b/Misc/NEWS.d/next/Core and Builtins/2019-06-27-16-18-00.bpo-37435.LUdu4p.rst
new file mode 100644
index 000000000000000..20586a19387a276
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-06-27-16-18-00.bpo-37435.LUdu4p.rst
@@ -0,0 +1,2 @@
+The iterator provided by builtin :func:`map` now provides a ``__length_hint__``
+method.  Patch by Nicholas Musolino.
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c
index 90fbb44882b01b3..2eecb913aa63920 100644
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -1293,6 +1293,39 @@ map_next(mapobject *lz)
     return result;
 }
 
+PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
+
+static PyObject *
+map_length_hint(mapobject *lz, PyObject *Py_UNUSED(ignored))
+{
+    Py_ssize_t niters, i, hint_result = PY_SSIZE_T_MAX;
+    niters = PyTuple_GET_SIZE(lz->iters);
+    assert(niters > 0);
+
+    for (i = 0; i < niters; i++) {
+        PyObject *it = PyTuple_GET_ITEM(lz->iters, i);
+        Py_ssize_t it_hint = PyObject_LengthHint(it, /*defaultvalue*/ -1);
+        /* There are three cases to distinguish:
+            it_hint >= 0:
+                Iterator provided length hint.
+            it_hint < 0 && PyErr_Occurred():
+                Exception occurred during length_hint evaluation; propagate it.
+            it_hint < 0 && no error occurred:
+                The iterator does not provide a length hint.
+        */
+        if (it_hint < 0) {
+            if (PyErr_Occurred()) {
+                return NULL;
+            }
+            Py_RETURN_NOTIMPLEMENTED;
+        }
+        /* min(hint_result, it_hint) */
+        hint_result = (it_hint < hint_result) ? it_hint : hint_result;
+    }
+    assert(hint_result >= 0);
+    return PyLong_FromSsize_t(hint_result);
+}
+
 static PyObject *
 map_reduce(mapobject *lz, PyObject *Py_UNUSED(ignored))
 {
@@ -1313,6 +1346,7 @@ map_reduce(mapobject *lz, PyObject *Py_UNUSED(ignored))
 }
 
 static PyMethodDef map_methods[] = {
+    {"__length_hint__", (PyCFunction)map_length_hint, METH_NOARGS, length_hint_doc},
     {"__reduce__", (PyCFunction)map_reduce, METH_NOARGS, reduce_doc},
     {NULL, NULL} /* sentinel */
 };