diff --git a/INSTALL.rst.txt b/INSTALL.rst.txt
index 30f58fa6d31c..2b92267514f6 100644
--- a/INSTALL.rst.txt
+++ b/INSTALL.rst.txt
@@ -26,8 +26,13 @@ Building NumPy requires the following installed software:
 
    This is required for testing NumPy, but not for using it.
 
+4) Hypothesis__ (optional) 5.3.0 or later
+
+   This is required for testing NumPy, but not for using it.
+
 Python__ http://www.python.org
 pytest__ http://pytest.readthedocs.io
+Hypothesis__ https://hypothesis.readthedocs.io/en/latest/
 
 .. note::
diff --git a/doc/TESTS.rst.txt b/doc/TESTS.rst.txt
index 9023c7100c5d..007840b399c1 100644
--- a/doc/TESTS.rst.txt
+++ b/doc/TESTS.rst.txt
@@ -360,7 +360,17 @@ deterministic by setting the random number seed before generating it. Use
 either Python's ``random.seed(some_number)`` or NumPy's
 ``numpy.random.seed(some_number)``, depending on the source of random numbers.
 
+Alternatively, you can use `Hypothesis`_ to generate arbitrary data.
+Hypothesis manages both Python's and NumPy's random seeds for you, and
+provides a very concise and powerful way to describe data (including
+``hypothesis.extra.numpy``, e.g. for a set of mutually-broadcastable shapes).
+
+The advantages over plain random generation include tools to replay and share
+failures without requiring a fixed seed, reporting *minimal* examples for
+each failure, and better-than-naive-random techniques for triggering bugs.
+
 .. _nose: https://nose.readthedocs.io/en/latest/
 .. _pytest: https://pytest.readthedocs.io
 .. _parameterization: https://docs.pytest.org/en/latest/parametrize.html
+.. _Hypothesis: https://hypothesis.readthedocs.io/en/latest/
diff --git a/numpy/conftest.py b/numpy/conftest.py
index a843f725fc8b..1d3e0349fedb 100644
--- a/numpy/conftest.py
+++ b/numpy/conftest.py
@@ -3,6 +3,7 @@
 """
 import os
 
+import hypothesis
 import pytest
 import numpy
 
@@ -12,6 +13,12 @@
 _old_fpu_mode = None
 _collect_results = {}
 
+# See https://hypothesis.readthedocs.io/en/latest/settings.html
+hypothesis.settings.register_profile(
+    name="numpy-profile", deadline=None, print_blob=True,
+)
+hypothesis.settings.load_profile("numpy-profile")
+
 
 def pytest_configure(config):
     config.addinivalue_line("markers",
diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py
index cd3e501a5ca0..008ca20e6f6d 100644
--- a/numpy/core/tests/test_arrayprint.py
+++ b/numpy/core/tests/test_arrayprint.py
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 import sys
 import gc
+from hypothesis import given
+from hypothesis.extra import numpy as hynp
 import pytest
 
 import numpy as np
@@ -393,6 +395,18 @@ def test_wide_element(self):
             "[ 'xxxxx']"
         )
 
+    @given(hynp.from_dtype(np.dtype("U")))
+    def test_any_text(self, text):
+        # This test checks that, given any value that can be represented in an
+        # array of dtype("U") (i.e. a unicode string), ...
+        a = np.array([text, text, text])
+        # ... casting a list to an array does not e.g. truncate the value,
+        assert_equal(a[0], text)
+        # ... and that np.array2string puts a newline in the expected location.
+        expected_repr = "[{0!r} {0!r}\n {0!r}]".format(text)
+        result = np.array2string(a, max_line_width=len(repr(text)) * 2 + 3)
+        assert_equal(result, expected_repr)
+
     @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
     def test_refcount(self):
         # make sure we do not hold references to the array due to a recursive
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 3bc4cd187100..135acc51dfb3 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -14,6 +14,9 @@
     assert_warns, HAS_REFCOUNT
     )
 
+from hypothesis import assume, given, strategies as st
+from hypothesis.extra import numpy as hynp
+
 
 class TestResize:
     def test_copies(self):
@@ -2018,6 +2021,68 @@ def test_NaT_propagation(self, arr, amin, amax):
         actual = np.clip(arr, amin, amax)
         assert_equal(actual, expected)
 
+    @given(data=st.data(), shape=hynp.array_shapes())
+    def test_clip_property(self, data, shape):
+        """A property-based test using Hypothesis.
+
+        This aims for maximum generality: it could in principle generate *any*
+        valid inputs to np.clip, and in practice generates much more varied
+        inputs than human testers come up with.
+
+        Because many of the inputs have tricky dependencies - compatible dtypes
+        and mutually-broadcastable shapes - we use the `st.data()` strategy to
+        draw values *inside* the test function, from strategies constructed
+        based on earlier draws.  An alternative would be a custom strategy built
+        with `@st.composite`, but inline drawing is fine until it needs reuse.
+
+        That accounts for most of the function; the actual test is just three
+        lines to calculate and compare actual vs expected results!
+        """
+        # Our base array and bounds need not have the same dtype, as long as
+        # they are all compatible - so we allow any integer or float dtype.
+        dtype_strategy = hynp.integer_dtypes() | hynp.floating_dtypes()
+
+        # The following line is a total hack to disable the varied-dtypes
+        # component of this test, because result != expected if dtypes can vary.
+        dtype_strategy = st.just(data.draw(dtype_strategy))
+
+        # Generate an arbitrary array of the chosen shape and dtype - this is
+        # the value that we clip.
+        arr = data.draw(hynp.arrays(dtype=dtype_strategy, shape=shape))
+
+        # Generate shapes for the bounds which can be broadcast with each other
+        # and with the base shape.  Below, we might decide to use scalar bounds,
+        # but it's clearer to generate these shapes unconditionally in advance.
+        in_shapes, result_shape = data.draw(
+            hynp.mutually_broadcastable_shapes(
+                num_shapes=2,
+                base_shape=shape,
+                # Commenting out the min_dims line allows zero-dimensional arrays,
+                # and zero-dimensional arrays containing NaN make the test fail.
+                min_dims=1,
+            )
+        )
+        amin = data.draw(
+            dtype_strategy.flatmap(hynp.from_dtype)
+            | hynp.arrays(dtype=dtype_strategy, shape=in_shapes[0])
+        )
+        amax = data.draw(
+            dtype_strategy.flatmap(hynp.from_dtype)
+            | hynp.arrays(dtype=dtype_strategy, shape=in_shapes[1])
+        )
+        # If we allow either bound to be a scalar `nan`, the test will fail -
+        # so we just "assume" that away (if it is, this raises a special
+        # exception and Hypothesis will try again with different inputs).
+        assume(not np.isscalar(amin) or not np.isnan(amin))
+        assume(not np.isscalar(amax) or not np.isnan(amax))
+
+        # Then calculate our result and expected result and check that they're
+        # equal!  See gh-12519 for the discussion that chose this property.
+        result = np.clip(arr, amin, amax)
+        expected = np.minimum(amax, np.maximum(arr, amin))
+        assert_array_equal(result, expected)
+
 
 class TestAllclose:
     rtol = 1e-5
diff --git a/test_requirements.txt b/test_requirements.txt
index a4b0fcc30155..f4286d85d81c 100644
--- a/test_requirements.txt
+++ b/test_requirements.txt
@@ -1,4 +1,5 @@
 cython==0.29.14
+hypothesis==5.3.0
 pytest==5.3.5
 pytz==2019.3
 pytest-cov==2.8.1
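
For readers unfamiliar with Hypothesis, the sketch below illustrates the style
of test this patch enables; it is not part of the patch, and the test name and
the property it checks are made up purely for the example. Hypothesis draws
the inputs (here, arrays of arbitrary shape and integer dtype via
``hypothesis.extra.numpy``) and the body asserts something that should hold
for every generated value, just as ``test_any_text`` and ``test_clip_property``
above do.

import numpy as np
from hypothesis import given
from hypothesis.extra import numpy as hynp

# Illustrative sketch only: a property-based test in the style used above.
@given(hynp.arrays(dtype=hynp.integer_dtypes(), shape=hynp.array_shapes()))
def test_transpose_is_involution(arr):
    # Transposing twice must give back an array equal to the original,
    # whatever shape and integer dtype Hypothesis chose for this example.
    np.testing.assert_array_equal(arr.T.T, arr)

Run under pytest, Hypothesis will try many generated arrays per test, and with
the ``numpy-profile`` settings registered in ``numpy/conftest.py`` any failure
is reported with a minimal example and a blob that can replay it.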