diff --git a/doc/release/upcoming_changes/28639.improvement.rst b/doc/release/upcoming_changes/28639.improvement.rst new file mode 100644 index 000000000000..9952f9142532 --- /dev/null +++ b/doc/release/upcoming_changes/28639.improvement.rst @@ -0,0 +1,8 @@ +Improved range and speed of conversion from a ``PyInt`` to a `np.longdouble` +---------------------------------------------------------------------------- +To convert to a `np.longdouble` from a ``PyInt``, the conversion is now done +numerically instead of through a string. Which means that: + +* For platforms that support 80/128 bit ``longdoubles``, it can handle numbers with more than 10^4300 digits +* If the number is too large to be converted (platform dependent) it now raises an ``OverflowError``, same behavior as Python's ``float`` function +* Improved performance \ No newline at end of file diff --git a/numpy/_core/src/common/npy_longdouble.c b/numpy/_core/src/common/npy_longdouble.c index ce80a9ae2bc3..7c63db69512f 100644 --- a/numpy/_core/src/common/npy_longdouble.c +++ b/numpy/_core/src/common/npy_longdouble.c @@ -9,6 +9,10 @@ #include "numpyos.h" +#ifndef LDBL_MAX_EXP + #include +#endif + /* * Heavily derived from PyLong_FromDouble * Notably, we can't set the digits directly, so have to shift and or instead. @@ -97,79 +101,119 @@ npy_longdouble_to_PyLong(npy_longdouble ldval) return v; } -/* Helper function to get unicode(PyLong).encode('utf8') */ -static PyObject * -_PyLong_Bytes(PyObject *long_obj) { - PyObject *bytes; - PyObject *unicode = PyObject_Str(long_obj); - if (unicode == NULL) { - return NULL; - } - bytes = PyUnicode_AsUTF8String(unicode); - Py_DECREF(unicode); - return bytes; +// Overflow error function +npy_longdouble _ldbl_ovfl_err(void) { + PyErr_SetString(PyExc_OverflowError, "Number too big to be represented as a np.longdouble --This is platform dependent--"); + return -1; } - -/** - * TODO: currently a hack that converts the long through a string. This is - * correct, but slow. - * - * Another approach would be to do this numerically, in a similar way to - * PyLong_AsDouble. - * However, in order to respect rounding modes correctly, this needs to know - * the size of the mantissa, which is platform-dependent. - */ -NPY_VISIBILITY_HIDDEN npy_longdouble -npy_longdouble_from_PyLong(PyObject *long_obj) { - npy_longdouble result = 1234; - char *end; - char *cstr; - PyObject *bytes; - - /* convert the long to a string */ - bytes = _PyLong_Bytes(long_obj); - if (bytes == NULL) { - return -1; +#define NPY_LGDB_SIGN_POS 1 +#define NPY_LGDB_SIGN_NEG -1 + +#ifndef NPY_LDBL_MAX_EXP + #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE + #define NPY_LDBL_MAX_EXP DBL_MAX_EXP + #else + #define NPY_LDBL_MAX_EXP LDBL_MAX_EXP + #endif +#endif + +// Uses mathematical opperations to calculate the number, by using the numbers in the +// mantissa, scaling them approprietly using exp, and then adding them together +npy_longdouble _int_to_ld(int64_t *val, int exp, int sign) { + if (PyErr_Occurred()) { return -1; } + uint64_t mantissa[3]; + for (int i = 0; i < 3; i++) { mantissa[i] = (uint64_t)val[i]; } + npy_longdouble ld; + if (exp == 0) { + ld = (npy_longdouble)sign * (npy_longdouble)mantissa[0]; + // Edge case for numbers that may or maynot be too big to be represented + } else if (exp == NPY_LDBL_MAX_EXP && mantissa[0] == 0) { + ld = (npy_longdouble)sign * (ldexpl((npy_longdouble)mantissa[1], exp - 64) + + ldexpl((npy_longdouble)mantissa[2], exp - 128)); + // Sometimes it overflows in weird ways + if (ld == (npy_longdouble)INFINITY || ld == (npy_longdouble)(-INFINITY) || ld == (npy_longdouble)NAN || ld == (npy_longdouble)(-NAN)) { + return _ldbl_ovfl_err(); + } + } else if (exp >= NPY_LDBL_MAX_EXP) { + return _ldbl_ovfl_err(); + } else { + ld = (npy_longdouble)sign * (ldexpl((npy_longdouble)mantissa[0], exp) + + ldexpl((npy_longdouble)mantissa[1], exp - 64) + + ldexpl((npy_longdouble)mantissa[2], exp - 128)); } + return ld; +} - cstr = PyBytes_AsString(bytes); - if (cstr == NULL) { - goto fail; - } - end = NULL; - - /* convert the string to a long double and capture errors */ - errno = 0; - result = NumPyOS_ascii_strtold(cstr, &end); - if (errno == ERANGE) { - /* strtold returns INFINITY of the correct sign. */ - if (PyErr_Warn(PyExc_RuntimeWarning, - "overflow encountered in conversion from python long") < 0) { - goto fail; +// Helper function that get the exponent and mantissa, this works on all platforms +// This function works by getting the bits of the number in increments of 64 bits +// Since the size of the number is unknown, a while loop is needed. +// The more significant digits are stored in the smaller indexes +void _get_num(PyObject* py_int, int64_t* val, int *exp, int *ovf) { + PyObject* shift = PyLong_FromLong(64); + PyObject* temp = py_int; + while (*ovf != 0) { + *exp += 64; + val[2] = val[1]; + val[1] = (uint64_t)PyLong_AsUnsignedLongLongMask(py_int); + temp = PyNumber_Rshift(py_int, shift); + if (temp != py_int) { // Check if a new object was created + Py_DECREF(py_int); // Decrement refcount of the old py_int + py_int = temp; // Update py_int to the new object } + val[0] = (uint64_t)PyLong_AsLongLongAndOverflow(py_int, ovf); } - else if (errno) { - PyErr_Format(PyExc_RuntimeError, - "Could not parse python long as longdouble: %s (%s)", - cstr, - strerror(errno)); - goto fail; - } + Py_DECREF(shift); + Py_DECREF(temp); +} - /* Extra characters at the end of the string, or nothing parsed */ - if (end == cstr || *end != '\0') { - PyErr_Format(PyExc_RuntimeError, - "Could not parse long as longdouble: %s", - cstr); - goto fail; - } +// helper function that gets the sign, and if the number is too big to be represented +// as a int64, calls _get_num to find the exponent and mantissa +void _fix_py_num(PyObject* py_int, int64_t* val, int *exp, int *sign) { + int overflow; + val[0] = PyLong_AsLongLongAndOverflow(py_int, &overflow); + if (overflow == 1) { + *sign = NPY_LGDB_SIGN_POS; + PyObject* copy_int = PyNumber_Absolute(py_int); + _get_num(copy_int, val, exp, &overflow); + } else if (overflow == -1) { + *sign = NPY_LGDB_SIGN_NEG; + PyObject* copy_int = PyNumber_Negative(py_int); //create new PyLongObject + _get_num(copy_int, val, exp, &overflow); + } else { + if (val[0] == 0) {*sign = 0;} + else if (val[0] < 0) { val[0] = -val[0]; *sign = NPY_LGDB_SIGN_NEG;} + else {*sign = NPY_LGDB_SIGN_POS;} + } + +} +/* +The precision and max number is platform dependent, for 80 and 128 bit platforms +The largest number that can be converted is 2^16384 - 1. +In 64 bit platforms the largest number is 2^1024 - 1. +if the number to be converted is too big for a platform, it will give an error +In (my personal 80bit platform), I have tested that it converts up to the max +Now gives an overflow error if the number is too big, follows same rounding rules +as python's float() function, including overflow. +*/ +NPY_VISIBILITY_HIDDEN npy_longdouble +npy_longdouble_from_PyLong(PyObject *long_obj) { - /* finally safe to decref now that we're done with `end` */ - Py_DECREF(bytes); - return result; + npy_longdouble value; + if (PyFloat_Check(long_obj)) { + value = (npy_longdouble)PyFloat_AsDouble(long_obj); + } else if (PyLong_Check(long_obj)) { + int sign; + int E = 0; + int64_t val[3]; //needs to be size 3 in 80bit prescision + _fix_py_num(long_obj, val, &E, &sign); + if (PyErr_Occurred()) { return -1; } + value = _int_to_ld(val, E, sign); + } else { + PyErr_SetString(PyExc_TypeError, "Expected a number (int or float)"); + return -1; + } -fail: - Py_DECREF(bytes); - return -1; -} + if (PyErr_Occurred()) { return -1; } + return value; +} \ No newline at end of file diff --git a/numpy/_core/tests/test_longdouble.py b/numpy/_core/tests/test_longdouble.py index f7edd9774573..c6bd2eebcdff 100644 --- a/numpy/_core/tests/test_longdouble.py +++ b/numpy/_core/tests/test_longdouble.py @@ -1,18 +1,16 @@ import platform -import warnings - import pytest - import numpy as np from numpy._core.tests._locales import CommaDecimalPointLocale from numpy.testing import ( - IS_MUSL, assert_, - assert_array_equal, assert_equal, assert_raises, + assert_array_equal, temppath, -) + assert_allclose, + IS_MUSL + ) LD_INFO = np.finfo(np.longdouble) longdouble_longer_than_double = (LD_INFO.eps < np.finfo(np.double).eps) @@ -323,25 +321,31 @@ def test_fromstring_foreign_value(self): np.fromstring("1,234", dtype=np.longdouble, sep=" ") +# Maximum number that will pass the test in all platforms is: +# 2^1023 + 2^1022 + ... + 2^970 +# largest number that can be stored in a double without loosing precision +# Now gives an overflow error if te number is too big +@pytest.mark.parametrize("sign", [1, -1]) @pytest.mark.parametrize("int_val", [ - # cases discussed in gh-10723 - # and gh-9968 - 2 ** 1024, 0]) -def test_longdouble_from_int(int_val): + 2 ** 1023, 0, 10, 3, 15653215315, 5511, 644861]) +def test_longdouble_from_int(sign, int_val): # for issue gh-9968 + int_val *= sign str_val = str(int_val) - # we'll expect a RuntimeWarning on platforms - # with np.longdouble equivalent to np.double - # for large integer input - with warnings.catch_warnings(record=True) as w: - warnings.filterwarnings('always', '', RuntimeWarning) - # can be inf==inf on some platforms - assert np.longdouble(int_val) == np.longdouble(str_val) - # we can't directly compare the int and - # max longdouble value on all platforms - if np.allclose(np.finfo(np.longdouble).max, - np.finfo(np.double).max) and w: - assert w[0].category is RuntimeWarning + assert np.longdouble(int_val) == np.longdouble(str_val) + +@pytest.mark.parametrize("sign", [1, -1]) +@pytest.mark.parametrize("int_val", [ + 1000, 1815361, 358165, 646153161, 1]) +def test_longdouble_from_int_rounding(sign, int_val): + int_val *= sign + flt_val = float(int_val) + assert_allclose(np.longdouble(int_val), flt_val) + +def test_longdouble_from_int_overflow(): + + with pytest.raises(OverflowError, match="Number too big to be represented as a np.longdouble --This is platform dependent--"): + a = np.longdouble(10**5000) @pytest.mark.parametrize("bool_val", [ True, False]) diff --git a/numpy/_core/tests/test_nep50_promotions.py b/numpy/_core/tests/test_nep50_promotions.py index 8d9d9e63ce38..2a95c1858b3e 100644 --- a/numpy/_core/tests/test_nep50_promotions.py +++ b/numpy/_core/tests/test_nep50_promotions.py @@ -70,39 +70,15 @@ def test_nep50_weak_integers_with_inexact(dtype): too_big_int = int(np.finfo(dtype).max) * 2 - if dtype in "dDG": - # These dtypes currently convert to Python float internally, which - # raises an OverflowError, while the other dtypes overflow to inf. - # NOTE: It may make sense to normalize the behavior! - with pytest.raises(OverflowError): - scalar_type(1) + too_big_int - - with pytest.raises(OverflowError): - np.array(1, dtype=dtype) + too_big_int - else: - # NumPy uses (or used) `int -> string -> longdouble` for the - # conversion. But Python may refuse `str(int)` for huge ints. - # In that case, RuntimeWarning would be correct, but conversion - # fails earlier (seems to happen on 32bit linux, possibly only debug). - if dtype in "gG": - try: - str(too_big_int) - except ValueError: - pytest.skip("`huge_int -> string -> longdouble` failed") - - # Otherwise, we overflow to infinity: - with pytest.warns(RuntimeWarning): - res = scalar_type(1) + too_big_int - assert res.dtype == dtype - assert res == np.inf - - with pytest.warns(RuntimeWarning): - # We force the dtype here, since windows may otherwise pick the - # double instead of the longdouble loop. That leads to slightly - # different results (conversion of the int fails as above). - res = np.add(np.array(1, dtype=dtype), too_big_int, dtype=dtype) - assert res.dtype == dtype - assert res == np.inf + # Some dtypes currently convert to Python float internally, which + # raises an OverflowError, while the other dtypes overflow to inf. + # This catches it all, but doesn't differentiate + # NOTE: It may make sense to normalize the behavior! + with pytest.raises((OverflowError, RuntimeWarning)): + scalar_type(1) + too_big_int + + with pytest.raises((OverflowError, RuntimeWarning)): + np.array(1, dtype=dtype) + too_big_int @pytest.mark.parametrize("op", [operator.add, operator.pow])