Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

BUG: Converting large integer to np.longdouble can raise ValueError #28722

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: main
Choose a base branch
Loading
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 200 additions & 62 deletions 262 numpy/_core/src/common/npy_longdouble.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@

#include "numpyos.h"

#ifndef LDBL_MAX_EXP
#include <float.h>
#endif

/*
* Heavily derived from PyLong_FromDouble
* Notably, we can't set the digits directly, so have to shift and or instead.
Expand Down Expand Up @@ -97,79 +101,213 @@ npy_longdouble_to_PyLong(npy_longdouble ldval)
return v;
}

/* Helper function to get unicode(PyLong).encode('utf8') */
static PyObject *
_PyLong_Bytes(PyObject *long_obj) {
PyObject *bytes;
PyObject *unicode = PyObject_Str(long_obj);
if (unicode == NULL) {
return NULL;
}
bytes = PyUnicode_AsUTF8String(unicode);
Py_DECREF(unicode);
return bytes;
npy_longdouble _ldbl_ovfl_err(void) {
PyErr_SetString(PyExc_OverflowError, "Number too big to be represented as a np.longdouble --This is platform dependent--");
return -1;
}

// When Double is the same as the long double, and it is little endian
// It then (thanfully) follows IEEE 754, so it is possible to do bitwise operations
#if LDBL_MANT_DIG == 53 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__

/**
* TODO: currently a hack that converts the long through a string. This is
* correct, but slow.
*
* Another approach would be to do this numerically, in a similar way to
* PyLong_AsDouble.
* However, in order to respect rounding modes correctly, this needs to know
* the size of the mantissa, which is platform-dependent.
*/
NPY_VISIBILITY_HIDDEN npy_longdouble
npy_longdouble_from_PyLong(PyObject *long_obj) {
npy_longdouble result = 1234;
char *end;
char *cstr;
PyObject *bytes;

/* convert the long to a string */
bytes = _PyLong_Bytes(long_obj);
if (bytes == NULL) {
return -1;
}
#define NPY_LGDB_SIGN_POS 0
#define NPY_LGDB_SIGN_NEG 1

typedef union {
uint64_t u64;
npy_longdouble d;
} uint64_double_union;

cstr = PyBytes_AsString(bytes);
if (cstr == NULL) {
goto fail;
npy_longdouble _int_to_ld(int64_t *val, int e, int s) {
uint64_t mantissa[3];
uint64_t exp = (uint64_t)e;
uint64_t sign = (uint64_t)s;
for (int i = 0; i < 3; i++) { mantissa[i] = (uint64_t)val[i]; }
int foo = 0;
uint64_double_union value;

if (exp == 0) {
s = (sign == 1) ? -1 : 1;
return (npy_longdouble)s*(npy_longdouble)mantissa[0];
} else if (mantissa[0] == 0) {
mantissa[1] <<= 1;
exp--;
} else {
for (int i = 63; i >= 0; i--) {
if ((mantissa[0] >> i) & 1) {
foo = i;
break;
}
}
}
exp += foo;
mantissa[0] = (mantissa[1] >> foo) | (mantissa[0] << (64 - foo));
uint64_t guard = (mantissa[0] >> 11) & 1;
mantissa[0] >>= 12;
// Rounding (round to nearest even)
if (guard) {
mantissa[0]++;
if (mantissa[0] == 0x10000000000000) {
mantissa[0] = 0;
exp++;
}
}
if (exp >= DBL_MAX_EXP) {
return _ldbl_ovfl_err();
}
end = NULL;
sign <<= 63;
exp += 1023;
exp <<= 52;
value.u64 = mantissa[0] | exp | sign;
return value.d;
}

// For 80bit platforms (x86 architectures), if it is little endian the layout of the longdouble is known
#elif LDBL_MANT_DIG == 64 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__

#define NPY_LGDB_SIGN_POS 0
#define NPY_LGDB_SIGN_NEG 1

typedef union {
npy_longdouble ld;
uint64_t u64[2]; // Two 64-bit halves
} ld_128_union;

npy_longdouble _int_to_ld(int64_t *val, int exp, int sign) {
uint64_t mantissa[3];
for (int i = 0; i < 3; i++) { mantissa[i] = (uint64_t)val[i]; }

/* convert the string to a long double and capture errors */
errno = 0;
result = NumPyOS_ascii_strtold(cstr, &end);
if (errno == ERANGE) {
/* strtold returns INFINITY of the correct sign. */
if (PyErr_Warn(PyExc_RuntimeWarning,
"overflow encountered in conversion from python long") < 0) {
goto fail;
ld_128_union value;
uint64_t guard;
int foo = 0;
if (exp == 0) {
sign = (sign == 1) ? -1 : 1;
return (npy_longdouble)sign*(npy_longdouble)mantissa[0];
} else if (mantissa[0] == 0) {
guard = (mantissa[2] >> 63) & 1;
} else {
for (int i = 63; i >= 0; i--) {
if ((mantissa[0] >> i) & 1) {
foo = i + 1;
break;
}
}
guard = (mantissa[1] >> (foo + 1)) & 1;
}
else if (errno) {
PyErr_Format(PyExc_RuntimeError,
"Could not parse python long as longdouble: %s (%s)",
cstr,
strerror(errno));
goto fail;
exp += foo - 1;
value.u64[0] = (mantissa[1] >> foo) | (mantissa[0] << (64 - foo));

// Rounding (round to nearest even)
if (guard) {
value.u64[0]++;
if (value.u64[0] == 0) {
value.u64[0] = 0x8000000000000000;
exp++;
}
}
if (exp >= LDBL_MAX_EXP) {
return _ldbl_ovfl_err();
}
value.u64[1] = (uint64_t)(exp + 16383) | (uint64_t)(sign << 15);
return value.ld;

}
#else

#define NPY_LGDB_SIGN_POS 1
#define NPY_LGDB_SIGN_NEG -1

#ifndef NPY_LDBL_MAX_EXP
#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE
#define NPY_LDBL_MAX_EXP DBL_MAX_EXP
#else
#define NPY_LDBL_MAX_EXP LDBL_MAX_EXP
#endif
#endif

/* Extra characters at the end of the string, or nothing parsed */
if (end == cstr || *end != '\0') {
PyErr_Format(PyExc_RuntimeError,
"Could not parse long as longdouble: %s",
cstr);
goto fail;
// Since there are many types of long double, 64, 80, 128...
// and no consensus, as everything changes platform to platform.
// For more performance needs to be platform specific
npy_longdouble _int_to_ld(int64_t *val, int exp, int sign) {
uint64_t mantissa[3];
for (int i = 0; i < 3; i++) { mantissa[i] = (uint64_t)val[i]; }
npy_longdouble ld;
if (exp == 0) {
ld = (npy_longdouble)sign * (npy_longdouble)mantissa[0];
} else if (exp == NPY_LDBL_MAX_EXP && mantissa[0] == 0) {
ld = (npy_longdouble)sign * ((npy_longdouble)mantissa[1] * powl(2.0L, (npy_longdouble)(exp - 64)) +
(npy_longdouble)mantissa[2] * powl(2.0L, (npy_longdouble)(exp - 128)));
//Sometimes it overflows in weird ways
if (ld == (npy_longdouble)INFINITY || ld == (npy_longdouble)(-INFINITY) || ld == (npy_longdouble)NAN || ld == (npy_longdouble)(-NAN)) {
return _ldbl_ovfl_err();
}
} else if (exp >= NPY_LDBL_MAX_EXP) {
return _ldbl_ovfl_err();
} else {
ld = (npy_longdouble)sign * ((npy_longdouble)mantissa[0] * powl(2.0L, (npy_longdouble)(exp)) +
(npy_longdouble)mantissa[1] * powl(2.0L, (npy_longdouble)(exp - 64)) +
(npy_longdouble)mantissa[2] * powl(2.0L, (npy_longdouble)(exp - 128)));
}
return ld;
}
#endif

/* finally safe to decref now that we're done with `end` */
Py_DECREF(bytes);
return result;
// Helper functions that get the exponent and mantissa, this works on all platforms
void _get_num(PyObject* py_int, int64_t* val, int *exp, int *ovf) {
PyObject* shift = PyLong_FromLong(64);
while (*ovf != 0) {
*exp += 64;
val[2] = val[1]; //only needed for 128 bit platform
val[1] = (uint64_t)PyLong_AsUnsignedLongLongMask(py_int);
py_int = PyNumber_Rshift(py_int, shift);
val[0] = (uint64_t)PyLong_AsLongLongAndOverflow(py_int, ovf);
}
}

fail:
Py_DECREF(bytes);
return -1;
void _fix_py_num(PyObject* py_int, int64_t* val, int *exp, int *sign) {

int overflow;
val[0] = PyLong_AsLongLongAndOverflow(py_int, &overflow);
if (overflow == 1) {
*sign = NPY_LGDB_SIGN_POS;
_get_num(py_int, val, exp, &overflow);
} else if (overflow == -1) {
*sign = NPY_LGDB_SIGN_NEG;
py_int = PyNumber_Negative(py_int);
_get_num(py_int, val, exp, &overflow);
} else {
if (val[0] == 0) {*sign = 0;}
else if (val[0] < 0) { val[0] = -val[0]; *sign = NPY_LGDB_SIGN_NEG;}
else {*sign = NPY_LGDB_SIGN_POS;}
}
}
/*
The precision and max number is platform dependent, for 80 and 128 bit platforms
The largest number that can be converted is 2^16384 - 1.
In 64 bit platforms the largest number is 2^1024 - 1.
if the number to be converted is too big for a platform, it will give an error
In (my personal 80bit platform), I have tested that it converts up to the max
Now gives an overflow error if the number is too big, follows same rules as python's float()
*/
NPY_VISIBILITY_HIDDEN npy_longdouble
npy_longdouble_from_PyLong(PyObject *long_obj) {

npy_longdouble value;
if (PyFloat_Check(long_obj)) {
value = (npy_longdouble)PyFloat_AsDouble(long_obj);
} else if (PyLong_Check(long_obj)) {
int sign;
int E = 0;
int64_t val[3]; //needs to be size 3 in 128bit prescision
_fix_py_num(long_obj, val, &E, &sign);
value = _int_to_ld(val, E, sign);
} else {
PyErr_SetString(PyExc_TypeError, "Expected a number (int or float)");
return -1;
}

if (PyErr_Occurred()) {
return -1;
}
return value;
}
38 changes: 22 additions & 16 deletions 38 numpy/_core/tests/test_longdouble.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,25 +319,31 @@ def test_fromstring_foreign_value(self):
np.fromstring("1,234", dtype=np.longdouble, sep=" ")


# Maximum number that will pass the test in all platforms is:
# 2^1023 + 2^1022 + ... + 2^970
# largest number that can be stored in a double without loosing precision
# Now gives an overflow error if te number is too big
@pytest.mark.parametrize("sign", [1, -1])
@pytest.mark.parametrize("int_val", [
# cases discussed in gh-10723
# and gh-9968
2 ** 1024, 0])
def test_longdouble_from_int(int_val):
2 ** 1023, 0, 10, 3, 15653215315, 5511, 644861])
def test_longdouble_from_int(sign, int_val):
# for issue gh-9968
int_val *= sign
str_val = str(int_val)
# we'll expect a RuntimeWarning on platforms
# with np.longdouble equivalent to np.double
# for large integer input
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings('always', '', RuntimeWarning)
# can be inf==inf on some platforms
assert np.longdouble(int_val) == np.longdouble(str_val)
# we can't directly compare the int and
# max longdouble value on all platforms
if np.allclose(np.finfo(np.longdouble).max,
np.finfo(np.double).max) and w:
assert w[0].category is RuntimeWarning
assert np.longdouble(int_val) == np.longdouble(str_val)

@pytest.mark.parametrize("sign", [1, -1])
@pytest.mark.parametrize("int_val", [
1000, 1815361, 358165, 646153161, 1])
def test_longdouble_from_int_rounding(sign, int_val):
int_val *= sign
flt_val = float(int_val)
assert np.isclose(np.longdouble(int_val), flt_val)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a minor comment in passing--for testing, usage of assert_allclose() will typically give you better error messages when there is a failure (since it is purpose-built for testing of course).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion, will do


def test_longdouble_from_int_overflow():

with pytest.raises(OverflowError, match="Number too big to be represented as a np.longdouble --This is platform dependent--"):
a = np.longdouble(10**5000)

@pytest.mark.parametrize("bool_val", [
True, False])
Expand Down
42 changes: 9 additions & 33 deletions 42 numpy/_core/tests/test_nep50_promotions.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,39 +71,15 @@ def test_nep50_weak_integers_with_inexact(dtype):

too_big_int = int(np.finfo(dtype).max) * 2

if dtype in "dDG":
# These dtypes currently convert to Python float internally, which
# raises an OverflowError, while the other dtypes overflow to inf.
# NOTE: It may make sense to normalize the behavior!
with pytest.raises(OverflowError):
scalar_type(1) + too_big_int

with pytest.raises(OverflowError):
np.array(1, dtype=dtype) + too_big_int
else:
# NumPy uses (or used) `int -> string -> longdouble` for the
# conversion. But Python may refuse `str(int)` for huge ints.
# In that case, RuntimeWarning would be correct, but conversion
# fails earlier (seems to happen on 32bit linux, possibly only debug).
if dtype in "gG":
try:
str(too_big_int)
except ValueError:
pytest.skip("`huge_int -> string -> longdouble` failed")

# Otherwise, we overflow to infinity:
with pytest.warns(RuntimeWarning):
res = scalar_type(1) + too_big_int
assert res.dtype == dtype
assert res == np.inf

with pytest.warns(RuntimeWarning):
# We force the dtype here, since windows may otherwise pick the
# double instead of the longdouble loop. That leads to slightly
# different results (conversion of the int fails as above).
res = np.add(np.array(1, dtype=dtype), too_big_int, dtype=dtype)
assert res.dtype == dtype
assert res == np.inf
# Some dtypes currently convert to Python float internally, which
# raises an OverflowError, while the other dtypes overflow to inf.
# This catches it all, but doesn't differentiate
# NOTE: It may make sense to normalize the behavior!
with pytest.raises((OverflowError, RuntimeWarning)):
scalar_type(1) + too_big_int

with pytest.raises((OverflowError, RuntimeWarning)):
np.array(1, dtype=dtype) + too_big_int


@pytest.mark.parametrize("op", [operator.add, operator.pow])
Expand Down
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.