Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

BUG: fix datetime64/timedelta64 hash and match Python #14622

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 2 numpy/_core/include/numpy/ndarraytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,7 @@ typedef struct {
npy_int32 month, day, hour, min, sec, us, ps, as;
} npy_datetimestruct;

/* This is not used internally. */
/* This structure contains an exploded view of a timedelta value */
typedef struct {
npy_int64 day;
npy_int32 sec, us, ps, as;
Expand Down
6 changes: 6 additions & 0 deletions 6 numpy/_core/src/multiarray/_datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,4 +328,10 @@ find_object_datetime_type(PyObject *obj, int type_num);
NPY_NO_EXPORT int
PyArray_InitializeDatetimeCasts(void);

/*
 * Computes the hash of the datetime64 value `dt`, whose unit is described
 * by `meta`. Implemented in datetime.c, where the hash is chosen so that
 * hash(numpy.datetime64(d)) == hash(d) for a Python datetime `d`.
 *
 * NaT is expected to be handled by the caller before calling this.
 * Returns -1 on failure.
 */
NPY_NO_EXPORT npy_hash_t
datetime_hash(PyArray_DatetimeMetaData *meta, npy_datetime dt);

/*
 * Computes the hash of the timedelta64 value `td`, whose unit is described
 * by `meta`. Implemented in datetime.c, where the hash is chosen so that
 * hash(numpy.timedelta64(d)) == hash(d) for a Python timedelta `d`.
 *
 * NaT is expected to be handled by the caller before calling this.
 * Generic-unit values are rejected with a ValueError.
 * Returns -1 on failure.
 */
NPY_NO_EXPORT npy_hash_t
timedelta_hash(PyArray_DatetimeMetaData *meta, npy_timedelta td);

#endif /* NUMPY_CORE_SRC_MULTIARRAY__DATETIME_H_ */
233 changes: 190 additions & 43 deletions 233 numpy/_core/src/multiarray/datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -2818,85 +2818,232 @@ convert_datetime_to_pyobject(npy_datetime dt, PyArray_DatetimeMetaData *meta)
}

/*
* Converts a timedelta into a PyObject *.
* We require that if d is a PyDateTime, then
* hash(numpy.datetime64(d)) == hash(d).
* Where possible, convert dt to a PyDateTime and hash it.
*
* Not-a-time is returned as the string "NaT".
* For microseconds or coarser, returns a datetime.timedelta.
* For units finer than microseconds, returns an integer.
* NOTE: "equals" across PyDate, PyDateTime and np.datetime64 is not transitive:
* datetime.datetime(1970, 1, 1) == np.datetime64(0, 'us')
* np.datetime64(0, 'us') == np.datetime64(0, 'D')
* datetime.datetime(1970, 1, 1) != np.datetime64(0, 'D') # date, not datetime!
*
* But:
* datetime.date(1970, 1, 1) == np.datetime64(0, 'D')
*
* For hash(datetime64(0, 'D')) we could return either PyDate.hash or PyDateTime.hash.
* We choose PyDateTime.hash to match datetime64(0, 'us')
*/
NPY_NO_EXPORT PyObject *
convert_timedelta_to_pyobject(npy_timedelta td, PyArray_DatetimeMetaData *meta)
NPY_NO_EXPORT npy_hash_t
datetime_hash(PyArray_DatetimeMetaData *meta, npy_datetime dt)
{
npy_timedelta value;
int days = 0, seconds = 0, useconds = 0;
PyObject *obj;
npy_hash_t res;
npy_datetimestruct dts;

/*
* Convert NaT (not-a-time) into None.
*/
if (td == NPY_DATETIME_NAT) {
Py_RETURN_NONE;
if (dt == NPY_DATETIME_NAT) {
return -1; /* should have been handled by caller */
}

/*
* If the type's precision is greater than microseconds, is
* Y/M/B (nonlinear units), or is generic units, return an int
*/
if (meta->base > NPY_FR_us ||
meta->base == NPY_FR_Y ||
meta->base == NPY_FR_M ||
meta->base == NPY_FR_GENERIC) {
return PyLong_FromLongLong(td);
if (meta->base == NPY_FR_GENERIC) {
obj = PyLong_FromLongLong(dt);
} else {
if (NpyDatetime_ConvertDatetime64ToDatetimeStruct(meta, dt, &dts) < 0) {
return -1;
}

if (dts.year < 1 || dts.year > 9999
|| dts.ps != 0 || dts.as != 0) {
/* NpyDatetime_ConvertDatetime64ToDatetimeStruct does memset,
* so this is safe from loose struct packing. */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing that out :).

obj = PyBytes_FromStringAndSize((const char *)&dts, sizeof(dts));
} else {
obj = PyDateTime_FromDateAndTime(dts.year, dts.month, dts.day,
dts.hour, dts.min, dts.sec, dts.us);
}
}

if (obj == NULL) {
return -1;
}

value = td;
res = PyObject_Hash(obj);

Py_DECREF(obj);

return res;
}

/*
* Explodes the timedelta value `td`, expressed in the unit described by
* `meta`, into the day/sec/us/ps/as fields of `*out`.
*
* `*out` is zeroed first, so any field a given unit does not assign stays 0.
*
* Returns 0 on success. For a base unit with no case in the switch below,
* sets a RuntimeError and returns -1.
*/
static int
convert_timedelta_to_timedeltastruct(PyArray_DatetimeMetaData *meta,
npy_timedelta td,
npy_timedeltastruct *out)
{
memset(out, 0, sizeof(npy_timedeltastruct));

/* Apply the unit multiplier (TODO: overflow treatment...) */
value *= meta->num;
td *= meta->num;

/* Convert to days/seconds/useconds */
switch (meta->base) {
case NPY_FR_W:
days = value * 7;
out->day = td * 7;
break;
case NPY_FR_D:
days = value;
out->day = td;
break;
case NPY_FR_h:
days = extract_unit_64(&value, 24ULL);
seconds = value*60*60;
out->day = extract_unit_64(&td, 24LL);
out->sec = (npy_int32)(td * 60*60);
break;
case NPY_FR_m:
days = extract_unit_64(&value, 60ULL*24);
seconds = value*60;
out->day = extract_unit_64(&td, 60LL*24);
out->sec = (npy_int32)(td * 60);
break;
case NPY_FR_s:
days = extract_unit_64(&value, 60ULL*60*24);
seconds = value;
out->day = extract_unit_64(&td, 60LL*60*24);
out->sec = (npy_int32)td;
break;
case NPY_FR_ms:
days = extract_unit_64(&value, 1000ULL*60*60*24);
seconds = extract_unit_64(&value, 1000ULL);
useconds = value*1000;
out->day = extract_unit_64(&td, 1000LL*60*60*24);
out->sec = (npy_int32)extract_unit_64(&td, 1000LL);
out->us = (npy_int32)(td * 1000LL);
break;
case NPY_FR_us:
days = extract_unit_64(&value, 1000ULL*1000*60*60*24);
seconds = extract_unit_64(&value, 1000ULL*1000);
useconds = value;
out->day = extract_unit_64(&td, 1000LL*1000*60*60*24);
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000);
out->us = (npy_int32)td;
break;
default:
// unreachable, handled by the `if` above
assert(NPY_FALSE);
/* Sub-microsecond units: the remainder below a microsecond goes to ps/as */
case NPY_FR_ns:
out->day = extract_unit_64(&td, 1000LL*1000*1000*60*60*24);
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000);
out->us = (npy_int32)extract_unit_64(&td, 1000LL);
out->ps = (npy_int32)(td * 1000LL);
break;
case NPY_FR_ps:
out->day = extract_unit_64(&td, 1000LL*1000*1000*1000*60*60*24);
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000);
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000);
out->ps = (npy_int32)td;
break;
/* fs and as assign no day field; out->day keeps the 0 from the memset */
case NPY_FR_fs:
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000*1000);
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000);
out->ps = (npy_int32)extract_unit_64(&td, 1000LL);
out->as = (npy_int32)(td * 1000LL);
break;
case NPY_FR_as:
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000*1000*1000);
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000);
out->ps = (npy_int32)extract_unit_64(&td, 1000LL*1000);
out->as = (npy_int32)td;
break;
default:
PyErr_SetString(PyExc_RuntimeError,
"NumPy timedelta metadata is corrupted with invalid "
"base unit");
return -1;
}

return 0;
}

/*
* Converts a timedelta into a PyObject *.
*
* Not-a-time is returned as None.
* For microseconds or coarser, returns a datetime.timedelta.
* For units finer than microseconds, for Y/M units, for generic units, and
* when the day count would overflow datetime.timedelta, returns an integer
* holding the raw value `td`.
*
* `td` is the raw timedelta value and `meta` describes its unit.
* Returns NULL if exploding the value into days/seconds/microseconds fails.
*/
NPY_NO_EXPORT PyObject *
convert_timedelta_to_pyobject(npy_timedelta td, PyArray_DatetimeMetaData *meta)
{
npy_timedeltastruct tds;

/*
* Convert NaT (not-a-time) into None.
*/
if (td == NPY_DATETIME_NAT) {
Py_RETURN_NONE;
}

/*
* If the type's precision is greater than microseconds, is
* Y/M/B (nonlinear units), or is generic units, return an int
*/
if (meta->base > NPY_FR_us ||
meta->base == NPY_FR_Y ||
meta->base == NPY_FR_M ||
meta->base == NPY_FR_GENERIC) {
return PyLong_FromLongLong(td);
}

/* Explode the value into the day/sec/us fields used below */
if (convert_timedelta_to_timedeltastruct(meta, td, &tds) < 0) {
return NULL;
}

/*
* If it would overflow the datetime.timedelta days, return a raw int
*/
if (days < -999999999 || days > 999999999) {
if (tds.day < -999999999 || tds.day > 999999999) {
return PyLong_FromLongLong(td);
}
else {
return PyDelta_FromDSU(days, seconds, useconds);
return PyDelta_FromDSU(tds.day, tds.sec, tds.us);
}
}

/*
 * Hashes a timedelta64 value.
 *
 * We require that if d is a PyDelta, then
 * hash(numpy.timedelta64(d)) == hash(d).
 * Where possible, convert td to a PyDelta and hash it.
 *
 * meta: unit metadata (base unit and multiplier) that td is expressed in.
 * td:   the raw timedelta64 value; NaT must be handled by the caller.
 *
 * Returns the hash on success and -1 on failure. Generic-unit values and
 * NaT are rejected with a ValueError.
 */
NPY_NO_EXPORT npy_hash_t
timedelta_hash(PyArray_DatetimeMetaData *meta, npy_timedelta td)
{
    PyObject *obj;
    npy_hash_t res;
    npy_timedeltastruct tds;

    if (td == NPY_DATETIME_NAT) {
        /*
         * Should have been handled by the caller. -1 is the error return
         * of a hash function, so it must come with an exception set.
         */
        PyErr_SetString(PyExc_ValueError, "Can't hash timedelta64 NaT");
        return -1;
    }

    if (meta->base == NPY_FR_GENERIC) {
        /* generic compares equal to *every* other base, so no single hash works. */
        PyErr_SetString(PyExc_ValueError, "Can't hash generic timedelta64");
        return -1;
    }

    if (meta->base == NPY_FR_Y || meta->base == NPY_FR_M) {
        /*
         * Y and M can be converted to each other but not to other units,
         * so hash the total number of months. The unit multiplier must be
         * applied here as well (convert_timedelta_to_timedeltastruct does
         * it for every other unit); otherwise equal values stored with
         * different multipliers would hash differently.
         *
         * The product is formed in unsigned arithmetic so that an
         * out-of-range value wraps instead of being signed-overflow
         * undefined behaviour.
         */
        unsigned long long months_per_step =
                (unsigned long long)meta->num *
                (meta->base == NPY_FR_Y ? 12ULL : 1ULL);

        obj = PyLong_FromLongLong(
                (long long)((unsigned long long)td * months_per_step));
    }
    else {
        if (convert_timedelta_to_timedeltastruct(meta, td, &tds) < 0) {
            return -1;
        }

        if (tds.day < -999999999 || tds.day > 999999999
                || tds.ps != 0 || tds.as != 0) {
            /*
             * Outside the day range accepted here for a PyDelta, or finer
             * than microseconds: hash the exploded struct bytes.
             * convert_timedelta_to_timedeltastruct does memset,
             * so this is safe from loose struct packing.
             */
            obj = PyBytes_FromStringAndSize((const char *)&tds, sizeof(tds));
        }
        else {
            /* The day count was range-checked above, so the int cast is lossless. */
            obj = PyDelta_FromDSU((int)tds.day, tds.sec, tds.us);
        }
    }

    if (obj == NULL) {
        return -1;
    }

    res = PyObject_Hash(obj);

    Py_DECREF(obj);

    return res;
}

/*
Expand Down
39 changes: 10 additions & 29 deletions 39 numpy/_core/src/multiarray/scalartypes.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -3904,45 +3904,26 @@ static inline npy_hash_t
* #lname = datetime, timedelta#
* #name = Datetime, Timedelta#
*/
/*
 * Hash for @lname@ scalars.
 *
 * NaT is hashed by object identity (similar to NaN); every other value is
 * hashed by @lname@_hash() using the unit metadata of the scalar's dtype.
 * Returns -1 on failure.
 */
static npy_hash_t
@lname@_arrtype_hash(PyObject *obj)
{
    PyArray_DatetimeMetaData *meta;
    PyArray_Descr *dtype;
    npy_hash_t res;
    npy_@lname@ val = PyArrayScalar_VAL(obj, @name@);

    if (val == NPY_DATETIME_NAT) {
        /* Use identity, similar to NaN */
        return PyBaseObject_Type.tp_hash(obj);
    }

    /* PyArray_DescrFromScalar returns a new reference that we must release. */
    dtype = PyArray_DescrFromScalar(obj);
    if (dtype == NULL) {
        return -1;
    }

    /*
     * NOTE(review): assumes a NULL return here signals an error with an
     * exception already set -- confirm against the helper.
     */
    meta = get_datetime_metadata_from_dtype(dtype);
    if (meta == NULL) {
        Py_DECREF(dtype);
        return -1;
    }

    res = @lname@_hash(meta, val);

    /* meta was obtained from dtype, so keep dtype alive until hashing is done. */
    Py_DECREF(dtype);

    return res;
}
/**end repeat**/



/* Wrong thing to do for longdouble, but....*/

/**begin repeat
Expand Down
Loading
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.