Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 6f4ec86

Browse files
committed
BUG: datetime64 hash.
#3836
1 parent a83bc02 commit 6f4ec86
Copy full SHA for 6f4ec86

File tree

6 files changed

+291
-74
lines changed
Filter options

6 files changed

+291
-74
lines changed

‎numpy/_core/include/numpy/ndarraytypes.h

Copy file name to clipboardExpand all lines: numpy/_core/include/numpy/ndarraytypes.h
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ typedef struct {
841841
npy_int32 month, day, hour, min, sec, us, ps, as;
842842
} npy_datetimestruct;
843843

844-
/* This is not used internally. */
844+
/* This structure contains an exploded view of a timedelta value */
845845
typedef struct {
846846
npy_int64 day;
847847
npy_int32 sec, us, ps, as;

‎numpy/_core/src/multiarray/_datetime.h

Copy file name to clipboardExpand all lines: numpy/_core/src/multiarray/_datetime.h
+6Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,4 +328,10 @@ find_object_datetime_type(PyObject *obj, int type_num);
328328
NPY_NO_EXPORT int
329329
PyArray_InitializeDatetimeCasts(void);
330330

331+
NPY_NO_EXPORT npy_hash_t
332+
datetime_hash(PyArray_DatetimeMetaData *meta, npy_datetime dt);
333+
334+
NPY_NO_EXPORT npy_hash_t
335+
timedelta_hash(PyArray_DatetimeMetaData *meta, npy_timedelta td);
336+
331337
#endif /* NUMPY_CORE_SRC_MULTIARRAY__DATETIME_H_ */

‎numpy/_core/src/multiarray/datetime.c

Copy file name to clipboardExpand all lines: numpy/_core/src/multiarray/datetime.c
+190-43Lines changed: 190 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2818,85 +2818,232 @@ convert_datetime_to_pyobject(npy_datetime dt, PyArray_DatetimeMetaData *meta)
28182818
}
28192819

28202820
/*
2821-
* Converts a timedelta into a PyObject *.
2821+
* We require that if d is a PyDateTime, then
2822+
* hash(numpy.datetime64(d)) == hash(d).
2823+
* Where possible, convert dt to a PyDateTime and hash it.
28222824
*
2823-
* Not-a-time is returned as the string "NaT".
2824-
* For microseconds or coarser, returns a datetime.timedelta.
2825-
* For units finer than microseconds, returns an integer.
2825+
* NOTE: "equals" across PyDate, PyDateTime and np.datetime64 is not transitive:
2826+
* datetime.datetime(1970, 1, 1) == np.datetime64(0, 'us')
2827+
* np.datetime64(0, 'us') == np.datetime64(0, 'D')
2828+
* datetime.datetime(1970, 1, 1) != np.datetime64(0, 'D') # date, not datetime!
2829+
*
2830+
* But:
2831+
* datetime.date(1970, 1, 1) == np.datetime64(0, 'D')
2832+
*
2833+
* For hash(datetime64(0, 'D')) we could return either PyDate.hash or PyDateTime.hash.
2834+
* We choose PyDateTime.hash to match datetime64(0, 'us')
28262835
*/
2827-
NPY_NO_EXPORT PyObject *
2828-
convert_timedelta_to_pyobject(npy_timedelta td, PyArray_DatetimeMetaData *meta)
2836+
NPY_NO_EXPORT npy_hash_t
2837+
datetime_hash(PyArray_DatetimeMetaData *meta, npy_datetime dt)
28292838
{
2830-
npy_timedelta value;
2831-
int days = 0, seconds = 0, useconds = 0;
2839+
PyObject *obj;
2840+
npy_hash_t res;
2841+
npy_datetimestruct dts;
28322842

2833-
/*
2834-
* Convert NaT (not-a-time) into None.
2835-
*/
2836-
if (td == NPY_DATETIME_NAT) {
2837-
Py_RETURN_NONE;
2843+
if (dt == NPY_DATETIME_NAT) {
2844+
return -1; /* should have been handled by caller */
28382845
}
28392846

2840-
/*
2841-
* If the type's precision is greater than microseconds, is
2842-
* Y/M/B (nonlinear units), or is generic units, return an int
2843-
*/
2844-
if (meta->base > NPY_FR_us ||
2845-
meta->base == NPY_FR_Y ||
2846-
meta->base == NPY_FR_M ||
2847-
meta->base == NPY_FR_GENERIC) {
2848-
return PyLong_FromLongLong(td);
2847+
if (meta->base == NPY_FR_GENERIC) {
2848+
obj = PyLong_FromLongLong(dt);
2849+
} else {
2850+
if (NpyDatetime_ConvertDatetime64ToDatetimeStruct(meta, dt, &dts) < 0) {
2851+
return -1;
2852+
}
2853+
2854+
if (dts.year < 1 || dts.year > 9999
2855+
|| dts.ps != 0 || dts.as != 0) {
2856+
/* NpyDatetime_ConvertDatetime64ToDatetimeStruct does memset,
2857+
* so this is safe from loose struct packing. */
2858+
obj = PyBytes_FromStringAndSize((const char *)&dts, sizeof(dts));
2859+
} else {
2860+
obj = PyDateTime_FromDateAndTime(dts.year, dts.month, dts.day,
2861+
dts.hour, dts.min, dts.sec, dts.us);
2862+
}
2863+
}
2864+
2865+
if (obj == NULL) {
2866+
return -1;
28492867
}
28502868

2851-
value = td;
2869+
res = PyObject_Hash(obj);
2870+
2871+
Py_DECREF(obj);
2872+
2873+
return res;
2874+
}
2875+
2876+
static int
2877+
convert_timedelta_to_timedeltastruct(PyArray_DatetimeMetaData *meta,
2878+
npy_timedelta td,
2879+
npy_timedeltastruct *out)
2880+
{
2881+
memset(out, 0, sizeof(npy_timedeltastruct));
28522882

28532883
/* Apply the unit multiplier (TODO: overflow treatment...) */
2854-
value *= meta->num;
2884+
td *= meta->num;
28552885

28562886
/* Convert to days/seconds/useconds (plus ps/as for sub-microsecond units) */
28572887
switch (meta->base) {
28582888
case NPY_FR_W:
2859-
days = value * 7;
2889+
out->day = td * 7;
28602890
break;
28612891
case NPY_FR_D:
2862-
days = value;
2892+
out->day = td;
28632893
break;
28642894
case NPY_FR_h:
2865-
days = extract_unit_64(&value, 24ULL);
2866-
seconds = value*60*60;
2895+
out->day = extract_unit_64(&td, 24LL);
2896+
out->sec = (npy_int32)(td * 60*60);
28672897
break;
28682898
case NPY_FR_m:
2869-
days = extract_unit_64(&value, 60ULL*24);
2870-
seconds = value*60;
2899+
out->day = extract_unit_64(&td, 60LL*24);
2900+
out->sec = (npy_int32)(td * 60);
28712901
break;
28722902
case NPY_FR_s:
2873-
days = extract_unit_64(&value, 60ULL*60*24);
2874-
seconds = value;
2903+
out->day = extract_unit_64(&td, 60LL*60*24);
2904+
out->sec = (npy_int32)td;
28752905
break;
28762906
case NPY_FR_ms:
2877-
days = extract_unit_64(&value, 1000ULL*60*60*24);
2878-
seconds = extract_unit_64(&value, 1000ULL);
2879-
useconds = value*1000;
2907+
out->day = extract_unit_64(&td, 1000LL*60*60*24);
2908+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL);
2909+
out->us = (npy_int32)(td * 1000LL);
28802910
break;
28812911
case NPY_FR_us:
2882-
days = extract_unit_64(&value, 1000ULL*1000*60*60*24);
2883-
seconds = extract_unit_64(&value, 1000ULL*1000);
2884-
useconds = value;
2912+
out->day = extract_unit_64(&td, 1000LL*1000*60*60*24);
2913+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000);
2914+
out->us = (npy_int32)td;
28852915
break;
2886-
default:
2887-
// unreachable, handled by the `if` above
2888-
assert(NPY_FALSE);
2916+
case NPY_FR_ns:
2917+
out->day = extract_unit_64(&td, 1000LL*1000*1000*60*60*24);
2918+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000);
2919+
out->us = (npy_int32)extract_unit_64(&td, 1000LL);
2920+
out->ps = (npy_int32)(td * 1000LL);
2921+
break;
2922+
case NPY_FR_ps:
2923+
out->day = extract_unit_64(&td, 1000LL*1000*1000*1000*60*60*24);
2924+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000);
2925+
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000);
2926+
out->ps = (npy_int32)td;
2927+
break;
2928+
case NPY_FR_fs:
2929+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000*1000);
2930+
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000);
2931+
out->ps = (npy_int32)extract_unit_64(&td, 1000LL);
2932+
out->as = (npy_int32)(td * 1000LL);
28892933
break;
2934+
case NPY_FR_as:
2935+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000*1000*1000);
2936+
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000);
2937+
out->ps = (npy_int32)extract_unit_64(&td, 1000LL*1000);
2938+
out->as = (npy_int32)td;
2939+
break;
2940+
default:
2941+
PyErr_SetString(PyExc_RuntimeError,
2942+
"NumPy timedelta metadata is corrupted with invalid "
2943+
"base unit");
2944+
return -1;
2945+
}
2946+
2947+
return 0;
2948+
}
2949+
2950+
/*
2951+
* Converts a timedelta into a PyObject *.
2952+
*
2953+
* Not-a-time is returned as None.
2954+
* For microseconds or coarser, returns a datetime.timedelta.
2955+
* For units finer than microseconds, returns an integer.
2956+
*/
2957+
NPY_NO_EXPORT PyObject *
2958+
convert_timedelta_to_pyobject(npy_timedelta td, PyArray_DatetimeMetaData *meta)
2959+
{
2960+
npy_timedeltastruct tds;
2961+
2962+
/*
2963+
* Convert NaT (not-a-time) into None.
2964+
*/
2965+
if (td == NPY_DATETIME_NAT) {
2966+
Py_RETURN_NONE;
2967+
}
2968+
2969+
/*
2970+
* If the type's precision is finer than microseconds, is
2971+
* Y/M (nonlinear units), or is generic units, return an int
2972+
*/
2973+
if (meta->base > NPY_FR_us ||
2974+
meta->base == NPY_FR_Y ||
2975+
meta->base == NPY_FR_M ||
2976+
meta->base == NPY_FR_GENERIC) {
2977+
return PyLong_FromLongLong(td);
2978+
}
2979+
2980+
if (convert_timedelta_to_timedeltastruct(meta, td, &tds) < 0) {
2981+
return NULL;
28902982
}
2983+
28912984
/*
28922985
* If it would overflow the datetime.timedelta days, return a raw int
28932986
*/
2894-
if (days < -999999999 || days > 999999999) {
2987+
if (tds.day < -999999999 || tds.day > 999999999) {
28952988
return PyLong_FromLongLong(td);
28962989
}
28972990
else {
2898-
return PyDelta_FromDSU(days, seconds, useconds);
2991+
return PyDelta_FromDSU(tds.day, tds.sec, tds.us);
2992+
}
2993+
}
2994+
2995+
/*
2996+
* We require that if d is a PyDelta, then
2997+
* hash(numpy.timedelta64(d)) == hash(d).
2998+
* Where possible, convert td to a PyDelta and hash it.
2999+
*/
3000+
NPY_NO_EXPORT npy_hash_t
3001+
timedelta_hash(PyArray_DatetimeMetaData *meta, npy_timedelta td)
3002+
{
3003+
PyObject *obj;
3004+
npy_hash_t res;
3005+
npy_timedeltastruct tds;
3006+
3007+
if (td == NPY_DATETIME_NAT) {
3008+
return -1; /* should have been handled by caller */
3009+
}
3010+
3011+
if (meta->base == NPY_FR_GENERIC) {
3012+
/* generic compares equal to *every* other base, so no single hash works. */
3013+
PyErr_SetString(PyExc_ValueError, "Can't hash generic timedelta64");
3014+
return -1;
28993015
}
3016+
3017+
/* Y and M can be converted to each other but not to other units */
3018+
3019+
if (meta->base == NPY_FR_Y) {
3020+
obj = PyLong_FromLongLong(td * 12);
3021+
} else if (meta->base == NPY_FR_M) {
3022+
obj = PyLong_FromLongLong(td);
3023+
} else {
3024+
if (convert_timedelta_to_timedeltastruct(meta, td, &tds) < 0) {
3025+
return -1;
3026+
}
3027+
3028+
if (tds.day < -999999999 || tds.day > 999999999
3029+
|| tds.ps != 0 || tds.as != 0) {
3030+
/* convert_timedelta_to_timedeltastruct does memset,
3031+
* so this is safe from loose struct packing. */
3032+
obj = PyBytes_FromStringAndSize((const char *)&tds, sizeof(tds));
3033+
} else {
3034+
obj = PyDelta_FromDSU(tds.day, tds.sec, tds.us);
3035+
}
3036+
}
3037+
3038+
if (obj == NULL) {
3039+
return -1;
3040+
}
3041+
3042+
res = PyObject_Hash(obj);
3043+
3044+
Py_DECREF(obj);
3045+
3046+
return res;
29003047
}
29013048

29023049
/*

‎numpy/_core/src/multiarray/scalartypes.c.src

Copy file name to clipboardExpand all lines: numpy/_core/src/multiarray/scalartypes.c.src
+10-29Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3904,45 +3904,26 @@ static inline npy_hash_t
39043904
* #lname = datetime, timedelta#
39053905
* #name = Datetime, Timedelta#
39063906
*/
3907-
#if NPY_SIZEOF_HASH_T==NPY_SIZEOF_DATETIME
39083907
static npy_hash_t
39093908
@lname@_arrtype_hash(PyObject *obj)
39103909
{
3911-
npy_hash_t x = (npy_hash_t)(PyArrayScalar_VAL(obj, @name@));
3912-
if (x == -1) {
3913-
x = -2;
3914-
}
3915-
return x;
3916-
}
3917-
#elif NPY_SIZEOF_LONGLONG==NPY_SIZEOF_DATETIME
3918-
static npy_hash_t
3919-
@lname@_arrtype_hash(PyObject *obj)
3920-
{
3921-
npy_hash_t y;
3922-
npy_longlong x = (PyArrayScalar_VAL(obj, @name@));
3910+
PyArray_DatetimeMetaData *meta;
3911+
PyArray_Descr *dtype;
3912+
npy_@lname@ val = PyArrayScalar_VAL(obj, @name@);
39233913

3924-
if ((x <= LONG_MAX)) {
3925-
y = (npy_hash_t) x;
3914+
if (val == NPY_DATETIME_NAT) {
3915+
/* Use identity, similar to NaN */
3916+
return PyBaseObject_Type.tp_hash(obj);
39263917
}
3927-
else {
3928-
union Mask {
3929-
long hashvals[2];
3930-
npy_longlong v;
3931-
} both;
39323918

3933-
both.v = x;
3934-
y = both.hashvals[0] + (1000003)*both.hashvals[1];
3935-
}
3936-
if (y == -1) {
3937-
y = -2;
3938-
}
3939-
return y;
3919+
dtype = PyArray_DescrFromScalar(obj);
3920+
meta = get_datetime_metadata_from_dtype(dtype);
3921+
3922+
return @lname@_hash(meta, val);
39403923
}
3941-
#endif
39423924
/**end repeat**/
39433925

39443926

3945-
39463927
/* Wrong thing to do for longdouble, but....*/
39473928

39483929
/**begin repeat

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.