Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit fa643d6

Browse files
committed
BUG: datetime64 hash.
#3836
1 parent 70fde29 commit fa643d6
Copy full SHA for fa643d6

File tree

Expand file treeCollapse file tree

6 files changed

+284
-77
lines changed
Filter options
Expand file treeCollapse file tree

6 files changed

+284
-77
lines changed

‎numpy/_core/include/numpy/ndarraytypes.h

Copy file name to clipboardExpand all lines: numpy/_core/include/numpy/ndarraytypes.h
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ typedef struct {
841841
npy_int32 month, day, hour, min, sec, us, ps, as;
842842
} npy_datetimestruct;
843843

844-
/* This is not used internally. */
844+
/* This structure contains an exploded view of a timedelta value */
845845
typedef struct {
846846
npy_int64 day;
847847
npy_int32 sec, us, ps, as;

‎numpy/_core/src/multiarray/_datetime.h

Copy file name to clipboardExpand all lines: numpy/_core/src/multiarray/_datetime.h
+6Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,4 +328,10 @@ find_object_datetime_type(PyObject *obj, int type_num);
328328
NPY_NO_EXPORT int
329329
PyArray_InitializeDatetimeCasts(void);
330330

331+
NPY_NO_EXPORT npy_hash_t
332+
datetime_hash(PyArray_DatetimeMetaData *meta, npy_datetime dt);
333+
334+
NPY_NO_EXPORT npy_hash_t
335+
timedelta_hash(PyArray_DatetimeMetaData *meta, npy_timedelta td);
336+
331337
#endif /* NUMPY_CORE_SRC_MULTIARRAY__DATETIME_H_ */

‎numpy/_core/src/multiarray/datetime.c

Copy file name to clipboardExpand all lines: numpy/_core/src/multiarray/datetime.c
+186-43Lines changed: 186 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2818,85 +2818,228 @@ convert_datetime_to_pyobject(npy_datetime dt, PyArray_DatetimeMetaData *meta)
28182818
}
28192819

28202820
/*
2821-
* Converts a timedelta into a PyObject *.
2821+
* We require that if d is a PyDateTime, then
2822+
* hash(numpy.datetime64(d)) == hash(d).
2823+
* Where possible, convert dt to a PyDateTime and hash it.
28222824
*
2823-
* Not-a-time is returned as the string "NaT".
2824-
* For microseconds or coarser, returns a datetime.timedelta.
2825-
* For units finer than microseconds, returns an integer.
2825+
* NOTE: "equals" across PyDate, PyDateTime and np.datetime64 is not transitive:
2826+
* datetime.datetime(1970, 1, 1) == np.datetime64(0, 'us')
2827+
* np.datetime64(0, 'us') == np.datetime64(0, 'D')
2828+
* datetime.datetime(1970, 1, 1) != np.datetime64(0, 'D') # date, not datetime!
2829+
*
2830+
* But:
2831+
* datetime.date(1970, 1, 1) == np.datetime64(0, 'D')
2832+
*
2833+
* For hash(datetime64(0, 'D')) we could return either PyDate.hash or PyDateTime.hash.
2834+
* We choose PyDateTime.hash to match datetime64(0, 'us')
28262835
*/
2827-
NPY_NO_EXPORT PyObject *
2828-
convert_timedelta_to_pyobject(npy_timedelta td, PyArray_DatetimeMetaData *meta)
2836+
NPY_NO_EXPORT npy_hash_t
2837+
datetime_hash(PyArray_DatetimeMetaData *meta, npy_datetime dt)
28292838
{
2830-
npy_timedelta value;
2831-
int days = 0, seconds = 0, useconds = 0;
2839+
PyObject *obj;
2840+
npy_hash_t res;
2841+
npy_datetimestruct dts;
28322842

2833-
/*
2834-
* Convert NaT (not-a-time) into None.
2835-
*/
2836-
if (td == NPY_DATETIME_NAT) {
2837-
Py_RETURN_NONE;
2843+
if (dt == NPY_DATETIME_NAT || meta->base == NPY_FR_GENERIC) {
2844+
return -2;
28382845
}
28392846

2840-
/*
2841-
* If the type's precision is greater than microseconds, is
2842-
* Y/M/B (nonlinear units), or is generic units, return an int
2843-
*/
2844-
if (meta->base > NPY_FR_us ||
2845-
meta->base == NPY_FR_Y ||
2846-
meta->base == NPY_FR_M ||
2847-
meta->base == NPY_FR_GENERIC) {
2848-
return PyLong_FromLongLong(td);
2847+
if (NpyDatetime_ConvertDatetime64ToDatetimeStruct(meta, dt, &dts) < 0) {
2848+
return -1;
2849+
}
2850+
2851+
if (dts.year < 1 || dts.year > 9999
2852+
|| dts.ps != 0 || dts.as != 0) {
2853+
/* NpyDatetime_ConvertDatetime64ToDatetimeStruct does memset,
2854+
* so this is safe from loose struct packing. */
2855+
obj = PyBytes_FromStringAndSize((const char *)&dts, sizeof(dts));
2856+
} else {
2857+
obj = PyDateTime_FromDateAndTime(dts.year, dts.month, dts.day,
2858+
dts.hour, dts.min, dts.sec, dts.us);
28492859
}
28502860

2851-
value = td;
2861+
if (obj == NULL) {
2862+
return -1;
2863+
}
2864+
2865+
res = PyObject_Hash(obj);
2866+
2867+
Py_DECREF(obj);
2868+
2869+
return res;
2870+
}
2871+
2872+
static int
2873+
convert_timedelta_to_timedeltastruct(PyArray_DatetimeMetaData *meta,
2874+
npy_timedelta td,
2875+
npy_timedeltastruct *out)
2876+
{
2877+
memset(out, 0, sizeof(npy_timedeltastruct));
28522878

28532879
/* Apply the unit multiplier (TODO: overflow treatment...) */
2854-
value *= meta->num;
2880+
td *= meta->num;
28552881

28562882
/* Convert to days/seconds/useconds */
28572883
switch (meta->base) {
28582884
case NPY_FR_W:
2859-
days = value * 7;
2885+
out->day = td * 7;
28602886
break;
28612887
case NPY_FR_D:
2862-
days = value;
2888+
out->day = td;
28632889
break;
28642890
case NPY_FR_h:
2865-
days = extract_unit_64(&value, 24ULL);
2866-
seconds = value*60*60;
2891+
out->day = extract_unit_64(&td, 24LL);
2892+
out->sec = (npy_int32)(td * 60*60);
28672893
break;
28682894
case NPY_FR_m:
2869-
days = extract_unit_64(&value, 60ULL*24);
2870-
seconds = value*60;
2895+
out->day = extract_unit_64(&td, 60LL*24);
2896+
out->sec = (npy_int32)(td * 60);
28712897
break;
28722898
case NPY_FR_s:
2873-
days = extract_unit_64(&value, 60ULL*60*24);
2874-
seconds = value;
2899+
out->day = extract_unit_64(&td, 60LL*60*24);
2900+
out->sec = (npy_int32)td;
28752901
break;
28762902
case NPY_FR_ms:
2877-
days = extract_unit_64(&value, 1000ULL*60*60*24);
2878-
seconds = extract_unit_64(&value, 1000ULL);
2879-
useconds = value*1000;
2903+
out->day = extract_unit_64(&td, 1000LL*60*60*24);
2904+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL);
2905+
out->us = (npy_int32)(td * 1000LL);
28802906
break;
28812907
case NPY_FR_us:
2882-
days = extract_unit_64(&value, 1000ULL*1000*60*60*24);
2883-
seconds = extract_unit_64(&value, 1000ULL*1000);
2884-
useconds = value;
2908+
out->day = extract_unit_64(&td, 1000LL*1000*60*60*24);
2909+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000);
2910+
out->us = (npy_int32)td;
28852911
break;
2886-
default:
2887-
// unreachable, handled by the `if` above
2888-
assert(NPY_FALSE);
2912+
case NPY_FR_ns:
2913+
out->day = extract_unit_64(&td, 1000LL*1000*1000*60*60*24);
2914+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000);
2915+
out->us = (npy_int32)extract_unit_64(&td, 1000LL);
2916+
out->ps = (npy_int32)(td * 1000LL);
2917+
break;
2918+
case NPY_FR_ps:
2919+
out->day = extract_unit_64(&td, 1000LL*1000*1000*1000*60*60*24);
2920+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000);
2921+
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000);
2922+
out->ps = (npy_int32)td;
28892923
break;
2924+
case NPY_FR_fs:
2925+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000*1000);
2926+
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000);
2927+
out->ps = (npy_int32)extract_unit_64(&td, 1000LL);
2928+
out->as = (npy_int32)(td * 1000LL);
2929+
break;
2930+
case NPY_FR_as:
2931+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000*1000*1000);
2932+
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000);
2933+
out->ps = (npy_int32)extract_unit_64(&td, 1000LL*1000);
2934+
out->as = (npy_int32)td;
2935+
break;
2936+
default:
2937+
PyErr_SetString(PyExc_RuntimeError,
2938+
"NumPy timedelta metadata is corrupted with invalid "
2939+
"base unit");
2940+
return -1;
28902941
}
2942+
2943+
return 0;
2944+
}
2945+
2946+
/*
2947+
* Converts a timedelta into a PyObject *.
2948+
*
2949+
* Not-a-time is returned as None.
2950+
* For microseconds or coarser, returns a datetime.timedelta.
2951+
* For units finer than microseconds, for Y/M or generic units, or when the day count is outside datetime.timedelta's range, returns an integer.
2952+
*/
2953+
NPY_NO_EXPORT PyObject *
2954+
convert_timedelta_to_pyobject(npy_timedelta td, PyArray_DatetimeMetaData *meta)
2955+
{
2956+
npy_timedeltastruct tds;
2957+
2958+
/*
2959+
* Convert NaT (not-a-time) into None.
2960+
*/
2961+
if (td == NPY_DATETIME_NAT) {
2962+
Py_RETURN_NONE;
2963+
}
2964+
2965+
/*
2966+
* If the type's precision is greater than microseconds, is
2967+
* Y/M/B (nonlinear units), or is generic units, return an int
2968+
*/
2969+
if (meta->base > NPY_FR_us ||
2970+
meta->base == NPY_FR_Y ||
2971+
meta->base == NPY_FR_M ||
2972+
meta->base == NPY_FR_GENERIC) {
2973+
return PyLong_FromLongLong(td);
2974+
}
2975+
2976+
if (convert_timedelta_to_timedeltastruct(meta, td, &tds) < 0) {
2977+
return NULL;
2978+
}
2979+
28912980
/*
28922981
* If it would overflow the datetime.timedelta days, return a raw int
28932982
*/
2894-
if (days < -999999999 || days > 999999999) {
2983+
if (tds.day < -999999999 || tds.day > 999999999) {
28952984
return PyLong_FromLongLong(td);
28962985
}
28972986
else {
2898-
return PyDelta_FromDSU(days, seconds, useconds);
2987+
return PyDelta_FromDSU(tds.day, tds.sec, tds.us);
2988+
}
2989+
}
2990+
2991+
/*
2992+
* We require that if d is a PyDelta, then
2993+
* hash(numpy.timedelta64(d)) == hash(d).
2994+
* Where possible, convert dt to a PyDelta and hash it.
2995+
*/
2996+
NPY_NO_EXPORT npy_hash_t
2997+
timedelta_hash(PyArray_DatetimeMetaData *meta, npy_timedelta td)
2998+
{
2999+
PyObject *obj;
3000+
npy_hash_t res;
3001+
npy_timedeltastruct tds;
3002+
3003+
if (td == NPY_DATETIME_NAT) {
3004+
return -2;
28993005
}
3006+
3007+
if (meta->base == NPY_FR_GENERIC) {
3008+
/* generic compares equal to *every* other base, so no single hash works. */
3009+
PyErr_SetString(PyExc_ValueError, "Can't hash generic timedelta64");
3010+
return -1;
3011+
}
3012+
3013+
/* Y and M can be converted to each other but not to other units */
3014+
3015+
if (meta->base == NPY_FR_Y) {
3016+
obj = PyLong_FromLong(td * 12);
3017+
} else if (meta->base == NPY_FR_M) {
3018+
obj = PyLong_FromLong(td);
3019+
} else {
3020+
if (convert_timedelta_to_timedeltastruct(meta, td, &tds) < 0) {
3021+
return -1;
3022+
}
3023+
3024+
if (tds.day < -999999999 || tds.day > 999999999
3025+
|| tds.ps != 0 || tds.as != 0) {
3026+
/* convert_timedelta_to_timedeltastruct does memset,
3027+
* so this is safe from loose struct packing. */
3028+
obj = PyBytes_FromStringAndSize((const char *)&tds, sizeof(tds));
3029+
} else {
3030+
obj = PyDelta_FromDSU(tds.day, tds.sec, tds.us);
3031+
}
3032+
}
3033+
3034+
if (obj == NULL) {
3035+
return -1;
3036+
}
3037+
3038+
res = PyObject_Hash(obj);
3039+
3040+
Py_DECREF(obj);
3041+
3042+
return res;
29003043
}
29013044

29023045
/*

‎numpy/_core/src/multiarray/scalartypes.c.src

Copy file name to clipboardExpand all lines: numpy/_core/src/multiarray/scalartypes.c.src
+3-32Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3904,45 +3904,16 @@ static inline npy_hash_t
39043904
* #lname = datetime, timedelta#
39053905
* #name = Datetime, Timedelta#
39063906
*/
3907-
#if NPY_SIZEOF_HASH_T==NPY_SIZEOF_DATETIME
39083907
static npy_hash_t
39093908
@lname@_arrtype_hash(PyObject *obj)
39103909
{
3911-
npy_hash_t x = (npy_hash_t)(PyArrayScalar_VAL(obj, @name@));
3912-
if (x == -1) {
3913-
x = -2;
3914-
}
3915-
return x;
3910+
PyArray_Descr *dtype = PyArray_DescrFromScalar(obj);
3911+
PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(dtype);
3912+
return @lname@_hash(meta, PyArrayScalar_VAL(obj, @name@));
39163913
}
3917-
#elif NPY_SIZEOF_LONGLONG==NPY_SIZEOF_DATETIME
3918-
static npy_hash_t
3919-
@lname@_arrtype_hash(PyObject *obj)
3920-
{
3921-
npy_hash_t y;
3922-
npy_longlong x = (PyArrayScalar_VAL(obj, @name@));
3923-
3924-
if ((x <= LONG_MAX)) {
3925-
y = (npy_hash_t) x;
3926-
}
3927-
else {
3928-
union Mask {
3929-
long hashvals[2];
3930-
npy_longlong v;
3931-
} both;
3932-
3933-
both.v = x;
3934-
y = both.hashvals[0] + (1000003)*both.hashvals[1];
3935-
}
3936-
if (y == -1) {
3937-
y = -2;
3938-
}
3939-
return y;
3940-
}
3941-
#endif
39423914
/**end repeat**/
39433915

39443916

3945-
39463917
/* Wrong thing to do for longdouble, but....*/
39473918

39483919
/**begin repeat

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.