diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index c8495db8e58f..d5f329b66754 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -10,16 +10,6 @@ #include #endif -// compile time environment variables -#ifndef NPY_RELAXED_STRIDES_CHECKING - #define NPY_RELAXED_STRIDES_CHECKING 0 -#endif -#ifndef NPY_RELAXED_STRIDES_DEBUG - #define NPY_RELAXED_STRIDES_DEBUG 0 -#endif -#ifndef NPY_USE_NEW_CASTINGIMPL - #define NPY_USE_NEW_CASTINGIMPL 0 -#endif /* * using static inline modifiers when defining npy_math functions * allows the compiler to make optimizations when possible diff --git a/numpy/core/setup.py b/numpy/core/setup.py index dfb26c9c1108..822f9f580d44 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -468,14 +468,20 @@ def generate_config_h(ext, build_dir): # Use relaxed stride checking if NPY_RELAXED_STRIDES_CHECKING: moredefs.append(('NPY_RELAXED_STRIDES_CHECKING', 1)) + else: + moredefs.append(('NPY_RELAXED_STRIDES_CHECKING', 0)) # Use bogus stride debug aid when relaxed strides are enabled if NPY_RELAXED_STRIDES_DEBUG: moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 1)) + else: + moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 0)) # Use the new experimental casting implementation in NumPy 1.20: if NPY_USE_NEW_CASTINGIMPL: moredefs.append(('NPY_USE_NEW_CASTINGIMPL', 1)) + else: + moredefs.append(('NPY_USE_NEW_CASTINGIMPL', 0)) # Get long double representation rep = check_long_double_representation(config_cmd) @@ -788,6 +794,7 @@ def get_mathlib_info(*args): join('src', 'multiarray', 'ctors.h'), join('src', 'multiarray', 'descriptor.h'), join('src', 'multiarray', 'dtypemeta.h'), + join('src', 'multiarray', 'dtype_transfer.h'), join('src', 'multiarray', 'dragon4.h'), join('src', 'multiarray', 'einsum_debug.h'), join('src', 'multiarray', 'einsum_sumprod.h'), diff --git a/numpy/core/src/common/lowlevel_strided_loops.h b/numpy/core/src/common/lowlevel_strided_loops.h index 12aa61822a14..bda9bb5e4ed0 100644 --- a/numpy/core/src/common/lowlevel_strided_loops.h +++ b/numpy/core/src/common/lowlevel_strided_loops.h @@ -196,6 +196,88 @@ PyArray_GetDTypeTransferFunction(int aligned, NpyAuxData **out_transferdata, int *out_needs_api); + +/* Same as above, but only wraps copyswapn or legacy cast functions */ +NPY_NO_EXPORT int +PyArray_GetLegacyDTypeTransferFunction(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api, int wrap_if_unaligned); + +/* Specialized dtype transfer functions */ +NPY_NO_EXPORT int +get_nbo_cast_datetime_transfer_function(int aligned, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata); + +NPY_NO_EXPORT int +get_nbo_datetime_to_string_transfer_function( + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata); + +NPY_NO_EXPORT int +get_datetime_to_unicode_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api); + +NPY_NO_EXPORT int +get_nbo_string_to_datetime_transfer_function( + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData 
**out_transferdata); + +NPY_NO_EXPORT int +get_unicode_to_datetime_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api); + +NPY_NO_EXPORT int +get_fields_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api); + +NPY_NO_EXPORT int +get_subarray_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api); + +NPY_NO_EXPORT int +_strided_to_strided_move_references(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + NpyAuxData *data); + +NPY_NO_EXPORT int +_strided_to_strided_copy_references(char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + NpyAuxData *data); + +NPY_NO_EXPORT int +wrap_aligned_contig_transfer_function_with_copyswapn( + int aligned, npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata, + int *out_needs_api, + PyArray_StridedUnaryOp *caststransfer, NpyAuxData *castdata); + /* * This is identical to PyArray_GetDTypeTransferFunction, but returns a * transfer function which also takes a mask as a parameter. The mask is used diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c index cae452454811..38284fac20cb 100644 --- a/numpy/core/src/multiarray/array_method.c +++ b/numpy/core/src/multiarray/array_method.c @@ -121,6 +121,19 @@ default_resolve_descriptors( } +NPY_INLINE static int +is_contiguous( + npy_intp const *strides, PyArray_Descr *const *descriptors, int nargs) +{ + for (int i = 0; i < nargs; i++) { + if (strides[i] != descriptors[i]->elsize) { + return 0; + } + } + return 1; +} + + /** * The default method to fetch the correct loop for a cast or ufunc * (at the time of writing only casts). @@ -138,18 +151,36 @@ default_resolve_descriptors( * @param flags * @return 0 on success -1 on failure. 
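 *
 * Illustrative example (not part of the original docs): for a float64 ->
 * float64 cast with strides {8, 8}, every stride matches the descriptor
 * elsize, so is_contiguous() returns 1 and the contiguous loop is picked
 * (when one was provided); with strides {16, 8} the plain strided loop
 * is returned instead.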
*/ -static int -default_get_strided_loop( - PyArrayMethod_Context *NPY_UNUSED(context), - int NPY_UNUSED(aligned), int NPY_UNUSED(move_references), - npy_intp *NPY_UNUSED(strides), - PyArray_StridedUnaryOp **NPY_UNUSED(out_loop), - NpyAuxData **NPY_UNUSED(out_transferdata), - NPY_ARRAYMETHOD_FLAGS *NPY_UNUSED(flags)) +NPY_NO_EXPORT int +npy_default_get_strided_loop( + PyArrayMethod_Context *context, + int aligned, int NPY_UNUSED(move_references), npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) { - PyErr_SetString(PyExc_NotImplementedError, - "default loop getter is not implemented"); - return -1; + PyArray_Descr **descrs = context->descriptors; + PyArrayMethodObject *meth = context->method; + *flags = meth->flags & NPY_METH_RUNTIME_FLAGS; + *out_transferdata = NULL; + + int nargs = meth->nin + meth->nout; + if (aligned) { + if (meth->contiguous_loop == NULL || + !is_contiguous(strides, descrs, nargs)) { + *out_loop = meth->strided_loop; + return 0; + } + *out_loop = meth->contiguous_loop; + } + else { + if (meth->unaligned_contiguous_loop == NULL || + !is_contiguous(strides, descrs, nargs)) { + *out_loop = meth->unaligned_strided_loop; + return 0; + } + *out_loop = meth->unaligned_contiguous_loop; + } + return 0; } @@ -225,7 +256,7 @@ fill_arraymethod_from_slots( PyArrayMethodObject *meth = res->method; /* Set the defaults */ - meth->get_strided_loop = &default_get_strided_loop; + meth->get_strided_loop = &npy_default_get_strided_loop; meth->resolve_descriptors = &default_resolve_descriptors; /* Fill in the slots passed by the user */ @@ -295,7 +326,7 @@ fill_arraymethod_from_slots( } } } - if (meth->get_strided_loop != &default_get_strided_loop) { + if (meth->get_strided_loop != &npy_default_get_strided_loop) { /* Do not check the actual loop fields. */ return 0; } @@ -468,6 +499,9 @@ boundarraymethod_dealloc(PyObject *self) * May raise an error, but usually should not. * The function validates the casting attribute compared to the returned * casting level. + * + * TODO: This function is not public API, and certain code paths will need + * changes and especially testing if they were to be made public. */ static PyObject * boundarraymethod__resolve_descripors( @@ -481,7 +515,7 @@ boundarraymethod__resolve_descripors( if (!PyTuple_CheckExact(descr_tuple) || PyTuple_Size(descr_tuple) != nin + nout) { - PyErr_Format(PyExc_ValueError, + PyErr_Format(PyExc_TypeError, "_resolve_descriptors() takes exactly one tuple with as many " "elements as the method takes arguments (%d+%d).", nin, nout); return NULL; @@ -494,7 +528,7 @@ boundarraymethod__resolve_descripors( } else if (tmp == Py_None) { if (i < nin) { - PyErr_SetString(PyExc_ValueError, + PyErr_SetString(PyExc_TypeError, "only output dtypes may be omitted (set to None)."); return NULL; } @@ -502,7 +536,7 @@ boundarraymethod__resolve_descripors( } else if (PyArray_DescrCheck(tmp)) { if (Py_TYPE(tmp) != (PyTypeObject *)self->dtypes[i]) { - PyErr_Format(PyExc_ValueError, + PyErr_Format(PyExc_TypeError, "input dtype %S was not an exact instance of the bound " "DType class %S.", tmp, self->dtypes[i]); return NULL; @@ -580,9 +614,145 @@ boundarraymethod__resolve_descripors( } +/* + * TODO: This function is not public API, and certain code paths will need + * changes and especially testing if they were to be made public. 
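+ *
+ * Rough intended usage from tests (hypothetical, not part of this diff):
+ * fetch a bound method and call
+ * `bound._simple_strided_call((in_arr, out_arr))` with a single tuple
+ * holding one 1-D array per input and output.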
+ */
+static PyObject *
+boundarraymethod__simple_strided_call(
+        PyBoundArrayMethodObject *self, PyObject *arr_tuple)
+{
+    PyArrayObject *arrays[NPY_MAXARGS];
+    PyArray_Descr *descrs[NPY_MAXARGS];
+    PyArray_Descr *out_descrs[NPY_MAXARGS];
+    ssize_t length = -1;
+    int aligned = 1;
+    npy_intp strides[NPY_MAXARGS];
+    int nin = self->method->nin;
+    int nout = self->method->nout;
+
+    if (!PyTuple_CheckExact(arr_tuple) ||
+            PyTuple_Size(arr_tuple) != nin + nout) {
+        PyErr_Format(PyExc_TypeError,
+                "_simple_strided_call() takes exactly one tuple with as many "
+                "arrays as the method takes arguments (%d+%d).", nin, nout);
+        return NULL;
+    }
+
+    for (int i = 0; i < nin + nout; i++) {
+        PyObject *tmp = PyTuple_GetItem(arr_tuple, i);
+        if (tmp == NULL) {
+            return NULL;
+        }
+        else if (!PyArray_CheckExact(tmp)) {
+            PyErr_SetString(PyExc_TypeError,
+                    "All inputs must be NumPy arrays.");
+            return NULL;
+        }
+        arrays[i] = (PyArrayObject *)tmp;
+        descrs[i] = PyArray_DESCR(arrays[i]);
+
+        /* Check that the input is compatible with a simple method call. */
+        if (Py_TYPE(descrs[i]) != (PyTypeObject *)self->dtypes[i]) {
+            PyErr_Format(PyExc_TypeError,
+                    "input dtype %S was not an exact instance of the bound "
+                    "DType class %S.", descrs[i], self->dtypes[i]);
+            return NULL;
+        }
+        if (PyArray_NDIM(arrays[i]) != 1) {
+            PyErr_SetString(PyExc_ValueError,
+                    "All arrays must be one dimensional.");
+            return NULL;
+        }
+        if (i == 0) {
+            length = PyArray_SIZE(arrays[i]);
+        }
+        else if (PyArray_SIZE(arrays[i]) != length) {
+            PyErr_SetString(PyExc_ValueError,
+                    "All arrays must have the same length.");
+            return NULL;
+        }
+        /* Outputs start at index nin; they must be writeable. */
+        if (i >= nin) {
+            if (PyArray_FailUnlessWriteable(
+                    arrays[i], "_simple_strided_call() output") < 0) {
+                return NULL;
+            }
+        }
+
+        strides[i] = PyArray_STRIDES(arrays[i])[0];
+        /* TODO: We may need to distinguish aligned and itemsize-aligned */
+        aligned &= PyArray_ISALIGNED(arrays[i]);
+    }
+    if (!aligned && !(self->method->flags & NPY_METH_SUPPORTS_UNALIGNED)) {
+        PyErr_SetString(PyExc_ValueError,
+                "method does not support unaligned input.");
+        return NULL;
+    }
+
+    NPY_CASTING casting = self->method->resolve_descriptors(
+            self->method, self->dtypes, descrs, out_descrs);
+
+    if (casting < 0) {
+        PyObject *err_type = NULL, *err_value = NULL, *err_traceback = NULL;
+        PyErr_Fetch(&err_type, &err_value, &err_traceback);
+        PyErr_SetString(PyExc_TypeError,
+                "cannot perform method call with the given dtypes.");
+        npy_PyErr_ChainExceptions(err_type, err_value, err_traceback);
+        return NULL;
+    }
+
+    int dtypes_were_adapted = 0;
+    for (int i = 0; i < nin + nout; i++) {
+        /* NOTE: This check is probably much stricter than necessary... */
+        dtypes_were_adapted |= descrs[i] != out_descrs[i];
+        Py_DECREF(out_descrs[i]);
+    }
+    if (dtypes_were_adapted) {
+        PyErr_SetString(PyExc_TypeError,
+                "_simple_strided_call(): requires dtypes to not require a cast "
+                "(must match exactly with `_resolve_descriptors()`).");
+        return NULL;
+    }
+
+    PyArrayMethod_Context context = {
+            .caller = NULL,
+            .method = self->method,
+            .descriptors = descrs,
+    };
+    PyArray_StridedUnaryOp *strided_loop = NULL;
+    NpyAuxData *loop_data = NULL;
+    NPY_ARRAYMETHOD_FLAGS flags = 0;
+
+    if (self->method->get_strided_loop(
+            &context, aligned, 0, strides,
+            &strided_loop, &loop_data, &flags) < 0) {
+        return NULL;
+    }
+
+    /*
+     * TODO: Add floating point error checks if requested and
+     *       possibly release GIL if allowed by the flags.
+     */
+    /* TODO: strided_loop is currently a cast loop, this will change.
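+     *
+     * For a cast nin == nout == 1, so arrays[0] is the input and
+     * arrays[1] the output; the legacy loop signature is
+     * (dst, dst_stride, src, src_stride, N, src_itemsize, auxdata),
+     * which is why arrays[1] comes first in the call below.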
*/ + int res = strided_loop( + PyArray_BYTES(arrays[1]), strides[1], + PyArray_BYTES(arrays[0]), strides[0], + length, descrs[0]->elsize, loop_data); + if (loop_data != NULL) { + loop_data->free(loop_data); + } + if (res < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + PyMethodDef boundarraymethod_methods[] = { {"_resolve_descriptors", (PyCFunction)boundarraymethod__resolve_descripors, METH_O, "Resolve the given dtypes."}, + {"_simple_strided_call", (PyCFunction)boundarraymethod__simple_strided_call, + METH_O, "call on 1-d inputs and pre-allocated outputs (single call)."}, {NULL, 0, 0, NULL}, }; diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h index 15ea948ce5fa..1fa8a9ba0290 100644 --- a/numpy/core/src/multiarray/array_method.h +++ b/numpy/core/src/multiarray/array_method.h @@ -144,6 +144,21 @@ extern NPY_NO_EXPORT PyTypeObject PyBoundArrayMethod_Type; #define NPY_METH_unaligned_contiguous_loop 6 +/* + * Used internally (initially) for real to complex loops only + */ +NPY_NO_EXPORT int +npy_default_get_strided_loop( + PyArrayMethod_Context *context, + int aligned, int NPY_UNUSED(move_references), npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags); + + +/* + * TODO: This function is the internal version, and its error paths may + * need better tests when a public version is exposed. + */ NPY_NO_EXPORT PyBoundArrayMethodObject * PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private); diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 5d5b69bd5c5b..5ee5f0c1686e 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -25,6 +25,7 @@ #include "datetime_strings.h" #include "array_method.h" #include "usertypes.h" +#include "dtype_transfer.h" /* @@ -61,7 +62,7 @@ PyArray_GetObjectToGenericCastingImpl(void); * @returns A castingimpl (PyArrayDTypeMethod *), None or NULL with an * error set. */ -static PyObject * +NPY_NO_EXPORT PyObject * PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) { PyObject *res = PyDict_GetItem(from->castingimpls, (PyObject *)to); @@ -1973,6 +1974,30 @@ legacy_same_dtype_resolve_descriptors( } +NPY_NO_EXPORT int +legacy_cast_get_strided_loop( + PyArrayMethod_Context *context, + int aligned, int move_references, npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) +{ + PyArray_Descr **descrs = context->descriptors; + int out_needs_api = 0; + + *flags = context->method->flags & NPY_METH_RUNTIME_FLAGS; + + if (PyArray_GetLegacyDTypeTransferFunction( + aligned, strides[0], strides[1], descrs[0], descrs[1], + move_references, out_loop, out_transferdata, &out_needs_api, 0) < 0) { + return -1; + } + if (!out_needs_api) { + *flags &= ~NPY_METH_REQUIRES_PYAPI; + } + return 0; +} + + /* * Simple dtype resolver for casting between two different (non-parametric) * (legacy) dtypes. 
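 * (Illustrative: an int32 -> float64 cast goes through this resolver;
 * since both dtypes are non-parametric, it can simply return the
 * native-byte-order singleton instances for the loop descriptors.)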
@@ -2012,10 +2037,77 @@ simple_cast_resolve_descriptors(
 }
 
 
+NPY_NO_EXPORT int
+get_byteswap_loop(
+        PyArrayMethod_Context *context,
+        int aligned, int NPY_UNUSED(move_references), npy_intp *strides,
+        PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata,
+        NPY_ARRAYMETHOD_FLAGS *flags)
+{
+    PyArray_Descr **descrs = context->descriptors;
+    assert(descrs[0]->kind == descrs[1]->kind);
+    assert(descrs[0]->elsize == descrs[1]->elsize);
+    int itemsize = descrs[0]->elsize;
+    *flags = NPY_METH_NO_FLOATINGPOINT_ERRORS;
+    *out_transferdata = NULL;
+    if (descrs[0]->kind == 'c') {
+        /*
+         * TODO: complex is an issue here: since the loops below key on
+         *       the itemsize, they would assume a larger alignment than
+         *       complex values actually guarantee.  Using aligned = 0
+         *       might cause slowdowns in some cases.
+         */
+        aligned = 0;
+    }
+
+    if (PyDataType_ISNOTSWAPPED(descrs[0]) ==
+            PyDataType_ISNOTSWAPPED(descrs[1])) {
+        *out_loop = PyArray_GetStridedCopyFn(
+                aligned, strides[0], strides[1], itemsize);
+    }
+    else if (!PyTypeNum_ISCOMPLEX(descrs[0]->type_num)) {
+        *out_loop = PyArray_GetStridedCopySwapFn(
+                aligned, strides[0], strides[1], itemsize);
+    }
+    else {
+        *out_loop = PyArray_GetStridedCopySwapPairFn(
+                aligned, strides[0], strides[1], itemsize);
+    }
+    if (*out_loop == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
+
+NPY_NO_EXPORT int
+complex_to_noncomplex_get_loop(
+        PyArrayMethod_Context *context,
+        int aligned, int move_references, npy_intp *strides,
+        PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata,
+        NPY_ARRAYMETHOD_FLAGS *flags)
+{
+    static PyObject *cls = NULL;
+    int ret;
+    npy_cache_import("numpy.core", "ComplexWarning", &cls);
+    if (cls == NULL) {
+        return -1;
+    }
+    ret = PyErr_WarnEx(cls,
+            "Casting complex values to real discards "
+            "the imaginary part", 1);
+    if (ret < 0) {
+        return -1;
+    }
+    return npy_default_get_strided_loop(
+            context, aligned, move_references, strides,
+            out_loop, out_transferdata, flags);
+}
+
+
 static int
 add_numeric_cast(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to)
 {
-    PyType_Slot slots[6];
+    PyType_Slot slots[7];
     PyArray_DTypeMeta *dtypes[2] = {from, to};
     PyArrayMethod_Spec spec = {
             .name = "numeric_cast",
@@ -2026,8 +2118,8 @@ add_numeric_cast(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to)
             .dtypes = dtypes,
     };
 
-    npy_intp from_itemsize = dtypes[0]->singleton->elsize;
-    npy_intp to_itemsize = dtypes[1]->singleton->elsize;
+    npy_intp from_itemsize = from->singleton->elsize;
+    npy_intp to_itemsize = to->singleton->elsize;
 
     slots[0].slot = NPY_METH_resolve_descriptors;
     slots[0].pfunc = &simple_cast_resolve_descriptors;
@@ -2044,8 +2136,24 @@ add_numeric_cast(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to)
     slots[4].slot = NPY_METH_unaligned_contiguous_loop;
     slots[4].pfunc = PyArray_GetStridedNumericCastFn(
             0, from_itemsize, to_itemsize, from->type_num, to->type_num);
-    slots[5].slot = 0;
-    slots[5].pfunc = NULL;
+    if (PyTypeNum_ISCOMPLEX(from->type_num) &&
+            !PyTypeNum_ISCOMPLEX(to->type_num) &&
+            !PyTypeNum_ISBOOL(to->type_num)) {
+        /*
+         * The get_loop function must also give a ComplexWarning. We could
+         * consider moving this warning into the inner-loop at some point
+         * for simplicity (this requires ensuring it is only emitted once).
+         */
+        slots[5].slot = NPY_METH_get_loop;
+        slots[5].pfunc = &complex_to_noncomplex_get_loop;
+        slots[6].slot = 0;
+        slots[6].pfunc = NULL;
+    }
+    else {
+        /* Use the default get loop function.
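+         * (A slot id of 0 terminates the slot list; when no
+         * NPY_METH_get_loop slot is given, fill_arraymethod_from_slots
+         * falls back to npy_default_get_strided_loop, see above.)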
*/ + slots[5].slot = 0; + slots[5].pfunc = NULL; + } assert(slots[1].pfunc && slots[2].pfunc && slots[3].pfunc && slots[4].pfunc); @@ -2057,7 +2165,7 @@ add_numeric_cast(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) slots[0].slot = NPY_METH_resolve_descriptors; slots[0].pfunc = &legacy_same_dtype_resolve_descriptors; slots[1].slot = NPY_METH_get_loop; - slots[1].pfunc = NULL; + slots[1].pfunc = &get_byteswap_loop; slots[2].slot = 0; slots[2].pfunc = NULL; @@ -2240,9 +2348,9 @@ add_other_to_and_from_string_cast( */ PyArray_DTypeMeta *dtypes[2] = {other, string}; PyType_Slot slots[] = { - {NPY_METH_get_loop, NULL}, + {NPY_METH_get_loop, &legacy_cast_get_strided_loop}, {NPY_METH_resolve_descriptors, &cast_to_string_resolve_descriptors}, - {0, NULL}}; + {0, NULL}}; PyArrayMethod_Spec spec = { .name = "legacy_cast_to_string", .nin = 1, @@ -2300,6 +2408,35 @@ string_to_string_resolve_descriptors( } +NPY_NO_EXPORT int +string_to_string_get_loop( + PyArrayMethod_Context *context, + int aligned, int NPY_UNUSED(move_references), npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) +{ + int unicode_swap = 0; + PyArray_Descr **descrs = context->descriptors; + + assert(NPY_DTYPE(descrs[0]) == NPY_DTYPE(descrs[1])); + *flags = context->method->flags & NPY_METH_RUNTIME_FLAGS; + if (descrs[0]->type_num == NPY_UNICODE) { + if (PyDataType_ISNOTSWAPPED(descrs[0]) != + PyDataType_ISNOTSWAPPED(descrs[1])) { + unicode_swap = 1; + } + } + + if (PyArray_GetStridedZeroPadCopyFn( + aligned, unicode_swap, strides[0], strides[1], + descrs[0]->elsize, descrs[1]->elsize, + out_loop, out_transferdata) == NPY_FAIL) { + return -1; + } + return 0; +} + + /* * Add string casts. Right now all string casts are just legacy-wrapped ones * (except string<->string and unicode<->unicode), but they do require @@ -2339,9 +2476,9 @@ PyArray_InitializeStringCasts(void) /* string<->string and unicode<->unicode have their own specialized casts */ PyArray_DTypeMeta *dtypes[2]; PyType_Slot slots[] = { - {NPY_METH_get_loop, NULL}, + {NPY_METH_get_loop, &string_to_string_get_loop}, {NPY_METH_resolve_descriptors, &string_to_string_resolve_descriptors}, - {0, NULL}}; + {0, NULL}}; PyArrayMethod_Spec spec = { .name = "string_to_string_cast", .casting = NPY_NO_CASTING, @@ -2489,6 +2626,61 @@ int give_bad_field_error(PyObject *key) } +static int +nonstructured_to_structured_get_loop( + PyArrayMethod_Context *context, + int aligned, int move_references, + npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) +{ + if (context->descriptors[1]->names != NULL) { + int needs_api = 0; + if (get_fields_transfer_function( + aligned, strides[0], strides[1], + context->descriptors[0], context->descriptors[1], + move_references, out_loop, out_transferdata, + &needs_api) == NPY_FAIL) { + return -1; + } + *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0; + } + else if (context->descriptors[1]->subarray != NULL) { + int needs_api = 0; + if (get_subarray_transfer_function( + aligned, strides[0], strides[1], + context->descriptors[0], context->descriptors[1], + move_references, out_loop, out_transferdata, + &needs_api) == NPY_FAIL) { + return -1; + } + *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0; + } + else { + /* + * TODO: This could be a simple zero padded cast, adding a decref + * in case of `move_references`. But for now use legacy casts + * (which is the behaviour at least up to 1.20). 
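+         *
+         * As a hypothetical illustration of such a zero-padded cast:
+         * copying "S2" data into a plain "V4" output would copy the two
+         * input bytes and zero the remaining two bytes of each element.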
+ */ + int needs_api = 0; + if (!aligned) { + /* We need to wrap if aligned is 0. Use a recursive call */ + + } + if (PyArray_GetLegacyDTypeTransferFunction( + 1, strides[0], strides[1], + context->descriptors[0], context->descriptors[1], + move_references, out_loop, out_transferdata, + &needs_api, 1) < 0) { + return -1; + } + *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0; + } + return 0; +} + + static PyObject * PyArray_GetGenericToVoidCastingImpl(void) { @@ -2508,7 +2700,7 @@ PyArray_GetGenericToVoidCastingImpl(void) method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; method->casting = NPY_SAFE_CASTING; method->resolve_descriptors = &nonstructured_to_structured_resolve_descriptors; - method->get_strided_loop = NULL; + method->get_strided_loop = &nonstructured_to_structured_get_loop; return (PyObject *)method; } @@ -2578,6 +2770,56 @@ structured_to_nonstructured_resolve_descriptors( } +static int +structured_to_nonstructured_get_loop( + PyArrayMethod_Context *context, + int aligned, int move_references, + npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) +{ + if (context->descriptors[0]->names != NULL) { + int needs_api = 0; + if (get_fields_transfer_function( + aligned, strides[0], strides[1], + context->descriptors[0], context->descriptors[1], + move_references, out_loop, out_transferdata, + &needs_api) == NPY_FAIL) { + return -1; + } + *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0; + } + else if (context->descriptors[0]->subarray != NULL) { + int needs_api = 0; + if (get_subarray_transfer_function( + aligned, strides[0], strides[1], + context->descriptors[0], context->descriptors[1], + move_references, out_loop, out_transferdata, + &needs_api) == NPY_FAIL) { + return -1; + } + *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0; + } + else { + /* + * In general this is currently defined through legacy behaviour via + * scalars, and should likely just not be allowed. + */ + int needs_api = 0; + if (PyArray_GetLegacyDTypeTransferFunction( + aligned, strides[0], strides[1], + context->descriptors[0], context->descriptors[1], + move_references, out_loop, out_transferdata, + &needs_api, 1) < 0) { + return -1; + } + *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0; + } + return 0; +} + + static PyObject * PyArray_GetVoidToGenericCastingImpl(void) { @@ -2597,7 +2839,7 @@ PyArray_GetVoidToGenericCastingImpl(void) method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; method->casting = NPY_UNSAFE_CASTING; method->resolve_descriptors = &structured_to_nonstructured_resolve_descriptors; - method->get_strided_loop = NULL; + method->get_strided_loop = &structured_to_nonstructured_get_loop; return (PyObject *)method; } @@ -2761,6 +3003,56 @@ void_to_void_resolve_descriptors( } +NPY_NO_EXPORT int +void_to_void_get_loop( + PyArrayMethod_Context *context, + int aligned, int move_references, + npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) +{ + if (context->descriptors[0]->names != NULL || + context->descriptors[1]->names != NULL) { + int needs_api = 0; + if (get_fields_transfer_function( + aligned, strides[0], strides[1], + context->descriptors[0], context->descriptors[1], + move_references, out_loop, out_transferdata, + &needs_api) == NPY_FAIL) { + return -1; + } + *flags = needs_api ? 
NPY_METH_REQUIRES_PYAPI : 0;
+    }
+    else if (context->descriptors[0]->subarray != NULL ||
+             context->descriptors[1]->subarray != NULL) {
+        int needs_api = 0;
+        if (get_subarray_transfer_function(
+                aligned, strides[0], strides[1],
+                context->descriptors[0], context->descriptors[1],
+                move_references, out_loop, out_transferdata,
+                &needs_api) == NPY_FAIL) {
+            return -1;
+        }
+        *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
+    }
+    else {
+        /*
+         * This is a string-like copy of the bytes (zero-padding if
+         * necessary)
+         */
+        if (PyArray_GetStridedZeroPadCopyFn(
+                0, 0, strides[0], strides[1],
+                context->descriptors[0]->elsize, context->descriptors[1]->elsize,
+                out_loop, out_transferdata) == NPY_FAIL) {
+            return -1;
+        }
+        *flags = 0;
+    }
+    return 0;
+}
+
+
 /*
  * This initializes the void to void cast. Voids include structured dtypes,
  * which means that they can cast from and to any other dtype and, in that
@@ -2772,9 +3064,9 @@ PyArray_InitializeVoidToVoidCast(void)
     PyArray_DTypeMeta *Void = PyArray_DTypeFromTypeNum(NPY_VOID);
     PyArray_DTypeMeta *dtypes[2] = {Void, Void};
     PyType_Slot slots[] = {
-        {NPY_METH_get_loop, NULL},
+        {NPY_METH_get_loop, &void_to_void_get_loop},
         {NPY_METH_resolve_descriptors, &void_to_void_resolve_descriptors},
-        {0, NULL}};
+        {0, NULL}};
     PyArrayMethod_Spec spec = {
             .name = "void_to_void_cast",
             .casting = NPY_NO_CASTING,
@@ -2859,7 +3151,7 @@ PyArray_GetObjectToGenericCastingImpl(void)
     method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI;
     method->casting = NPY_UNSAFE_CASTING;
     method->resolve_descriptors = &object_to_any_resolve_descriptors;
-    method->get_strided_loop = NULL;
+    method->get_strided_loop = &object_to_any_get_loop;
 
     return (PyObject *)method;
 }
@@ -2915,12 +3207,38 @@ PyArray_GetGenericToObjectCastingImpl(void)
     method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI;
     method->casting = NPY_SAFE_CASTING;
     method->resolve_descriptors = &any_to_object_resolve_descriptors;
-    method->get_strided_loop = NULL;
+    method->get_strided_loop = &any_to_object_get_loop;
 
     return (PyObject *)method;
 }
 
 
+/*
+ * Casts within the object dtype are always just a plain copy/view.
+ * For that reason, this function can remain trivial.
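+ * (The getter below simply hands out the reference move/copy helpers:
+ * with `move_references` each `PyObject *` slot is moved and the source
+ * slot cleared, otherwise a new reference is created for the copy.)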
+ */ +static int +object_to_object_get_loop( + PyArrayMethod_Context *NPY_UNUSED(context), + int NPY_UNUSED(aligned), int move_references, + npy_intp *NPY_UNUSED(strides), + PyArray_StridedUnaryOp **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) +{ + *flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_NO_FLOATINGPOINT_ERRORS; + if (move_references) { + *out_loop = &_strided_to_strided_move_references; + *out_transferdata = NULL; + } + else { + *out_loop = &_strided_to_strided_copy_references; + *out_transferdata = NULL; + } + return 0; +} + + static int PyArray_InitializeObjectToObjectCast(void) { @@ -2931,8 +3249,8 @@ PyArray_InitializeObjectToObjectCast(void) PyArray_DTypeMeta *Object = PyArray_DTypeFromTypeNum(NPY_OBJECT); PyArray_DTypeMeta *dtypes[2] = {Object, Object}; PyType_Slot slots[] = { - {NPY_METH_get_loop, NULL}, - {0, NULL}}; + {NPY_METH_get_loop, &object_to_object_get_loop}, + {0, NULL}}; PyArrayMethod_Spec spec = { .name = "object_to_object_cast", .casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW, diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index 97006b952543..a147dec3c780 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ b/numpy/core/src/multiarray/convert_datatype.h @@ -5,6 +5,9 @@ extern NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[]; +NPY_NO_EXPORT PyObject * +PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to); + NPY_NO_EXPORT PyObject * _get_castingimpl(PyObject *NPY_UNUSED(module), PyObject *args); @@ -73,6 +76,13 @@ legacy_same_dtype_resolve_descriptors( PyArray_Descr **given_descrs, PyArray_Descr **loop_descrs); +NPY_NO_EXPORT int +legacy_cast_get_strided_loop( + PyArrayMethod_Context *context, + int aligned, int move_references, npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags); + NPY_NO_EXPORT NPY_CASTING simple_cast_resolve_descriptors( PyArrayMethodObject *self, diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index 9c1b606bb564..6962159447cb 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -3803,6 +3803,47 @@ time_to_time_resolve_descriptors( } +static int +time_to_time_get_loop( + PyArrayMethod_Context *context, + int aligned, int NPY_UNUSED(move_references), npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) +{ + int requires_wrap = 0; + int inner_aligned = aligned; + PyArray_Descr **descrs = context->descriptors; + *flags = NPY_METH_NO_FLOATINGPOINT_ERRORS; + + if (!PyDataType_ISNOTSWAPPED(descrs[0]) || + !PyDataType_ISNOTSWAPPED(descrs[1])) { + inner_aligned = 1; + requires_wrap = 1; + } + if (get_nbo_cast_datetime_transfer_function( + inner_aligned, descrs[0], descrs[1], + out_loop, out_transferdata) == NPY_FAIL) { + return -1; + } + + if (!requires_wrap) { + return 0; + } + + int needs_api = 0; + NpyAuxData *castdata = *out_transferdata; + if (wrap_aligned_contig_transfer_function_with_copyswapn( + aligned, strides[0], strides[1], descrs[0], descrs[1], + out_loop, out_transferdata, &needs_api, + *out_loop, castdata) == NPY_FAIL) { + NPY_AUXDATA_FREE(castdata); + return -1; + } + assert(needs_api == 0); + return 0; +} + + /* Handles datetime<->timedelta type resolution (both directions) */ static NPY_CASTING datetime_to_timedelta_resolve_descriptors( @@ -3844,9 +3885,7 @@ time_to_string_resolve_descriptors( PyArray_Descr 
**given_descrs, PyArray_Descr **loop_descrs) { - Py_INCREF(given_descrs[0]); - loop_descrs[0] = given_descrs[0]; - if (given_descrs[1] != NULL) { + if (given_descrs[1] != NULL && dtypes[0]->type_num == NPY_DATETIME) { /* * At the time of writing, NumPy does not check the length here, * but will error if filling fails. @@ -3863,6 +3902,10 @@ time_to_string_resolve_descriptors( size = get_datetime_iso_8601_strlen(0, meta->base); } else { + /* + * This is arguably missing space for the unit, e.g. for: + * `np.timedelta64(1231234342124, 'ms')` + */ size = 21; } if (dtypes[1]->type_num == NPY_UNICODE) { @@ -3870,15 +3913,47 @@ time_to_string_resolve_descriptors( } loop_descrs[1] = PyArray_DescrNewFromType(dtypes[1]->type_num); if (loop_descrs[1] == NULL) { - Py_DECREF(loop_descrs[0]); return -1; } loop_descrs[1]->elsize = size; } + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + assert(self->casting == NPY_UNSAFE_CASTING); return NPY_UNSAFE_CASTING; } +static int +time_to_string_get_loop( + PyArrayMethod_Context *context, + int aligned, int NPY_UNUSED(move_references), npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) +{ + PyArray_Descr **descrs = context->descriptors; + *flags = context->method->flags & NPY_METH_RUNTIME_FLAGS; + + if (descrs[1]->type_num == NPY_STRING) { + if (get_nbo_datetime_to_string_transfer_function( + descrs[0], descrs[1], + out_loop, out_transferdata) == NPY_FAIL) { + return -1; + } + } + else { + assert(descrs[1]->type_num == NPY_UNICODE); + int out_needs_api; + if (get_datetime_to_unicode_transfer_function( + aligned, strides[0], strides[1], descrs[0], descrs[1], + out_loop, out_transferdata, &out_needs_api) == NPY_FAIL) { + return -1; + } + } + return 0; +} + static NPY_CASTING string_to_datetime_cast_resolve_descriptors( @@ -3908,6 +3983,36 @@ string_to_datetime_cast_resolve_descriptors( } +static int +string_to_datetime_cast_get_loop( + PyArrayMethod_Context *context, + int aligned, int NPY_UNUSED(move_references), npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags) +{ + PyArray_Descr **descrs = context->descriptors; + *flags = context->method->flags & NPY_METH_RUNTIME_FLAGS; + + if (descrs[0]->type_num == NPY_STRING) { + if (get_nbo_string_to_datetime_transfer_function( + descrs[0], descrs[1], out_loop, out_transferdata) == NPY_FAIL) { + return -1; + } + } + else { + assert(descrs[0]->type_num == NPY_UNICODE); + int out_needs_api; + if (get_unicode_to_datetime_transfer_function( + aligned, strides[0], strides[1], descrs[0], descrs[1], + out_loop, out_transferdata, &out_needs_api) == NPY_FAIL) { + return -1; + } + } + return 0; +} + + + /* * This registers the castingimpl for all datetime related casts. */ @@ -3930,7 +4035,7 @@ PyArray_InitializeDatetimeCasts() slots[0].slot = NPY_METH_resolve_descriptors; slots[0].pfunc = &time_to_time_resolve_descriptors; slots[1].slot = NPY_METH_get_loop; - slots[1].pfunc = NULL; + slots[1].pfunc = &time_to_time_get_loop; slots[2].slot = 0; slots[2].pfunc = NULL; @@ -3955,10 +4060,12 @@ PyArray_InitializeDatetimeCasts() * Casting between timedelta and datetime uses legacy casting loops, but * custom dtype resolution (to handle copying of the time unit). 
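 * (For example, with units that are purely illustrative here: resolving a
 * datetime64[ns] -> timedelta64 cast would fill in the missing unit from
 * the input and use timedelta64[ns] for the loop.)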
 */
+    spec.flags = NPY_METH_REQUIRES_PYAPI;
+
     slots[0].slot = NPY_METH_resolve_descriptors;
     slots[0].pfunc = &datetime_to_timedelta_resolve_descriptors;
     slots[1].slot = NPY_METH_get_loop;
-    slots[1].pfunc = NULL;
+    slots[1].pfunc = &legacy_cast_get_strided_loop;
     slots[2].slot = 0;
     slots[2].pfunc = NULL;
@@ -4030,7 +4137,7 @@ PyArray_InitializeDatetimeCasts()
     slots[0].slot = NPY_METH_resolve_descriptors;
     slots[0].pfunc = &time_to_string_resolve_descriptors;
     slots[1].slot = NPY_METH_get_loop;
-    slots[1].pfunc = NULL;
+    slots[1].pfunc = &time_to_string_get_loop;
     slots[2].slot = 0;
     slots[2].pfunc = NULL;
@@ -4070,7 +4177,7 @@ PyArray_InitializeDatetimeCasts()
     slots[0].slot = NPY_METH_resolve_descriptors;
     slots[0].pfunc = &string_to_datetime_cast_resolve_descriptors;
     slots[1].slot = NPY_METH_get_loop;
-    slots[1].pfunc = NULL;
+    slots[1].pfunc = &string_to_datetime_cast_get_loop;
     slots[2].slot = 0;
     slots[2].pfunc = NULL;
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 630bd76f3955..9b8e5f32f816 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -28,8 +28,11 @@
 #include "array_assign.h"
 
 #include "shape.h"
-#include "lowlevel_strided_loops.h"
+#include "dtype_transfer.h"
 #include "alloc.h"
+#include "dtypemeta.h"
+#include "array_method.h"
+#include "array_coercion.h"
 
 #define NPY_LOWLEVEL_BUFFER_BLOCKSIZE  128
@@ -105,7 +108,7 @@ get_bool_setdstone_transfer_function(npy_intp dst_stride,
 /*************************** COPY REFERENCES *******************************/
 
 /* Moves references from src to dst */
-static int
+NPY_NO_EXPORT int
 _strided_to_strided_move_references(char *dst, npy_intp dst_stride,
                         char *src, npy_intp src_stride,
                         npy_intp N, npy_intp src_itemsize,
@@ -134,7 +137,7 @@ _strided_to_strided_move_references(char *dst, npy_intp dst_stride,
 }
 
 /* Copies references from src to dst */
-static int
+NPY_NO_EXPORT int
 _strided_to_strided_copy_references(char *dst, npy_intp dst_stride,
                         char *src, npy_intp src_stride,
                         npy_intp N, npy_intp src_itemsize,
@@ -161,6 +164,217 @@ _strided_to_strided_copy_references(char *dst, npy_intp dst_stride,
     return 0;
 }
 
+/************************** ANY TO OBJECT *********************************/
+
+typedef struct {
+    NpyAuxData base;
+    PyArray_GetItemFunc *getitem;
+    PyArrayObject_fields arr_fields;
+    PyArray_StridedUnaryOp *decref_func;
+    NpyAuxData *decref_data;
+} _any_to_object_auxdata;
+
+
+static void
+_any_to_object_auxdata_free(NpyAuxData *auxdata)
+{
+    _any_to_object_auxdata *data = (_any_to_object_auxdata *)auxdata;
+
+    Py_DECREF(data->arr_fields.descr);
+    NPY_AUXDATA_FREE(data->decref_data);
+    PyMem_Free(data);
+}
+
+
+static NpyAuxData *
+_any_to_object_auxdata_clone(NpyAuxData *auxdata)
+{
+    _any_to_object_auxdata *data = (_any_to_object_auxdata *)auxdata;
+
+    _any_to_object_auxdata *res = PyMem_Malloc(sizeof(_any_to_object_auxdata));
+    if (res == NULL) {
+        return NULL;
+    }
+    memcpy(res, data, sizeof(*data));
+    Py_INCREF(res->arr_fields.descr);
+    if (res->decref_data != NULL) {
+        res->decref_data = NPY_AUXDATA_CLONE(data->decref_data);
+        if (res->decref_data == NULL) {
+            NPY_AUXDATA_FREE((NpyAuxData *) res);
+            return NULL;
+        }
+    }
+    return (NpyAuxData *)res;
+}
+
+
+static int
+_strided_to_strided_any_to_object(char *dst, npy_intp dst_stride,
+                        char *src, npy_intp src_stride,
+                        npy_intp N, npy_intp src_itemsize,
+                        NpyAuxData *auxdata)
+{
+    _any_to_object_auxdata *data = (_any_to_object_auxdata *)auxdata;
+
+    PyObject *dst_ref = NULL;
+    char *orig_src = src;
+    npy_intp orig_N = N;  /* N is consumed by the loop below */
+    while (N > 0) {
+        memcpy(&dst_ref, dst, sizeof(dst_ref));
+        Py_XDECREF(dst_ref);
+        dst_ref = data->getitem(src, &data->arr_fields);
+        memcpy(dst, &dst_ref, sizeof(PyObject *));
+
+        if (dst_ref == NULL) {
+            return -1;
+        }
+        src += src_stride;
+        dst += dst_stride;
+        --N;
+    }
+    if (data->decref_func != NULL) {
+        /* If necessary, clear the input buffer (`move_references`) */
+        if (data->decref_func(NULL, 0, orig_src, src_stride, orig_N,
+                src_itemsize, data->decref_data) < 0) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+
+NPY_NO_EXPORT int
+any_to_object_get_loop(
+        PyArrayMethod_Context *context,
+        int aligned, int move_references,
+        npy_intp *strides,
+        PyArray_StridedUnaryOp **out_loop,
+        NpyAuxData **out_transferdata,
+        NPY_ARRAYMETHOD_FLAGS *flags)
+{
+
+    *flags = NPY_METH_REQUIRES_PYAPI;  /* No need for floating point errors */
+
+    *out_loop = _strided_to_strided_any_to_object;
+    *out_transferdata = PyMem_Malloc(sizeof(_any_to_object_auxdata));
+    if (*out_transferdata == NULL) {
+        return -1;
+    }
+    _any_to_object_auxdata *data = (_any_to_object_auxdata *)*out_transferdata;
+    data->base.free = &_any_to_object_auxdata_free;
+    data->base.clone = &_any_to_object_auxdata_clone;
+    data->arr_fields.base = NULL;
+    data->arr_fields.descr = context->descriptors[0];
+    Py_INCREF(data->arr_fields.descr);
+    data->arr_fields.flags = aligned ? NPY_ARRAY_ALIGNED : 0;
+    data->arr_fields.nd = 0;
+
+    data->getitem = context->descriptors[0]->f->getitem;
+    data->decref_func = NULL;
+    data->decref_data = NULL;
+
+    if (move_references && PyDataType_REFCHK(context->descriptors[0])) {
+        int needs_api;
+        if (get_decsrcref_transfer_function(
+                aligned, strides[0], context->descriptors[0],
+                &data->decref_func, &data->decref_data,
+                &needs_api) == NPY_FAIL) {
+            NPY_AUXDATA_FREE(*out_transferdata);
+            *out_transferdata = NULL;
+            return -1;
+        }
+    }
+    return 0;
+}
+
+
+/************************** OBJECT TO ANY *********************************/
+
+typedef struct {
+    NpyAuxData base;
+    PyArray_Descr *descr;
+    int move_references;
+} _object_to_any_auxdata;
+
+
+static void
+_object_to_any_auxdata_free(NpyAuxData *auxdata)
+{
+    _object_to_any_auxdata *data = (_object_to_any_auxdata *)auxdata;
+    Py_DECREF(data->descr);
+    PyMem_Free(data);
+}
+
+static NpyAuxData *
+_object_to_any_auxdata_clone(NpyAuxData *data)
+{
+    _object_to_any_auxdata *res = PyMem_Malloc(sizeof(*res));
+    if (res == NULL) {
+        return NULL;
+    }
+    memcpy(res, data, sizeof(*res));
+    Py_INCREF(res->descr);
+    return (NpyAuxData *)res;
+}
+
+
+static int
+strided_to_strided_object_to_any(
+        char *dst, npy_intp dst_stride,
+        char *src, npy_intp src_stride,
+        npy_intp N, npy_intp NPY_UNUSED(src_itemsize),
+        NpyAuxData *auxdata)
+{
+    _object_to_any_auxdata *data = (_object_to_any_auxdata *)auxdata;
+
+    PyObject *src_ref;
+
+    while (N > 0) {
+        memcpy(&src_ref, src, sizeof(src_ref));
+        if (PyArray_Pack(data->descr, dst, src_ref) < 0) {
+            return -1;
+        }
+
+        if (data->move_references) {
+            Py_DECREF(src_ref);
+            memset(src, 0, sizeof(src_ref));
+        }
+
+        N--;
+        dst += dst_stride;
+        src += src_stride;
+    }
+    return 0;
+}
+
+
+NPY_NO_EXPORT int
+object_to_any_get_loop(
+        PyArrayMethod_Context *context,
+        int NPY_UNUSED(aligned), int move_references,
+        npy_intp *NPY_UNUSED(strides),
+        PyArray_StridedUnaryOp **out_loop,
+        NpyAuxData **out_transferdata,
+        NPY_ARRAYMETHOD_FLAGS *flags)
+{
+    *flags = NPY_METH_REQUIRES_PYAPI;
+
+    /*
+     * TODO: After passing `context`, auxdata can be statically allocated
+     *       since `descriptor` is always passed.
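+     *
+     * The loop itself unpacks one `PyObject *` per element and lets
+     * PyArray_Pack() coerce it into the output descriptor; e.g. an object
+     * array of Python ints packed into an int64 output (illustrative
+     * example, not from the original comment).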
+ */ + _object_to_any_auxdata *data = PyMem_Malloc(sizeof(*data)); + if (data == NULL) { + return -1; + } + data->base.free = &_object_to_any_auxdata_free; + data->base.clone = &_object_to_any_auxdata_clone; + + Py_INCREF(context->descriptors[1]); + data->descr = context->descriptors[1]; + data->move_references = move_references; + *out_transferdata = (NpyAuxData *)data; + *out_loop = &strided_to_strided_object_to_any; + return 0; +} + /************************** ZERO-PADDED COPY ******************************/ @@ -725,6 +939,7 @@ _aligned_contig_to_contig_cast(char *dst, npy_intp NPY_UNUSED(dst_stride), return 0; } +#if !NPY_USE_NEW_CASTINGIMPL static int get_nbo_cast_numeric_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, @@ -762,6 +977,7 @@ get_nbo_cast_numeric_transfer_function(int aligned, return NPY_SUCCEED; } +#endif /* * Does a datetime->datetime, timedelta->timedelta, @@ -1464,6 +1680,7 @@ get_legacy_dtype_cast_function( } +#if !NPY_USE_NEW_CASTINGIMPL static int get_nbo_cast_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, @@ -1548,6 +1765,7 @@ get_nbo_cast_transfer_function(int aligned, move_references, out_stransfer, out_transferdata, out_needs_api, out_needs_wrap); } +#endif NPY_NO_EXPORT int @@ -1619,6 +1837,7 @@ wrap_aligned_contig_transfer_function_with_copyswapn( } +#if !NPY_USE_NEW_CASTINGIMPL static int get_cast_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, @@ -1661,6 +1880,7 @@ get_cast_transfer_function(int aligned, caststransfer, castdata); } } +#endif /**************************** COPY 1 TO N CONTIGUOUS ************************/ @@ -2411,7 +2631,7 @@ get_subarray_broadcast_transfer_function(int aligned, * Handles subarray transfer. To call this, at least one of the dtype's * subarrays must be non-NULL */ -static int +NPY_NO_EXPORT int get_subarray_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -2621,7 +2841,7 @@ _strided_to_strided_field_transfer(char *dst, npy_intp dst_stride, * Handles fields transfer. To call this, at least one of the dtypes * must have fields. Does not take care of object<->structure conversion */ -static int +NPY_NO_EXPORT int get_fields_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -3511,8 +3731,9 @@ PyArray_GetDTypeCopySwapFn(int aligned, /********************* MAIN DTYPE TRANSFER FUNCTION ***********************/ -NPY_NO_EXPORT int -PyArray_GetDTypeTransferFunction(int aligned, +#if !NPY_USE_NEW_CASTINGIMPL +static int +PyArray_LegacyGetDTypeTransferFunction(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, @@ -3756,6 +3977,544 @@ PyArray_GetDTypeTransferFunction(int aligned, out_stransfer, out_transferdata, out_needs_api); } +#endif + +/* + * ********************* Generalized Multistep Cast ************************ + * + * New general purpose multiple step cast function when resolve descriptors + * implies that multiple cast steps are necessary. + */ +#if NPY_USE_NEW_CASTINGIMPL + +/* + * The full context passed in is never the correct context for each + * individual cast, so we have to store each of these casts information. + * Certain fields may be undefined (currently, the `caller`). 
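+ *
+ * Sketch of the resulting pipeline (hypothetical example): casting
+ * unaligned int16 data to float32, where the main cast requires aligned
+ * input, may expand to
+ *
+ *     src --(from: aligning copy)---> from_buffer
+ *         --(main: int16->float32)--> to_buffer
+ *         --(to: copy to output)----> dst
+ *
+ * processed in chunks of NPY_LOWLEVEL_BUFFER_BLOCKSIZE elements.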
+ */
+typedef struct {
+    PyArray_StridedUnaryOp *stransfer;
+    NpyAuxData *auxdata;
+    PyArrayMethod_Context context;
+    PyArray_Descr *descriptors[2];
+} _cast_info;
+
+typedef struct {
+    NpyAuxData base;
+    /* Information for main cast */
+    _cast_info main;
+    /* Information for input preparation cast */
+    _cast_info from;
+    /* Information for output finalization cast */
+    _cast_info to;
+    char *from_buffer;
+    char *to_buffer;
+} _multistep_castdata;
+
+
+static NPY_INLINE void
+_cast_info_free(_cast_info *cast_info)
+{
+    NPY_AUXDATA_FREE(cast_info->auxdata);
+    Py_DECREF(cast_info->descriptors[0]);
+    Py_DECREF(cast_info->descriptors[1]);
+    Py_DECREF(cast_info->context.method);
+}
+
+
+/* Free function for the multistep cast auxdata */
+static void
+_multistep_cast_auxdata_free(NpyAuxData *auxdata)
+{
+    _multistep_castdata *data = (_multistep_castdata *)auxdata;
+    _cast_info_free(&data->main);
+    if (data->from.stransfer != NULL) {
+        _cast_info_free(&data->from);
+    }
+    if (data->to.stransfer != NULL) {
+        _cast_info_free(&data->to);
+    }
+    PyMem_Free(data);
+}
+
+
+static NpyAuxData *
+_multistep_cast_auxdata_clone(NpyAuxData *auxdata_old);
+
+static NpyAuxData *
+_multistep_cast_auxdata_clone_int(NpyAuxData *auxdata_old, int move_auxdata)
+{
+    _multistep_castdata *castdata = (_multistep_castdata *)auxdata_old;
+
+    /* Round up the structure size to 16-byte boundary for the buffers */
+    ssize_t datasize = (sizeof(_multistep_castdata) + 15) & ~0xf;
+
+    ssize_t from_buffer_offset = datasize;
+    if (castdata->from.stransfer != NULL) {
+        ssize_t src_itemsize = castdata->main.context.descriptors[0]->elsize;
+        datasize += NPY_LOWLEVEL_BUFFER_BLOCKSIZE * src_itemsize;
+        datasize = (datasize + 15) & ~0xf;
+    }
+    ssize_t to_buffer_offset = datasize;
+    if (castdata->to.stransfer != NULL) {
+        ssize_t dst_itemsize = castdata->main.context.descriptors[1]->elsize;
+        datasize += NPY_LOWLEVEL_BUFFER_BLOCKSIZE * dst_itemsize;
+    }
+
+    char *char_data = PyMem_Malloc(datasize);
+    if (char_data == NULL) {
+        return NULL;
+    }
+
+    _multistep_castdata *auxdata = (_multistep_castdata *)char_data;
+
+    /* Copy the prepared old auxdata and fix up its internal pointers */
+    memcpy(char_data, castdata, sizeof(*castdata));
+
+    auxdata->from_buffer = char_data + from_buffer_offset;
+    auxdata->to_buffer = char_data + to_buffer_offset;
+
+    auxdata->main.context.descriptors = auxdata->main.descriptors;
+    auxdata->from.context.descriptors = auxdata->from.descriptors;
+    auxdata->to.context.descriptors = auxdata->to.descriptors;
+
+    auxdata->base.free = &_multistep_cast_auxdata_free;
+    auxdata->base.clone = &_multistep_cast_auxdata_clone;
+
+    /* Hold on to references and initialize buffers if necessary. */
+    Py_INCREF(auxdata->main.descriptors[0]);
+    Py_INCREF(auxdata->main.descriptors[1]);
+    Py_INCREF(auxdata->main.context.method);
+
+    if (!move_auxdata) {
+        /* Ensure we don't free twice on error: */
+        auxdata->from.auxdata = NULL;
+        auxdata->to.auxdata = NULL;
+
+        if (castdata->main.auxdata != NULL) {
+            auxdata->main.auxdata = NPY_AUXDATA_CLONE(castdata->main.auxdata);
+            if (auxdata->main.auxdata == NULL) {
+                NPY_AUXDATA_FREE((NpyAuxData *)auxdata);
+                return NULL;
+            }
+        }
+    }
+    else {
+        /* Clear the original, to avoid double free.
*/ + castdata->main.auxdata = NULL; + castdata->from.auxdata = NULL; + castdata->to.auxdata = NULL; + } + + if (castdata->from.stransfer != NULL) { + Py_INCREF(auxdata->from.descriptors[0]); + Py_INCREF(auxdata->from.descriptors[1]); + Py_INCREF(auxdata->from.context.method); + if (PyDataType_FLAGCHK(auxdata->main.descriptors[0], NPY_NEEDS_INIT)) { + memset(auxdata->from_buffer, 0, to_buffer_offset - from_buffer_offset); + } + if (!move_auxdata && castdata->from.auxdata != NULL) { + auxdata->from.auxdata = NPY_AUXDATA_CLONE(castdata->from.auxdata); + if (auxdata->from.auxdata == NULL) { + NPY_AUXDATA_FREE((NpyAuxData *)auxdata); + return NULL; + } + } + } + if (castdata->to.stransfer != NULL) { + Py_INCREF(auxdata->to.descriptors[0]); + Py_INCREF(auxdata->to.descriptors[1]); + Py_INCREF(auxdata->to.context.method); + if (PyDataType_FLAGCHK(auxdata->main.descriptors[1], NPY_NEEDS_INIT)) { + memset(auxdata->to_buffer, 0, datasize - to_buffer_offset); + } + if (!move_auxdata && castdata->to.auxdata != NULL) { + auxdata->to.auxdata = NPY_AUXDATA_CLONE(castdata->to.auxdata); + if (auxdata->to.auxdata == NULL) { + NPY_AUXDATA_FREE((NpyAuxData *)auxdata); + return NULL; + } + } + } + + return (NpyAuxData *)auxdata; +} + +static NpyAuxData * +_multistep_cast_auxdata_clone(NpyAuxData *auxdata_old) +{ + return _multistep_cast_auxdata_clone_int(auxdata_old, 0); +} + + +static int +_strided_to_strided_multistep_cast( + char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp src_itemsize, + NpyAuxData *data) +{ + _multistep_castdata *castdata = (_multistep_castdata *)data; + + char *main_src, *main_dst; + npy_intp main_src_stride, main_dst_stride, main_src_itemsize; + + npy_intp block_size = NPY_LOWLEVEL_BUFFER_BLOCKSIZE; + while (N > 0) { + if (block_size > N) { + block_size = N; + } + + if (castdata->from.stransfer != NULL) { + npy_intp out_stride = castdata->from.descriptors[1]->elsize; + if (castdata->from.stransfer( + castdata->from_buffer, out_stride, src, src_stride, + block_size, src_itemsize, castdata->from.auxdata)) { + /* TODO: Internal buffer may require cleanup on error. */ + return -1; + } + main_src = castdata->from_buffer; + main_src_stride = out_stride; + main_src_itemsize = out_stride; + } + else { + main_src = src; + main_src_stride = src_stride; + main_src_itemsize = src_itemsize; + } + + if (castdata->to.stransfer != NULL) { + main_dst = castdata->to_buffer; + main_dst_stride = castdata->main.descriptors[1]->elsize; + } + else { + main_dst = dst; + main_dst_stride = dst_stride; + } + + if (castdata->main.stransfer( + main_dst, main_dst_stride, main_src, main_src_stride, + block_size, main_src_itemsize, castdata->main.auxdata)) { + /* TODO: Internal buffer may require cleanup on error. */ + return -1; + } + + if (castdata->to.stransfer != NULL) { + if (castdata->to.stransfer( + dst, dst_stride, main_dst, main_dst_stride, + block_size, main_dst_stride, castdata->to.auxdata)) { + return -1; + } + } + + N -= block_size; + src += block_size * src_stride; + dst += block_size * dst_stride; + } + return 0; +} + + +/* + * Initialize most of a cast-info structure, this step does not fetch the + * transferfunction and transferdata. 
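+ * It does fetch the ArrayMethod via PyArray_GetCastingImpl() and run its
+ * resolve_descriptors(), so on success the context and both loop
+ * descriptors are filled in; only get_strided_loop() remains for the
+ * caller.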
+ */
+static NPY_INLINE int
+init_cast_info(_cast_info *cast_info, NPY_CASTING *casting,
+        PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int main_step)
+{
+    PyObject *meth = PyArray_GetCastingImpl(
+            NPY_DTYPE(src_dtype), NPY_DTYPE(dst_dtype));
+    if (meth == NULL) {
+        return -1;
+    }
+    if (meth == Py_None) {
+        Py_DECREF(Py_None);
+        PyErr_Format(PyExc_TypeError,
+                "Cannot cast data from %S to %S.", src_dtype, dst_dtype);
+        return -1;
+    }
+    /* Initialize the context and related data */
+    cast_info->context.caller = NULL;
+    cast_info->stransfer = NULL;
+    cast_info->auxdata = NULL;
+
+    cast_info->context.method = (PyArrayMethodObject *)meth;
+    cast_info->context.descriptors = cast_info->descriptors;
+
+    PyArray_DTypeMeta *dtypes[2] = {NPY_DTYPE(src_dtype), NPY_DTYPE(dst_dtype)};
+    PyArray_Descr *in_descr[2] = {src_dtype, dst_dtype};
+
+    *casting = cast_info->context.method->resolve_descriptors(
+            cast_info->context.method, dtypes, in_descr, cast_info->descriptors);
+    if (NPY_UNLIKELY(*casting < 0)) {
+        if (!PyErr_Occurred()) {
+            PyErr_Format(PyExc_TypeError,
+                    "Cannot cast data from %S to %S.", src_dtype, dst_dtype);
+        }
+        Py_DECREF(meth);
+        return -1;
+    }
+
+    if (!main_step && NPY_UNLIKELY(src_dtype != cast_info->descriptors[0] ||
+                                   dst_dtype != cast_info->descriptors[1])) {
+        /*
+         * We currently do not resolve recursively, but require a non
+         * main cast (within the same DType) to be done in a single step.
+         * This could be expanded at some point if the need arises.
+         */
+        PyErr_Format(PyExc_RuntimeError,
+                "Required internal cast from %R to %R was not done in a single "
+                "step (a secondary cast must currently be between instances of "
+                "the same DType class and such a cast must currently return "
+                "the input descriptors unmodified).",
+                src_dtype, dst_dtype);
+        _cast_info_free(cast_info);
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Helper for PyArray_GetDTypeTransferFunction, which fetches a single
+ * transfer function from each casting implementation (ArrayMethod).
+ * May set the transfer function to NULL when the cast can be achieved using
+ * a view.
+ * The `out_needs_api` flag must be initialized.
+ *
+ * NOTE: In theory casting errors here could be slightly misleading in case
+ *       of a multi-step casting scenario. It should be possible to improve
+ *       this in the future.
+ *
+ * Note about `move_references`: Moving references means stealing them;
+ * it is useful for clearing buffers immediately. Regardless of the input,
+ * all copies from a buffer must use `move_references`. Move references
+ * is thus used:
+ *   * For the added initial "from" cast if it was passed in
+ *   * Always in the main step if a "from" cast is made (it casts from a buffer)
+ *   * Always for the "to" cast, as it always casts from a buffer to the output.
+ * + * Returns -1 on failure, 0 on success + */ +static int +get_transferfunction_for_descrs( + int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api) +{ + *out_transferdata = NULL; /* ensure NULL on error */ + /* Storage for all cast info in case multi-step casting is necessary */ + _multistep_castdata castdata; + /* Initialize secondary `stransfer` to indicate whether they are used: */ + castdata.to.stransfer = NULL; + castdata.from.stransfer = NULL; + NPY_CASTING casting = -1; + int res = -1; + + if (init_cast_info(&castdata.main, &casting, src_dtype, dst_dtype, 1) < 0) { + return -1; + } + + /* + * Both input and output must be wrapped in case they may be unaligned + * and the method does not support unaligned data. + * NOTE: It is probable that most/all legacy loops actually do support + * unaligned output, we could move the wrapping there if we wanted + * to. It probably isn't speed relevant though and they should be + * deleted in any case. + */ + int must_wrap = (!aligned && + (castdata.main.context.method->flags & NPY_METH_SUPPORTS_UNALIGNED) == 0); + + /* + * Wrap the input with an additional cast if necessary. + */ + if (NPY_UNLIKELY(src_dtype != castdata.main.descriptors[0] || must_wrap)) { + NPY_CASTING from_casting = -1; + /* Cast function may not support the input, wrap if necessary */ + if (init_cast_info( + &castdata.from, &from_casting, + src_dtype, castdata.main.descriptors[0], 0) < 0) { + goto fail; + } + casting = PyArray_MinCastSafety(casting, from_casting); + + /* Prepare the actual cast (if necessary): */ + if (from_casting & _NPY_CAST_IS_VIEW && !must_wrap) { + /* This step is not necessary and can be skipped. */ + _cast_info_free(&castdata.from); + } + else { + /* Fetch the cast function and set up */ + PyArrayMethod_Context *context = &castdata.from.context; + npy_intp strides[2] = {src_stride, castdata.main.descriptors[0]->elsize}; + NPY_ARRAYMETHOD_FLAGS flags; + if (context->method->get_strided_loop( + context, aligned, move_references, strides, + &castdata.from.stransfer, &castdata.from.auxdata, &flags) < 0) { + assert(castdata.from.auxdata != NULL); + _cast_info_free(&castdata.from); + castdata.from.stransfer = NULL; /* ensure we cleanup once */ + goto fail; + } + assert(castdata.from.stransfer != NULL); + + *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0; + /* The main cast now uses a buffered input: */ + src_stride = strides[1]; + move_references = 1; /* main cast has to clear the buffer */ + } + } + /* + * Wrap the output with an additional cast if necessary. + */ + if (NPY_UNLIKELY(dst_dtype != castdata.main.descriptors[1] || must_wrap)) { + NPY_CASTING to_casting = -1; + /* Cast function may not support the output, wrap if necessary */ + if (init_cast_info( + &castdata.to, &to_casting, + castdata.main.descriptors[1], dst_dtype, 0) < 0) { + goto fail; + } + casting = PyArray_MinCastSafety(casting, to_casting); + + /* Prepare the actual cast (if necessary): */ + if (to_casting & _NPY_CAST_IS_VIEW && !must_wrap) { + /* This step is not necessary and can be skipped. 
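+             * (This is the case when resolve_descriptors returned the
+             * requested output descriptor unchanged and flagged the step
+             * as _NPY_CAST_IS_VIEW, e.g. for a same-dtype "cast".)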
+    /*
+     * Wrap the output with an additional cast if necessary.
+     */
+    if (NPY_UNLIKELY(dst_dtype != castdata.main.descriptors[1] || must_wrap)) {
+        NPY_CASTING to_casting = -1;
+        /* Cast function may not support the output, wrap if necessary */
+        if (init_cast_info(
+                &castdata.to, &to_casting,
+                castdata.main.descriptors[1], dst_dtype, 0) < 0) {
+            goto fail;
+        }
+        casting = PyArray_MinCastSafety(casting, to_casting);
+
+        /* Prepare the actual cast (if necessary): */
+        if (to_casting & _NPY_CAST_IS_VIEW && !must_wrap) {
+            /* This step is not necessary and can be skipped. */
+            _cast_info_free(&castdata.to);
+        }
+        else {
+            /* Fetch the cast function and set up */
+            PyArrayMethod_Context *context = &castdata.to.context;
+            npy_intp strides[2] = {castdata.main.descriptors[1]->elsize, dst_stride};
+            NPY_ARRAYMETHOD_FLAGS flags;
+            if (context->method->get_strided_loop(
+                    context, aligned, 1 /* clear buffer */, strides,
+                    &castdata.to.stransfer, &castdata.to.auxdata, &flags) < 0) {
+                assert(castdata.to.auxdata == NULL);
+                _cast_info_free(&castdata.to);
+                castdata.to.stransfer = NULL;  /* ensure we clean up only once */
+                goto fail;
+            }
+            assert(castdata.to.stransfer != NULL);
+
+            *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+            /* The main cast now uses a buffered output: */
+            dst_stride = strides[0];
+            if (castdata.from.stransfer != NULL) {
+                /* Both input and output are wrapped, now always aligned */
+                aligned = 1;
+            }
+        }
+    }
+
+    /* Fetch the main cast function (with the updated values) */
+    PyArrayMethod_Context *context = &castdata.main.context;
+    npy_intp strides[2] = {src_stride, dst_stride};
+    NPY_ARRAYMETHOD_FLAGS flags;
+    if (context->method->get_strided_loop(
+            context, aligned, move_references, strides,
+            &castdata.main.stransfer, &castdata.main.auxdata, &flags) < 0) {
+        goto fail;
+    }
+
+    *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+
+    if (castdata.from.stransfer == NULL && castdata.to.stransfer == NULL) {
+        /* The main step is sufficient to do the cast */
+        *out_stransfer = castdata.main.stransfer;
+        *out_transferdata = castdata.main.auxdata;
+        castdata.main.auxdata = NULL;  /* do not free the auxdata */
+        _cast_info_free(&castdata.main);
+        return 0;
+    }
+
+    /* Clone the castdata, as it is currently not persistently stored. */
+    *out_transferdata = _multistep_cast_auxdata_clone_int(
+            (NpyAuxData *)&castdata, 1);
+    if (*out_transferdata == NULL) {
+        PyErr_NoMemory();
+        goto fail;
+    }
+    *out_stransfer = &_strided_to_strided_multistep_cast;
+
+    res = 0;  /* success: fall through to the shared cleanup */
+
+  fail:
+    _cast_info_free(&castdata.main);
+    if (castdata.from.stransfer != NULL) {
+        _cast_info_free(&castdata.from);
+    }
+    if (castdata.to.stransfer != NULL) {
+        _cast_info_free(&castdata.to);
+    }
+    return res;
+}
+#endif
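The wrapped multi-step path assembled above can be triggered from Python without any private API: a byte-swapped source cannot be fed to the native-byte-order main loop, so a "from" cast first swaps it into a buffer (a minimal illustration of the behaviour, not of the internals):

    import numpy as np

    # Non-native byte order forces the "from" wrapping cast described above;
    # the main float64 -> float32 loop then reads the buffered, native data:
    src = np.arange(5, dtype=np.dtype("float64").newbyteorder())
    print(src.astype(np.float32))  # exercises the multi-step transfer function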
+
+
+NPY_NO_EXPORT int
+PyArray_GetDTypeTransferFunction(int aligned,
+                            npy_intp src_stride, npy_intp dst_stride,
+                            PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
+                            int move_references,
+                            PyArray_StridedUnaryOp **out_stransfer,
+                            NpyAuxData **out_transferdata,
+                            int *out_needs_api)
+{
+#if NPY_USE_NEW_CASTINGIMPL
+    /*
+     * If one of the dtypes is NULL, we give back either a src decref
+     * function or a dst setzero function.
+     *
+     * TODO: Eventually, we may wish to support user dtypes with references
+     *       (including and beyond bare `PyObject *`); this may require
+     *       extending the ArrayMethod API, and those paths should likely
+     *       be split out from this function.
+     */
+    if (dst_dtype == NULL) {
+        if (move_references) {
+            return get_decsrcref_transfer_function(aligned,
+                                src_dtype->elsize,
+                                src_dtype,
+                                out_stransfer, out_transferdata,
+                                out_needs_api);
+        }
+        else {
+            *out_stransfer = &_dec_src_ref_nop;
+            *out_transferdata = NULL;
+            return NPY_SUCCEED;
+        }
+    }
+    else if (src_dtype == NULL) {
+        return get_setdstzero_transfer_function(aligned,
+                                dst_dtype->elsize,
+                                dst_dtype,
+                                out_stransfer, out_transferdata,
+                                out_needs_api);
+    }
+
+    if (get_transferfunction_for_descrs(aligned,
+            src_stride, dst_stride,
+            src_dtype, dst_dtype, move_references,
+            out_stransfer, out_transferdata, out_needs_api) < 0) {
+        return NPY_FAIL;
+    }
+
+    return NPY_SUCCEED;
+
+#else
+    return PyArray_LegacyGetDTypeTransferFunction(
+            aligned, src_stride, dst_stride, src_dtype, dst_dtype,
+            move_references, out_stransfer, out_transferdata, out_needs_api);
+#endif
+}
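A rough Python sketch of the NULL-dtype dispatch above; the helper names mirror the C functions but are stand-ins for illustration, not real NumPy API:

    def _nop(buf):
        pass  # dst_dtype NULL without move_references: nothing to do

    def _decref_src(buf):
        buf.clear()  # stand-in for releasing the source buffer's references

    def _set_dst_zero(buf):
        buf[:] = [0] * len(buf)  # stand-in for zero-filling the destination

    def get_transfer(src_dtype, dst_dtype, move_references):
        # Mirrors the branch structure of PyArray_GetDTypeTransferFunction:
        if dst_dtype is None:
            return _decref_src if move_references else _nop
        if src_dtype is None:
            return _set_dst_zero
        raise NotImplementedError("general path: get_transferfunction_for_descrs")

    assert get_transfer(None, "O", False) is _set_dst_zero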
 
 
 /*
@@ -3763,7 +4522,9 @@ PyArray_GetDTypeTransferFunction(int aligned,
  * support.
  * It supports only wrapping the copyswapn functions and the legacy
  * cast functions registered with `PyArray_RegisterCastFunc`.
- * This function takes the easy way out: It does not wrap
+ * This function takes the easy way out: It does not wrap, so if wrapping
+ * might be necessary due to unaligned data, the user has to ensure that
+ * this is done and that aligned is passed in as True (this is asserted only).
  */
 NPY_NO_EXPORT int
 PyArray_GetLegacyDTypeTransferFunction(int aligned,
@@ -3772,7 +4533,7 @@ PyArray_GetLegacyDTypeTransferFunction(int aligned,
                             int move_references,
                             PyArray_StridedUnaryOp **out_stransfer,
                             NpyAuxData **out_transferdata,
-                            int *out_needs_api)
+                            int *out_needs_api, int wrap_if_unaligned)
 {
     /* Note: We ignore `needs_wrap`; needs-wrap is handled by another cast */
     int needs_wrap = 0;
@@ -3790,7 +4551,8 @@ PyArray_GetLegacyDTypeTransferFunction(int aligned,
                     out_stransfer, out_transferdata);
     }
 
-    if (get_legacy_dtype_cast_function(aligned,
+    if (get_legacy_dtype_cast_function(
+            aligned,
             src_stride, dst_stride,
             src_dtype, dst_dtype,
             move_references,
@@ -3798,9 +4560,39 @@ PyArray_GetLegacyDTypeTransferFunction(int aligned,
             out_transferdata,
             out_needs_api,
             &needs_wrap) != NPY_SUCCEED) {
-        return NPY_FAIL;
+        return -1;
     }
-    return NPY_SUCCEED;
+    if (!needs_wrap) {
+        return 0;
+    }
+    if (NPY_UNLIKELY(!wrap_if_unaligned)) {
+        /*
+         * Legacy casts do not support unaligned data, which thus requires
+         * wrapping.  However, we normally ensure that wrapping happens
+         * before calling this function, so this path should never be taken.
+         */
+        PyErr_Format(PyExc_RuntimeError,
+                "Internal NumPy error: casting %S to %S required wrapping; "
+                "it probably incorrectly flagged support for unaligned data "
+                "(aligned passed to discovery is %d).",
+                src_dtype, dst_dtype, aligned);
+        return -1;
+    }
+
+    /*
+     * If we are here, use the legacy code to wrap the above cast (which
+     * does not support unaligned data) into copyswapn.
+     */
+    NpyAuxData *castdata = *out_transferdata;
+    *out_transferdata = NULL;
+    if (wrap_aligned_contig_transfer_function_with_copyswapn(
+            aligned, src_stride, dst_stride, src_dtype, dst_dtype,
+            out_stransfer, out_transferdata, out_needs_api,
+            *out_stransfer, castdata) == NPY_FAIL) {
+        NPY_AUXDATA_FREE(castdata);
+        return -1;
+    }
+    return 0;
 }
diff --git a/numpy/core/src/multiarray/dtype_transfer.h b/numpy/core/src/multiarray/dtype_transfer.h
new file mode 100644
index 000000000000..c61119bfa294
--- /dev/null
+++ b/numpy/core/src/multiarray/dtype_transfer.h
@@ -0,0 +1,27 @@
+#ifndef _NPY_DTYPE_TRANSFER_H
+#define _NPY_DTYPE_TRANSFER_H
+
+#include "lowlevel_strided_loops.h"
+#include "array_method.h"
+
+
+NPY_NO_EXPORT int
+any_to_object_get_loop(
+        PyArrayMethod_Context *context,
+        int aligned, int move_references,
+        npy_intp *strides,
+        PyArray_StridedUnaryOp **out_loop,
+        NpyAuxData **out_transferdata,
+        NPY_ARRAYMETHOD_FLAGS *flags);
+
+NPY_NO_EXPORT int
+object_to_any_get_loop(
+        PyArrayMethod_Context *context,
+        int NPY_UNUSED(aligned), int move_references,
+        npy_intp *NPY_UNUSED(strides),
+        PyArray_StridedUnaryOp **out_loop,
+        NpyAuxData **out_transferdata,
+        NPY_ARRAYMETHOD_FLAGS *flags);
+
+
+#endif  /* _NPY_DTYPE_TRANSFER_H */
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index b8ebee6ed96b..04682d1ed7e3 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -19,6 +19,8 @@
 
 #include "lowlevel_strided_loops.h"
 #include "array_assign.h"
+#include "array_method.h"
+#include "usertypes.h"
 
 
 /*
diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c
index 3eaf99196875..a1ed46f13a60 100644
--- a/numpy/core/src/multiarray/usertypes.c
+++ b/numpy/core/src/multiarray/usertypes.c
@@ -538,7 +538,7 @@ PyArray_AddLegacyWrapping_CastingImpl(
     if (from == to) {
         spec.flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_SUPPORTS_UNALIGNED;
         PyType_Slot slots[] = {
-            {NPY_METH_get_loop, NULL},
+            {NPY_METH_get_loop, &legacy_cast_get_strided_loop},
             {NPY_METH_resolve_descriptors, &legacy_same_dtype_resolve_descriptors},
             {0, NULL}};
         spec.slots = slots;
@@ -547,7 +547,7 @@ PyArray_AddLegacyWrapping_CastingImpl(
     else {
         spec.flags = NPY_METH_REQUIRES_PYAPI;
         PyType_Slot slots[] = {
-            {NPY_METH_get_loop, NULL},
+            {NPY_METH_get_loop, &legacy_cast_get_strided_loop},
            {NPY_METH_resolve_descriptors, &simple_cast_resolve_descriptors},
             {0, NULL}};
         spec.slots = slots;
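The effect of this copyswapn wrapping is visible from Python using only public API: casting from a deliberately unaligned view still works, because aligned-only transfer loops get buffered through aligned memory as described above (illustrative only):

    import numpy as np

    raw = np.zeros(4 * 8 + 1, dtype=np.uint8)
    unaligned = raw[1:].view(np.float64)  # 4 doubles, offset by one byte
    assert not unaligned.flags.aligned
    unaligned[...] = [1.0, 2.0, 3.0, 4.0]
    print(unaligned.astype(np.float32))  # cast from unaligned input works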
+ ((np.dtype("d"), True),), # Output dtype is not a dtype + ((np.dtype("f"), None),), # Input dtype does not match method + ]) + def test_invalid_arguments(self, args): + with pytest.raises(TypeError): + self.method._resolve_descriptors(*args) + + +class TestSimpleStridedCall: + # Test mainly error paths of the resolve_descriptors function, + # note that the `casting_unittests` tests exercise this non-error paths. + + # Casting implementations are the main/only current user: + method = get_castingimpl(type(np.dtype("d")), type(np.dtype("f"))) + + @pytest.mark.parametrize(["args", "error"], [ + ((True,), TypeError), # Not a tuple + (((None,),), TypeError), # Too few elements + ((None, None), TypeError), # Inputs are not arrays. + (((None, None, None),), TypeError), # Too many + (((np.arange(3), np.arange(3)),), TypeError), # Incorrect dtypes + (((np.ones(3, dtype=">d"), np.ones(3, dtype="= dtype2.itemsize: + length = self.size // dtype1.itemsize + else: + length = self.size // dtype2.itemsize + + # Assume that the base array is well enough aligned for all inputs. + arr1 = np.empty(length, dtype=dtype1) + assert arr1.flags.c_contiguous + assert arr1.flags.aligned + + values = [random.randrange(-128, 128) for _ in range(length)] + + for i, value in enumerate(values): + # Use item assignment to ensure this is not using casting: + arr1[i] = value + + if dtype2 is None: + if dtype1.char == "?": + values = [bool(v) for v in values] + return arr1, values + + if dtype2.char == "?": + values = [bool(v) for v in values] + + arr2 = np.empty(length, dtype=dtype2) + assert arr2.flags.c_contiguous + assert arr2.flags.aligned + + for i, value in enumerate(values): + # Use item assignment to ensure this is not using casting: + arr2[i] = value + + return arr1, arr2, values + + def get_data_variation(self, arr1, arr2, aligned=True, contig=True): + """ + Returns a copy of arr1 that may be non-contiguous or unaligned, and a + matching array for arr2 (although not a copy). + """ + if contig: + stride1 = arr1.dtype.itemsize + stride2 = arr2.dtype.itemsize + elif aligned: + stride1 = 2 * arr1.dtype.itemsize + stride2 = 2 * arr2.dtype.itemsize + else: + stride1 = arr1.dtype.itemsize + 1 + stride2 = arr2.dtype.itemsize + 1 + + max_size1 = len(arr1) * 3 * arr1.dtype.itemsize + 1 + max_size2 = len(arr2) * 3 * arr2.dtype.itemsize + 1 + from_bytes = np.zeros(max_size1, dtype=np.uint8) + to_bytes = np.zeros(max_size2, dtype=np.uint8) + + # Sanity check that the above is large enough: + assert stride1 * len(arr1) <= from_bytes.nbytes + assert stride2 * len(arr2) <= to_bytes.nbytes + + if aligned: + new1 = as_strided(from_bytes[:-1].view(arr1.dtype), + arr1.shape, (stride1,)) + new2 = as_strided(to_bytes[:-1].view(arr2.dtype), + arr2.shape, (stride2,)) + else: + new1 = as_strided(from_bytes[1:].view(arr1.dtype), + arr1.shape, (stride1,)) + new2 = as_strided(to_bytes[1:].view(arr2.dtype), + arr2.shape, (stride2,)) + + new1[...] 
+
+        if not contig:
+            # Ensure we did not overwrite bytes that should not be written:
+            offset = arr1.dtype.itemsize if aligned else 0
+            buf = from_bytes[offset::stride1].tobytes()
+            assert buf.count(b"\0") == len(buf)
+
+        if contig:
+            assert new1.flags.c_contiguous
+            assert new2.flags.c_contiguous
+        else:
+            assert not new1.flags.c_contiguous
+            assert not new2.flags.c_contiguous
+
+        if aligned:
+            assert new1.flags.aligned
+            assert new2.flags.aligned
+        else:
+            assert not new1.flags.aligned or new1.dtype.alignment == 1
+            assert not new2.flags.aligned or new2.dtype.alignment == 1
+
+        return new1, new2
+
     @pytest.mark.parametrize("from_Dt", simple_dtypes)
     def test_simple_cancast(self, from_Dt):
         for to_Dt in simple_dtypes:
@@ -193,6 +291,183 @@ def test_simple_cancast(self, from_Dt):
             assert(to_dt is to_res)
 
 
+    @pytest.mark.filterwarnings("ignore::numpy.ComplexWarning")
+    @pytest.mark.parametrize("from_dt", simple_dtype_instances())
+    def test_simple_direct_casts(self, from_dt):
+        """
+        This test checks numeric direct casts for dtypes supported also by
+        the struct module (plus complex).  It tries to test a wide range of
+        inputs, but skips over possibly undefined behaviour (e.g. int
+        rollover).  Longdouble and CLongdouble are tested, but only using
+        double precision.
+
+        If this test creates issues, it should possibly just be simplified
+        or even removed (checking whether unaligned/non-contiguous casts
+        give the same results is useful, though).
+        """
+        for to_dt in simple_dtype_instances():
+            to_dt = to_dt.values[0]
+            cast = get_castingimpl(type(from_dt), type(to_dt))
+
+            casting, (from_res, to_res) = cast._resolve_descriptors(
+                (from_dt, to_dt))
+
+            if from_res is not from_dt or to_res is not to_dt:
+                # Do not test this case; it is handled in multiple steps,
+                # each of which should be tested individually.
+                return
+
+            safe = (casting & ~Casting.cast_is_view) <= Casting.safe
+            del from_res, to_res, casting
+
+            arr1, arr2, values = self.get_data(from_dt, to_dt)
+
+            cast._simple_strided_call((arr1, arr2))
+
+            # Check via python list
+            assert arr2.tolist() == values
+
+            # Check that the same results are achieved for strided loops
+            arr1_o, arr2_o = self.get_data_variation(arr1, arr2, True, False)
+            cast._simple_strided_call((arr1_o, arr2_o))
+
+            assert_array_equal(arr2_o, arr2)
+            assert arr2_o.tobytes() == arr2.tobytes()
+
+            # Check if alignment makes a difference, but only if supported
+            # and only if the alignment can be wrong
+            if ((from_dt.alignment == 1 and to_dt.alignment == 1) or
+                    not cast._supports_unaligned):
+                return
+
+            arr1_o, arr2_o = self.get_data_variation(arr1, arr2, False, True)
+            cast._simple_strided_call((arr1_o, arr2_o))
+
+            assert_array_equal(arr2_o, arr2)
+            assert arr2_o.tobytes() == arr2.tobytes()
+
+            arr1_o, arr2_o = self.get_data_variation(arr1, arr2, False, False)
+            cast._simple_strided_call((arr1_o, arr2_o))
+
+            assert_array_equal(arr2_o, arr2)
+            assert arr2_o.tobytes() == arr2.tobytes()
+
+            del arr1_o, arr2_o, cast
+
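A standalone version of the byte-buffer trick `get_data_variation` uses above, relying only on public NumPy API:

    import numpy as np
    from numpy.lib.stride_tricks import as_strided

    # Back the array with raw bytes so alignment/contiguity are controlled:
    buf = np.zeros(3 * 8 + 1, dtype=np.uint8)
    unaligned = as_strided(buf[1:].view("f8"), (3,), (8,))  # one-byte offset
    assert unaligned.flags.c_contiguous and not unaligned.flags.aligned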
+    @pytest.mark.parametrize("from_Dt", simple_dtypes)
+    def test_numeric_to_times(self, from_Dt):
+        # We currently only implement contiguous loops, so we only need to
+        # test those.
+        from_dt = from_Dt()
+
+        time_dtypes = [np.dtype("M8"), np.dtype("M8[ms]"), np.dtype("M8[4D]"),
+                       np.dtype("m8"), np.dtype("m8[ms]"), np.dtype("m8[4D]")]
+        for time_dt in time_dtypes:
+            cast = get_castingimpl(type(from_dt), type(time_dt))
+
+            casting, (from_res, to_res) = cast._resolve_descriptors(
+                (from_dt, time_dt))
+
+            assert from_res is from_dt
+            assert to_res is time_dt
+            del from_res, to_res
+
+            assert(casting & CAST_TABLE[from_Dt][type(time_dt)])
+
+            int64_dt = np.dtype(np.int64)
+            arr1, arr2, values = self.get_data(from_dt, int64_dt)
+            arr2 = arr2.view(time_dt)
+            arr2[...] = np.datetime64("NaT")
+
+            if time_dt == np.dtype("M8"):
+                # This is a bit of a strange path, and could probably be removed:
+                arr1[-1] = 0  # ensure at least one value is not NaT
+
+                # The cast currently succeeds, but the values are invalid:
+                cast._simple_strided_call((arr1, arr2))
+                with pytest.raises(ValueError):
+                    str(arr2[-1])  # e.g. conversion to string fails
+                return
+
+            cast._simple_strided_call((arr1, arr2))
+
+            assert [int(v) for v in arr2.tolist()] == values
+
+            # Check that the same results are achieved for strided loops
+            arr1_o, arr2_o = self.get_data_variation(arr1, arr2, True, False)
+            cast._simple_strided_call((arr1_o, arr2_o))
+
+            assert_array_equal(arr2_o, arr2)
+            assert arr2_o.tobytes() == arr2.tobytes()
+
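The integers written by these casts are simply offsets in the target unit, which is why the test can compare against the int64 values directly (a public-API check of the same behaviour):

    import numpy as np

    arr = np.array([0, 1000], dtype=np.int64).astype("M8[ms]")
    print(arr)  # ['1970-01-01T00:00:00.000' '1970-01-01T00:00:01.000']
    assert arr.view("int64").tolist() == [0, 1000]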
+    @pytest.mark.parametrize(
+            ["from_dt", "to_dt", "expected_casting", "nom", "denom"],
+            [("M8[ns]", None, Casting.no | Casting.cast_is_view, 1, 1),
+             (str(np.dtype("M8[ns]").newbyteorder()), None, Casting.equiv, 1, 1),
+             ("M8", "M8[ms]", Casting.safe | Casting.cast_is_view, 1, 1),
+             ("M8[ms]", "M8", Casting.unsafe, 1, 1),  # should be invalid cast
+             ("M8[5ms]", "M8[5ms]", Casting.no | Casting.cast_is_view, 1, 1),
+             ("M8[ns]", "M8[ms]", Casting.same_kind, 1, 10**6),
+             ("M8[ms]", "M8[ns]", Casting.safe, 10**6, 1),
+             ("M8[ms]", "M8[7ms]", Casting.same_kind, 1, 7),
+             ("M8[4D]", "M8[1M]", Casting.same_kind, None,
+              # give full values based on NumPy 1.19.x
+              [-2**63, 0, -1, 1314, -1315, 564442610]),
+             ("m8[ns]", None, Casting.no | Casting.cast_is_view, 1, 1),
+             (str(np.dtype("m8[ns]").newbyteorder()), None, Casting.equiv, 1, 1),
+             ("m8", "m8[ms]", Casting.safe | Casting.cast_is_view, 1, 1),
+             ("m8[ms]", "m8", Casting.unsafe, 1, 1),  # should be invalid cast
+             ("m8[5ms]", "m8[5ms]", Casting.no | Casting.cast_is_view, 1, 1),
+             ("m8[ns]", "m8[ms]", Casting.same_kind, 1, 10**6),
+             ("m8[ms]", "m8[ns]", Casting.safe, 10**6, 1),
+             ("m8[ms]", "m8[7ms]", Casting.same_kind, 1, 7),
+             ("m8[4D]", "m8[1M]", Casting.unsafe, None,
+              # give full values based on NumPy 1.19.x
+              [-2**63, 0, 0, 1314, -1315, 564442610])])
+    def test_time_to_time(self, from_dt, to_dt, expected_casting, nom, denom):
+        from_dt = np.dtype(from_dt)
+        if to_dt is not None:
+            to_dt = np.dtype(to_dt)
+
+        # Test a few values for casting (results generated with NumPy 1.19)
+        values = np.array([-2**63, 1, 2**63-1, 10000, -10000, 2**32])
+        values = values.astype(np.dtype("int64").newbyteorder(from_dt.byteorder))
+        assert values.dtype.byteorder == from_dt.byteorder
+        assert np.isnat(values.view(from_dt)[0])
+
+        DType = type(from_dt)
+        cast = get_castingimpl(DType, DType)
+        casting, (from_res, to_res) = cast._resolve_descriptors((from_dt, to_dt))
+        assert from_res is from_dt
+        assert to_res is to_dt or to_dt is None
+        assert casting == expected_casting
+
+        if nom is not None:
+            expected_out = (values * nom // denom).view(to_res)
+            expected_out[0] = "NaT"
+        else:
+            expected_out = np.empty_like(values)
+            expected_out[...] = denom
+            expected_out = expected_out.view(to_dt)
+
+        orig_arr = values.view(from_dt)
+        orig_out = np.empty_like(expected_out)
+
+        if casting == Casting.unsafe and (to_dt == "m8" or to_dt == "M8"):
+            # Casting from non-generic to generic units is an error and
+            # should probably be reported as an invalid cast earlier.
+            with pytest.raises(ValueError):
+                cast._simple_strided_call((orig_arr, orig_out))
+            return
+
+        # Check all combinations of aligned/unaligned and contiguous/strided:
+        for aligned in [True, False]:
+            for contig in [True, False]:
+                arr, out = self.get_data_variation(
+                    orig_arr, orig_out, aligned, contig)
+                out[...] = 0
+                cast._simple_strided_call((arr, out))
+                assert_array_equal(out.view("int64"),
+                                   expected_out.view("int64"))
+
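The `nom`/`denom` columns above are just unit-conversion factors; for example, ms -> ns multiplies by 10**6, and NaT (stored as the minimal int64) is preserved, which can be checked with public API alone:

    import numpy as np

    vals = np.array([np.iinfo(np.int64).min, 1, 10000], dtype="int64")
    out = vals.view("m8[ms]").astype("m8[ns]")
    assert out.view("int64")[1] == 10**6  # 1 ms == 10**6 ns
    assert np.isnat(out[0])               # NaT survives the conversion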
     def string_with_modified_length(self, dtype, change_length):
         fact = 1 if dtype.char == "S" else 4
         length = dtype.itemsize // fact + change_length
@@ -239,6 +514,67 @@ def test_string_cancast(self, other_DT, string_char):
             assert safety == Casting.unsafe
         assert other_dt is res_dt  # returns the singleton for simple dtypes
 
+    @pytest.mark.parametrize("string_char", ["S", "U"])
+    @pytest.mark.parametrize("other_dt", simple_dtype_instances())
+    def test_simple_string_casts_roundtrip(self, other_dt, string_char):
+        """
+        Tests casts from and to string by checking the roundtripping property.
+
+        The test also covers some string-to-string casts (but not all).
+
+        If this test creates issues, it should possibly just be simplified
+        or even removed (checking whether unaligned/non-contiguous casts
+        give the same results is useful, though).
+        """
+        string_DT = type(np.dtype(string_char))
+
+        cast = get_castingimpl(type(other_dt), string_DT)
+        cast_back = get_castingimpl(string_DT, type(other_dt))
+        _, (res_other_dt, string_dt) = cast._resolve_descriptors((other_dt, None))
+
+        if res_other_dt is not other_dt:
+            # Casting does not support non-native byte order; skip the test
+            # in that case.
+            assert other_dt.byteorder != res_other_dt.byteorder
+            return
+
+        orig_arr, values = self.get_data(other_dt, None)
+        str_arr = np.zeros(len(orig_arr), dtype=string_dt)
+        string_dt_short = self.string_with_modified_length(string_dt, -1)
+        str_arr_short = np.zeros(len(orig_arr), dtype=string_dt_short)
+        string_dt_long = self.string_with_modified_length(string_dt, 1)
+        str_arr_long = np.zeros(len(orig_arr), dtype=string_dt_long)
+
+        assert not cast._supports_unaligned  # if support is added, should test
+        assert not cast_back._supports_unaligned
+
+        for contig in [True, False]:
+            other_arr, str_arr = self.get_data_variation(
+                orig_arr, str_arr, True, contig)
+            _, str_arr_short = self.get_data_variation(
+                orig_arr, str_arr_short.copy(), True, contig)
+            _, str_arr_long = self.get_data_variation(
+                orig_arr, str_arr_long, True, contig)
+
+            cast._simple_strided_call((other_arr, str_arr))
+
+            cast._simple_strided_call((other_arr, str_arr_short))
+            assert_array_equal(str_arr.astype(string_dt_short), str_arr_short)
+
+            cast._simple_strided_call((other_arr, str_arr_long))
+            assert_array_equal(str_arr, str_arr_long)
+
+            if other_dt.kind == "b":
+                # Booleans do not roundtrip
+                continue
+
+            other_arr[...] = 0
+            cast_back._simple_strided_call((str_arr, other_arr))
+            assert_array_equal(orig_arr, other_arr)
+
+            other_arr[...] = 0
+            cast_back._simple_strided_call((str_arr_long, other_arr))
+            assert_array_equal(orig_arr, other_arr)
+
     @pytest.mark.parametrize("other_dt", ["S8", "U8"])
     @pytest.mark.parametrize("string_char", ["S", "U"])
     def test_string_to_string_cancast(self, other_dt, string_char):
diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py
index e10c7ad92db3..5e6472ae5536 100644
--- a/numpy/core/tests/test_nditer.py
+++ b/numpy/core/tests/test_nditer.py
@@ -1362,6 +1362,74 @@ def test_iter_copy():
     j = i.copy()
     assert_equal([x[()] for x in j], a.ravel(order='F'))
 
+
+@pytest.mark.parametrize("dtype", np.typecodes["All"])
+@pytest.mark.parametrize("loop_dtype", np.typecodes["All"])
+def test_iter_copy_casts(dtype, loop_dtype):
+    # Ensure the dtype is never flexible:
+    if loop_dtype.lower() == "m":
+        loop_dtype = loop_dtype + "[ms]"
+    elif np.dtype(loop_dtype).itemsize == 0:
+        loop_dtype = loop_dtype + "50"
+
+    # Make things a bit more interesting by requiring a byte-swap as well:
+    arr = np.ones(1000, dtype=np.dtype(dtype).newbyteorder())
+    try:
+        expected = arr.astype(loop_dtype)
+    except Exception:
+        # Some casts are not possible, do not worry about them
+        return
+
+    it = np.nditer((arr,), ["buffered", "external_loop", "refs_ok"],
+                   op_dtypes=[loop_dtype], casting="unsafe")
+
+    if np.issubdtype(np.dtype(loop_dtype), np.number):
+        # Casting to strings may be strange, but for simple dtypes do not
+        # rely on the cast being correct:
+        assert_array_equal(expected, np.ones(1000, dtype=loop_dtype))
+
+    it_copy = it.copy()
+    res = next(it)
+    del it
+    res_copy = next(it_copy)
+    del it_copy
+
+    assert_array_equal(res, expected)
+    assert_array_equal(res_copy, expected)
+
+
+def test_iter_copy_casts_structured():
+    # Test a complicated structured dtype for casting, as it requires
+    # both multiple steps and a more complex casting setup.
+    # Includes a structured -> unstructured (any to object) cast, and many
+    # other casts, which cause this to require all steps in the casting
+    # machinery one level down, as well as the iterator copy (which uses
+    # NpyAuxData clone).
+    in_dtype = np.dtype([("a", np.dtype("i,")),
+                         ("b", np.dtype(">i,<i,>d,S17,>d,(3)f,O,i1"))])
+    out_dtype = np.dtype([("a", np.dtype("O")),
+                          ("b", np.dtype(">i,>i,S17,>d,>U3,(3)d,i1,O"))])
+    arr = np.ones(1000, dtype=in_dtype)
+
+    it = np.nditer((arr,), ["buffered", "external_loop", "refs_ok"],
+                   op_dtypes=[out_dtype], casting="unsafe")
+    it_copy = it.copy()
+
+    res1 = next(it)
+    del it
+    res2 = next(it_copy)
+    del it_copy
+
+    expected = arr["a"].astype(out_dtype["a"])
+    assert_array_equal(res1["a"], expected)
+    assert_array_equal(res2["a"], expected)
+
+    for field in in_dtype["b"].names:
+        # Note that the .base avoids the subarray field
+        expected = arr["b"][field].astype(out_dtype["b"][field].base)
+        assert_array_equal(res1["b"][field], expected)
+        assert_array_equal(res2["b"][field], expected)
+
+
 def test_iter_allocate_output_simple():
     # Check that the iterator will properly allocate outputs
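A condensed version of what these iterator tests exercise, using only public API: a buffered, casting iterator can be copied, and both the original and the copy produce the cast data:

    import numpy as np

    arr = np.ones(5, dtype=np.dtype("f8").newbyteorder())
    it = np.nditer((arr,), ["buffered", "external_loop", "refs_ok"],
                   op_dtypes=["f4"], casting="unsafe")
    it_copy = it.copy()
    assert (next(it) == 1).all()
    assert (next(it_copy) == 1).all()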