diff --git a/numpy/_core/_internal.py b/numpy/_core/_internal.py index 8d6dc04851b5..058e93644dec 100644 --- a/numpy/_core/_internal.py +++ b/numpy/_core/_internal.py @@ -560,7 +560,7 @@ def _view_is_safe(oldtype, newtype): return if newtype.hasobject or oldtype.hasobject: - raise TypeError("Cannot change data-type for object array.") + raise TypeError("Cannot change data-type for array of references.") return diff --git a/numpy/_core/src/multiarray/convert_datatype.c b/numpy/_core/src/multiarray/convert_datatype.c index 92e6964fc21f..9765a4041e06 100644 --- a/numpy/_core/src/multiarray/convert_datatype.c +++ b/numpy/_core/src/multiarray/convert_datatype.c @@ -465,14 +465,17 @@ _get_cast_safety_from_castingimpl(PyArrayMethodObject *castingimpl, /* * Check for less harmful non-standard returns. The following two returns * should never happen: - * 1. No-casting must imply a view offset of 0. + * 1. No-casting must imply a view offset of 0 unless the DType + defines a finalization function, which implies it stores data + on the descriptor * 2. Equivalent-casting + 0 view offset is (usually) the definition * of a "no" cast. However, changing the order of fields can also * create descriptors that are not equivalent but views. * Note that unsafe casts can have a view offset. For example, in * principle, casting `finalize_descr == NULL)) { assert(casting != NPY_NO_CASTING); } else { @@ -648,6 +651,35 @@ PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to) } +/* + * This function returns true if the two types can be safely cast at + * *minimum_safety* casting level. Sets the *view_offset* if that is set + * for the cast. If ignore_error is set, the error indicator is cleared + * if there are any errors in cast setup and returns false, otherwise + * the error indicator is left set and returns -1. + */ +NPY_NO_EXPORT npy_intp +PyArray_SafeCast(PyArray_Descr *type1, PyArray_Descr *type2, + npy_intp* view_offset, NPY_CASTING minimum_safety, + npy_intp ignore_error) +{ + if (type1 == type2) { + *view_offset = 0; + return 1; + } + + NPY_CASTING safety = PyArray_GetCastInfo(type1, type2, NULL, view_offset); + if (safety < 0) { + if (ignore_error) { + PyErr_Clear(); + return 0; + } + return -1; + } + return PyArray_MinCastSafety(safety, minimum_safety) == minimum_safety; +} + + /* Provides an ordering for the dtype 'kind' character codes */ NPY_NO_EXPORT int dtype_kind_to_ordering(char kind) diff --git a/numpy/_core/src/multiarray/convert_datatype.h b/numpy/_core/src/multiarray/convert_datatype.h index 8dccbab6f8c0..d1493e6997bd 100644 --- a/numpy/_core/src/multiarray/convert_datatype.h +++ b/numpy/_core/src/multiarray/convert_datatype.h @@ -102,6 +102,11 @@ PyArray_GetCastInfo( PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype, npy_intp *view_offset); +NPY_NO_EXPORT npy_intp +PyArray_SafeCast(PyArray_Descr *type1, PyArray_Descr *type2, + npy_intp* view_offset, NPY_CASTING minimum_safety, + npy_intp ignore_errors); + NPY_NO_EXPORT int PyArray_CheckCastSafety(NPY_CASTING casting, PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype); diff --git a/numpy/_core/src/multiarray/ctors.c b/numpy/_core/src/multiarray/ctors.c index 4c9d76991296..836ed493e1e6 100644 --- a/numpy/_core/src/multiarray/ctors.c +++ b/numpy/_core/src/multiarray/ctors.c @@ -1908,8 +1908,10 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) } arrflags = PyArray_FLAGS(arr); - /* If a guaranteed copy was requested */ - copy = (flags & NPY_ARRAY_ENSURECOPY) || + + + copy = /* If a guaranteed copy was requested */ + (flags & NPY_ARRAY_ENSURECOPY) || /* If C contiguous was requested, and arr is not */ ((flags & NPY_ARRAY_C_CONTIGUOUS) && (!(arrflags & NPY_ARRAY_C_CONTIGUOUS))) || @@ -1921,8 +1923,13 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) (!(arrflags & NPY_ARRAY_F_CONTIGUOUS))) || /* If a writeable array was requested, and arr is not */ ((flags & NPY_ARRAY_WRITEABLE) && - (!(arrflags & NPY_ARRAY_WRITEABLE))) || - !PyArray_EquivTypes(oldtype, newtype); + (!(arrflags & NPY_ARRAY_WRITEABLE))); + + if (!copy) { + npy_intp view_offset; + npy_intp is_safe = PyArray_SafeCast(oldtype, newtype, &view_offset, NPY_NO_CASTING, 1); + copy = !(is_safe && (view_offset != NPY_MIN_INTP)); + } if (copy) { if (flags & NPY_ARRAY_ENSURENOCOPY ) { diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index 3c153adb83a8..b8b777697fe3 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -1449,23 +1449,6 @@ PyArray_EquivTypes(PyArray_Descr *type1, PyArray_Descr *type2) return 1; } - if (Py_TYPE(Py_TYPE(type1)) == &PyType_Type) { - /* - * 2021-12-17: This case is nonsense and should be removed eventually! - * - * boost::python has/had a bug effectively using EquivTypes with - * `type(arbitrary_obj)`. That is clearly wrong as that cannot be a - * `PyArray_Descr *`. We assume that `type(type(type(arbitrary_obj))` - * is always in practice `type` (this is the type of the metaclass), - * but for our descriptors, `type(type(descr))` is DTypeMeta. - * - * In that case, we just return False. There is a possibility that - * this actually _worked_ effectively (returning 1 sometimes). - * We ignore that possibility for simplicity; it really is not our bug. - */ - return 0; - } - /* * Do not use PyArray_CanCastTypeTo because it supports legacy flexible * dtypes as input. @@ -1609,7 +1592,10 @@ _array_fromobject_generic( /* One more chance for faster exit if user specified the dtype. */ oldtype = PyArray_DESCR(oparr); - if (PyArray_EquivTypes(oldtype, dtype)) { + npy_intp view_offset; + npy_intp is_safe = PyArray_SafeCast(oldtype, dtype, &view_offset, NPY_NO_CASTING, 1); + npy_intp view_safe = (is_safe && (view_offset != NPY_MIN_INTP)); + if (view_safe) { if (copy != NPY_COPY_ALWAYS && STRIDING_OK(oparr, order)) { if (oldtype == dtype) { Py_INCREF(op); diff --git a/numpy/_core/src/multiarray/stringdtype/casts.c b/numpy/_core/src/multiarray/stringdtype/casts.c index e3e7c5fde872..b896d09ace65 100644 --- a/numpy/_core/src/multiarray/stringdtype/casts.c +++ b/numpy/_core/src/multiarray/stringdtype/casts.c @@ -79,7 +79,10 @@ string_to_string_resolve_descriptors(PyObject *NPY_UNUSED(self), return NPY_UNSAFE_CASTING; } - *view_offset = 0; + // views are only legal between descriptors that share allocators (e.g. the same object) + if (descr0->allocator == descr1->allocator) { + *view_offset = 0; + }; return NPY_NO_CASTING; } diff --git a/numpy/_core/src/umath/ufunc_object.c b/numpy/_core/src/umath/ufunc_object.c index 63ef0d9080ca..d1f4eb30b5a8 100644 --- a/numpy/_core/src/umath/ufunc_object.c +++ b/numpy/_core/src/umath/ufunc_object.c @@ -789,9 +789,9 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl, if (dtypes[i] != PyArray_DESCR(op[i])) { npy_intp view_offset; - NPY_CASTING safety = PyArray_GetCastInfo( - PyArray_DESCR(op[i]), dtypes[i], NULL, &view_offset); - if (safety < 0 && PyErr_Occurred()) { + npy_intp is_safe = PyArray_SafeCast( + PyArray_DESCR(op[i]), dtypes[i], &view_offset, casting, 0); + if (is_safe < 0 && PyErr_Occurred()) { /* A proper error during a cast check, should be rare */ return -1; } @@ -806,8 +806,8 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl, * can force cast to bool) */ } - else if (PyArray_MinCastSafety(safety, casting) != casting) { - return 0; /* the cast is not safe enough */ + else if (is_safe != 1) { + return 0; /* there was a cast error or cast is not safe enough */ } } if (must_copy) { diff --git a/numpy/_core/tests/test_stringdtype.py b/numpy/_core/tests/test_stringdtype.py index e38575dbc0dd..cc11ad237d47 100644 --- a/numpy/_core/tests/test_stringdtype.py +++ b/numpy/_core/tests/test_stringdtype.py @@ -458,6 +458,29 @@ def test_creation_functions(): assert np.empty(3, dtype="T")[0] == "" +def test_create_with_copy_none(string_list): + arr = np.array(string_list, dtype=StringDType()) + # create another stringdtype array with an arena that has a different + # in-memory layout than the first array + arr_rev = np.array(string_list[::-1], dtype=StringDType()) + + # this should create a copy and the resulting array + # shouldn't share an allocator or arena with arr_rev, despite + # explicitly passing arr_rev.dtype + arr_copy = np.array(arr, copy=None, dtype=arr_rev.dtype) + np.testing.assert_array_equal(arr, arr_copy) + assert arr_copy.base is None + + with pytest.raises(ValueError, match="Unable to avoid copy"): + np.array(arr, copy=False, dtype=arr_rev.dtype) + + # because we're using arr's dtype instance, the view is safe + arr_view = np.array(arr, copy=None, dtype=arr.dtype) + np.testing.assert_array_equal(arr, arr) + np.testing.assert_array_equal(arr_view[::-1], arr_rev) + assert arr_view is arr + + @pytest.mark.parametrize( "strings", [