Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

BUG: introduce PyArray_SafeCast to fix issues around stringdtype views #26147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 2 numpy/_core/_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def _view_is_safe(oldtype, newtype):
return

if newtype.hasobject or oldtype.hasobject:
raise TypeError("Cannot change data-type for object array.")
raise TypeError("Cannot change data-type for array of references.")
return


Expand Down
36 changes: 34 additions & 2 deletions 36 numpy/_core/src/multiarray/convert_datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -465,14 +465,17 @@ _get_cast_safety_from_castingimpl(PyArrayMethodObject *castingimpl,
/*
* Check for less harmful non-standard returns. The following two returns
* should never happen:
* 1. No-casting must imply a view offset of 0.
* 1. No-casting must imply a view offset of 0 unless the DType
defines a finalization function, which implies it stores data
on the descriptor
* 2. Equivalent-casting + 0 view offset is (usually) the definition
* of a "no" cast. However, changing the order of fields can also
* create descriptors that are not equivalent but views.
* Note that unsafe casts can have a view offset. For example, in
* principle, casting `<i8` to `<i4` is a cast with 0 offset.
*/
if (*view_offset != 0) {
if ((*view_offset != 0 &&
NPY_DT_SLOTS(NPY_DTYPE(from))->finalize_descr == NULL)) {
assert(casting != NPY_NO_CASTING);
}
else {
Expand Down Expand Up @@ -648,6 +651,35 @@ PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to)
}


/*
* This function returns true if the two types can be safely cast at
* *minimum_safety* casting level. Sets the *view_offset* if that is set
* for the cast. If ignore_error is set, the error indicator is cleared
* if there are any errors in cast setup and returns false, otherwise
* the error indicator is left set and returns -1.
*/
NPY_NO_EXPORT npy_intp
PyArray_SafeCast(PyArray_Descr *type1, PyArray_Descr *type2,
npy_intp* view_offset, NPY_CASTING minimum_safety,
npy_intp ignore_error)
{
if (type1 == type2) {
*view_offset = 0;
return 1;
}

NPY_CASTING safety = PyArray_GetCastInfo(type1, type2, NULL, view_offset);
if (safety < 0) {
if (ignore_error) {
PyErr_Clear();
return 0;
}
return -1;
}
return PyArray_MinCastSafety(safety, minimum_safety) == minimum_safety;
}


/* Provides an ordering for the dtype 'kind' character codes */
NPY_NO_EXPORT int
dtype_kind_to_ordering(char kind)
Expand Down
5 changes: 5 additions & 0 deletions 5 numpy/_core/src/multiarray/convert_datatype.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ PyArray_GetCastInfo(
PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype,
npy_intp *view_offset);

NPY_NO_EXPORT npy_intp
PyArray_SafeCast(PyArray_Descr *type1, PyArray_Descr *type2,
npy_intp* view_offset, NPY_CASTING minimum_safety,
npy_intp ignore_errors);

NPY_NO_EXPORT int
PyArray_CheckCastSafety(NPY_CASTING casting,
PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype);
Expand Down
15 changes: 11 additions & 4 deletions 15 numpy/_core/src/multiarray/ctors.c
Original file line number Diff line number Diff line change
Expand Up @@ -1908,8 +1908,10 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
}

arrflags = PyArray_FLAGS(arr);
/* If a guaranteed copy was requested */
copy = (flags & NPY_ARRAY_ENSURECOPY) ||


copy = /* If a guaranteed copy was requested */
(flags & NPY_ARRAY_ENSURECOPY) ||
/* If C contiguous was requested, and arr is not */
((flags & NPY_ARRAY_C_CONTIGUOUS) &&
(!(arrflags & NPY_ARRAY_C_CONTIGUOUS))) ||
Expand All @@ -1921,8 +1923,13 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
(!(arrflags & NPY_ARRAY_F_CONTIGUOUS))) ||
/* If a writeable array was requested, and arr is not */
((flags & NPY_ARRAY_WRITEABLE) &&
(!(arrflags & NPY_ARRAY_WRITEABLE))) ||
!PyArray_EquivTypes(oldtype, newtype);
(!(arrflags & NPY_ARRAY_WRITEABLE)));

if (!copy) {
npy_intp view_offset;
npy_intp is_safe = PyArray_SafeCast(oldtype, newtype, &view_offset, NPY_NO_CASTING, 1);
copy = !(is_safe && (view_offset != NPY_MIN_INTP));
}

if (copy) {
if (flags & NPY_ARRAY_ENSURENOCOPY ) {
Expand Down
22 changes: 4 additions & 18 deletions 22 numpy/_core/src/multiarray/multiarraymodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1449,23 +1449,6 @@ PyArray_EquivTypes(PyArray_Descr *type1, PyArray_Descr *type2)
return 1;
}

if (Py_TYPE(Py_TYPE(type1)) == &PyType_Type) {
/*
* 2021-12-17: This case is nonsense and should be removed eventually!
*
* boost::python has/had a bug effectively using EquivTypes with
* `type(arbitrary_obj)`. That is clearly wrong as that cannot be a
* `PyArray_Descr *`. We assume that `type(type(type(arbitrary_obj))`
* is always in practice `type` (this is the type of the metaclass),
* but for our descriptors, `type(type(descr))` is DTypeMeta.
*
* In that case, we just return False. There is a possibility that
* this actually _worked_ effectively (returning 1 sometimes).
* We ignore that possibility for simplicity; it really is not our bug.
*/
return 0;
}

/*
* Do not use PyArray_CanCastTypeTo because it supports legacy flexible
* dtypes as input.
Expand Down Expand Up @@ -1609,7 +1592,10 @@ _array_fromobject_generic(

/* One more chance for faster exit if user specified the dtype. */
oldtype = PyArray_DESCR(oparr);
if (PyArray_EquivTypes(oldtype, dtype)) {
npy_intp view_offset;
npy_intp is_safe = PyArray_SafeCast(oldtype, dtype, &view_offset, NPY_NO_CASTING, 1);
npy_intp view_safe = (is_safe && (view_offset != NPY_MIN_INTP));
if (view_safe) {
if (copy != NPY_COPY_ALWAYS && STRIDING_OK(oparr, order)) {
if (oldtype == dtype) {
Py_INCREF(op);
Expand Down
5 changes: 4 additions & 1 deletion 5 numpy/_core/src/multiarray/stringdtype/casts.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ string_to_string_resolve_descriptors(PyObject *NPY_UNUSED(self),
return NPY_UNSAFE_CASTING;
}

*view_offset = 0;
// views are only legal between descriptors that share allocators (e.g. the same object)
if (descr0->allocator == descr1->allocator) {
*view_offset = 0;
};

return NPY_NO_CASTING;
}
Expand Down
10 changes: 5 additions & 5 deletions 10 numpy/_core/src/umath/ufunc_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -789,9 +789,9 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl,

if (dtypes[i] != PyArray_DESCR(op[i])) {
npy_intp view_offset;
NPY_CASTING safety = PyArray_GetCastInfo(
PyArray_DESCR(op[i]), dtypes[i], NULL, &view_offset);
if (safety < 0 && PyErr_Occurred()) {
npy_intp is_safe = PyArray_SafeCast(
PyArray_DESCR(op[i]), dtypes[i], &view_offset, casting, 0);
if (is_safe < 0 && PyErr_Occurred()) {
/* A proper error during a cast check, should be rare */
return -1;
}
Expand All @@ -806,8 +806,8 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl,
* can force cast to bool)
*/
}
else if (PyArray_MinCastSafety(safety, casting) != casting) {
return 0; /* the cast is not safe enough */
else if (is_safe != 1) {
mhvk marked this conversation as resolved.
Show resolved Hide resolved
return 0; /* there was a cast error or cast is not safe enough */
}
}
if (must_copy) {
Expand Down
23 changes: 23 additions & 0 deletions 23 numpy/_core/tests/test_stringdtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,29 @@ def test_creation_functions():
assert np.empty(3, dtype="T")[0] == ""


def test_create_with_copy_none(string_list):
arr = np.array(string_list, dtype=StringDType())
# create another stringdtype array with an arena that has a different
# in-memory layout than the first array
arr_rev = np.array(string_list[::-1], dtype=StringDType())
ngoldbaum marked this conversation as resolved.
Show resolved Hide resolved

# this should create a copy and the resulting array
# shouldn't share an allocator or arena with arr_rev, despite
# explicitly passing arr_rev.dtype
arr_copy = np.array(arr, copy=None, dtype=arr_rev.dtype)
np.testing.assert_array_equal(arr, arr_copy)
assert arr_copy.base is None

with pytest.raises(ValueError, match="Unable to avoid copy"):
np.array(arr, copy=False, dtype=arr_rev.dtype)

# because we're using arr's dtype instance, the view is safe
arr_view = np.array(arr, copy=None, dtype=arr.dtype)
np.testing.assert_array_equal(arr, arr)
np.testing.assert_array_equal(arr_view[::-1], arr_rev)
assert arr_view is arr


@pytest.mark.parametrize(
"strings",
[
Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.