review comments

numpy · eendebakpt · Oct 6, 2024 · Oct 11, 2024 · Oct 11, 2024 · Oct 11, 2024
commit 53a9ef386d6f00230316821d1412fb151e288cd9
diff --git a/numpy/_core/src/common/lowlevel_strided_loops.h b/numpy/_core/src/common/lowlevel_strided_loops.h
@@ -787,7 +787,7 @@ PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr
                    stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); \
                }

-NPY_NO_EXPORT npy_bool nonzero_idxs_dispatcher(void * data, npy_intp* idxs, int dim, const npy_intp* shape,
+NPY_NO_EXPORT npy_bool nonzero_idxs_dispatcher(void const* data, npy_intp* idxs, int ndim, const npy_intp* shape,
                        const npy_intp* strides, int dtype, npy_intp nonzero_count);



diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c
@@ -2801,7 +2801,9 @@ PyArray_CountNonzero(PyArrayObject *self)
    return nonzero_count;
 }

-static inline void nonzero_idxs_1D_bool(npy_intp count, npy_intp nonzero_count, char *data, npy_intp stride, npy_intp* multi_index)
+static inline void
+nonzero_idxs_1D_bool(npy_intp count, npy_intp nonzero_count, char *data,
+                     npy_intp stride, npy_intp* multi_index)
 {
    /*
    * use fast memchr variant for sparse data, see gh-4370
@@ -2910,14 +2912,11 @@ PyArray_Nonzero(PyArrayObject *self)
        goto finish;
    }

-    PyArrayObject* original_array = self;
-    if (PyArray_BASE(self) != NULL) {
-        original_array = (PyArrayObject*) PyArray_BASE(self);
-    }
-    int bytes_not_swapped = PyArray_ISNOTSWAPPED(self) && PyArray_ISNOTSWAPPED(original_array);
+    int bytes_not_swapped = PyArray_ISNOTSWAPPED(self);

    // do not add ndim=1, dtype->kind == 'b', since we have a separate fast path for it
-    int optimized_count = PyArray_TRIVIALLY_ITERABLE(self) && ! (ndim == 1 && dtype->kind=='b') && bytes_not_swapped;
+    int optimized_count = PyArray_TRIVIALLY_ITERABLE(self) &&
+            !(ndim == 1 && dtype->kind=='b') && bytes_not_swapped;
    if (optimized_count ) {
        npy_intp * multi_index = (npy_intp *)PyArray_DATA(ret);
        char * data = PyArray_BYTES(self);
@@ -2926,9 +2925,14 @@ PyArray_Nonzero(PyArrayObject *self)
        const npy_intp* M_strides = PyArray_STRIDES(self);
        int M_type_num = dtype->type_num;

+        NPY_BEGIN_THREADS_DEF;
+        if (!needs_api) {
+            NPY_BEGIN_THREADS_THRESHOLDED(PyArray_DIM(self, 0));
+        }
        bool executed = nonzero_idxs_dispatcher((void*)data, multi_index, M_dim,
                                        M_shape, M_strides, M_type_num, nonzero_count);

+        NPY_END_THREADS;
        if (executed) {
            added_count = nonzero_count;
            goto finish;
@@ -2942,7 +2946,6 @@ PyArray_Nonzero(PyArrayObject *self)
        npy_intp stride = PyArray_STRIDE(self, 0);
        npy_intp count = PyArray_DIM(self, 0);
        NPY_BEGIN_THREADS_DEF;
-
        if (!needs_api) {
            NPY_BEGIN_THREADS_THRESHOLDED(count);
        }

diff --git a/numpy/_core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/_core/src/multiarray/lowlevel_strided_loops.c.src
@@ -1887,7 +1887,7 @@ mapiter_@name@(
 * #name = bool, u8, i8, u16, i16, u32, i32, u64, i64, f32, f64#
 */

-static inline void nonzero_idxs_trivial_@name@(@dtype@ * data, npy_intp* idxs, const npy_intp* shape, const npy_intp* strides, npy_intp nonzero_count) {
+static inline void nonzero_idxs_trivial_@name@(@dtype@ const* data, npy_intp* idxs, const npy_intp* shape, const npy_intp* strides, npy_intp nonzero_count) {
    npy_intp stride = strides[0];
    npy_intp added_count = 0;
    npy_intp a = 0;
@@ -1903,7 +1903,7 @@ static inline void nonzero_idxs_trivial_@name@(@dtype@ * data, npy_intp* idxs, c
    }
 }

-static inline void nonzero_idxs_2D_@name@(@dtype@ * data, npy_intp* idxs, const npy_intp* shape, const npy_intp* strides, npy_intp nonzero_count) { 
+static inline void nonzero_idxs_2D_@name@(@dtype@ const* data, npy_intp* idxs, const npy_intp* shape, const npy_intp* strides, npy_intp nonzero_count) {
    npy_intp idxs_stride = 2; 
    npy_intp size_1 = shape[1]; 
    npy_intp stride_1 = strides[1]; 
@@ -1932,9 +1932,8 @@ static inline void nonzero_idxs_2D_@name@(@dtype@ * data, npy_intp* idxs, const

 /**end repeat**/

-npy_bool nonzero_idxs_dispatcher(void * data, npy_intp* idxs, int ndim, const npy_intp* shape, const npy_intp* strides, int dtype, npy_intp nonzero_count)
+npy_bool nonzero_idxs_dispatcher(void const * data, npy_intp* idxs, int ndim, const npy_intp* shape, const npy_intp* strides, int dtype, npy_intp nonzero_count)
 { 
-
    if (ndim==1) {
        switch(dtype) {
            /**begin repeat
@@ -1943,7 +1942,6 @@ npy_bool nonzero_idxs_dispatcher(void * data, npy_intp* idxs, int ndim, const np
            * #dtypeID = NPY_BOOL, NPY_UINT8, NPY_INT8, NPY_UINT16, NPY_INT16, NPY_UINT32, NPY_INT32, NPY_UINT64, NPY_INT64, NPY_FLOAT32, NPY_FLOAT64#
            * #name = bool, u8, i8, u16, i16, u32, i32, u64, i64, f32, f64#
            */
-
            case @dtypeID@:
            {
                @dtype@ * data_ptr = (@dtype@ *) data;
@@ -1962,7 +1960,6 @@ npy_bool nonzero_idxs_dispatcher(void * data, npy_intp* idxs, int ndim, const np
            * #dtypeID = NPY_BOOL, NPY_UINT8, NPY_INT8, NPY_UINT16, NPY_INT16, NPY_UINT32, NPY_INT32, NPY_UINT64, NPY_INT64, NPY_FLOAT32, NPY_FLOAT64#
            * #name = bool, u8, i8, u16, i16, u32, i32, u64, i64, f32, f64#
            */
-
            case @dtypeID@:
            {
                @dtype@ * data_ptr = (@dtype@ *) data;

diff --git a/numpy/_core/tests/test_numeric.py b/numpy/_core/tests/test_numeric.py
@@ -1956,6 +1956,17 @@ def __bool__(self):
        a = np.array([[ThrowsAfter(15)]] * 10)
        assert_raises(ValueError, np.nonzero, a)

+    def test_nonzero_byteorder(self):
+        # nonzero must give the same indices for native and swapped byte order
+        values = [np.array([0., -0., 1, float('nan')]), np.array([0, 1]),
+                  np.array([0, 12.3], dtype=np.float16)]
+        expected_indices = [[2, 3], [1], [1]]
+        for A, expected in zip(values, expected_indices):
+            A_byteswapped = (A.view(A.dtype.newbyteorder()).byteswap()).copy()
+
+            assert_equal(np.nonzero(A)[0], expected)
+            assert_equal(np.nonzero(A_byteswapped)[0], expected)
+

 class TestIndex:
    def test_boolean(self):