numpy · seberg · Jul 22, 2024 · Jul 10, 2024 · Jul 15, 2024 · Jul 15, 2024
diff --git a/numpy/_core/include/numpy/ufuncobject.h b/numpy/_core/include/numpy/ufuncobject.h
@@ -65,6 +65,9 @@ typedef int (PyUFunc_TypeResolutionFunc)(
                                PyObject *type_tup,
                                PyArray_Descr **out_dtypes);

+typedef int (PyUFunc_ProcessCoreDimsFunc)(
+                                struct _tagPyUFuncObject *ufunc,
+                                npy_intp *core_dim_sizes);

 typedef struct _tagPyUFuncObject {
        PyObject_HEAD
@@ -191,6 +194,10 @@ typedef struct _tagPyUFuncObject {
        /* A PyListObject of `(tuple of DTypes, ArrayMethod/Promoter)` */
        PyObject *_loops;
    #endif
+    #if NPY_FEATURE_VERSION >= NPY_2_1_API_VERSION
+        /* User defined function to process core dimensions of a gufunc. */
+        PyUFunc_ProcessCoreDimsFunc *process_core_dims_func;
+    #endif
 } PyUFuncObject;

 #include "arrayobject.h"

diff --git a/numpy/_core/src/umath/_umath_tests.c.src b/numpy/_core/src/umath/_umath_tests.c.src
@@ -761,6 +761,205 @@ add_INT32_negative_indexed(PyObject *module, PyObject *dict) {
    return 0;
 }

+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// Define the gufunc 'conv1d_full'
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define MAX(a, b) (((a) < (b)) ? (b) : (a))
+
+int conv1d_full_process_core_dims(PyUFuncObject *ufunc,
+                                  npy_intp *core_dim_sizes)
+{
+    //
+    // core_dim_sizes will hold the core dimensions [m, n, p].
+    // p will be -1 if the caller did not provide the out argument.
+    //
+    npy_intp m = core_dim_sizes[0];
+    npy_intp n = core_dim_sizes[1];
+    npy_intp p = core_dim_sizes[2];
+    npy_intp required_p = m + n - 1;
+
+    if (p == -1) {
+        core_dim_sizes[2] = required_p;
+        return 0;
+    }
+    if (p != required_p) {
+        PyErr_Format(PyExc_ValueError,
+                "conv1d_full: the core dimension p of the out parameter "
+                "does not equal m + n - 1, where m and n are the core "
+                "dimensions of the inputs x and y; got m=%zd and n=%zd so "
+                "p must be %zd, but got p=%zd.",
+                m, n, required_p, p);
+        return -1;
+    }
+    return 0;
+}
+
+static void
+conv1d_full_double_loop(char **args,
+                        npy_intp const *dimensions,
+                        npy_intp const *steps,
+                        void *NPY_UNUSED(func))
+{
+    // Input and output arrays
+    char *p_x = args[0];
+    char *p_y = args[1];
+    char *p_out = args[2];
+    // Number of loops of pdist calculations to execute.
+    npy_intp nloops = dimensions[0];
+    // Core dimensions
+    npy_intp m = dimensions[1];
+    npy_intp n = dimensions[2];
+    npy_intp p = dimensions[3]; // Must be m + n - 1.
+    // Core strides
+    npy_intp x_stride = steps[0];
+    npy_intp y_stride = steps[1];
+    npy_intp out_stride = steps[2];
+    // Inner strides
+    npy_intp x_inner_stride = steps[3];
+    npy_intp y_inner_stride = steps[4];
+    npy_intp out_inner_stride = steps[5];
+
+    for (npy_intp loop = 0; loop < nloops; ++loop, p_x += x_stride,
+                                                   p_y += y_stride,
+                                                   p_out += out_stride) {
+        // Basic implementation of 1d convolution
+        for (npy_intp k = 0; k < p; ++k) {
+            double sum = 0.0;
+            for (npy_intp i = MAX(0, k - n + 1); i < MIN(m, k + 1); ++i) {
+                double x_i = *(double *)(p_x + i*x_inner_stride);
+                double y_k_minus_i = *(double *)(p_y + (k - i)*y_inner_stride);
+                sum +=  x_i * y_k_minus_i;
+            }
+            *(double *)(p_out + k*out_inner_stride) = sum;
+        }
+    }
+}
+
+static PyUFuncGenericFunction conv1d_full_functions[] = {
+    (PyUFuncGenericFunction) &conv1d_full_double_loop
+};
+static void *const conv1d_full_data[] = {NULL};
+static const char conv1d_full_typecodes[] = {NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE};
+
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// Define the gufunc 'wl1_pdist' (weighted l1 pairwise distances)
+//
+// 'wl1_pdist' has shape signature (m,n?),(n?)->(p), where p must
+// be m*(m-1)/2.  The second input parameter provides weights for
+// the components.
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+int wl1_pdist_process_core_dims(PyUFuncObject *ufunc,
+                                npy_intp *core_dim_sizes)
+{
+    npy_intp m_even, m_odd;
+    npy_intp required_p;
+
+    //
+    // core_dim_sizes will hold the core dimensions [m, n, p].
+    // p will be -1 if the caller did not provide the out argument.
+    //
+    npy_intp m = core_dim_sizes[0];
+    npy_intp p = core_dim_sizes[2];
+    if (m == 0) {
+        PyErr_SetString(PyExc_ValueError,
+                "pdist: the core dimension m of the input parameter "
+                "x must be at least 1; got 0.");
+        return -1;
+    }
+
+    // Identify the even and odd factors of the expression m*(m - 1).
+    npy_intp r = m % 2;
+    m_even = m - r;
+    m_odd = m - (1 - r);
+
+    if (m_even/2 > NPY_MAX_INTP/m_odd) {
+        PyErr_Format(PyExc_ValueError,
+                "wl1_pdist: the core dimension m=%zd of the input parameter "
+                "x is too big; the calculation of the output size "
+                "p = m*(m - 1)/2 results in integer overflow.", m);
+        return -1;
+    }
+
+    required_p = m_odd*(m_even/2);
+
+    if (p == -1) {
+        core_dim_sizes[2] = required_p;
+        return 0;
+    }
+    if (p != required_p) {
+        PyErr_Format(PyExc_ValueError,
+                "wl1_pdist: the core dimension p of the out parameter "
+                "does not equal m*(m - 1)/2, where m is the first core "
+                "dimension of the input x; got m=%zd, so p must be %zd, "
+                "but got p=%zd).",
+                m, required_p, p);
+        return -1;
+    }
+    return 0;
+}
+
+static void
+wl1_pdist_double_loop(char **args,
+                      npy_intp const *dimensions,
+                      npy_intp const *steps,
+                      void *NPY_UNUSED(func))
+{
+    // Input and output arrays
+    char *p_x = args[0];
+    char *p_w = args[1];
+    char *p_out = args[2];
+    // Number of loops of pdist calculations to execute.
+    npy_intp nloops = dimensions[0];
+    // Core dimensions
+    npy_intp m = dimensions[1];
+    npy_intp n = dimensions[2];
+    // npy_intp p = dimensions[3]; // Must be m*(m-1)/2; unused here.
+    // Core strides
+    npy_intp x_stride = steps[0];
+    npy_intp w_stride = steps[1];
+    npy_intp out_stride = steps[2];
+    // x array strides
+    npy_intp x_row_stride = steps[3];
+    npy_intp x_col_stride = steps[4];
+    // w array strides
+    npy_intp w_inner_stride = steps[5];
+    // out array strides
+    npy_intp out_inner_stride = steps[6];
+
+    for (npy_intp loop = 0; loop < nloops; ++loop, p_x += x_stride,
+                                                   p_w += w_stride,
+                                                   p_out += out_stride) {
+        npy_intp k_out = 0;
+        for (npy_intp i = 0; i < m - 1; ++i) {
+            char *p_rowi = p_x + i*x_row_stride;
+            for (npy_intp j = i + 1; j < m; ++j) {
+                char *p_rowj = p_x + j*x_row_stride;
+                double sum = 0.0;
+                for (npy_intp k = 0; k < n; ++k) {
+                    double x1 = *(double *)(p_rowi + k*x_col_stride);
+                    double x2 = *(double *)(p_rowj + k*x_col_stride);
+                    double w = *(double *)(p_w + k*w_inner_stride);
+                    sum += w*npy_fabs(x1 - x2);
+                }
+                *(double *)(p_out + k_out*out_inner_stride) = sum;
+                ++k_out;
+            }
+        }
+    }
+}
+
+
+static PyUFuncGenericFunction wl1_pdist_functions[] = {
+    (PyUFuncGenericFunction) &wl1_pdist_double_loop
+};
+static void *const wl1_pdist_data[] = {NULL};
+static const char wl1_pdist_typecodes[] = {NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE};
+
+
 static PyMethodDef UMath_TestsMethods[] = {
    {"test_signature",  UMath_Tests_test_signature, METH_VARARGS,
     "Test signature parsing of ufunc. \n"
@@ -830,6 +1029,59 @@ PyMODINIT_FUNC PyInit__umath_tests(void) {
        return NULL;
    }

+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // Define the gufunc 'conv1d_full'
+    // Shape signature is (m),(n)->(p) where p must be m + n - 1.
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+    PyUFuncObject *gufunc = (PyUFuncObject *) PyUFunc_FromFuncAndDataAndSignature(
+                                conv1d_full_functions,
+                                conv1d_full_data,
+                                conv1d_full_typecodes,
+                                1, 2, 1, PyUFunc_None, "conv1d_full",
+                                "convolution of x and y ('full' mode)",
+                                0, "(m),(n)->(p)");
+    if (gufunc == NULL) {
+        Py_DECREF(m);
+        return NULL;
+    }
+    gufunc->process_core_dims_func = &conv1d_full_process_core_dims;
+
+    int status = PyModule_AddObject(m, "conv1d_full", (PyObject *) gufunc);
+    if (status == -1) {
+        Py_DECREF(gufunc);
+        Py_DECREF(m);
+        return NULL;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+    // Define the gufunc 'wl1_pdist'
+    // Shape signature is (m,n?),(n?)->(p), where p must be m*(m-1)/2.
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+    gufunc = (PyUFuncObject *) PyUFunc_FromFuncAndDataAndSignature(
+                                wl1_pdist_functions,
+                                wl1_pdist_data,
+                                wl1_pdist_typecodes,
+                                1, 2, 1, PyUFunc_None, "wl1_pdist",
+                                "pairwise l1 distance (city block distance) "
+                                "of rows in x",
+                                0, "(m,n?),(n?)->(p)");
+    if (gufunc == NULL) {
+        Py_DECREF(m);
+        return NULL;
+    }
+    gufunc->process_core_dims_func = &wl1_pdist_process_core_dims;
+
+    status = PyModule_AddObject(m, "wl1_pdist", (PyObject *) gufunc);
+    if (status == -1) {
+        Py_DECREF(gufunc);
+        Py_DECREF(m);
+        return NULL;
+    }
+
+    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
 #if Py_GIL_DISABLED
    // signal this module supports running with the GIL disabled
    PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED);

diff --git a/numpy/_core/src/umath/ufunc_object.c b/numpy/_core/src/umath/ufunc_object.c
@@ -1590,6 +1590,13 @@ _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
        }
    }

+    if (ufunc->process_core_dims_func != NULL) {
+        int status = ufunc->process_core_dims_func(ufunc, core_dim_sizes);
+        if (status != 0) {
+            return -1;
+        }
+    }
+
    /*
     * Make sure no core dimension is unspecified.
     */
@@ -4689,6 +4696,8 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi
    /* Type resolution and inner loop selection functions */
    ufunc->type_resolver = &PyUFunc_DefaultTypeResolver;

+    ufunc->process_core_dims_func = NULL;
+
    ufunc->op_flags = NULL;
    ufunc->_loops = NULL;
    if (nin + nout != 0) {
-Original file line number
+Diff line change
@@ -1590,6 +1590,13 @@ _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
             }
         }
+        if (ufunc->process_core_dims_func != NULL) {
+            int status = ufunc->process_core_dims_func(ufunc, core_dim_sizes);
+            if (status != 0) {
+                return -1;
+            }
+        }
             Copy link

  
      
    
  

  
      

  
  Member


      

  

  
    
      

      
            seberg
  

      

      

      


        Jul 13, 2024


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      So one thing we do not do here is sanity check the output.  I.e. if the core_dim_size is already set but the user mutates it, we could be in trouble.
To be clear, I think that is OK, but it should be documented.
    
  
  


    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
  


          
          
        
      
    

    



  
        
  
    
        
    
  


      
          
  
      
            Copy link

  
      
    
  

  
      

  
  Member


      

  Author


  

  
    
      

      
            WarrenWeckesser
  

      

      

      


        Jul 13, 2024


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      It is easy enough to pass a copy to process_core_dims_func and verify on return that nothing was changed that shouldn't be changed.  I'll add that.
    
  
  


    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
  


          
          
        
      
    

    



  
        
  
    
        
    
  


      
          
  
      
            Copy link

  
      
    
  

  
      

  
  Member


      

  

  
    
      

      
            seberg
  

      

      

      


        Jul 14, 2024


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      I don't think it matters, just occured to me that we should document that you may only validate (raise an error) or fill in -1.
    
  
  


    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
  


          
          
        
      
    

    



  
        
  
    
        
    
  


      
          
  
      
            Copy link

  
      
    
  

  
      

  
    Contributor


      

  

  
    
      

      
            mhvk
  

      

      

      


        Jul 14, 2024


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      I wavered a bit about whether or not to test, but in the end, making a mistake in the processing here is no different from making a mistake in the actual function that gets called - one can get a core dump regardless. So, agreed with @seberg that a check after the call is not needed.
    
  
  


    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
  


          
          
        
      
    

    



  
        
  
    
        
    
  


      
          
  
      
            Copy link

  
      
    
  

  
      

  
  Member


      

  Author


  

  
    
      

      
            WarrenWeckesser
  

      

      

      


        Jul 14, 2024


      
    

  


        
      
  
  
  
    

    There was a problem hiding this comment.


  

 
  
    

    Choose a reason for hiding this comment

    
      The reason will be displayed to describe this comment to others. Learn more.
    

    
      
      


  


  
    
      OK, I won't add the check!
    
  
  


    

        
      
  
  
    
  
  
  
    
    Sorry, something went wrong.
  

  
    
  
    
      

              Uh oh!

              
There was an error while loading. Please reload this page.


  
  


          
      
  
    
    
      
        
            
    All reactions
         /*
          * Make sure no core dimension is unspecified.
          */
@@ -4689,6 +4696,8 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi
         /* Type resolution and inner loop selection functions */
         ufunc->type_resolver = &PyUFunc_DefaultTypeResolver;
+        ufunc->process_core_dims_func = NULL;
         ufunc->op_flags = NULL;
         ufunc->_loops = NULL;
         if (nin + nout != 0) {
-          Expand Down