BUG: Address interaction between SME and FPSR #29223
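
For context, a minimal reproduction sketch of the symptom being addressed (hypothetical, assuming an Apple silicon M4 on macOS 15.4+ with Accelerate as the BLAS backend, where matrix products may be dispatched to SME):

import warnings
import numpy as np

# Benign inputs: no overflow, underflow, or invalid operation is involved.
a = np.ones((128, 128))
b = np.ones((128, 128))

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    _ = a @ b  # may run on SME via Accelerate, which sets all FPE flags

# Before this change, spurious floating-point RuntimeWarnings could be
# reported for the product above; with it, the FPE state left behind by
# such a BLAS call is ignored on affected setups.
print([str(w.message) for w in caught])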

Merged
1 change: 1 addition & 0 deletions numpy/_core/meson.build
@@ -1117,6 +1117,7 @@ src_multiarray_umath_common = [
]
if have_blas
src_multiarray_umath_common += [
'src/common/blas_utils.c',
'src/common/cblasfuncs.c',
'src/common/python_xerbla.c',
]
134 changes: 134 additions & 0 deletions numpy/_core/src/common/blas_utils.c
@@ -0,0 +1,134 @@
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#ifdef __APPLE__
#include <sys/sysctl.h>
#endif

#include "numpy/numpyconfig.h" // NPY_VISIBILITY_HIDDEN
#include "numpy/npy_math.h" // npy_get_floatstatus_barrier
#include "blas_utils.h"

#if NPY_BLAS_CHECK_FPE_SUPPORT

/* Return whether we're running on macOS 15.4 or later
*/
static inline bool
is_macOS_version_15_4_or_later(void){
#if !defined(__APPLE__)
return false;
#else
char *osProductVersion = NULL;
size_t size = 0;
bool ret = false;

// Query how large OS version string should be
if(-1 == sysctlbyname("kern.osproductversion", NULL, &size, NULL, 0)){
goto cleanup;
}

osProductVersion = malloc(size + 1);

// Get the OS version string
if(-1 == sysctlbyname("kern.osproductversion", osProductVersion, &size, NULL, 0)){
goto cleanup;
}

osProductVersion[size] = '\0';

// Parse the version string
int major = 0, minor = 0;
if(2 > sscanf(osProductVersion, "%d.%d", &major, &minor)) {
goto cleanup;
}

if(major >= 15 && minor >= 4){
ret = true;
}
Comment on lines +46 to +48
@BertalanD, Jun 19, 2025:

Suggested change
if(major >= 15 && minor >= 4){
ret = true;
}
if(major > 15 || (major == 15 && minor >= 4)){
ret = true;
}

As written, this comparison will return false for e.g. 26.0 (major = 26, minor = 0, so the `minor >= 4` check fails).

A project member replied:
I fixed this in the backport.


cleanup:
if(osProductVersion){
free(osProductVersion);
}

return ret;
#endif
}

/* ARM Scalable Matrix Extension (SME) raises all floating-point error flags
* when it's used regardless of values or operations. As a consequence,
* when SME is used, all FPE state is lost and special handling is needed.
*
* For NumPy, SME is not currently used directly, but can be used via
* BLAS / LAPACK libraries. This function does a runtime check for whether
* BLAS / LAPACK can use SME and special handling around FPE is required.
*/
static inline bool
BLAS_can_use_ARM_SME(void)
{
#if defined(__APPLE__) && defined(__aarch64__) && defined(ACCELERATE_NEW_LAPACK)
// ARM SME can be used by Apple's Accelerate framework for BLAS / LAPACK
// - macOS 15.4+
// - Apple silicon M4+

// Does OS / Accelerate support ARM SME?
if(!is_macOS_version_15_4_or_later()){
return false;
}

// Does hardware support SME?
int has_SME = 0;
size_t size = sizeof(has_SME);
if(-1 == sysctlbyname("hw.optional.arm.FEAT_SME", &has_SME, &size, NULL, 0)){
return false;
}

if(has_SME){
return true;
}
#endif

// default assume SME is not used
return false;
}

/* Static variable to cache runtime check of BLAS FPE support.
*/
static bool blas_supports_fpe = true;

#endif // NPY_BLAS_CHECK_FPE_SUPPORT


NPY_VISIBILITY_HIDDEN bool
npy_blas_supports_fpe(void)
{
#if NPY_BLAS_CHECK_FPE_SUPPORT
return blas_supports_fpe;
#else
return true;
#endif
}

NPY_VISIBILITY_HIDDEN void
npy_blas_init(void)
{
#if NPY_BLAS_CHECK_FPE_SUPPORT
blas_supports_fpe = !BLAS_can_use_ARM_SME();
#endif
}

NPY_VISIBILITY_HIDDEN int
npy_get_floatstatus_after_blas(void)
{
#if NPY_BLAS_CHECK_FPE_SUPPORT
if(!blas_supports_fpe){
// BLAS does not support FPE and we need to return FPE state.
// Instead of clearing and then grabbing state, just return
// that no flags are set.
return 0;
}
#endif
char *param = NULL;
return npy_get_floatstatus_barrier(param);
}
30 changes: 30 additions & 0 deletions numpy/_core/src/common/blas_utils.h
@@ -0,0 +1,30 @@
#include <stdbool.h>

#include "numpy/numpyconfig.h" // for NPY_VISIBILITY_HIDDEN

/* NPY_BLAS_CHECK_FPE_SUPPORT controls whether we need a runtime check
* for floating-point error (FPE) support in BLAS.
*/
#if defined(__APPLE__) && defined(__aarch64__) && defined(ACCELERATE_NEW_LAPACK)
#define NPY_BLAS_CHECK_FPE_SUPPORT 1
#else
#define NPY_BLAS_CHECK_FPE_SUPPORT 0
#endif

/* Initialize BLAS environment, if needed
*/
NPY_VISIBILITY_HIDDEN void
npy_blas_init(void);

/* Runtime check if BLAS supports floating-point errors.
* true - BLAS supports FPE and one can rely on them to indicate errors
* false - BLAS does not support FPE. Special handling needed for FPE state
*/
NPY_VISIBILITY_HIDDEN bool
npy_blas_supports_fpe(void);

/* If BLAS supports FPE, exactly the same as npy_get_floatstatus_barrier().
* Otherwise, we can't rely on FPE state and need special handling.
*/
NPY_VISIBILITY_HIDDEN int
npy_get_floatstatus_after_blas(void);
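
The comments above describe how these helpers are meant to be used; a minimal sketch of the call pattern (hypothetical caller, not part of this PR), assuming npy_blas_init() has already run once at module initialization as multiarraymodule.c does below:

#include "blas_utils.h"

/* Hypothetical caller illustrating the intended pattern around a BLAS call. */
static int
fpe_flags_after_blas_call(void)
{
    /* ... a CBLAS / LAPACK call happens here ... */

    /* On builds where BLAS may use SME, this reports no flags instead of the
     * meaningless FPE state; otherwise it behaves exactly like
     * npy_get_floatstatus_barrier(). */
    return npy_get_floatstatus_after_blas();
}
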
3 changes: 2 additions & 1 deletion numpy/_core/src/common/cblasfuncs.c
@@ -12,6 +12,7 @@
#include "numpy/arrayobject.h"
#include "numpy/npy_math.h"
#include "numpy/ufuncobject.h"
#include "blas_utils.h"
#include "npy_cblas.h"
#include "arraytypes.h"
#include "common.h"
@@ -693,7 +694,7 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
NPY_END_ALLOW_THREADS;
}

int fpes = npy_get_floatstatus_barrier((char *) result);
int fpes = npy_get_floatstatus_after_blas();
if (fpes && PyUFunc_GiveFloatingpointErrors("dot", fpes) < 0) {
goto fail;
}
5 changes: 5 additions & 0 deletions numpy/_core/src/multiarray/multiarraymodule.c
@@ -43,6 +43,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
#include "arraytypes.h"
#include "arrayobject.h"
#include "array_converter.h"
#include "blas_utils.h"
#include "hashdescr.h"
#include "descriptor.h"
#include "dragon4.h"
@@ -4781,6 +4782,10 @@ _multiarray_umath_exec(PyObject *m) {
return -1;
}

#if NPY_BLAS_CHECK_FPE_SUPPORT
npy_blas_init();
#endif

#if defined(MS_WIN64) && defined(__GNUC__)
PyErr_WarnEx(PyExc_Warning,
"Numpy built with MINGW-W64 on Windows 64 bits is experimental, " \
36 changes: 29 additions & 7 deletions numpy/_core/src/umath/matmul.c.src
@@ -16,6 +16,7 @@



#include "blas_utils.h"
#include "npy_cblas.h"
#include "arraytypes.h" /* For TYPE_dot functions */

@@ -120,7 +121,7 @@ static inline void
}
}

NPY_NO_EXPORT void
static void
@name@_gemv(void *ip1, npy_intp is1_m, npy_intp is1_n,
void *ip2, npy_intp is2_n,
void *op, npy_intp op_m,
@@ -156,7 +157,7 @@ NPY_NO_EXPORT void
is2_n / sizeof(@typ@), @step0@, op, op_m / sizeof(@typ@));
}

NPY_NO_EXPORT void
static void
@name@_matmul_matrixmatrix(void *ip1, npy_intp is1_m, npy_intp is1_n,
void *ip2, npy_intp is2_n, npy_intp is2_p,
void *op, npy_intp os_m, npy_intp os_p,
@@ -260,7 +261,7 @@ NPY_NO_EXPORT void
* #IS_HALF = 0, 0, 0, 1, 0*13#
*/

NPY_NO_EXPORT void
static void
@TYPE@_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n,
void *_ip2, npy_intp is2_n, npy_intp is2_p,
void *_op, npy_intp os_m, npy_intp os_p,
@@ -318,7 +319,7 @@ NPY_NO_EXPORT void
}

/**end repeat**/
NPY_NO_EXPORT void
static void
BOOL_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n,
void *_ip2, npy_intp is2_n, npy_intp is2_p,
void *_op, npy_intp os_m, npy_intp os_p,
@@ -357,7 +358,7 @@ BOOL_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n,
}
}

NPY_NO_EXPORT void
static void
OBJECT_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n,
void *_ip2, npy_intp is2_n, npy_intp is2_p,
void *_op, npy_intp os_m, npy_intp os_p,
@@ -629,6 +630,11 @@ NPY_NO_EXPORT void
#endif
}
#if @USEBLAS@ && defined(HAVE_CBLAS)
#if NPY_BLAS_CHECK_FPE_SUPPORT
if (!npy_blas_supports_fpe()) {
npy_clear_floatstatus_barrier((char*)args);
}
#endif
if (allocate_buffer) free(tmp_ip12op);
#endif
}
@@ -653,7 +659,7 @@ NPY_NO_EXPORT void
* #prefix = c, z, 0#
* #USE_BLAS = 1, 1, 0#
*/
NPY_NO_EXPORT void
static void
@name@_dotc(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
char *op, npy_intp n, void *NPY_UNUSED(ignore))
{
@@ -749,6 +755,7 @@ OBJECT_dotc(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp
* CFLOAT, CDOUBLE, CLONGDOUBLE, OBJECT#
* #DOT = dot*15, dotc*4#
* #CHECK_PYERR = 0*18, 1#
* #CHECK_BLAS = 1*2, 0*13, 1*2, 0*2#
*/
NPY_NO_EXPORT void
@TYPE@_vecdot(char **args, npy_intp const *dimensions, npy_intp const *steps,
@@ -772,6 +779,11 @@ NPY_NO_EXPORT void
}
#endif
}
#if @CHECK_BLAS@ && NPY_BLAS_CHECK_FPE_SUPPORT
if (!npy_blas_supports_fpe()) {
npy_clear_floatstatus_barrier((char*)args);
}
#endif
}
/**end repeat**/

@@ -787,7 +799,7 @@ NPY_NO_EXPORT void
* #step1 = &oneF, &oneD#
* #step0 = &zeroF, &zeroD#
*/
NPY_NO_EXPORT void
static void
@name@_vecmat_via_gemm(void *ip1, npy_intp is1_n,
void *ip2, npy_intp is2_n, npy_intp is2_m,
void *op, npy_intp os_m,
@@ -878,6 +890,11 @@ NPY_NO_EXPORT void
#endif
}
}
#if @USEBLAS@ && NPY_BLAS_CHECK_FPE_SUPPORT
if (!npy_blas_supports_fpe()) {
npy_clear_floatstatus_barrier((char*)args);
}
#endif
}
/**end repeat**/

@@ -943,5 +960,10 @@ NPY_NO_EXPORT void
#endif
}
}
#if @USEBLAS@ && NPY_BLAS_CHECK_FPE_SUPPORT
if (!npy_blas_supports_fpe()) {
npy_clear_floatstatus_barrier((char*)args);
}
#endif
}
/**end repeat**/
6 changes: 6 additions & 0 deletions numpy/_core/tests/test_multiarray.py
@@ -31,6 +31,7 @@
from numpy.exceptions import AxisError, ComplexWarning
from numpy.lib.recfunctions import repack_fields
from numpy.testing import (
BLAS_SUPPORTS_FPE,
HAS_REFCOUNT,
IS_64BIT,
IS_PYPY,
@@ -3363,6 +3364,11 @@ def test_dot(self):
@pytest.mark.parametrize("dtype", [np.half, np.double, np.longdouble])
@pytest.mark.skipif(IS_WASM, reason="no wasm fp exception support")
def test_dot_errstate(self, dtype):
# Some dtypes use BLAS for 'dot' operation and
# not all BLAS support floating-point errors.
if not BLAS_SUPPORTS_FPE and dtype == np.double:
pytest.skip("BLAS does not support FPE")

a = np.array([1, 1], dtype=dtype)
b = np.array([-np.inf, np.inf], dtype=dtype)

10 changes: 10 additions & 0 deletions numpy/testing/_private/utils.py
@@ -42,6 +42,7 @@
'assert_no_gc_cycles', 'break_cycles', 'HAS_LAPACK64', 'IS_PYSTON',
'IS_MUSL', 'check_support_sve', 'NOGIL_BUILD',
'IS_EDITABLE', 'IS_INSTALLED', 'NUMPY_ROOT', 'run_threaded', 'IS_64BIT',
'BLAS_SUPPORTS_FPE',
]


@@ -89,6 +90,15 @@ class KnownFailureException(Exception):
IS_PYPY = sys.implementation.name == 'pypy'
IS_PYSTON = hasattr(sys, "pyston_version_info")
HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None and not IS_PYSTON
BLAS_SUPPORTS_FPE = True
if platform.system() == 'Darwin' or platform.machine() == 'arm64':
try:
blas = np.__config__.CONFIG['Build Dependencies']['blas']
if blas['name'] == 'accelerate':
BLAS_SUPPORTS_FPE = False
except KeyError:
pass

HAS_LAPACK64 = numpy.linalg._umath_linalg._ilp64

IS_MUSL = False