Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

BUG: fix race initializing legacy dtype casts #28321

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions 127 .github/workflows/compiler_sanitizers.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
name: Test with compiler sanitizers

on:
push:
branches:
- main
pull_request:
branches:
- main
- maintenance/**

defaults:
run:
shell: bash

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
clang_ASAN:
# To enable this workflow on a fork, comment out:
if: github.repository == 'numpy/numpy'
runs-on: macos-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: recursive
fetch-tags: true
persist-credentials: false
- name: Set up pyenv
run: |
git clone https://github.com/pyenv/pyenv.git "$HOME/.pyenv"
PYENV_ROOT="$HOME/.pyenv"
PYENV_BIN="$PYENV_ROOT/bin"
PYENV_SHIMS="$PYENV_ROOT/shims"
echo "$PYENV_BIN" >> $GITHUB_PATH
echo "$PYENV_SHIMS" >> $GITHUB_PATH
echo "PYENV_ROOT=$PYENV_ROOT" >> $GITHUB_ENV
- name: Check pyenv is working
run:
pyenv --version
- name: Set up LLVM
run: |
brew install llvm@19
LLVM_PREFIX=$(brew --prefix llvm@19)
echo CC="$LLVM_PREFIX/bin/clang" >> $GITHUB_ENV
echo CXX="$LLVM_PREFIX/bin/clang++" >> $GITHUB_ENV
echo LDFLAGS="-L$LLVM_PREFIX/lib" >> $GITHUB_ENV
echo CPPFLAGS="-I$LLVM_PREFIX/include" >> $GITHUB_ENV
- name: Build Python with address sanitizer
run: |
CONFIGURE_OPTS="--with-address-sanitizer" pyenv install 3.13
pyenv global 3.13
- name: Install dependencies
run: |
pip install -r requirements/build_requirements.txt
pip install -r requirements/ci_requirements.txt
pip install -r requirements/test_requirements.txt
# xdist captures stdout/stderr, but we want the ASAN output
pip uninstall -y pytest-xdist
- name: Build
run:
python -m spin build -j2 -- -Db_sanitize=address
- name: Test
run: |
# pass -s to pytest to see ASAN errors and warnings, otherwise pytest captures them
ASAN_OPTIONS=detect_leaks=0:symbolize=1:strict_init_order=true:allocator_may_return_null=1:halt_on_error=1 \
python -m spin test -- -v -s --timeout=600 --durations=10

clang_TSAN:
# To enable this workflow on a fork, comment out:
if: github.repository == 'numpy/numpy'
runs-on: macos-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: recursive
fetch-tags: true
persist-credentials: false
- name: Set up pyenv
run: |
git clone https://github.com/pyenv/pyenv.git "$HOME/.pyenv"
PYENV_ROOT="$HOME/.pyenv"
PYENV_BIN="$PYENV_ROOT/bin"
PYENV_SHIMS="$PYENV_ROOT/shims"
echo "$PYENV_BIN" >> $GITHUB_PATH
echo "$PYENV_SHIMS" >> $GITHUB_PATH
echo "PYENV_ROOT=$PYENV_ROOT" >> $GITHUB_ENV
- name: Check pyenv is working
run:
pyenv --version
- name: Set up LLVM
run: |
brew install llvm@19
LLVM_PREFIX=$(brew --prefix llvm@19)
echo CC="$LLVM_PREFIX/bin/clang" >> $GITHUB_ENV
echo CXX="$LLVM_PREFIX/bin/clang++" >> $GITHUB_ENV
echo LDFLAGS="-L$LLVM_PREFIX/lib" >> $GITHUB_ENV
echo CPPFLAGS="-I$LLVM_PREFIX/include" >> $GITHUB_ENV
- name: Build Python with thread sanitizer support
run: |
# free-threaded Python is much more likely to trigger races
CONFIGURE_OPTS="--with-thread-sanitizer" pyenv install 3.13t
pyenv global 3.13t
- name: Install dependencies
run: |
# TODO: remove when a released cython supports free-threaded python
pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
pip install -r requirements/build_requirements.txt
pip install -r requirements/ci_requirements.txt
pip install -r requirements/test_requirements.txt
# xdist captures stdout/stderr, but we want the TSAN output
pip uninstall -y pytest-xdist
- name: Build
run:
python -m spin build -j2 -- -Db_sanitize=thread
- name: Test
run: |
# These tests are slow, so only run tests in files that do "import threading" to make them count
TSAN_OPTIONS=allocator_may_return_null=1:halt_on_error=1 \
python -m spin test \
`find numpy -name "test*.py" | xargs grep -l "import threading" | tr '\n' ' '` \
-- -v -s --timeout=600 --durations=10
140 changes: 93 additions & 47 deletions 140 numpy/_core/src/multiarray/convert_datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,46 +62,24 @@ static PyObject *
PyArray_GetObjectToGenericCastingImpl(void);


/**
* Fetch the casting implementation from one DType to another.
*
* @param from The implementation to cast from
* @param to The implementation to cast to
*
* @returns A castingimpl (PyArrayDTypeMethod *), None or NULL with an
* error set.
*/
NPY_NO_EXPORT PyObject *
PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to)
static PyObject *
create_casting_impl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to)
{
PyObject *res;
if (from == to) {
res = (PyObject *)NPY_DT_SLOTS(from)->within_dtype_castingimpl;
}
else {
res = PyDict_GetItemWithError(NPY_DT_SLOTS(from)->castingimpls, (PyObject *)to);
}
if (res != NULL || PyErr_Occurred()) {
Py_XINCREF(res);
return res;
}
/*
* The following code looks up CastingImpl based on the fact that anything
* Look up CastingImpl based on the fact that anything
* can be cast to and from objects or structured (void) dtypes.
*
* The last part adds casts dynamically based on legacy definition
*/
if (from->type_num == NPY_OBJECT) {
res = PyArray_GetObjectToGenericCastingImpl();
return PyArray_GetObjectToGenericCastingImpl();
}
else if (to->type_num == NPY_OBJECT) {
res = PyArray_GetGenericToObjectCastingImpl();
return PyArray_GetGenericToObjectCastingImpl();
}
else if (from->type_num == NPY_VOID) {
res = PyArray_GetVoidToGenericCastingImpl();
return PyArray_GetVoidToGenericCastingImpl();
}
else if (to->type_num == NPY_VOID) {
res = PyArray_GetGenericToVoidCastingImpl();
return PyArray_GetGenericToVoidCastingImpl();
}
/*
* Reject non-legacy dtypes. They need to use the new API to add casts and
Expand All @@ -125,42 +103,105 @@ PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to)
from->singleton, to->type_num);
if (castfunc == NULL) {
PyErr_Clear();
/* Remember that this cast is not possible */
if (PyDict_SetItem(NPY_DT_SLOTS(from)->castingimpls,
(PyObject *) to, Py_None) < 0) {
return NULL;
}
Py_RETURN_NONE;
}
}

/* PyArray_AddLegacyWrapping_CastingImpl find the correct casting level: */
/*
* TODO: Possibly move this to the cast registration time. But if we do
* that, we have to also update the cast when the casting safety
* is registered.
/* Create a cast using the state of the legacy casting setup defined
* during the setup of the DType.
*
* Ideally we would do this when we create the DType, but legacy user
* DTypes don't have a way to signal that a DType is done setting up
* casts. Without such a mechanism, the safest way to know that a
* DType is done setting up is to register the cast lazily the first
* time a user does the cast.
*
* We *could* register the casts when we create the wrapping
* DTypeMeta, but that means the internals of the legacy user DType
* system would need to update the state of the casting safety flags
* in the cast implementations stored on the DTypeMeta. That's an
* inversion of abstractions and would be tricky to do without
* creating circular dependencies inside NumPy.
*/
if (PyArray_AddLegacyWrapping_CastingImpl(from, to, -1) < 0) {
return NULL;
}
/* castingimpls is unconditionally filled by
* AddLegacyWrapping_CastingImpl, so this won't create a recursive
* critical section
*/
return PyArray_GetCastingImpl(from, to);
}
}

if (res == NULL) {
static PyObject *
ensure_castingimpl_exists(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to)
{
int return_error = 0;
PyObject *res = NULL;

/* Need to create the cast. This might happen at runtime so we enter a
critical section to avoid races */

Py_BEGIN_CRITICAL_SECTION(NPY_DT_SLOTS(from)->castingimpls);

/* check if another thread filled it while this thread was blocked on
acquiring the critical section */
if (PyDict_GetItemRef(NPY_DT_SLOTS(from)->castingimpls, (PyObject *)to,
&res) < 0) {
return_error = 1;
}
else if (res == NULL) {
res = create_casting_impl(from, to);
if (res == NULL) {
return_error = 1;
}
else if (PyDict_SetItem(NPY_DT_SLOTS(from)->castingimpls,
(PyObject *)to, res) < 0) {
return_error = 1;
}
}
Py_END_CRITICAL_SECTION();
if (return_error) {
Py_XDECREF(res);
return NULL;
}
if (from == to) {
if (from == to && res == Py_None) {
PyErr_Format(PyExc_RuntimeError,
"Internal NumPy error, within-DType cast missing for %S!", from);
Py_DECREF(res);
return NULL;
}
if (PyDict_SetItem(NPY_DT_SLOTS(from)->castingimpls,
(PyObject *)to, res) < 0) {
Py_DECREF(res);
return res;
}

/**
* Fetch the casting implementation from one DType to another.
*
* @param from The implementation to cast from
* @param to The implementation to cast to
*
* @returns A castingimpl (PyArrayDTypeMethod *), None or NULL with an
* error set.
*/
NPY_NO_EXPORT PyObject *
PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to)
{
PyObject *res = NULL;
if (from == to) {
if ((NPY_DT_SLOTS(from)->within_dtype_castingimpl) != NULL) {
res = Py_XNewRef(
(PyObject *)NPY_DT_SLOTS(from)->within_dtype_castingimpl);
}
}
else if (PyDict_GetItemRef(NPY_DT_SLOTS(from)->castingimpls,
(PyObject *)to, &res) < 0) {
return NULL;
}
return res;
if (res != NULL) {
return res;
}

return ensure_castingimpl_exists(from, to);
}


Expand Down Expand Up @@ -409,7 +450,7 @@ _get_cast_safety_from_castingimpl(PyArrayMethodObject *castingimpl,
* implementations fully to have them available for doing the actual cast
* later.
*
* @param from The descriptor to cast from
* @param from The descriptor to cast from
* @param to The descriptor to cast to (may be NULL)
* @param to_dtype If `to` is NULL, must pass the to_dtype (otherwise this
* is ignored).
Expand Down Expand Up @@ -2031,6 +2072,11 @@ PyArray_AddCastingImplementation(PyBoundArrayMethodObject *meth)
/**
* Add a new casting implementation using a PyArrayMethod_Spec.
*
* Using this function outside of module initialization without holding a
* critical section on the castingimpls dict may lead to a race to fill the
* dict. Use PyArray_GetGastingImpl to lazily register casts at runtime
* safely.
*
* @param spec The specification to use as a source
* @param private If private, allow slots not publicly exposed.
* @return 0 on success -1 on failure
Expand Down
6 changes: 6 additions & 0 deletions 6 numpy/_core/src/multiarray/dtypemeta.c
Original file line number Diff line number Diff line change
Expand Up @@ -1252,6 +1252,12 @@ dtypemeta_wrap_legacy_descriptor(
return -1;
}
}
else {
// ensure the within dtype cast is populated for legacy user dtypes
if (PyArray_GetCastingImpl(dtype_class, dtype_class) == NULL) {
return -1;
}
}

return 0;
}
Expand Down
Loading
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.