Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 25d26e5

Browse filesBrowse files
authored
Merge pull request #28619 from r-devulap/xss-openmp
ENH: Use openmp on x86-simd-sort to speed up np.sort and np.argsort
2 parents 0bf61e1 + 6eff29e commit 25d26e5
Copy full SHA for 25d26e5

File tree

Expand file treeCollapse file tree

7 files changed

+53
-5
lines changed
Filter options
Expand file treeCollapse file tree

7 files changed

+53
-5
lines changed

‎.github/workflows/linux_simd.yml

Copy file name to clipboardExpand all lines: .github/workflows/linux_simd.yml
+2-2Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ jobs:
212212
python -m pip install pytest pytest-xdist hypothesis typing_extensions
213213
214214
- name: Build
215-
run: CC=gcc-13 CXX=g++-13 spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_skx -Dtest-simd='BASELINE,AVX512_KNL,AVX512_KNM,AVX512_SKX,AVX512_CLX,AVX512_CNL,AVX512_ICL,AVX512_SPR'
215+
run: CC=gcc-13 CXX=g++-13 spin build -- -Denable-openmp=true -Dallow-noblas=true -Dcpu-baseline=avx512_skx -Dtest-simd='BASELINE,AVX512_KNL,AVX512_KNM,AVX512_SKX,AVX512_CLX,AVX512_CNL,AVX512_ICL,AVX512_SPR'
216216

217217
- name: Meson Log
218218
if: always()
@@ -263,7 +263,7 @@ jobs:
263263
python -m pip install pytest pytest-xdist hypothesis typing_extensions
264264
265265
- name: Build
266-
run: CC=gcc-13 CXX=g++-13 spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_spr
266+
run: CC=gcc-13 CXX=g++-13 spin build -- -Denable-openmp=true -Dallow-noblas=true -Dcpu-baseline=avx512_spr
267267

268268
- name: Meson Log
269269
if: always()
+6Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Building NumPy with OpenMP Parallelization
2+
-------------------------------------------
3+
NumPy now supports OpenMP parallel processing capabilities when built with the
4+
``-Denable-openmp=true`` Meson build flag. This feature is disabled by default.
5+
When enabled, ``np.sort`` and ``np.argsort`` functions can utilize OpenMP for
6+
parallel thread execution, improving performance for these operations.
+7Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Performance improvements to ``np.sort`` and ``np.argsort``
2+
----------------------------------------------------------
3+
``np.sort`` and ``np.argsort`` functions can now leverage OpenMP for parallel
4+
thread execution, resulting in up to 3.5x speedups on x86 architectures with
5+
AVX2 or AVX-512 instructions. This opt-in feature requires NumPy to be built
6+
with the ``-Denable-openmp=true`` Meson flag. Users can control the number of threads
7+
used by setting the ``OMP_NUM_THREADS`` environment variable.

‎meson.options

Copy file name to clipboardExpand all lines: meson.options
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ option('disable-intel-sort', type: 'boolean', value: false,
2222
description: 'Disables SIMD-optimized operations related to Intel x86-simd-sort')
2323
option('disable-threading', type: 'boolean', value: false,
2424
description: 'Disable threading support (see `NPY_ALLOW_THREADS` docs)')
25+
option('enable-openmp', type: 'boolean', value: false,
26+
description: 'Enable building NumPy with openmp support')
2527
option('disable-optimization', type: 'boolean', value: false,
2628
description: 'Disable CPU optimized code (dispatch,simd,unroll...)')
2729
option('cpu-baseline', type: 'string', value: 'min',

‎numpy/_core/meson.build

Copy file name to clipboardExpand all lines: numpy/_core/meson.build
+20-2Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,21 @@ if use_intel_sort and not fs.exists('src/npysort/x86-simd-sort/README.md')
128128
error('Missing the `x86-simd-sort` git submodule! Run `git submodule update --init` to fix this.')
129129
endif
130130

131+
# OpenMP-related settings:
132+
if get_option('disable-threading') and get_option('enable-openmp')
133+
error('Build options `disable-threading` and `enable-openmp` are conflicting. Please set at most one to true.')
134+
endif
135+
136+
use_openmp = get_option('enable-openmp') and not get_option('disable-threading')
137+
138+
# Setup openmp flags for x86-simd-sort:
139+
omp = []
140+
omp_dep = []
141+
if use_intel_sort and use_openmp
142+
omp = dependency('openmp', required : true)
143+
omp_dep = declare_dependency(dependencies: omp, compile_args: ['-DXSS_USE_OPENMP'])
144+
endif
145+
131146
if not fs.exists('src/common/pythoncapi-compat')
132147
error('Missing the `pythoncapi-compat` git submodule! ' +
133148
'Run `git submodule update --init` to fix this.')
@@ -867,12 +882,15 @@ foreach gen_mtargets : [
867882
] : []
868883
],
869884
]
885+
886+
887+
870888
mtargets = mod_features.multi_targets(
871889
gen_mtargets[0], multiarray_gen_headers + gen_mtargets[1],
872890
dispatch: gen_mtargets[2],
873891
# baseline: CPU_BASELINE, it doesn't provide baseline fallback
874892
prefix: 'NPY_',
875-
dependencies: [py_dep, np_core_dep],
893+
dependencies: [py_dep, np_core_dep, omp_dep],
876894
c_args: c_args_common + max_opt,
877895
cpp_args: cpp_args_common + max_opt,
878896
include_directories: [
@@ -1286,7 +1304,7 @@ py.extension_module('_multiarray_umath',
12861304
'src/umath',
12871305
'src/highway'
12881306
],
1289-
dependencies: [blas_dep],
1307+
dependencies: [blas_dep, omp],
12901308
link_with: [
12911309
npymath_lib,
12921310
unique_hash_so,

‎numpy/_core/tests/test_multiarray.py

Copy file name to clipboardExpand all lines: numpy/_core/tests/test_multiarray.py
+15Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10329,6 +10329,21 @@ def test_argsort_int(N, dtype):
1032910329
arr[N - 1] = maxv
1033010330
assert_arg_sorted(arr, np.argsort(arr, kind='quick'))
1033110331

10332+
# Test large arrays that leverage OpenMP implementations from x86-simd-sort:
10333+
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
10334+
def test_sort_largearrays(dtype):
10335+
N = 1000000
10336+
rnd = np.random.RandomState(1100710816)
10337+
arr = -0.5 + rnd.random(N).astype(dtype)
10338+
assert_equal(np.sort(arr, kind='quick'), np.sort(arr, kind='heap'))
10339+
10340+
# Test large arrays that leverage OpenMP implementations from x86-simd-sort:
10341+
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
10342+
def test_argsort_largearrays(dtype):
10343+
N = 1000000
10344+
rnd = np.random.RandomState(1100710816)
10345+
arr = -0.5 + rnd.random(N).astype(dtype)
10346+
assert_arg_sorted(arr, np.argsort(arr, kind='quick'))
1033210347

1033310348
@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
1033410349
def test_gh_22683():

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.