Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e247961

Browse files
committed
ENH: Add LSX optimization for LoongArch
1 parent 907ccc3 commit e247961
Copy full SHA for e247961

22 files changed

+2180
-9
lines changed

‎meson_cpu/loongarch/meson.build

Copy file name to clipboard
+7Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
source_root = meson.project_source_root()
2+
mod_features = import('features')
3+
LSX = mod_features.new(
4+
'LSX', 1, args: ['-mlsx'],
5+
test_code: files(source_root + '/numpy/distutils/checks/cpu_lsx.c')[0]
6+
)
7+
LOONGARCH_FEATURES = {'LSX': LSX}

‎meson_cpu/main_config.h.in

Copy file name to clipboardExpand all lines: meson_cpu/main_config.h.in
+4Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,4 +385,8 @@
385385
#ifdef @P@HAVE_NEON
386386
#include <arm_neon.h>
387387
#endif
388+
389+
#ifdef @P@HAVE_LSX
390+
#include <lsxintrin.h>
391+
#endif
388392
#endif // @P@_CPU_DISPATCHER_CONF_H_

‎meson_cpu/meson.build

Copy file name to clipboardExpand all lines: meson_cpu/meson.build
+5-1Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,14 @@ subdir('x86')
7575
subdir('ppc64')
7676
subdir('s390x')
7777
subdir('arm')
78+
subdir('loongarch')
7879

7980
CPU_FEATURES = {}
8081
CPU_FEATURES += ARM_FEATURES
8182
CPU_FEATURES += X86_FEATURES
8283
CPU_FEATURES += PPC64_FEATURES
8384
CPU_FEATURES += S390X_FEATURES
85+
CPU_FEATURES += LOONGARCH_FEATURES
8486

8587
# Parse the requested baseline (CPU_CONF_BASELINE) and dispatch features
8688
# (CPU_CONF_DISPATCH).
@@ -92,7 +94,8 @@ min_features = {
9294
'ppc64': [],
9395
's390x': [],
9496
'arm': [],
95-
'aarch64': [ASIMD]
97+
'aarch64': [ASIMD],
98+
'loongarch64': [LSX]
9699
}.get(cpu_family, [])
97100
if host_machine.endian() == 'little' and cpu_family == 'ppc64'
98101
min_features = [VSX2]
@@ -106,6 +109,7 @@ max_features_dict = {
106109
's390x': S390X_FEATURES,
107110
'arm': ARM_FEATURES,
108111
'aarch64': ARM_FEATURES,
112+
'loongarch64': LOONGARCH_FEATURES,
109113
}.get(cpu_family, {})
110114
max_features = []
111115
foreach fet_name, fet_obj : max_features_dict

‎meson_options.txt

Copy file name to clipboardExpand all lines: meson_options.txt
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ option('test-simd', type: 'array',
3131
'VSX', 'VSX2', 'VSX3', 'VSX4',
3232
'NEON', 'ASIMD',
3333
'VX', 'VXE', 'VXE2',
34+
'LSX',
3435
],
3536
description: 'Specify a list of CPU features to be tested against NumPy SIMD interface')
3637
option('test-simd-args', type: 'string', value: '',

‎numpy/_core/src/common/npy_cpu_features.c

Copy file name to clipboardExpand all lines: numpy/_core/src/common/npy_cpu_features.c
+23-1Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ static struct {
118118
{NPY_CPU_FEATURE_FPHP, "FPHP"},
119119
{NPY_CPU_FEATURE_ASIMDHP, "ASIMDHP"},
120120
{NPY_CPU_FEATURE_ASIMDDP, "ASIMDDP"},
121-
{NPY_CPU_FEATURE_ASIMDFHM, "ASIMDFHM"}};
121+
{NPY_CPU_FEATURE_ASIMDFHM, "ASIMDFHM"},
122+
{NPY_CPU_FEATURE_LSX, "LSX"}};
122123

123124

124125
NPY_VISIBILITY_HIDDEN PyObject *
@@ -653,6 +654,27 @@ npy__cpu_init_features(void)
653654
npy__cpu_have[NPY_CPU_FEATURE_VX] = 1;
654655
}
655656

657+
/***************** LoongArch ******************/
658+
659+
#elif defined(__loongarch__)
660+
661+
#include <sys/auxv.h>
662+
#include <asm/hwcap.h>
663+
664+
static void
665+
npy__cpu_init_features(void)
666+
{
667+
memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
668+
unsigned int hwcap = getauxval(AT_HWCAP);
669+
if ((hwcap & HWCAP_LOONGARCH_LSX) == 0) {
670+
return;
671+
}
672+
673+
if ((hwcap & HWCAP_LOONGARCH_LSX)==0x10) {
674+
npy__cpu_have[NPY_CPU_FEATURE_LSX] = 1;
675+
return;
676+
}
677+
}
656678

657679
/***************** ARM ******************/
658680

‎numpy/_core/src/common/npy_cpu_features.h

Copy file name to clipboardExpand all lines: numpy/_core/src/common/npy_cpu_features.h
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ enum npy_cpu_features
9696
// Vector-Enhancements Facility 2
9797
NPY_CPU_FEATURE_VXE2 = 352,
9898

99+
// LOONGARCH
100+
NPY_CPU_FEATURE_LSX = 400,
101+
99102
NPY_CPU_FEATURE_MAX
100103
};
101104

‎numpy/_core/src/common/simd/intdiv.h

Copy file name to clipboardExpand all lines: numpy/_core/src/common/simd/intdiv.h
+20-1Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,10 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d)
216216
divisor.val[0] = npyv_setall_u8(m);
217217
divisor.val[1] = npyv_reinterpret_u8_s8(npyv_setall_s8(-sh1));
218218
divisor.val[2] = npyv_reinterpret_u8_s8(npyv_setall_s8(-sh2));
219+
#elif defined(NPY_HAVE_LSX)
220+
divisor.val[0] = npyv_setall_u16(m);
221+
divisor.val[1] = npyv_setall_u8(sh1);
222+
divisor.val[2] = npyv_setall_u8(sh2);
219223
#else
220224
#error "please initialize the shifting operand for the new architecture"
221225
#endif
@@ -225,7 +229,7 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d)
225229
NPY_FINLINE npyv_s16x3 npyv_divisor_s16(npy_int16 d);
226230
NPY_FINLINE npyv_s8x3 npyv_divisor_s8(npy_int8 d)
227231
{
228-
#ifdef NPY_HAVE_SSE2 // SSE/AVX2/AVX512
232+
#if defined NPY_HAVE_SSE2 || defined(NPY_HAVE_LSX)// SSE/AVX2/AVX512
229233
npyv_s16x3 p = npyv_divisor_s16(d);
230234
npyv_s8x3 r;
231235
r.val[0] = npyv_reinterpret_s8_s16(p.val[0]);
@@ -291,6 +295,9 @@ NPY_FINLINE npyv_u16x3 npyv_divisor_u16(npy_uint16 d)
291295
#elif defined(NPY_HAVE_NEON)
292296
divisor.val[1] = npyv_reinterpret_u16_s16(npyv_setall_s16(-sh1));
293297
divisor.val[2] = npyv_reinterpret_u16_s16(npyv_setall_s16(-sh2));
298+
#elif defined(NPY_HAVE_LSX)
299+
divisor.val[1] = npyv_setall_u16(sh1);
300+
divisor.val[2] = npyv_setall_u16(sh2);
294301
#else
295302
#error "please initialize the shifting operand for the new architecture"
296303
#endif
@@ -321,6 +328,8 @@ NPY_FINLINE npyv_s16x3 npyv_divisor_s16(npy_int16 d)
321328
divisor.val[1] = npyv_setall_s16(sh);
322329
#elif defined(NPY_HAVE_NEON)
323330
divisor.val[1] = npyv_setall_s16(-sh);
331+
#elif defined(NPY_HAVE_LSX)
332+
divisor.val[1] = npyv_setall_s16(sh);
324333
#else
325334
#error "please initialize the shifting operand for the new architecture"
326335
#endif
@@ -358,6 +367,9 @@ NPY_FINLINE npyv_u32x3 npyv_divisor_u32(npy_uint32 d)
358367
#elif defined(NPY_HAVE_NEON)
359368
divisor.val[1] = npyv_reinterpret_u32_s32(npyv_setall_s32(-sh1));
360369
divisor.val[2] = npyv_reinterpret_u32_s32(npyv_setall_s32(-sh2));
370+
#elif defined(NPY_HAVE_LSX)
371+
divisor.val[1] = npyv_setall_u32(sh1);
372+
divisor.val[2] = npyv_setall_u32(sh2);
361373
#else
362374
#error "please initialize the shifting operand for the new architecture"
363375
#endif
@@ -393,6 +405,8 @@ NPY_FINLINE npyv_s32x3 npyv_divisor_s32(npy_int32 d)
393405
divisor.val[1] = npyv_setall_s32(sh);
394406
#elif defined(NPY_HAVE_NEON)
395407
divisor.val[1] = npyv_setall_s32(-sh);
408+
#elif defined(NPY_HAVE_LSX)
409+
divisor.val[1] = npyv_setall_s32(sh);
396410
#else
397411
#error "please initialize the shifting operand for the new architecture"
398412
#endif
@@ -427,6 +441,9 @@ NPY_FINLINE npyv_u64x3 npyv_divisor_u64(npy_uint64 d)
427441
#ifdef NPY_HAVE_SSE2 // SSE/AVX2/AVX512
428442
divisor.val[1] = npyv_set_u64(sh1);
429443
divisor.val[2] = npyv_set_u64(sh2);
444+
#elif defined(NPY_HAVE_LSX)
445+
divisor.val[1] = npyv_setall_u64(sh1);
446+
divisor.val[2] = npyv_setall_u64(sh2);
430447
#else
431448
#error "please initialize the shifting operand for the new architecture"
432449
#endif
@@ -465,6 +482,8 @@ NPY_FINLINE npyv_s64x3 npyv_divisor_s64(npy_int64 d)
465482
divisor.val[2] = npyv_setall_s64(d < 0 ? -1 : 0); // sign of divisor
466483
#ifdef NPY_HAVE_SSE2 // SSE/AVX2/AVX512
467484
divisor.val[1] = npyv_set_s64(sh);
485+
#elif defined(NPY_HAVE_LSX)
486+
divisor.val[1] = npyv_setall_s64(sh);
468487
#else
469488
#error "please initialize the shifting operand for the new architecture"
470489
#endif

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.