numpy · seiko2plus · Dec 26, 2024 · Nov 5, 2024 · Nov 5, 2024 · Nov 6, 2024
diff --git a/.github/workflows/linux_qemu.yml b/.github/workflows/linux_qemu.yml
@@ -178,3 +178,99 @@ jobs:
          '"


+  linux_loongarch64_qemu:
+    # To enable this workflow on a fork, comment out:
+    if: github.repository == 'numpy/numpy'
+    runs-on: ubuntu-24.04
+    continue-on-error: true
+    strategy:
+      fail-fast: false
+      matrix:
+        BUILD_PROP:  # positional tuple; each index is given a name under `env` / `name` below
+          - [
+              "loongarch64",  # [0] job display name
+              "loongarch64-linux-gnu",  # [1] TOOLCHAIN_NAME (also used in the cache path and key)
+              "cnclarechen/numpy-loong64-debian:v1",  # [2] DOCKER_CONTAINER
+              "-Dallow-noblas=true",  # [3] MESON_OPTIONS
+              "test_kind or test_multiarray or test_simd or test_umath or test_ufunc",  # [4] RUNTIME_TEST_FILTER
+              "loong64"  # [5] ARCH, used as linux/${ARCH} for `docker run --platform`
+            ]
+    env:
+      TOOLCHAIN_NAME: ${{ matrix.BUILD_PROP[1] }}
+      DOCKER_CONTAINER: ${{ matrix.BUILD_PROP[2] }}
+      MESON_OPTIONS: ${{ matrix.BUILD_PROP[3] }}
+      RUNTIME_TEST_FILTER: ${{ matrix.BUILD_PROP[4] }}
+      ARCH: ${{ matrix.BUILD_PROP[5] }}
+      TERM: xterm-256color
+
+    name: "${{ matrix.BUILD_PROP[0] }}"
+    steps:
+    - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      with:
+        submodules: recursive  # numpy/_core/meson.build errors out when the highway submodule is missing
+        fetch-tags: true
+
+    - name: Initialize binfmt_misc for qemu-user-static  # presumably what lets the runner execute linux/${ARCH} containers
+      run: |
+          docker run --rm --privileged loongcr.lcpu.dev/multiarch/archlinux --reset -p yes
+
+    - name: Install GCC cross-compilers  # installed on the runner; the container reaches them through its /host mount
+      run: |
+        sudo apt update
+        sudo apt install -y ninja-build gcc-14-${TOOLCHAIN_NAME} g++-14-${TOOLCHAIN_NAME} gfortran-14-${TOOLCHAIN_NAME}
+
+    - name: Cache docker container  # key varies with runner OS, toolchain, image tag and build_requirements.txt
+      uses: actions/cache@v4.1.2
+      id: container-cache
+      with:
+        path: ~/docker_${{ matrix.BUILD_PROP[1] }}
+        key: container-${{ runner.os }}-${{ matrix.BUILD_PROP[1] }}-${{ matrix.BUILD_PROP[2] }}-${{ hashFiles('requirements/build_requirements.txt') }}
+
+    - name: Creates new container  # cache miss: build the image, then save it where the cache step looks
+      if: steps.container-cache.outputs.cache-hit != 'true'
+      run: |
+        docker run --platform=linux/${ARCH} --name the_container --interactive \
+          -v /:/host -v $(pwd):/numpy ${DOCKER_CONTAINER} /bin/bash -c "
+          mkdir -p /lib64 && ln -s /host/lib64/ld-* /lib64/ &&
+          ln -s /host/lib/x86_64-linux-gnu /lib/x86_64-linux-gnu &&
+          ln -s /host/usr/${TOOLCHAIN_NAME} /usr/${TOOLCHAIN_NAME} &&
+          ln -s /host/usr/lib/gcc-cross/${TOOLCHAIN_NAME} /usr/lib/gcc/${TOOLCHAIN_NAME} &&
+          rm -f /usr/bin/gcc && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-gcc-14 /usr/bin/gcc &&
+          rm -f /usr/bin/g++ && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-g++-14 /usr/bin/g++ &&
+          rm -f /usr/bin/gfortran && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-gfortran-14 /usr/bin/gfortran &&
+          rm -f /usr/bin/ar && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-ar /usr/bin/ar &&
+          rm -f /usr/bin/as && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-as /usr/bin/as &&
+          rm -f /usr/bin/ld && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-ld /usr/bin/ld &&
+          rm -f /usr/bin/ld.bfd && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-ld.bfd /usr/bin/ld.bfd &&
+          rm -f /usr/bin/ninja && ln -s /host/usr/bin/ninja /usr/bin/ninja &&
+          git config --global --add safe.directory /numpy &&
+          python -m pip install --break-system-packages -r /numpy/requirements/build_requirements.txt &&
+          python -m pip install --break-system-packages pytest pytest-xdist hypothesis typing_extensions
+        "
+        docker commit the_container the_container
+        mkdir -p "${HOME}/docker_${TOOLCHAIN_NAME}"  # a quoted "~" is not tilde-expanded by bash; use $HOME
+        docker save -o "${HOME}/docker_${TOOLCHAIN_NAME}/the_container.tar" the_container
+
+    - name: Load container from cache  # cache hit: restore the image tarball saved by the step above
+      if: steps.container-cache.outputs.cache-hit == 'true'
+      run: docker load -i "${HOME}/docker_${TOOLCHAIN_NAME}/the_container.tar"
+
+    - name: Meson Build  # runs `spin build` in the_container with the checkout mounted at /numpy
+      run: |
+        docker run --rm --platform=linux/${ARCH} -e "TERM=xterm-256color" \
+          -v $(pwd):/numpy -v /:/host the_container \
+          /bin/script -e -q -c "/bin/bash --noprofile --norc -eo pipefail -c '
+            cd /numpy/ && spin build --clean -- ${MESON_OPTIONS}
+          '"
+
+    - name: Meson Log
+      if: always()  # print the log even when an earlier step failed
+      run: 'cat build/meson-logs/meson-log.txt'
+
+    - name: Run Tests  # the `-k` filter expression comes from RUNTIME_TEST_FILTER
+      run: |
+        docker run --rm --platform=linux/${ARCH} -e "TERM=xterm-256color" \
+        -v $(pwd):/numpy -v /:/host the_container \
+        /bin/script -e -q -c "/bin/bash --noprofile --norc -eo pipefail -c '
+          cd /numpy && spin test -- -k \"${RUNTIME_TEST_FILTER}\"
+        '"
diff --git a/meson.options b/meson.options
@@ -35,6 +35,7 @@ option('test-simd', type: 'array',
          'VSX', 'VSX2', 'VSX3', 'VSX4',
          'NEON', 'ASIMD',
          'VX', 'VXE', 'VXE2',
+          'LSX',
        ],
        description: 'Specify a list of CPU features to be tested against NumPy SIMD interface')
 option('test-simd-args', type: 'string', value: '',

diff --git a/meson_cpu/loongarch64/meson.build b/meson_cpu/loongarch64/meson.build
@@ -0,0 +1,8 @@
+source_root = meson.project_source_root()
+mod_features = import('features')
+# LSX: enabled with the -mlsx compiler flag; cpu_lsx.c is passed as its test_code.
+LSX = mod_features.new(
+  'LSX', 1, args: ['-mlsx'],
+  test_code: files(source_root + '/numpy/distutils/checks/cpu_lsx.c')[0]
+)
+LOONGARCH64_FEATURES = {'LSX': LSX}  # name -> feature map, merged into CPU_FEATURES by meson_cpu/meson.build
diff --git a/meson_cpu/main_config.h.in b/meson_cpu/main_config.h.in
@@ -389,4 +389,8 @@
 #ifdef @P@HAVE_RVV
    #include <riscv_vector.h>
 #endif
+
+#ifdef @P@HAVE_LSX
+    #include <lsxintrin.h>
+#endif
 #endif // @P@_CPU_DISPATCHER_CONF_H_
diff --git a/meson_cpu/meson.build b/meson_cpu/meson.build
@@ -76,13 +76,15 @@ subdir('ppc64')
 subdir('s390x')
 subdir('arm')
 subdir('riscv64')
+subdir('loongarch64')

 CPU_FEATURES = {}
 CPU_FEATURES += ARM_FEATURES
 CPU_FEATURES += X86_FEATURES
 CPU_FEATURES += PPC64_FEATURES
 CPU_FEATURES += S390X_FEATURES
 CPU_FEATURES += RV64_FEATURES
+CPU_FEATURES += LOONGARCH64_FEATURES

 # Parse the requested baseline (CPU_CONF_BASELINE) and dispatch features
 # (CPU_CONF_DISPATCH).
@@ -97,6 +99,7 @@ min_features = {
  'aarch64': [ASIMD],
  'riscv64': [],
  'wasm32': [],
+  'loongarch64': [LSX],
 }.get(cpu_family, [])
 if host_machine.endian() == 'little' and cpu_family == 'ppc64'
  min_features = [VSX2]
@@ -112,6 +115,7 @@ max_features_dict = {
  'aarch64': ARM_FEATURES,
  'riscv64': RV64_FEATURES,
  'wasm32': {},
+  'loongarch64': LOONGARCH64_FEATURES,
 }.get(cpu_family, {})
 max_features = []
 foreach fet_name, fet_obj : max_features_dict

diff --git a/numpy/_core/include/numpy/npy_cpu.h b/numpy/_core/include/numpy/npy_cpu.h
@@ -109,8 +109,8 @@
    #elif __riscv_xlen == 32
 	#define NPY_CPU_RISCV32
    #endif
-#elif defined(__loongarch__)
-    #define NPY_CPU_LOONGARCH
+#elif defined(__loongarch64)
+    #define NPY_CPU_LOONGARCH64
 #elif defined(__EMSCRIPTEN__)
    /* __EMSCRIPTEN__ is defined by emscripten: an LLVM-to-Web compiler */
    #define NPY_CPU_WASM

diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build
@@ -97,6 +97,10 @@ if use_svml
  endif
 endif

+if host_machine.cpu_family() == 'loongarch64'
+  add_project_arguments(['-DHWY_COMPILE_ONLY_SCALAR'], language: ['cpp'])
+endif
+
 use_highway = not get_option('disable-highway')
 if use_highway and not fs.exists('src/highway/README.md')
  error('Missing the `highway` git submodule! Run `git submodule update --init` to fix this.')
@@ -880,6 +884,7 @@ foreach gen_mtargets : [
      ASIMD, NEON,
      VSX3, VSX2,
      VXE, VX,
+      LSX,
    ]
  ],
  [
@@ -890,6 +895,7 @@ foreach gen_mtargets : [
      NEON,
      VSX4, VSX2,
      VX,
+      LSX,
    ]
  ],
  [
@@ -900,6 +906,7 @@ foreach gen_mtargets : [
      VSX3, VSX2,
      NEON,
      VXE, VX,
+      LSX,
    ]
  ],
  [
@@ -916,7 +923,8 @@ foreach gen_mtargets : [
      AVX512_SKX, [AVX2, FMA3],
      VSX4, VSX2,
      NEON_VFPV4,
-      VXE
+      VXE,
+      LSX,
    ]
  ],
  [
@@ -927,6 +935,7 @@ foreach gen_mtargets : [
      AVX512_SKX, AVX2, SSE2,
      VSX2,
      VX,
+      LSX,
    ]
  ],
  [
@@ -937,6 +946,7 @@ foreach gen_mtargets : [
      AVX512_SKX, AVX2, SSE2,
      VSX2,
      VXE, VX,
+      LSX,
    ]
  ],
  [
@@ -954,6 +964,7 @@ foreach gen_mtargets : [
      VSX4, VSX3, VSX2,
      NEON_VFPV4,
      VXE2, VXE,
+      LSX,
    ]
  ],
  [
@@ -968,7 +979,8 @@ foreach gen_mtargets : [
      ASIMD, NEON,
      AVX512_SKX, AVX2, SSE2,
      VSX2,
-      VXE, VX
+      VXE, VX,
+      LSX,
    ]
  ],
  [
@@ -978,7 +990,8 @@ foreach gen_mtargets : [
      SSE41, SSE2,
      VSX2,
      ASIMD, NEON,
-      VXE, VX
+      VXE, VX,
+      LSX,
    ]
  ],
  [
@@ -988,6 +1001,7 @@ foreach gen_mtargets : [
      SSE41, SSE2,
      VSX2,
      ASIMD, NEON,
+      LSX,
    ]
  ],
  [
@@ -998,6 +1012,7 @@ foreach gen_mtargets : [
      ASIMD, NEON,
      VSX3, VSX2,
      VXE, VX,
+      LSX,
    ]
  ],
  [
@@ -1008,6 +1023,7 @@ foreach gen_mtargets : [
      NEON,
      VSX2,
      VX,
+      LSX,
    ]
  ],
 ]

diff --git a/numpy/_core/src/_simd/_simd.dispatch.c.src b/numpy/_core/src/_simd/_simd.dispatch.c.src
@@ -30,7 +30,7 @@
 * #ncont_sup = 0,  0,  0,   0,   1,   1,   1,   1,   1,   1#
 * #intdiv_sup= 1,  1,  1,   1,   1,   1,   1,   1,   0,   0#
 * #shl_imm   = 0,  0,  15,  15,  31,  31,  63,  63,  0,   0#
- * #shr_imm   = 0,  0,  16,  16,  32,  32,  64,  64,  0,   0#
+ * #shr_imm   = 0,  0,  15,  15,  31,  31,  63,  63,  0,   0#
 * #bitw8b_sup= 1,  0,  0,   0,   0,   0,   0,   0,   0,   0#
 */
 #if @simd_sup@

diff --git a/numpy/_core/src/_simd/_simd_easyintrin.inc b/numpy/_core/src/_simd/_simd_easyintrin.inc
@@ -243,15 +243,13 @@
    NPY_EXPAND(FN(8,  __VA_ARGS__))

 #define SIMD__IMPL_COUNT_15(FN, ...)     \
-    NPY_EXPAND(FN(0,  __VA_ARGS__))      \
    SIMD__IMPL_COUNT_15_(FN, __VA_ARGS__)

 #define SIMD__IMPL_COUNT_16(FN, ...)      \
    SIMD__IMPL_COUNT_15_(FN, __VA_ARGS__) \
    NPY_EXPAND(FN(16,  __VA_ARGS__))

 #define SIMD__IMPL_COUNT_31(FN, ...)     \
-    NPY_EXPAND(FN(0,  __VA_ARGS__))      \
    SIMD__IMPL_COUNT_31_(FN, __VA_ARGS__)

 #define SIMD__IMPL_COUNT_32(FN, ...)      \
@@ -267,7 +265,6 @@
    NPY_EXPAND(FN(48,  __VA_ARGS__))

 #define SIMD__IMPL_COUNT_63(FN, ...)     \
-    NPY_EXPAND(FN(0,  __VA_ARGS__))      \
    SIMD__IMPL_COUNT_63_(FN, __VA_ARGS__)

 #define SIMD__IMPL_COUNT_64(FN, ...)      \

diff --git a/numpy/_core/src/common/npy_cpu_features.c b/numpy/_core/src/common/npy_cpu_features.c
@@ -125,7 +125,8 @@ static struct {
                {NPY_CPU_FEATURE_ASIMDDP, "ASIMDDP"},
                {NPY_CPU_FEATURE_ASIMDFHM, "ASIMDFHM"},
                {NPY_CPU_FEATURE_SVE, "SVE"},
-                {NPY_CPU_FEATURE_RVV, "RVV"}};
+                {NPY_CPU_FEATURE_RVV, "RVV"},
+                {NPY_CPU_FEATURE_LSX, "LSX"}};


 NPY_VISIBILITY_HIDDEN PyObject *
@@ -665,6 +666,25 @@ npy__cpu_init_features(void)
    npy__cpu_have[NPY_CPU_FEATURE_VX]  = 1;
 }

+/***************** LoongArch ******************/
+
+#elif defined(__loongarch64)
+
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+static void
+npy__cpu_init_features(void)
+{
+   // Start with every feature cleared; only what AT_HWCAP reports is set below.
+   memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
+   // getauxval() returns unsigned long: keep the full width of the bitmask.
+   unsigned long hwcap = getauxval(AT_HWCAP);
+   if (hwcap & HWCAP_LOONGARCH_LSX) {
+      npy__cpu_have[NPY_CPU_FEATURE_LSX] = 1;
+   }
+}
+

 /***************** ARM ******************/


diff --git a/numpy/_core/src/common/npy_cpu_features.h b/numpy/_core/src/common/npy_cpu_features.h
@@ -91,7 +91,7 @@ enum npy_cpu_features

    // IBM/ZARCH
    NPY_CPU_FEATURE_VX                = 350,
- 
+
    // Vector-Enhancements Facility 1
    NPY_CPU_FEATURE_VXE               = 351,

@@ -101,6 +101,9 @@ enum npy_cpu_features
    // RISC-V
    NPY_CPU_FEATURE_RVV               = 400,

+    // LOONGARCH
+    NPY_CPU_FEATURE_LSX               = 500,
+
    NPY_CPU_FEATURE_MAX
 };

@@ -113,7 +116,7 @@ enum npy_cpu_features
 *  - uses 'NPY_DISABLE_CPU_FEATURES' to disable dispatchable features
 *  - uses 'NPY_ENABLE_CPU_FEATURES' to enable dispatchable features
 *
- * It will set a RuntimeError when 
+ * It will set a RuntimeError when
 *  - CPU baseline features from the build are not supported at runtime
 *  - 'NPY_DISABLE_CPU_FEATURES' tries to disable a baseline feature
 *  - 'NPY_DISABLE_CPU_FEATURES' and 'NPY_ENABLE_CPU_FEATURES' are
@@ -122,14 +125,14 @@ enum npy_cpu_features
 *    by the machine or build
 *  - 'NPY_ENABLE_CPU_FEATURES' tries to enable a feature when the project was
 *    not built with any feature optimization support
- *  
+ *
 * It will set an ImportWarning when:
 *  - 'NPY_DISABLE_CPU_FEATURES' tries to disable a feature that is not supported
 *    by the machine or build
 *  - 'NPY_DISABLE_CPU_FEATURES' or 'NPY_ENABLE_CPU_FEATURES' tries to
 *    disable/enable a feature when the project was not built with any feature
 *    optimization support
- * 
+ *
 * return 0 on success otherwise return -1
 */
 NPY_VISIBILITY_HIDDEN int