diff --git a/.clang-format b/.clang-format index 622a5bf67634..4304e0ed40d4 100644 --- a/.clang-format +++ b/.clang-format @@ -16,7 +16,7 @@ BinPackParameters: false BraceWrapping: AfterCaseLabel: true AfterClass: true - AfterControlStatement: MultiLine + AfterControlStatement: Never AfterEnum: true AfterFunction: true AfterNamespace: true diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 841f009ace89..e4fe0bcc4b2f 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -17,3 +17,6 @@ c106d91b866f4acd30226b68519b12a73a881490 # Add pygrep-hooks to pre-commit config e62718415aa3660da5f607e352c991a063a54219 + +# Bump clang-format from 12.0.1 to 22.1.0 version +c2d65bd451a7d8e5b6319147da95e9dabf7a382b diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 0745ca1ca9dc..87a7311b95e4 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -47,7 +47,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 + uses: styfle/cancel-workflow-action@d07a454dad7609a92316b57b23c9ccfd4f59af66 # 0.13.1 with: access_token: ${{ github.token }} @@ -224,7 +224,7 @@ jobs: if: env.GH_EVENT_OPEN_PR_UPSTREAM == 'true' env: PR_NUM: ${{ github.event.number }} - uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2.8.2 + uses: mshick/add-pr-comment@ffd016c7e151d97d69d21a843022fd4cd5b96fe5 # v3.9.0 with: message-id: url_to_docs message: | @@ -268,7 +268,7 @@ jobs: git push tokened_docs gh-pages - name: Modify the comment with URL to official documentation - uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2.8.2 + uses: mshick/add-pr-comment@ffd016c7e151d97d69d21a843022fd4cd5b96fe5 # v3.9.0 with: message-id: url_to_docs find: | diff --git a/.github/workflows/check-onemath.yaml b/.github/workflows/check-onemath.yaml index 409117c692b9..acbfcac96890 --- 
a/.github/workflows/check-onemath.yaml +++ b/.github/workflows/check-onemath.yaml @@ -34,7 +34,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 + uses: styfle/cancel-workflow-action@d07a454dad7609a92316b57b23c9ccfd4f59af66 # 0.13.1 with: access_token: ${{ github.token }} @@ -87,7 +87,7 @@ jobs: fetch-depth: 0 - name: Download artifact - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ env.environment-file-name }} path: ${{ env.environment-file-loc }} @@ -181,7 +181,7 @@ jobs: fetch-depth: 0 - name: Download artifact - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ env.environment-file-name }} path: ${{ env.environment-file-loc }} diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index a12486300aa0..c894c530a20e 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -49,7 +49,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 + uses: styfle/cancel-workflow-action@d07a454dad7609a92316b57b23c9ccfd4f59af66 # 0.13.1 with: access_token: ${{ github.token }} @@ -151,7 +151,7 @@ jobs: path: ${{ env.dpnp-repo-path }} - name: Download artifact - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ env.package-name }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -280,7 +280,7 @@ jobs: path: ${{ env.dpnp-repo-path }} - name: Download artifact - uses: 
actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ env.package-name }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -439,12 +439,12 @@ jobs: fetch-depth: ${{ env.fetch-depth }} - name: Download artifact - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ env.package-name }} ${{ runner.os }} Python ${{ matrix.python }} - name: Download wheels artifact - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ env.package-name }} ${{ runner.os }} Wheels Python ${{ matrix.python }} @@ -528,7 +528,7 @@ jobs: path: ${{ env.dpnp-repo-path }} - name: Download artifact - uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ${{ env.package-name }} ${{ runner.os }} Python ${{ env.python-ver }} path: ${{ env.pkg-path-in-channel }} @@ -654,7 +654,7 @@ jobs: - name: Post result to PR if: ${{ github.event.pull_request && !github.event.pull_request.head.repo.fork }} - uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2.8.2 + uses: mshick/add-pr-comment@ffd016c7e151d97d69d21a843022fd4cd5b96fe5 # v3.9.0 with: message-id: array_api_results message: | diff --git a/.github/workflows/cron-run-tests.yaml b/.github/workflows/cron-run-tests.yaml index f8e8394c6713..ea4fd4f14fc3 100644 --- a/.github/workflows/cron-run-tests.yaml +++ b/.github/workflows/cron-run-tests.yaml @@ -43,7 +43,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 + 
uses: styfle/cancel-workflow-action@d07a454dad7609a92316b57b23c9ccfd4f59af66 # 0.13.1 with: access_token: ${{ github.token }} diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index 2cbe97ab0242..bfc3c7357a3e 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -33,7 +33,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@3155a141048f8f89c06b4cdae32e7853e97536bc # 0.13.0 + uses: styfle/cancel-workflow-action@d07a454dad7609a92316b57b23c9ccfd4f59af66 # 0.13.1 with: access_token: ${{ github.token }} diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 8b4cc3b93f64..5d7e0677281e 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@89a39a4e59826350b863aa6b6252a07ad50cf83e # v4.32.4 + uses: github/codeql-action/upload-sarif@0d579ffd059c29b07949a3cce3983f0780820c98 # v4.32.6 with: sarif_file: results.sarif diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index d5f9f22146fc..d8f59405ce89 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -15,13 +15,6 @@ jobs: timeout-minutes: 10 steps: - - name: Set up clang-format - run: | - sudo apt-get install -y clang-format-12 - sudo unlink /usr/bin/clang-format - sudo ln -s /usr/bin/clang-format-12 /usr/bin/clang-format - clang-format --version - - name: Set up pip packages uses: BSFishy/pip-action@8f2d471d809dc20b6ada98c91910b6ae6243f318 # v1 with: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 66245039ce3c..57ec9e2a2a8e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -57,19 +57,19 @@ repos: hooks: - id: pyupgrade - repo: 
https://github.com/codespell-project/codespell - rev: v2.4.1 + rev: v2.4.2 hooks: - id: codespell args: ["-L", "abd"] # ignore "abd" used in einsum tests additional_dependencies: - tomli - repo: https://github.com/psf/black - rev: 26.1.0 + rev: 26.3.1 hooks: - id: black exclude: "dpnp/_version.py" - repo: https://github.com/pycqa/isort - rev: 8.0.0 + rev: 8.0.1 hooks: - id: isort name: isort (python) @@ -88,13 +88,13 @@ repos: additional_dependencies: - flake8-docstrings==1.7.0 - flake8-bugbear==24.12.12 -- repo: https://github.com/pocc/pre-commit-hooks - rev: v1.3.5 +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: v22.1.0 hooks: - id: clang-format args: ["-i"] - repo: https://github.com/gitleaks/gitleaks - rev: v8.30.0 + rev: v8.30.1 hooks: - id: gitleaks - repo: https://github.com/jumanjihouse/pre-commit-hooks @@ -114,7 +114,8 @@ repos: "-sn", # Don't display the score "--disable=import-error", "--disable=redefined-builtin", - "--disable=unused-wildcard-import" + "--disable=unused-wildcard-import", + "--disable=c-extension-no-member" ] files: '^dpnp/(dpnp_iface.*|fft|linalg|scipy|dpnp_array)' - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks @@ -127,7 +128,7 @@ repos: hooks: - id: actionlint - repo: https://github.com/BlankSpruce/gersemi - rev: 0.26.0 + rev: 0.26.1 hooks: - id: gersemi exclude: "dpnp/backend/cmake/Modules/" diff --git a/CHANGELOG.md b/CHANGELOG.md index 61cde1ddfefc..a742a2f4b532 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Added implementation of `dpnp.divmod` [#2674](https://github.com/IntelPython/dpnp/pull/2674) * Added implementation of `dpnp.isin` function [#2595](https://github.com/IntelPython/dpnp/pull/2595) * Added implementation of `dpnp.scipy.linalg.lu` (SciPy-compatible) [#2787](https://github.com/IntelPython/dpnp/pull/2787) +* Added support for ndarray subclassing via `dpnp.ndarray.view` 
method with `type` parameter [#2815](https://github.com/IntelPython/dpnp/issues/2815) ### Changed @@ -53,6 +54,8 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Changed `dpnp.partition` implementation to reuse `dpnp.sort` where it brings the performance benefit [#2766](https://github.com/IntelPython/dpnp/pull/2766) * `dpnp` uses pybind11 3.0.2 [#27734](https://github.com/IntelPython/dpnp/pull/2773) * Modified CMake files for the extension to explicitly mark DPC++ compiler and dpctl headers as system ones and so to suppress the build warning generated inside them [#2770](https://github.com/IntelPython/dpnp/pull/2770) +* Updated QR tests to avoid element-wise comparisons for `raw` and `r` modes [#2785](https://github.com/IntelPython/dpnp/pull/2785) +* Moved all SYCL kernel functors from `backend/extensions/` to a unified `backend/kernels/` directory hierarchy [#2816](https://github.com/IntelPython/dpnp/pull/2816) ### Deprecated @@ -76,6 +79,8 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Resolved an issue causing `dpnp.linspace` to return an incorrect output shape when inputs were passed as arrays [#2712](https://github.com/IntelPython/dpnp/pull/2712) * Resolved an issue where `dpnp` always returns the base allocation pointer, when the view start is expected [#2651](https://github.com/IntelPython/dpnp/pull/2651) * Fixed an issue causing an exception in `dpnp.geomspace` and `dpnp.logspace` when called with explicit `device` keyword but any input array is allocated on another device [#2723](https://github.com/IntelPython/dpnp/pull/2723) +* Fixed `.data.ptr` property on array views to correctly return the pointer to the view's data location instead of the base allocation pointer [#2812](https://github.com/IntelPython/dpnp/pull/2812) +* Resolved an issue with strides calculation in `dpnp.diagonal` to return correct values for empty diagonals [#2814](https://github.com/IntelPython/dpnp/pull/2814) 
### Security diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json index c5e5663e21fb..3d0e7f88d55f 100644 --- a/benchmarks/asv.conf.json +++ b/benchmarks/asv.conf.json @@ -15,7 +15,9 @@ // List of branches to benchmark. If not provided, defaults to "master" // (for git) or "tip" (for mercurial). - "branches": ["HEAD"], + "branches": [ + "HEAD" + ], // The DVCS being used. If not set, it will be automatically // determined from "repo" by looking at the protocol in the URL @@ -35,7 +37,9 @@ // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. - "pythons": ["3.7"], + "pythons": [ + "3.7" + ], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty @@ -53,7 +57,6 @@ // environments in. If not provided, defaults to "env" "env_dir": "env", - // The directory (relative to the current directory) that raw benchmark // results are stored in. If not provided, defaults to "results". "results_dir": "results", @@ -79,7 +82,8 @@ // skipped for the matching benchmark. 
// // "regressions_first_commits": { - // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "some_benchmark": "352cdf", // Consider regressions only after this + // commit // "another_benchmark": null, // Skip regression detection altogether // } } diff --git a/dpnp/backend/extensions/blas/dot_common.hpp b/dpnp/backend/extensions/blas/dot_common.hpp index 1672e7217cba..383804ff1718 100644 --- a/dpnp/backend/extensions/blas/dot_common.hpp +++ b/dpnp/backend/extensions/blas/dot_common.hpp @@ -97,8 +97,7 @@ std::pair if (!dpctl::utils::queues_are_compatible( exec_q, - {vectorX.get_queue(), vectorY.get_queue(), result.get_queue()})) - { + {vectorX.get_queue(), vectorY.get_queue(), result.get_queue()})) { throw py::value_error( "USM allocations are not compatible with the execution queue."); } @@ -120,8 +119,8 @@ std::pair const int vectorY_typenum = vectorY.get_typenum(); const int result_typenum = result.get_typenum(); - if (result_typenum != vectorX_typenum || result_typenum != vectorY_typenum) - { + if (result_typenum != vectorX_typenum || + result_typenum != vectorY_typenum) { throw py::value_error("Given arrays must be of the same type."); } diff --git a/dpnp/backend/extensions/blas/gemm.cpp b/dpnp/backend/extensions/blas/gemm.cpp index 48c1ae98ead4..86f751baf2e0 100644 --- a/dpnp/backend/extensions/blas/gemm.cpp +++ b/dpnp/backend/extensions/blas/gemm.cpp @@ -181,8 +181,7 @@ std::tuple if (!dpctl::utils::queues_are_compatible( exec_q, - {matrixA.get_queue(), matrixB.get_queue(), resultC.get_queue()})) - { + {matrixA.get_queue(), matrixB.get_queue(), resultC.get_queue()})) { throw py::value_error( "USM allocations are not compatible with the execution queue."); } diff --git a/dpnp/backend/extensions/blas/gemm_batch.cpp b/dpnp/backend/extensions/blas/gemm_batch.cpp index a6cd7ac4e130..d02b035922c0 100644 --- a/dpnp/backend/extensions/blas/gemm_batch.cpp +++ b/dpnp/backend/extensions/blas/gemm_batch.cpp @@ -237,8 +237,7 @@ std::tuple if 
(!dpctl::utils::queues_are_compatible( exec_q, - {matrixA.get_queue(), matrixB.get_queue(), resultC.get_queue()})) - { + {matrixA.get_queue(), matrixB.get_queue(), resultC.get_queue()})) { throw py::value_error( "USM allocations are not compatible with the execution queue."); } diff --git a/dpnp/backend/extensions/blas/gemv.cpp b/dpnp/backend/extensions/blas/gemv.cpp index a9c5414ef8c7..0b6ae78bc76e 100644 --- a/dpnp/backend/extensions/blas/gemv.cpp +++ b/dpnp/backend/extensions/blas/gemv.cpp @@ -169,8 +169,7 @@ std::pair if (!dpctl::utils::queues_are_compatible( exec_q, - {matrixA.get_queue(), vectorX.get_queue(), vectorY.get_queue()})) - { + {matrixA.get_queue(), vectorX.get_queue(), vectorY.get_queue()})) { throw py::value_error( "USM allocations are not compatible with the execution queue."); } diff --git a/dpnp/backend/extensions/blas/syrk.cpp b/dpnp/backend/extensions/blas/syrk.cpp index 8b0ebce3d888..9668e72b57f6 100644 --- a/dpnp/backend/extensions/blas/syrk.cpp +++ b/dpnp/backend/extensions/blas/syrk.cpp @@ -248,8 +248,7 @@ std::pair } if (!dpctl::utils::queues_are_compatible( - exec_q, {matrixA.get_queue(), resultC.get_queue()})) - { + exec_q, {matrixA.get_queue(), resultC.get_queue()})) { throw py::value_error( "USM allocations are not compatible with the execution queue."); } diff --git a/dpnp/backend/extensions/common/ext/common.hpp b/dpnp/backend/extensions/common/ext/common.hpp index d626b56ea00c..f0ce1722bfb1 100644 --- a/dpnp/backend/extensions/common/ext/common.hpp +++ b/dpnp/backend/extensions/common/ext/common.hpp @@ -213,8 +213,7 @@ sycl::nd_range<1> pybind11::dtype dtype_from_typenum(int dst_typenum); template - typename factoryT, + template typename factoryT, int _num_types = type_dispatch::num_types> inline void init_dispatch_vector(dispatchT dispatch_vector[]) { @@ -223,8 +222,7 @@ inline void init_dispatch_vector(dispatchT dispatch_vector[]) } template - typename factoryT, + template typename factoryT, int _num_types = 
type_dispatch::num_types> inline void init_dispatch_table(dispatchT dispatch_table[][_num_types]) { diff --git a/dpnp/backend/extensions/common/ext/dispatch_table.hpp b/dpnp/backend/extensions/common/ext/dispatch_table.hpp index 4cfe1bd57250..6655f054f355 100644 --- a/dpnp/backend/extensions/common/ext/dispatch_table.hpp +++ b/dpnp/backend/extensions/common/ext/dispatch_table.hpp @@ -99,8 +99,7 @@ using SupportedDTypeList2 = std::vector; template - typename Func> + template typename Func> struct TableBuilder { template @@ -125,8 +124,7 @@ struct TableBuilder template - typename Func> + template typename Func> struct TableBuilder2 { template @@ -232,8 +230,7 @@ class DispatchTable2 } template - typename Func> + template typename Func> void populate_dispatch_table() { using TBulder = typename TableBuilder2::type; diff --git a/dpnp/backend/extensions/elementwise_functions/common.hpp b/dpnp/backend/extensions/elementwise_functions/common.hpp index df2b3afe53b9..f3b15c8d6774 100644 --- a/dpnp/backend/extensions/elementwise_functions/common.hpp +++ b/dpnp/backend/extensions/elementwise_functions/common.hpp @@ -131,8 +131,7 @@ struct UnaryTwoOutputsContigFunctor else if constexpr (enable_sg_loadstore && UnaryTwoOutputsOpT::supports_sg_loadstore::value && UnaryTwoOutputsOpT::supports_vec::value && - (vec_sz > 1)) - { + (vec_sz > 1)) { auto sg = ndit.get_sub_group(); const std::uint16_t sgSize = sg.get_max_local_range()[0]; @@ -171,8 +170,7 @@ struct UnaryTwoOutputsContigFunctor } else if constexpr (enable_sg_loadstore && UnaryTwoOutputsOpT::supports_sg_loadstore::value && - std::is_same_v) - { + std::is_same_v) { // default: use scalar-value function auto sg = ndit.get_sub_group(); @@ -214,8 +212,7 @@ struct UnaryTwoOutputsContigFunctor } } else if constexpr (enable_sg_loadstore && - UnaryTwoOutputsOpT::supports_sg_loadstore::value) - { + UnaryTwoOutputsOpT::supports_sg_loadstore::value) { // default: use scalar-value function auto sg = ndit.get_sub_group(); @@ -359,8 
+356,7 @@ struct BinaryTwoOutputsContigFunctor if constexpr (enable_sg_loadstore && BinaryOperatorT::supports_sg_loadstore::value && - BinaryOperatorT::supports_vec::value && (vec_sz > 1)) - { + BinaryOperatorT::supports_vec::value && (vec_sz > 1)) { auto sg = ndit.get_sub_group(); std::uint16_t sgSize = sg.get_max_local_range()[0]; @@ -405,8 +401,7 @@ struct BinaryTwoOutputsContigFunctor } } else if constexpr (enable_sg_loadstore && - BinaryOperatorT::supports_sg_loadstore::value) - { + BinaryOperatorT::supports_sg_loadstore::value) { auto sg = ndit.get_sub_group(); const std::uint16_t sgSize = sg.get_max_local_range()[0]; @@ -528,21 +523,18 @@ struct BinaryTwoOutputsStridedFunctor * dpctl::tensor::kernels::elementwise_common namespace. */ template - class UnaryTwoOutputsType, + template class UnaryTwoOutputsType, template - class UnaryTwoOutputsContigFunctorT, + bool enable> class UnaryTwoOutputsContigFunctorT, template - class kernel_name, + std::uint8_t nv> class kernel_name, std::uint8_t vec_sz = 4u, std::uint8_t n_vecs = 2u> sycl::event @@ -576,8 +568,7 @@ sycl::event if (is_aligned(arg_p) && is_aligned(res1_p) && - is_aligned(res2_p)) - { + is_aligned(res2_p)) { static constexpr bool enable_sg_loadstore = true; using KernelName = BaseKernelName; using Impl = @@ -613,12 +604,15 @@ sycl::event * dpctl::tensor::kernels::elementwise_common namespace. */ template - class UnaryTwoOutputsType, - template - class UnaryTwoOutputsStridedFunctorT, - template - class kernel_name> + template class UnaryTwoOutputsType, + template class UnaryTwoOutputsStridedFunctorT, + template class kernel_name> sycl::event unary_two_outputs_strided_impl( sycl::queue &exec_q, std::size_t nelems, @@ -665,27 +659,25 @@ sycl::event unary_two_outputs_strided_impl( * @note It extends binary_contig_impl from * dpctl::tensor::kernels::elementwise_common namespace. 
*/ -template - class BinaryTwoOutputsType, - template - class BinaryTwoOutputsContigFunctorT, - template - class kernel_name, - std::uint8_t vec_sz = 4u, - std::uint8_t n_vecs = 2u> +template < + typename argTy1, + typename argTy2, + template class BinaryTwoOutputsType, + template class BinaryTwoOutputsContigFunctorT, + template class kernel_name, + std::uint8_t vec_sz = 4u, + std::uint8_t n_vecs = 2u> sycl::event binary_two_outputs_contig_impl(sycl::queue &exec_q, std::size_t nelems, @@ -726,8 +718,7 @@ sycl::event if (is_aligned(arg1_tp) && is_aligned(arg2_tp) && is_aligned(res1_tp) && - is_aligned(res2_tp)) - { + is_aligned(res2_tp)) { static constexpr bool enable_sg_loadstore = true; using KernelName = BaseKernelName; using Impl = BinaryTwoOutputsContigFunctorT - class BinaryTwoOutputsType, - template - class BinaryTwoOutputsStridedFunctorT, - template - class kernel_name> +template class BinaryTwoOutputsType, + template class BinaryTwoOutputsStridedFunctorT, + template class kernel_name> sycl::event binary_two_outputs_strided_impl( sycl::queue &exec_q, std::size_t nelems, diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp index c996ac07df02..6a29c9a33c5a 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -309,8 +309,7 @@ std::pair // check that types are supported if (dst1_typeid != func_output_typeids.first || - dst2_typeid != func_output_typeids.second) - { + dst2_typeid != func_output_typeids.second) { throw py::value_error( "One of destination arrays has unexpected elemental data type."); } @@ -362,8 +361,7 @@ std::pair dpctl::tensor::overlap::SameLogicalTensors(); if ((overlap(src, dst1) && !same_logical_tensors(src, dst1)) || (overlap(src, dst2) && !same_logical_tensors(src, dst2)) || - (overlap(dst1, dst2) && 
!same_logical_tensors(dst1, dst2))) - { + (overlap(dst1, dst2) && !same_logical_tensors(dst1, dst2))) { throw py::value_error("Arrays index overlapping segments of memory"); } @@ -430,8 +428,7 @@ std::pair simplified_dst2_strides, src_offset, dst1_offset, dst2_offset); if (nd == 1 && simplified_src_strides[0] == 1 && - simplified_dst1_strides[0] == 1 && simplified_dst2_strides[0] == 1) - { + simplified_dst1_strides[0] == 1 && simplified_dst2_strides[0] == 1) { // Special case of contiguous data auto contig_fn = contig_dispatch_vector[src_typeid]; @@ -625,8 +622,7 @@ std::pair py_binary_ufunc( auto const &same_logical_tensors = dpctl::tensor::overlap::SameLogicalTensors(); if ((overlap(src1, dst) && !same_logical_tensors(src1, dst)) || - (overlap(src2, dst) && !same_logical_tensors(src2, dst))) - { + (overlap(src2, dst) && !same_logical_tensors(src2, dst))) { throw py::value_error("Arrays index overlapping segments of memory"); } // check memory overlap @@ -693,8 +689,7 @@ std::pair py_binary_ufunc( if ((nd == 1) && isEqual(simplified_src1_strides, unit_stride) && isEqual(simplified_src2_strides, unit_stride) && - isEqual(simplified_dst_strides, unit_stride)) - { + isEqual(simplified_dst_strides, unit_stride)) { auto contig_fn = contig_dispatch_table[src1_typeid][src2_typeid]; if (contig_fn != nullptr) { @@ -716,8 +711,7 @@ std::pair py_binary_ufunc( // special case of C-contiguous matrix and a row if (isEqual(simplified_src2_strides, zero_one_strides) && isEqual(simplified_src1_strides, {simplified_shape[1], one}) && - isEqual(simplified_dst_strides, {simplified_shape[1], one})) - { + isEqual(simplified_dst_strides, {simplified_shape[1], one})) { auto matrix_row_broadcast_fn = contig_matrix_row_broadcast_dispatch_table[src1_typeid] [src2_typeid]; @@ -731,8 +725,7 @@ std::pair py_binary_ufunc( is_aligned( src2_data + src2_offset * src2_itemsize) && is_aligned( - dst_data + dst_offset * dst_itemsize)) - { + dst_data + dst_offset * dst_itemsize)) { std::size_t n0 = 
simplified_shape[0]; std::size_t n1 = simplified_shape[1]; sycl::event comp_ev = matrix_row_broadcast_fn( @@ -749,8 +742,7 @@ std::pair py_binary_ufunc( } if (isEqual(simplified_src1_strides, one_zero_strides) && isEqual(simplified_src2_strides, {one, simplified_shape[0]}) && - isEqual(simplified_dst_strides, {one, simplified_shape[0]})) - { + isEqual(simplified_dst_strides, {one, simplified_shape[0]})) { auto row_matrix_broadcast_fn = contig_row_matrix_broadcast_dispatch_table[src1_typeid] [src2_typeid]; @@ -765,8 +757,7 @@ std::pair py_binary_ufunc( is_aligned( src2_data + src2_offset * src2_itemsize) && is_aligned( - dst_data + dst_offset * dst_itemsize)) - { + dst_data + dst_offset * dst_itemsize)) { std::size_t n0 = simplified_shape[1]; std::size_t n1 = simplified_shape[0]; sycl::event comp_ev = row_matrix_broadcast_fn( @@ -839,8 +830,7 @@ py::object py_binary_ufunc_result_type(const py::dtype &input1_dtype, } if (src1_typeid < 0 || src1_typeid >= td_ns::num_types || src2_typeid < 0 || - src2_typeid >= td_ns::num_types) - { + src2_typeid >= td_ns::num_types) { throw std::runtime_error("binary output type lookup failed"); } int dst_typeid = output_types_table[src1_typeid][src2_typeid]; @@ -898,8 +888,8 @@ std::pair } // check that queues are compatible - if (!dpctl::utils::queues_are_compatible(exec_q, {src1, src2, dst1, dst2})) - { + if (!dpctl::utils::queues_are_compatible(exec_q, + {src1, src2, dst1, dst2})) { throw py::value_error( "Execution queue is not compatible with allocation queues"); } @@ -955,8 +945,7 @@ std::pair (overlap(src1, dst2) && !same_logical_tensors(src1, dst2)) || (overlap(src2, dst1) && !same_logical_tensors(src2, dst1)) || (overlap(src2, dst2) && !same_logical_tensors(src2, dst2)) || - (overlap(dst1, dst2))) - { + (overlap(dst1, dst2))) { throw py::value_error("Arrays index overlapping segments of memory"); } @@ -1031,8 +1020,7 @@ std::pair if ((nd == 1) && isEqual(simplified_src1_strides, unit_stride) && 
isEqual(simplified_src2_strides, unit_stride) && isEqual(simplified_dst1_strides, unit_stride) && - isEqual(simplified_dst2_strides, unit_stride)) - { + isEqual(simplified_dst2_strides, unit_stride)) { auto contig_fn = contig_dispatch_table[src1_typeid][src2_typeid]; if (contig_fn != nullptr) { @@ -1107,8 +1095,7 @@ std::pair py_binary_two_outputs_ufunc_result_type( } if (src1_typeid < 0 || src1_typeid >= td_ns::num_types || src2_typeid < 0 || - src2_typeid >= td_ns::num_types) - { + src2_typeid >= td_ns::num_types) { throw std::runtime_error("binary output type lookup failed"); } std::pair dst_typeids = @@ -1263,8 +1250,7 @@ std::pair std::initializer_list{1}; if ((nd == 1) && isEqual(simplified_rhs_strides, unit_stride) && - isEqual(simplified_lhs_strides, unit_stride)) - { + isEqual(simplified_lhs_strides, unit_stride)) { auto contig_fn = contig_dispatch_table[rhs_typeid][lhs_typeid]; if (contig_fn != nullptr) { @@ -1283,8 +1269,7 @@ std::pair static constexpr py::ssize_t one{1}; // special case of C-contiguous matrix and a row if (isEqual(simplified_rhs_strides, one_zero_strides) && - isEqual(simplified_lhs_strides, {one, simplified_shape[0]})) - { + isEqual(simplified_lhs_strides, {one, simplified_shape[0]})) { auto row_matrix_broadcast_fn = contig_row_matrix_broadcast_dispatch_table[rhs_typeid] [lhs_typeid]; diff --git a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp index e34cb74fcb0a..c60602ccb01d 100644 --- a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp +++ b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp @@ -292,8 +292,7 @@ void simplify_iteration_space_4( simplified_dst_strides.reserve(nd); if ((src1_strides[0] < 0) && (src2_strides[0] < 0) && - (src3_strides[0] < 0) && (dst_strides[0] < 0)) - { + (src3_strides[0] < 0) && (dst_strides[0] < 0)) { 
simplified_src1_strides.push_back(-src1_strides[0]); simplified_src2_strides.push_back(-src2_strides[0]); simplified_src3_strides.push_back(-src3_strides[0]); diff --git a/dpnp/backend/extensions/fft/common.hpp b/dpnp/backend/extensions/fft/common.hpp index f76da9721316..44f0b43f8597 100644 --- a/dpnp/backend/extensions/fft/common.hpp +++ b/dpnp/backend/extensions/fft/common.hpp @@ -56,8 +56,7 @@ class DescriptorWrapper { mkl_dft::precision fft_prec = get_precision(); if (fft_prec == mkl_dft::precision::DOUBLE && - !q.get_device().has(sycl::aspect::fp64)) - { + !q.get_device().has(sycl::aspect::fp64)) { throw py::value_error("Descriptor is double precision but the " "device does not support double precision."); } @@ -66,10 +65,7 @@ class DescriptorWrapper queue_ptr_ = std::make_unique(q); } - descr_type &get_descriptor() - { - return descr_; - } + descr_type &get_descriptor() { return descr_; } const sycl::queue &get_queue() const { diff --git a/dpnp/backend/extensions/fft/out_of_place.tpp b/dpnp/backend/extensions/fft/out_of_place.tpp index 290408dc60bc..ed5cd37df7f1 100644 --- a/dpnp/backend/extensions/fft/out_of_place.tpp +++ b/dpnp/backend/extensions/fft/out_of_place.tpp @@ -82,9 +82,8 @@ std::pair } sycl::queue exec_q = descr.get_queue(); - if (!dpctl::utils::queues_are_compatible(exec_q, - {in.get_queue(), out.get_queue()})) - { + if (!dpctl::utils::queues_are_compatible( + exec_q, {in.get_queue(), out.get_queue()})) { throw py::value_error("USM allocations are not compatible with the " "execution queue of the descriptor."); } diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index 370d59f95585..e1bc34c9ae8b 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -62,7 +62,7 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR}/../../ ${CMAKE_CURRENT_SOURCE_DIR}/../common ) # treat below headers as system to suppress the warnings there during the build diff --git a/dpnp/backend/extensions/indexing/choose.cpp b/dpnp/backend/extensions/indexing/choose.cpp index 99d91744366f..3b2df73f46ef 100644 --- a/dpnp/backend/extensions/indexing/choose.cpp +++ b/dpnp/backend/extensions/indexing/choose.cpp @@ -30,41 +30,123 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include #include -#include "choose_kernel.hpp" +#include + #include "dpctl4pybind11.hpp" +#include +#include -// utils extension header #include "ext/common.hpp" +#include "kernels/indexing/choose.hpp" // dpctl tensor headers #include "utils/indexing_utils.hpp" #include "utils/memory_overlap.hpp" +#include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" #include "utils/sycl_alloc_utils.hpp" #include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" namespace dpnp::extensions::indexing { +namespace py = pybind11; +namespace impl +{ namespace td_ns = dpctl::tensor::type_dispatch; -static kernels::choose_fn_ptr_t choose_clip_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static kernels::choose_fn_ptr_t choose_wrap_dispatch_table[td_ns::num_types] - [td_ns::num_types]; +using dpctl::tensor::ssize_t; + +typedef sycl::event (*choose_fn_ptr_t)(sycl::queue &, + size_t, + ssize_t, + int, + const ssize_t *, + const char *, + char *, + char **, + ssize_t, + ssize_t, + const ssize_t *, + const std::vector &); + +static choose_fn_ptr_t choose_clip_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static choose_fn_ptr_t choose_wrap_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +template +sycl::event choose_impl(sycl::queue &q, + size_t nelems, + ssize_t n_chcs, + int nd, + const ssize_t *shape_and_strides, + const char *ind_cp, + char *dst_cp, + char **chcs_cp, + ssize_t ind_offset, + ssize_t dst_offset, + const ssize_t 
*chc_offsets, + const std::vector &depends) +{ + dpctl::tensor::type_utils::validate_type_for_device(q); -namespace py = pybind11; + const indTy *ind_tp = reinterpret_cast(ind_cp); + Ty *dst_tp = reinterpret_cast(dst_cp); -namespace detail + sycl::event choose_ev = q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + using InOutIndexerT = + dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + const InOutIndexerT ind_out_indexer{nd, ind_offset, dst_offset, + shape_and_strides}; + + using NthChoiceIndexerT = + dpnp::kernels::choose::strides::NthStrideOffsetUnpacked; + const NthChoiceIndexerT choices_indexer{ + nd, chc_offsets, shape_and_strides, shape_and_strides + 3 * nd}; + + using ChooseFunc = + dpnp::kernels::choose::ChooseFunctor; + + cgh.parallel_for(sycl::range<1>(nelems), + ChooseFunc(ind_tp, dst_tp, chcs_cp, n_chcs, + ind_out_indexer, + choices_indexer)); + }); + + return choose_ev; +} + +template +struct ChooseFactory { + fnT get() + { + if constexpr (std::is_integral::value && + !std::is_same::value) { + fnT fn = choose_impl; + return fn; + } + else { + fnT fn = nullptr; + return fn; + } + } +}; +namespace detail +{ using host_ptrs_allocator_t = dpctl::tensor::alloc_utils::usm_host_allocator; using ptrs_t = std::vector; @@ -191,7 +273,6 @@ std::vector parse_py_chcs(const sycl::queue &q, return res; } - } // namespace detail std::pair @@ -412,23 +493,6 @@ std::pair return std::make_pair(arg_cleanup_ev, choose_generic_ev); } -template -struct ChooseFactory -{ - fnT get() - { - if constexpr (std::is_integral::value && - !std::is_same::value) { - fnT fn = kernels::choose_impl; - return fn; - } - else { - fnT fn = nullptr; - return fn; - } - } -}; - using dpctl::tensor::indexing_utils::ClipIndex; using dpctl::tensor::indexing_utils::WrapIndex; @@ -441,23 +505,22 @@ using ChooseClipFactory = ChooseFactory>; void init_choose_dispatch_tables(void) { using ext::common::init_dispatch_table; - using kernels::choose_fn_ptr_t; init_dispatch_table( 
choose_clip_dispatch_table); init_dispatch_table( choose_wrap_dispatch_table); } +} // namespace impl void init_choose(py::module_ m) { - dpnp::extensions::indexing::init_choose_dispatch_tables(); + impl::init_choose_dispatch_tables(); - m.def("_choose", &py_choose, "", py::arg("src"), py::arg("chcs"), + m.def("_choose", &impl::py_choose, "", py::arg("src"), py::arg("chcs"), py::arg("dst"), py::arg("mode"), py::arg("sycl_queue"), py::arg("depends") = py::list()); return; } - } // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/extensions/indexing/choose_kernel.hpp b/dpnp/backend/extensions/indexing/choose_kernel.hpp deleted file mode 100644 index 6b1ac8005054..000000000000 --- a/dpnp/backend/extensions/indexing/choose_kernel.hpp +++ /dev/null @@ -1,191 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2025, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// - Neither the name of the copyright holder nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#pragma once - -#include -#include -#include -#include -#include - -#include - -#include "kernels/dpctl_tensor_types.hpp" -#include "utils/indexing_utils.hpp" -#include "utils/offset_utils.hpp" -#include "utils/strided_iters.hpp" -#include "utils/type_utils.hpp" - -namespace dpnp::extensions::indexing::strides_detail -{ - -struct NthStrideOffsetUnpacked -{ - NthStrideOffsetUnpacked(int common_nd, - dpctl::tensor::ssize_t const *_offsets, - dpctl::tensor::ssize_t const *_shape, - dpctl::tensor::ssize_t const *_strides) - : _ind(common_nd), nd(common_nd), offsets(_offsets), shape(_shape), - strides(_strides) - { - } - - template - size_t operator()(dpctl::tensor::ssize_t gid, nT n) const - { - dpctl::tensor::ssize_t relative_offset(0); - _ind.get_displacement( - gid, shape, strides + (n * nd), relative_offset); - - return relative_offset + offsets[n]; - } - -private: - dpctl::tensor::strides::CIndexer_vector _ind; - - int nd; - dpctl::tensor::ssize_t const *offsets; - dpctl::tensor::ssize_t const *shape; - dpctl::tensor::ssize_t const *strides; -}; - -static_assert(sycl::is_device_copyable_v); - -} // namespace dpnp::extensions::indexing::strides_detail - -namespace dpnp::extensions::indexing::kernels -{ - -template -class ChooseFunctor -{ -private: - const IndT *ind = nullptr; - T *dst = nullptr; - char **chcs = nullptr; - dpctl::tensor::ssize_t 
n_chcs; - const IndOutIndexerT ind_out_indexer; - const ChoicesIndexerT chcs_indexer; - -public: - ChooseFunctor(const IndT *ind_, - T *dst_, - char **chcs_, - dpctl::tensor::ssize_t n_chcs_, - const IndOutIndexerT &ind_out_indexer_, - const ChoicesIndexerT &chcs_indexer_) - : ind(ind_), dst(dst_), chcs(chcs_), n_chcs(n_chcs_), - ind_out_indexer(ind_out_indexer_), chcs_indexer(chcs_indexer_) - { - } - - void operator()(sycl::id<1> id) const - { - const ProjectorT proj{}; - - dpctl::tensor::ssize_t i = id[0]; - - auto ind_dst_offsets = ind_out_indexer(i); - dpctl::tensor::ssize_t ind_offset = ind_dst_offsets.get_first_offset(); - dpctl::tensor::ssize_t dst_offset = ind_dst_offsets.get_second_offset(); - - IndT chc_idx = ind[ind_offset]; - // proj produces an index in the range of n_chcs - dpctl::tensor::ssize_t projected_idx = proj(n_chcs, chc_idx); - - dpctl::tensor::ssize_t chc_offset = chcs_indexer(i, projected_idx); - - T *chc = reinterpret_cast(chcs[projected_idx]); - - dst[dst_offset] = chc[chc_offset]; - } -}; - -typedef sycl::event (*choose_fn_ptr_t)(sycl::queue &, - size_t, - dpctl::tensor::ssize_t, - int, - const dpctl::tensor::ssize_t *, - const char *, - char *, - char **, - dpctl::tensor::ssize_t, - dpctl::tensor::ssize_t, - const dpctl::tensor::ssize_t *, - const std::vector &); - -template -sycl::event choose_impl(sycl::queue &q, - size_t nelems, - dpctl::tensor::ssize_t n_chcs, - int nd, - const dpctl::tensor::ssize_t *shape_and_strides, - const char *ind_cp, - char *dst_cp, - char **chcs_cp, - dpctl::tensor::ssize_t ind_offset, - dpctl::tensor::ssize_t dst_offset, - const dpctl::tensor::ssize_t *chc_offsets, - const std::vector &depends) -{ - dpctl::tensor::type_utils::validate_type_for_device(q); - - const indTy *ind_tp = reinterpret_cast(ind_cp); - Ty *dst_tp = reinterpret_cast(dst_cp); - - sycl::event choose_ev = q.submit([&](sycl::handler &cgh) { - cgh.depends_on(depends); - - using InOutIndexerT = - 
dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; - const InOutIndexerT ind_out_indexer{nd, ind_offset, dst_offset, - shape_and_strides}; - - using NthChoiceIndexerT = strides_detail::NthStrideOffsetUnpacked; - const NthChoiceIndexerT choices_indexer{ - nd, chc_offsets, shape_and_strides, shape_and_strides + 3 * nd}; - - using ChooseFunc = ChooseFunctor; - - cgh.parallel_for(sycl::range<1>(nelems), - ChooseFunc(ind_tp, dst_tp, chcs_cp, n_chcs, - ind_out_indexer, - choices_indexer)); - }); - - return choose_ev; -} - -} // namespace dpnp::extensions::indexing::kernels diff --git a/dpnp/backend/extensions/lapack/evd_batch_common.hpp b/dpnp/backend/extensions/lapack/evd_batch_common.hpp index e1debdc35934..d2edffcf520a 100644 --- a/dpnp/backend/extensions/lapack/evd_batch_common.hpp +++ b/dpnp/backend/extensions/lapack/evd_batch_common.hpp @@ -75,8 +75,7 @@ std::pair expected_eig_vecs_nd, expected_eig_vals_nd); if (eig_vecs_shape[2] != eig_vals_shape[0] || - eig_vecs_shape[0] != eig_vals_shape[1]) - { + eig_vecs_shape[0] != eig_vals_shape[1]) { throw py::value_error( "The shape of 'eig_vals' must be (batch_size, n), " "where batch_size = " + diff --git a/dpnp/backend/extensions/lapack/geqrf_batch.cpp b/dpnp/backend/extensions/lapack/geqrf_batch.cpp index e0821e23e440..033c3db01b10 100644 --- a/dpnp/backend/extensions/lapack/geqrf_batch.cpp +++ b/dpnp/backend/extensions/lapack/geqrf_batch.cpp @@ -98,13 +98,13 @@ static sycl::event geqrf_batch_impl(sycl::queue &exec_q, geqrf_batch_event = mkl_lapack::geqrf_batch( exec_q, - m, // The number of rows in each matrix in the batch; (0 ≤ m). - // It must be a non-negative integer. - n, // The number of columns in each matrix in the batch; (0 ≤ n). - // It must be a non-negative integer. - a, // Pointer to the batch of matrices, each of size (m x n). - lda, // The leading dimension of each matrix in the batch. - // For row major layout, lda ≥ max(1, m). + m, // The number of rows in each matrix in the batch; (0 ≤ m). 
+ // It must be a non-negative integer. + n, // The number of columns in each matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the batch of matrices, each of size (m x n). + lda, // The leading dimension of each matrix in the batch. + // For row major layout, lda ≥ max(1, m). stride_a, // Stride between consecutive matrices in the batch. tau, // Pointer to the array of scalar factors of the elementary // reflectors for each matrix in the batch. diff --git a/dpnp/backend/extensions/lapack/gesv.cpp b/dpnp/backend/extensions/lapack/gesv.cpp index 0569fab2c350..bec24db585a6 100644 --- a/dpnp/backend/extensions/lapack/gesv.cpp +++ b/dpnp/backend/extensions/lapack/gesv.cpp @@ -114,14 +114,14 @@ static sycl::event gesv_impl(sycl::queue &exec_q, try { getrf_event = mkl_lapack::getrf( exec_q, - n, // The order of the square matrix A (0 ≤ n). - // It must be a non-negative integer. - n, // The number of columns in the square matrix A (0 ≤ n). - // It must be a non-negative integer. - a, // Pointer to the square matrix A (n x n). - lda, // The leading dimension of matrix A. - // It must be at least max(1, n). - ipiv, // Pointer to the output array of pivot indices. + n, // The order of the square matrix A (0 ≤ n). + // It must be a non-negative integer. + n, // The number of columns in the square matrix A (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the square matrix A (n x n). + lda, // The leading dimension of matrix A. + // It must be at least max(1, n). + ipiv, // Pointer to the output array of pivot indices. scratchpad, // Pointer to scratchpad memory to be used by MKL // routine for storing intermediate results. 
scratchpad_size, depends); @@ -242,8 +242,7 @@ std::pair // Ensure `batch_size`, `n` and 'nrhs' are non-zero, otherwise return empty // events if (helper::check_zeros_shape(coeff_matrix_nd, coeff_matrix_shape) || - helper::check_zeros_shape(dependent_vals_nd, dependent_vals_shape)) - { + helper::check_zeros_shape(dependent_vals_nd, dependent_vals_shape)) { // nothing to do return std::make_pair(sycl::event(), sycl::event()); } diff --git a/dpnp/backend/extensions/lapack/gesv_batch.cpp b/dpnp/backend/extensions/lapack/gesv_batch.cpp index ce02f8517eb5..893279245344 100644 --- a/dpnp/backend/extensions/lapack/gesv_batch.cpp +++ b/dpnp/backend/extensions/lapack/gesv_batch.cpp @@ -258,10 +258,10 @@ static sycl::event gesv_batch_impl(sycl::queue &exec_q, try { gesv_event = mkl_lapack::gesv( exec_q, - n, // The order of the square matrix A - // and the number of rows in matrix B (0 ≤ n). - nrhs, // The number of right-hand sides, - // i.e., the number of columns in matrix B (0 ≤ nrhs). + n, // The order of the square matrix A + // and the number of rows in matrix B (0 ≤ n). + nrhs, // The number of right-hand sides, + // i.e., the number of columns in matrix B (0 ≤ nrhs). a_batch, // Pointer to the square coefficient matrix A (n x n). lda, // The leading dimension of a, must be at least max(1, n). 
current_ipiv, // The pivot indices that define the permutation @@ -341,8 +341,7 @@ std::pair // Ensure `batch_size`, `n` and 'nrhs' are non-zero, otherwise return empty // events if (helper::check_zeros_shape(coeff_matrix_nd, coeff_matrix_shape) || - helper::check_zeros_shape(dependent_vals_nd, dependent_vals_shape)) - { + helper::check_zeros_shape(dependent_vals_nd, dependent_vals_shape)) { // nothing to do return std::make_pair(sycl::event(), sycl::event()); } diff --git a/dpnp/backend/extensions/lapack/gesv_common_utils.hpp b/dpnp/backend/extensions/lapack/gesv_common_utils.hpp index d86d7e29413e..62f1e9589a0b 100644 --- a/dpnp/backend/extensions/lapack/gesv_common_utils.hpp +++ b/dpnp/backend/extensions/lapack/gesv_common_utils.hpp @@ -64,8 +64,7 @@ inline void common_gesv_checks(sycl::queue &exec_q, } if (dependent_vals_nd < min_dependent_vals_ndim || - dependent_vals_nd > max_dependent_vals_ndim) - { + dependent_vals_nd > max_dependent_vals_ndim) { throw py::value_error("The dependent values array has ndim=" + std::to_string(dependent_vals_nd) + ", but a " + std::to_string(min_dependent_vals_ndim) + @@ -95,8 +94,7 @@ inline void common_gesv_checks(sycl::queue &exec_q, // check compatibility of execution queue and allocation queue if (!dpctl::utils::queues_are_compatible(exec_q, - {coeff_matrix, dependent_vals})) - { + {coeff_matrix, dependent_vals})) { throw py::value_error( "Execution queue is not compatible with allocation queues."); } diff --git a/dpnp/backend/extensions/lapack/gesvd.cpp b/dpnp/backend/extensions/lapack/gesvd.cpp index d46179ac3b9a..e347837e3cfe 100644 --- a/dpnp/backend/extensions/lapack/gesvd.cpp +++ b/dpnp/backend/extensions/lapack/gesvd.cpp @@ -171,8 +171,7 @@ std::pair // Ensure `m` and 'n' are non-zero, otherwise return empty // events if (gesvd_utils::check_zeros_shape_gesvd(a_array, out_s, out_u, out_vt, - jobu_val, jobvt_val)) - { + jobu_val, jobvt_val)) { // nothing to do return std::make_pair(sycl::event(), sycl::event()); } @@ 
-223,8 +222,8 @@ struct GesvdContigFactory { fnT get() { - if constexpr (types::GesvdTypePairSupportFactory::is_defined) - { + if constexpr (types::GesvdTypePairSupportFactory::is_defined) { return gesvd_impl; } else { diff --git a/dpnp/backend/extensions/lapack/gesvd_batch.cpp b/dpnp/backend/extensions/lapack/gesvd_batch.cpp index eb9903ba6e1e..868facc200e2 100644 --- a/dpnp/backend/extensions/lapack/gesvd_batch.cpp +++ b/dpnp/backend/extensions/lapack/gesvd_batch.cpp @@ -102,8 +102,7 @@ static sycl::event gesvd_batch_impl(sycl::queue &exec_q, std::int64_t vt_size = 0; if (jobu == oneapi::mkl::jobsvd::somevec || - jobu == oneapi::mkl::jobsvd::vectorsina) - { + jobu == oneapi::mkl::jobsvd::vectorsina) { u_size = m * k; vt_size = k * n; } @@ -238,8 +237,7 @@ std::pair // Ensure `batch_size`, `m` and 'n' are non-zero, otherwise return empty // events if (gesvd_utils::check_zeros_shape_gesvd(a_array, out_s, out_u, out_vt, - jobu_val, jobvt_val)) - { + jobu_val, jobvt_val)) { // nothing to do return std::make_pair(sycl::event(), sycl::event()); } @@ -293,8 +291,8 @@ struct GesvdBatchContigFactory { fnT get() { - if constexpr (types::GesvdTypePairSupportFactory::is_defined) - { + if constexpr (types::GesvdTypePairSupportFactory::is_defined) { return gesvd_batch_impl; } else { diff --git a/dpnp/backend/extensions/lapack/gesvd_common_utils.hpp b/dpnp/backend/extensions/lapack/gesvd_common_utils.hpp index ce2d9c1eb474..1cd2c8ac4997 100644 --- a/dpnp/backend/extensions/lapack/gesvd_common_utils.hpp +++ b/dpnp/backend/extensions/lapack/gesvd_common_utils.hpp @@ -122,8 +122,7 @@ inline void common_gesvd_checks(sycl::queue &exec_q, // check compatibility of execution queue and allocation queue if (!dpctl::utils::queues_are_compatible(exec_q, - {a_array, out_s, out_u, out_vt})) - { + {a_array, out_s, out_u, out_vt})) { throw py::value_error( "Execution queue is not compatible with allocation queues."); } @@ -131,8 +130,7 @@ inline void common_gesvd_checks(sycl::queue &exec_q, 
auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); if (overlap(a_array, out_s) || overlap(a_array, out_u) || overlap(a_array, out_vt) || overlap(out_s, out_u) || - overlap(out_s, out_vt) || overlap(out_u, out_vt)) - { + overlap(out_s, out_vt) || overlap(out_u, out_vt)) { throw py::value_error("Arrays have overlapping segments of memory"); } diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp index abf20aff643a..870ccc8e811a 100644 --- a/dpnp/backend/extensions/lapack/getrf.cpp +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -91,14 +91,14 @@ static sycl::event getrf_impl(sycl::queue &exec_q, getrf_event = mkl_lapack::getrf( exec_q, - m, // The number of rows in the input matrix A (0 ≤ m). - // It must be a non-negative integer. - n, // The number of columns in the input matrix A (0 ≤ n). - // It must be a non-negative integer. - a, // Pointer to the input matrix A (m x n). - lda, // The leading dimension of matrix A. - // It must be at least max(1, m). - ipiv, // Pointer to the output array of pivot indices. + m, // The number of rows in the input matrix A (0 ≤ m). + // It must be a non-negative integer. + n, // The number of columns in the input matrix A (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the input matrix A (m x n). + lda, // The leading dimension of matrix A. + // It must be at least max(1, m). + ipiv, // Pointer to the output array of pivot indices. scratchpad, // Pointer to scratchpad memory to be used by MKL // routine for storing intermediate results. 
scratchpad_size, depends); diff --git a/dpnp/backend/extensions/lapack/getrs.cpp b/dpnp/backend/extensions/lapack/getrs.cpp index 8108afd97003..94e1a1027898 100644 --- a/dpnp/backend/extensions/lapack/getrs.cpp +++ b/dpnp/backend/extensions/lapack/getrs.cpp @@ -208,8 +208,7 @@ std::pair // check compatibility of execution queue and allocation queue if (!dpctl::utils::queues_are_compatible(exec_q, - {a_array, b_array, ipiv_array})) - { + {a_array, b_array, ipiv_array})) { throw py::value_error( "Execution queue is not compatible with allocation queues"); } diff --git a/dpnp/backend/extensions/lapack/getrs_batch.cpp b/dpnp/backend/extensions/lapack/getrs_batch.cpp index 9fc6ce1a5dfc..f4fb446c328d 100644 --- a/dpnp/backend/extensions/lapack/getrs_batch.cpp +++ b/dpnp/backend/extensions/lapack/getrs_batch.cpp @@ -253,8 +253,7 @@ std::pair // check compatibility of execution queue and allocation queue if (!dpctl::utils::queues_are_compatible(exec_q, - {a_array, b_array, ipiv_array})) - { + {a_array, b_array, ipiv_array})) { throw py::value_error( "Execution queue is not compatible with allocation queues"); } diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp index 5990e5344a17..96d6a03e9b8e 100644 --- a/dpnp/backend/extensions/lapack/heevd.cpp +++ b/dpnp/backend/extensions/lapack/heevd.cpp @@ -124,8 +124,8 @@ struct HeevdContigFactory { fnT get() { - if constexpr (types::HeevdTypePairSupportFactory::is_defined) - { + if constexpr (types::HeevdTypePairSupportFactory::is_defined) { return heevd_impl; } else { diff --git a/dpnp/backend/extensions/lapack/heevd_batch.cpp b/dpnp/backend/extensions/lapack/heevd_batch.cpp index e1c1a96bc320..e8614498bd41 100644 --- a/dpnp/backend/extensions/lapack/heevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/heevd_batch.cpp @@ -161,8 +161,8 @@ struct HeevdBatchContigFactory { fnT get() { - if constexpr (types::HeevdTypePairSupportFactory::is_defined) - { + if constexpr 
(types::HeevdTypePairSupportFactory::is_defined) { return heevd_batch_impl; } else { diff --git a/dpnp/backend/extensions/lapack/linalg_exceptions.hpp b/dpnp/backend/extensions/lapack/linalg_exceptions.hpp index d087adfbd2b6..c823d1995a4e 100644 --- a/dpnp/backend/extensions/lapack/linalg_exceptions.hpp +++ b/dpnp/backend/extensions/lapack/linalg_exceptions.hpp @@ -37,10 +37,7 @@ class LinAlgError : public std::exception public: explicit LinAlgError(const char *message) : msg_(message) {} - const char *what() const noexcept override - { - return msg_.c_str(); - } + const char *what() const noexcept override { return msg_.c_str(); } private: std::string msg_; diff --git a/dpnp/backend/extensions/lapack/orgqr_batch.cpp b/dpnp/backend/extensions/lapack/orgqr_batch.cpp index ef1c85b91f4a..a29fe9b342fc 100644 --- a/dpnp/backend/extensions/lapack/orgqr_batch.cpp +++ b/dpnp/backend/extensions/lapack/orgqr_batch.cpp @@ -100,15 +100,15 @@ static sycl::event orgqr_batch_impl(sycl::queue &exec_q, orgqr_batch_event = mkl_lapack::orgqr_batch( exec_q, - m, // The number of rows in each matrix in the batch; (0 ≤ m). - // It must be a non-negative integer. - n, // The number of columns in each matrix in the batch; (0 ≤ n). - // It must be a non-negative integer. - k, // The number of elementary reflectors - // whose product defines the matrices Qi; (0 ≤ k ≤ n). - a, // Pointer to the batch of matrices, each of size (m x n). - lda, // The leading dimension of each matrix in the batch. - // For row major layout, lda ≥ max(1, m). + m, // The number of rows in each matrix in the batch; (0 ≤ m). + // It must be a non-negative integer. + n, // The number of columns in each matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + k, // The number of elementary reflectors + // whose product defines the matrices Qi; (0 ≤ k ≤ n). + a, // Pointer to the batch of matrices, each of size (m x n). + lda, // The leading dimension of each matrix in the batch. 
+ // For row major layout, lda ≥ max(1, m). stride_a, // Stride between consecutive matrices in the batch. tau, // Pointer to the array of scalar factors of the elementary // reflectors for each matrix in the batch. diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp index af69cf9e6b7e..3ecd386299ac 100644 --- a/dpnp/backend/extensions/lapack/syevd.cpp +++ b/dpnp/backend/extensions/lapack/syevd.cpp @@ -124,8 +124,8 @@ struct SyevdContigFactory { fnT get() { - if constexpr (types::SyevdTypePairSupportFactory::is_defined) - { + if constexpr (types::SyevdTypePairSupportFactory::is_defined) { return syevd_impl; } else { diff --git a/dpnp/backend/extensions/lapack/syevd_batch.cpp b/dpnp/backend/extensions/lapack/syevd_batch.cpp index 0c326e5d79bb..13237d27a35c 100644 --- a/dpnp/backend/extensions/lapack/syevd_batch.cpp +++ b/dpnp/backend/extensions/lapack/syevd_batch.cpp @@ -161,8 +161,8 @@ struct SyevdBatchContigFactory { fnT get() { - if constexpr (types::SyevdTypePairSupportFactory::is_defined) - { + if constexpr (types::SyevdTypePairSupportFactory::is_defined) { return syevd_batch_impl; } else { diff --git a/dpnp/backend/extensions/lapack/ungqr_batch.cpp b/dpnp/backend/extensions/lapack/ungqr_batch.cpp index 7c890d968b0a..04de27cb257c 100644 --- a/dpnp/backend/extensions/lapack/ungqr_batch.cpp +++ b/dpnp/backend/extensions/lapack/ungqr_batch.cpp @@ -100,15 +100,15 @@ static sycl::event ungqr_batch_impl(sycl::queue &exec_q, ungqr_batch_event = mkl_lapack::ungqr_batch( exec_q, - m, // The number of rows in each matrix in the batch; (0 ≤ m). - // It must be a non-negative integer. - n, // The number of columns in each matrix in the batch; (0 ≤ n). - // It must be a non-negative integer. - k, // The number of elementary reflectors - // whose product defines the matrices Qi; (0 ≤ k ≤ n). - a, // Pointer to the batch of matrices, each of size (m x n). - lda, // The leading dimension of each matrix in the batch. 
- // For row major layout, lda ≥ max(1, m). + m, // The number of rows in each matrix in the batch; (0 ≤ m). + // It must be a non-negative integer. + n, // The number of columns in each matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + k, // The number of elementary reflectors + // whose product defines the matrices Qi; (0 ≤ k ≤ n). + a, // Pointer to the batch of matrices, each of size (m x n). + lda, // The leading dimension of each matrix in the batch. + // For row major layout, lda ≥ max(1, m). stride_a, // Stride between consecutive matrices in the batch. tau, // Pointer to the array of scalar factors of the elementary // reflectors for each matrix in the batch. diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 7ccb05238ae4..36786c8cbaf3 100644 --- a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -67,7 +67,7 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ ${CMAKE_CURRENT_SOURCE_DIR}/../common ) # treat below headers as system to suppress the warnings there during the build diff --git a/dpnp/backend/extensions/statistics/bincount.cpp b/dpnp/backend/extensions/statistics/bincount.cpp index ba258cd55447..9bfe5c2a2449 100644 --- a/dpnp/backend/extensions/statistics/bincount.cpp +++ b/dpnp/backend/extensions/statistics/bincount.cpp @@ -59,10 +59,7 @@ struct BincountEdges { } - boundsT get_bounds() const - { - return {min, max}; - } + boundsT get_bounds() const { return {min, max}; } template size_t get_bin(const sycl::nd_item<_Dims> &, diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp index 539b42475fbf..8091e8874d17 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.hpp +++ 
b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -28,24 +28,26 @@ #pragma once +#include +#include +#include +#include + #include +#include "dpctl4pybind11.hpp" + #include "ext/common.hpp" +#include "kernels/statistics/histogram.hpp" -namespace dpctl::tensor +namespace statistics::histogram { -class usm_ndarray; -} - using dpctl::tensor::usm_ndarray; using ext::common::AtomicOp; using ext::common::IsNan; using ext::common::Less; -namespace statistics::histogram -{ - template struct CachedData { @@ -64,37 +66,28 @@ struct CachedData local_data = LocalData(shape, cgh); } - T *get_ptr() const - { - return &local_data[0]; - } + T *get_ptr() const { return &local_data[0]; } template void init(const sycl::nd_item<_Dims> &item) const { - uint32_t llid = item.get_local_linear_id(); + std::uint32_t llid = item.get_local_linear_id(); auto local_ptr = &local_data[0]; - uint32_t size = local_data.size(); + std::uint32_t size = local_data.size(); auto group = item.get_group(); - uint32_t local_size = group.get_local_linear_range(); + std::uint32_t local_size = group.get_local_linear_range(); - for (uint32_t i = llid; i < size; i += local_size) { + for (std::uint32_t i = llid; i < size; i += local_size) { local_ptr[i] = global_data[i]; } } - size_t size() const - { - return local_data.size(); - } + std::size_t size() const { return local_data.size(); } - T &operator[](const sycl::id &id) const - { - return local_data[id]; - } + T &operator[](const sycl::id &id) const { return local_data[id]; } template > - T &operator[](const size_t id) const + T &operator[](const std::size_t id) const { return local_data[id]; } @@ -119,28 +112,19 @@ struct UncachedData _shape = shape; } - T *get_ptr() const - { - return global_data; - } + T *get_ptr() const { return global_data; } template void init(const sycl::nd_item<_Dims> &) const { } - size_t size() const - { - return _shape.size(); - } + std::size_t size() const { return _shape.size(); } - T &operator[](const sycl::id &id) 
const - { - return global_data[id]; - } + T &operator[](const sycl::id &id) const { return global_data[id]; } template > - T &operator[](const size_t id) const + T &operator[](const std::size_t id) const { return global_data[id]; } @@ -157,15 +141,15 @@ struct HistLocalType }; template <> -struct HistLocalType +struct HistLocalType { - using type = uint32_t; + using type = std::uint32_t; }; template <> -struct HistLocalType +struct HistLocalType { - using type = int32_t; + using type = std::int32_t; }; template ::type> @@ -177,8 +161,8 @@ struct HistWithLocalCopies using LocalHist = sycl::local_accessor; HistWithLocalCopies(T *global_data, - size_t bins_count, - int32_t copies_count, + std::size_t bins_count, + std::int32_t copies_count, sycl::handler &cgh) { local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh); @@ -188,23 +172,25 @@ struct HistWithLocalCopies template void init(const sycl::nd_item<_Dims> &item, localT val = 0) const { - uint32_t llid = item.get_local_linear_id(); + std::uint32_t llid = item.get_local_linear_id(); auto *local_ptr = &local_hist[0][0]; - uint32_t size = local_hist.size(); + std::uint32_t size = local_hist.size(); auto group = item.get_group(); - uint32_t local_size = group.get_local_linear_range(); + std::uint32_t local_size = group.get_local_linear_range(); - for (uint32_t i = llid; i < size; i += local_size) { + for (std::uint32_t i = llid; i < size; i += local_size) { local_ptr[i] = val; } } template - void add(const sycl::nd_item<_Dims> &item, int32_t bin, localT value) const + void add(const sycl::nd_item<_Dims> &item, + std::int32_t bin, + localT value) const { - int32_t llid = item.get_local_linear_id(); - int32_t local_hist_count = local_hist.get_range().get(0); - int32_t local_copy_id = + std::int32_t llid = item.get_local_linear_id(); + std::int32_t local_hist_count = local_hist.get_range().get(0); + std::int32_t local_copy_id = local_hist_count == 1 ? 
0 : llid % local_hist_count; AtomicOp void finalize(const sycl::nd_item<_Dims> &item) const { - uint32_t llid = item.get_local_linear_id(); - uint32_t bins_count = local_hist.get_range().get(1); - uint32_t local_hist_count = local_hist.get_range().get(0); + std::uint32_t llid = item.get_local_linear_id(); + std::uint32_t bins_count = local_hist.get_range().get(1); + std::uint32_t local_hist_count = local_hist.get_range().get(0); auto group = item.get_group(); - uint32_t local_size = group.get_local_linear_range(); + std::uint32_t local_size = group.get_local_linear_range(); - for (uint32_t i = llid; i < bins_count; i += local_size) { + for (std::uint32_t i = llid; i < bins_count; i += local_size) { auto value = local_hist[0][i]; - for (uint32_t lhc = 1; lhc < local_hist_count; ++lhc) { + for (std::uint32_t lhc = 1; lhc < local_hist_count; ++lhc) { value += local_hist[lhc][i]; } if (value != T(0)) { @@ -235,10 +221,7 @@ struct HistWithLocalCopies } } - uint32_t size() const - { - return local_hist.size(); - } + std::uint32_t size() const { return local_hist.size(); } private: LocalHist local_hist; @@ -251,10 +234,7 @@ struct HistGlobalMemory static constexpr bool const sync_after_init = false; static constexpr bool const sync_before_finalize = false; - HistGlobalMemory(T *global_data) - { - global_hist = global_data; - } + HistGlobalMemory(T *global_data) { global_hist = global_data; } template void init(const sycl::nd_item<_Dims> &) const @@ -262,7 +242,7 @@ struct HistGlobalMemory } template - void add(const sycl::nd_item<_Dims> &, int32_t bin, T value) const + void add(const sycl::nd_item<_Dims> &, std::int32_t bin, T value) const { AtomicOp::add(global_hist[bin], value); @@ -277,27 +257,18 @@ struct HistGlobalMemory T *global_hist = nullptr; }; -template +template struct NoWeights { - constexpr T get(size_t) const - { - return 1; - } + constexpr T get(std::size_t) const { return 1; } }; template struct Weights { - Weights(T *weights) - { - data = weights; - } + 
Weights(T *weights) { data = weights; } - T get(size_t id) const - { - return data[id]; - } + T get(std::size_t id) const { return data[id]; } private: T *data = nullptr; @@ -310,55 +281,23 @@ bool check_in_bounds(const dT &val, const dT &min, const dT &max) return !_less(val, min) && !_less(max, val) && !IsNan
::isnan(val); } -template -class histogram_kernel; - template void submit_histogram(const T *in, - const size_t size, - const size_t dims, - const uint32_t WorkPI, + const std::size_t size, + const std::size_t dims, + const std::uint32_t WorkPI, const HistImpl &hist, const Edges &edges, const Weights &weights, sycl::nd_range<1> nd_range, sycl::handler &cgh) { - cgh.parallel_for>( - nd_range, [=](sycl::nd_item<1> item) { - auto id = item.get_group_linear_id(); - auto lid = item.get_local_linear_id(); - auto group = item.get_group(); - auto local_size = item.get_local_range(0); - - hist.init(item); - edges.init(item); - - if constexpr (HistImpl::sync_after_init || Edges::sync_after_init) { - sycl::group_barrier(group, sycl::memory_scope::work_group); - } - - auto bounds = edges.get_bounds(); - - for (uint32_t i = 0; i < WorkPI; ++i) { - auto data_idx = id * WorkPI * local_size + i * local_size + lid; - if (data_idx < size) { - auto *d = &in[data_idx * dims]; - - if (edges.in_bounds(d, bounds)) { - auto bin = edges.get_bin(item, d, bounds); - auto weight = weights.get(data_idx); - hist.add(item, bin, weight); - } - } - } - - if constexpr (HistImpl::sync_before_finalize) { - sycl::group_barrier(group, sycl::memory_scope::work_group); - } + using HistogramKernel = + dpnp::kernels::histogram::HistogramFunctor; - hist.finalize(item); - }); + cgh.parallel_for( + nd_range, + HistogramKernel(in, size, dims, WorkPI, hist, edges, weights)); } void validate(const usm_ndarray &sample, @@ -366,8 +305,8 @@ void validate(const usm_ndarray &sample, const std::optional &weights, const usm_ndarray &histogram); -uint32_t get_local_hist_copies_count(uint32_t loc_mem_size_in_items, - uint32_t local_size, - uint32_t hist_size_in_items); +std::uint32_t get_local_hist_copies_count(std::uint32_t loc_mem_size_in_items, + std::uint32_t local_size, + std::uint32_t hist_size_in_items); } // namespace statistics::histogram diff --git a/dpnp/backend/extensions/statistics/histogramdd.cpp 
b/dpnp/backend/extensions/statistics/histogramdd.cpp index a5ed4a8c7d1c..bd2177073333 100644 --- a/dpnp/backend/extensions/statistics/histogramdd.cpp +++ b/dpnp/backend/extensions/statistics/histogramdd.cpp @@ -90,10 +90,7 @@ struct EdgesDd } } - boundsT get_bounds() const - { - return {&min[0], &max[0]}; - } + boundsT get_bounds() const { return {&min[0], &max[0]}; } auto get_bin_for_dim(const EdgesT &val, const EdgesT *edges_data, diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.hpp b/dpnp/backend/extensions/statistics/sliding_window1d.hpp index c5a5bac111dd..329c96dfc1c6 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.hpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.hpp @@ -28,25 +28,21 @@ #pragma once -#include - -#include "utils/math_utils.hpp" -#include +#include +#include #include -#include - -#include "ext/common.hpp" +#include -using dpctl::tensor::usm_ndarray; +#include "dpctl4pybind11.hpp" -using ext::common::Align; -using ext::common::CeilDiv; +#include "kernels/statistics/sliding_window1d.hpp" namespace statistics::sliding_window1d { +using dpctl::tensor::usm_ndarray; -template +template class _RegistryDataStorage { public: @@ -129,37 +125,22 @@ class _RegistryDataStorage return sycl::shift_group_right(sbgroup, data[y], x); } - constexpr SizeT size_y() const - { - return _size; - } + constexpr SizeT size_y() const { return _size; } - SizeT size_x() const - { - return sbgroup.get_max_local_range()[0]; - } + SizeT size_x() const { return sbgroup.get_max_local_range()[0]; } - SizeT total_size() const - { - return size_x() * size_y(); - } + SizeT total_size() const { return size_x() * size_y(); } - ncT *ptr() - { - return data; - } + ncT *ptr() { return data; } - SizeT x() const - { - return sbgroup.get_local_linear_id(); - } + SizeT x() const { return sbgroup.get_local_linear_id(); } protected: const sycl::sub_group sbgroup; ncT data[Size]; }; -template +template struct RegistryData : public 
_RegistryDataStorage { using SizeT = typename _RegistryDataStorage::SizeT; @@ -277,8 +258,7 @@ struct RegistryData : public _RegistryDataStorage T *load(const T *const data, const bool &mask, const T &default_v) { - return load( - data, [mask](auto &&) { return mask; }, default_v); + return load(data, [mask](auto &&) { return mask; }, default_v); } T *load(const T *const data) @@ -349,13 +329,10 @@ struct RegistryData : public _RegistryDataStorage return store(data, [mask](auto &&) { return mask; }); } - T *store(T *const data) - { - return store(data, true); - } + T *store(T *const data) { return store(data, true); } }; -template +template struct RegistryWindow : public RegistryData { using SizeT = typename RegistryData::SizeT; @@ -368,7 +345,7 @@ struct RegistryWindow : public RegistryData static_assert(std::is_integral_v, "shift must be of an integral type"); - uint32_t shift_r = this->size_x() - shift; + std::uint32_t shift_r = this->size_x() - shift; for (SizeT i = 0; i < Size; ++i) { this->data[i] = this->shift_left(i, shift); auto border = @@ -379,10 +356,7 @@ struct RegistryWindow : public RegistryData } } - void advance_left(const T &fill_value) - { - advance_left(1, fill_value); - } + void advance_left(const T &fill_value) { advance_left(1, fill_value); } void advance_left() { @@ -391,7 +365,7 @@ struct RegistryWindow : public RegistryData } }; -template +template class Span { public: @@ -400,38 +374,26 @@ class Span Span(T *const data, const SizeT size) : data_(data), size_(size) {} - T *begin() const - { - return data(); - } + T *begin() const { return data(); } - T *end() const - { - return data() + size(); - } + T *end() const { return data() + size(); } - SizeT size() const - { - return size_; - } + SizeT size() const { return size_; } - T *data() const - { - return data_; - } + T *data() const { return data_; } protected: T *const data_; const SizeT size_; }; -template +template Span make_span(T *const data, const SizeT size) { return Span(data, 
size); } -template +template class PaddedSpan : public Span { public: @@ -443,82 +405,22 @@ class PaddedSpan : public Span { } - T *padded_begin() const - { - return this->begin() - pad(); - } + T *padded_begin() const { return this->begin() - pad(); } - SizeT pad() const - { - return pad_; - } + SizeT pad() const { return pad_; } protected: const SizeT pad_; }; -template +template PaddedSpan make_padded_span(T *const data, const SizeT size, const SizeT offset) { return PaddedSpan(data, size, offset); } -template -void process_block(Results &results, - uint32_t r_size, - AData &a_data, - VData &v_data, - uint32_t block_size, - Op op, - Red red) -{ - for (uint32_t i = 0; i < block_size; ++i) { - auto v_val = v_data.broadcast(i); - for (uint32_t r = 0; r < r_size; ++r) { - results[r] = red(results[r], op(a_data[r], v_val)); - } - a_data.advance_left(); - } -} - -template -SizeT get_global_linear_id(const uint32_t wpi, const sycl::nd_item<1> &item) -{ - auto sbgroup = item.get_sub_group(); - const auto sg_loc_id = sbgroup.get_local_linear_id(); - - const SizeT sg_base_id = wpi * (item.get_global_linear_id() - sg_loc_id); - const SizeT id = sg_base_id + sg_loc_id; - - return id; -} - -template -uint32_t get_results_num(const uint32_t wpi, - const SizeT size, - const SizeT global_id, - const sycl::nd_item<1> &item) -{ - auto sbgroup = item.get_sub_group(); - - const auto sbg_size = sbgroup.get_max_local_range()[0]; - const auto size_ = sycl::sub_sat(size, global_id); - return std::min(SizeT(wpi), CeilDiv(size_, sbg_size)); -} - -template -class sliding_window1d_kernel; - -template &a, sycl::nd_range<1> nd_range, sycl::handler &cgh) { - cgh.parallel_for>( - nd_range, [=](sycl::nd_item<1> item) { - auto glid = get_global_linear_id(WorkPI, item); - - auto results = RegistryData(item); - results.fill(0); - - auto results_num = get_results_num(WorkPI, out.size(), glid, item); - - const auto *a_begin = a.begin(); - const auto *a_end = a.end(); - - auto sbgroup = 
item.get_sub_group(); - - const auto chunks_count = - CeilDiv(v.size(), sbgroup.get_max_local_range()[0]); - - const auto *a_ptr = &a.padded_begin()[glid]; - - auto _a_load_cond = [a_begin, a_end](auto &&ptr) { - return ptr >= a_begin && ptr < a_end; - }; - - auto a_data = RegistryWindow(item); - a_ptr = a_data.load(a_ptr, _a_load_cond, 0); - - const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; - auto v_size = v.size(); - - for (uint32_t b = 0; b < chunks_count; ++b) { - auto v_data = RegistryData(item); - v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); - - uint32_t chunk_size_ = - std::min(v_size, SizeT(v_data.total_size())); - process_block(results, results_num, a_data, v_data, chunk_size_, - op, red); + using SlidingWindow1dKernel = + dpnp::kernels::sliding_window1d::SlidingWindow1dFunctor< + WorkPI, PaddedSpan, Span, Op, Red, + Span, RegistryData, RegistryWindow>; - if (b != chunks_count - 1) { - a_ptr = a_data.load_lane(a_data.size_y() - 1, a_ptr, - _a_load_cond, 0); - v_size -= v_data.total_size(); - } - } - - auto *const out_ptr = out.begin(); - // auto *const out_end = out.end(); - - auto y_start = glid; - auto y_stop = - std::min(y_start + WorkPI * results.size_x(), out.size()); - uint32_t i = 0; - for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { - out_ptr[y] = results[i++]; - } - // while the code itself seems to be valid, inside correlate - // kernel it results in memory corruption. Further investigation - // is needed. 
SAT-7693 - // corruption results.store(&out_ptr[glid], - // [out_end](auto &&ptr) { return ptr < out_end; }); - }); + cgh.parallel_for( + nd_range, SlidingWindow1dKernel(a, v, op, red, out)); } -template -class sliding_window1d_small_kernel; - -template &a, sycl::nd_range<1> nd_range, sycl::handler &cgh) { - cgh.parallel_for>( - nd_range, [=](sycl::nd_item<1> item) { - auto glid = get_global_linear_id(WorkPI, item); - - auto results = RegistryData(item); - results.fill(0); - - auto sbgroup = item.get_sub_group(); - auto sg_size = sbgroup.get_max_local_range()[0]; + using SlidingWindow1dSmallKernel = + dpnp::kernels::sliding_window1d::SlidingWindow1dSmallFunctor< + WorkPI, PaddedSpan, Span, Op, Red, + Span, RegistryData, RegistryWindow>; - const uint32_t to_read = WorkPI * sg_size + v.size(); - const auto *a_begin = a.begin(); - - const auto *a_ptr = &a.padded_begin()[glid]; - const auto *a_end = std::min(a_ptr + to_read, a.end()); - - auto _a_load_cond = [a_begin, a_end](auto &&ptr) { - return ptr >= a_begin && ptr < a_end; - }; - - auto a_data = RegistryWindow(item); - a_data.load(a_ptr, _a_load_cond, 0); - - const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; - auto v_size = v.size(); - - auto v_data = RegistryData(item); - v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); - - auto results_num = get_results_num(WorkPI, out.size(), glid, item); - - process_block(results, results_num, a_data, v_data, v_size, op, - red); - - auto *const out_ptr = out.begin(); - // auto *const out_end = out.end(); - - auto y_start = glid; - auto y_stop = - std::min(y_start + WorkPI * results.size_x(), out.size()); - uint32_t i = 0; - for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { - out_ptr[y] = results[i++]; - } - // while the code itself seems to be valid, inside correlate - // kernel it results in memory corruption. Further investigation - // is needed. 
SAT-7693 - // corruption results.store(&out_ptr[glid], - // [out_end](auto &&ptr) { return ptr < out_end; }); - }); + cgh.parallel_for( + nd_range, SlidingWindow1dSmallKernel(a, v, op, red, out)); } void validate(const usm_ndarray &a, const usm_ndarray &v, const usm_ndarray &out, - const size_t l_pad, - const size_t r_pad); + const std::size_t l_pad, + const std::size_t r_pad); } // namespace statistics::sliding_window1d diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp index 5254e50d3faf..6f10e651fe25 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/erf_funcs.cpp @@ -184,8 +184,7 @@ using ew_cmn_ns::unary_strided_impl_fn_ptr_t; }; template