[RISCV] Support LLVM IR intrinsics for XAndesVDot #140223

Open · wants to merge 1 commit into main
Conversation

tclin914 (Contributor)

@llvmbot (Member)

llvmbot commented May 16, 2025

@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-llvm-ir

Author: Jim Lin (tclin914)

Changes

This patch adds LLVM IR intrinsic support for XAndesVDot, similar to #139860.

The documentation for the intrinsics can be found at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#andes-vector-dot-product-extensionxandesvdot
and, for the policy variants, at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/policy_funcs/intrinsic_funcs.adoc#andes-vector-dot-product-extensionxandesvdot

The clang part will be added in a later patch.
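
For reference, the unmasked form of the new intrinsic is invoked as in the sketch below, abbreviated from the xandesvdot-vd4dots.ll test added by this patch. The function name @example and the value names are illustrative only, and i64 stands in for the iXLen placeholder that the tests substitute via sed; the last two operands are the VL and the policy immediate.

declare <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
  <vscale x 1 x i32>, <vscale x 4 x i8>, <vscale x 4 x i8>, i64, i64)

define <vscale x 1 x i32> @example(<vscale x 1 x i32> %acc, <vscale x 4 x i8> %a, <vscale x 4 x i8> %b, i64 %vl) {
entry:
  ; Signed 4-way dot-product accumulation of %a and %b into %acc.
  ; The trailing i64 2 is the policy operand, which lowers to the "tu, ma"
  ; vsetvli seen in the CHECK lines of the tests below.
  %r = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
           <vscale x 1 x i32> %acc, <vscale x 4 x i8> %a, <vscale x 4 x i8> %b, i64 %vl, i64 2)
  ret <vscale x 1 x i32> %r
}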


Patch is 49.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140223.diff

5 Files Affected:

  • (modified) llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td (+5)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td (+50)
  • (added) llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dots.ll (+405)
  • (added) llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dotsu.ll (+405)
  • (added) llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dotu.ll (+405)
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
index d90fe2cd0e6f3..270066f815d8b 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
@@ -14,4 +14,9 @@ let TargetPrefix = "riscv" in {
   // Andes Vector Packed FP16 Extension
   defm nds_vfpmadt : RISCVBinaryAAXRoundingMode;
   defm nds_vfpmadb : RISCVBinaryAAXRoundingMode;
+
+  // Andes Vector Dot Product Extension
+  defm nds_vd4dots  : RISCVTernaryWide;
+  defm nds_vd4dotu  : RISCVTernaryWide;
+  defm nds_vd4dotsu : RISCVTernaryWide;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
index 6afe88b805d35..4e24a2e062635 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
@@ -388,6 +388,21 @@ multiclass VPatVFPMADBinaryV_VX_RM<string intrinsic, string instruction,
   }
 }
 
+def VD4DOT_M1 : LMULInfo<0b000,  8,   VR, VR,   VR,   VR,   VR, "M1">;
+def VD4DOT_M2 : LMULInfo<0b001, 16, VRM2, VRM2, VR,   VR,   VR, "M2">;
+def VD4DOT_M4 : LMULInfo<0b010, 32, VRM4, VRM4, VRM2, VR,   VR, "M4">;
+def VD4DOT_M8 : LMULInfo<0b011, 64, VRM8, VRM8, VRM4, VRM2, VR, "M8">;
+
+defvar MxListVD4DOT = [V_MF2, VD4DOT_M1, VD4DOT_M2, VD4DOT_M4, VD4DOT_M8];
+
+multiclass VPseudoVD4DOT_VV {
+  foreach m = MxListVD4DOT in {
+    defm "" : VPseudoBinaryV_VV<m>,
+              SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX,
+                          forcePassthruRead=true>;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // XAndesPerf
 //===----------------------------------------------------------------------===//
@@ -499,3 +514,38 @@ defm PseudoNDS_VFPMADB : VPseudoVFPMAD_VF_RM;
 
 defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadt", "PseudoNDS_VFPMADT", AllFP16Vectors>;
 defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadb", "PseudoNDS_VFPMADB", AllFP16Vectors>;
+
+let Predicates = [HasVendorXAndesVDot] in {
+defm PseudoNDS_VD4DOTS  : VPseudoVD4DOT_VV;
+defm PseudoNDS_VD4DOTU  : VPseudoVD4DOT_VV;
+defm PseudoNDS_VD4DOTSU : VPseudoVD4DOT_VV;
+}
+
+defset list<VTypeInfoToWide> AllQuadWidenableVD4DOTVectors = {
+  def : VTypeInfoToWide<VI8MF2,  VI32MF2>;
+  def : VTypeInfoToWide<VI8M1,   VI32M1>;
+  def : VTypeInfoToWide<VI8M2,   VI32M2>;
+  def : VTypeInfoToWide<VI8M4,   VI32M4>;
+  def : VTypeInfoToWide<VI8M8,   VI32M8>;
+  def : VTypeInfoToWide<VI16M1,  VI64M1>;
+  def : VTypeInfoToWide<VI16M2,  VI64M2>;
+  def : VTypeInfoToWide<VI16M4,  VI64M4>;
+  def : VTypeInfoToWide<VI16M8,  VI64M8>;
+}
+
+multiclass VPatTernaryVD4DOT_VV<string intrinsic, string instruction,
+                                list<VTypeInfoToWide> vtilist> {
+  foreach vtiToWti = vtilist in {
+    defvar vti = vtiToWti.Vti;
+    defvar wti = vtiToWti.Wti;
+    let Predicates = GetVTypePredicates<wti>.Predicates in
+    defm : VPatTernaryWithPolicy<intrinsic, instruction, "VV",
+                                 wti.Vector, vti.Vector, vti.Vector,
+                                 wti.Mask, wti.Log2SEW, vti.LMul,
+                                 wti.RegClass, vti.RegClass, vti.RegClass>;
+  }
+}
+
+defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dots", "PseudoNDS_VD4DOTS", AllQuadWidenableVD4DOTVectors>;
+defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dotu", "PseudoNDS_VD4DOTU", AllQuadWidenableVD4DOTVectors>;
+defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dotsu", "PseudoNDS_VD4DOTSU", AllQuadWidenableVD4DOTVectors>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dots.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dots.ll
new file mode 100644
index 0000000000000..bc839899854b5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dots.ll
@@ -0,0 +1,405 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64x,+xandesvdot \
+; RUN:   -verify-machineinstrs -target-abi=ilp32 | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64x,+xandesvdot \
+; RUN:   -verify-machineinstrs -target-abi=lp64 | FileCheck %s
+
+declare <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
+  <vscale x 1 x i32>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_vd4dots_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
+    <vscale x 1 x i32> %0,
+    <vscale x 4 x i8> %1,
+    <vscale x 4 x i8> %2,
+    iXLen %3, iXLen 2)
+  ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8(
+  <vscale x 1 x i32>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_vd4dots_mask_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 1 x i1>%3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8(
+    <vscale x 1 x i32> %0,
+    <vscale x 4 x i8> %1,
+    <vscale x 4 x i8> %2,
+    <vscale x 1 x i1> %3,
+    iXLen %4, iXLen 2)
+  ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.nxv2i32.nxv8i8.nxv8i8(
+  <vscale x 2 x i32>,
+  <vscale x 8 x i8>,
+  <vscale x 8 x i8>,
+  iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_vd4dots_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv2i32_nxv8i8_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.nxv2i32.nxv8i8.nxv8i8(
+    <vscale x 2 x i32> %0,
+    <vscale x 8 x i8> %1,
+    <vscale x 8 x i8> %2,
+    iXLen %3, iXLen 2)
+  ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8(
+  <vscale x 2 x i32>,
+  <vscale x 8 x i8>,
+  <vscale x 8 x i8>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_vd4dots_mask_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv2i32_nxv8i8_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8(
+    <vscale x 2 x i32> %0,
+    <vscale x 8 x i8> %1,
+    <vscale x 8 x i8> %2,
+    <vscale x 2 x i1> %3,
+    iXLen %4, iXLen 2)
+  ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.nxv4i32.nxv16i8.nxv16i8(
+  <vscale x 4 x i32>,
+  <vscale x 16 x i8>,
+  <vscale x 16 x i8>,
+  iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_vd4dots_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv4i32_nxv16i8_nxv16i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v10, v12
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.nxv4i32.nxv16i8.nxv16i8(
+    <vscale x 4 x i32> %0,
+    <vscale x 16 x i8> %1,
+    <vscale x 16 x i8> %2,
+    iXLen %3, iXLen 2)
+  ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8(
+  <vscale x 4 x i32>,
+  <vscale x 16 x i8>,
+  <vscale x 16 x i8>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_vd4dots_mask_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv4i32_nxv16i8_nxv16i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8(
+    <vscale x 4 x i32> %0,
+    <vscale x 16 x i8> %1,
+    <vscale x 16 x i8> %2,
+    <vscale x 4 x i1> %3,
+    iXLen %4, iXLen 2)
+  ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.nxv8i32.nxv32i8.nxv32i8(
+  <vscale x 8 x i32>,
+  <vscale x 32 x i8>,
+  <vscale x 32 x i8>,
+  iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_vd4dots_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv8i32_nxv32i8_nxv32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v12, v16
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.nxv8i32.nxv32i8.nxv32i8(
+    <vscale x 8 x i32> %0,
+    <vscale x 32 x i8> %1,
+    <vscale x 32 x i8> %2,
+    iXLen %3, iXLen 2)
+  ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8(
+  <vscale x 8 x i32>,
+  <vscale x 32 x i8>,
+  <vscale x 32 x i8>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_vd4dots_mask_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv8i32_nxv32i8_nxv32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8(
+    <vscale x 8 x i32> %0,
+    <vscale x 32 x i8> %1,
+    <vscale x 32 x i8> %2,
+    <vscale x 8 x i1> %3,
+    iXLen %4, iXLen 2)
+  ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.nxv16i32.nxv64i8.nxv64i8(
+  <vscale x 16 x i32>,
+  <vscale x 64 x i8>,
+  <vscale x 64 x i8>,
+  iXLen, iXLen);
+
+define <vscale x 16 x i32> @intrinsic_vd4dots_vv_nxv16i32_nxv64i8_nxv64i8(<vscale x 16 x i32> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv16i32_nxv64i8_nxv64i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vl8r.v v24, (a0)
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v16, v24
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.nxv16i32.nxv64i8.nxv64i8(
+    <vscale x 16 x i32> %0,
+    <vscale x 64 x i8> %1,
+    <vscale x 64 x i8> %2,
+    iXLen %3, iXLen 2)
+  ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8(
+  <vscale x 16 x i32>,
+  <vscale x 64 x i8>,
+  <vscale x 64 x i8>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 16 x i32> @intrinsic_vd4dots_mask_vv_nxv16i32_nxv64i8_nxv64i8(<vscale x 16 x i32> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv16i32_nxv64i8_nxv64i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vl8r.v v24, (a0)
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8(
+    <vscale x 16 x i32> %0,
+    <vscale x 64 x i8> %1,
+    <vscale x 64 x i8> %2,
+    <vscale x 16 x i1> %3,
+    iXLen %4, iXLen 2)
+  ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.nxv1i64.nxv4i16.nxv4i16(
+  <vscale x 1 x i64>,
+  <vscale x 4 x i16>,
+  <vscale x 4 x i16>,
+  iXLen, iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vd4dots_vv_nxv1i64_nxv4i16_nxv4i16(<vscale x 1 x i64> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv1i64_nxv4i16_nxv4i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.nxv1i64.nxv4i16.nxv4i16(
+    <vscale x 1 x i64> %0,
+    <vscale x 4 x i16> %1,
+    <vscale x 4 x i16> %2,
+    iXLen %3, iXLen 2)
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16(
+  <vscale x 1 x i64>,
+  <vscale x 4 x i16>,
+  <vscale x 4 x i16>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vd4dots_mask_vv_nxv1i64_nxv4i16_nxv4i16(<vscale x 1 x i64> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2,  <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv1i64_nxv4i16_nxv4i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16(
+    <vscale x 1 x i64> %0,
+    <vscale x 4 x i16> %1,
+    <vscale x 4 x i16> %2,
+    <vscale x 1 x i1> %3,
+    iXLen %4, iXLen 2)
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.nxv2i64.nxv8i16.nxv8i16(
+  <vscale x 2 x i64>,
+  <vscale x 8 x i16>,
+  <vscale x 8 x i16>,
+  iXLen, iXLen);
+
+define <vscale x 2 x i64> @intrinsic_vd4dots_vv_nxv2i64_nxv8i16_nxv8i16(<vscale x 2 x i64> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv2i64_nxv8i16_nxv8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v10, v12
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.nxv2i64.nxv8i16.nxv8i16(
+    <vscale x 2 x i64> %0,
+    <vscale x 8 x i16> %1,
+    <vscale x 8 x i16> %2,
+    iXLen %3, iXLen 2)
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16(
+  <vscale x 2 x i64>,
+  <vscale x 8 x i16>,
+  <vscale x 8 x i16>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 2 x i64> @intrinsic_vd4dots_mask_vv_nxv2i64_nxv8i16_nxv8i16(<vscale x 2 x i64> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 2 x i1>%3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv2i64_nxv8i16_nxv8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16(
+    <vscale x 2 x i64> %0,
+    <vscale x 8 x i16> %1,
+    <vscale x 8 x i16> %2,
+    <vscale x 2 x i1> %3,
+    iXLen %4, iXLen 2)
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.nxv4i64.nxv16i16.nxv16i16(
+  <vscale x 4 x i64>,
+  <vscale x 16 x i16>,
+  <vscale x 16 x i16>,
+  iXLen, iXLen);
+
+define <vscale x 4 x i64> @intrinsic_vd4dots_vv_nxv4i64_nxv16i16_nxv16i16(<vscale x 4 x i64> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv4i64_nxv16i16_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v12, v16
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.nxv4i64.nxv16i16.nxv16i16(
+    <vscale x 4 x i64> %0,
+    <vscale x 16 x i16> %1,
+    <vscale x 16 x i16> %2,
+    iXLen %3, iXLen 2)
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16(
+  <vscale x 4 x i64>,
+  <vscale x 16 x i16>,
+  <vscale x 16 x i16>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 4 x i64> @intrinsic_vd4dots_mask_vv_nxv4i64_nxv16i16_nxv16i16(<vscale x 4 x i64> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv4i64_nxv16i16_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16(
+    <vscale x 4 x i64> %0,
+    <vscale x 16 x i16> %1,
+    <vscale x 16 x i16> %2,
+    <vscale x 4 x i1> %3,
+    iXLen %4, iXLen 2)
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.nxv8i64.nxv32i16.nxv32i16(
+  <vscale x 8 x i64>,
+  <vscale x 32 x i16>,
+  <vscale x 32 x i16>,
+  iXLen, iXLen);
+
+define <vscale x 8 x i64> @intrinsic_vd4dots_vv_nxv8i64_nxv32i16_nxv32i16(<vscale x 8 x i64> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv8i64_nxv32i16_nxv32i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vl8re16.v v24, (a0)
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v16, v24
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.nxv8i64.nxv32i16.nxv32i16(
+    <vscale x 8 x i64> %0,
+    <vscale x 32 x i16> %1,
+    <vscale x 32 x i16> %2,
+    iXLen %3, iXLen 2)
+  ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16(
+  <vscale x 8 x i64>,
+  <vscale x 32 x i16>,
+  <vscale x 32 x i16>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 8 x i64> @intrinsic_vd4dots_mask_vv_nxv8i64_nxv32i16_nxv32i16(<vscale x 8 x i64> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv8i64_nxv32i16_nxv32i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vl8re16.v v24, (a0)
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
+; CHECK-NEXT:    nds.vd4dots.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16(
+    <vscale x 8 x i64> %0,
+    <vscale x 32 x i16> %1,
+    <vscale x 32 x i16> %2,
+    <vscale x 8 x i1> %3,
+    iXLen %4, iXLen 2)
+  ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dotsu.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dotsu.ll
new file mode 100644
index 0000000000000..88eb4f297b7af
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dotsu.ll
@@ -0,0 +1,405 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64x,+xandesvdot \
+; RUN:   -verify-machineinstrs -target-abi=ilp32 | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64x,+xandesvdot \
+; RUN:   -verify-machineinstrs -target-abi=lp64 | FileCheck %s
+
+declare <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.nxv1i32.nxv4i8.nxv4i8(
+  <vscale x 1 x i32>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i8>,
+  iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_vd4dotsu_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dotsu_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
+; CHECK-NEXT:    nds.vd4dotsu.vv v8, v9, v10
+; CHECK-NEXT:    ret
+entry:...
[truncated]

@@ -388,6 +388,21 @@ multiclass VPatVFPMADBinaryV_VX_RM<string intrinsic, string instruction,
}
}

def VD4DOT_M1 : LMULInfo<0b000, 8, VR, VR, VR, VR, VR, "M1">;
Collaborator


Why do we need a new LMULInfo? It looks like the only difference is what you pass to wregclass. Does VPseudoBinaryV_VV use wvrclass?
