[RISCV] Support LLVM IR intrinsics for XAndesVDot #140223
This patch adds LLVM IR intrinsic support for XAndesVDot, similar to llvm#139860. The documentation for the intrinsics can be found at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#andes-vector-dot-product-extensionxandesvdot
and, for the policy variants, at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/policy_funcs/intrinsic_funcs.adoc#andes-vector-dot-product-extensionxandesvdot

The clang part will be added in a later patch.

Co-authored-by: Tony Chuan-Yue Yuan <yuan593@andestech.com>
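For illustration, a minimal sketch of how the unmasked form of one of the new intrinsics is called from LLVM IR, mirroring the tests added in this patch. Assumptions: iXLen is written as i64 (riscv64), the function name @example_vd4dots is made up for the example, and the last two operands are the AVL and the policy (the tests pass policy 2, which corresponds to the tu, ma vsetvli in the CHECK lines); exact semantics of the dot product are in the Andes documents linked above.

declare <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
  <vscale x 1 x i32>, <vscale x 4 x i8>, <vscale x 4 x i8>, i64, i64)

define <vscale x 1 x i32> @example_vd4dots(<vscale x 1 x i32> %acc, <vscale x 4 x i8> %a, <vscale x 4 x i8> %b, i64 %vl) {
entry:
  ; Dot-product accumulate: groups of four i8 elements from %a and %b are
  ; reduced into each i32 lane of the accumulator %acc, for %vl elements.
  %r = call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
      <vscale x 1 x i32> %acc, <vscale x 4 x i8> %a, <vscale x 4 x i8> %b,
      i64 %vl, i64 2)
  ret <vscale x 1 x i32> %r
}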
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-ir

Author: Jim Lin (tclin914)

Changes: This patch adds LLVM IR intrinsic support for XAndesVDot, similar to #139860. The documentation for the intrinsics can be found at the links above. The clang part will be added in a later patch.

Patch is 49.66 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/140223.diff

5 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
index d90fe2cd0e6f3..270066f815d8b 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
@@ -14,4 +14,9 @@ let TargetPrefix = "riscv" in {
// Andes Vector Packed FP16 Extension
defm nds_vfpmadt : RISCVBinaryAAXRoundingMode;
defm nds_vfpmadb : RISCVBinaryAAXRoundingMode;
+
+ // Andes Vector Dot Product Extension
+ defm nds_vd4dots : RISCVTernaryWide;
+ defm nds_vd4dotu : RISCVTernaryWide;
+ defm nds_vd4dotsu : RISCVTernaryWide;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
index 6afe88b805d35..4e24a2e062635 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
@@ -388,6 +388,21 @@ multiclass VPatVFPMADBinaryV_VX_RM<string intrinsic, string instruction,
}
}
+def VD4DOT_M1 : LMULInfo<0b000, 8, VR, VR, VR, VR, VR, "M1">;
+def VD4DOT_M2 : LMULInfo<0b001, 16, VRM2, VRM2, VR, VR, VR, "M2">;
+def VD4DOT_M4 : LMULInfo<0b010, 32, VRM4, VRM4, VRM2, VR, VR, "M4">;
+def VD4DOT_M8 : LMULInfo<0b011, 64, VRM8, VRM8, VRM4, VRM2, VR, "M8">;
+
+defvar MxListVD4DOT = [V_MF2, VD4DOT_M1, VD4DOT_M2, VD4DOT_M4, VD4DOT_M8];
+
+multiclass VPseudoVD4DOT_VV {
+ foreach m = MxListVD4DOT in {
+ defm "" : VPseudoBinaryV_VV<m>,
+ SchedBinary<"WriteVIALUV", "ReadVIALUV", "ReadVIALUV", m.MX,
+ forcePassthruRead=true>;
+ }
+}
+
//===----------------------------------------------------------------------===//
// XAndesPerf
//===----------------------------------------------------------------------===//
@@ -499,3 +514,38 @@ defm PseudoNDS_VFPMADB : VPseudoVFPMAD_VF_RM;
defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadt", "PseudoNDS_VFPMADT", AllFP16Vectors>;
defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadb", "PseudoNDS_VFPMADB", AllFP16Vectors>;
+
+let Predicates = [HasVendorXAndesVDot] in {
+defm PseudoNDS_VD4DOTS : VPseudoVD4DOT_VV;
+defm PseudoNDS_VD4DOTU : VPseudoVD4DOT_VV;
+defm PseudoNDS_VD4DOTSU : VPseudoVD4DOT_VV;
+}
+
+defset list<VTypeInfoToWide> AllQuadWidenableVD4DOTVectors = {
+ def : VTypeInfoToWide<VI8MF2, VI32MF2>;
+ def : VTypeInfoToWide<VI8M1, VI32M1>;
+ def : VTypeInfoToWide<VI8M2, VI32M2>;
+ def : VTypeInfoToWide<VI8M4, VI32M4>;
+ def : VTypeInfoToWide<VI8M8, VI32M8>;
+ def : VTypeInfoToWide<VI16M1, VI64M1>;
+ def : VTypeInfoToWide<VI16M2, VI64M2>;
+ def : VTypeInfoToWide<VI16M4, VI64M4>;
+ def : VTypeInfoToWide<VI16M8, VI64M8>;
+}
+
+multiclass VPatTernaryVD4DOT_VV<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach vtiToWti = vtilist in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = GetVTypePredicates<wti>.Predicates in
+ defm : VPatTernaryWithPolicy<intrinsic, instruction, "VV",
+ wti.Vector, vti.Vector, vti.Vector,
+ wti.Mask, wti.Log2SEW, vti.LMul,
+ wti.RegClass, vti.RegClass, vti.RegClass>;
+ }
+}
+
+defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dots", "PseudoNDS_VD4DOTS", AllQuadWidenableVD4DOTVectors>;
+defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dotu", "PseudoNDS_VD4DOTU", AllQuadWidenableVD4DOTVectors>;
+defm : VPatTernaryVD4DOT_VV<"int_riscv_nds_vd4dotsu", "PseudoNDS_VD4DOTSU", AllQuadWidenableVD4DOTVectors>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dots.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dots.ll
new file mode 100644
index 0000000000000..bc839899854b5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dots.ll
@@ -0,0 +1,405 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64x,+xandesvdot \
+; RUN: -verify-machineinstrs -target-abi=ilp32 | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64x,+xandesvdot \
+; RUN: -verify-machineinstrs -target-abi=lp64 | FileCheck %s
+
+declare <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
+ <vscale x 1 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_vd4dots_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.nxv1i32.nxv4i8.nxv4i8(
+ <vscale x 1 x i32> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i8> %2,
+ iXLen %3, iXLen 2)
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8(
+ <vscale x 1 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_vd4dots_mask_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 1 x i1>%3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 1 x i32> @llvm.riscv.nds.vd4dots.mask.nxv1i32.nxv4i8.nxv4i8(
+ <vscale x 1 x i32> %0,
+ <vscale x 4 x i8> %1,
+ <vscale x 4 x i8> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 2)
+ ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.nxv2i32.nxv8i8.nxv8i8(
+ <vscale x 2 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_vd4dots_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv2i32_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.nxv2i32.nxv8i8.nxv8i8(
+ <vscale x 2 x i32> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i8> %2,
+ iXLen %3, iXLen 2)
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8(
+ <vscale x 2 x i32>,
+ <vscale x 8 x i8>,
+ <vscale x 8 x i8>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i32> @intrinsic_vd4dots_mask_vv_nxv2i32_nxv8i8_nxv8i8(<vscale x 2 x i32> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv2i32_nxv8i8_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 2 x i32> @llvm.riscv.nds.vd4dots.mask.nxv2i32.nxv8i8.nxv8i8(
+ <vscale x 2 x i32> %0,
+ <vscale x 8 x i8> %1,
+ <vscale x 8 x i8> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 2)
+ ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.nxv4i32.nxv16i8.nxv16i8(
+ <vscale x 4 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i8>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_vd4dots_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv4i32_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v10, v12
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.nxv4i32.nxv16i8.nxv16i8(
+ <vscale x 4 x i32> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i8> %2,
+ iXLen %3, iXLen 2)
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8(
+ <vscale x 4 x i32>,
+ <vscale x 16 x i8>,
+ <vscale x 16 x i8>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i32> @intrinsic_vd4dots_mask_vv_nxv4i32_nxv16i8_nxv16i8(<vscale x 4 x i32> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv4i32_nxv16i8_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 4 x i32> @llvm.riscv.nds.vd4dots.mask.nxv4i32.nxv16i8.nxv16i8(
+ <vscale x 4 x i32> %0,
+ <vscale x 16 x i8> %1,
+ <vscale x 16 x i8> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 2)
+ ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.nxv8i32.nxv32i8.nxv32i8(
+ <vscale x 8 x i32>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i8>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_vd4dots_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv8i32_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v12, v16
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.nxv8i32.nxv32i8.nxv32i8(
+ <vscale x 8 x i32> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i8> %2,
+ iXLen %3, iXLen 2)
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8(
+ <vscale x 8 x i32>,
+ <vscale x 32 x i8>,
+ <vscale x 32 x i8>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i32> @intrinsic_vd4dots_mask_vv_nxv8i32_nxv32i8_nxv32i8(<vscale x 8 x i32> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv8i32_nxv32i8_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 8 x i32> @llvm.riscv.nds.vd4dots.mask.nxv8i32.nxv32i8.nxv32i8(
+ <vscale x 8 x i32> %0,
+ <vscale x 32 x i8> %1,
+ <vscale x 32 x i8> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 2)
+ ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.nxv16i32.nxv64i8.nxv64i8(
+ <vscale x 16 x i32>,
+ <vscale x 64 x i8>,
+ <vscale x 64 x i8>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i32> @intrinsic_vd4dots_vv_nxv16i32_nxv64i8_nxv64i8(<vscale x 16 x i32> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv16i32_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8r.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v16, v24
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.nxv16i32.nxv64i8.nxv64i8(
+ <vscale x 16 x i32> %0,
+ <vscale x 64 x i8> %1,
+ <vscale x 64 x i8> %2,
+ iXLen %3, iXLen 2)
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8(
+ <vscale x 16 x i32>,
+ <vscale x 64 x i8>,
+ <vscale x 64 x i8>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i32> @intrinsic_vd4dots_mask_vv_nxv16i32_nxv64i8_nxv64i8(<vscale x 16 x i32> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv16i32_nxv64i8_nxv64i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8r.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 16 x i32> @llvm.riscv.nds.vd4dots.mask.nxv16i32.nxv64i8.nxv64i8(
+ <vscale x 16 x i32> %0,
+ <vscale x 64 x i8> %1,
+ <vscale x 64 x i8> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 2)
+ ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.nxv1i64.nxv4i16.nxv4i16(
+ <vscale x 1 x i64>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i16>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vd4dots_vv_nxv1i64_nxv4i16_nxv4i16(<vscale x 1 x i64> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv1i64_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.nxv1i64.nxv4i16.nxv4i16(
+ <vscale x 1 x i64> %0,
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i16> %2,
+ iXLen %3, iXLen 2)
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16(
+ <vscale x 1 x i64>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i16>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vd4dots_mask_vv_nxv1i64_nxv4i16_nxv4i16(<vscale x 1 x i64> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv1i64_nxv4i16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 1 x i64> @llvm.riscv.nds.vd4dots.mask.nxv1i64.nxv4i16.nxv4i16(
+ <vscale x 1 x i64> %0,
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i16> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 2)
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.nxv2i64.nxv8i16.nxv8i16(
+ <vscale x 2 x i64>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i16>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i64> @intrinsic_vd4dots_vv_nxv2i64_nxv8i16_nxv8i16(<vscale x 2 x i64> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv2i64_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v10, v12
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.nxv2i64.nxv8i16.nxv8i16(
+ <vscale x 2 x i64> %0,
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i16> %2,
+ iXLen %3, iXLen 2)
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16(
+ <vscale x 2 x i64>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i16>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i64> @intrinsic_vd4dots_mask_vv_nxv2i64_nxv8i16_nxv8i16(<vscale x 2 x i64> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 2 x i1>%3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv2i64_nxv8i16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 2 x i64> @llvm.riscv.nds.vd4dots.mask.nxv2i64.nxv8i16.nxv8i16(
+ <vscale x 2 x i64> %0,
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i16> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 2)
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.nxv4i64.nxv16i16.nxv16i16(
+ <vscale x 4 x i64>,
+ <vscale x 16 x i16>,
+ <vscale x 16 x i16>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i64> @intrinsic_vd4dots_vv_nxv4i64_nxv16i16_nxv16i16(<vscale x 4 x i64> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv4i64_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v12, v16
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.nxv4i64.nxv16i16.nxv16i16(
+ <vscale x 4 x i64> %0,
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i16> %2,
+ iXLen %3, iXLen 2)
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16(
+ <vscale x 4 x i64>,
+ <vscale x 16 x i16>,
+ <vscale x 16 x i16>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i64> @intrinsic_vd4dots_mask_vv_nxv4i64_nxv16i16_nxv16i16(<vscale x 4 x i64> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv4i64_nxv16i16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 4 x i64> @llvm.riscv.nds.vd4dots.mask.nxv4i64.nxv16i16.nxv16i16(
+ <vscale x 4 x i64> %0,
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i16> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 2)
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.nxv8i64.nxv32i16.nxv32i16(
+ <vscale x 8 x i64>,
+ <vscale x 32 x i16>,
+ <vscale x 32 x i16>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i64> @intrinsic_vd4dots_vv_nxv8i64_nxv32i16_nxv32i16(<vscale x 8 x i64> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_vv_nxv8i64_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v16, v24
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.nxv8i64.nxv32i16.nxv32i16(
+ <vscale x 8 x i64> %0,
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i16> %2,
+ iXLen %3, iXLen 2)
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16(
+ <vscale x 8 x i64>,
+ <vscale x 32 x i16>,
+ <vscale x 32 x i16>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i64> @intrinsic_vd4dots_mask_vv_nxv8i64_nxv32i16_nxv32i16(<vscale x 8 x i64> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vd4dots_mask_vv_nxv8i64_nxv32i16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
+; CHECK-NEXT: nds.vd4dots.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 8 x i64> @llvm.riscv.nds.vd4dots.mask.nxv8i64.nxv32i16.nxv32i16(
+ <vscale x 8 x i64> %0,
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i16> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 2)
+ ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dotsu.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dotsu.ll
new file mode 100644
index 0000000000000..88eb4f297b7af
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvdot-vd4dotsu.ll
@@ -0,0 +1,405 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64x,+xandesvdot \
+; RUN: -verify-machineinstrs -target-abi=ilp32 | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64x,+xandesvdot \
+; RUN: -verify-machineinstrs -target-abi=lp64 | FileCheck %s
+
+declare <vscale x 1 x i32> @llvm.riscv.nds.vd4dotsu.nxv1i32.nxv4i8.nxv4i8(
+ <vscale x 1 x i32>,
+ <vscale x 4 x i8>,
+ <vscale x 4 x i8>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i32> @intrinsic_vd4dotsu_vv_nxv1i32_nxv4i8_nxv4i8(<vscale x 1 x i32> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vd4dotsu_vv_nxv1i32_nxv4i8_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
+; CHECK-NEXT: nds.vd4dotsu.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:...
[truncated]
Review comment on llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td:

@@ -388,6 +388,21 @@ multiclass VPatVFPMADBinaryV_VX_RM<string intrinsic, string instruction,
def VD4DOT_M1 : LMULInfo<0b000, 8, VR, VR, VR, VR, VR, "M1">;
Why do we need a new LMULInfo? It looks like the only difference is what you pass to wregclass. Does VPseudoBinaryV_VV use wvrclass?