[RISCV] Support LLVM IR intrinsics for XAndesVPackFPH #139860
This patch adds LLVM IR intrinsic support for XAndesVPackFPH.

The documentation for the intrinsics can be found at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#andes-vector-packed-fp16-extensionxandesvpackfph
and, with policy variants, at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/policy_funcs/intrinsic_funcs.adoc#andes-vector-packed-fp16-extensionxandesvpackfph

Co-authored-by: Tony Chuan-Yue Yuan <yuan593@andestech.com>
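For orientation, here is a minimal sketch of how one of these intrinsics is declared and called, mirroring the tests added in this patch. It is illustrative only: iXLen is a placeholder that the tests sed-substitute to i32/i64, and the use of poison for the passthru is an assumption following current LLVM guidance rather than something taken from the patch.

declare <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.nxv1f16.f32(
  <vscale x 1 x half>,  ; passthru
  <vscale x 1 x half>,  ; vector operand
  float,                ; scalar f32 operand
  iXLen, iXLen)         ; rounding mode, vector length

define <vscale x 1 x half> @example(<vscale x 1 x half> %v, float %s, iXLen %vl) {
  %r = call <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.nxv1f16.f32(
      <vscale x 1 x half> poison,  ; no passthru value needed
      <vscale x 1 x half> %v,
      float %s,
      iXLen 0,                     ; frm = 0 (rne), as in the tests below
      iXLen %vl)
  ret <vscale x 1 x half> %r
}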
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-ir

Author: Jim Lin (tclin914)

Changes: This patch adds LLVM IR intrinsic support for XAndesVPackFPH. The documentation for the intrinsics can be found at the links above. The clang part will be added in a later patch.

Patch is 24.60 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/139860.diff

5 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 18b2883eb00e7..75fb41fcd381a 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1891,3 +1891,4 @@ let TargetPrefix = "riscv" in {
include "llvm/IR/IntrinsicsRISCVXTHead.td"
include "llvm/IR/IntrinsicsRISCVXsf.td"
include "llvm/IR/IntrinsicsRISCVXCV.td"
+include "llvm/IR/IntrinsicsRISCVXAndes.td"
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
new file mode 100644
index 0000000000000..d90fe2cd0e6f3
--- /dev/null
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
@@ -0,0 +1,17 @@
+//===- IntrinsicsRISCVXAndes.td - Andes intrinsics ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the Andes vendor intrinsics for RISC-V.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "riscv" in {
+ // Andes Vector Packed FP16 Extension
+ defm nds_vfpmadt : RISCVBinaryAAXRoundingMode;
+ defm nds_vfpmadb : RISCVBinaryAAXRoundingMode;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
index aa70a9d03cc1f..22b8cf4472bd2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
@@ -338,6 +338,33 @@ class NDSRVInstVFPMAD<bits<6> funct6, string opcodestr>
let RVVConstraint = VMConstraint;
}
+//===----------------------------------------------------------------------===//
+// Multiclass
+//===----------------------------------------------------------------------===//
+
+let fprclass = !cast<RegisterClass>("FPR32") in
+def SCALAR_F16_FPR32 : FPR_Info<16>;
+
+let hasSideEffects = 0 in
+multiclass VPseudoVFPMAD_VF_RM {
+ foreach m = SCALAR_F16_FPR32.MxList in {
+ defm "" : VPseudoBinaryV_VF_RM<m, SCALAR_F16_FPR32, 0>,
+ SchedBinary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+ m.MX, SCALAR_F16_FPR32.SEW, forcePassthruRead=true>;
+ }
+}
+
+multiclass VPatVFPMADBinaryV_VX_RM<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in {
+ defvar kind = "V"#vti.ScalarSuffix;
+ defm : VPatBinaryRoundingMode<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
+ vti.Vector, vti.Vector, f32, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, FPR32>;
+ }
+}
+
//===----------------------------------------------------------------------===//
// XAndesPerf
//===----------------------------------------------------------------------===//
@@ -400,7 +427,9 @@ def NDS_VFPMADB_VF : NDSRVInstVFPMAD<0b000011, "nds.vfpmadb">;
}
} // DecoderNamespace = "XAndes"
-// Patterns
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
let Predicates = [HasVendorXAndesPerf] in {
@@ -428,3 +457,12 @@ def : Sh1AddPat<NDS_LEA_H_ZE>;
def : Sh2AddPat<NDS_LEA_W_ZE>;
def : Sh3AddPat<NDS_LEA_D_ZE>;
} // Predicates = [HasVendorXAndesPerf, IsRV64]
+
+let Predicates = [HasVendorXAndesVPackFPH],
+ mayRaiseFPException = true in {
+defm PseudoNDS_VFPMADT : VPseudoVFPMAD_VF_RM;
+defm PseudoNDS_VFPMADB : VPseudoVFPMAD_VF_RM;
+} // Predicates = [HasVendorXAndesVPackFPH]
+
+defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadt", "PseudoNDS_VFPMADT", AllFP16Vectors>;
+defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadb", "PseudoNDS_VFPMADB", AllFP16Vectors>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadb.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadb.ll
new file mode 100644
index 0000000000000..feceacd90e5f0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadb.ll
@@ -0,0 +1,299 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64x,+xandesvpackfph \
+; RUN: -verify-machineinstrs -target-abi=ilp32f | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64x,+xandesvpackfph \
+; RUN: -verify-machineinstrs -target-abi=lp64f | FileCheck %s
+
+declare <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.nxv1f16.f32(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ float,
+ iXLen, iXLen);
+
+define <vscale x 1 x half> @intrinsic_vfpmadb_vf_nxv1f16_nxv1f16_f32(<vscale x 1 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv1f16_nxv1f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.nxv1f16.f32(
+ <vscale x 1 x half> undef,
+ <vscale x 1 x half> %0,
+ float %1, iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.mask.nxv1f16.f32(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ float,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x half> @intrinsic_vfpmadb_mask_vf_nxv1f16_nxv1f16_f32(<vscale x 1 x half> %0, <vscale x 1 x half> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv1f16_nxv1f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.mask.nxv1f16.f32(
+ <vscale x 1 x half> %0,
+ <vscale x 1 x half> %1,
+ float %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.nds.vfpmadb.nxv2f16.f32(
+ <vscale x 2 x half>,
+ <vscale x 2 x half>,
+ float,
+ iXLen, iXLen);
+
+define <vscale x 2 x half> @intrinsic_vfpmadb_vf_nxv2f16_nxv2f16_f32(<vscale x 2 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv2f16_nxv2f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 2 x half> @llvm.riscv.nds.vfpmadb.nxv2f16.f32(
+ <vscale x 2 x half> undef,
+ <vscale x 2 x half> %0,
+ float %1, iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.nds.vfpmadb.mask.nxv2f16.f32(
+ <vscale x 2 x half>,
+ <vscale x 2 x half>,
+ float,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x half> @intrinsic_vfpmadb_mask_vf_nxv2f16_nxv2f16_f32(<vscale x 2 x half> %0, <vscale x 2 x half> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv2f16_nxv2f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 2 x half> @llvm.riscv.nds.vfpmadb.mask.nxv2f16.f32(
+ <vscale x 2 x half> %0,
+ <vscale x 2 x half> %1,
+ float %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.nds.vfpmadb.nxv4f16.f32(
+ <vscale x 4 x half>,
+ <vscale x 4 x half>,
+ float,
+ iXLen, iXLen);
+
+define <vscale x 4 x half> @intrinsic_vfpmadb_vf_nxv4f16_nxv4f16_f32(<vscale x 4 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv4f16_nxv4f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 4 x half> @llvm.riscv.nds.vfpmadb.nxv4f16.f32(
+ <vscale x 4 x half> undef,
+ <vscale x 4 x half> %0,
+ float %1, iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.nds.vfpmadb.mask.nxv4f16.f32(
+ <vscale x 4 x half>,
+ <vscale x 4 x half>,
+ float,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x half> @intrinsic_vfpmadb_mask_vf_nxv4f16_nxv4f16_f32(<vscale x 4 x half> %0, <vscale x 4 x half> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv4f16_nxv4f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 4 x half> @llvm.riscv.nds.vfpmadb.mask.nxv4f16.f32(
+ <vscale x 4 x half> %0,
+ <vscale x 4 x half> %1,
+ float %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.nds.vfpmadb.nxv8f16.f32(
+ <vscale x 8 x half>,
+ <vscale x 8 x half>,
+ float,
+ iXLen, iXLen);
+
+define <vscale x 8 x half> @intrinsic_vfpmadb_vf_nxv8f16_nxv8f16_f32(<vscale x 8 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv8f16_nxv8f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 8 x half> @llvm.riscv.nds.vfpmadb.nxv8f16.f32(
+ <vscale x 8 x half> undef,
+ <vscale x 8 x half> %0,
+ float %1, iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.nds.vfpmadb.mask.nxv8f16.f32(
+ <vscale x 8 x half>,
+ <vscale x 8 x half>,
+ float,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x half> @intrinsic_vfpmadb_mask_vf_nxv8f16_nxv8f16_f32(<vscale x 8 x half> %0, <vscale x 8 x half> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv8f16_nxv8f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 8 x half> @llvm.riscv.nds.vfpmadb.mask.nxv8f16.f32(
+ <vscale x 8 x half> %0,
+ <vscale x 8 x half> %1,
+ float %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.nds.vfpmadb.nxv16f16.f32(
+ <vscale x 16 x half>,
+ <vscale x 16 x half>,
+ float,
+ iXLen, iXLen);
+
+define <vscale x 16 x half> @intrinsic_vfpmadb_vf_nxv16f16_nxv16f16_f32(<vscale x 16 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv16f16_nxv16f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 16 x half> @llvm.riscv.nds.vfpmadb.nxv16f16.f32(
+ <vscale x 16 x half> undef,
+ <vscale x 16 x half> %0,
+ float %1, iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.nds.vfpmadb.mask.nxv16f16.f32(
+ <vscale x 16 x half>,
+ <vscale x 16 x half>,
+ float,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x half> @intrinsic_vfpmadb_mask_vf_nxv16f16_nxv16f16_f32(<vscale x 16 x half> %0, <vscale x 16 x half> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv16f16_nxv16f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 16 x half> @llvm.riscv.nds.vfpmadb.mask.nxv16f16.f32(
+ <vscale x 16 x half> %0,
+ <vscale x 16 x half> %1,
+ float %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.nds.vfpmadb.nxv32f16.f32(
+ <vscale x 32 x half>,
+ <vscale x 32 x half>,
+ float,
+ iXLen, iXLen);
+
+define <vscale x 32 x half> @intrinsic_vfpmadb_vf_nxv32f16_nxv32f16_f32(<vscale x 32 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv32f16_nxv32f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 32 x half> @llvm.riscv.nds.vfpmadb.nxv32f16.f32(
+ <vscale x 32 x half> undef,
+ <vscale x 32 x half> %0,
+ float %1, iXLen 0, iXLen %2)
+
+ ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.nds.vfpmadb.mask.nxv32f16.f32(
+ <vscale x 32 x half>,
+ <vscale x 32 x half>,
+ float,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x half> @intrinsic_vfpmadb_mask_vf_nxv32f16_nxv32f16_f32(<vscale x 32 x half> %0, <vscale x 32 x half> %1, float %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv32f16_nxv32f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 32 x half> @llvm.riscv.nds.vfpmadb.mask.nxv32f16.f32(
+ <vscale x 32 x half> %0,
+ <vscale x 32 x half> %1,
+ float %2,
+ <vscale x 32 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x half> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadt.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadt.ll
new file mode 100644
index 0000000000000..e9d78d2d8b5f5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadt.ll
@@ -0,0 +1,299 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64x,+xandesvpackfph \
+; RUN: -verify-machineinstrs -target-abi=ilp32f | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64x,+xandesvpackfph \
+; RUN: -verify-machineinstrs -target-abi=lp64f | FileCheck %s
+
+declare <vscale x 1 x half> @llvm.riscv.nds.vfpmadt.nxv1f16.f32(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ float,
+ iXLen, iXLen);
+
+define <vscale x 1 x half> @intrinsic_vfpmadt_vf_nxv1f16_nxv1f16_f32(<vscale x 1 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadt_vf_nxv1f16_nxv1f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 1 x half> @llvm.riscv.nds.vfpmadt.nxv1f16.f32(
+ <vscale x 1 x half> undef,
+ <vscale x 1 x half> %0,
+ float %1, iXLen 0, iXLen %2)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.nds.vfpmadt.mask.nxv1f16.f32(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ float,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x half> @intrinsic_vfpmadt_mask_vf_nxv1f16_nxv1f16_f32(<vscale x 1 x half> %0, <vscale x 1 x half> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadt_mask_vf_nxv1f16_nxv1f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 1 x half> @llvm.riscv.nds.vfpmadt.mask.nxv1f16.f32(
+ <vscale x 1 x half> %0,
+ <vscale x 1 x half> %1,
+ float %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.nds.vfpmadt.nxv2f16.f32(
+ <vscale x 2 x half>,
+ <vscale x 2 x half>,
+ float,
+ iXLen, iXLen);
+
+define <vscale x 2 x half> @intrinsic_vfpmadt_vf_nxv2f16_nxv2f16_f32(<vscale x 2 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadt_vf_nxv2f16_nxv2f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 2 x half> @llvm.riscv.nds.vfpmadt.nxv2f16.f32(
+ <vscale x 2 x half> undef,
+ <vscale x 2 x half> %0,
+ float %1, iXLen 0, iXLen %2)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.nds.vfpmadt.mask.nxv2f16.f32(
+ <vscale x 2 x half>,
+ <vscale x 2 x half>,
+ float,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x half> @intrinsic_vfpmadt_mask_vf_nxv2f16_nxv2f16_f32(<vscale x 2 x half> %0, <vscale x 2 x half> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadt_mask_vf_nxv2f16_nxv2f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 2 x half> @llvm.riscv.nds.vfpmadt.mask.nxv2f16.f32(
+ <vscale x 2 x half> %0,
+ <vscale x 2 x half> %1,
+ float %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.nds.vfpmadt.nxv4f16.f32(
+ <vscale x 4 x half>,
+ <vscale x 4 x half>,
+ float,
+ iXLen, iXLen);
+
+define <vscale x 4 x half> @intrinsic_vfpmadt_vf_nxv4f16_nxv4f16_f32(<vscale x 4 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadt_vf_nxv4f16_nxv4f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 4 x half> @llvm.riscv.nds.vfpmadt.nxv4f16.f32(
+ <vscale x 4 x half> undef,
+ <vscale x 4 x half> %0,
+ float %1, iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.nds.vfpmadt.mask.nxv4f16.f32(
+ <vscale x 4 x half>,
+ <vscale x 4 x half>,
+ float,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x half> @intrinsic_vfpmadt_mask_vf_nxv4f16_nxv4f16_f32(<vscale x 4 x half> %0, <vscale x 4 x half> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadt_mask_vf_nxv4f16_nxv4f16_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = tail call <vscale x 4 x half> @llvm.riscv.nds.vfpmadt.mask.nxv4f16.f32(
+ <vscale x 4 x half> %0,
+ <vscale x 4 x half> %1,
+ float %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.nds.vfpmadt.nxv8f16.f32(
+ <vscale x 8 x half>,
+ <vscale x 8 x half>,
+ float,
+ iXLen, iXLen);
+
+define <vscale x 8 x half> @intrinsic_vfpmadt_vf_nxv8f16_nxv8f16_f32(<vscale x 8 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABE...
[truncated]
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadb.ll llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadt.ll

The following files introduce new uses of undef:

llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadb.ll
llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadt.ll

Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. In tests, avoid using undef. For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
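Concretely for this patch, a sketch of one way to address the warning (not something applied in this PR) is to use poison instead of undef for the passthru operand of the unmasked test calls:

  %a = tail call <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.nxv1f16.f32(
    <vscale x 1 x half> poison,   ; was: undef
    <vscale x 1 x half> %0,
    float %1, iXLen 0, iXLen %2)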
LGTM
This patch adds LLVM IR intrinsic support for XAndesVDot, similar to llvm#139860. The documentation for the intrinsics can be found at: https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#andes-vector-dot-product-extensionxandesvdot and, with policy variants, at: https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/policy_funcs/intrinsic_funcs.adoc#andes-vector-dot-product-extensionxandesvdot The clang part will be added in a later patch. Co-authored-by: Tony Chuan-Yue Yuan <yuan593@andestech.com>