-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[PowerPC] Update DMF VSX ACC data transfer instructions #138897
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
For cpu=future, acc registers no longer overlap VSRs and are prefixed with `dm`. The original xxmfacc/xxmtacc instructions are now extended mnemonics for their dm* equivalents.
@llvm/pr-subscribers-backend-powerpc Author: Lei Huang (lei137) Changes: For cpu=future, acc registers no longer overlap VSRs and are prefixed with `dm`. Full diff: https://github.com/llvm/llvm-project/pull/138897.diff 5 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 4d4a3efd1098e..9d4d2d864fc32 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1108,7 +1108,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(
case PPC::CRSET:
case PPC::CRUNSET:
case PPC::XXSETACCZ:
- case PPC::XXSETACCZW:
+ case PPC::DMXXSETACCZ:
return true;
}
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
index 23b951871d5f4..6df76956a6e39 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -535,25 +535,25 @@ let Predicates = [MMA, IsNotISAFuture] in {
}
let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
- // For Future and up XXMFACCW and XXMTACCW will not have patterns.
// On Future CPU the wacc registers no longer overlap with the vsr registers
- // and so register allocation would have to know to match 4 vsr registers
- // with one wacc register.
- // On top of that Future CPU has a more convenient way to move between vsrs
- // and wacc registers using xxextfdmr512 and xxinstdmr512.
- def XXMFACCW :
- XForm_AT3<31, 0, 177, (outs wacc:$ATo), (ins wacc:$AT), "xxmfacc $AT",
- IIC_VecGeneral, []>,
+ // so register allocation needs to match 4 vsr registers with one wacc
+ // register. XXMTACC/XXMFACC will be aliased to these new instructions.
+ def DMXXMFACC:
+ XForm_AT3<31, 0, 177, (outs wacc:$ATo), (ins wacc:$AT), "dmxxmfacc $AT",
+ IIC_VecGeneral,
+ [(set v512i1:$ATo, (int_ppc_mma_xxmfacc v512i1:$AT))]>,
RegConstraint<"$ATo = $AT">, NoEncode<"$ATo">;
- def XXMTACCW :
- XForm_AT3<31, 1, 177, (outs wacc:$AT), (ins wacc:$ATi), "xxmtacc $AT",
- IIC_VecGeneral, []>,
+ def DMXXMTACC:
+ XForm_AT3<31, 1, 177, (outs wacc:$AT), (ins wacc:$ATi), "dmxxmtacc $AT",
+ IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
- def XXSETACCZW :
- XForm_AT3<31, 3, 177, (outs wacc:$AT), (ins), "xxsetaccz $AT",
- IIC_VecGeneral, [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
+ def DMXXSETACCZ:
+ XForm_AT3<31, 3, 177, (outs wacc:$AT), (ins), "dmxxsetaccz $AT",
+ IIC_VecGeneral,
+ [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
}
def XVI8GER4WSPP :
@@ -572,6 +572,12 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
}
}
+let Predicates = [MMA, IsISAFuture] in {
+ def : InstAlias<"dmxxmmfacc $AT ", (XXMFACC acc:$AT)>;
+ def : InstAlias<"dmxxmmtacc $AT ", (XXMTACC acc:$AT)>;
+ def : InstAlias<"dmxxsetaccz $AT ", (XXSETACCZ acc:$AT)>;
+}
+
let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in {
def PMXVI8GER4SPP :
MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT),
@@ -1093,5 +1099,5 @@ let Predicates = [MMA, IsISAFuture] in {
def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
v16i8:$vs3, v16i8:$vs2)),
(DMXXINSTDMR512 ConcatsMMA.VecsToVecPair0, ConcatsMMA.VecsToVecPair1)>;
- def : Pat<(v512i1 immAllZerosV), (XXSETACCZW)>;
+ def : Pat<(v512i1 immAllZerosV), (DMXXSETACCZ)>;
}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 8b690b7b833b3..81929964ef1bc 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -20,6 +20,8 @@ def sub_64 : SubRegIndex<64>;
def sub_64_hi_phony : SubRegIndex<64,64>;
def sub_vsx0 : SubRegIndex<128>;
def sub_vsx1 : SubRegIndex<128, 128>;
+def sub_vsx2 : ComposedSubRegIndex<sub_vsx1, sub_vsx0>;
+def sub_vsx3 : ComposedSubRegIndex<sub_vsx2, sub_vsx0>;
def sub_gp8_x0 : SubRegIndex<64>;
def sub_gp8_x1 : SubRegIndex<64, 64>;
def sub_fp0 : SubRegIndex<64>;
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
index 41e702c94339d..9a528f4fd911f 100644
--- a/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-intrinsics.ll
@@ -769,7 +769,7 @@ declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
define void @int_xxsetaccz(ptr %ptr) {
; CHECK-LABEL: int_xxsetaccz:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxsetaccz wacc0
+; CHECK-NEXT: dmxxsetaccz wacc0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxv v4, 48(r3)
; CHECK-NEXT: stxv v5, 32(r3)
@@ -779,7 +779,7 @@ define void @int_xxsetaccz(ptr %ptr) {
;
; CHECK-BE-LABEL: int_xxsetaccz:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsetaccz wacc0
+; CHECK-BE-NEXT: dmxxsetaccz wacc0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxv v5, 48(r3)
; CHECK-BE-NEXT: stxv v4, 32(r3)
@@ -789,7 +789,7 @@ define void @int_xxsetaccz(ptr %ptr) {
;
; CHECK-O0-LABEL: int_xxsetaccz:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: xxsetaccz wacc0
+; CHECK-O0-NEXT: dmxxsetaccz wacc0
; CHECK-O0-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-O0-NEXT: xxlor vs0, v4, v4
; CHECK-O0-NEXT: stxv vs0, 48(r3)
@@ -803,7 +803,7 @@ define void @int_xxsetaccz(ptr %ptr) {
;
; CHECK-O0-BE-LABEL: int_xxsetaccz:
; CHECK-O0-BE: # %bb.0: # %entry
-; CHECK-O0-BE-NEXT: xxsetaccz wacc0
+; CHECK-O0-BE-NEXT: dmxxsetaccz wacc0
; CHECK-O0-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-O0-BE-NEXT: xxlor vs0, v5, v5
; CHECK-O0-BE-NEXT: stxv vs0, 48(r3)
@@ -817,7 +817,7 @@ define void @int_xxsetaccz(ptr %ptr) {
;
; CHECK-AIX64-LABEL: int_xxsetaccz:
; CHECK-AIX64: # %bb.0: # %entry
-; CHECK-AIX64-NEXT: xxsetaccz 0
+; CHECK-AIX64-NEXT: dmxxsetaccz 0
; CHECK-AIX64-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX64-NEXT: stxv 5, 48(3)
; CHECK-AIX64-NEXT: stxv 4, 32(3)
@@ -827,7 +827,7 @@ define void @int_xxsetaccz(ptr %ptr) {
;
; CHECK-AIX32-LABEL: int_xxsetaccz:
; CHECK-AIX32: # %bb.0: # %entry
-; CHECK-AIX32-NEXT: xxsetaccz 0
+; CHECK-AIX32-NEXT: dmxxsetaccz 0
; CHECK-AIX32-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX32-NEXT: stxv 5, 48(3)
; CHECK-AIX32-NEXT: stxv 4, 32(3)
@@ -845,7 +845,7 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble
define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
; CHECK-LABEL: disass_acc:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxsetaccz wacc0
+; CHECK-NEXT: dmxxsetaccz wacc0
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxv v5, 0(r3)
; CHECK-NEXT: stxv v4, 0(r4)
@@ -855,7 +855,7 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
;
; CHECK-BE-LABEL: disass_acc:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsetaccz wacc0
+; CHECK-BE-NEXT: dmxxsetaccz wacc0
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxv v2, 0(r3)
; CHECK-BE-NEXT: stxv v3, 0(r4)
@@ -865,7 +865,7 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
;
; CHECK-O0-LABEL: disass_acc:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: xxsetaccz wacc0
+; CHECK-O0-NEXT: dmxxsetaccz wacc0
; CHECK-O0-NEXT: dmxxextfdmr512 vsp32, vsp36, wacc0, 0
; CHECK-O0-NEXT: vmr v2, v0
; CHECK-O0-NEXT: xxlor vs0, v1, v1
@@ -879,7 +879,7 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
;
; CHECK-O0-BE-LABEL: disass_acc:
; CHECK-O0-BE: # %bb.0: # %entry
-; CHECK-O0-BE-NEXT: xxsetaccz wacc0
+; CHECK-O0-BE-NEXT: dmxxsetaccz wacc0
; CHECK-O0-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
; CHECK-O0-BE-NEXT: vmr v2, v1
; CHECK-O0-BE-NEXT: xxlor vs0, v0, v0
@@ -893,7 +893,7 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
;
; CHECK-AIX64-LABEL: disass_acc:
; CHECK-AIX64: # %bb.0: # %entry
-; CHECK-AIX64-NEXT: xxsetaccz 0
+; CHECK-AIX64-NEXT: dmxxsetaccz 0
; CHECK-AIX64-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX64-NEXT: stxv 2, 0(3)
; CHECK-AIX64-NEXT: stxv 3, 0(4)
@@ -903,7 +903,7 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
;
; CHECK-AIX32-LABEL: disass_acc:
; CHECK-AIX32: # %bb.0: # %entry
-; CHECK-AIX32-NEXT: xxsetaccz 0
+; CHECK-AIX32-NEXT: dmxxsetaccz 0
; CHECK-AIX32-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX32-NEXT: stxv 2, 0(3)
; CHECK-AIX32-NEXT: stxv 3, 0(4)
@@ -931,7 +931,7 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>)
define void @testcse(ptr %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxsetaccz wacc0
+; CHECK-NEXT: dmxxsetaccz wacc0
; CHECK-NEXT: xvf32gerpp wacc0, v2, v2
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-NEXT: stxv v4, 48(r3)
@@ -946,7 +946,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
;
; CHECK-BE-LABEL: testcse:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: xxsetaccz wacc0
+; CHECK-BE-NEXT: dmxxsetaccz wacc0
; CHECK-BE-NEXT: xvf32gerpp wacc0, v2, v2
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-BE-NEXT: stxv v5, 48(r3)
@@ -961,7 +961,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
;
; CHECK-O0-LABEL: testcse:
; CHECK-O0: # %bb.0: # %entry
-; CHECK-O0-NEXT: xxsetaccz wacc0
+; CHECK-O0-NEXT: dmxxsetaccz wacc0
; CHECK-O0-NEXT: xvf32gerpp wacc0, v2, v2
; CHECK-O0-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-O0-NEXT: xxlor vs3, v4, v4
@@ -980,7 +980,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
;
; CHECK-O0-BE-LABEL: testcse:
; CHECK-O0-BE: # %bb.0: # %entry
-; CHECK-O0-BE-NEXT: xxsetaccz wacc0
+; CHECK-O0-BE-NEXT: dmxxsetaccz wacc0
; CHECK-O0-BE-NEXT: xvf32gerpp wacc0, v2, v2
; CHECK-O0-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
; CHECK-O0-BE-NEXT: xxlor vs3, v5, v5
@@ -999,7 +999,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
;
; CHECK-AIX64-LABEL: testcse:
; CHECK-AIX64: # %bb.0: # %entry
-; CHECK-AIX64-NEXT: xxsetaccz 0
+; CHECK-AIX64-NEXT: dmxxsetaccz 0
; CHECK-AIX64-NEXT: xvf32gerpp 0, 2, 2
; CHECK-AIX64-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX64-NEXT: stxv 5, 48(3)
@@ -1014,7 +1014,7 @@ define void @testcse(ptr %res, <16 x i8> %vc) {
;
; CHECK-AIX32-LABEL: testcse:
; CHECK-AIX32: # %bb.0: # %entry
-; CHECK-AIX32-NEXT: xxsetaccz 0
+; CHECK-AIX32-NEXT: dmxxsetaccz 0
; CHECK-AIX32-NEXT: xvf32gerpp 0, 2, 2
; CHECK-AIX32-NEXT: dmxxextfdmr512 34, 36, 0, 0
; CHECK-AIX32-NEXT: stxv 5, 48(3)
diff --git a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
index cf69d3ad09878..0287f067d0713 100644
--- a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
+++ b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir
@@ -6,17 +6,17 @@
# Keep track of all of the lanemasks for various subregsiters.
#
# CHECK: %3 [80r,80d:0) 0@80r L000000000000000C [80r,80d:0) 0@80r weight:0.000000e+00
-# CHECK: %4 [96r,96d:0) 0@96r L0000000000003000 [96r,96d:0) 0@96r weight:0.000000e+00
+# CHECK: %4 [96r,96d:0) 0@96r L0000000000007000 [96r,96d:0) 0@96r weight:0.000000e+00
# CHECK: %5 [112r,112d:0) 0@112r L000000000000000C [112r,112d:0) 0@112r weight:0.000000e+00
-# CHECK: %6 [128r,128d:0) 0@128r L0000000000003000 [128r,128d:0) 0@128r weight:0.000000e+00
+# CHECK: %6 [128r,128d:0) 0@128r L0000000000007000 [128r,128d:0) 0@128r weight:0.000000e+00
# CHECK: %7 [144r,144d:0) 0@144r L0000000000000004 [144r,144d:0) 0@144r weight:0.000000e+00
-# CHECK: %8 [160r,160d:0) 0@160r L0000000000001000 [160r,160d:0) 0@160r weight:0.000000e+00
+# CHECK: %8 [160r,160d:0) 0@160r L0000000000002000 [160r,160d:0) 0@160r weight:0.000000e+00
# CHECK: %9 [176r,176d:0) 0@176r L0000000000000004 [176r,176d:0) 0@176r weight:0.000000e+00
-# CHECK: %10 [192r,192d:0) 0@192r L0000000000001000 [192r,192d:0) 0@192r weight:0.000000e+00
-# CHECK: %11 [208r,208d:0) 0@208r L0000000000004000 [208r,208d:0) 0@208r weight:0.000000e+00
-# CHECK: %12 [224r,224d:0) 0@224r L0000000000010000 [224r,224d:0) 0@224r weight:0.000000e+00
-# CHECK: %13 [240r,240d:0) 0@240r L000000000000300C [240r,240d:0) 0@240r weight:0.000000e+00
-# CHECK: %14 [256r,256d:0) 0@256r L000000000003C000 [256r,256d:0) 0@256r weight:0.000000e+00
+# CHECK: %10 [192r,192d:0) 0@192r L0000000000002000 [192r,192d:0) 0@192r weight:0.000000e+00
+# CHECK: %11 [208r,208d:0) 0@208r L0000000000008000 [208r,208d:0) 0@208r weight:0.000000e+00
+# CHECK: %12 [224r,224d:0) 0@224r L0000000000020000 [224r,224d:0) 0@224r weight:0.000000e+00
+# CHECK: %13 [240r,240d:0) 0@240r L000000000000700C [240r,240d:0) 0@240r weight:0.000000e+00
+# CHECK: %14 [256r,256d:0) 0@256r L0000000000078000 [256r,256d:0) 0@256r weight:0.000000e+00
# CHECK: 0B bb.0
|
gentle ping |
// and so register allocation would have to know to match 4 vsr registers | ||
// with one wacc register. | ||
// On top of that Future CPU has a more convenient way to move between vsrs | ||
// and wacc registers using xxextfdmr512 and xxinstdmr512. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we should add patterns for DMXXMTACC/DMXXMFACC for ISA Future, and I don't think we should change the comment. Lowering will replace these intrinsic calls.
For cpu=future, acc registers no longer overlap VSRs and are prefixed with `dm`. The original xxmfacc/xxmtacc instructions are now extended mnemonics for their dm* equivalents.