-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[PowerPC] Add load/store support for v2048i1 and DMF cryptography instructions #136145
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
…instructions. This commit adds support for loading and storing v2048i1 DMR pairs and introduces Dense Math Facility cryptography instructions: DMSHA2HASH, DMSHA3HASH, and DMXXSHAPAD, along with their corresponding intrinsics and tests.
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-powerpc Author: Maryam Moghadas (maryammo) Changes: This commit adds support for loading and storing v2048i1 DMR pairs and introduces Dense Math Facility cryptography instructions: DMSHA2HASH, DMSHA3HASH, and DMXXSHAPAD, along with their corresponding intrinsics and tests. Patch is 29.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136145.diff 10 Files Affected:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index d10b07ccd91c2..5e538eebb0890 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -330,6 +330,7 @@ def IIT_V10 : IIT_Vec<10, 61>;
def IIT_ONE_THIRD_VEC_ARG : IIT_Base<62>;
def IIT_ONE_FIFTH_VEC_ARG : IIT_Base<63>;
def IIT_ONE_SEVENTH_VEC_ARG : IIT_Base<64>;
+def IIT_V2048 : IIT_Vec<2048, 65>;
}
defvar IIT_all_FixedTypes = !filter(iit, IIT_all,
@@ -534,6 +535,7 @@ def llvm_v128i1_ty : LLVMType<v128i1>; // 128 x i1
def llvm_v256i1_ty : LLVMType<v256i1>; // 256 x i1
def llvm_v512i1_ty : LLVMType<v512i1>; // 512 x i1
def llvm_v1024i1_ty : LLVMType<v1024i1>; //1024 x i1
+def llvm_v2048i1_ty : LLVMType<v2048i1>; //2048 x i1
def llvm_v1i8_ty : LLVMType<v1i8>; // 1 x i8
def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index e4d39134a4a25..2940f808c1be4 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1732,6 +1732,20 @@ let TargetPrefix = "ppc" in {
[llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
+ def int_ppc_mma_dmsha2hash :
+ DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
+ llvm_v1024i1_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+ def int_ppc_mma_dmsha3hash :
+ DefaultAttrsIntrinsic<[llvm_v2048i1_ty], [llvm_v2048i1_ty,
+ llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+ def int_ppc_mma_dmxxshapad :
+ DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
+ llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>,
+ ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
}
// XL Compat intrinsics.
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index a0375c6508ec9..c73cc50573cc4 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -327,6 +327,10 @@ DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
OutputTable.push_back(IITDescriptor::getVector(1024, IsScalableVector));
DecodeIITType(NextElt, Infos, Info, OutputTable);
return;
+ case IIT_V2048:
+ OutputTable.push_back(IITDescriptor::getVector(2048, IsScalableVector));
+ DecodeIITType(NextElt, Infos, Info, OutputTable);
+ return;
case IIT_EXTERNREF:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 10));
return;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1f75425752a78..cdbb986e0b237 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1361,8 +1361,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.isISAFuture()) {
addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
+ addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
+ setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
} else {
addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
}
@@ -11780,15 +11783,19 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
SDValue LoadChain = LN->getChain();
SDValue BasePtr = LN->getBasePtr();
EVT VT = Op.getValueType();
+ bool IsV1024i1 = VT == MVT::v1024i1;
+ bool IsV2048i1 = VT == MVT::v2048i1;
- // Type v1024i1 is used for Dense Math dmr registers.
- assert(VT == MVT::v1024i1 && "Unsupported type.");
+ // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
+ // Dense Math dmr pair registers, respectively.
+ assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
"Dense Math support required.");
assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
- SmallVector<SDValue, 4> Loads;
- SmallVector<SDValue, 4> LoadChains;
+ SmallVector<SDValue, 8> Loads;
+ SmallVector<SDValue, 8> LoadChains;
+
SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
MachineMemOperand *MMO = LN->getMemOperand();
@@ -11824,10 +11831,40 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
+
SDValue Value =
SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
- SDValue RetOps[] = {Value, TF};
+ SDValue DmrPValue;
+ if (IsV2048i1) {
+ // This corresponds to v2048i1 which represents a dmr pair.
+ SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[4],
+ Loads[5]), 0);
+ SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
+ Loads[6], Loads[7]), 0);
+ const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
+ SDValue Dmr1Value =
+ SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl,
+ MVT::v1024i1, Dmr1Ops), 0);
+
+ SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
+ SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
+
+ SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
+ const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
+
+ DmrPValue =
+ SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1,
+ DmrPOps), 0);
+ }
+
+ SDValue RetOps[2];
+ if (IsV1024i1)
+ RetOps[0] = Value;
+ else
+ RetOps[0] = DmrPValue;
+ RetOps[1] = TF;
+
return DAG.getMergeValues(RetOps, dl);
}
@@ -11839,7 +11876,7 @@ SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
SDValue BasePtr = LN->getBasePtr();
EVT VT = Op.getValueType();
- if (VT == MVT::v1024i1)
+ if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
return LowerDMFVectorLoad(Op, DAG);
if (VT != MVT::v256i1 && VT != MVT::v512i1)
@@ -11886,34 +11923,85 @@ SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
SDValue StoreChain = SN->getChain();
SDValue BasePtr = SN->getBasePtr();
- SmallVector<SDValue, 4> Values;
- SmallVector<SDValue, 4> Stores;
+ SmallVector<SDValue, 8> Values;
+ SmallVector<SDValue, 8> Stores;
EVT VT = SN->getValue().getValueType();
+ bool IsV1024i1 = VT == MVT::v1024i1;
+ bool IsV2048i1 = VT == MVT::v2048i1;
- // Type v1024i1 is used for Dense Math dmr registers.
- assert(VT == MVT::v1024i1 && "Unsupported type.");
+ // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
+ // Dense Math dmr pair registers, respectively.
+ assert((IsV1024i1 || IsV2048i1)&& "Unsupported type.");
assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
"Dense Math support required.");
assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
- SDValue Lo(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
- 0);
- SDValue Hi(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
- 0);
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
- MachineSDNode *ExtNode =
- DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
+ if (IsV1024i1) {
+ SDValue Lo(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+ SDValue Hi(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
+ MachineSDNode *ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ }
+ else {
+ // This corresponds to v2048i1 which represents a dmr pair.
+ SDValue Dmr0(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)), 0);
+
+ SDValue Dmr1(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)), 0);
+
+ SDValue Dmr0Lo(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Dmr0,
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), 0);
+
+ SDValue Dmr0Hi(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Dmr0,
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), 0);
+
+ SDValue Dmr1Lo(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Dmr1,
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), 0);
+
+ SDValue Dmr1Hi(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Dmr1,
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), 0);
+
+ MachineSDNode *ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ }
if (Subtarget.isLittleEndian())
std::reverse(Values.begin(), Values.end());
@@ -11952,7 +12040,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue Value2 = SN->getValue();
EVT StoreVT = Value.getValueType();
- if (StoreVT == MVT::v1024i1)
+ if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
return LowerDMFVectorStore(Op, DAG);
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
index 15215f7dc5faa..f1753dd0e0166 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td
@@ -159,6 +159,63 @@ multiclass DMR_UM_M448_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
}
}
+class XForm_AT3_T1_AB3<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : I <opcode, OOL, IOL, asmstr, NoItinerary> {
+ bits<3> AT;
+ bits<3> AB;
+ bits<1> T;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = AT{2-0};
+ let Inst{9} = 0;
+ let Inst{10} = T;
+ let Inst{11-15} = o;
+ let Inst{16-18} = AB{2-0};
+ let Inst{19-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_ATp2_SR5<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : I <opcode, OOL, IOL, asmstr, NoItinerary> {
+ bits<2> ATp;
+ bits<5> SR;
+
+ let Pattern = pattern;
+
+ let Inst{6-7} = ATp{1-0};
+ let Inst{8-10} = 0;
+ let Inst{11-15} = o;
+ let Inst{16-20} = SR{4-0};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XX2Form_AT3_XB6_ID2_E1_BL2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, NoItinerary> {
+ bits<3> AT;
+ bits<6> XB;
+ bits<2> ID;
+ bits<1> E;
+ bits<2> BL;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = AT{2-0};
+ let Inst{9-10} = 0;
+ let Inst{11-12} = ID{1-0};
+ let Inst{13} = E;
+ let Inst{14-15} = BL{1-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
let Predicates = [IsISAFuture] in {
def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226,
(outs vsrprc:$XAp, vsrprc:$XBp),
@@ -231,6 +288,25 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
+// DMF cryptography [support] Instructions
+let Predicates = [IsISAFuture] in {
+ def DMSHA2HASH :
+ XForm_AT3_T1_AB3<31, 14, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB, u1imm:$T),
+ "dmsha2hash $AT, $AB, $T", []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+
+ def DMSHA3HASH :
+ XForm_ATp2_SR5<31, 15, 177, (outs dmrprc:$ATp), (ins dmrprc:$ATpi , u5imm:$SR),
+ "dmsha3hash $ATp, $SR", []>,
+ RegConstraint<"$ATpi = $ATp">, NoEncode<"$ATpi">;
+
+ def DMXXSHAPAD :
+ XX2Form_AT3_XB6_ID2_E1_BL2<60, 421, (outs dmr:$AT),
+ (ins dmr:$ATi, vsrc:$XB, u2imm:$ID, u1imm:$E, u2imm:$BL),
+ "dmxxshapad $AT, $XB, $ID, $E, $BL", []>,
+ RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
+}
+
// MMA+ Intrinsics
let Predicates = [MMA, IsISAFuture] in {
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)),
@@ -260,3 +336,54 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
(PMDMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
}
+
+// cryptography Intrinsics
+let Predicates = [IsISAFuture] in {
+ def : Pat<(v1024i1 (int_ppc_mma_dmsha2hash v1024i1:$ATi, v1024i1:$AB, timm:$T)),
+ (DMSHA2HASH $ATi, $AB, $T)>;
+
+ def : Pat<(v2048i1 (int_ppc_mma_dmsha3hash v2048i1:$ATpi, timm:$SR)),
+ (DMSHA3HASH $ATpi, $SR)>;
+
+ def : Pat<(v1024i1 (int_ppc_mma_dmxxshapad v1024i1:$ATi, v16i8:$XB, timm:$ID,
+ timm:$E, timm:$BL)), (DMXXSHAPAD $ATi, RCCp.BToVSRC, $ID, $E, $BL)>;
+}
+
+// MMA+ Instruction aliases
+let Predicates = [IsISAFuture] in {
+ def : InstAlias<"dmsha256hash $AT, $AB",
+ (DMSHA2HASH dmr:$AT, dmr:$AB, 0)>;
+
+ def : InstAlias<"dmsha512hash $AT, $AB",
+ (DMSHA2HASH dmr:$AT, dmr:$AB, 1)>;
+
+ def : InstAlias<"dmsha3dw $ATp",
+ (DMSHA3HASH dmrprc:$ATp, 0)>;
+
+ def : InstAlias<"dmcryshash $ATp",
+ (DMSHA3HASH dmrprc:$ATp, 12)>;
+
+ def : InstAlias<"dmxxsha3512pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 0)>;
+
+ def : InstAlias<"dmxxsha3384pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 1)>;
+
+ def : InstAlias<"dmxxsha3256pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 2)>;
+
+ def : InstAlias<"dmxxsha3224pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 3)>;
+
+ def : InstAlias<"dmxxshake256pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 1, u1imm:$E, 0)>;
+
+ def : InstAlias<"dmxxshake128pad $AT, $XB, $E",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 1, u1imm:$E, 1)>;
+
+ def : InstAlias<"dmxxsha384512pad $AT, $XB",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 2, 0, 0)>;
+
+ def : InstAlias<"dmxxsha224256pad $AT, $XB",
+ (DMXXSHAPAD dmr:$AT, vsrc:$XB, 3, 0, 0)>;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 8b690b7b833b3..359adc31eb10b 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -1140,6 +1140,6 @@ def PPCRegDMRpRCAsmOperand : AsmOperandClass {
let PredicateMethod = "isDMRpRegNumber";
}
-def dmrp : RegisterOperand<DMRpRC> {
+def dmrprc : RegisterOperand<DMRpRC> {
let ParserMatchClass = PPCRegDMRpRCAsmOperand;
}
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll b/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll
new file mode 100644
index 0000000000000..afa28144cbf65
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-crypto.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+declare <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1>, <1024 x i1>, i32)
+
+define dso_local void @test_dmsha2hash(ptr %vop, ptr %vinp, ptr %resp) {
+; CHECK-LABEL: test_dmsha2hash:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: lxvp vsp34, 0(r4)
+; CHECK-NEXT: lxvp vsp36, 32(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r4)
+; CHECK-NEXT: lxvp vsp36, 96(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmsha256hash dmr0, dmr1
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_dmsha2hash:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: lxvp vsp34, 96(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmsha256hash dmr0, dmr1
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = load <1024 x i1>, ptr %vop, align 64
+ %1 = load <1024 x i1>, ptr %vinp, align 64
+ %3 = tail call <1024 x i1> @llvm.ppc.mma.dmsha2hash(<1024 x i1> %0, <1024 x i1> %1, i32 0)
+ store <1024 x i1> %3, ptr %resp, align 64
+ ret void
+}
+
+declare <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1>, i32)
+
+define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) {
+; CHECK-LABEL: test_dmsha3hash:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxvp vsp34,...
[truncated]
|
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/IR/Intrinsics.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cdbb986e0..958ea41d3 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11838,14 +11838,15 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
SDValue DmrPValue;
if (IsV2048i1) {
// This corresponds to v2048i1 which represents a dmr pair.
- SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[4],
- Loads[5]), 0);
+ SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
+ Loads[4], Loads[5]),
+ 0);
SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
- Loads[6], Loads[7]), 0);
+ Loads[6], Loads[7]),
+ 0);
const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
- SDValue Dmr1Value =
- SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl,
- MVT::v1024i1, Dmr1Ops), 0);
+ SDValue Dmr1Value = SDValue(
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
@@ -11853,16 +11854,15 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
- DmrPValue =
- SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1,
- DmrPOps), 0);
+ DmrPValue = SDValue(
+ DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
}
SDValue RetOps[2];
if (IsV1024i1)
RetOps[0] = Value;
- else
- RetOps[0] = DmrPValue;
+ else
+ RetOps[0] = DmrPValue;
RetOps[1] = TF;
return DAG.getMergeValues(RetOps, dl);
@@ -11931,23 +11931,23 @@ SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
// The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
// Dense Math dmr pair registers, respectively.
- assert((IsV1024i1 || IsV2048i1)&& "Unsupported type.");
+ assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
"Dense Math support required.");
assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
if (IsV1024i1) {
- SDValue Lo(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
- 0);
- SDValue Hi(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
- 0);
+ SDValue Lo(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+ SDValue Hi(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
MachineSDNode *ExtNode =
DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
Values.push_back(SDValue(ExtNode, 0));
@@ -11955,52 +11955,55 @@ SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
Values.push_back(SDValue(ExtNode, 0));
Values.push_back(SDValue(ExtNode, 1));
- }
- else {
+ } else {
// This corresponds to v2048i1 which represents a dmr pair.
SDValue Dmr0(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)), 0);
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
+ 0);
SDValue Dmr1(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
- Op.getOperand(1),
- DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)), 0);
-
- SDValue Dmr0Lo(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Dmr0,
- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), 0);
-
- SDValue Dmr0Hi(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Dmr0,
- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), 0);
-
- SDValue Dmr1Lo(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Dmr1,
- DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)), 0);
-
- SDValue Dmr1Hi(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
- Dmr1,
- DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)), 0);
-
- MachineSDNode *ExtNode =
- DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
- ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
- Values.push_back(SDValue(ExtNode, 0));
- Values.push_back(SDValue(ExtNode, 1));
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
+ Op.getOperand(1),
+ DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
+ 0);
+
+ SDValue Dmr0Lo(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+
+ SDValue Dmr0Hi(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
+
+ SDValue Dmr1Lo(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
+ DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
+ 0);
+
+ SDValue Dmr1Hi(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
+ DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
+ 0);
+
+ MachineSDNode *ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
+ ExtNode =
+ DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
+ Values.push_back(SDValue(ExtNode, 0));
+ Values.push_back(SDValue(ExtNode, 1));
}
if (Subtarget.isLittleEndian())
|
// cryptography Intrinsics | ||
let Predicates = [IsISAFuture] in { | ||
def : Pat<(v1024i1 (int_ppc_mma_dmsha2hash v1024i1:$ATi, v1024i1:$AB, timm:$T)), | ||
(DMSHA2HASH $ATi, $AB, $T)>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The intrinsic names could be added directly as patterns on the instructions and these patterns removed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the Pat for the third one, it uses RCCp.BToVSRC, which expands to (COPY_TO_REGCLASS $XB, VSRC) based on:
def RCCp {
dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC);
dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC);
}
It seems using COPY_TO_REGCLASS requires a separate Pat and is not allowed in instruction patterns. So combining them makes it fail to build.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, it is fine to just do the first two.
@@ -1140,6 +1140,6 @@ def PPCRegDMRpRCAsmOperand : AsmOperandClass { | ||
let PredicateMethod = "isDMRpRegNumber"; | ||
} | ||
|
||
def dmrp : RegisterOperand<DMRpRC> { | ||
def dmrprc : RegisterOperand<DMRpRC> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why are we renaming this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is the dmr pair register class; the new name is similar to:
def vsrc : RegisterOperand<VSRC> {
let ParserMatchClass = PPCRegVSRCAsmOperand;
}
SDValue Value = | ||
SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0); | ||
|
||
SDValue RetOps[] = {Value, TF}; | ||
SDValue DmrPValue; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we can do an early exit here:
SDValue DmrPValue; | |
if (IsV1024i1) { | |
SDValue RetOps[] = {Value, TF}; | |
return DAG.getMergeValues(RetOps, dl); | |
} | |
// Handle Loads for V2048i1. | |
SDValue DmrPValue; |
This commit adds support for loading and storing v2048i1 DMR pairs and introduces Dense Math Facility cryptography instructions: DMSHA2HASH, DMSHA3HASH, and DMXXSHAPAD, along with their corresponding intrinsics and tests.