-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU] Automate creation of byte_sel dags. NFCI. #140155
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Automate creation of byte_sel dags. NFCI. #140155
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) ChangesFull diff: https://github.com/llvm/llvm-project/pull/140155.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 47ee0a7f60ee3..a5814dd7cf9f4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1293,6 +1293,7 @@ def WaitVMVSrc : NamedIntOperand<"wait_vm_vsrc"> {
def ByteSel : NamedIntOperand<"byte_sel"> {
let Validator = "isUInt<2>";
}
+def ByteSel0 : DefaultOperand<ByteSel, 0>;
let PrintMethod = "printBitOp3" in
def BitOp3 : NamedIntOperand<"bitop3">;
@@ -1971,7 +1972,8 @@ class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs>
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod,
+ bit HasFP8ByteSel = 0, bit HasFP8DstByteSel = 0> {
dag src0 = !if(!ge(NumSrcArgs, 1),
!if (HasModifiers,
(ins Src0Mod:$src0_modifiers, Src0RC:$src0),
@@ -1989,18 +1991,23 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
(ins));
dag clamp = !if(HasClamp, (ins Clamp0:$clamp), (ins));
dag omod = !if(HasOMod, (ins omod0:$omod), (ins));
+ dag bytesel = !if(HasFP8ByteSel,
+ !con(!if(HasFP8DstByteSel, (ins VGPR_32:$vdst_in), (ins)),
+ (ins ByteSel0:$byte_sel)),
+ (ins));
- dag ret = !con(src0, src1, src2, clamp, omod);
+ dag ret = !con(src0, src1, src2, clamp, omod, bytesel);
}
class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
+ bit HasFP8ByteSel = 0, bit HasFP8DstByteSel = 0> {
// getInst64 handles clamp and omod. implicit mutex between vop3p and omod
dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
- Src0Mod, Src1Mod, Src2Mod>.ret;
+ Src0Mod, Src1Mod, Src2Mod, HasFP8ByteSel, HasFP8DstByteSel>.ret;
dag opsel = (ins op_sel0:$op_sel);
dag ret = !con(base, !if(HasOpSel, opsel, (ins)));
}
@@ -2612,7 +2619,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasClamp, HasModifiers, HasSrc2Mods,
- HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
+ HasOMod, Src0Mod, Src1Mod, Src2Mod,
+ HasFP8ByteSel, HasFP8DstByteSel>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp, HasOpSel, HasNeg,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
@@ -2630,7 +2638,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
- Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
+ Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel,
+ HasFP8ByteSel, HasFP8DstByteSel>.ret;
defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index f885de3c13b12..7fdd951ecbd3c 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -674,17 +674,6 @@ class VOPProfile_Base_CVT_F_F8_ByteSel<ValueType DstVT> : VOPProfile<[DstVT, i32
let HasClamp = 0;
let HasOMod = 0;
let HasModifiers = 0;
-
- defvar bytesel = (ins ByteSel:$byte_sel);
- let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasClamp, HasModifiers, HasSrc2Mods,
- HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
- bytesel);
- let InsVOP3Base = !con(getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP,
- NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods,
- HasOMod, Src0ModVOP3DPP, Src1ModVOP3DPP,
- Src2ModVOP3DPP, HasOpSel>.ret,
- bytesel);
}
let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts],
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index a7b90b9e319da..0252c4f1b0929 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -593,6 +593,7 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
let HasExtVOP3DPP = 1;
let HasOpSel = 1;
let HasFP8DstByteSel = 1;
+ let HasFP8ByteSel = 0; // It works as a dst-bytesel, but does not have byte_sel operand.
let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
getAsmVOP3OpSel<3, HasClamp, HasOMod,
HasSrc0FloatMods, HasSrc1FloatMods,
@@ -607,16 +608,6 @@ class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
VOP3_Profile<VOPProfile<[i32, SrcVT, i32, untyped]>> {
let HasFP8DstByteSel = 1;
let HasClamp = 0;
- defvar bytesel = (ins VGPR_32:$vdst_in, ByteSel:$byte_sel);
- let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasClamp, HasModifiers, HasSrc2Mods,
- HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
- bytesel);
- let InsVOP3Base = !con(
- getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
- Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
- Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret,
- bytesel);
}
def IsPow2Plus1: PatLeaf<(i32 imm), [{
|
071898b
to
a29291e
Compare
@@ -1987,20 +1989,29 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, | ||
(ins Src2Mod:$src2_modifiers, Src2RC:$src2), | ||
(ins Src2RC:$src2)), | ||
(ins)); | ||
dag clamp = !if(HasClamp, (ins Clamp0:$clamp), (ins)); | ||
// If there is vdst_in after clamp with HasFP8DstByteSel we cannot use |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can change the placement of the byte_sel operands, but that is a big disruption. If needed it will require a separate change.
JFYI, all of that is needed because I want to make this working with t16, and it copies profiles and recreates dags. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
No description provided.