Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

[AMDGPU] Automate creation of byte_sel dags. NFCI. #140155

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

rampitec
Copy link
Collaborator

No description provided.

Copy link
Collaborator Author

rampitec commented May 15, 2025

@rampitec rampitec requested review from arsenm and shiltian May 15, 2025 22:48
@rampitec rampitec marked this pull request as ready for review May 15, 2025 22:48
@llvmbot
Copy link
Member

llvmbot commented May 15, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/140155.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+15-6)
  • (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (-11)
  • (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+1-10)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 47ee0a7f60ee3..a5814dd7cf9f4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1293,6 +1293,7 @@ def WaitVMVSrc : NamedIntOperand<"wait_vm_vsrc"> {
 def ByteSel : NamedIntOperand<"byte_sel"> {
   let Validator = "isUInt<2>";
 }
+def ByteSel0 : DefaultOperand<ByteSel, 0>;
 
 let PrintMethod = "printBitOp3" in
 def BitOp3 : NamedIntOperand<"bitop3">;
@@ -1971,7 +1972,8 @@ class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs>
 class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs,
                 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
-                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod,
+                bit HasFP8ByteSel = 0, bit HasFP8DstByteSel = 0> {
   dag src0 = !if(!ge(NumSrcArgs, 1),
                  !if (HasModifiers,
                       (ins Src0Mod:$src0_modifiers, Src0RC:$src0),
@@ -1989,18 +1991,23 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                  (ins));
   dag clamp = !if(HasClamp, (ins Clamp0:$clamp), (ins));
   dag omod = !if(HasOMod, (ins omod0:$omod), (ins));
+  dag bytesel = !if(HasFP8ByteSel,
+                    !con(!if(HasFP8DstByteSel, (ins VGPR_32:$vdst_in), (ins)),
+                         (ins ByteSel0:$byte_sel)),
+                    (ins));
 
-  dag ret = !con(src0, src1, src2, clamp, omod);
+  dag ret = !con(src0, src1, src2, clamp, omod, bytesel);
 }
 
 class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs,
                 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
-                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
+                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
+                bit HasFP8ByteSel = 0, bit HasFP8DstByteSel = 0> {
   // getInst64 handles clamp and omod. implicit mutex between vop3p and omod
   dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
                 HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
-                Src0Mod, Src1Mod, Src2Mod>.ret;
+                Src0Mod, Src1Mod, Src2Mod, HasFP8ByteSel, HasFP8DstByteSel>.ret;
   dag opsel = (ins op_sel0:$op_sel);
   dag ret = !con(base, !if(HasOpSel, opsel, (ins)));
 }
@@ -2612,7 +2619,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
   field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                              HasClamp, HasModifiers, HasSrc2Mods,
-                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
+                             HasOMod, Src0Mod, Src1Mod, Src2Mod,
+                             HasFP8ByteSel, HasFP8DstByteSel>.ret;
   field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
                                    NumSrcArgs, HasClamp, HasOpSel, HasNeg,
                                    Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
@@ -2630,7 +2638,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
                                  Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
   defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
                   Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
-                  Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
+                  Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel,
+                  HasFP8ByteSel, HasFP8DstByteSel>.ret;
   defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
                   Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg,
                   Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index f885de3c13b12..7fdd951ecbd3c 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -674,17 +674,6 @@ class VOPProfile_Base_CVT_F_F8_ByteSel<ValueType DstVT> : VOPProfile<[DstVT, i32
   let HasClamp = 0;
   let HasOMod = 0;
   let HasModifiers = 0;
-
-  defvar bytesel = (ins ByteSel:$byte_sel);
-  let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
-                            HasClamp, HasModifiers, HasSrc2Mods,
-                            HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
-                   bytesel);
-  let InsVOP3Base = !con(getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP,
-                                        NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods,
-                                        HasOMod, Src0ModVOP3DPP, Src1ModVOP3DPP,
-                                        Src2ModVOP3DPP, HasOpSel>.ret,
-                         bytesel);
 }
 
 let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts],
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index a7b90b9e319da..0252c4f1b0929 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -593,6 +593,7 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
   let HasExtVOP3DPP = 1;
   let HasOpSel = 1;
   let HasFP8DstByteSel = 1;
+  let HasFP8ByteSel = 0; // It works as a dst-bytesel, but does not have byte_sel operand.
   let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
                             getAsmVOP3OpSel<3, HasClamp, HasOMod,
                                             HasSrc0FloatMods, HasSrc1FloatMods,
@@ -607,16 +608,6 @@ class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
   VOP3_Profile<VOPProfile<[i32, SrcVT, i32, untyped]>> {
   let HasFP8DstByteSel = 1;
   let HasClamp = 0;
-  defvar bytesel = (ins VGPR_32:$vdst_in, ByteSel:$byte_sel);
-  let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
-                            HasClamp, HasModifiers, HasSrc2Mods,
-                            HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
-                   bytesel);
-  let InsVOP3Base = !con(
-    getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
-                   Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
-                   Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret,
-    bytesel);
 }
 
 def IsPow2Plus1: PatLeaf<(i32 imm), [{

Base automatically changed from users/rampitec/05-15-_amdgpu_cleanup_bytesel_variables._nfc to main May 15, 2025 23:04
@rampitec rampitec force-pushed the users/rampitec/05-15-_amdgpu_automate_creation_of_byte_sel_dags._nfci branch from 071898b to a29291e Compare May 15, 2025 23:36
@@ -1987,20 +1989,29 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
(ins Src2Mod:$src2_modifiers, Src2RC:$src2),
(ins Src2RC:$src2)),
(ins));
dag clamp = !if(HasClamp, (ins Clamp0:$clamp), (ins));
// If there is vdst_in after clamp with HasFP8DstByteSel we cannot use
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can change the placement of the byte_sel operands, but that is a big disruption. If needed it will require a separate change.

@rampitec
Copy link
Collaborator Author

JFYI, all of that is needed because I want to make this working with t16, and it copies profiles and recreates dags.

Copy link
Contributor

@shiltian shiltian left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@rampitec rampitec merged commit be6c168 into main May 16, 2025
11 checks passed
@rampitec rampitec deleted the users/rampitec/05-15-_amdgpu_automate_creation_of_byte_sel_dags._nfci branch May 16, 2025 15:54
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants
Morty Proxy This is a proxified and sanitized view of the page, visit original site.