Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 6d31707

Browse files
AMDGPU/GlobalISel: add RegBankLegalize rules for select
A uniform S1 condition is any-extended to S32, and its high bits are cleared using AND with 1. A divergent S1 condition uses VCC. B32/B64 rules are used to cover scalar, vector and pointer types. Divergent B64 is split to S32.
1 parent fdca6f3 commit 6d31707
Copy full SHA for 6d31707

File tree

Expand file treeCollapse file tree

5 files changed

+653
-1280
lines changed
Filter options
Expand file treeCollapse file tree

5 files changed

+653
-1280
lines changed

‎llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+21-1Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,23 @@ void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
293293
return;
294294
}
295295

296+
// Lowers a select-like instruction by splitting its two value operands into
// pieces, selecting each piece with the same condition, and merging the
// results back into the original destination register.
//
// Operand layout read from MI: 0 = destination, 1 = condition,
// 2 and 3 = the two values being selected between.
// Only pieces 0 and 1 of each unmerge are used, so this presumably expects
// operands that split into exactly two pieces (it is wired to the divergent
// B64 G_SELECT rule) -- TODO confirm no wider types reach here.
void RegBankLegalizeHelper::lowerSplitTo32Sel(MachineInstr &MI) {
297+
Register Dst = MI.getOperand(0).getReg();
298+
// A V4S16 destination is split into V2S16 pieces; any other type into S32.
LLT Ty = MRI.getType(Dst) == V4S16 ? V2S16 : S32;
299+
// Unmerge both value operands into pieces of type Ty on the VGPR bank.
auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
300+
auto Op3 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(3).getReg());
301+
Register Cond = MI.getOperand(1).getReg();
302+
// The original instruction's flags are passed on to both new selects.
auto Flags = MI.getFlags();
303+
// Select on piece 0 of each operand.
auto Lo =
304+
B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(0), Op3.getReg(0), Flags);
305+
// Select on piece 1 of each operand, using the same condition.
auto Hi =
306+
B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(1), Op3.getReg(1), Flags);
307+
308+
// Recombine the two partial results into Dst, then drop the original MI.
B.buildMergeLikeInstr(Dst, {Lo, Hi});
309+
MI.eraseFromParent();
310+
return;
311+
}
312+
296313
void RegBankLegalizeHelper::lower(MachineInstr &MI,
297314
const RegBankLLTMapping &Mapping,
298315
SmallSet<Register, 4> &WaterfallSgprs) {
@@ -379,6 +396,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
379396
return lowerUni_BFE(MI);
380397
case SplitTo32:
381398
return lowerSplitTo32(MI);
399+
case SplitTo32Sel:
400+
return lowerSplitTo32Sel(MI);
382401
case SplitLoad: {
383402
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
384403
unsigned Size = DstTy.getSizeInBits();
@@ -492,7 +511,8 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) {
492511
case UniInVgprB64:
493512
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
494513
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(0, 64) ||
495-
Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64))
514+
Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64) ||
515+
Ty == LLT::pointer(999, 64))
496516
return Ty;
497517
return LLT();
498518
case SgprB96:

‎llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h

Copy file name to clipboardExpand all lines: llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ class RegBankLegalizeHelper {
113113
void lowerDiv_BFE(MachineInstr &MI);
114114
void lowerUni_BFE(MachineInstr &MI);
115115
void lowerSplitTo32(MachineInstr &MI);
116+
void lowerSplitTo32Sel(MachineInstr &MI);
116117
};
117118

118119
} // end namespace AMDGPU

‎llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+6-2Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
198198
return B32;
199199
if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
200200
Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(1, 64) ||
201-
Ty == LLT::pointer(4, 64))
201+
Ty == LLT::pointer(4, 64) || Ty == LLT::pointer(999, 64))
202202
return B64;
203203
if (Ty == LLT::fixed_vector(3, 32))
204204
return B96;
@@ -485,8 +485,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
485485
addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});
486486

487487
addRulesForGOpcs({G_SELECT}, StandardB)
488+
.Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
489+
.Any({{UniS16}, {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
488490
.Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
489-
.Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}});
491+
.Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
492+
.Div(B64, {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Sel})
493+
.Uni(B64, {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});
490494

491495
addRulesForGOpcs({G_ANYEXT})
492496
.Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away

‎llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Copy file name to clipboardExpand all lines: llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ enum LoweringMethodID {
177177
Div_BFE,
178178
VgprToVccCopy,
179179
SplitTo32,
180+
SplitTo32Sel,
180181
Ext32To64,
181182
UniCstExt,
182183
SplitLoad,

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.