-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Add narrowToSingleScalarRecipe transform. #139150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
50be059
df295ec
a8dda3f
a7e9545
cd4b3bf
c0bf19d
451d82a
fbf5f9d
5b15a9b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1085,6 +1085,42 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) { | |||||
} | ||||||
} | ||||||
|
||||||
static void narrowToSingleScalarRecipes(VPlan &Plan) { | ||||||
if (Plan.hasScalarVFOnly()) | ||||||
return; | ||||||
|
||||||
// Try to narrow wide and replicating recipes to single scalar recipes, | ||||||
// based on VPlan analysis. Only process blocks in the loop region for now, | ||||||
// without traversing into nested regions, as recipes in replicate regions | ||||||
// cannot be converted yet. | ||||||
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( | ||||||
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) { | ||||||
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { | ||||||
auto *RepR = dyn_cast<VPReplicateRecipe>(&R); | ||||||
if (!RepR && !isa<VPWidenRecipe>(&R)) | ||||||
continue; | ||||||
if (RepR && RepR->isSingleScalar()) | ||||||
continue; | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
or There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done thanks |
||||||
auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R); | ||||||
// Skip recipes that aren't single scalars or don't have only their | ||||||
// scalar results used. In the latter case, we would introduce extra | ||||||
// broadcasts. | ||||||
if (!vputils::isSingleScalar(RepOrWidenR) || | ||||||
any_of(RepOrWidenR->users(), [RepOrWidenR](VPUser *U) { | ||||||
return !U->usesScalars(RepOrWidenR); | ||||||
})) | ||||||
continue; | ||||||
|
||||||
auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(), | ||||||
RepOrWidenR->operands(), | ||||||
true /*IsSingleScalar*/); | ||||||
Clone->insertBefore(RepOrWidenR); | ||||||
RepOrWidenR->replaceAllUsesWith(Clone); | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
/// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes | ||||||
/// to make sure the masks are simplified. | ||||||
static void simplifyBlends(VPlan &Plan) { | ||||||
|
@@ -1779,6 +1815,7 @@ void VPlanTransforms::optimize(VPlan &Plan) { | |||||
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType()); | ||||||
runPass(simplifyBlends, Plan); | ||||||
runPass(removeDeadRecipes, Plan); | ||||||
runPass(narrowToSingleScalarRecipes, Plan); | ||||||
runPass(legalizeAndOptimizeInductions, Plan); | ||||||
runPass(removeRedundantExpandSCEVRecipes, Plan); | ||||||
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType()); | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,86 +7,87 @@ define void @test(ptr %p, i40 %a) { | |
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] | ||
; CHECK: vector.ph: | ||
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i1 true, false | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this the said degradation? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, trivial folding that at the moment happens in IRBuilder on VPWidenRecipe, but not on replicate recipes which clone the original instruction. Will be fixed by pending VP constant folder |
||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] | ||
; CHECK: vector.body: | ||
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] | ||
; CHECK: pred.store.if: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] | ||
; CHECK: pred.store.continue: | ||
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] | ||
; CHECK: pred.store.if1: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] | ||
; CHECK: pred.store.continue2: | ||
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] | ||
; CHECK: pred.store.if3: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] | ||
; CHECK: pred.store.continue4: | ||
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] | ||
; CHECK: pred.store.if5: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] | ||
; CHECK: pred.store.continue6: | ||
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] | ||
; CHECK: pred.store.if7: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] | ||
; CHECK: pred.store.continue8: | ||
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] | ||
; CHECK: pred.store.if9: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] | ||
; CHECK: pred.store.continue10: | ||
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] | ||
; CHECK: pred.store.if11: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] | ||
; CHECK: pred.store.continue12: | ||
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] | ||
; CHECK: pred.store.if13: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] | ||
; CHECK: pred.store.continue14: | ||
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]] | ||
; CHECK: pred.store.if15: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] | ||
; CHECK: pred.store.continue16: | ||
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]] | ||
; CHECK: pred.store.if17: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]] | ||
; CHECK: pred.store.continue18: | ||
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]] | ||
; CHECK: pred.store.if19: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]] | ||
; CHECK: pred.store.continue20: | ||
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] | ||
; CHECK: pred.store.if21: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]] | ||
; CHECK: pred.store.continue22: | ||
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] | ||
; CHECK: pred.store.if23: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] | ||
; CHECK: pred.store.continue24: | ||
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]] | ||
; CHECK: pred.store.if25: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]] | ||
; CHECK: pred.store.continue26: | ||
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]] | ||
; CHECK: pred.store.if27: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]] | ||
; CHECK: pred.store.continue28: | ||
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]] | ||
; CHECK: pred.store.if29: | ||
; CHECK-NEXT: store i1 false, ptr [[P]], align 1 | ||
; CHECK-NEXT: store i1 [[TMP0]], ptr [[P]], align 1 | ||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]] | ||
; CHECK: pred.store.continue30: | ||
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suffice to traverse shallow?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, we cannot convert to uniform recipes in replicate regions at they moment, they need hoisting out first.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth a comment. This also prevents narrowing recipes in nested loop regions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added thanks