-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Connect Entry to scalar preheader during initial construction. #140132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d481278
76212e6
a324d27
2ad3e76
f4a28d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -490,7 +490,7 @@ class InnerLoopVectorizer { | |||||
MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor), | ||||||
Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI), | ||||||
RTChecks(RTChecks), Plan(Plan), | ||||||
VectorPHVPB(Plan.getEntry()->getSingleSuccessor()) {} | ||||||
VectorPHVPB(Plan.getVectorLoopRegion()->getSinglePredecessor()) {} | ||||||
|
||||||
virtual ~InnerLoopVectorizer() = default; | ||||||
|
||||||
|
@@ -2366,16 +2366,15 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { | |||||
} | ||||||
|
||||||
void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Clarify that CheckBlock now excludes the initial trip-count check, which is expected to have been introduced already before calling introduceCheckBlockInVPlan(). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added a note, thanks. |
||||||
// Note: The block with the minimum trip-count check is already connected | ||||||
// during earlier VPlan construction. | ||||||
VPBlockBase *ScalarPH = Plan.getScalarPreheader(); | ||||||
VPBlockBase *PreVectorPH = VectorPHVPB->getSinglePredecessor(); | ||||||
if (PreVectorPH->getNumSuccessors() != 1) { | ||||||
assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors"); | ||||||
assert(PreVectorPH->getSuccessors()[0] == ScalarPH && | ||||||
"Unexpected successor"); | ||||||
VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB); | ||||||
VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB); | ||||||
PreVectorPH = CheckVPIRBB; | ||||||
} | ||||||
assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors"); | ||||||
assert(PreVectorPH->getSuccessors()[0] == ScalarPH && "Unexpected successor"); | ||||||
VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB); | ||||||
VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB); | ||||||
PreVectorPH = CheckVPIRBB; | ||||||
VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH); | ||||||
PreVectorPH->swapSuccessors(); | ||||||
|
||||||
|
@@ -2467,8 +2466,9 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { | |||||
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||||||
LoopBypassBlocks.push_back(TCCheckBlock); | ||||||
|
||||||
// TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here. | ||||||
introduceCheckBlockInVPlan(TCCheckBlock); | ||||||
assert(cast<VPIRBasicBlock>(Plan.getEntry())->getIRBasicBlock() == | ||||||
TCCheckBlock && | ||||||
"Plan's entry must be TCCCheckBlock"); | ||||||
} | ||||||
|
||||||
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { | ||||||
|
@@ -7667,7 +7667,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |||||
|
||||||
// 1. Set up the skeleton for vectorization, including vector pre-header and | ||||||
// middle block. The vector loop is created during VPlan execution. | ||||||
VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor()); | ||||||
VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSuccessors()[1]); | ||||||
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(); | ||||||
if (VectorizingEpilogue) | ||||||
VPlanTransforms::removeDeadRecipes(BestVPlan); | ||||||
|
@@ -7899,7 +7899,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass, | |||||
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); | ||||||
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||||||
|
||||||
introduceCheckBlockInVPlan(TCCheckBlock); | ||||||
// When vectorizing the main loop, its trip-count check is placed in a new | ||||||
// block, whereas the overall trip-count check is placed in the VPlan entry | ||||||
// block. When vectorizing the epilogue loop, its trip-count check is placed | ||||||
// in the VPlan entry block. | ||||||
if (!ForEpilogue) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added thanks |
||||||
introduceCheckBlockInVPlan(TCCheckBlock); | ||||||
return TCCheckBlock; | ||||||
} | ||||||
|
||||||
|
@@ -8029,7 +8034,6 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck( | |||||
Plan.setEntry(NewEntry); | ||||||
// OldEntry is now dead and will be cleaned up when the plan gets destroyed. | ||||||
|
||||||
introduceCheckBlockInVPlan(Insert); | ||||||
return Insert; | ||||||
} | ||||||
|
||||||
|
@@ -8786,7 +8790,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, | |||||
DenseMap<VPValue *, VPValue *> &IVEndValues) { | ||||||
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); | ||||||
auto *ScalarPH = Plan.getScalarPreheader(); | ||||||
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor()); | ||||||
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]); | ||||||
Comment on lines
8792
to
+8793
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggesting a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, will check where this could be useful separately. |
||||||
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); | ||||||
VPBuilder VectorPHBuilder( | ||||||
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())); | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -540,6 +540,9 @@ void VPlanTransforms::prepareForVectorization( | |
if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Independent: worth clarifying that not requiring a scalar epilog check means the scalar epilog is (always) required, i.e., case 1. |
||
VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB); | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH); | ||
Plan.getEntry()->swapSuccessors(); | ||
|
||
// The exit blocks are unreachable, remove their recipes to make sure no | ||
// users remain that may pessimize transforms. | ||
for (auto *EB : Plan.getExitBlocks()) { | ||
|
@@ -552,6 +555,11 @@ void VPlanTransforms::prepareForVectorization( | |
// The connection order corresponds to the operands of the conditional branch, | ||
// with the middle block already connected to the exit block. | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
// Also connect the entry block to the scalar preheader. | ||
// TODO: Also introduce a branch recipe together with the minimum trip count | ||
// check. | ||
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH); | ||
Plan.getEntry()->swapSuccessors(); | ||
|
||
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); | ||
// Here we use the same DebugLoc as the scalar loop latch terminator instead | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that ILV now depends on the HCFG here and elsewhere. It may be better to retrieve the first hierarchical predecessor of the first header block to relax this dependence, possibly as a follow-up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good!