[LV] Fold isPredicatedInst into isMaskRequired #134261
Conversation
@llvm/pr-subscribers-vectorizers

Author: Ramkumar Ramachandra (artagnon)

Changes

Fold LoopVectorizationCostModel::isPredicatedInst into LoopVectorizationLegality::isMaskRequired, fixing a pending TODO item. Note that we still need to pass whether or not we're tail-folding by masking from the cost-model into isMaskRequired.

Patch is 134.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134261.diff

6 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d654ac3ec9273..00f31df1b62cb 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -420,9 +420,7 @@ class LoopVectorizationLegality {
/// Returns true if vector representation of the instruction \p I
/// requires mask.
- bool isMaskRequired(const Instruction *I) const {
- return MaskedOp.contains(I);
- }
+ bool isMaskRequired(Instruction *I, bool FoldTailByMasking) const;
/// Returns true if there is at least one function call in the loop which
/// has a vectorized variant available.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3ec6850d6f685..8f18e2d099751 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1408,6 +1408,57 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
+bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
+ bool FoldTailByMasking) const {
+ if (isSafeToSpeculativelyExecute(I, TheLoop->getLatchCmpInst()) ||
+ (isa<LoadInst, StoreInst, CallInst>(I) && !MaskedOp.contains(I)) ||
+ isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
+ return false;
+
+ // If the instruction was executed conditionally in the original scalar loop,
+ // predication is needed with a mask whose lanes are all possibly inactive.
+ if (blockNeedsPredication(I->getParent()))
+ return true;
+
+ // If we're not folding tail by masking, bail out now.
+ if (!FoldTailByMasking)
+ return false;
+
+ // All that remain are instructions with side-effects originally executed in
+ // the loop unconditionally, but now execute under a tail-fold mask (only)
+ // having at least one active lane (the first). If the side-effects of the
+ // instruction are invariant, executing it w/o (the tail-folding) mask is safe
+ // - it will cause the same side-effects as when masked.
+ switch (I->getOpcode()) {
+ default:
+ llvm_unreachable(
+ "instruction should have been considered by earlier checks");
+ case Instruction::Call:
+ // Side-effects of a Call are assumed to be non-invariant, needing a
+ // (fold-tail) mask.
+ assert(MaskedOp.contains(I) &&
+ "should have returned earlier for calls not needing a mask");
+ return true;
+ case Instruction::Load:
+ // If the address is loop invariant no predication is needed.
+ return !isInvariant(getLoadStorePointerOperand(I));
+ case Instruction::Store: {
+ // For stores, we need to prove both speculation safety (which follows from
+ // the same argument as loads), but also must prove the value being stored
+ // is correct. The easiest form of the later is to require that all values
+ // stored are the same.
+ return !(isInvariant(getLoadStorePointerOperand(I)) &&
+ TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
+ }
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // If the divisor is loop-invariant no predication is needed.
+ return !TheLoop->isLoopInvariant(I->getOperand(1));
+ }
+}
+
bool LoopVectorizationLegality::blockCanBePredicated(
BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 55cc801e91452..8028829ee6810 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1313,11 +1313,6 @@ class LoopVectorizationCostModel {
/// \p VF is the vectorization factor that will be used to vectorize \p I.
bool isScalarWithPredication(Instruction *I, ElementCount VF) const;
- /// Returns true if \p I is an instruction that needs to be predicated
- /// at runtime. The result is independent of the predication mechanism.
- /// Superset of instructions that return true for isScalarWithPredication.
- bool isPredicatedInst(Instruction *I) const;
-
/// Return the costs for our two available strategies for lowering a
/// div/rem operation which requires speculating at least one lane.
/// First result is for scalarization (will be invalid for scalable
@@ -3203,7 +3198,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
bool LoopVectorizationCostModel::isScalarWithPredication(
Instruction *I, ElementCount VF) const {
- if (!isPredicatedInst(I))
+ if (!Legal->isMaskRequired(I, foldTailByMasking()))
return false;
// Do we have a non-scalar lowering for this predicated
@@ -3242,57 +3237,6 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
}
}
-// TODO: Fold into LoopVectorizationLegality::isMaskRequired.
-bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
- // If predication is not needed, avoid it.
- // TODO: We can use the loop-preheader as context point here and get
- // context sensitive reasoning for isSafeToSpeculativelyExecute.
- if (!blockNeedsPredicationForAnyReason(I->getParent()) ||
- isSafeToSpeculativelyExecute(I) ||
- (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) ||
- isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
- return false;
-
- // If the instruction was executed conditionally in the original scalar loop,
- // predication is needed with a mask whose lanes are all possibly inactive.
- if (Legal->blockNeedsPredication(I->getParent()))
- return true;
-
- // All that remain are instructions with side-effects originally executed in
- // the loop unconditionally, but now execute under a tail-fold mask (only)
- // having at least one active lane (the first). If the side-effects of the
- // instruction are invariant, executing it w/o (the tail-folding) mask is safe
- // - it will cause the same side-effects as when masked.
- switch(I->getOpcode()) {
- default:
- llvm_unreachable(
- "instruction should have been considered by earlier checks");
- case Instruction::Call:
- // Side-effects of a Call are assumed to be non-invariant, needing a
- // (fold-tail) mask.
- assert(Legal->isMaskRequired(I) &&
- "should have returned earlier for calls not needing a mask");
- return true;
- case Instruction::Load:
- // If the address is loop invariant no predication is needed.
- return !Legal->isInvariant(getLoadStorePointerOperand(I));
- case Instruction::Store: {
- // For stores, we need to prove both speculation safety (which follows from
- // the same argument as loads), but also must prove the value being stored
- // is correct. The easiest form of the later is to require that all values
- // stored are the same.
- return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
- TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
- }
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::URem:
- // If the divisor is loop-invariant no predication is needed.
- return !TheLoop->isLoopInvariant(I->getOperand(1));
- }
-}
-
std::pair<InstructionCost, InstructionCost>
LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
ElementCount VF) const {
@@ -3405,7 +3349,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
// load, or any gaps in a store-access).
bool PredicatedAccessRequiresMasking =
blockNeedsPredicationForAnyReason(I->getParent()) &&
- Legal->isMaskRequired(I);
+ Legal->isMaskRequired(I, foldTailByMasking());
bool LoadAccessWithGapsRequiresEpilogMasking =
isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
!isScalarEpilogueAllowed();
@@ -3494,7 +3438,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
<< *I << "\n");
return;
}
- if (isPredicatedInst(I)) {
+ if (Legal->isMaskRequired(I, foldTailByMasking())) {
LLVM_DEBUG(
dbgs() << "LV: Found not uniform due to requiring predication: " << *I
<< "\n");
@@ -5379,7 +5323,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
// from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
- assert((isPredicatedInst(I)) &&
+ assert((Legal->isMaskRequired(I, foldTailByMasking())) &&
"Expecting a scalar emulated instruction");
return isa<LoadInst>(I) ||
(isa<StoreInst>(I) &&
@@ -5677,7 +5621,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// If we have a predicated load/store, it will need extra i1 extracts and
// conditional branches, but may not be executed for each vector lane. Scale
// the cost by the probability of executing the predicated block.
- if (isPredicatedInst(I)) {
+ if (Legal->isMaskRequired(I, foldTailByMasking())) {
Cost /= getPredBlockCostDivisor(CostKind);
// Add the cost of an i1 extract and a branch
@@ -5710,7 +5654,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
"Stride should be 1 or -1 for consecutive memory access");
const Align Alignment = getLoadStoreAlignment(I);
InstructionCost Cost = 0;
- if (Legal->isMaskRequired(I)) {
+ if (Legal->isMaskRequired(I, foldTailByMasking())) {
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
CostKind);
} else {
@@ -5763,9 +5707,10 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
const Value *Ptr = getLoadStorePointerOperand(I);
return TTI.getAddressComputationCost(VectorTy) +
- TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
- Legal->isMaskRequired(I), Alignment,
- CostKind, I);
+ TTI.getGatherScatterOpCost(
+ I->getOpcode(), VectorTy, Ptr,
+ Legal->isMaskRequired(I, foldTailByMasking()), Alignment, CostKind,
+ I);
}
InstructionCost
@@ -5794,12 +5739,12 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
(isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
InsertPos->getOpcode(), WideVecTy, Group->getFactor(), Indices,
- Group->getAlign(), AS, CostKind, Legal->isMaskRequired(I),
- UseMaskForGaps);
+ Group->getAlign(), AS, CostKind,
+ Legal->isMaskRequired(I, foldTailByMasking()), UseMaskForGaps);
if (Group->isReverse()) {
// TODO: Add support for reversed masked interleaved access.
- assert(!Legal->isMaskRequired(I) &&
+ assert(!Legal->isMaskRequired(I, foldTailByMasking()) &&
"Reverse masked interleaved access not supported.");
Cost += Group->getNumMembers() *
TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, {},
@@ -6287,7 +6232,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
continue;
}
- bool MaskRequired = Legal->isMaskRequired(CI);
+ bool MaskRequired = Legal->isMaskRequired(CI, foldTailByMasking());
// Compute corresponding vector type for return value and arguments.
Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
for (Type *ScalarTy : ScalarTys)
@@ -6407,7 +6352,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
// instruction in the loop. In that case, it is not trivially hoistable.
auto *OpI = dyn_cast<Instruction>(Op);
return !OpI || !TheLoop->contains(OpI) ||
- (!isPredicatedInst(OpI) &&
+ (!Legal->isMaskRequired(OpI, foldTailByMasking()) &&
(!isa<PHINode>(OpI) || OpI->getParent() != TheLoop->getHeader()) &&
all_of(OpI->operands(),
[this](Value *Op) { return shouldConsiderInvariant(Op); }));
@@ -6595,7 +6540,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
- if (VF.isVector() && isPredicatedInst(I)) {
+ if (VF.isVector() && Legal->isMaskRequired(I, foldTailByMasking())) {
const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost(I, VF);
return isDivRemScalarWithPredication(ScalarCost, SafeDivisorCost) ?
ScalarCost : SafeDivisorCost;
@@ -6779,8 +6724,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
return TTI::CastContextHint::Interleave;
case LoopVectorizationCostModel::CM_Scalarize:
case LoopVectorizationCostModel::CM_Widen:
- return Legal->isMaskRequired(I) ? TTI::CastContextHint::Masked
- : TTI::CastContextHint::Normal;
+ return Legal->isMaskRequired(I, foldTailByMasking())
+ ? TTI::CastContextHint::Masked
+ : TTI::CastContextHint::Normal;
case LoopVectorizationCostModel::CM_Widen_Reverse:
return TTI::CastContextHint::Reversed;
case LoopVectorizationCostModel::CM_Unknown:
@@ -8317,7 +8263,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
return nullptr;
VPValue *Mask = nullptr;
- if (Legal->isMaskRequired(I))
+ if (Legal->isMaskRequired(I, CM.foldTailByMasking()))
Mask = getBlockInMask(I->getParent());
// Determine if the pointer operand of the access is either consecutive or
@@ -8543,7 +8489,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
// vector variant at this VF requires a mask, so we synthesize an
// all-true mask.
VPValue *Mask = nullptr;
- if (Legal->isMaskRequired(CI))
+ if (Legal->isMaskRequired(CI, CM.foldTailByMasking()))
Mask = getBlockInMask(CI->getParent());
else
Mask = Plan.getOrAddLiveIn(
@@ -8584,7 +8530,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
case Instruction::URem: {
// If not provably safe, use a select to form a safe divisor before widening the
// div/rem operation itself. Otherwise fall through to general handling below.
- if (CM.isPredicatedInst(I)) {
+ if (Legal->isMaskRequired(I, CM.foldTailByMasking())) {
SmallVector<VPValue *> Ops(Operands);
VPValue *Mask = getBlockInMask(I->getParent());
VPValue *One =
@@ -8667,7 +8613,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
// In case of predicated execution (due to tail-folding, or conditional
// execution, or both), pass the relevant mask.
- if (Legal->isMaskRequired(HI->Store))
+ if (Legal->isMaskRequired(HI->Store, CM.foldTailByMasking()))
HGramOps.push_back(getBlockInMask(HI->Store->getParent()));
return new VPHistogramRecipe(Opcode,
@@ -8682,7 +8628,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
[&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
- bool IsPredicated = CM.isPredicatedInst(I);
+ bool IsPredicated = Legal->isMaskRequired(I, CM.foldTailByMasking());
// Even if the instruction is not marked as uniform, there are certain
// intrinsic calls that can be effectively treated as such, so we check for
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
index 121a6ed53309e..4e79965420ab1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll
@@ -67,10 +67,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]]
-; TFCOMMON-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3]]
-; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
+; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[LD]], i64 0
+; TFCOMMON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; TFCOMMON-NEXT: [[TMP8:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
; TFCOMMON-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true)
; TFCOMMON-NEXT: [[TMP11:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
@@ -114,14 +113,10 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ]
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ]
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8
-; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]]
-; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
-; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP6]], i32 1
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
-; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3]]
-; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0
-; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP10]], i32 1
+; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i64 0
+; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
+; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call <2 x double> @exp_fixed(<2 x double> [[BROADCAST_SPLAT]])
; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP13]], splat (i1 true)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 36c3a4a2b4e43..9ff4da28c6a5c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -1040,65 +1040,161 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFNONE: [[END]]:
; TFNONE-NEXT: ret void
;
-; TFCOMMON-LABEL: define void @test_widen_exp_v2(
-; TFCOMMON-SAME: ptr noalias [[P2:%.*]], ptr noalias [[P:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
-; TFCOMMO...
[truncated]
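The heart of the patch is the interface move visible in the first two hunks: the predication query leaves LoopVectorizationCostModel and becomes LoopVectorizationLegality::isMaskRequired(Instruction *I, bool FoldTailByMasking), with the cost model passing its foldTailByMasking() decision at every former isPredicatedInst call site. Below is a minimal stand-alone sketch of that shape, not the real LLVM classes: the types, the placeholder predicate bodies, and the needsPredication wrapper are illustrative stand-ins; only the class names, the new isMaskRequired signature, and the call-site pattern come from the diff above.

```cpp
// Minimal sketch of the interface change, with stand-in types and placeholder
// logic. Class/member names mirror the patch; the bodies do not.
#include <iostream>

struct Instruction {}; // stand-in for llvm::Instruction

class LoopVectorizationLegality {
public:
  // New signature: legality owns the query, but the tail-folding decision
  // is still supplied by the caller (the cost model).
  bool isMaskRequired(const Instruction *I, bool FoldTailByMasking) const {
    // Conditionally executed in the original scalar loop: a mask is needed.
    if (blockNeedsPredication(I))
      return true;
    // Otherwise only a tail-fold mask can force predication, and only for
    // instructions whose side effects are not loop-invariant.
    return FoldTailByMasking && hasNonInvariantSideEffects(I);
  }

private:
  // Placeholder analyses standing in for the real legality checks.
  bool blockNeedsPredication(const Instruction *) const { return false; }
  bool hasNonInvariantSideEffects(const Instruction *) const { return true; }
};

class LoopVectorizationCostModel {
public:
  explicit LoopVectorizationCostModel(const LoopVectorizationLegality &L)
      : Legal(L) {}

  bool foldTailByMasking() const { return true; } // placeholder decision

  // What used to be `isPredicatedInst(I)` inside the cost model becomes a
  // forwarded query: Legal->isMaskRequired(I, foldTailByMasking()).
  bool needsPredication(const Instruction *I) const {
    return Legal.isMaskRequired(I, foldTailByMasking());
  }

private:
  const LoopVectorizationLegality &Legal;
};

int main() {
  LoopVectorizationLegality Legal;
  LoopVectorizationCostModel CM(Legal);
  Instruction I;
  std::cout << std::boolalpha << CM.needsPredication(&I) << '\n'; // true
}
```

The reason FoldTailByMasking is passed as an argument rather than queried from inside legality is the one noted in the description: the tail-folding decision still lives in the cost model, so the caller has to hand it across.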
@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

(Same summary and truncated diff as the @llvm/pr-subscribers-vectorizers comment above.)
Force-pushed from e6c7094 to a15f966.
Fold LoopVectorizationCostModel::isPredicatedInst into LoopVectorizationLegality::isMaskRequired, fixing a pending TODO item. Note that we still need to pass whether or not we're tail-folding by masking from the cost-model into isMaskRequired.
Force-pushed from a15f966 to d46f279.
Gentle ping.