-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[LAA] Rewrite findForkedPointer, fixing freeze #140298
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
findForkedPointer is a mysteriously named function whose result is assigned to another mysteriously named variable, TranslatedPtrs. Thoroughly rewrite the logic surrounding this, replacing findForkedPointer with getRTCheckPtrs, and simplifying createCheckForAccess. The clean rewrite has fixed two issues: first, the AddRec does not need to be affine for runtime-checks to be generated, and second, the NeedsFreeze field of the PointerIntPair was not really being respected, and its handling has been completely redone.
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) Changes: findForkedPointer is a mysteriously named function whose result is assigned to another mysteriously named variable, TranslatedPtrs. Thoroughly rewrite the logic surrounding this, replacing findForkedPointer with getRTCheckPtrs, and simplifying createCheckForAccess. The clean rewrite has fixed two issues: first, the AddRec does not need to be affine for runtime-checks to be generated, and second, the NeedsFreeze field of the PointerIntPair was not really being respected, and its handling has been completely redone. Patch is 32.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140298.diff 10 Files Affected:
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index ab407e945bc53..475a46fcddc6d 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -943,6 +943,10 @@ static void findForkedSCEVs(
// regardless of whether it can be used for a forked pointer or not, along
// with an indication of whether it might be a poison or undef value.
const SCEV *Scev = SE->getSCEV(Ptr);
+ if (isa<Argument>(Ptr)) {
+ ScevList.emplace_back(Scev, false);
+ return;
+ }
if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
!isa<Instruction>(Ptr) || Depth == 0) {
ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
@@ -1088,29 +1092,47 @@ static void findForkedSCEVs(
}
}
-static SmallVector<PointerIntPair<const SCEV *, 1, bool>>
-findForkedPointer(PredicatedScalarEvolution &PSE,
- const DenseMap<Value *, const SCEV *> &StridesMap, Value *Ptr,
- const Loop *L) {
- ScalarEvolution *SE = PSE.getSE();
- assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
- SmallVector<PointerIntPair<const SCEV *, 1, bool>> Scevs;
- findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
-
- // For now, we will only accept a forked pointer with two possible SCEVs
- // that are either SCEVAddRecExprs or loop invariant.
- if (Scevs.size() == 2 &&
- (isa<SCEVAddRecExpr>(get<0>(Scevs[0])) ||
- SE->isLoopInvariant(get<0>(Scevs[0]), L)) &&
- (isa<SCEVAddRecExpr>(get<0>(Scevs[1])) ||
- SE->isLoopInvariant(get<0>(Scevs[1]), L))) {
- LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n");
- LLVM_DEBUG(dbgs() << "\t(1) " << *get<0>(Scevs[0]) << "\n");
- LLVM_DEBUG(dbgs() << "\t(2) " << *get<0>(Scevs[1]) << "\n");
- return Scevs;
+/// Given \p ForkedSCEVs corresponding to \p Ptr, get AddRecs from \p Assume and
+/// \p StridesMap when possible, and return a list of pointers that need
+/// runtime-checks, which can then be checked to see if runtime-checks can
+/// really be inserted.
+static iterator_range<PointerIntPair<const SCEV *, 1, bool> *> getRTCheckPtrs(
+ PredicatedScalarEvolution &PSE, Value *Ptr,
+ MutableArrayRef<PointerIntPair<const SCEV *, 1, bool>> ForkedSCEVs,
+ const DenseMap<Value *, const SCEV *> &StridesMap, bool Assume) {
+ for (auto &P : ForkedSCEVs) {
+ auto *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer());
+ if (!AR && Assume)
+ AR = PSE.getAsAddRec(Ptr);
+ if (AR)
+ P.setPointer(AR);
}
- return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
+ // De-duplicate the ForkedSCEVs. If two SCEVs are equal, prefer the SCEV that
+ // doesn't need freeze.
+ auto PtrEq = [](const auto &P, const auto &Q) {
+ return get<0>(P) == get<0>(Q);
+ };
+ auto FreezeLess = [PtrEq](const auto &P, const auto &Q) {
+ return PtrEq(P, Q) && get<1>(P) < get<1>(Q);
+ };
+ stable_sort(ForkedSCEVs, FreezeLess);
+ auto UniqPtrs = make_range(ForkedSCEVs.begin(), unique(ForkedSCEVs, PtrEq));
+
+ if (size(UniqPtrs) == 1) {
+ // If there's only one option for Ptr, look it up now, because assumptions
+ // might have been added to PSE.
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(
+ replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr))) {
+ UniqPtrs.begin()->setPointer(AR);
+ UniqPtrs.begin()->setInt(false);
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n");
+ for (auto [Idx, AR] : enumerate(UniqPtrs))
+ LLVM_DEBUG(dbgs() << "\t(" << Idx << ") " << *AR.getPointer() << "\n");
+ }
+ return UniqPtrs;
}
bool AccessAnalysis::createCheckForAccess(
@@ -1119,42 +1141,25 @@ bool AccessAnalysis::createCheckForAccess(
DenseMap<Value *, unsigned> &DepSetId, Loop *TheLoop,
unsigned &RunningDepId, unsigned ASId, bool Assume) {
Value *Ptr = Access.getPointer();
+ ScalarEvolution *SE = PSE.getSE();
+ assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
- SmallVector<PointerIntPair<const SCEV *, 1, bool>> TranslatedPtrs =
- findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
- assert(!TranslatedPtrs.empty() && "must have some translated pointers");
-
- /// Check whether all pointers can participate in a runtime bounds check. They
- /// must either be invariant or AddRecs. If ShouldCheckWrap is true, they also
- /// must not wrap.
- for (auto &P : TranslatedPtrs) {
- // The bounds for loop-invariant pointer is trivial.
- if (PSE.getSE()->isLoopInvariant(P.getPointer(), TheLoop))
- continue;
-
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer());
- if (!AR && Assume)
- AR = PSE.getAsAddRec(Ptr);
- if (!AR || !AR->isAffine())
- return false;
-
- // If there's only one option for Ptr, look it up after bounds and wrap
- // checking, because assumptions might have been added to PSE.
- if (TranslatedPtrs.size() == 1) {
- AR =
- cast<SCEVAddRecExpr>(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr));
- P.setPointer(AR);
- }
-
- // When we run after a failing dependency check we have to make sure
- // we don't have wrapping pointers.
- if (!isNoWrap(PSE, AR, TranslatedPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
- TheLoop, Assume)) {
- return false;
- }
- }
+ // Find the ForkedSCEVs, and prepare the runtime-check pointers.
+ SmallVector<PointerIntPair<const SCEV *, 1, bool>> ForkedSCEVs;
+ findForkedSCEVs(SE, TheLoop, Ptr, ForkedSCEVs, MaxForkedSCEVDepth);
+ auto RTCheckPtrs = getRTCheckPtrs(PSE, Ptr, ForkedSCEVs, StridesMap, Assume);
+
+ /// Check whether all pointers can participate in a runtime bounds check: they
+ /// must either be loop-invariant, or an affine AddRec that does not wrap.
+ if (!all_of(RTCheckPtrs, [&](const auto &P) {
+ auto *AR = dyn_cast<SCEVAddRecExpr>(P.getPointer());
+ return PSE.getSE()->isLoopInvariant(P.getPointer(), TheLoop) ||
+ (AR && isNoWrap(PSE, AR, size(RTCheckPtrs) == 1 ? Ptr : nullptr,
+ AccessTy, TheLoop, Assume));
+ }))
+ return false;
- for (auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) {
+ for (auto [PtrExpr, NeedsFreeze] : RTCheckPtrs) {
// The id of the dependence set.
unsigned DepId;
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
index ae10ab841420f..63dfffd81c69d 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
@@ -29,16 +29,16 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group
-; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i32, ptr %b, i64 %conv11
-; CHECK-NEXT: Against group
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
+; CHECK-NEXT: Against group
+; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i32, ptr %b, i64 %conv11
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group
-; CHECK-NEXT: (Low: %b High: ((4 * (1 umax %x)) + %b))
-; CHECK-NEXT: Member: {%b,+,4}<%for.body>
-; CHECK-NEXT: Group
; CHECK-NEXT: (Low: (4 + %a) High: (4 + (4 * (1 umax %x)) + %a))
; CHECK-NEXT: Member: {(4 + %a),+,4}<%for.body>
+; CHECK-NEXT: Group
+; CHECK-NEXT: (Low: %b High: ((4 * (1 umax %x)) + %b))
+; CHECK-NEXT: Member: {%b,+,4}<%for.body>
; CHECK: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
diff --git a/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll b/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
index ee42860cd250e..972a0b653819b 100644
--- a/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
+++ b/llvm/test/Transforms/LoopDistribute/laa-invalidation.ll
@@ -13,9 +13,11 @@ define void @test_pr50940(ptr %A, ptr %B) {
; CHECK-NEXT: br label [[INNER_LVER_CHECK:%.*]]
; CHECK: inner.lver.check:
; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[SCEVGEP_FR:%.*]] = freeze ptr [[UGLYGEP]]
+; CHECK-NEXT: [[SCEVGEP1_FR:%.*]] = freeze ptr [[UGLYGEP1]]
; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 2
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[UGLYGEP]], [[UGLYGEP2]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[UGLYGEP1]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP_FR]], [[UGLYGEP2]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1_FR]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[INNER_PH3_LVER_ORIG:%.*]], label [[INNER_PH3_LDIST1:%.*]]
; CHECK: inner.ph3.lver.orig:
@@ -32,8 +34,8 @@ define void @test_pr50940(ptr %A, ptr %B) {
; CHECK-NEXT: br label [[INNER_LDIST1:%.*]]
; CHECK: inner.ldist1:
; CHECK-NEXT: [[IV_LDIST1:%.*]] = phi i16 [ 0, [[INNER_PH3_LDIST1]] ], [ [[IV_NEXT_LDIST1:%.*]], [[INNER_LDIST1]] ]
-; CHECK-NEXT: [[L_LDIST1:%.*]] = load <2 x i16>, ptr [[UGLYGEP]], align 1, !alias.scope !0, !noalias !3
-; CHECK-NEXT: store i16 0, ptr [[GEP_A_3]], align 1, !alias.scope !0, !noalias !3
+; CHECK-NEXT: [[L_LDIST1:%.*]] = load <2 x i16>, ptr [[UGLYGEP]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; CHECK-NEXT: store i16 0, ptr [[GEP_A_3]], align 1, !alias.scope [[META0]], !noalias [[META3]]
; CHECK-NEXT: [[IV_NEXT_LDIST1]] = add nuw nsw i16 [[IV_LDIST1]], 1
; CHECK-NEXT: [[C_1_LDIST1:%.*]] = icmp ult i16 [[IV_LDIST1]], 38
; CHECK-NEXT: br i1 [[C_1_LDIST1]], label [[INNER_LDIST1]], label [[INNER_PH3:%.*]]
@@ -41,7 +43,7 @@ define void @test_pr50940(ptr %A, ptr %B) {
; CHECK-NEXT: br label [[INNER:%.*]]
; CHECK: inner:
; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[INNER_PH3]] ], [ [[IV_NEXT:%.*]], [[INNER]] ]
-; CHECK-NEXT: store i16 1, ptr [[B]], align 1, !alias.scope !3
+; CHECK-NEXT: store i16 1, ptr [[B]], align 1, !alias.scope [[META3]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i16 [[IV]], 1
; CHECK-NEXT: [[C_1:%.*]] = icmp ult i16 [[IV]], 38
; CHECK-NEXT: br i1 [[C_1]], label [[INNER]], label [[EXIT_LOOPEXIT4:%.*]]
diff --git a/llvm/test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll b/llvm/test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll
index 208648b9ec20d..2d2d6e4fad81b 100644
--- a/llvm/test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll
+++ b/llvm/test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll
@@ -1,31 +1,38 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -passes=loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info -S < %s | FileCheck %s
-; If we can't find the bounds for one of the arrays in order to generate the
-; memchecks (e.g., C[i * i] below), loop shold not get distributed.
-;
-; for (i = 0; i < n; i++) {
-; A[i + 1] = A[i] * 3;
-; -------------------------------
-; C[i * i] = B[i] * 2;
-; }
-
-; Verify that we didn't distribute by checking that we still have the original
-; number of branches.
-
@A = common global ptr null, align 8
@B = common global ptr null, align 8
@C = common global ptr null, align 8
define void @f() {
; CHECK-LABEL: define void @f() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[A:%.*]] = load ptr, ptr @A, align 8
; CHECK-NEXT: [[B:%.*]] = load ptr, ptr @B, align 8
; CHECK-NEXT: [[C:%.*]] = load ptr, ptr @C, align 8
-; CHECK-NEXT: br label %[[FOR_BODY:.*]]
-; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY_LVER_CHECK:.*]]
+; CHECK: [[FOR_BODY_LVER_CHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 84
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[C]], i64 1444
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult ptr [[C]], [[SCEVGEP1]]
+; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP0]], ptr [[C]], ptr [[SCEVGEP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt ptr [[C]], [[SCEVGEP1]]
+; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], ptr [[C]], ptr [[SCEVGEP1]]
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[B]], i64 80
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[UMIN]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[A]], [[SCEVGEP3]]
+; CHECK-NEXT: [[BOUND15:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[FOR_BODY_PH_LVER_ORIG:.*]], label %[[FOR_BODY_PH_LDIST1:.*]]
+; CHECK: [[FOR_BODY_PH_LVER_ORIG]]:
+; CHECK-NEXT: br label %[[FOR_BODY_LVER_ORIG:.*]]
+; CHECK: [[FOR_BODY_LVER_ORIG]]:
+; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, %[[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD:%.*]], %[[FOR_BODY_LVER_ORIG]] ]
; CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IND]]
; CHECK-NEXT: [[LOADA:%.*]] = load i32, ptr [[ARRAYIDXA]], align 4
; CHECK-NEXT: [[MULA:%.*]] = mul i32 [[LOADA]], 3
@@ -39,7 +46,36 @@ define void @f() {
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IND_2]]
; CHECK-NEXT: store i32 [[MULC]], ptr [[ARRAYIDXC]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], 20
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_LVER_ORIG]]
+; CHECK: [[FOR_BODY_PH_LDIST1]]:
+; CHECK-NEXT: br label %[[FOR_BODY_LDIST1:.*]]
+; CHECK: [[FOR_BODY_LDIST1]]:
+; CHECK-NEXT: [[IND_LDIST1:%.*]] = phi i64 [ 0, %[[FOR_BODY_PH_LDIST1]] ], [ [[ADD_LDIST1:%.*]], %[[FOR_BODY_LDIST1]] ]
+; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IND_LDIST1]]
+; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADA_LDIST1]], 3
+; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
+; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LDIST1]]
+; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope [[META0]], !noalias [[META3]]
+; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], 20
+; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label %[[FOR_BODY_PH:.*]], label %[[FOR_BODY_LDIST1]]
+; CHECK: [[FOR_BODY_PH]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IND1:%.*]] = phi i64 [ 0, %[[FOR_BODY_PH]] ], [ [[ADD1:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ADD1]] = add nuw nsw i64 [[IND1]], 1
+; CHECK-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IND1]]
+; CHECK-NEXT: [[LOADB1:%.*]] = load i32, ptr [[ARRAYIDXB1]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT: [[MULC1:%.*]] = mul i32 [[LOADB1]], 2
+; CHECK-NEXT: [[IND_3:%.*]] = mul i64 [[IND1]], [[IND1]]
+; CHECK-NEXT: [[ARRAYIDXC1:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IND_3]]
+; CHECK-NEXT: store i32 [[MULC1]], ptr [[ARRAYIDXC1]], align 4, !alias.scope [[META7:![0-9]+]]
+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[ADD1]], 20
+; CHECK-NEXT: br i1 [[EXITCOND1]], label %[[FOR_END_LOOPEXIT7:.*]], label %[[FOR_BODY]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: br label %[[FOR_END:.*]]
+; CHECK: [[FOR_END_LOOPEXIT7]]:
+; CHECK-NEXT: br label %[[FOR_END]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll
index 4d50a814b621d..f7deb2934fc1e 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll
@@ -16,8 +16,10 @@ define void @test1_select_invariant(ptr %src.1, ptr %src.2, ptr %dst, i1 %c, i8
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[PTR_SEL]], i64 1
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR_SEL]], [[UGLYGEP]]
+; CHECK-NEXT: [[PTR_SEL_FR:%.*]] = freeze ptr [[PTR_SEL]]
+; CHECK-NEXT: [[SCEVGEP1_FR:%.*]] = freeze ptr [[UGLYGEP1]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1_FR]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR_SEL_FR]], [[UGLYGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
@@ -169,16 +171,12 @@ define void @test_loop_dependent_select2(ptr %src.1, ptr %src.2, ptr %dst, i8 %n
; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[SRC_1:%.*]], i64 1
-; CHECK-NEXT: [[SRC_1_FR:%.*]] = freeze ptr [[SRC_1]]
-; CHECK-NEXT: [[UGLYGEP1_FR:%.*]] = freeze ptr [[UGLYGEP1]]
; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 1
-; CHECK-NEXT: [[SRC_2_FR:%.*]] = freeze ptr [[SRC_2]]
-; CHECK-NEXT: [[UGLYGEP2_FR:%.*]] = freeze ptr [[UGLYGEP2]]
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1_FR]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_1_FR]], [[UGLYGEP]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_1]], [[UGLYGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP2_FR]]
-; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC_2_FR]], [[UGLYGEP]]
+; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP2]]
+; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC_2]], [[UGLYGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
@@ -256,13 +254,11 @@ define void @test_loop_dependent_select_first_ptr_noundef(ptr noundef %src.1, pt
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[SRC_1:%.*]], i64 1
; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[SRC_2:%.*]], i64 1
-; CHECK-NEXT: [[SRC_2_FR:%.*]] = freeze ptr [[SRC_2]]
-; CHECK-NEXT: [[UGLYGEP2_FR:%.*]] = freeze ptr [[UGLYGEP2]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SRC_1]], [[UGLYGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP2_FR]]
-; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC_2_FR]], [[UGLYGEP]]
+; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[UGLYGEP2]]
+; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC_2]], [[UGLYGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and ...
[truncated]
|
findForkedPointer is a mysteriously named function whose result is assigned to another mysteriously named variable, TranslatedPtrs. Thoroughly rewrite the logic surrounding this, replacing findForkedPointer with getRTCheckPtrs, and simplifying createCheckForAccess. The clean rewrite has fixed an issue: the NeedsFreeze field of the PointerIntPair was not really being respected, and this has been completely redone.