diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index f715e0ec8dbb4..b6fc2927e4abe 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -393,8 +393,6 @@ class MemoryDepChecker { uint64_t MaxStride; std::optional CommonStride; - bool ShouldRetryWithRuntimeCheck; - /// TypeByteSize is either the common store size of both accesses, or 0 when /// store sizes mismatch. uint64_t TypeByteSize; @@ -404,11 +402,9 @@ class MemoryDepChecker { DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride, std::optional CommonStride, - bool ShouldRetryWithRuntimeCheck, uint64_t TypeByteSize, bool AIsWrite, bool BIsWrite) : Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride), - ShouldRetryWithRuntimeCheck(ShouldRetryWithRuntimeCheck), TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} }; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 438669df51f89..fb2752bde03a4 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -2014,14 +2014,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( if (StrideAScaled == StrideBScaled) CommonStride = StrideAScaled; - // TODO: Historically, we don't retry with runtime checks unless the - // (unscaled) strides are the same. Fix this once the condition for runtime - // checks in isDependent is fixed. - bool ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt; + // TODO: FoundNonConstantDistanceDependence is used as a necessary condition + // to consider retrying with runtime checks. Historically, we did not set it + // when (unscaled) strides were different but there is no inherent reason to. + if (!isa(Dist)) + FoundNonConstantDistanceDependence |= StrideAPtrInt == StrideBPtrInt; return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride, - ShouldRetryWithRuntimeCheck, TypeByteSize, - AIsWrite, BIsWrite); + TypeByteSize, AIsWrite, BIsWrite); } MemoryDepChecker::Dependence::DepType @@ -2036,15 +2036,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (std::holds_alternative(Res)) return std::get(Res); - auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck, - TypeByteSize, AIsWrite, BIsWrite] = + auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] = std::get(Res); bool HasSameSize = TypeByteSize > 0; if (isa(Dist)) { - // TODO: Relax requirement that there is a common unscaled stride to retry - // with non-constant distance dependencies. - FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n"); return Dependence::Unknown; } @@ -2103,14 +2099,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // forward dependency will allow vectorization using any width. if (IsTrueDataDependence && EnableForwardingConflictDetection) { - if (!ConstDist) { - // TODO: FoundNonConstantDistanceDependence is used as a necessary - // condition to consider retrying with runtime checks. Historically, we - // did not set it when strides were different but there is no inherent - // reason to. - FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; + if (!ConstDist) return Dependence::Unknown; - } if (!HasSameSize || couldPreventStoreLoadForward( ConstDist->abs().getZExtValue(), TypeByteSize)) { LLVM_DEBUG( @@ -2125,22 +2115,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue(); // Below we only handle strictly positive distances. - if (MinDistance <= 0) { - FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; + if (MinDistance <= 0) return Dependence::Unknown; - } - - if (!ConstDist) { - // Previously this case would be treated as Unknown, possibly setting - // FoundNonConstantDistanceDependence to force re-trying with runtime - // checks. Until the TODO below is addressed, set it here to preserve - // original behavior w.r.t. re-trying with runtime checks. - // TODO: FoundNonConstantDistanceDependence is used as a necessary - // condition to consider retrying with runtime checks. Historically, we - // did not set it when strides were different but there is no inherent - // reason to. - FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck; - } if (!HasSameSize) { LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with " diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll index 114637ea3bf06..eea4a4a6468b0 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll @@ -320,28 +320,34 @@ exit: define void @retry_after_dep_check_with_unknown_offset(ptr %A, i32 %offset) { ; CHECK-LABEL: 'retry_after_dep_check_with_unknown_offset' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe with run-time checks ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %l.A = load float, ptr %A, align 4 -> -; CHECK-NEXT: store float 0.000000e+00, ptr %A.100.iv.offset.3, align 4 -; CHECK-EMPTY: -; CHECK-NEXT: Unknown: -; CHECK-NEXT: %l.A = load float, ptr %A, align 4 -> -; CHECK-NEXT: store float %l.A, ptr %A.100.iv, align 8 -; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GRP16:0x[0-9a-f]+]]): +; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv +; CHECK-NEXT: Against group ([[GRP17:0x[0-9a-f]+]]): +; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3 +; CHECK-NEXT: Check 1: +; CHECK-NEXT: Comparing group ([[GRP16]]): +; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv +; CHECK-NEXT: Against group ([[GRP18:0x[0-9a-f]+]]): +; CHECK-NEXT: ptr %A +; CHECK-NEXT: Check 2: +; CHECK-NEXT: Comparing group ([[GRP17]]): +; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3 +; CHECK-NEXT: Against group ([[GRP18]]): +; CHECK-NEXT: ptr %A ; CHECK-NEXT: Grouped accesses: -; CHECK-NEXT: Group [[GRP16:0x[0-9a-f]+]]: -; CHECK-NEXT: (Low: %A High: (4 + %A)) -; CHECK-NEXT: Member: %A -; CHECK-NEXT: Group [[GRP17:0x[0-9a-f]+]]: -; CHECK-NEXT: (Low: (100 + (8 * (zext i32 %offset to i64)) + %A) High: (96 + (16 * (zext i32 %offset to i64)) + %A)) -; CHECK-NEXT: Member: {(100 + (8 * (zext i32 %offset to i64)) + %A),+,8}<%loop> -; CHECK-NEXT: Group [[GRP18:0x[0-9a-f]+]]: +; CHECK-NEXT: Group [[GRP16]]: ; CHECK-NEXT: (Low: (100 + %A) High: (96 + (8 * (zext i32 %offset to i64)) + %A)) ; CHECK-NEXT: Member: {(100 + %A),+,8}<%loop> +; CHECK-NEXT: Group [[GRP17]]: +; CHECK-NEXT: (Low: (100 + (8 * (zext i32 %offset to i64)) + %A) High: (96 + (16 * (zext i32 %offset to i64)) + %A)) +; CHECK-NEXT: Member: {(100 + (8 * (zext i32 %offset to i64)) + %A),+,8}<%loop> +; CHECK-NEXT: Group [[GRP18]]: +; CHECK-NEXT: (Low: %A High: (4 + %A)) +; CHECK-NEXT: Member: %A ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: