diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 3dbd605e19c3a..9b71005dff9fd 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3303,6 +3303,15 @@ static void combineMetadata(Instruction *K, const Instruction *J,
                             bool DoesKMove, bool AAOnly = false) {
   SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
   K->getAllMetadataOtherThanDebugLoc(Metadata);
+
+  // Metadata kind IDs of the AMDGPU atomic annotations. They are looked up by
+  // name here and matched against Kind in the `default` case of the switch
+  // below.
+  const unsigned AMDGPUMD[] = {
+      K->getContext().getMDKindID("amdgpu.no.fine.grained.memory"),
+      K->getContext().getMDKindID("amdgpu.no.remote.memory"),
+      K->getContext().getMDKindID("amdgpu.ignore.denormal.mode")};
+
   for (const auto &MD : Metadata) {
     unsigned Kind = MD.first;
     MDNode *JMD = J->getMetadata(Kind);
@@ -3311,7 +3320,12 @@ static void combineMetadata(Instruction *K, const Instruction *J,
     // TODO: Assert that this switch is exhaustive for fixed MD kinds.
     switch (Kind) {
       default:
+        // Keep AMDGPU atomic metadata only when J carries the same kind: the
+        // combined instruction stands in for both K and J, so an annotation
+        // found on K alone must not survive.
+        if (JMD && K->isAtomic() && is_contained(AMDGPUMD, Kind))
+          break;
         K->setMetadata(Kind, nullptr); // Remove unknown metadata
         break;
       case LLVMContext::MD_dbg:
         llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
new file mode 100644
index 0000000000000..1cd574e714b43
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+;; Test to ensure that AMDGPU atomic related metadata is not dropped when
+;; instructions are sunk. The metadata is kept only if both of the merged
+;; instructions carry it; metadata found on just one of them is dropped.
+
+; RUN: opt < %s -passes=simplifycfg -sink-common-insts -S | FileCheck %s
+
+define amdgpu_kernel void @f(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) local_unnamed_addr {
+; CHECK-LABEL: define amdgpu_kernel void @f(
+; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[P_GLOBAL:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1)
+; CHECK-NEXT:    [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]]
+; CHECK-NEXT:    br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]]
+; CHECK:       [[IF_END_SINK_SPLIT]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(1) [[P_GLOBAL]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]]
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p.global = addrspacecast ptr %p to ptr addrspace(1)
+  br i1 %pred0, label %for.body, label %for.body1
+
+for.body:
+  %0 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+  br label %if.end
+
+for.body1:
+  br i1 %pred1, label %if.then, label %if.end
+
+if.then:
+  %1 = atomicrmw fadd ptr addrspace(1) %p.global, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+!0 = !{!"float", !1, i64 0}
+!1 = !{!"omnipotent char", !2, i64 0}
+!2 = !{!"Simple C++ TBAA"}