diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index a0618726ac0ac..ad830e2a33a67 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3303,6 +3303,13 @@ static void combineMetadata(Instruction *K, const Instruction *J, bool DoesKMove, bool AAOnly = false) { SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata; K->getAllMetadataOtherThanDebugLoc(Metadata); + + const auto IsAMDGPUMD = [=](unsigned Kind) { + return Kind == + K->getContext().getMDKindID("amdgpu.no.fine.grained.memory") || + Kind == K->getContext().getMDKindID("amdgpu.no.remote.memory") || + Kind == K->getContext().getMDKindID("amdgpu.ignore.denormal.mode"); + }; for (const auto &MD : Metadata) { unsigned Kind = MD.first; MDNode *JMD = J->getMetadata(Kind); @@ -3311,7 +3318,10 @@ static void combineMetadata(Instruction *K, const Instruction *J, // TODO: Assert that this switch is exhaustive for fixed MD kinds. switch (Kind) { default: - K->setMetadata(Kind, nullptr); // Remove unknown metadata + if (K->isAtomic() && IsAMDGPUMD(Kind)) + K->setMetadata(Kind, MDNode::intersect(JMD, KMD)); + else + K->setMetadata(Kind, nullptr); // Remove unknown metadata break; case LLVMContext::MD_dbg: llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg"); diff --git a/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll new file mode 100644 index 0000000000000..56f188146c246 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-amdgpu-atomic-md.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +;; Test to ensure that AMDGPU atomic-related metadata is not dropped when +;; instructions are sunk. The metadata is kept only when both instructions +;; carry it; if either one lacks it, it is dropped from the sunk instruction. 
+ +; RUN: opt < %s -passes=simplifycfg -sink-common-insts -S | FileCheck %s + +define void @both(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) { +; CHECK-LABEL: define void @both( +; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]] +; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_END_SINK_SPLIT]]: +; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], double [[D]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]] +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: ret void +; +entry: + br i1 %pred0, label %for.body, label %for.body1 + +for.body: + %0 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 + br label %if.end + +for.body1: + br i1 %pred1, label %if.then, label %if.end + +if.then: + %1 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 + br label %if.end + +if.end: + ret void +} + +define void @from(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) { +; CHECK-LABEL: define void @from( +; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]] +; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_END_SINK_SPLIT]]: +; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], double [[D]] monotonic, align 8 +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: ret void +; +entry: + br i1 %pred0, label %for.body, label %for.body1 + +for.body: + %0 = atomicrmw fadd ptr %p, double %d monotonic, align 8, 
!amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 + br label %if.end + +for.body1: + br i1 %pred1, label %if.then, label %if.end + +if.then: + %1 = atomicrmw fadd ptr %p, double %d monotonic, align 8 + br label %if.end + +if.end: + ret void +} + +define void @to(i1 %pred0, i1 %pred1, ptr captures(none) %p, double %d) { +; CHECK-LABEL: define void @to( +; CHECK-SAME: i1 [[PRED0:%.*]], i1 [[PRED1:%.*]], ptr captures(none) [[P:%.*]], double [[D:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[PRED0]], i1 true, i1 [[PRED1]] +; CHECK-NEXT: br i1 [[BRMERGE]], label %[[IF_END_SINK_SPLIT:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_END_SINK_SPLIT]]: +; CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], double [[D]] monotonic, align 8 +; CHECK-NEXT: br label %[[IF_END]] +; CHECK: [[IF_END]]: +; CHECK-NEXT: ret void +; +entry: + br i1 %pred0, label %for.body, label %for.body1 + +for.body: + %0 = atomicrmw fadd ptr %p, double %d monotonic, align 8 + br label %if.end + +for.body1: + br i1 %pred1, label %if.then, label %if.end + +if.then: + %1 = atomicrmw fadd ptr %p, double %d monotonic, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 + br label %if.end + +if.end: + ret void +} + +!0 = !{} +;. +; CHECK: [[META0]] = !{} +;.