Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 8c01d93

Browse files
committed
AMDGPU: Start considering new atomicrmw metadata on integer operations
Start considering !amdgpu.no.remote.memory and !amdgpu.no.fine.grained.memory metadata when deciding to expand integer atomic operations. This does not yet attempt to accurately handle fadd/fmin/fmax, which are trickier and require migrating the old "amdgpu-unsafe-fp-atomics" attribute.
1 parent 493d7e3 commit 8c01d93
Copy full SHA for 8c01d93
Expand file treeCollapse file tree

28 files changed

+11173
-9985
lines changed

‎llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+53-12Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16784,19 +16784,60 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1678416784
case AtomicRMWInst::UDecWrap: {
1678516785
if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
1678616786
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16787-
// Always expand system scope atomics.
16788-
if (HasSystemScope) {
16789-
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
16790-
Op == AtomicRMWInst::Xor) {
16791-
// Atomic sub/or/xor do not work over PCI express, but atomic add
16792-
// does. InstCombine transforms these with 0 to or, so undo that.
16793-
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16794-
ConstVal && ConstVal->isNullValue())
16795-
return AtomicExpansionKind::Expand;
16796-
}
16797-
16798-
return AtomicExpansionKind::CmpXChg;
16787+
// On most subtargets, for atomicrmw operations other than add/xchg,
16788+
// whether or not the instructions will behave correctly depends on where
16789+
// the address physically resides and what interconnect is used in the
16790+
// system configuration. On some targets the instruction will nop,
16791+
// and in others synchronization will only occur at degraded device scope.
16792+
//
16793+
// If the allocation is known local to the device, the instructions should
16794+
// work correctly.
16795+
if (RMW->hasMetadata("amdgpu.no.remote.memory"))
16796+
return atomicSupportedIfLegalIntType(RMW);
16797+
16798+
// If fine-grained remote memory works at device scope, we don't need to
16799+
// do anything.
16800+
if (!HasSystemScope &&
16801+
Subtarget->supportsAgentScopeFineGrainedRemoteMemoryAtomics())
16802+
return atomicSupportedIfLegalIntType(RMW);
16803+
16804+
// If we are targeting a remote allocated address, it depends what kind of
16805+
// allocation the address belongs to.
16806+
//
16807+
// If the allocation is fine-grained (in host memory, or in PCIe peer
16808+
// device memory), the operation will fail depending on the target.
16809+
//
16810+
// Note fine-grained host memory access does work on APUs or if XGMI is
16811+
// used, but we do not know if we are targeting an APU or the system
16812+
// configuration from the ISA version/target-cpu.
16813+
if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
16814+
return atomicSupportedIfLegalIntType(RMW);
16815+
16816+
if (Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Or ||
16817+
Op == AtomicRMWInst::Xor) {
16818+
// Atomic sub/or/xor do not work over PCI express, but atomic add
16819+
// does. InstCombine transforms these with 0 to or, so undo that.
16820+
if (Constant *ConstVal = dyn_cast<Constant>(RMW->getValOperand());
16821+
ConstVal && ConstVal->isNullValue())
16822+
return AtomicExpansionKind::Expand;
1679916823
}
16824+
16825+
// If the allocation could be in remote, fine-grained memory, the rmw
16826+
// instructions may fail. cmpxchg should work, so emit that. On some
16827+
// system configurations, PCIe atomics aren't supported so cmpxchg won't
16828+
// even work, so you're out of luck anyway.
16829+
16830+
// In summary:
16831+
//
16832+
// Cases that may fail:
16833+
// - fine-grained pinned host memory
16834+
// - fine-grained migratable host memory
16835+
// - fine-grained PCIe peer device
16836+
//
16837+
// Cases that should work, but may be treated overly conservatively.
16838+
// - fine-grained host memory on an APU
16839+
// - fine-grained XGMI peer device
16840+
return AtomicExpansionKind::CmpXChg;
1680016841
}
1680116842

1680216843
return atomicSupportedIfLegalIntType(RMW);

‎llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll

Copy file name to clipboardExpand all lines: llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
+31-30Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr add
8585
; GFX11-NEXT: v_mov_b32_e32 v1, 0
8686
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
8787
; GFX11-NEXT: s_endpgm
88-
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4
88+
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
8989
store i32 %result, ptr addrspace(1) %out, align 4
9090
ret void
9191
}
@@ -350,7 +350,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr
350350
; GFX11-NEXT: buffer_gl0_inv
351351
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
352352
; GFX11-NEXT: s_endpgm
353-
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4
353+
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
354354
store i32 %result, ptr addrspace(1) %out, align 4
355355
ret void
356356
}
@@ -427,7 +427,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %ou
427427
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
428428
; GFX11-NEXT: s_endpgm
429429
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
430-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
430+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
431431
store i32 %result, ptr addrspace(1) %out, align 4
432432
ret void
433433
}
@@ -656,7 +656,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) #1
656656
; GFX11-NEXT: buffer_gl1_inv
657657
; GFX11-NEXT: buffer_gl0_inv
658658
; GFX11-NEXT: s_endpgm
659-
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4
659+
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
660660
ret void
661661
}
662662

@@ -723,7 +723,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) %
723723
; GFX11-NEXT: buffer_gl0_inv
724724
; GFX11-NEXT: s_endpgm
725725
%gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
726-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
726+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
727727
ret void
728728
}
729729

@@ -962,7 +962,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace
962962
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
963963
%out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id
964964
%gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
965-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
965+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
966966
store i32 %result, ptr addrspace(1) %out.gep, align 4
967967
ret void
968968
}
@@ -1040,7 +1040,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspa
10401040
%id = call i32 @llvm.amdgcn.workitem.id.x()
10411041
%gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
10421042
%gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
1043-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4
1043+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
10441044
ret void
10451045
}
10461046

@@ -1119,7 +1119,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #1 {
11191119
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
11201120
; GFX11-NEXT: flat_store_b32 v[0:1], v2
11211121
; GFX11-NEXT: s_endpgm
1122-
%result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4
1122+
%result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
11231123
store i32 %result, ptr %out, align 4
11241124
ret void
11251125
}
@@ -1206,7 +1206,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
12061206
; GFX11-NEXT: flat_store_b32 v[0:1], v2
12071207
; GFX11-NEXT: s_endpgm
12081208
%gep = getelementptr i32, ptr %ptr, i32 4
1209-
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
1209+
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
12101210
store i32 %result, ptr %out, align 4
12111211
ret void
12121212
}
@@ -1442,7 +1442,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) #1 {
14421442
; GFX11-NEXT: buffer_gl1_inv
14431443
; GFX11-NEXT: buffer_gl0_inv
14441444
; GFX11-NEXT: s_endpgm
1445-
%result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4
1445+
%result = atomicrmw udec_wrap ptr %ptr, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
14461446
ret void
14471447
}
14481448

@@ -1516,7 +1516,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
15161516
; GFX11-NEXT: buffer_gl0_inv
15171517
; GFX11-NEXT: s_endpgm
15181518
%gep = getelementptr i32, ptr %ptr, i32 4
1519-
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
1519+
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
15201520
ret void
15211521
}
15221522

@@ -1780,7 +1780,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %
17801780
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
17811781
%out.gep = getelementptr i32, ptr %out, i32 %id
17821782
%gep = getelementptr i32, ptr %gep.tid, i32 5
1783-
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
1783+
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
17841784
store i32 %result, ptr %out.gep, align 4
17851785
ret void
17861786
}
@@ -1875,7 +1875,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1
18751875
%id = call i32 @llvm.amdgcn.workitem.id.x()
18761876
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
18771877
%gep = getelementptr i32, ptr %gep.tid, i32 5
1878-
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
1878+
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
18791879
ret void
18801880
}
18811881

@@ -1969,7 +1969,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #1 {
19691969
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
19701970
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
19711971
; GFX11-NEXT: s_endpgm
1972-
%result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
1972+
%result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
19731973
store i64 %result, ptr %out, align 4
19741974
ret void
19751975
}
@@ -2071,7 +2071,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1
20712071
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
20722072
; GFX11-NEXT: s_endpgm
20732073
%gep = getelementptr i64, ptr %ptr, i32 4
2074-
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
2074+
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
20752075
store i64 %result, ptr %out, align 4
20762076
ret void
20772077
}
@@ -2144,7 +2144,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) #1 {
21442144
; GFX11-NEXT: buffer_gl1_inv
21452145
; GFX11-NEXT: buffer_gl0_inv
21462146
; GFX11-NEXT: s_endpgm
2147-
%result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
2147+
%result = atomicrmw udec_wrap ptr %ptr, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
21482148
ret void
21492149
}
21502150

@@ -2223,7 +2223,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 {
22232223
; GFX11-NEXT: buffer_gl0_inv
22242224
; GFX11-NEXT: s_endpgm
22252225
%gep = getelementptr i64, ptr %ptr, i32 4
2226-
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
2226+
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
22272227
ret void
22282228
}
22292229

@@ -2536,7 +2536,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %
25362536
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
25372537
%out.gep = getelementptr i64, ptr %out, i32 %id
25382538
%gep = getelementptr i64, ptr %gep.tid, i32 5
2539-
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
2539+
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
25402540
store i64 %result, ptr %out.gep, align 4
25412541
ret void
25422542
}
@@ -2635,7 +2635,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1
26352635
%id = call i32 @llvm.amdgcn.workitem.id.x()
26362636
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
26372637
%gep = getelementptr i64, ptr %gep.tid, i32 5
2638-
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
2638+
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
26392639
ret void
26402640
}
26412641

@@ -2724,7 +2724,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr
27242724
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
27252725
%idx.0 = add nsw i32 %tid.x, 2
27262726
%arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
2727-
%result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i32 9 syncscope("agent") seq_cst, align 4
2727+
%result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i32 9 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !1
27282728
store i32 %idx.0, ptr addrspace(1) %add_use, align 4
27292729
store i32 %result, ptr addrspace(1) %out, align 4
27302730
ret void
@@ -2807,7 +2807,7 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr add
28072807
; GFX11-NEXT: v_mov_b32_e32 v2, 0
28082808
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
28092809
; GFX11-NEXT: s_endpgm
2810-
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8
2810+
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
28112811
store i64 %result, ptr addrspace(1) %out, align 4
28122812
ret void
28132813
}
@@ -2953,7 +2953,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64(ptr addrspace(3) %ptr) #1 {
29532953
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
29542954
; GFX11-NEXT: buffer_gl0_inv
29552955
; GFX11-NEXT: s_endpgm
2956-
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8
2956+
%result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
29572957
ret void
29582958
}
29592959

@@ -3016,7 +3016,7 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr
30163016
; GFX11-NEXT: buffer_gl0_inv
30173017
; GFX11-NEXT: s_endpgm
30183018
%gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
3019-
%result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 42 syncscope("agent") seq_cst, align 8
3019+
%result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
30203020
ret void
30213021
}
30223022

@@ -3092,7 +3092,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr
30923092
; GFX11-NEXT: buffer_gl0_inv
30933093
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
30943094
; GFX11-NEXT: s_endpgm
3095-
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8
3095+
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
30963096
store i64 %result, ptr addrspace(1) %out, align 4
30973097
ret void
30983098
}
@@ -3174,7 +3174,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %ou
31743174
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
31753175
; GFX11-NEXT: s_endpgm
31763176
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
3177-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
3177+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
31783178
store i64 %result, ptr addrspace(1) %out, align 4
31793179
ret void
31803180
}
@@ -3440,7 +3440,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) #1
34403440
; GFX11-NEXT: buffer_gl1_inv
34413441
; GFX11-NEXT: buffer_gl0_inv
34423442
; GFX11-NEXT: s_endpgm
3443-
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8
3443+
%result = atomicrmw udec_wrap ptr addrspace(1) %ptr, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
34443444
ret void
34453445
}
34463446

@@ -3512,7 +3512,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %
35123512
; GFX11-NEXT: buffer_gl0_inv
35133513
; GFX11-NEXT: s_endpgm
35143514
%gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
3515-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
3515+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
35163516
ret void
35173517
}
35183518

@@ -3788,7 +3788,7 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace
37883788
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
37893789
%out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id
37903790
%gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
3791-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
3791+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
37923792
store i64 %result, ptr addrspace(1) %out.gep, align 4
37933793
ret void
37943794
}
@@ -3871,7 +3871,7 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspa
38713871
%id = call i32 @llvm.amdgcn.workitem.id.x()
38723872
%gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
38733873
%gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
3874-
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8
3874+
%result = atomicrmw udec_wrap ptr addrspace(1) %gep, i64 42 syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !1
38753875
ret void
38763876
}
38773877

@@ -3966,7 +3966,7 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out,
39663966
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #2
39673967
%idx.0 = add nsw i32 %tid.x, 2
39683968
%arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
3969-
%result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i64 9 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
3969+
%result = atomicrmw udec_wrap ptr addrspace(3) %arrayidx0, i64 9 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0, !amdgpu.no.remote.memory !1
39703970
store i32 %idx.0, ptr addrspace(1) %add_use, align 4
39713971
store i64 %result, ptr addrspace(1) %out, align 4
39723972
ret void
@@ -3977,6 +3977,7 @@ attributes #1 = { nounwind }
39773977
attributes #2 = { nounwind memory(none) }
39783978

39793979
!0 = !{i32 5, i32 6}
3980+
!1 = !{}
39803981

39813982
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
39823983
; GCN: {{.*}}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.