Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit aa9f859

Browse files
authored
[AMDGPU] Add flag to prevent reruns of LowerModuleLDS (#129520)
[AMDGPU] Add flag to prevent reruns of LowerModuleLDS (#129520). FullLTO has to run this pass early, before module splitting occurs; otherwise module splitting won't work as expected. There was a targeted fix for Fortran on another branch that disables the LTO run, but that would break FullLTO module splitting entirely. Test changes are due to metadata indexes shifting. See #122891.
1 parent 291fa64 commit aa9f859
Copy full SHA for aa9f859

15 files changed

+196
-194
lines changed

‎llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp

Copy file name to clipboard — Expand all lines: llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+6
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,12 @@ class AMDGPULowerModuleLDS {
10381038
}
10391039

10401040
bool runOnModule(Module &M) {
1041+
// Check if we've already lowered this module. The pass may run more
1042+
// than once in the LTO pipeline, and multiple runs aren't supported.
1043+
if (M.getModuleFlag("amdgpu.lowered_lds"))
1044+
return false;
1045+
M.addModuleFlag(Module::ModFlagBehavior::Error, "amdgpu.lowered_lds", 1);
1046+
10411047
CallGraph CG = CallGraph(M);
10421048
bool Changed = superAlignLDSGlobals(M);
10431049

‎llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll

Copy file name to clipboard — Expand all lines: llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
+41-41
Original file line numberDiff line numberDiff line change
@@ -9,46 +9,46 @@ define amdgpu_kernel void @call_debug_loc() {
99
; CHECK: bb.1.entry:
1010
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
1111
; CHECK-NEXT: {{ $}}
12-
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !7
13-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !7
14-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !7
15-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !7
16-
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !7
17-
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !7
18-
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !7
19-
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !7
20-
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !7
12+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !8
13+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !8
14+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !8
15+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !8
16+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !8
17+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !8
18+
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !8
19+
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !8
20+
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !8
2121
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9
22-
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !7
23-
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !7
24-
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !7
25-
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !7
26-
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !7
27-
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !7
28-
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !7
29-
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !7
30-
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !7
31-
; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !7
32-
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !7
33-
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !7
34-
; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !7
35-
; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !7
36-
; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !7
37-
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !7
38-
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !7
39-
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !7
40-
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !7
41-
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !7
42-
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !7
43-
; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !7
44-
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !7
45-
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !7
46-
; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !7
47-
; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !7
48-
; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !7
49-
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !7 :: (dereferenceable invariant load (p0) from got, addrspace 4)
50-
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !7
51-
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !7
22+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !8
23+
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !8
24+
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !8
25+
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !8
26+
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !8
27+
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !8
28+
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !8
29+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !8
30+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !8
31+
; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !8
32+
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !8
33+
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !8
34+
; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !8
35+
; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !8
36+
; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !8
37+
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !8
38+
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !8
39+
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !8
40+
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !8
41+
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !8
42+
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !8
43+
; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !8
44+
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !8
45+
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !8
46+
; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !8
47+
; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !8
48+
; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !8
49+
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !8 :: (dereferenceable invariant load (p0) from got, addrspace 4)
50+
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !8
51+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !8
5252
; CHECK-NEXT: S_ENDPGM 0
5353
entry:
5454
call void @callee(), !dbg !6
@@ -60,11 +60,11 @@ define void @returnaddress_debug_loc(ptr addrspace(1) %ptr) {
6060
; CHECK: bb.1.entry:
6161
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
6262
; CHECK-NEXT: {{ $}}
63-
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !7
63+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !8
6464
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
6565
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
6666
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
67-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !7
67+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !8
6868
; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1)
6969
; CHECK-NEXT: SI_RETURN
7070
entry:

‎llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll

Copy file name to clipboard — Expand all lines: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+7-7
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ define amdgpu_kernel void @asm_convergent() convergent{
77
; CHECK-NEXT: liveins: $sgpr8_sgpr9
88
; CHECK-NEXT: {{ $}}
99
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
10-
; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !1
10+
; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !2
1111
; CHECK-NEXT: S_ENDPGM 0
1212
call void asm sideeffect "s_barrier", ""() convergent, !srcloc !0
1313
ret void
@@ -19,8 +19,8 @@ define amdgpu_kernel void @asm_simple_memory_clobber() {
1919
; CHECK-NEXT: liveins: $sgpr8_sgpr9
2020
; CHECK-NEXT: {{ $}}
2121
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
22-
; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !1
23-
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !1
22+
; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !2
23+
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !2
2424
; CHECK-NEXT: S_ENDPGM 0
2525
call void asm sideeffect "", "~{memory}"(), !srcloc !0
2626
call void asm sideeffect "", ""(), !srcloc !0
@@ -33,7 +33,7 @@ define amdgpu_kernel void @asm_simple_vgpr_clobber() {
3333
; CHECK-NEXT: liveins: $sgpr8_sgpr9
3434
; CHECK-NEXT: {{ $}}
3535
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
36-
; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !1
36+
; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !2
3737
; CHECK-NEXT: S_ENDPGM 0
3838
call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"(), !srcloc !0
3939
ret void
@@ -45,7 +45,7 @@ define amdgpu_kernel void @asm_simple_sgpr_clobber() {
4545
; CHECK-NEXT: liveins: $sgpr8_sgpr9
4646
; CHECK-NEXT: {{ $}}
4747
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
48-
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !1
48+
; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !2
4949
; CHECK-NEXT: S_ENDPGM 0
5050
call void asm sideeffect "s_mov_b32 s0, 7", "~{s0}"(), !srcloc !0
5151
ret void
@@ -57,7 +57,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() {
5757
; CHECK-NEXT: liveins: $sgpr8_sgpr9
5858
; CHECK-NEXT: {{ $}}
5959
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
60-
; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !1
60+
; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !2
6161
; CHECK-NEXT: S_ENDPGM 0
6262
call void asm sideeffect "; def a0", "~{a0}"(), !srcloc !0
6363
ret void
@@ -66,7 +66,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() {
6666
define i32 @asm_vgpr_early_clobber() {
6767
; CHECK-LABEL: name: asm_vgpr_early_clobber
6868
; CHECK: bb.1 (%ir-block.0):
69-
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1
69+
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !2
7070
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
7171
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9
7272
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]

‎llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll

Copy file name to clipboard — Expand all lines: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll
+1-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
define i32 @reloc_constant() {
66
; CHECK-LABEL: name: reloc_constant
77
; CHECK: bb.1 (%ir-block.0):
8-
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0
9-
; We cannot have any specific metadata check here as ConstantAsMetadata is printed as <raw_ptr_val>
8+
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !1
109
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}>
1110
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[INT1]]
1211
; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)

‎llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll

Copy file name to clipboard — Expand all lines: llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll
+3-3
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ define <2 x i64> @global_load_v2i64_align16__rangemd(ptr addrspace(1) %ptr) {
7777
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
7878
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
7979
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
80-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[MV]](p1) :: (load (<2 x s64>) from %ir.ptr, !range !2, addrspace 1)
80+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[MV]](p1) :: (load (<2 x s64>) from %ir.ptr, !range !3, addrspace 1)
8181
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
8282
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
8383
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
@@ -119,7 +119,7 @@ define i32 @global_sextload_i8_align1__rangemd(ptr addrspace(1) %ptr) {
119119
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
120120
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
121121
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
122-
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !0, addrspace 1)
122+
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !1, addrspace 1)
123123
; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
124124
; CHECK-NEXT: SI_RETURN implicit $vgpr0
125125
%load = load i8, ptr addrspace(1) %ptr, align 1, !range !0, !noundef !1
@@ -135,7 +135,7 @@ define i32 @global_zextload_i8_align1__rangemd(ptr addrspace(1) %ptr) {
135135
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
136136
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
137137
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
138-
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !4, addrspace 1)
138+
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !5, addrspace 1)
139139
; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
140140
; CHECK-NEXT: SI_RETURN implicit $vgpr0
141141
%load = load i8, ptr addrspace(1) %ptr, align 1, !range !4, !noundef !1

‎llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll

Copy file name to clipboard — Expand all lines: llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll
+3-3
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ define void @fence_loads(ptr %ptr) {
1212
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1313
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1414
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
15-
; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0
16-
; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4)
15+
; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !1
16+
; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load acquire (s8) from %ir.ptr, align 4)
1717
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1
1818
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
19-
; CHECK-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4)
19+
; CHECK-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !3 :: (store release (s8) into %ir.ptr, align 4)
2020
; CHECK-NEXT: SI_RETURN
2121
fence release, !mmra !0
2222
%ld = load atomic i8, ptr %ptr acquire, align 4, !mmra !2

0 commit comments

Comments
0 (0)
Morty Proxy: this is a proxified and sanitized view of the page; visit the original site.