diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 34ba53cbe0f9e..f79069bd6d78b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -1038,6 +1038,12 @@ class AMDGPULowerModuleLDS { } bool runOnModule(Module &M) { + // Check if we've already lowered this module. The pass may run more + // than once in the LTO pipeline, and multiple runs aren't supported. + if (M.getModuleFlag("amdgpu.lowered_lds")) + return false; + M.addModuleFlag(Module::ModFlagBehavior::Error, "amdgpu.lowered_lds", 1); + CallGraph CG = CallGraph(M); bool Changed = superAlignLDSGlobals(M); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll index aae999ec0a99a..75913d5219af2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll @@ -9,46 +9,46 @@ define amdgpu_kernel void @call_debug_loc() { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !7 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !7 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !7 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !7 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !7 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !7 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !7 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !8 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !8 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !8 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !8 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !8 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !8 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !8 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !8 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !7 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !7 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !7 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !7 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !7 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !7 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !7 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !7 - ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !7 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !7 - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !7 - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !7 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !7 - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !7 - ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !7 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !7 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !7 - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !7 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !7 - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !7 - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !7 - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !7 - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !7 - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !7 - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !7 - ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !7 - ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !7 - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !7 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !7 - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !7 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !8 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !8 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !8 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !8 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !8 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !8 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !8 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !8 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !8 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !8 + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !8 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !8 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !8 + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !8 + ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !8 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !8 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !8 + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !8 + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !8 + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !8 + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !8 + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !8 + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !8 + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !8 + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !8 + ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !8 + ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !8 + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !8 :: (dereferenceable invariant load (p0) from got, addrspace 4) + ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !8 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !8 ; CHECK-NEXT: S_ENDPGM 0 entry: call void @callee(), !dbg !6 @@ -60,11 +60,11 @@ define void @returnaddress_debug_loc(ptr addrspace(1) %ptr) { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !7 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !8 ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index 96c9f40e317ea..3da3355e51cf9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -7,7 +7,7 @@ define amdgpu_kernel void @asm_convergent() convergent{ ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !1 + ; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !2 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_barrier", ""() convergent, !srcloc !0 ret void @@ -19,8 +19,8 @@ define amdgpu_kernel void @asm_simple_memory_clobber() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !1 - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !1 + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !2 + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !2 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "", "~{memory}"(), !srcloc !0 call void asm sideeffect "", ""(), !srcloc !0 @@ -33,7 +33,7 @@ define amdgpu_kernel void @asm_simple_vgpr_clobber() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !1 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !2 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"(), !srcloc !0 ret void @@ -45,7 +45,7 @@ define amdgpu_kernel void @asm_simple_sgpr_clobber() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !1 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !2 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, 7", "~{s0}"(), !srcloc !0 ret void @@ -57,7 +57,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !1 + ; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !2 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "; def a0", "~{a0}"(), !srcloc !0 ret void @@ -66,7 +66,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() { define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !2 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll index 101bb6c0ed123..296eeaed0a287 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll @@ -5,8 +5,7 @@ define i32 @reloc_constant() { ; CHECK-LABEL: name: reloc_constant ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 - ; We cannot have any specific metadata check here as ConstantAsMetadata is printed as + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !1 ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}> ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[INT1]] ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll index b6b4301dadc7a..f1800dc6afcb9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll @@ -77,7 +77,7 @@ define <2 x i64> @global_load_v2i64_align16__rangemd(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[MV]](p1) :: (load (<2 x s64>) from %ir.ptr, !range !2, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[MV]](p1) :: (load (<2 x s64>) from %ir.ptr, !range !3, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -119,7 +119,7 @@ define i32 @global_sextload_i8_align1__rangemd(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !0, addrspace 1) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !1, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %load = load i8, ptr addrspace(1) %ptr, align 1, !range !0, !noundef !1 @@ -135,7 +135,7 @@ define i32 @global_zextload_i8_align1__rangemd(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !4, addrspace 1) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !5, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %load = load i8, ptr addrspace(1) %ptr, align 1, !range !4, !noundef !1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll index 44b12a9f6fe81..80243d658ae00 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll @@ -12,11 +12,11 @@ define void @fence_loads(ptr %ptr) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0 - ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4) + ; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !1 + ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load acquire (s8) from %ir.ptr, align 4) ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4) + ; CHECK-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !3 :: (store release (s8) into %ir.ptr, align 4) ; CHECK-NEXT: SI_RETURN fence release, !mmra !0 %ld = load atomic i8, ptr %ptr acquire, align 4, !mmra !2 diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll index 92d0a05f35732..b2ebf2e33e29f 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll @@ -23,10 +23,10 @@ ;. define amdgpu_kernel void @k0() #0 { ; CHECK-LABEL: @k0() #0 -; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4 -; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9 -; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11 -; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !12, !noalias !13 +; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !2, !noalias !5 +; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !9, !noalias !10 +; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !11, !noalias !12 +; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !13, !noalias !14 ; CHECK-NEXT: ret void store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1 @@ -41,9 +41,9 @@ define amdgpu_kernel void @k0() #0 { define amdgpu_kernel void @k1() #0 { ; CHECK-LABEL: @k1() #1 -; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17 -; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21 -; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23 +; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !15, !noalias !18 +; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !21, !noalias !22 +; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !23, !noalias !24 ; CHECK-NEXT: ret void ; store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2 @@ -83,8 +83,8 @@ define amdgpu_kernel void @calls_f0() { define void @f0() { ; CHECK-LABEL: define void @f0() -; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24 -; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !29 +; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !25 +; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !30 ; CHECK-NEXT: ret void store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1 diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll index 70142fa4b5b29..37ae05bfab86f 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll @@ -16,10 +16,10 @@ ;. define amdgpu_kernel void @k0() { ; CHECK-LABEL: @k0( -; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4 -; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9 -; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11 -; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !12, !noalias !13 +; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !2, !noalias !5 +; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !9, !noalias !10 +; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !11, !noalias !12 +; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !13, !noalias !14 ; CHECK-NEXT: ret void store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1 @@ -34,9 +34,9 @@ define amdgpu_kernel void @k0() { define amdgpu_kernel void @k1() { ; CHECK-LABEL: @k1( -; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17 -; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21 -; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23 +; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !15, !noalias !18 +; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !21, !noalias !22 +; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !23, !noalias !24 ; CHECK-NEXT: ret void ; store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2 diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll index 4ab05c2923fdb..dbab9e520f989 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll @@ -34,19 +34,20 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce ; GCN-NEXT: s_endpgm ; CHECK-LABEL: define protected amdgpu_kernel void @test( ; CHECK-SAME: ptr addrspace(1) captures(none) [[PTR_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4 -; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7 -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1 +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope [[META2:![0-9]+]], !noalias [[META5:![0-9]+]] +; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope [[META7:![0-9]+]], !noalias [[META8:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope [[META5]], !noalias [[META2]] ; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 3 -; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4 -; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7 -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1 +; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope [[META2]], !noalias [[META5]] +; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope [[META7]], !noalias [[META8]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope [[META5]], !noalias [[META2]] ; CHECK-NEXT: [[CMP_I_I19:%.*]] = icmp eq i8 [[TMP1]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[CMP_I_I19]], [[CMP_I_I]] ; CHECK-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP2]] to i8 ; CHECK-NEXT: store i8 [[FROMBOOL8]], ptr addrspace(1) [[PTR_COERCE]], align 1 ; CHECK-NEXT: ret void +; entry: store i8 3, ptr addrspace(3) @_f1, align 1 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false) @@ -63,17 +64,15 @@ entry: } declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #1 - ;. ; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="7" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. -; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 1} -; CHECK: [[META1:![0-9]+]] = !{!2} -; CHECK: [[META2:![0-9]+]] = distinct !{!2, !3} -; CHECK: [[META3:![0-9]+]] = distinct !{!3} -; CHECK: [[META4:![0-9]+]] = !{!5} -; CHECK: [[META5:![0-9]+]] = distinct !{!5, !3} -; CHECK: [[META6:![0-9]+]] = !{!5, !2} -; CHECK: [[META7:![0-9]+]] = !{} +; CHECK: [[META2]] = !{[[META3:![0-9]+]]} +; CHECK: [[META3]] = distinct !{[[META3]], [[META4:![0-9]+]]} +; CHECK: [[META4]] = distinct !{[[META4]]} +; CHECK: [[META5]] = !{[[META6:![0-9]+]]} +; CHECK: [[META6]] = distinct !{[[META6]], [[META4]]} +; CHECK: [[META7]] = !{[[META6]], [[META3]]} +; CHECK: [[META8]] = !{} ;. diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll index 154c798a44f93..7437ce347d1a5 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll @@ -9,12 +9,12 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrs ; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa( ; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: bb: -; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA1:![0-9]+]], !noalias !6 +; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA2:![0-9]+]], !noalias [[META7:![0-9]+]] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1]], !noalias !6 -; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !11 +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA2]], !noalias [[META7]] +; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA2]], !noalias [[META12:![0-9]+]] ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !11 +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA2]], !noalias [[META12]] ; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]] ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4 ; CHECK-NEXT: ret void @@ -42,17 +42,18 @@ bb: !8 = !{!"omnipotent char", !9, i64 0} !9 = !{!"Simple C++ TBAA"} -; CHECK:!0 = !{i32 0, i32 1} -; CHECK:!1 = !{!2, !3, i64 0} -; CHECK:!2 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !3, i64 0} -; CHECK:!3 = !{!"int", !4, i64 0} -; CHECK:!4 = !{!"omnipotent char", !5, i64 0} -; CHECK:!5 = !{!"Simple C++ TBAA"} -; CHECK:!6 = !{!7, !9} -; CHECK:!7 = distinct !{!7, !8} -; CHECK:!8 = distinct !{!8} -; CHECK:!9 = distinct !{!9, !10} -; CHECK:!10 = distinct !{!10} -; CHECK:!11 = !{!12, !13} -; CHECK:!12 = distinct !{!12, !8} -; CHECK:!13 = distinct !{!13, !10} +; CHECK: !0 = !{i32 0, i32 1} +; CHECK: !1 = !{i32 1, !"amdgpu.lowered_lds", i32 1} +; CHECK: !2 = !{!3, !4, i64 0} +; CHECK: !3 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !4, i64 0} +; CHECK: !4 = !{!"int", !5, i64 0} +; CHECK: !5 = !{!"omnipotent char", !6, i64 0} +; CHECK: !6 = !{!"Simple C++ TBAA"} +; CHECK: !7 = !{!8, !10} +; CHECK: !8 = distinct !{!8, !9} +; CHECK: !9 = distinct !{!9} +; CHECK: !10 = distinct !{!10, !11} +; CHECK: !11 = distinct !{!11} +; CHECK: !12 = !{!13, !14} +; CHECK: !13 = distinct !{!13, !9} +; CHECK: !14 = distinct !{!14, !11} diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll index 24c1bfb8d50f0..927ef687bc8d9 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll @@ -14,12 +14,12 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x2(ptr addrspace(1) %arg, i ; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x2( ; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: bb: -; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !1, !noalias !4 +; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !2, !noalias !5 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !1, !noalias !4 -; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !4, !noalias !1 +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !2, !noalias !5 +; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !5, !noalias !2 ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !4, !noalias !1 +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !5, !noalias !2 ; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]] ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4 ; CHECK-NEXT: ret void @@ -58,15 +58,15 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x3(ptr addrspace(1) %arg, i ; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x3( ; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: bb: -; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !6, !noalias !9 +; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !7, !noalias !10 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !6, !noalias !9 -; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !12, !noalias !13 +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !7, !noalias !10 +; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !13, !noalias !14 ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !12, !noalias !13 -; CHECK-NEXT: store i32 3, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !14, !noalias !15 +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !13, !noalias !14 +; CHECK-NEXT: store i32 3, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !15, !noalias !16 ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_C:%.*]] = load i32, ptr addrspace(3) [[GEP_C]], align 4, !alias.scope !14, !noalias !15 +; CHECK-NEXT: [[VAL_C:%.*]] = load i32, ptr addrspace(3) [[GEP_C]], align 4, !alias.scope !15, !noalias !16 ; CHECK-NEXT: [[VAL_1:%.*]] = add i32 [[VAL_A]], [[VAL_B]] ; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_1]], [[VAL_C]] ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4 @@ -111,18 +111,19 @@ bb: } ; CHECK: !0 = !{i32 0, i32 1} -; CHECK: !1 = !{!2} -; CHECK: !2 = distinct !{!2, !3} -; CHECK: !3 = distinct !{!3} -; CHECK: !4 = !{!5} -; CHECK: !5 = distinct !{!5, !3} -; CHECK: !6 = !{!7} -; CHECK: !7 = distinct !{!7, !8} -; CHECK: !8 = distinct !{!8} -; CHECK: !9 = !{!10, !11} -; CHECK: !10 = distinct !{!10, !8} -; CHECK: !11 = distinct !{!11, !8} -; CHECK: !12 = !{!10} -; CHECK: !13 = !{!7, !11} -; CHECK: !14 = !{!11} -; CHECK: !15 = !{!7, !10} +; CHECK: !1 = !{i32 1, !"amdgpu.lowered_lds", i32 1} +; CHECK: !2 = !{!3} +; CHECK: !3 = distinct !{!3, !4} +; CHECK: !4 = distinct !{!4} +; CHECK: !5 = !{!6} +; CHECK: !6 = distinct !{!6, !4} +; CHECK: !7 = !{!8} +; CHECK: !8 = distinct !{!8, !9} +; CHECK: !9 = distinct !{!9} +; CHECK: !10 = !{!11, !12} +; CHECK: !11 = distinct !{!11, !9} +; CHECK: !12 = distinct !{!12, !9} +; CHECK: !13 = !{!11} +; CHECK: !14 = !{!8, !12} +; CHECK: !15 = !{!12} +; CHECK: !16 = !{!8, !11} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll index 4fcad258d4a74..9edaa72fa55bb 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll @@ -9,7 +9,8 @@ @B = external addrspace(3) global [0 x i32] define amdgpu_kernel void @kernel_0() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_0() #0 !llvm.amdgcn.lds.kernel.id !1 { +; CHECK-LABEL: define amdgpu_kernel void @kernel_0 +; CHECK-SAME: () #[[ATTR0:[0-9]+]] {{.*}}.amdgcn.lds.kernel.id [[META2:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_0.lds) ] ; CHECK-NEXT: call void @call_store_A() ; CHECK-NEXT: ret void @@ -19,7 +20,8 @@ define amdgpu_kernel void @kernel_0() { } define amdgpu_kernel void @kernel_1() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_1() !llvm.amdgcn.lds.kernel.id !2 { +; CHECK-LABEL: define amdgpu_kernel void @kernel_1 +; CHECK-SAME: () {{.*}}.amdgcn.lds.kernel.id [[META3:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ] ; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() ; CHECK-NEXT: ret void @@ -29,7 +31,8 @@ define amdgpu_kernel void @kernel_1() { } define amdgpu_kernel void @kernel_2() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_2() #0 !llvm.amdgcn.lds.kernel.id !3 { +; CHECK-LABEL: define amdgpu_kernel void @kernel_2 +; CHECK-SAME: () #[[ATTR0]] {{.*}}.amdgcn.lds.kernel.id [[META4:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_2.lds) ] ; CHECK-NEXT: call void @store_A() ; CHECK-NEXT: ret void @@ -39,7 +42,8 @@ define amdgpu_kernel void @kernel_2() { } define amdgpu_kernel void @kernel_3() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_3() !llvm.amdgcn.lds.kernel.id !4 { +; CHECK-LABEL: define amdgpu_kernel void @kernel_3 +; CHECK-SAME: () {{.*}}.amdgcn.lds.kernel.id [[META5:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ] ; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() ; CHECK-NEXT: ret void diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll index a553375cb51e0..a98e170a68b8a 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll @@ -13,22 +13,12 @@ @dynamic_shared8 = external addrspace(3) global [0 x i64], align 8 ; CHECK: %llvm.amdgcn.module.lds.t = type { i32 } -; CHECK: @dynamic_kernel_only = external addrspace(3) global [0 x double] -; CHECK: @dynamic_shared8 = external addrspace(3) global [0 x i64], align 8 -; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4, !absolute_symbol !0 -; CHECK: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata" ; Alignment of these must be the maximum of the alignment of the reachable symbols -; CHECK: @llvm.amdgcn.expect_align1.dynlds = external addrspace(3) global [0 x i8], align 1, !absolute_symbol !0 -; CHECK: @llvm.amdgcn.expect_align2.dynlds = external addrspace(3) global [0 x i8], align 2, !absolute_symbol !0 -; CHECK: @llvm.amdgcn.expect_align4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol !1 -; CHECK: @llvm.amdgcn.expect_align8.dynlds = external addrspace(3) global [0 x i8], align 8, !absolute_symbol !0 ; Align 4 and symbol at address [4 5) as module.lds is reachable -; CHECK: @llvm.amdgcn.expect_max_of_2_and_4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol !1 ; Builds a lookup table out of the newly created (suffixed .dynlds) variables in kernel.id order -; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [5 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds to i32)] @@ -113,7 +103,8 @@ define void @use_shared8() #0 { ; The kernels are annotated with kernel.id and llvm.donothing use of the corresponding variable define amdgpu_kernel void @expect_align1() { -; CHECK-LABEL: define amdgpu_kernel void @expect_align1() !llvm.amdgcn.lds.kernel.id !2 { +; CHECK-LABEL: define amdgpu_kernel void @expect_align1( +; CHECK-SAME: ) {{.*}}.amdgcn.lds.kernel.id [[META3:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds) ] ; CHECK-NEXT: call void @use_shared1() ; CHECK-NEXT: ret void @@ -123,7 +114,8 @@ define amdgpu_kernel void @expect_align1() { } define amdgpu_kernel void @expect_align2() { -; CHECK-LABEL: define amdgpu_kernel void @expect_align2() !llvm.amdgcn.lds.kernel.id !3 { +; CHECK-LABEL: define amdgpu_kernel void @expect_align2( +; CHECK-SAME: ) {{.*}}.amdgcn.lds.kernel.id [[META4:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds) ] ; CHECK-NEXT: call void @use_shared2() ; CHECK-NEXT: ret void @@ -134,7 +126,7 @@ define amdgpu_kernel void @expect_align2() { define amdgpu_kernel void @expect_align4() { ; CHECK-LABEL: define amdgpu_kernel void @expect_align4( -; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id !4 { +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {{.*}}.amdgcn.lds.kernel.id [[META5:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds) ] ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ] ; CHECK-NEXT: call void @use_shared4() @@ -146,7 +138,8 @@ define amdgpu_kernel void @expect_align4() { ; Use dynamic_shared directly too. define amdgpu_kernel void @expect_align8() { -; CHECK-LABEL: define amdgpu_kernel void @expect_align8() !llvm.amdgcn.lds.kernel.id !5 { +; CHECK-LABEL: define amdgpu_kernel void @expect_align8( +; CHECK-SAME: ) {{.*}}.amdgcn.lds.kernel.id [[META6:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds) ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) @dynamic_shared8, i32 0, i32 9 ; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 8 @@ -162,7 +155,7 @@ define amdgpu_kernel void @expect_align8() { ; Note: use_shared4 uses module.lds so this will allocate at offset 4 define amdgpu_kernel void @expect_max_of_2_and_4() { ; CHECK-LABEL: define amdgpu_kernel void @expect_max_of_2_and_4( -; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id !6 { +; CHECK-SAME: ) #[[ATTR1]] {{.*}}.amdgcn.lds.kernel.id [[META7:![0-9]+]] { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds) ] ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ] ; CHECK-NEXT: call void @use_shared2() @@ -178,10 +171,8 @@ define amdgpu_kernel void @expect_max_of_2_and_4() { attributes #0 = { noinline } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) -; CHECK: declare void @llvm.donothing() #2 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -; CHECK: declare noundef i32 @llvm.amdgcn.lds.kernel.id() #3 ; CHECK: attributes #0 = { noinline } ; CHECK: attributes #1 = { "amdgpu-lds-size"="4,4" } @@ -190,8 +181,8 @@ attributes #0 = { noinline } ; CHECK: !0 = !{i32 0, i32 1} ; CHECK: !1 = !{i32 4, i32 5} -; CHECK: !2 = !{i32 0} -; CHECK: !3 = !{i32 1} -; CHECK: !4 = !{i32 2} -; CHECK: !5 = !{i32 3} -; CHECK: !6 = !{i32 4} +; CHECK: !3 = !{i32 0} +; CHECK: !4 = !{i32 1} +; CHECK: !5 = !{i32 2} +; CHECK: !6 = !{i32 3} +; CHECK: !7 = !{i32 4} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll index 2a7553ae5d92b..4aa92ce85adef 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll @@ -195,7 +195,7 @@ define amdgpu_kernel void @k01() { define amdgpu_kernel void @k23() { ; OPT-LABEL: @k23( -; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]] +; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]] ; OPT-NEXT: call void @f2() ; OPT-NEXT: call void @f3() ; OPT-NEXT: ret void @@ -245,12 +245,12 @@ define amdgpu_kernel void @k23() { ; Access and allocate three variables define amdgpu_kernel void @k123() { ; OPT-LABEL: @k123( -; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META11:![0-9]+]], !noalias [[META14:![0-9]+]] +; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META12:![0-9]+]], !noalias [[META15:![0-9]+]] ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ] ; OPT-NEXT: call void @f1() -; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META14]], !noalias [[META11]] +; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META15]], !noalias [[META12]] ; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8 -; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META14]], !noalias [[META11]] +; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META15]], !noalias [[META12]] ; OPT-NEXT: call void @f2() ; OPT-NEXT: ret void ; @@ -312,18 +312,19 @@ define amdgpu_kernel void @k123() { ; OPT: !1 = !{i32 4, i32 5} ; OPT: !2 = !{i32 8, i32 9} ; OPT: !3 = !{i32 1, !"amdhsa_code_object_version", i32 500} -; OPT: !4 = !{i32 1} -; OPT: !5 = !{!6} -; OPT: !6 = distinct !{!6, !7} -; OPT: !7 = distinct !{!7} -; OPT: !8 = !{!9} -; OPT: !9 = distinct !{!9, !7} -; OPT: !10 = !{i32 0} -; OPT: !11 = !{!12} -; OPT: !12 = distinct !{!12, !13} -; OPT: !13 = distinct !{!13} -; OPT: !14 = !{!15} -; OPT: !15 = distinct !{!15, !13} +; OPT: !4 = !{i32 1, !"amdgpu.lowered_lds", i32 1} +; OPT: !5 = !{i32 1} +; OPT: !6 = !{!7} +; OPT: !7 = distinct !{!7, !8} +; OPT: !8 = distinct !{!8} +; OPT: !9 = !{!10} +; OPT: !10 = distinct !{!10, !8} +; OPT: !11 = !{i32 0} +; OPT: !12 = !{!13} +; OPT: !13 = distinct !{!13, !14} +; OPT: !14 = distinct !{!14} +; OPT: !15 = !{!16} +; OPT: !16 = distinct !{!16, !14} attributes #0 = { "amdgpu-lds-size"="8" } attributes #1 = { "amdgpu-lds-size"="16" } diff --git a/llvm/test/CodeGen/AMDGPU/mmra.ll b/llvm/test/CodeGen/AMDGPU/mmra.ll index 444997858bf7a..3e88b93125101 100644 --- a/llvm/test/CodeGen/AMDGPU/mmra.ll +++ b/llvm/test/CodeGen/AMDGPU/mmra.ll @@ -14,12 +14,12 @@ define void @fence_loads(ptr %ptr) { ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !1 - ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4) + ; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2 + ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load acquire (s8) from %ir.ptr, align 4) ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2 - ; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !3 + ; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr, mmra !3 :: (store release (s8) into %ir.ptr, align 4) ; CHECK-NEXT: SI_RETURN fence release, !mmra !0 %ld = load atomic i8, ptr %ptr acquire, align 4, !mmra !2 @@ -37,8 +37,8 @@ define void @atomicrmw_acq(ptr %ptr) { ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !1 - ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE killed [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2 + ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE killed [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load acquire (s8) from %ir.ptr) ; CHECK-NEXT: SI_RETURN %old.2 = atomicrmw add ptr %ptr, i8 0 acquire, !mmra !2 ret void @@ -69,8 +69,8 @@ define void @atomicrmw_rel(ptr %ptr) { ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 255 ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[V_LSHLREV_B32_e64_]], killed [[S_MOV_B32_2]], implicit $exec ; CHECK-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[V_LSHLREV_B32_e64_1]], implicit $exec - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !2 - ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load (s32) from %ir.AlignedAddr) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !3 + ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr, mmra !3 :: (load (s32) from %ir.AlignedAddr) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: @@ -83,9 +83,9 @@ define void @atomicrmw_rel(ptr %ptr) { ; CHECK-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_2]], %subreg.sub0, [[PHI1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !2 - ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !2 :: (load store release monotonic (s32) on %ir.AlignedAddr) - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !2 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !3 + ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !3 :: (load store release monotonic (s32) on %ir.AlignedAddr) + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !3 ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK killed [[V_CMP_EQ_U32_e64_]], [[PHI]], implicit-def dead $scc ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.2 @@ -125,8 +125,8 @@ define void @cmpxchg(ptr %ptr) { ; CHECK-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 killed [[V_LSHLREV_B32_e64_1]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; CHECK-NEXT: [[V_LSHLREV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[V_LSHLREV_B32_e64_]], killed [[S_MOV_B32_3]], implicit $exec - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !1 - ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load (s32) from %ir.AlignedAddr) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !2 + ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load (s32) from %ir.AlignedAddr) ; CHECK-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed [[FLAT_LOAD_DWORD]], [[V_NOT_B32_e32_]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; CHECK-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF @@ -141,8 +141,8 @@ define void @cmpxchg(ptr %ptr) { ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_OR_B32_e64_]], %subreg.sub0, [[PHI2]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !1 - ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !1 :: (load store acquire acquire (s32) on %ir.AlignedAddr) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !2 + ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !2 :: (load store acquire acquire (s32) on %ir.AlignedAddr) ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI2]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF @@ -248,8 +248,8 @@ define void @atomicrmw_rel_deepcopy(ptr %ptr) { ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 255 ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[V_LSHLREV_B32_e64_]], killed [[S_MOV_B32_2]], implicit $exec ; CHECK-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[V_LSHLREV_B32_e64_1]], implicit $exec - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !0 - ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY31]], 0, 0, implicit $exec, implicit $flat_scr, mmra !0 :: (load (s32) from %ir.AlignedAddr) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !1 + ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY31]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load (s32) from %ir.AlignedAddr) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: @@ -262,9 +262,9 @@ define void @atomicrmw_rel_deepcopy(ptr %ptr) { ; CHECK-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_2]], %subreg.sub0, [[PHI1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !0 - ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY30]], killed [[COPY32]], 0, 1, implicit $exec, implicit $flat_scr, mmra !0 :: (load store release monotonic (s32) on %ir.AlignedAddr) - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !0 + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !1 + ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY30]], killed [[COPY32]], 0, 1, implicit $exec, implicit $flat_scr, mmra !1 :: (load store release monotonic (s32) on %ir.AlignedAddr) + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !1 ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK killed [[V_CMP_EQ_U32_e64_]], [[PHI]], implicit-def dead $scc ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.2