[AtomicExpand] Add bitcasts when expanding load atomic vector #120716

jofrn · Dec 20, 2024

AtomicExpand fails for aligned load atomic <n x T> because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered. It also adds support for
128 bit lowering in tablegen to support SSE/AVX.

Stack:

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.

llvmbot · Dec 20, 2024

@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-backend-arm

Author: None (jofrn)

Changes

AtomicExpand fails for aligned load atomic <n x T> because it
does not find a compatible library call. This change marks load atomics
to not use sized calls and instead resort to using ___atomic_load.

Stack:

#120716 ⬅
#120640
#120598
#120387
#120386
#120385
#120384

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.

Full diff: https://github.com/llvm/llvm-project/pull/120716.diff

3 Files Affected:

(modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+3-1)
(modified) llvm/test/CodeGen/ARM/atomic-load-store.ll (+57)
(modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+51-10)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a75fa688d87a8d..d860bbf2685ed2 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1884,7 +1884,9 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
   IRBuilder<> Builder(I);
   IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
 
-  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
+  const bool IsAtomic = isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false;
+  const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) &&
+      canUseSizedAtomicCall(Size, Alignment, DL);
   Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
 
   const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29d..7f0f3008d2d5c2 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,60 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    dmb ish
+; ARM-NEXT:    bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:       @ %bb.0:
+; ARMOPTNONE-NEXT:    ldr r0, [r0]
+; ARMOPTNONE-NEXT:    dmb ish
+; ARMOPTNONE-NEXT:    bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:       @ %bb.0:
+; THUMBTWO-NEXT:    ldr r0, [r0]
+; THUMBTWO-NEXT:    dmb ish
+; THUMBTWO-NEXT:    bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:       @ %bb.0:
+; THUMBONE-NEXT:    push {r7, lr}
+; THUMBONE-NEXT:    movs r1, #0
+; THUMBONE-NEXT:    mov r2, r1
+; THUMBONE-NEXT:    bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:    pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:       @ %bb.0:
+; ARMV4-NEXT:    push {r11, lr}
+; ARMV4-NEXT:    sub sp, sp, #8
+; ARMV4-NEXT:    add r2, sp, #4
+; ARMV4-NEXT:    mov r1, r0
+; ARMV4-NEXT:    mov r0, #4
+; ARMV4-NEXT:    mov r3, #2
+; ARMV4-NEXT:    bl __atomic_load
+; ARMV4-NEXT:    ldr r0, [sp, #4]
+; ARMV4-NEXT:    add sp, sp, #8
+; ARMV4-NEXT:    pop {r11, lr}
+; ARMV4-NEXT:    mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:       @ %bb.0:
+; ARMV6-NEXT:    ldr r0, [r0]
+; ARMV6-NEXT:    mov r1, #0
+; ARMV6-NEXT:    mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:    bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:       @ %bb.0:
+; THUMBM-NEXT:    ldr r0, [r0]
+; THUMBM-NEXT:    dmb sy
+; THUMBM-NEXT:    bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3157e25a3eee20..e92d9ab4add1bc 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -399,17 +399,58 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_ptr_align:
+; CHECK3:       ## %bb.0:
+; CHECK3-NEXT:    subq $24, %rsp
+; CHECK3-NEXT:    movq %rdi, %rsi
+; CHECK3-NEXT:    movq %rsp, %rdx
+; CHECK3-NEXT:    movl $16, %edi
+; CHECK3-NEXT:    movl $2, %ecx
+; CHECK3-NEXT:    callq ___atomic_load
+; CHECK3-NEXT:    movaps (%rsp), %xmm0
+; CHECK3-NEXT:    addq $24, %rsp
+; CHECK3-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec2_ptr_align:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $24, %rsp
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $16, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movdqa (%rsp), %xmm0
+; CHECK0-NEXT:    addq $24, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
-; CHECK-LABEL: atomic_vec4_float_align:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    movl $2, %esi
-; CHECK-NEXT:    callq ___atomic_load_16
-; CHECK-NEXT:    movq %rdx, %xmm1
-; CHECK-NEXT:    movq %rax, %xmm0
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    retq
+; CHECK3-LABEL: atomic_vec4_float_align:
+; CHECK3:       ## %bb.0:
+; CHECK3-NEXT:    subq $24, %rsp
+; CHECK3-NEXT:    movq %rdi, %rsi
+; CHECK3-NEXT:    movq %rsp, %rdx
+; CHECK3-NEXT:    movl $16, %edi
+; CHECK3-NEXT:    movl $2, %ecx
+; CHECK3-NEXT:    callq ___atomic_load
+; CHECK3-NEXT:    movaps (%rsp), %xmm0
+; CHECK3-NEXT:    addq $24, %rsp
+; CHECK3-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec4_float_align:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $24, %rsp
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $16, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movaps (%rsp), %xmm0
+; CHECK0-NEXT:    addq $24, %rsp
+; CHECK0-NEXT:    retq
   %ret = load atomic <4 x float>, ptr %x acquire, align 16
   ret <4 x float> %ret
 }

github-actions · Dec 20, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

arsenm · Dec 20, 2024

llvm/lib/CodeGen/AtomicExpandPass.cpp

-  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
+  const bool IsAtomic =
+      isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false;
+  const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) &&


This shouldn't need to consider whether it's atomic or the type. The sized call should work fine. Some casts may be necessary

llvm/test/CodeGen/ARM/atomic-load-store.ll

llvm/lib/CodeGen/AtomicExpandPass.cpp

llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

llvm/lib/CodeGen/AtomicExpandPass.cpp

llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

arsenm · May 8, 2025

llvm/lib/CodeGen/AtomicExpandPass.cpp

+            Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
+        V = Builder.CreateIntToPtr(BC, I->getType());
+      } else
+        V = Builder.CreateBitOrPointerCast(Result, I->getType());


Suggested change

V = Builder.CreateBitOrPointerCast(Result, I->getType());

V = Builder.CreateBitOrPointerCast(Result, VTy);

arsenm · May 8, 2025

llvm/lib/CodeGen/AtomicExpandPass.cpp

+        unsigned AS = PtrTy->getAddressSpace();
+        Value *BC = Builder.CreateBitCast(
+            Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
+        V = Builder.CreateIntToPtr(BC, I->getType());


Suggested change

V = Builder.CreateIntToPtr(BC, I->getType());

V = Builder.CreateIntToPtr(BC, VTy);

arsenm · May 8, 2025

llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

+  %ret = load atomic <2 x i16>, ptr %x acquire, align 8
+  ret <2 x i16> %ret
+}
+
+define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
+; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
+; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8
+; CHECK-NEXT:    ret <2 x half> [[RET]]
+;
+  %ret = load atomic <2 x half>, ptr %x acquire, align 8


The 2 x i16 and 2 x half cases aren't expanded, I guess the default expansion rule missed these for some reason?

arsenm · May 8, 2025

llvm/lib/CodeGen/AtomicExpandPass.cpp

+      auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
+      auto *VTy = dyn_cast<VectorType>(I->getType());
+      if (VTy && PtrTy && !Result->getType()->isVectorTy()) {


There should probably be a utility for this somewhere

jofrn · May 27, 2025

llvm/lib/Target/X86/X86InstrCompiler.td

+def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)),
+           (VMOVAPDrm addr:$src)>; // load atomic <2 x i64>
+def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)),
+           (VMOVAPDrm addr:$src)>; // load atomic <4 x i32>


These are required for 128 bit vectors in SSE/AVX. The tests added in this PR require both the AtomicExpand change and these td records.

These require AVX/AVX512 variants (see below) - but x86 doesn't guarantee atomics for anything above 8 bytes (and those must be aligned to avoid cacheline crossing).

I think all known AVX capable x86 targets allow 16 byte aligned atomics, its not official but we assume it in X86TargetLowering::shouldExpandAtomicLoadInIR

AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. It also adds support for 128 bit lowering in tablegen to support SSE/AVX. commit-id:f430c1af

arsenm · Jun 2, 2025

llvm/include/llvm/Target/TargetSelectionDAG.td

+def atomic_load_128_v2i64 :
+  PatFrag<(ops node:$ptr),
+          (atomic_load node:$ptr)> {
+  let IsAtomic = true;
+  let MemoryVT = v2i64;
+}
+
+def atomic_load_128_v4i32 :
+  PatFrag<(ops node:$ptr),
+          (atomic_load node:$ptr)> {
+  let IsAtomic = true;
+  let MemoryVT = v4i32;


This patch should not require adding this, or touching any of the backend patterns

The tests that use them must also have the changes from AtomicExpand.

llvmbot added backend:ARM backend:X86 labels Dec 20, 2024

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 761d4d9 to 6506acb Compare December 20, 2024 11:39

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 241b7c7 to 1a9eae9 Compare December 20, 2024 11:52

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 6506acb to db674f8 Compare December 20, 2024 11:52

arsenm reviewed Dec 20, 2024

View reviewed changes

llvm/test/CodeGen/ARM/atomic-load-store.ll Show resolved Hide resolved

jofrn changed the title ~~[AtomicExpand] Avoid sized call when expanding load atomic vector~~ [AtomicExpand] Add bitcasts when expanding load atomic vector Jan 2, 2025

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from db674f8 to 13ea377 Compare January 2, 2025 19:21

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 61ec1ef to dc2032f Compare January 2, 2025 20:03

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 13ea377 to e11194d Compare January 2, 2025 20:45

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 45e7035 to dd2c034 Compare January 6, 2025 19:25

llvmbot added the llvm:transforms label Jan 6, 2025

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 751b5eb to 20bbd6e Compare January 7, 2025 15:31

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 0cb57c2 to ba2a301 Compare January 7, 2025 15:31

RKSimon reviewed Jan 7, 2025

View reviewed changes

llvm/lib/CodeGen/AtomicExpandPass.cpp Outdated Show resolved Hide resolved

arsenm reviewed Jan 7, 2025

View reviewed changes

llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll Show resolved Hide resolved

llvm/lib/CodeGen/AtomicExpandPass.cpp Outdated Show resolved Hide resolved

llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll Outdated Show resolved Hide resolved

jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from ba2a301 to f129a3c Compare January 15, 2025 11:52

jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 0890009 to c5cfc13 Compare May 8, 2025 01:53

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 8e1beae to 66207cc Compare May 8, 2025 09:13

jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from c5cfc13 to b50f678 Compare May 8, 2025 09:13

arsenm approved these changes May 8, 2025

View reviewed changes

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 66207cc to f2e8e75 Compare May 8, 2025 23:38

jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from b50f678 to 479a227 Compare May 8, 2025 23:38

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from f2e8e75 to fb247c3 Compare May 9, 2025 12:53

jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 479a227 to 6f96cf8 Compare May 9, 2025 12:53

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from fb247c3 to 88e3dac Compare May 9, 2025 19:43

jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch 2 times, most recently from 053d34b to 3a1f677 Compare May 9, 2025 20:03

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 88e3dac to db72700 Compare May 9, 2025 20:03

jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 3a1f677 to bf8fc80 Compare May 10, 2025 08:27

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from db72700 to ce64f04 Compare May 10, 2025 08:27

jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from bf8fc80 to 9fe563b Compare May 11, 2025 00:05

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from ce64f04 to e1eaeb6 Compare May 11, 2025 00:05

jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 9fe563b to 684a542 Compare May 12, 2025 05:34

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from e1eaeb6 to 717ea64 Compare May 12, 2025 05:34

jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 684a542 to 7ba3e69 Compare May 27, 2025 17:34

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 717ea64 to ac4069c Compare May 27, 2025 17:34

llvmbot added the llvm:SelectionDAG SelectionDAGISel as well label May 27, 2025

jofrn commented May 27, 2025

View reviewed changes

jofrn changed the base branch from users/jofrn/spr/main/b83937a8 to main June 1, 2025 20:46

jofrn mentioned this pull request Jun 1, 2025

[X86] Cast atomic vectors in IR to support floats #142320

Open

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from ac4069c to 9e3f3b1 Compare June 1, 2025 20:46

jofrn changed the base branch from main to users/jofrn/spr/main/80b9b6a7 June 1, 2025 20:46

jofrn force-pushed the users/jofrn/spr/main/80b9b6a7 branch from d074cd3 to 6eb10b5 Compare June 2, 2025 04:15

jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 9e3f3b1 to 70a2b52 Compare June 2, 2025 04:15

arsenm reviewed Jun 2, 2025

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AtomicExpand] Add bitcasts when expanding load atomic vector #120716

[AtomicExpand] Add bitcasts when expanding load atomic vector #120716

jofrn commented Dec 20, 2024 •

edited

Loading

Uh oh!

llvmbot commented Dec 20, 2024 •

edited

Loading

Uh oh!

github-actions bot commented Dec 20, 2024 •

edited

Loading

Uh oh!

arsenm Dec 20, 2024

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

arsenm May 8, 2025

Uh oh!

arsenm May 8, 2025

Uh oh!

arsenm May 8, 2025

Uh oh!

arsenm May 8, 2025

Uh oh!

jofrn May 27, 2025

Uh oh!

RKSimon Jun 2, 2025 •

edited

Loading

Uh oh!

RKSimon Jun 2, 2025 •

edited

Loading

Uh oh!

arsenm Jun 2, 2025

Uh oh!

jofrn Jun 2, 2025

Uh oh!

Uh oh!

	V = Builder.CreateBitOrPointerCast(Result, I->getType());
	V = Builder.CreateBitOrPointerCast(Result, VTy);

	V = Builder.CreateIntToPtr(BC, I->getType());
	V = Builder.CreateIntToPtr(BC, VTy);

Search code, repositories, users, issues, pull requests...

[AtomicExpand] Add bitcasts when expanding load atomic vector #120716

Are you sure you want to change the base?

[AtomicExpand] Add bitcasts when expanding load atomic vector #120716

Conversation

jofrn commented Dec 20, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Dec 20, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Dec 20, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

RKSimon Jun 2, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

RKSimon Jun 2, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

jofrn commented Dec 20, 2024 •

edited

Loading

llvmbot commented Dec 20, 2024 •

edited

Loading

github-actions bot commented Dec 20, 2024 •

edited

Loading

RKSimon Jun 2, 2025 •

edited

Loading

RKSimon Jun 2, 2025 •

edited

Loading