Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

[AtomicExpand] Add bitcasts when expanding load atomic vector #120716

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/jofrn/spr/main/b83937a8
Choose a base branch
Loading
from

Conversation

jofrn
Copy link
Contributor

@jofrn jofrn commented Dec 20, 2024

@llvmbot
Copy link
Member

llvmbot commented Dec 20, 2024

@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-backend-arm

Author: None (jofrn)

Changes

AtomicExpand fails for aligned load atomic <n x T> because it
does not find a compatible library call. This change marks load atomics
to not use sized calls and instead resort to using ___atomic_load.


Stack:

  • #120716 ⬅
  • #120640
  • #120598
  • #120387
  • #120386
  • #120385
  • #120384

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.


Full diff: https://github.com/llvm/llvm-project/pull/120716.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+3-1)
  • (modified) llvm/test/CodeGen/ARM/atomic-load-store.ll (+57)
  • (modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+51-10)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a75fa688d87a8d..d860bbf2685ed2 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1884,7 +1884,9 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
   IRBuilder<> Builder(I);
   IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
 
-  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
+  const bool IsAtomic = isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false;
+  const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) &&
+      canUseSizedAtomicCall(Size, Alignment, DL);
   Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
 
   const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29d..7f0f3008d2d5c2 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,60 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    dmb ish
+; ARM-NEXT:    bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:       @ %bb.0:
+; ARMOPTNONE-NEXT:    ldr r0, [r0]
+; ARMOPTNONE-NEXT:    dmb ish
+; ARMOPTNONE-NEXT:    bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:       @ %bb.0:
+; THUMBTWO-NEXT:    ldr r0, [r0]
+; THUMBTWO-NEXT:    dmb ish
+; THUMBTWO-NEXT:    bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:       @ %bb.0:
+; THUMBONE-NEXT:    push {r7, lr}
+; THUMBONE-NEXT:    movs r1, #0
+; THUMBONE-NEXT:    mov r2, r1
+; THUMBONE-NEXT:    bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:    pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:       @ %bb.0:
+; ARMV4-NEXT:    push {r11, lr}
+; ARMV4-NEXT:    sub sp, sp, #8
+; ARMV4-NEXT:    add r2, sp, #4
+; ARMV4-NEXT:    mov r1, r0
+; ARMV4-NEXT:    mov r0, #4
+; ARMV4-NEXT:    mov r3, #2
+; ARMV4-NEXT:    bl __atomic_load
+; ARMV4-NEXT:    ldr r0, [sp, #4]
+; ARMV4-NEXT:    add sp, sp, #8
+; ARMV4-NEXT:    pop {r11, lr}
+; ARMV4-NEXT:    mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:       @ %bb.0:
+; ARMV6-NEXT:    ldr r0, [r0]
+; ARMV6-NEXT:    mov r1, #0
+; ARMV6-NEXT:    mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:    bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:       @ %bb.0:
+; THUMBM-NEXT:    ldr r0, [r0]
+; THUMBM-NEXT:    dmb sy
+; THUMBM-NEXT:    bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3157e25a3eee20..e92d9ab4add1bc 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -399,17 +399,58 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_ptr_align:
+; CHECK3:       ## %bb.0:
+; CHECK3-NEXT:    subq $24, %rsp
+; CHECK3-NEXT:    movq %rdi, %rsi
+; CHECK3-NEXT:    movq %rsp, %rdx
+; CHECK3-NEXT:    movl $16, %edi
+; CHECK3-NEXT:    movl $2, %ecx
+; CHECK3-NEXT:    callq ___atomic_load
+; CHECK3-NEXT:    movaps (%rsp), %xmm0
+; CHECK3-NEXT:    addq $24, %rsp
+; CHECK3-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec2_ptr_align:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $24, %rsp
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $16, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movdqa (%rsp), %xmm0
+; CHECK0-NEXT:    addq $24, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
-; CHECK-LABEL: atomic_vec4_float_align:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    movl $2, %esi
-; CHECK-NEXT:    callq ___atomic_load_16
-; CHECK-NEXT:    movq %rdx, %xmm1
-; CHECK-NEXT:    movq %rax, %xmm0
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    retq
+; CHECK3-LABEL: atomic_vec4_float_align:
+; CHECK3:       ## %bb.0:
+; CHECK3-NEXT:    subq $24, %rsp
+; CHECK3-NEXT:    movq %rdi, %rsi
+; CHECK3-NEXT:    movq %rsp, %rdx
+; CHECK3-NEXT:    movl $16, %edi
+; CHECK3-NEXT:    movl $2, %ecx
+; CHECK3-NEXT:    callq ___atomic_load
+; CHECK3-NEXT:    movaps (%rsp), %xmm0
+; CHECK3-NEXT:    addq $24, %rsp
+; CHECK3-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec4_float_align:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $24, %rsp
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $16, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movaps (%rsp), %xmm0
+; CHECK0-NEXT:    addq $24, %rsp
+; CHECK0-NEXT:    retq
   %ret = load atomic <4 x float>, ptr %x acquire, align 16
   ret <4 x float> %ret
 }

Copy link

github-actions bot commented Dec 20, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 761d4d9 to 6506acb Compare December 20, 2024 11:39
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 241b7c7 to 1a9eae9 Compare December 20, 2024 11:52
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 6506acb to db674f8 Compare December 20, 2024 11:52
bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
const bool IsAtomic =
isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false;
const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't need to consider whether it's atomic or the type. The sized call should work fine. Some casts may be necessary

@jofrn jofrn changed the title [AtomicExpand] Avoid sized call when expanding load atomic vector [AtomicExpand] Add bitcasts when expanding load atomic vector Jan 2, 2025
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from db674f8 to 13ea377 Compare January 2, 2025 19:21
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 61ec1ef to dc2032f Compare January 2, 2025 20:03
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 13ea377 to e11194d Compare January 2, 2025 20:45
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 45e7035 to dd2c034 Compare January 6, 2025 19:25
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 751b5eb to 20bbd6e Compare January 7, 2025 15:31
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 0cb57c2 to ba2a301 Compare January 7, 2025 15:31
;
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
ret <2 x ptr> %ret
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check addrspace(270) vector. Also FP and int cases?

llvm/lib/CodeGen/AtomicExpandPass.cpp Outdated Show resolved Hide resolved
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from ba2a301 to f129a3c Compare January 15, 2025 11:52
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from d600ac3 to bdf9a55 Compare May 6, 2025 04:58
@jofrn jofrn changed the base branch from users/jofrn/spr/main/b83937a8 to main May 6, 2025 06:03
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from bdf9a55 to 2b5d4c0 Compare May 6, 2025 06:03
@jofrn jofrn changed the base branch from main to users/jofrn/spr/main/b83937a8 May 6, 2025 06:04
Comment on lines 2071 to 2072
auto PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
auto VTy = dyn_cast<VectorType>(I->getType());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
auto PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
auto VTy = dyn_cast<VectorType>(I->getType());
auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
auto *VTy = dyn_cast<VectorType>(I->getType());

Value *BC = Builder.CreateBitCast(
Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
V = Builder.CreateIntToPtr(
BC, VTy->getWithNewType(PointerType::get(Ctx, AS)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
BC, VTy->getWithNewType(PointerType::get(Ctx, AS)));
BC, I->getType());

@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 2b5d4c0 to 7d44d05 Compare May 6, 2025 15:04
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 18fe5c7 to 7a786f2 Compare May 6, 2025 15:04
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 72fd8a6 to 8e1beae Compare May 8, 2025 01:53
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 0890009 to c5cfc13 Compare May 8, 2025 01:53
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 8e1beae to 66207cc Compare May 8, 2025 09:13
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from c5cfc13 to b50f678 Compare May 8, 2025 09:13
Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
V = Builder.CreateIntToPtr(BC, I->getType());
} else
V = Builder.CreateBitOrPointerCast(Result, I->getType());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
V = Builder.CreateBitOrPointerCast(Result, I->getType());
V = Builder.CreateBitOrPointerCast(Result, VTy);

unsigned AS = PtrTy->getAddressSpace();
Value *BC = Builder.CreateBitCast(
Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
V = Builder.CreateIntToPtr(BC, I->getType());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
V = Builder.CreateIntToPtr(BC, I->getType());
V = Builder.CreateIntToPtr(BC, VTy);

Comment on lines +184 to +194
%ret = load atomic <2 x i16>, ptr %x acquire, align 8
ret <2 x i16> %ret
}

define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8
; CHECK-NEXT: ret <2 x half> [[RET]]
;
%ret = load atomic <2 x half>, ptr %x acquire, align 8
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The 2 x i16 and 2 x half cases aren't expanded, I guess the default expansion rule missed these for some reason?

Comment on lines +2071 to +2073
auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
auto *VTy = dyn_cast<VectorType>(I->getType());
if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There should probably be a utility for this somewhere

@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 66207cc to f2e8e75 Compare May 8, 2025 23:38
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from b50f678 to 479a227 Compare May 8, 2025 23:38
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from f2e8e75 to fb247c3 Compare May 9, 2025 12:53
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 479a227 to 6f96cf8 Compare May 9, 2025 12:53
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from fb247c3 to 88e3dac Compare May 9, 2025 19:43
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch 2 times, most recently from 053d34b to 3a1f677 Compare May 9, 2025 20:03
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 88e3dac to db72700 Compare May 9, 2025 20:03
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 3a1f677 to bf8fc80 Compare May 10, 2025 08:27
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from db72700 to ce64f04 Compare May 10, 2025 08:27
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from bf8fc80 to 9fe563b Compare May 11, 2025 00:05
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from ce64f04 to e1eaeb6 Compare May 11, 2025 00:05
AtomicExpand fails for aligned `load atomic <n x T>` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.

commit-id:f430c1af
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 9fe563b to 684a542 Compare May 12, 2025 05:34
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from e1eaeb6 to 717ea64 Compare May 12, 2025 05:34
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants
Morty Proxy This is a proxified and sanitized view of the page, visit original site.