-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AtomicExpand] Add bitcasts when expanding load atomic vector #120716
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/jofrn/spr/main/b83937a8
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-arm Author: None (jofrn) ChangesAtomicExpand fails for aligned Stack:
Full diff: https://github.com/llvm/llvm-project/pull/120716.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a75fa688d87a8d..d860bbf2685ed2 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1884,7 +1884,9 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
IRBuilder<> Builder(I);
IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
- bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
+ const bool IsAtomic = isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false;
+ const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) &&
+ canUseSizedAtomicCall(Size, Alignment, DL);
Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29d..7f0f3008d2d5c2 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,60 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
store atomic double %val1, ptr %ptr seq_cst, align 8
ret void
}
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldr r0, [r0]
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: ldr r0, [r0]
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: ldr r0, [r0]
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: movs r1, #0
+; THUMBONE-NEXT: mov r2, r1
+; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: sub sp, sp, #8
+; ARMV4-NEXT: add r2, sp, #4
+; ARMV4-NEXT: mov r1, r0
+; ARMV4-NEXT: mov r0, #4
+; ARMV4-NEXT: mov r3, #2
+; ARMV4-NEXT: bl __atomic_load
+; ARMV4-NEXT: ldr r0, [sp, #4]
+; ARMV4-NEXT: add sp, sp, #8
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: ldr r0, [r0]
+; ARMV6-NEXT: mov r1, #0
+; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: ldr r0, [r0]
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: bx lr
+ %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+ ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3157e25a3eee20..e92d9ab4add1bc 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -399,17 +399,58 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
ret <2 x i32> %ret
}
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_ptr_align:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: subq $24, %rsp
+; CHECK3-NEXT: movq %rdi, %rsi
+; CHECK3-NEXT: movq %rsp, %rdx
+; CHECK3-NEXT: movl $16, %edi
+; CHECK3-NEXT: movl $2, %ecx
+; CHECK3-NEXT: callq ___atomic_load
+; CHECK3-NEXT: movaps (%rsp), %xmm0
+; CHECK3-NEXT: addq $24, %rsp
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_ptr_align:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $24, %rsp
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $16, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movdqa (%rsp), %xmm0
+; CHECK0-NEXT: addq $24, %rsp
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+ ret <2 x ptr> %ret
+}
+
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
-; CHECK-LABEL: atomic_vec4_float_align:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: movl $2, %esi
-; CHECK-NEXT: callq ___atomic_load_16
-; CHECK-NEXT: movq %rdx, %xmm1
-; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: retq
+; CHECK3-LABEL: atomic_vec4_float_align:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: subq $24, %rsp
+; CHECK3-NEXT: movq %rdi, %rsi
+; CHECK3-NEXT: movq %rsp, %rdx
+; CHECK3-NEXT: movl $16, %edi
+; CHECK3-NEXT: movl $2, %ecx
+; CHECK3-NEXT: callq ___atomic_load
+; CHECK3-NEXT: movaps (%rsp), %xmm0
+; CHECK3-NEXT: addq $24, %rsp
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec4_float_align:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $24, %rsp
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $16, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movaps (%rsp), %xmm0
+; CHECK0-NEXT: addq $24, %rsp
+; CHECK0-NEXT: retq
%ret = load atomic <4 x float>, ptr %x acquire, align 16
ret <4 x float> %ret
}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
761d4d9
to
6506acb
Compare
241b7c7
to
1a9eae9
Compare
6506acb
to
db674f8
Compare
bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL); | ||
const bool IsAtomic = | ||
isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false; | ||
const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't need to consider whether it's atomic or the type. The sized call should work fine. Some casts may be necessary
db674f8
to
13ea377
Compare
61ec1ef
to
dc2032f
Compare
13ea377
to
e11194d
Compare
45e7035
to
dd2c034
Compare
751b5eb
to
20bbd6e
Compare
0cb57c2
to
ba2a301
Compare
; | ||
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16 | ||
ret <2 x ptr> %ret | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check addrspace(270) vector. Also FP and int cases?
llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
Outdated
Show resolved
Hide resolved
ba2a301
to
f129a3c
Compare
d600ac3
to
bdf9a55
Compare
bdf9a55
to
2b5d4c0
Compare
auto PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType()); | ||
auto VTy = dyn_cast<VectorType>(I->getType()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType()); | |
auto VTy = dyn_cast<VectorType>(I->getType()); | |
auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType()); | |
auto *VTy = dyn_cast<VectorType>(I->getType()); |
Value *BC = Builder.CreateBitCast( | ||
Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); | ||
V = Builder.CreateIntToPtr( | ||
BC, VTy->getWithNewType(PointerType::get(Ctx, AS))); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BC, VTy->getWithNewType(PointerType::get(Ctx, AS))); | |
BC, I->getType()); |
2b5d4c0
to
7d44d05
Compare
18fe5c7
to
7a786f2
Compare
72fd8a6
to
8e1beae
Compare
0890009
to
c5cfc13
Compare
8e1beae
to
66207cc
Compare
c5cfc13
to
b50f678
Compare
Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); | ||
V = Builder.CreateIntToPtr(BC, I->getType()); | ||
} else | ||
V = Builder.CreateBitOrPointerCast(Result, I->getType()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
V = Builder.CreateBitOrPointerCast(Result, I->getType()); | |
V = Builder.CreateBitOrPointerCast(Result, VTy); |
unsigned AS = PtrTy->getAddressSpace(); | ||
Value *BC = Builder.CreateBitCast( | ||
Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); | ||
V = Builder.CreateIntToPtr(BC, I->getType()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
V = Builder.CreateIntToPtr(BC, I->getType()); | |
V = Builder.CreateIntToPtr(BC, VTy); |
%ret = load atomic <2 x i16>, ptr %x acquire, align 8 | ||
ret <2 x i16> %ret | ||
} | ||
|
||
define <2 x half> @atomic_vec2_half(ptr %x) nounwind { | ||
; CHECK-LABEL: define <2 x half> @atomic_vec2_half( | ||
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] { | ||
; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8 | ||
; CHECK-NEXT: ret <2 x half> [[RET]] | ||
; | ||
%ret = load atomic <2 x half>, ptr %x acquire, align 8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The 2 x i16 and 2 x half cases aren't expanded, I guess the default expansion rule missed these for some reason?
auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType()); | ||
auto *VTy = dyn_cast<VectorType>(I->getType()); | ||
if (VTy && PtrTy && !Result->getType()->isVectorTy()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There should probably be a utility for this somewhere
66207cc
to
f2e8e75
Compare
b50f678
to
479a227
Compare
f2e8e75
to
fb247c3
Compare
479a227
to
6f96cf8
Compare
fb247c3
to
88e3dac
Compare
053d34b
to
3a1f677
Compare
88e3dac
to
db72700
Compare
3a1f677
to
bf8fc80
Compare
db72700
to
ce64f04
Compare
bf8fc80
to
9fe563b
Compare
ce64f04
to
e1eaeb6
Compare
AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af
9fe563b
to
684a542
Compare
e1eaeb6
to
717ea64
Compare
AtomicExpand fails for aligned
load atomic <n x T>
because itdoes not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.
Stack: