Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 20bbd6e

Browse files
committed
[AtomicExpand] Add bitcasts when expanding load atomic vector
AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af
1 parent ba2a301 commit 20bbd6e
Copy full SHA for 20bbd6e

File tree

4 files changed

+103
-3
lines changed
Filter options

4 files changed

+103
-3
lines changed

‎llvm/lib/CodeGen/AtomicExpandPass.cpp

Copy file name to clipboard | Expand all lines: llvm/lib/CodeGen/AtomicExpandPass.cpp
+22 -3 | Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2060,9 +2060,28 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
20602060
I->replaceAllUsesWith(V);
20612061
} else if (HasResult) {
20622062
Value *V;
2063-
if (UseSizedLibcall)
2064-
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2065-
else {
2063+
if (UseSizedLibcall) {
2064+
// Add bitcasts from Result's T scalar type to I's <2 x T/2> vector type
2065+
if (I->getType()->getScalarType()->isIntOrPtrTy() &&
2066+
I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) {
2067+
TypeSize Size = Result->getType()->getPrimitiveSizeInBits();
2068+
assert((unsigned)Size % 2 == 0);
2069+
unsigned HalfSize = (unsigned)Size / 2;
2070+
Value *Lo =
2071+
Builder.CreateTrunc(Result, IntegerType::get(Ctx, HalfSize));
2072+
Value *RS = Builder.CreateLShr(
2073+
Result, ConstantInt::get(IntegerType::get(Ctx, Size), HalfSize));
2074+
Value *Hi = Builder.CreateTrunc(RS, IntegerType::get(Ctx, HalfSize));
2075+
Value *Vec = Builder.CreateInsertElement(
2076+
VectorType::get(IntegerType::get(Ctx, HalfSize),
2077+
cast<VectorType>(I->getType())->getElementCount()),
2078+
Lo, ConstantInt::get(IntegerType::get(Ctx, 32), 0));
2079+
Vec = Builder.CreateInsertElement(
2080+
Vec, Hi, ConstantInt::get(IntegerType::get(Ctx, 32), 1));
2081+
V = Builder.CreateBitOrPointerCast(Vec, I->getType());
2082+
} else
2083+
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2084+
} else {
20662085
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
20672086
AllocaAlignment);
20682087
Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);

‎llvm/test/CodeGen/ARM/atomic-load-store.ll

Copy file name to clipboard | Expand all lines: llvm/test/CodeGen/ARM/atomic-load-store.ll
+52 | Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -983,3 +983,55 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
983983
store atomic double %val1, ptr %ptr seq_cst, align 8
984984
ret void
985985
}
986+
987+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
988+
; ARM-LABEL: atomic_vec1_ptr:
989+
; ARM: @ %bb.0:
990+
; ARM-NEXT: ldr r0, [r0]
991+
; ARM-NEXT: dmb ish
992+
; ARM-NEXT: bx lr
993+
;
994+
; ARMOPTNONE-LABEL: atomic_vec1_ptr:
995+
; ARMOPTNONE: @ %bb.0:
996+
; ARMOPTNONE-NEXT: ldr r0, [r0]
997+
; ARMOPTNONE-NEXT: dmb ish
998+
; ARMOPTNONE-NEXT: bx lr
999+
;
1000+
; THUMBTWO-LABEL: atomic_vec1_ptr:
1001+
; THUMBTWO: @ %bb.0:
1002+
; THUMBTWO-NEXT: ldr r0, [r0]
1003+
; THUMBTWO-NEXT: dmb ish
1004+
; THUMBTWO-NEXT: bx lr
1005+
;
1006+
; THUMBONE-LABEL: atomic_vec1_ptr:
1007+
; THUMBONE: @ %bb.0:
1008+
; THUMBONE-NEXT: push {r7, lr}
1009+
; THUMBONE-NEXT: movs r1, #0
1010+
; THUMBONE-NEXT: mov r2, r1
1011+
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
1012+
; THUMBONE-NEXT: pop {r7, pc}
1013+
;
1014+
; ARMV4-LABEL: atomic_vec1_ptr:
1015+
; ARMV4: @ %bb.0:
1016+
; ARMV4-NEXT: push {r11, lr}
1017+
; ARMV4-NEXT: mov r1, #2
1018+
; ARMV4-NEXT: bl __atomic_load_4
1019+
; ARMV4-NEXT: mov r0, #0
1020+
; ARMV4-NEXT: pop {r11, lr}
1021+
; ARMV4-NEXT: mov pc, lr
1022+
;
1023+
; ARMV6-LABEL: atomic_vec1_ptr:
1024+
; ARMV6: @ %bb.0:
1025+
; ARMV6-NEXT: mov r1, #0
1026+
; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
1027+
; ARMV6-NEXT: ldr r0, [r0]
1028+
; ARMV6-NEXT: bx lr
1029+
;
1030+
; THUMBM-LABEL: atomic_vec1_ptr:
1031+
; THUMBM: @ %bb.0:
1032+
; THUMBM-NEXT: ldr r0, [r0]
1033+
; THUMBM-NEXT: dmb sy
1034+
; THUMBM-NEXT: bx lr
1035+
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
1036+
ret <1 x ptr> %ret
1037+
}

‎llvm/test/CodeGen/X86/atomic-load-store.ll

Copy file name to clipboard | Expand all lines: llvm/test/CodeGen/X86/atomic-load-store.ll
+15 | Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -362,6 +362,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
362362
ret <2 x i32> %ret
363363
}
364364

365+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
366+
; CHECK-LABEL: atomic_vec2_ptr_align:
367+
; CHECK: ## %bb.0:
368+
; CHECK-NEXT: pushq %rax
369+
; CHECK-NEXT: movl $2, %esi
370+
; CHECK-NEXT: callq ___atomic_load_16
371+
; CHECK-NEXT: movq %rdx, %xmm1
372+
; CHECK-NEXT: movq %rax, %xmm0
373+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
374+
; CHECK-NEXT: popq %rax
375+
; CHECK-NEXT: retq
376+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
377+
ret <2 x ptr> %ret
378+
}
379+
365380
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
366381
; CHECK3-LABEL: atomic_vec4_i8:
367382
; CHECK3: ## %bb.0:

‎llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

Copy file name to clipboard | Expand all lines: llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+14 | Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -151,3 +151,17 @@ define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr,
151151
ret void
152152
}
153153

154+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
155+
; CHECK-LABEL: @atomic_vec2_ptr_align(
156+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X:%.*]], i32 2)
157+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
158+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i128 [[TMP1]], 64
159+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
160+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
161+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP4]], i32 1
162+
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
163+
; CHECK-NEXT: ret <2 x ptr> [[TMP7]]
164+
;
165+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
166+
ret <2 x ptr> %ret
167+
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.