Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e1eaeb6

Browse files
committed
[AtomicExpand] Add bitcasts when expanding load atomic vector
AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered.

commit-id:f430c1af
1 parent 9fe563b commit e1eaeb6
Copy full SHA for e1eaeb6

File tree

4 files changed

+158
-3
lines changed
Filter options

4 files changed

+158
-3
lines changed

‎llvm/lib/CodeGen/AtomicExpandPass.cpp

Copy file name to clipboardExpand all lines: llvm/lib/CodeGen/AtomicExpandPass.cpp
+12-3Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
20662066
I->replaceAllUsesWith(V);
20672067
} else if (HasResult) {
20682068
Value *V;
2069-
if (UseSizedLibcall)
2070-
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2071-
else {
2069+
if (UseSizedLibcall) {
2070+
// Result is a scalar (non-vector) value: bitcast it to a vector of pointer-sized integers, then inttoptr that to I's <n x ptr> vector type.
2071+
auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
2072+
auto *VTy = dyn_cast<VectorType>(I->getType());
2073+
if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
2074+
unsigned AS = PtrTy->getAddressSpace();
2075+
Value *BC = Builder.CreateBitCast(
2076+
Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
2077+
V = Builder.CreateIntToPtr(BC, I->getType());
2078+
} else
2079+
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2080+
} else {
20722081
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
20732082
AllocaAlignment);
20742083
Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);

‎llvm/test/CodeGen/ARM/atomic-load-store.ll

Copy file name to clipboardExpand all lines: llvm/test/CodeGen/ARM/atomic-load-store.ll
+51Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
983983
store atomic double %val1, ptr %ptr seq_cst, align 8
984984
ret void
985985
}
986+
987+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
988+
; ARM-LABEL: atomic_vec1_ptr:
989+
; ARM: @ %bb.0:
990+
; ARM-NEXT: ldr r0, [r0]
991+
; ARM-NEXT: dmb ish
992+
; ARM-NEXT: bx lr
993+
;
994+
; ARMOPTNONE-LABEL: atomic_vec1_ptr:
995+
; ARMOPTNONE: @ %bb.0:
996+
; ARMOPTNONE-NEXT: ldr r0, [r0]
997+
; ARMOPTNONE-NEXT: dmb ish
998+
; ARMOPTNONE-NEXT: bx lr
999+
;
1000+
; THUMBTWO-LABEL: atomic_vec1_ptr:
1001+
; THUMBTWO: @ %bb.0:
1002+
; THUMBTWO-NEXT: ldr r0, [r0]
1003+
; THUMBTWO-NEXT: dmb ish
1004+
; THUMBTWO-NEXT: bx lr
1005+
;
1006+
; THUMBONE-LABEL: atomic_vec1_ptr:
1007+
; THUMBONE: @ %bb.0:
1008+
; THUMBONE-NEXT: push {r7, lr}
1009+
; THUMBONE-NEXT: movs r1, #0
1010+
; THUMBONE-NEXT: mov r2, r1
1011+
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
1012+
; THUMBONE-NEXT: pop {r7, pc}
1013+
;
1014+
; ARMV4-LABEL: atomic_vec1_ptr:
1015+
; ARMV4: @ %bb.0:
1016+
; ARMV4-NEXT: push {r11, lr}
1017+
; ARMV4-NEXT: mov r1, #2
1018+
; ARMV4-NEXT: bl __atomic_load_4
1019+
; ARMV4-NEXT: pop {r11, lr}
1020+
; ARMV4-NEXT: mov pc, lr
1021+
;
1022+
; ARMV6-LABEL: atomic_vec1_ptr:
1023+
; ARMV6: @ %bb.0:
1024+
; ARMV6-NEXT: ldr r0, [r0]
1025+
; ARMV6-NEXT: mov r1, #0
1026+
; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
1027+
; ARMV6-NEXT: bx lr
1028+
;
1029+
; THUMBM-LABEL: atomic_vec1_ptr:
1030+
; THUMBM: @ %bb.0:
1031+
; THUMBM-NEXT: ldr r0, [r0]
1032+
; THUMBM-NEXT: dmb sy
1033+
; THUMBM-NEXT: bx lr
1034+
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
1035+
ret <1 x ptr> %ret
1036+
}

‎llvm/test/CodeGen/X86/atomic-load-store.ll

Copy file name to clipboardExpand all lines: llvm/test/CodeGen/X86/atomic-load-store.ll
+30Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
382382
ret <2 x i32> %ret
383383
}
384384

385+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
386+
; CHECK-LABEL: atomic_vec2_ptr_align:
387+
; CHECK: ## %bb.0:
388+
; CHECK-NEXT: pushq %rax
389+
; CHECK-NEXT: movl $2, %esi
390+
; CHECK-NEXT: callq ___atomic_load_16
391+
; CHECK-NEXT: movq %rdx, %xmm1
392+
; CHECK-NEXT: movq %rax, %xmm0
393+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
394+
; CHECK-NEXT: popq %rax
395+
; CHECK-NEXT: retq
396+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
397+
ret <2 x ptr> %ret
398+
}
399+
385400
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
386401
; CHECK3-LABEL: atomic_vec4_i8:
387402
; CHECK3: ## %bb.0:
@@ -405,6 +420,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
405420
ret <4 x i16> %ret
406421
}
407422

423+
define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
424+
; CHECK-LABEL: atomic_vec4_ptr270:
425+
; CHECK: ## %bb.0:
426+
; CHECK-NEXT: pushq %rax
427+
; CHECK-NEXT: movl $2, %esi
428+
; CHECK-NEXT: callq ___atomic_load_16
429+
; CHECK-NEXT: movq %rdx, %xmm1
430+
; CHECK-NEXT: movq %rax, %xmm0
431+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
432+
; CHECK-NEXT: popq %rax
433+
; CHECK-NEXT: retq
434+
%ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
435+
ret <4 x ptr addrspace(270)> %ret
436+
}
437+
408438
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
409439
; CHECK-LABEL: atomic_vec4_half:
410440
; CHECK: ## %bb.0:

‎llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

Copy file name to clipboardExpand all lines: llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+65Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,68 @@ define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr,
151151
ret void
152152
}
153153

154+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
155+
; CHECK-LABEL: define <2 x ptr> @atomic_vec2_ptr_align(
156+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
157+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
158+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
159+
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
160+
; CHECK-NEXT: ret <2 x ptr> [[TMP7]]
161+
;
162+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
163+
ret <2 x ptr> %ret
164+
}
165+
166+
define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(ptr %x) nounwind {
167+
; CHECK-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(
168+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
169+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
170+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
171+
; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(270)>
172+
; CHECK-NEXT: ret <4 x ptr addrspace(270)> [[TMP3]]
173+
;
174+
%ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
175+
ret <4 x ptr addrspace(270)> %ret
176+
}
177+
178+
define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
179+
; CHECK-LABEL: define <2 x i16> @atomic_vec2_i16(
180+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
181+
; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x i16>, ptr [[X]] acquire, align 8
182+
; CHECK-NEXT: ret <2 x i16> [[RET]]
183+
;
184+
%ret = load atomic <2 x i16>, ptr %x acquire, align 8
185+
ret <2 x i16> %ret
186+
}
187+
188+
define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
189+
; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
190+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
191+
; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8
192+
; CHECK-NEXT: ret <2 x half> [[RET]]
193+
;
194+
%ret = load atomic <2 x half>, ptr %x acquire, align 8
195+
ret <2 x half> %ret
196+
}
197+
198+
define <4 x i32> @atomic_vec4_i32(ptr %x) nounwind {
199+
; CHECK-LABEL: define <4 x i32> @atomic_vec4_i32(
200+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
201+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
202+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
203+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
204+
;
205+
%ret = load atomic <4 x i32>, ptr %x acquire, align 16
206+
ret <4 x i32> %ret
207+
}
208+
209+
define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
210+
; CHECK-LABEL: define <4 x float> @atomic_vec4_float(
211+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
212+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
213+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x float>
214+
; CHECK-NEXT: ret <4 x float> [[TMP2]]
215+
;
216+
%ret = load atomic <4 x float>, ptr %x acquire, align 16
217+
ret <4 x float> %ret
218+
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.