Commit 33a5ca1

[SelectionDAG][X86] Remove unused elements from atomic vector.
After splitting an atomic vector load, all of the elements are created and then placed back into a concat_vectors, even when only some of them are used. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode, so that the concat_vectors can be mapped to a BUILD_VECTOR and the unused elements are no longer referenced. commit-id:b83937a8
1 parent 2fb47b0 commit 33a5ca1
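
A minimal IR illustration of what this enables, mirroring the atomic_vec2_half test updated below (the function name here is hypothetical):

; With this change the whole value is loaded at once (a single movss on x86-64)
; rather than being scalarized and reassembled element by element.
define <2 x half> @example_atomic_vec2_half(ptr %x) {
  %ret = load atomic <2 x half>, ptr %x acquire, align 4
  ret <2 x half> %ret
}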

6 files changed: 69 additions, 187 deletions

‎llvm/include/llvm/CodeGen/SelectionDAG.h

2 additions, 2 deletions
@@ -1843,7 +1843,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
@@ -2281,7 +2281,7 @@ class SelectionDAG {
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
   /// location that the 'Base' load is loading from.
-  bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
+  bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
                                       unsigned Bytes, int Dist) const;
 
   /// Infer alignment of a load / store address. Return std::nullopt if it

‎llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

12 additions, 8 deletions
@@ -12213,7 +12213,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
                                                    SDValue NewMemOp) {
   assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
@@ -12906,17 +12906,21 @@ std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp(
                         getBuildVector(NewOvVT, dl, OvScalars));
 }
 
-bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
-                                                  LoadSDNode *Base,
+bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
+                                                  MemSDNode *Base,
                                                   unsigned Bytes,
                                                   int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
     return false;
-  // TODO: probably too restrictive for atomics, revisit
-  if (!LD->isSimple())
-    return false;
-  if (LD->isIndexed() || Base->isIndexed())
-    return false;
+  if (auto Ld = dyn_cast<LoadSDNode>(LD)) {
+    if (!Ld->isSimple())
+      return false;
+    if (Ld->isIndexed())
+      return false;
+  }
+  if (auto Ld = dyn_cast<LoadSDNode>(Base))
+    if (Ld->isIndexed())
+      return false;
   if (LD->getChain() != Base->getChain())
     return false;
   EVT VT = LD->getMemoryVT();

‎llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp

17 additions, 13 deletions
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
-                                   const SelectionDAG &DAG) {
+template <typename T>
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
   bool IsIndexSignExt = false;
 
   // pre-inc/pre-dec ops are components of EA.
-  if (N->getAddressingMode() == ISD::PRE_INC) {
-    if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
-      Offset += C->getSExtValue();
-    else // If unknown, give up now.
-      return BaseIndexOffset(SDValue(), SDValue(), 0, false);
-  } else if (N->getAddressingMode() == ISD::PRE_DEC) {
-    if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
-      Offset -= C->getSExtValue();
-    else // If unknown, give up now.
-      return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+  if constexpr (std::is_same_v<T, LSBaseSDNode>) {
+    if (N->getAddressingMode() == ISD::PRE_INC) {
+      if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+        Offset += C->getSExtValue();
+      else // If unknown, give up now.
+        return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+    } else if (N->getAddressingMode() == ISD::PRE_DEC) {
+      if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
+        Offset -= C->getSExtValue();
+      else // If unknown, give up now.
+        return BaseIndexOffset(SDValue(), SDValue(), 0, false);
+    }
   }
 
   // Consume constant adds & ors with appropriate masking.
@@ -300,8 +302,10 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
 
 BaseIndexOffset BaseIndexOffset::match(const SDNode *N,
                                        const SelectionDAG &DAG) {
+  if (const auto *AN = dyn_cast<AtomicSDNode>(N))
+    return matchSDNode(AN, DAG);
   if (const auto *LS0 = dyn_cast<LSBaseSDNode>(N))
-    return matchLSNode(LS0, DAG);
+    return matchSDNode(LS0, DAG);
   if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) {
     if (LN->hasOffset())
       return BaseIndexOffset(LN->getOperand(1), SDValue(), LN->getOffset(),

‎llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

5 additions, 1 deletion
@@ -5172,7 +5172,11 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
   L = DAG.getPtrExtOrTrunc(L, dl, VT);
 
   setValue(&I, L);
-  DAG.setRoot(OutChain);
+
+  if (VT.isVector())
+    DAG.setRoot(InChain);
+  else
+    DAG.setRoot(OutChain);
 }
 
 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
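
The vector case now roots the DAG at the incoming chain rather than the atomic load's output chain, presumably so a split load is not kept reachable through the root once its elements are folded away. For reference, a minimal IR input that takes this vector path, matching the atomic_vec2_bfloat test updated in this commit (the function name is hypothetical):

define <2 x bfloat> @example_atomic_vec2_bfloat(ptr %x) {
  %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
  ret <2 x bfloat> %ret
}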

‎llvm/lib/Target/X86/X86ISelLowering.cpp

17 additions, 12 deletions
@@ -7191,15 +7191,19 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
 }
 
 // Recurse to find a LoadSDNode source and the accumulated ByteOffest.
-static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
-  if (ISD::isNON_EXTLoad(Elt.getNode())) {
-    auto *BaseLd = cast<LoadSDNode>(Elt);
-    if (!BaseLd->isSimple())
-      return false;
+static bool findEltLoadSrc(SDValue Elt, MemSDNode *&Ld, int64_t &ByteOffset) {
+  if (auto *BaseLd = dyn_cast<AtomicSDNode>(Elt)) {
     Ld = BaseLd;
     ByteOffset = 0;
     return true;
-  }
+  } else if (auto *BaseLd = dyn_cast<LoadSDNode>(Elt))
+    if (ISD::isNON_EXTLoad(Elt.getNode())) {
+      if (!BaseLd->isSimple())
+        return false;
+      Ld = BaseLd;
+      ByteOffset = 0;
+      return true;
+    }
 
   switch (Elt.getOpcode()) {
   case ISD::BITCAST:
@@ -7252,7 +7256,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
   APInt ZeroMask = APInt::getZero(NumElems);
   APInt UndefMask = APInt::getZero(NumElems);
 
-  SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
+  SmallVector<MemSDNode *, 8> Loads(NumElems, nullptr);
   SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
 
   // For each element in the initializer, see if we've found a load, zero or an
@@ -7302,7 +7306,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
   EVT EltBaseVT = EltBase.getValueType();
   assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() &&
          "Register/Memory size mismatch");
-  LoadSDNode *LDBase = Loads[FirstLoadedElt];
+  MemSDNode *LDBase = Loads[FirstLoadedElt];
   assert(LDBase && "Did not find base load for merging consecutive loads");
   unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
   unsigned BaseSizeInBytes = BaseSizeInBits / 8;
@@ -7316,8 +7320,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
 
   // Check to see if the element's load is consecutive to the base load
   // or offset from a previous (already checked) load.
-  auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) {
-    LoadSDNode *Ld = Loads[EltIdx];
+  auto CheckConsecutiveLoad = [&](MemSDNode *Base, int EltIdx) {
+    MemSDNode *Ld = Loads[EltIdx];
     int64_t ByteOffset = ByteOffsets[EltIdx];
     if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) {
       int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);
@@ -7345,7 +7349,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
     }
   }
 
-  auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
+  auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, MemSDNode *LDBase) {
     auto MMOFlags = LDBase->getMemOperand()->getFlags();
     assert(LDBase->isSimple() &&
            "Cannot merge volatile or atomic loads.");
@@ -9402,8 +9406,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   {
     SmallVector<SDValue, 64> Ops(Op->ops().take_front(NumElems));
     if (SDValue LD =
-            EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
+            EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) {
       return LD;
+    }
   }
 
   // If this is a splat of pairs of 32-bit elements, we can use a narrower
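
With findEltLoadSrc and EltsFromConsecutiveLoads generalized from LoadSDNode to MemSDNode, elements that originate from an atomic load can now take part in consecutive-load merging. A sketch of the end result, matching the atomic_vec4_half test updated below (the function name is hypothetical); the vector is now materialized with a single 64-bit load (movsd):

define <4 x half> @example_atomic_vec4_half(ptr %x) nounwind {
  %ret = load atomic <4 x half>, ptr %x acquire, align 8
  ret <4 x half> %ret
}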

‎llvm/test/CodeGen/X86/atomic-load-store.ll

16 additions, 151 deletions
@@ -205,71 +205,19 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
 }
 
 define <2 x half> @atomic_vec2_half(ptr %x) {
-; CHECK3-LABEL: atomic_vec2_half:
-; CHECK3: ## %bb.0:
-; CHECK3-NEXT: movl (%rdi), %eax
-; CHECK3-NEXT: movd %eax, %xmm1
-; CHECK3-NEXT: shrl $16, %eax
-; CHECK3-NEXT: pinsrw $0, %eax, %xmm2
-; CHECK3-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535]
-; CHECK3-NEXT: pand %xmm0, %xmm1
-; CHECK3-NEXT: pslld $16, %xmm2
-; CHECK3-NEXT: pandn %xmm2, %xmm0
-; CHECK3-NEXT: por %xmm1, %xmm0
-; CHECK3-NEXT: retq
-;
-; CHECK0-LABEL: atomic_vec2_half:
-; CHECK0: ## %bb.0:
-; CHECK0-NEXT: movl (%rdi), %eax
-; CHECK0-NEXT: movl %eax, %ecx
-; CHECK0-NEXT: shrl $16, %ecx
-; CHECK0-NEXT: movw %cx, %dx
-; CHECK0-NEXT: ## implicit-def: $ecx
-; CHECK0-NEXT: movw %dx, %cx
-; CHECK0-NEXT: ## implicit-def: $xmm2
-; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
-; CHECK0-NEXT: movd %eax, %xmm0
-; CHECK0-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535]
-; CHECK0-NEXT: pand %xmm1, %xmm0
-; CHECK0-NEXT: pslld $16, %xmm2
-; CHECK0-NEXT: pandn %xmm2, %xmm1
-; CHECK0-NEXT: por %xmm1, %xmm0
-; CHECK0-NEXT: retq
+; CHECK-LABEL: atomic_vec2_half:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: retq
   %ret = load atomic <2 x half>, ptr %x acquire, align 4
   ret <2 x half> %ret
 }
 
 define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
-; CHECK3-LABEL: atomic_vec2_bfloat:
-; CHECK3: ## %bb.0:
-; CHECK3-NEXT: movl (%rdi), %eax
-; CHECK3-NEXT: movd %eax, %xmm1
-; CHECK3-NEXT: shrl $16, %eax
-; CHECK3-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535]
-; CHECK3-NEXT: pand %xmm0, %xmm1
-; CHECK3-NEXT: pinsrw $0, %eax, %xmm2
-; CHECK3-NEXT: pslld $16, %xmm2
-; CHECK3-NEXT: pandn %xmm2, %xmm0
-; CHECK3-NEXT: por %xmm1, %xmm0
-; CHECK3-NEXT: retq
-;
-; CHECK0-LABEL: atomic_vec2_bfloat:
-; CHECK0: ## %bb.0:
-; CHECK0-NEXT: movl (%rdi), %eax
-; CHECK0-NEXT: movl %eax, %ecx
-; CHECK0-NEXT: shrl $16, %ecx
-; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
-; CHECK0-NEXT: movd %eax, %xmm0
-; CHECK0-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535]
-; CHECK0-NEXT: pand %xmm1, %xmm0
-; CHECK0-NEXT: ## implicit-def: $eax
-; CHECK0-NEXT: movw %cx, %ax
-; CHECK0-NEXT: ## implicit-def: $xmm2
-; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
-; CHECK0-NEXT: pslld $16, %xmm2
-; CHECK0-NEXT: pandn %xmm2, %xmm1
-; CHECK0-NEXT: por %xmm1, %xmm0
-; CHECK0-NEXT: retq
+; CHECK-LABEL: atomic_vec2_bfloat:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: retq
   %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
   ret <2 x bfloat> %ret
 }
@@ -447,102 +395,19 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
 }
 
 define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
-; CHECK3-LABEL: atomic_vec4_half:
-; CHECK3: ## %bb.0:
-; CHECK3-NEXT: movq (%rdi), %rax
-; CHECK3-NEXT: movl %eax, %ecx
-; CHECK3-NEXT: shrl $16, %ecx
-; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
-; CHECK3-NEXT: movq %rax, %rcx
-; CHECK3-NEXT: shrq $32, %rcx
-; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
-; CHECK3-NEXT: movq %rax, %xmm0
-; CHECK3-NEXT: shrq $48, %rax
-; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
-; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK3-NEXT: retq
-;
-; CHECK0-LABEL: atomic_vec4_half:
-; CHECK0: ## %bb.0:
-; CHECK0-NEXT: movq (%rdi), %rax
-; CHECK0-NEXT: movl %eax, %ecx
-; CHECK0-NEXT: shrl $16, %ecx
-; CHECK0-NEXT: movw %cx, %dx
-; CHECK0-NEXT: ## implicit-def: $ecx
-; CHECK0-NEXT: movw %dx, %cx
-; CHECK0-NEXT: ## implicit-def: $xmm2
-; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
-; CHECK0-NEXT: movq %rax, %rcx
-; CHECK0-NEXT: shrq $32, %rcx
-; CHECK0-NEXT: movw %cx, %dx
-; CHECK0-NEXT: ## implicit-def: $ecx
-; CHECK0-NEXT: movw %dx, %cx
-; CHECK0-NEXT: ## implicit-def: $xmm1
-; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
-; CHECK0-NEXT: movq %rax, %rcx
-; CHECK0-NEXT: shrq $48, %rcx
-; CHECK0-NEXT: movw %cx, %dx
-; CHECK0-NEXT: ## implicit-def: $ecx
-; CHECK0-NEXT: movw %dx, %cx
-; CHECK0-NEXT: ## implicit-def: $xmm3
-; CHECK0-NEXT: pinsrw $0, %ecx, %xmm3
-; CHECK0-NEXT: movq %rax, %xmm0
-; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK0-NEXT: retq
+; CHECK-LABEL: atomic_vec4_half:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: retq
   %ret = load atomic <4 x half>, ptr %x acquire, align 8
   ret <4 x half> %ret
 }
 
 define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
-; CHECK3-LABEL: atomic_vec4_bfloat:
-; CHECK3: ## %bb.0:
-; CHECK3-NEXT: movq (%rdi), %rax
-; CHECK3-NEXT: movq %rax, %xmm0
-; CHECK3-NEXT: movl %eax, %ecx
-; CHECK3-NEXT: shrl $16, %ecx
-; CHECK3-NEXT: movq %rax, %rdx
-; CHECK3-NEXT: shrq $32, %rdx
-; CHECK3-NEXT: shrq $48, %rax
-; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
-; CHECK3-NEXT: pinsrw $0, %edx, %xmm2
-; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
-; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; CHECK3-NEXT: retq
-;
-; CHECK0-LABEL: atomic_vec4_bfloat:
-; CHECK0: ## %bb.0:
-; CHECK0-NEXT: movq (%rdi), %rax
-; CHECK0-NEXT: movq %rax, %xmm0
-; CHECK0-NEXT: movl %eax, %ecx
-; CHECK0-NEXT: shrl $16, %ecx
-; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
-; CHECK0-NEXT: movq %rax, %rdx
-; CHECK0-NEXT: shrq $32, %rdx
-; CHECK0-NEXT: ## kill: def $dx killed $dx killed $rdx
-; CHECK0-NEXT: shrq $48, %rax
-; CHECK0-NEXT: movw %ax, %si
-; CHECK0-NEXT: ## implicit-def: $eax
-; CHECK0-NEXT: movw %si, %ax
-; CHECK0-NEXT: ## implicit-def: $xmm2
-; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
-; CHECK0-NEXT: ## implicit-def: $eax
-; CHECK0-NEXT: movw %dx, %ax
-; CHECK0-NEXT: ## implicit-def: $xmm1
-; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
-; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; CHECK0-NEXT: ## implicit-def: $eax
-; CHECK0-NEXT: movw %cx, %ax
-; CHECK0-NEXT: ## implicit-def: $xmm2
-; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
-; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK0-NEXT: retq
+; CHECK-LABEL: atomic_vec4_bfloat:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: retq
   %ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
   ret <4 x bfloat> %ret
 }
