Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit a96fdf1

Browse files
committed
[SelectionDAG][X86] Widen <2 x T> vector types for atomic load
Vector types of 2 elements must be widened. This change does so for the vector result types of atomic loads in SelectionDAG, so that aligned vectors with more than one element can be translated. It also adds Pats to remove an extra MOV. commit-id:2894ccd1
1 parent d528b89 commit a96fdf1
Copy full SHA for a96fdf1

File tree

5 files changed

+118
-7
lines changed
Filter options

5 files changed

+118
-7
lines changed

‎llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Copy file name to clipboardExpand all lines: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+3-2Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
10461046
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
10471047
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
10481048
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
1049+
SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
10491050
SDValue WidenVecRes_LOAD(SDNode* N);
10501051
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
10511052
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
@@ -1129,8 +1130,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
11291130
/// resulting wider type. It takes:
11301131
/// LdChain: list of chains for the load to be generated.
11311132
/// Ld: load to widen
1132-
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
1133-
LoadSDNode *LD);
1133+
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, MemSDNode *LD,
1134+
bool IsAtomic = false);
11341135

11351136
/// Helper function to generate a set of extension loads to load a vector with
11361137
/// a resulting wider type. It takes:

‎llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Copy file name to clipboardExpand all lines: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+35-3Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4517,6 +4517,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
45174517
break;
45184518
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
45194519
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
4520+
case ISD::ATOMIC_LOAD:
4521+
Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
4522+
break;
45204523
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
45214524
case ISD::STEP_VECTOR:
45224525
case ISD::SPLAT_VECTOR:
@@ -5903,6 +5906,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
59035906
N->getOperand(1), N->getOperand(2));
59045907
}
59055908

5909+
SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
5910+
SmallVector<SDValue, 16> LdChain; // Chain for the series of load
5911+
SDValue Result = GenWidenVectorLoads(LdChain, N, /*IsAtomic=*/true);
5912+
5913+
if (Result) {
5914+
// If we generate a single load, we can use that for the chain. Otherwise,
5915+
// build a TokenFactor node to remember that the multiple loads are independent and
5916+
// chain to that.
5917+
SDValue NewChain;
5918+
if (LdChain.size() == 1)
5919+
NewChain = LdChain[0];
5920+
else
5921+
NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain);
5922+
5923+
// Modified the chain - switch anything that used the old chain to use
5924+
// the new one.
5925+
ReplaceValueWith(SDValue(N, 1), NewChain);
5926+
5927+
return Result;
5928+
}
5929+
5930+
report_fatal_error("Unable to widen atomic vector load");
5931+
}
5932+
59065933
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
59075934
LoadSDNode *LD = cast<LoadSDNode>(N);
59085935
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7702,7 +7729,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
77027729
}
77037730

77047731
SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
7705-
LoadSDNode *LD) {
7732+
MemSDNode *LD, bool IsAtomic) {
77067733
// The strategy assumes that we can efficiently load power-of-two widths.
77077734
// The routine chops the vector into the largest vector loads with the same
77087735
// element type or scalar loads and then recombines it to the widen vector
@@ -7759,8 +7786,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
77597786
} while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
77607787
}
77617788

7762-
SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
7763-
LD->getOriginalAlign(), MMOFlags, AAInfo);
7789+
SDValue LdOp;
7790+
if (IsAtomic)
7791+
LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain,
7792+
BasePtr, LD->getMemOperand());
7793+
else
7794+
LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
7795+
LD->getOriginalAlign(), MMOFlags, AAInfo);
77647796
LdChain.push_back(LdOp.getValue(1));
77657797

77667798
// Check if we can load the element with one instruction.

‎llvm/lib/Target/X86/X86InstrCompiler.td

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86InstrCompiler.td
+7Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,6 +1198,13 @@ def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
11981198
def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
11991199
def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
12001200

1201+
def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)))))),
1202+
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8>
1203+
def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
1204+
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16>
1205+
def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
1206+
(MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float>
1207+
12011208
// Floating point loads/stores.
12021209
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
12031210
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;

‎llvm/test/CodeGen/X86/atomic-load-store.ll

Copy file name to clipboardExpand all lines: llvm/test/CodeGen/X86/atomic-load-store.ll
+72Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,55 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
146146
ret <1 x i64> %ret
147147
}
148148

149+
define <2 x i8> @atomic_vec2_i8(ptr %x) {
150+
; CHECK3-LABEL: atomic_vec2_i8:
151+
; CHECK3: ## %bb.0:
152+
; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
153+
; CHECK3-NEXT: retq
154+
;
155+
; CHECK0-LABEL: atomic_vec2_i8:
156+
; CHECK0: ## %bb.0:
157+
; CHECK0-NEXT: movw (%rdi), %cx
158+
; CHECK0-NEXT: ## implicit-def: $eax
159+
; CHECK0-NEXT: movw %cx, %ax
160+
; CHECK0-NEXT: movd %eax, %xmm0
161+
; CHECK0-NEXT: retq
162+
%ret = load atomic <2 x i8>, ptr %x acquire, align 4
163+
ret <2 x i8> %ret
164+
}
165+
166+
define <2 x i16> @atomic_vec2_i16(ptr %x) {
167+
; CHECK3-LABEL: atomic_vec2_i16:
168+
; CHECK3: ## %bb.0:
169+
; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
170+
; CHECK3-NEXT: retq
171+
;
172+
; CHECK0-LABEL: atomic_vec2_i16:
173+
; CHECK0: ## %bb.0:
174+
; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
175+
; CHECK0-NEXT: retq
176+
%ret = load atomic <2 x i16>, ptr %x acquire, align 4
177+
ret <2 x i16> %ret
178+
}
179+
180+
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
181+
; CHECK-LABEL: atomic_vec2_i32_align:
182+
; CHECK: ## %bb.0:
183+
; CHECK-NEXT: movq (%rdi), %xmm0
184+
; CHECK-NEXT: retq
185+
%ret = load atomic <2 x i32>, ptr %x acquire, align 8
186+
ret <2 x i32> %ret
187+
}
188+
189+
define <2 x float> @atomic_vec2_float_align(ptr %x) {
190+
; CHECK-LABEL: atomic_vec2_float_align:
191+
; CHECK: ## %bb.0:
192+
; CHECK-NEXT: movq (%rdi), %xmm0
193+
; CHECK-NEXT: retq
194+
%ret = load atomic <2 x float>, ptr %x acquire, align 8
195+
ret <2 x float> %ret
196+
}
197+
149198
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
150199
; CHECK3-LABEL: atomic_vec1_ptr:
151200
; CHECK3: ## %bb.0:
@@ -295,6 +344,29 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
295344
ret <2 x i32> %ret
296345
}
297346

347+
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
348+
; CHECK3-LABEL: atomic_vec4_i8:
349+
; CHECK3: ## %bb.0:
350+
; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
351+
; CHECK3-NEXT: retq
352+
;
353+
; CHECK0-LABEL: atomic_vec4_i8:
354+
; CHECK0: ## %bb.0:
355+
; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
356+
; CHECK0-NEXT: retq
357+
%ret = load atomic <4 x i8>, ptr %x acquire, align 4
358+
ret <4 x i8> %ret
359+
}
360+
361+
define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
362+
; CHECK-LABEL: atomic_vec4_i16:
363+
; CHECK: ## %bb.0:
364+
; CHECK-NEXT: movq (%rdi), %xmm0
365+
; CHECK-NEXT: retq
366+
%ret = load atomic <4 x i16>, ptr %x acquire, align 8
367+
ret <4 x i16> %ret
368+
}
369+
298370
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
299371
; CHECK-LABEL: atomic_vec4_float_align:
300372
; CHECK: ## %bb.0:

‎llvm/test/CodeGen/X86/atomic-unordered.ll

Copy file name to clipboardExpand all lines: llvm/test/CodeGen/X86/atomic-unordered.ll
+1-2Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2275,8 +2275,7 @@ define i64 @load_i16_anyext_i64(ptr %ptr) {
22752275
;
22762276
; CHECK-O3-LABEL: load_i16_anyext_i64:
22772277
; CHECK-O3: # %bb.0:
2278-
; CHECK-O3-NEXT: movzwl (%rdi), %eax
2279-
; CHECK-O3-NEXT: vmovd %eax, %xmm0
2278+
; CHECK-O3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
22802279
; CHECK-O3-NEXT: vmovq %xmm0, %rax
22812280
; CHECK-O3-NEXT: retq
22822281
%v = load atomic i16, ptr %ptr unordered, align 8

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.