Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e961155

Browse files
committed
[SelectionDAG] Widen <2 x T> vector types for atomic load
Vector types with two elements must be widened. This change implements that widening for vector atomic loads in SelectionDAG, so that aligned atomic loads of vectors with more than one element can be translated. commit-id:2894ccd1
1 parent d09c5a1 commit e961155
Copy full SHA for e961155

File tree

Expand file treeCollapse file tree

3 files changed

+154
-21
lines changed
Filter options
Expand file treeCollapse file tree

3 files changed

+154
-21
lines changed

‎llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Copy file name to clipboardExpand all lines: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
10621062
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
10631063
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
10641064
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
1065+
SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
10651066
SDValue WidenVecRes_LOAD(SDNode* N);
10661067
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
10671068
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);

‎llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Copy file name to clipboardExpand all lines: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+75-21Lines changed: 75 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
46254625
break;
46264626
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
46274627
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
4628+
case ISD::ATOMIC_LOAD:
4629+
Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
4630+
break;
46284631
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
46294632
case ISD::STEP_VECTOR:
46304633
case ISD::SPLAT_VECTOR:
@@ -6014,6 +6017,76 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
60146017
N->getOperand(1), N->getOperand(2));
60156018
}
60166019

6020+
/// Either return the same load or provide appropriate casts
6021+
/// from the load and return that.
6022+
static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
6023+
TypeSize LdWidth, TypeSize FirstVTWidth,
6024+
SDLoc dl, SelectionDAG &DAG) {
6025+
assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
6026+
TypeSize WidenWidth = WidenVT.getSizeInBits();
6027+
if (!FirstVT.isVector()) {
6028+
unsigned NumElts =
6029+
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
6030+
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
6031+
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
6032+
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
6033+
} else if (FirstVT == WidenVT)
6034+
return LdOp;
6035+
// TODO: We don't currently have any tests that exercise this code path.
6036+
llvm_unreachable("Unimplemented");
6037+
}
6038+
6039+
// Forward declaration so that code earlier in this file can call findMemType.
// The result is optional; the caller in WidenVecRes_ATOMIC_LOAD treats an
// empty result as "no usable memory type found".
// NOTE(review): the definition is not visible in this hunk; confirm the exact
// meaning of Width, Align and WidenEx against it.
static std::optional<EVT> findMemType(SelectionDAG &DAG,
6040+
const TargetLowering &TLI, unsigned Width,
6041+
EVT WidenVT, unsigned Align,
6042+
unsigned WidenEx);
6043+
6044+
/// Widen the vector result of an atomic load.
///
/// The load is re-issued as a single atomic load of a type chosen by
/// findMemType, and the loaded value is then coerced to the widened vector
/// type with coerceLoadedValue. Users of the old chain result are redirected
/// to the chain of the new load.
///
/// \param LD  The atomic load whose vector result is being widened.
/// \returns The value of the widened vector type, or a null SDValue when
///          findMemType does not find a type to load with.
SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
  EVT WidenVT =
      TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
  EVT LdVT = LD->getMemoryVT();
  SDLoc dl(LD);
  assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
  assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
         "Load and widened types must agree on scalability");
  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
         "Expected equivalent element types");

  // Load information.
  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();

  TypeSize LdWidth = LdVT.getSizeInBits();
  TypeSize WidenWidth = WidenVT.getSizeInBits();
  TypeSize WidthDiff = WidenWidth - LdWidth;

  // Find the type to load with.
  std::optional<EVT> FirstVT =
      findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*Align=*/0,
                  WidthDiff.getKnownMinValue());

  // NOTE(review): no widened value is registered on this path; confirm that
  // the caller copes with a null result.
  if (!FirstVT)
    return SDValue();

  TypeSize FirstVTWidth = FirstVT->getSizeInBits();

  // Load the element with one instruction, reusing the original memory
  // operand.
  SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
                                   Chain, BasePtr, LD->getMemOperand());

  // Cast the loaded value to the widened vector type.
  SDValue Result =
      coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl,
                        DAG);

  // Modified the chain - switch anything that used the old chain to use
  // the new one.
  ReplaceValueWith(SDValue(LD, 1), LdOp.getValue(1));
  return Result;
}
6089+
60176090
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
60186091
LoadSDNode *LD = cast<LoadSDNode>(N);
60196092
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7897,27 +7970,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
78977970

78987971
// Check if we can load the element with one instruction.
78997972
if (MemVTs.empty()) {
7900-
assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
7901-
if (!FirstVT->isVector()) {
7902-
unsigned NumElts =
7903-
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
7904-
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
7905-
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
7906-
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
7907-
}
7908-
if (FirstVT == WidenVT)
7909-
return LdOp;
7910-
7911-
// TODO: We don't currently have any tests that exercise this code path.
7912-
assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
7913-
unsigned NumConcat =
7914-
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
7915-
SmallVector<SDValue, 16> ConcatOps(NumConcat);
7916-
SDValue UndefVal = DAG.getUNDEF(*FirstVT);
7917-
ConcatOps[0] = LdOp;
7918-
for (unsigned i = 1; i != NumConcat; ++i)
7919-
ConcatOps[i] = UndefVal;
7920-
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
7973+
return coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth,
7974+
dl, DAG);
79217975
}
79227976

79237977
// Load vector by using multiple loads from largest vector to scalar.

‎llvm/test/CodeGen/X86/atomic-load-store.ll

Copy file name to clipboardExpand all lines: llvm/test/CodeGen/X86/atomic-load-store.ll
+78Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,64 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
146146
ret <1 x i64> %ret
147147
}
148148

149+
; Acquire atomic load of <2 x i8> (align 4). Expected output: one 16-bit load
; from (%rdi) whose value is then moved into %xmm0.
define <2 x i8> @atomic_vec2_i8(ptr %x) {
150+
; CHECK3-LABEL: atomic_vec2_i8:
151+
; CHECK3: ## %bb.0:
152+
; CHECK3-NEXT: movzwl (%rdi), %eax
153+
; CHECK3-NEXT: movd %eax, %xmm0
154+
; CHECK3-NEXT: retq
155+
;
156+
; CHECK0-LABEL: atomic_vec2_i8:
157+
; CHECK0: ## %bb.0:
158+
; CHECK0-NEXT: movw (%rdi), %cx
159+
; CHECK0-NEXT: ## implicit-def: $eax
160+
; CHECK0-NEXT: movw %cx, %ax
161+
; CHECK0-NEXT: movd %eax, %xmm0
162+
; CHECK0-NEXT: retq
163+
%ret = load atomic <2 x i8>, ptr %x acquire, align 4
164+
ret <2 x i8> %ret
165+
}
166+
167+
; Acquire atomic load of <2 x i16> (align 4). Expected output: one 32-bit load
; from (%rdi) whose value is then moved into %xmm0.
define <2 x i16> @atomic_vec2_i16(ptr %x) {
168+
; CHECK-LABEL: atomic_vec2_i16:
169+
; CHECK: ## %bb.0:
170+
; CHECK-NEXT: movl (%rdi), %eax
171+
; CHECK-NEXT: movd %eax, %xmm0
172+
; CHECK-NEXT: retq
173+
%ret = load atomic <2 x i16>, ptr %x acquire, align 4
174+
ret <2 x i16> %ret
175+
}
176+
177+
; Acquire atomic load of <2 x ptr addrspace(270)> (align 8). Expected output:
; one 64-bit load from (%rdi) whose value is then moved into %xmm0.
define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
178+
; CHECK-LABEL: atomic_vec2_ptr270:
179+
; CHECK: ## %bb.0:
180+
; CHECK-NEXT: movq (%rdi), %rax
181+
; CHECK-NEXT: movq %rax, %xmm0
182+
; CHECK-NEXT: retq
183+
%ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
184+
ret <2 x ptr addrspace(270)> %ret
185+
}
186+
187+
; Acquire atomic load of <2 x i32> (align 8). Expected output: one 64-bit load
; from (%rdi) whose value is then moved into %xmm0.
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
188+
; CHECK-LABEL: atomic_vec2_i32_align:
189+
; CHECK: ## %bb.0:
190+
; CHECK-NEXT: movq (%rdi), %rax
191+
; CHECK-NEXT: movq %rax, %xmm0
192+
; CHECK-NEXT: retq
193+
%ret = load atomic <2 x i32>, ptr %x acquire, align 8
194+
ret <2 x i32> %ret
195+
}
196+
197+
; Acquire atomic load of <2 x float> (align 8). Expected output: one 64-bit
; load from (%rdi) whose value is then moved into %xmm0.
define <2 x float> @atomic_vec2_float_align(ptr %x) {
198+
; CHECK-LABEL: atomic_vec2_float_align:
199+
; CHECK: ## %bb.0:
200+
; CHECK-NEXT: movq (%rdi), %rax
201+
; CHECK-NEXT: movq %rax, %xmm0
202+
; CHECK-NEXT: retq
203+
%ret = load atomic <2 x float>, ptr %x acquire, align 8
204+
ret <2 x float> %ret
205+
}
206+
149207
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
150208
; CHECK3-LABEL: atomic_vec1_ptr:
151209
; CHECK3: ## %bb.0:
@@ -295,6 +353,26 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
295353
ret <2 x i32> %ret
296354
}
297355

356+
; Acquire atomic load of <4 x i8> (align 4). Expected output: one 32-bit load
; from (%rdi) whose value is then moved into %xmm0.
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
357+
; CHECK-LABEL: atomic_vec4_i8:
358+
; CHECK: ## %bb.0:
359+
; CHECK-NEXT: movl (%rdi), %eax
360+
; CHECK-NEXT: movd %eax, %xmm0
361+
; CHECK-NEXT: retq
362+
%ret = load atomic <4 x i8>, ptr %x acquire, align 4
363+
ret <4 x i8> %ret
364+
}
365+
366+
; Acquire atomic load of <4 x i16> (align 8). Expected output: one 64-bit load
; from (%rdi) whose value is then moved into %xmm0.
define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
367+
; CHECK-LABEL: atomic_vec4_i16:
368+
; CHECK: ## %bb.0:
369+
; CHECK-NEXT: movq (%rdi), %rax
370+
; CHECK-NEXT: movq %rax, %xmm0
371+
; CHECK-NEXT: retq
372+
%ret = load atomic <4 x i16>, ptr %x acquire, align 8
373+
ret <4 x i16> %ret
374+
}
375+
298376
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
299377
; CHECK-LABEL: atomic_vec4_float_align:
300378
; CHECK: ## %bb.0:

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.