Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

[SelectionDAG] Deal with POISON for INSERT_VECTOR_ELT/INSERT_SUBVECTOR (part 3) #143105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/bjope/insertundef_3
Choose a base branch
Loading
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion 11 llvm/include/llvm/CodeGen/SelectionDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -953,8 +953,17 @@ class SelectionDAG {
}

/// Insert \p SubVec at the \p Idx element of \p Vec.
/// If \p SkipUndef is true and \p SubVec is UNDEF/POISON, then \p Vec is
/// returned.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec,
unsigned Idx) {
unsigned Idx, bool SkipUndef = false) {
// Skipping the insert of an UNDEF subvector could result in POISON elements
// remaining in the resulting vector. The SkipUndef flag is useful in
// situations where getNode can't reason well enough on its own about
// ignoring the insert, e.g. when dealing with scalable vectors and the
// caller knows that the subvector being replaced isn't POISON.
if (SkipUndef && SubVec.isUndef())
return Vec;
return getNode(ISD::INSERT_SUBVECTOR, DL, Vec.getValueType(), Vec, SubVec,
getVectorIdxConstant(Idx, DL));
}
Expand Down
9 changes: 6 additions & 3 deletions 9 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15075,11 +15075,14 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,

if (PreferDUPAndInsert) {
// First, build a constant vector with the common element.
SmallVector<SDValue, 8> Ops(NumElts, Value);
// Make sure to freeze the common element first, since we will also use it
// for indices that should be UNDEF (we want to avoid making those
// elements more poisonous).
SmallVector<SDValue, 8> Ops(NumElts, DAG.getFreeze(Value));
SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
// Next, insert the elements that do not match the common value.
for (unsigned I = 0; I < NumElts; ++I)
if (Op.getOperand(I) != Value)
if (Op.getOperand(I) != Value && !Op.getOperand(I).isUndef())
NewVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
Expand Down Expand Up @@ -28663,7 +28666,7 @@ static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
"Expected a fixed length vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getPOISON(VT), V, Zero);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
Expand Down
18 changes: 9 additions & 9 deletions 18 llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2845,7 +2845,7 @@ static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
assert(V.getValueType().isFixedLengthVector() &&
"Expected a fixed length vector operand!");
SDLoc DL(V);
return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
return DAG.getInsertSubvector(DL, DAG.getPOISON(VT), V, 0);
}

// Shrink V so it's just big enough to maintain a VT's worth of data.
Expand Down Expand Up @@ -4328,7 +4328,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx,
/*SkipUndef=*/true);
}
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
Expand Down Expand Up @@ -7814,10 +7815,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SDValue Vec = DAG.getUNDEF(VT);
for (const auto &OpIdx : enumerate(Op->ops())) {
SDValue SubVec = OpIdx.value();
// Don't insert undef subvectors.
if (SubVec.isUndef())
continue;
Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts,
/*SkipUndef=*/true);
}
return Vec;
}
Expand Down Expand Up @@ -12204,9 +12203,10 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
// Reassemble the low and high pieces reversed.
// FIXME: This is a CONCAT_VECTORS.
SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
return DAG.getInsertSubvector(DL, Res, Lo,
LoVT.getVectorMinNumElements());
SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0,
/*SkipUndef=*/true);
return DAG.getInsertSubvector(DL, Res, Lo, LoVT.getVectorMinNumElements(),
/*SkipUndef=*/true);
}

// Just promote the int type to i16 which will double the LMUL.
Expand Down
81 changes: 6 additions & 75 deletions 81 llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@ define void @select_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
Expand All @@ -63,15 +59,8 @@ define void @select_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z2.h, z3.h
; VBITS_GE_256-NEXT: mov z4.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.h
; VBITS_GE_256-NEXT: mov z5.h, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1
; VBITS_GE_256-NEXT: and z5.h, z5.h, #0x1
; VBITS_GE_256-NEXT: cmpne p2.h, p1/z, z4.h, #0
; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z5.h, #0
; VBITS_GE_256-NEXT: sel z0.h, p2, z0.h, z1.h
; VBITS_GE_256-NEXT: sel z1.h, p1, z2.h, z3.h
; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_256-NEXT: sel z1.h, p2, z2.h, z3.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
Expand All @@ -82,10 +71,6 @@ define void @select_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.h
; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1
; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
Expand All @@ -104,10 +89,6 @@ define void @select_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
Expand All @@ -126,10 +107,6 @@ define void @select_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: and z2.h, z2.h, #0x1
; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
Expand Down Expand Up @@ -173,10 +150,6 @@ define void @select_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
Expand All @@ -199,15 +172,8 @@ define void @select_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z2.s, z3.s
; VBITS_GE_256-NEXT: mov z4.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.s
; VBITS_GE_256-NEXT: mov z5.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
; VBITS_GE_256-NEXT: and z5.s, z5.s, #0x1
; VBITS_GE_256-NEXT: cmpne p2.s, p1/z, z4.s, #0
; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z5.s, #0
; VBITS_GE_256-NEXT: sel z0.s, p2, z0.s, z1.s
; VBITS_GE_256-NEXT: sel z1.s, p1, z2.s, z3.s
; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_256-NEXT: sel z1.s, p2, z2.s, z3.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
Expand All @@ -218,10 +184,6 @@ define void @select_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.s
; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
Expand All @@ -240,10 +202,6 @@ define void @select_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
Expand All @@ -262,10 +220,6 @@ define void @select_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
Expand Down Expand Up @@ -310,10 +264,6 @@ define void @select_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
Expand All @@ -336,15 +286,8 @@ define void @select_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z2.d, z3.d
; VBITS_GE_256-NEXT: mov z4.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: ptrue p1.d
; VBITS_GE_256-NEXT: mov z5.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
; VBITS_GE_256-NEXT: and z5.d, z5.d, #0x1
; VBITS_GE_256-NEXT: cmpne p2.d, p1/z, z4.d, #0
; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z5.d, #0
; VBITS_GE_256-NEXT: sel z0.d, p2, z0.d, z1.d
; VBITS_GE_256-NEXT: sel z1.d, p1, z2.d, z3.d
; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_256-NEXT: sel z1.d, p2, z2.d, z3.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
Expand All @@ -355,10 +298,6 @@ define void @select_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT: ptrue p1.d
; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
Expand All @@ -377,10 +316,6 @@ define void @select_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
Expand All @@ -399,10 +334,6 @@ define void @select_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: and z2.d, z2.d, #0x1
; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
Expand Down
8 changes: 4 additions & 4 deletions 8 llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ define void @foo(ptr %a) #0 {
; CHECK: SelectionDAG has 13 nodes:
; CHECK-NEXT: t0: ch,glue = EntryToken
; CHECK-NEXT: t2: i64,ch = CopyFromReg t0, Register:i64 %0
; CHECK-NEXT: t21: nxv2i64,ch = LDR_ZXI<Mem:(volatile load (<vscale x 1 x s128>) from %ir.a, align 64)> t2, TargetConstant:i64<0>, t0
; CHECK-NEXT: t22: nxv2i64,ch = LDR_ZXI<Mem:(volatile load (<vscale x 1 x s128>) from %ir.a, align 64)> t2, TargetConstant:i64<0>, t0
; CHECK-NEXT: t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0>
; CHECK-NEXT: t6: i64 = ADDXri TargetFrameIndex:i64<0>, TargetConstant:i32<0>, TargetConstant:i32<0>
; CHECK-NEXT: t22: ch = STR_ZXI<Mem:(volatile store (<vscale x 1 x s128>) into %ir.r0, align 64)> t21, t6, TargetConstant:i64<0>, t21:1
; CHECK-NEXT: t23: ch = STR_ZXI<Mem:(volatile store (<vscale x 1 x s128>) into %ir.r1, align 64)> t21, t8, TargetConstant:i64<0>, t22
; CHECK-NEXT: t10: ch = RET_ReallyLR t23
; CHECK-NEXT: t23: ch = STR_ZXI<Mem:(volatile store (<vscale x 1 x s128>) into %ir.r0, align 64)> t22, t6, TargetConstant:i64<0>, t22:1
; CHECK-NEXT: t24: ch = STR_ZXI<Mem:(volatile store (<vscale x 1 x s128>) into %ir.r1, align 64)> t22, t8, TargetConstant:i64<0>, t23
; CHECK-NEXT: t10: ch = RET_ReallyLR t24
; CHECK-EMPTY:
entry:
%r0 = alloca <8 x i64>
Expand Down
Loading
Morty Proxy This is a proxified and sanitized view of the page, visit original site.