Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

[LLVM][SelectionDAG] Simplify SplitVecOp_VSETCC. #139295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions in llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4341,11 +4341,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
GetSplitVector(N->getOperand(isStrict ? 1 : 0), Lo0, Hi0);
GetSplitVector(N->getOperand(isStrict ? 2 : 1), Lo1, Hi1);

auto PartEltCnt = Lo0.getValueType().getVectorElementCount();

LLVMContext &Context = *DAG.getContext();
EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt);
EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2);
EVT VT = N->getValueType(0);
EVT PartResVT = Lo0.getValueType().changeElementType(VT.getScalarType());

if (Opc == ISD::SETCC) {
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
Expand All @@ -4369,12 +4366,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1,
N->getOperand(2), MaskHi, EVLHi);
}
SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);

EVT OpVT = N->getOperand(0).getValueType();
ISD::NodeType ExtendCode =
TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoRes, HiRes);
}


Expand Down
28 changes: 0 additions & 28 deletions in llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -890,8 +890,6 @@ define <8 x i1> @test_fcmp_une(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp une <8 x bfloat> %a, %b
Expand All @@ -913,8 +911,6 @@ define <8 x i1> @test_fcmp_ueq(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp ueq <8 x bfloat> %a, %b
Expand All @@ -932,8 +928,6 @@ define <8 x i1> @test_fcmp_ugt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: fcmge v0.4s, v1.4s, v0.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp ugt <8 x bfloat> %a, %b
Expand All @@ -951,8 +945,6 @@ define <8 x i1> @test_fcmp_uge(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp uge <8 x bfloat> %a, %b
Expand All @@ -970,8 +962,6 @@ define <8 x i1> @test_fcmp_ult(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: fcmge v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp ult <8 x bfloat> %a, %b
Expand All @@ -989,8 +979,6 @@ define <8 x i1> @test_fcmp_ule(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: fcmgt v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp ule <8 x bfloat> %a, %b
Expand All @@ -1012,8 +1000,6 @@ define <8 x i1> @test_fcmp_uno(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: mvn v0.16b, v0.16b
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp uno <8 x bfloat> %a, %b
Expand All @@ -1034,8 +1020,6 @@ define <8 x i1> @test_fcmp_one(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: orr v1.16b, v2.16b, v4.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp one <8 x bfloat> %a, %b
Expand All @@ -1052,8 +1036,6 @@ define <8 x i1> @test_fcmp_oeq(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: fcmeq v2.4s, v3.4s, v2.4s
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp oeq <8 x bfloat> %a, %b
Expand All @@ -1070,8 +1052,6 @@ define <8 x i1> @test_fcmp_ogt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: fcmgt v2.4s, v3.4s, v2.4s
; CHECK-NEXT: fcmgt v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp ogt <8 x bfloat> %a, %b
Expand All @@ -1088,8 +1068,6 @@ define <8 x i1> @test_fcmp_oge(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: fcmge v2.4s, v3.4s, v2.4s
; CHECK-NEXT: fcmge v0.4s, v0.4s, v1.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp oge <8 x bfloat> %a, %b
Expand All @@ -1106,8 +1084,6 @@ define <8 x i1> @test_fcmp_olt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: fcmgt v2.4s, v3.4s, v2.4s
; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp olt <8 x bfloat> %a, %b
Expand All @@ -1124,8 +1100,6 @@ define <8 x i1> @test_fcmp_ole(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: fcmge v2.4s, v3.4s, v2.4s
; CHECK-NEXT: fcmge v0.4s, v1.4s, v0.4s
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp ole <8 x bfloat> %a, %b
Expand All @@ -1146,8 +1120,6 @@ define <8 x i1> @test_fcmp_ord(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-NEXT: orr v1.16b, v2.16b, v4.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: shl v0.8h, v0.8h, #15
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
%1 = fcmp ord <8 x bfloat> %a, %b
Expand Down
132 changes: 51 additions & 81 deletions in llvm/test/CodeGen/AArch64/fcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1145,8 +1145,6 @@ define <7 x half> @v7f16_half(<7 x half> %a, <7 x half> %b, <7 x half> %d, <7 x
; CHECK-SD-NOFP16-NEXT: fcmgt v4.4s, v5.4s, v4.4s
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v4.8h
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-SD-NOFP16-NEXT: ret
;
Expand Down Expand Up @@ -1275,8 +1273,6 @@ define <8 x half> @v8f16_half(<8 x half> %a, <8 x half> %b, <8 x half> %d, <8 x
; CHECK-SD-NOFP16-NEXT: fcmgt v4.4s, v5.4s, v4.4s
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v4.8h
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-SD-NOFP16-NEXT: ret
;
Expand Down Expand Up @@ -1328,10 +1324,6 @@ define <16 x half> @v16f16_half(<16 x half> %a, <16 x half> %b, <16 x half> %d,
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v2.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v16.8h
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v3.8h
; CHECK-SD-NOFP16-NEXT: shl v1.8h, v1.8h, #15
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
; CHECK-SD-NOFP16-NEXT: cmlt v1.8h, v1.8h, #0
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v7.16b
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v4.16b, v6.16b
; CHECK-SD-NOFP16-NEXT: ret
Expand Down Expand Up @@ -1384,45 +1376,41 @@ entry:
define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32> %e) {
; CHECK-SD-NOFP16-LABEL: v7f16_i32:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fmov s2, w0
; CHECK-SD-NOFP16-NEXT: fmov s4, w7
; CHECK-SD-NOFP16-NEXT: mov x8, sp
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: ldr s4, [sp, #24]
; CHECK-SD-NOFP16-NEXT: add x9, sp, #32
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[1], [x9]
; CHECK-SD-NOFP16-NEXT: add x9, sp, #16
; CHECK-SD-NOFP16-NEXT: fcmgt v2.4s, v3.4s, v2.4s
; CHECK-SD-NOFP16-NEXT: fmov s3, w4
; CHECK-SD-NOFP16-NEXT: fmov s5, w4
; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: ldr s3, [sp, #24]
; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: add x9, sp, #8
; CHECK-SD-NOFP16-NEXT: mov v2.s[1], w1
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[1], [x8]
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: mov v5.s[1], w5
; CHECK-SD-NOFP16-NEXT: add x8, sp, #32
; CHECK-SD-NOFP16-NEXT: ld1 { v3.s }[1], [x8]
; CHECK-SD-NOFP16-NEXT: add x8, sp, #16
; CHECK-SD-NOFP16-NEXT: fcmgt v6.4s, v7.4s, v6.4s
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[2], [x9]
; CHECK-SD-NOFP16-NEXT: add x9, sp, #40
; CHECK-SD-NOFP16-NEXT: mov v2.s[2], w2
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: fmov s1, w0
; CHECK-SD-NOFP16-NEXT: mov v3.s[1], w5
; CHECK-SD-NOFP16-NEXT: mov v1.s[1], w1
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-SD-NOFP16-NEXT: fmov s2, w7
; CHECK-SD-NOFP16-NEXT: mov v3.s[2], w6
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[1], [x8]
; CHECK-SD-NOFP16-NEXT: mov v1.s[2], w2
; CHECK-SD-NOFP16-NEXT: add x8, sp, #8
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[2], [x8]
; CHECK-SD-NOFP16-NEXT: add x8, sp, #40
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-SD-NOFP16-NEXT: mov v1.s[3], w3
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[2], [x8]
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[3], [x9]
; CHECK-SD-NOFP16-NEXT: sshll v5.4s, v0.4h, #0
; CHECK-SD-NOFP16-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-SD-NOFP16-NEXT: bif v1.16b, v2.16b, v5.16b
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v3.16b, v4.16b
; CHECK-SD-NOFP16-NEXT: mov v5.s[2], w6
; CHECK-SD-NOFP16-NEXT: ld1 { v3.s }[2], [x9]
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[3], [x8]
; CHECK-SD-NOFP16-NEXT: mov v1.16b, v6.16b
; CHECK-SD-NOFP16-NEXT: mov v2.s[3], w3
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v5.16b, v3.16b
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v2.16b, v4.16b
; CHECK-SD-NOFP16-NEXT: mov w5, v0.s[1]
; CHECK-SD-NOFP16-NEXT: mov w6, v0.s[2]
; CHECK-SD-NOFP16-NEXT: fmov w4, s0
; CHECK-SD-NOFP16-NEXT: mov w1, v1.s[1]
; CHECK-SD-NOFP16-NEXT: mov w2, v1.s[2]
; CHECK-SD-NOFP16-NEXT: mov w3, v1.s[3]
; CHECK-SD-NOFP16-NEXT: mov w5, v0.s[1]
; CHECK-SD-NOFP16-NEXT: mov w6, v0.s[2]
; CHECK-SD-NOFP16-NEXT: fmov w0, s1
; CHECK-SD-NOFP16-NEXT: fmov w4, s0
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: v7f16_i32:
Expand Down Expand Up @@ -1630,17 +1618,12 @@ entry:
define <8 x i32> @v8f16_i32(<8 x half> %a, <8 x half> %b, <8 x i32> %d, <8 x i32> %e) {
; CHECK-SD-NOFP16-LABEL: v8f16_i32:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl2 v6.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v7.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fcmgt v6.4s, v7.4s, v6.4s
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v6.8h
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-SD-NOFP16-NEXT: sshll v6.4s, v0.4h, #0
; CHECK-SD-NOFP16-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-SD-NOFP16-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v6.16b
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v3.16b, v5.16b
Expand Down Expand Up @@ -1694,37 +1677,24 @@ entry:
define <16 x i32> @v16f16_i32(<16 x half> %a, <16 x half> %b, <16 x i32> %d, <16 x i32> %e) {
; CHECK-SD-NOFP16-LABEL: v16f16_i32:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl2 v17.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v18.4s, v2.8h
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v16.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v19.4s, v3.8h
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v3.4h
; CHECK-SD-NOFP16-NEXT: fcmgt v17.4s, v18.4s, v17.4s
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v2.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: fcmgt v2.4s, v19.4s, v16.4s
; CHECK-SD-NOFP16-NEXT: fcmgt v1.4s, v3.4s, v1.4s
; CHECK-SD-NOFP16-NEXT: ldp q18, q19, [sp, #32]
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v17.8h
; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v2.8h
; CHECK-SD-NOFP16-NEXT: ldp q2, q20, [sp]
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
; CHECK-SD-NOFP16-NEXT: shl v1.8h, v1.8h, #15
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
; CHECK-SD-NOFP16-NEXT: cmlt v1.8h, v1.8h, #0
; CHECK-SD-NOFP16-NEXT: sshll v3.4s, v0.4h, #0
; CHECK-SD-NOFP16-NEXT: sshll v16.4s, v1.4h, #0
; CHECK-SD-NOFP16-NEXT: sshll2 v17.4s, v1.8h, #0
; CHECK-SD-NOFP16-NEXT: sshll2 v1.4s, v0.8h, #0
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
; CHECK-SD-NOFP16-NEXT: mov v3.16b, v17.16b
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v20.16b
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v4.16b, v2.16b
; CHECK-SD-NOFP16-NEXT: mov v2.16b, v16.16b
; CHECK-SD-NOFP16-NEXT: bsl v3.16b, v7.16b, v19.16b
; CHECK-SD-NOFP16-NEXT: bsl v2.16b, v6.16b, v18.16b
; CHECK-SD-NOFP16-NEXT: fcvtl v16.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v17.4s, v3.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v18.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v19.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-SD-NOFP16-NEXT: fcmgt v16.4s, v17.4s, v16.4s
; CHECK-SD-NOFP16-NEXT: fcmgt v18.4s, v19.4s, v18.4s
; CHECK-SD-NOFP16-NEXT: fcmgt v3.4s, v3.4s, v1.4s
; CHECK-SD-NOFP16-NEXT: fcmgt v1.4s, v2.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: ldp q0, q19, [sp]
; CHECK-SD-NOFP16-NEXT: ldp q2, q17, [sp, #32]
; CHECK-SD-NOFP16-NEXT: bit v0.16b, v4.16b, v18.16b
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v19.16b
; CHECK-SD-NOFP16-NEXT: bsl v3.16b, v7.16b, v17.16b
; CHECK-SD-NOFP16-NEXT: bit v2.16b, v6.16b, v16.16b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: v16f16_i32:
Expand Down
Loading
Morty Proxy: this is a proxified and sanitized view of the page; visit the original site.