-
Notifications
You must be signed in to change notification settings - Fork 13.7k
DAG: Assert fcmp uno runtime calls are boolean values #142898
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAG: Assert fcmp uno runtime calls are boolean values #142898
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
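@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-arm Author: Matt Arsenault (arsenm) Changes: This saves 2 instructions in the ARM soft float case for fcmp ueq. This code is written in a confusingly overly general way. The point of getCmpLibcallCC is to express that the compiler-rt implementations of the FP compares are different aliases around functions which may return -1 in some cases. This does not apply to the call for unordered, which returns a normal boolean. Also stop overriding the default value for the unordered compare for ARM. This was setting it to the same value as the default, which is now assumed. Full diff: https://github.com/llvm/llvm-project/pull/142898.diff 8 Files Affected: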
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e8e820ac1f695..a3c4cb4ea0582 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -429,8 +429,20 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Update Chain.
Chain = Call.second;
} else {
+ assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
+ "unordered call should be simple boolean");
+
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
+ if (RetVT == SetCCVT &&
+ getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
+ // FIXME: Checking the type matches is a hack in case the calling
+ // convention lowering inserted some instructions after the
+ // CopyFromReg. Combines fail to look through the AssertZext.
+ NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
+ DAG.getValueType(MVT::i1));
+ }
+
SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
CCCode = getCmpLibcallCC(LC2);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d156851d7e214..e97df268f13f9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -612,7 +612,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Single-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 4
@@ -629,7 +629,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
{ RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
// Floating-point to integer conversions.
// RTABI chapter 4.1.2, Table 6
diff --git a/llvm/test/CodeGen/ARM/fpcmp_ueq.ll b/llvm/test/CodeGen/ARM/fpcmp_ueq.ll
index 698c7506cc593..f77720fd935f7 100644
--- a/llvm/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/llvm/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -9,12 +9,13 @@ entry:
}
; CHECK-ARMv4-LABEL: f7:
-; CHECK-ARMv4-DAG: bl ___eqsf2
-; CHECK-ARMv4-DAG: bl ___unordsf2
-; CHECK-ARMv4: cmp r0, #0
-; CHECK-ARMv4: movne r0, #1
-; CHECK-ARMv4: orrs r0, r0,
-; CHECK-ARMv4: moveq r0, #42
+; CHECK-ARMv4: bl ___eqsf2
+; CHECK-ARMv4-NEXT: rsbs r1, r0, #0
+; CHECK-ARMv4-NEXT: adc r6, r0, r1
+
+; CHECK-ARMv4: bl ___unordsf2
+; CHECK-ARMv4-NEXT: orrs r0, r0, r6
+; CHECK-ARMv4-NEXT: mov r0, #154
; CHECK-ARMv7-LABEL: f7:
; CHECK-ARMv7: vcmp.f32
diff --git a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
index 949668f640dbd..a8a4554cc2f4b 100644
--- a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
+++ b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
@@ -471,15 +471,15 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp {
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: call __eqdf2
-; RV32I-NEXT: seqz s4, a0
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
-; RV32I-NEXT: call __unorddf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s4
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s4, a0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1199,15 +1199,15 @@ define i32 @fcmps_ueq(double %a, double %b) nounwind strictfp {
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: call __eqdf2
-; RV32I-NEXT: seqz s4, a0
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
-; RV32I-NEXT: call __unorddf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s4
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s4, a0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/double-fcmp.ll b/llvm/test/CodeGen/RISCV/double-fcmp.ll
index 1e609f8081ebf..542ec6ede56b8 100644
--- a/llvm/test/CodeGen/RISCV/double-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/double-fcmp.ll
@@ -403,15 +403,15 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind {
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s3, a0
-; RV32I-NEXT: call __eqdf2
-; RV32I-NEXT: seqz s4, a0
+; RV32I-NEXT: call __unorddf2
+; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: mv a3, s0
-; RV32I-NEXT: call __unorddf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s4
+; RV32I-NEXT: call __eqdf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s4, a0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
index 0cbfc96bf485e..f0551d3405ad3 100644
--- a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
+++ b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll
@@ -382,13 +382,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp {
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: call __eqsf2
-; RV32I-NEXT: seqz s2, a0
+; RV32I-NEXT: call __unordsf2
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: call __unordsf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s2
+; RV32I-NEXT: call __eqsf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s2, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -991,13 +991,13 @@ define i32 @fcmps_ueq(float %a, float %b) nounwind strictfp {
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: call __eqsf2
-; RV32I-NEXT: seqz s2, a0
+; RV32I-NEXT: call __unordsf2
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: call __unordsf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s2
+; RV32I-NEXT: call __eqsf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s2, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/float-fcmp.ll b/llvm/test/CodeGen/RISCV/float-fcmp.ll
index 265d553a3e5d9..393d76a31f1f6 100644
--- a/llvm/test/CodeGen/RISCV/float-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-fcmp.ll
@@ -344,13 +344,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind {
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
-; RV32I-NEXT: call __eqsf2
-; RV32I-NEXT: seqz s2, a0
+; RV32I-NEXT: call __unordsf2
+; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: call __unordsf2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: or a0, a0, s2
+; RV32I-NEXT: call __eqsf2
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: or a0, s2, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/Thumb2/float-cmp.ll b/llvm/test/CodeGen/Thumb2/float-cmp.ll
index 73e0063a9278e..ed80544377204 100644
--- a/llvm/test/CodeGen/Thumb2/float-cmp.ll
+++ b/llvm/test/CodeGen/Thumb2/float-cmp.ll
@@ -200,8 +200,13 @@ define i1 @cmp_d_one(double %a, double %b) {
; CHECK-LABEL: cmp_d_one:
; NONE: bl __aeabi_dcmpeq
; NONE: bl __aeabi_dcmpun
-; SP: bl __aeabi_dcmpeq
; SP: bl __aeabi_dcmpun
+; SP: eor r8, r0, #1
+; SP: bl __aeabi_dcmpeq
+; SP-NEXT: clz r0, r0
+; SP-NEXT: lsrs r0, r0, #5
+; SP-NEXT: ands.w r0, r0, r8
+
; DP: vcmp.f64
; DP: movmi r0, #1
; DP: movgt r0, #1
|
EVT SetCCVT = | ||
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT); | ||
if (RetVT == SetCCVT && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a hack to avoid riscv64 regressions, since the call lowering inserts an `and` after the CopyFromReg.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On RISCV64, the boolean should, in fact, be extended to 64 bits, and there should be an AssertSext recording that. And call lowering shouldn't insert a mask. Maybe PromoteIntRes_AssertZext is the culprit here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like PromoteIntRes_AssertZext is the culprit. Something like this can recover it
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aba3c0f80a02..377814e09c2f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14933,6 +14933,24 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
}
}
+ // If we have (AssertZext (and (AssertSext X, iX), M), iY) and Y is smaller
+ // than X, and the And doesn't change the lower iY bits, we can move the
+ // AssertZext in front of the And and drop the AssertSext.
+ if (Opcode == ISD::AssertZext && N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::AssertSext &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ const APInt &Mask = N0.getConstantOperandAPInt(1);
+ if (AssertVT.bitsLT(BigA_AssertVT) &&
+ Mask.countr_one() >= AssertVT.getScalarSizeInBits()) {
+ SDLoc DL(N);
+ SDValue NewAssert = DAG.getNode(Opcode, DL, N->getValueType(0),
+ BigA.getOperand(0), N1);
+ return DAG.getNode(ISD::AND, DL, N->getValueType(0), NewAssert, N0.getOperand(1));
+ }
+ }
+
return SDValue();
}
You can test this locally with the following command: git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c2e5805b8..1518eb4bb 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -598,85 +598,126 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
const CallingConv::ID CC;
const ISD::CondCode Cond;
} LibraryCalls[] = {
- // Double-precision floating-point arithmetic helper functions
- // RTABI chapter 4.1.2, Table 2
- { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Double-precision floating-point comparison helper functions
- // RTABI chapter 4.1.2, Table 3
- { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
- { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Single-precision floating-point arithmetic helper functions
- // RTABI chapter 4.1.2, Table 4
- { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Single-precision floating-point comparison helper functions
- // RTABI chapter 4.1.2, Table 5
- { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
- { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
- { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Floating-point to integer conversions.
- // RTABI chapter 4.1.2, Table 6
- { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Conversions between floating types.
- // RTABI chapter 4.1.2, Table 7
- { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Integer to floating-point conversions.
- // RTABI chapter 4.1.2, Table 8
- { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Long long helper functions
- // RTABI chapter 4.2, Table 9
- { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Integer division functions
- // RTABI chapter 4.3.1
- { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ {RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ {RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE},
+ {RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ},
+ {RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE},
+ {RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE},
+ {RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE},
+ {RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE},
+ {RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ {RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ {RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE},
+ {RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ},
+ {RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE},
+ {RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE},
+ {RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE},
+ {RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE},
+ {RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ {RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ {RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ {RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ {RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ {RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
+ {RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS,
+ ISD::SETCC_INVALID},
};
for (const auto &LC : LibraryCalls) {
|
@@ -429,8 +429,20 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, | ||
// Update Chain. | ||
Chain = Call.second; | ||
} else { | ||
assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) && | ||
"unordered call should be simple boolean"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're adding an assertion that CCCode should be SETEQ or SETNE... then you're changing the condition code for RTLIB::UO_F32 to SETCC_INVALID, which then goes through here? I must be missing something...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, wait, I see, we don't call setCmpLibcallCC in that case, so the ARMISelLowering change is just a no-op.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it was setting the default value so this never did anything
24a8c2e
to
232ebc7
Compare
232ebc7
to
4ed5464
Compare
4ed5464
to
af3a251
Compare
This saves 2 instructions in the ARM soft float case for fcmp ueq. This code is written in a confusingly overly general way. The point of getCmpLibcallCC is to express that the compiler-rt implementations of the FP compares are different aliases around functions which may return -1 in some cases. This does not apply to the call for unordered, which returns a normal boolean. Also stop overriding the default value for the unordered compare for ARM. This was setting it to the same value as the default, which is now assumed.
af3a251
to
bde5b5e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This saves 2 instructions in the ARM soft float case for fcmp ueq. This code is written in a confusingly overly general way. The point of getCmpLibcallCC is to express that the compiler-rt implementations of the FP compares are different aliases around functions which may return -1 in some cases. This does not apply to the call for unordered, which returns a normal boolean. Also stop overriding the default value for the unordered compare for ARM. This was setting it to the same value as the default, which is now assumed.
This saves 2 instructions in the ARM soft float case for fcmp ueq. This code is written in a confusingly overly general way. The point of getCmpLibcallCC is to express that the compiler-rt implementations of the FP compares are different aliases around functions which may return -1 in some cases. This does not apply to the call for unordered, which returns a normal boolean. Also stop overriding the default value for the unordered compare for ARM. This was setting it to the same value as the default, which is now assumed.
This saves 2 instructions in the ARM soft float case for fcmp ueq.
This code is written in a confusingly overly general way. The point
of getCmpLibcallCC is to express that the compiler-rt implementations
of the FP compares are different aliases around functions which may
return -1 in some cases. This does not apply to the call for unordered,
which returns a normal boolean.
Also stop overriding the default value for the unordered compare for ARM.
This was setting it to the same value as the default, which is now assumed.