diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index d9762c884994a..727857b647acc 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -3613,8 +3613,18 @@ bool SparcTargetLowering::isFNegFree(EVT VT) const {
 
 bool SparcTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                        bool ForCodeSize) const {
-  return Subtarget->isVIS() && (VT == MVT::f32 || VT == MVT::f64) &&
-         Imm.isZero();
+  // Only scalar f32/f64 immediates are ever treated as legal here.
+  if (VT != MVT::f32 && VT != MVT::f64)
+    return false;
+  // With VIS, +/-0.0 is materialized in registers (fzero, optionally fneg).
+  if (Subtarget->isVIS() && Imm.isZero())
+    return true;
+  // With VIS3, +/-0.5 is built from sethi plus an integer-to-FP register move.
+  // getExactLog2Abs() == -1 holds exactly when |Imm| == 0.5, so this single
+  // check covers both signs.
+  if (Subtarget->isVIS3())
+    return Imm.getExactLog2Abs() == -1;
+  return false;
 }
 
 bool SparcTargetLowering::isCtlzFast() const { return Subtarget->isVIS3(); }
diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td b/llvm/lib/Target/Sparc/SparcInstrVIS.td
index 047a56696af84..4a0907fc64bea 100644
--- a/llvm/lib/Target/Sparc/SparcInstrVIS.td
+++ b/llvm/lib/Target/Sparc/SparcInstrVIS.td
@@ -281,6 +281,8 @@ def XMULXHI : VISInst<0b100010110, "xmulxhi", I64Regs>;
 // FP immediate patterns.
 def fpimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.0);}]>;
 def fpnegimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.0);}]>;
+def fpimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.5);}]>;
+def fpnegimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.5);}]>;
 
 // VIS instruction patterns.
 let Predicates = [HasVIS] in {
@@ -293,6 +295,16 @@ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
 
 // VIS3 instruction patterns.
 let Predicates = [HasVIS3] in {
+// +/-0.5 immediate.
+// This is needed to enable halving instructions.
+// FIXME generalize this to arbitrary immediates.
+// SET/MOVWTOS or SETX/MOVXTOD pair should let us materialize FP constants
+// faster than constant pool loading.
+def : Pat<(f32 fpimmhalf), (MOVWTOS (SETHIi 0x0FC000))>;
+def : Pat<(f32 fpnegimmhalf), (MOVWTOS (SETHIi 0x2FC000))>;
+def : Pat<(f64 fpimmhalf), (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
+def : Pat<(f64 fpnegimmhalf), (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;
+
 def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;
 
 def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)), (UMULXHI $lhs, $rhs)>;
@@ -329,4 +341,12 @@ def : Pat<(f64 (fneg (fadd f64:$rs1, f64:$rs2))), (FNADDD $rs1, $rs2)>;
 def : Pat<(f32 (fneg (fmul f32:$rs1, f32:$rs2))), (FNMULS $rs1, $rs2)>;
 def : Pat<(f64 (fneg (fmul f64:$rs1, f64:$rs2))), (FNMULD $rs1, $rs2)>;
 def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
+
+// Op-then-halve FP operations.
+def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)), (FHADDS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)), (FHADDD $rs1, $rs2)>;
+def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)), (FHSUBS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)), (FHSUBD $rs1, $rs2)>;
+def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)), (FNHADDS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)), (FNHADDD $rs1, $rs2)>;
 } // Predicates = [HasVIS3]
diff --git a/llvm/test/CodeGen/SPARC/float-constants.ll b/llvm/test/CodeGen/SPARC/float-constants.ll
index 440c75bfca9f9..f8b600c330a31 100644
--- a/llvm/test/CodeGen/SPARC/float-constants.ll
+++ b/llvm/test/CodeGen/SPARC/float-constants.ll
@@ -2,6 +2,7 @@
 ; RUN: llc < %s -mtriple=sparc | FileCheck %s
 ; RUN: llc < %s -mtriple=sparcel | FileCheck %s --check-prefix=CHECK-LE
 ; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis | FileCheck %s --check-prefix=CHECK-VIS
+; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis,+vis3 | FileCheck %s --check-prefix=CHECK-VIS3
 
 ;; Bitcast should not do a runtime conversion, but rather emit a
 ;; constant into integer registers directly.
@@ -24,6 +25,12 @@ define <2 x i32> @bitcast() nounwind {
 ; CHECK-VIS-NEXT:    sethi 1049856, %o0
 ; CHECK-VIS-NEXT:    retl
 ; CHECK-VIS-NEXT:    mov %g0, %o1
+;
+; CHECK-VIS3-LABEL: bitcast:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    sethi 1049856, %o0
+; CHECK-VIS3-NEXT:    retl
+; CHECK-VIS3-NEXT:    mov %g0, %o1
   %1 = bitcast double 5.0 to <2 x i32>
   ret <2 x i32> %1
 }
@@ -61,6 +68,17 @@ define void @test_call() nounwind {
 ; CHECK-VIS-NEXT:    ldd [%i0+%l44(.LCPI1_0)], %f0
 ; CHECK-VIS-NEXT:    ret
 ; CHECK-VIS-NEXT:    restore
+;
+; CHECK-VIS3-LABEL: test_call:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    save %sp, -176, %sp
+; CHECK-VIS3-NEXT:    sethi %h44(.LCPI1_0), %i0
+; CHECK-VIS3-NEXT:    add %i0, %m44(.LCPI1_0), %i0
+; CHECK-VIS3-NEXT:    sllx %i0, 12, %i0
+; CHECK-VIS3-NEXT:    call a
+; CHECK-VIS3-NEXT:    ldd [%i0+%l44(.LCPI1_0)], %f0
+; CHECK-VIS3-NEXT:    ret
+; CHECK-VIS3-NEXT:    restore
   call void @a(double 5.0)
   ret void
 }
@@ -106,6 +124,19 @@ define double @test_intrins_call() nounwind {
 ; CHECK-VIS-NEXT:    nop
 ; CHECK-VIS-NEXT:    ret
 ; CHECK-VIS-NEXT:    restore
+;
+; CHECK-VIS3-LABEL: test_intrins_call:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    save %sp, -176, %sp
+; CHECK-VIS3-NEXT:    sethi %h44(.LCPI2_0), %i0
+; CHECK-VIS3-NEXT:    add %i0, %m44(.LCPI2_0), %i0
+; CHECK-VIS3-NEXT:    sllx %i0, 12, %i0
+; CHECK-VIS3-NEXT:    ldd [%i0+%l44(.LCPI2_0)], %f0
+; CHECK-VIS3-NEXT:    fmovd %f0, %f2
+; CHECK-VIS3-NEXT:    call pow
+; CHECK-VIS3-NEXT:    nop
+; CHECK-VIS3-NEXT:    ret
+; CHECK-VIS3-NEXT:    restore
   %1 = call double @llvm.pow.f64(double 2.0, double 2.0)
   ret double %1
 }
@@ -129,6 +160,11 @@ define double @pos_zero_double() nounwind {
 ; CHECK-VIS:       ! %bb.0:
 ; CHECK-VIS-NEXT:    retl
 ; CHECK-VIS-NEXT:    fzero %f0
+;
+; CHECK-VIS3-LABEL: pos_zero_double:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    retl
+; CHECK-VIS3-NEXT:    fzero %f0
   ret double +0.0
 }
 
@@ -150,6 +186,12 @@ define double @neg_zero_double() nounwind {
 ; CHECK-VIS-NEXT:    fzero %f0
 ; CHECK-VIS-NEXT:    retl
 ; CHECK-VIS-NEXT:    fnegd %f0, %f0
+;
+; CHECK-VIS3-LABEL: neg_zero_double:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    fzero %f0
+; CHECK-VIS3-NEXT:    retl
+; CHECK-VIS3-NEXT:    fnegd %f0, %f0
   ret double -0.0
 }
 
@@ -170,6 +212,11 @@ define float @pos_zero_float() nounwind {
 ; CHECK-VIS:       ! %bb.0:
 ; CHECK-VIS-NEXT:    retl
 ; CHECK-VIS-NEXT:    fzeros %f0
+;
+; CHECK-VIS3-LABEL: pos_zero_float:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    retl
+; CHECK-VIS3-NEXT:    fzeros %f0
   ret float +0.0
 }
 
@@ -191,5 +238,131 @@ define float @neg_zero_float() nounwind {
 ; CHECK-VIS-NEXT:    fzeros %f0
 ; CHECK-VIS-NEXT:    retl
 ; CHECK-VIS-NEXT:    fnegs %f0, %f0
+;
+; CHECK-VIS3-LABEL: neg_zero_float:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    fzeros %f0
+; CHECK-VIS3-NEXT:    retl
+; CHECK-VIS3-NEXT:    fnegs %f0, %f0
   ret float -0.0
 }
+
+;; When we have VIS3, f32/f64 +/-0.5 constant should be materialized from sethi.
+
+define double @pos_half_double() nounwind {
+; CHECK-LABEL: pos_half_double:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    sethi %hi(.LCPI7_0), %o0
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    ldd [%o0+%lo(.LCPI7_0)], %f0
+;
+; CHECK-LE-LABEL: pos_half_double:
+; CHECK-LE:       ! %bb.0:
+; CHECK-LE-NEXT:    sethi %hi(.LCPI7_0), %o0
+; CHECK-LE-NEXT:    retl
+; CHECK-LE-NEXT:    ldd [%o0+%lo(.LCPI7_0)], %f0
+;
+; CHECK-VIS-LABEL: pos_half_double:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    sethi %h44(.LCPI7_0), %o0
+; CHECK-VIS-NEXT:    add %o0, %m44(.LCPI7_0), %o0
+; CHECK-VIS-NEXT:    sllx %o0, 12, %o0
+; CHECK-VIS-NEXT:    retl
+; CHECK-VIS-NEXT:    ldd [%o0+%l44(.LCPI7_0)], %f0
+;
+; CHECK-VIS3-LABEL: pos_half_double:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    sethi 1046528, %o0
+; CHECK-VIS3-NEXT:    sllx %o0, 32, %o0
+; CHECK-VIS3-NEXT:    retl
+; CHECK-VIS3-NEXT:    movxtod %o0, %f0
+  ret double +0.5
+}
+
+define double @neg_half_double() nounwind {
+; CHECK-LABEL: neg_half_double:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    sethi %hi(.LCPI8_0), %o0
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    ldd [%o0+%lo(.LCPI8_0)], %f0
+;
+; CHECK-LE-LABEL: neg_half_double:
+; CHECK-LE:       ! %bb.0:
+; CHECK-LE-NEXT:    sethi %hi(.LCPI8_0), %o0
+; CHECK-LE-NEXT:    retl
+; CHECK-LE-NEXT:    ldd [%o0+%lo(.LCPI8_0)], %f0
+;
+; CHECK-VIS-LABEL: neg_half_double:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    sethi %h44(.LCPI8_0), %o0
+; CHECK-VIS-NEXT:    add %o0, %m44(.LCPI8_0), %o0
+; CHECK-VIS-NEXT:    sllx %o0, 12, %o0
+; CHECK-VIS-NEXT:    retl
+; CHECK-VIS-NEXT:    ldd [%o0+%l44(.LCPI8_0)], %f0
+;
+; CHECK-VIS3-LABEL: neg_half_double:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    sethi 3143680, %o0
+; CHECK-VIS3-NEXT:    sllx %o0, 32, %o0
+; CHECK-VIS3-NEXT:    retl
+; CHECK-VIS3-NEXT:    movxtod %o0, %f0
+  ret double -0.5
+}
+
+define float @pos_half_float() nounwind {
+; CHECK-LABEL: pos_half_float:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    sethi %hi(.LCPI9_0), %o0
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    ld [%o0+%lo(.LCPI9_0)], %f0
+;
+; CHECK-LE-LABEL: pos_half_float:
+; CHECK-LE:       ! %bb.0:
+; CHECK-LE-NEXT:    sethi %hi(.LCPI9_0), %o0
+; CHECK-LE-NEXT:    retl
+; CHECK-LE-NEXT:    ld [%o0+%lo(.LCPI9_0)], %f0
+;
+; CHECK-VIS-LABEL: pos_half_float:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    sethi %h44(.LCPI9_0), %o0
+; CHECK-VIS-NEXT:    add %o0, %m44(.LCPI9_0), %o0
+; CHECK-VIS-NEXT:    sllx %o0, 12, %o0
+; CHECK-VIS-NEXT:    retl
+; CHECK-VIS-NEXT:    ld [%o0+%l44(.LCPI9_0)], %f0
+;
+; CHECK-VIS3-LABEL: pos_half_float:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    sethi 1032192, %o0
+; CHECK-VIS3-NEXT:    retl
+; CHECK-VIS3-NEXT:    movwtos %o0, %f0
+  ret float +0.5
+}
+
+define float @neg_half_float() nounwind {
+; CHECK-LABEL: neg_half_float:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    sethi %hi(.LCPI10_0), %o0
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    ld [%o0+%lo(.LCPI10_0)], %f0
+;
+; CHECK-LE-LABEL: neg_half_float:
+; CHECK-LE:       ! %bb.0:
+; CHECK-LE-NEXT:    sethi %hi(.LCPI10_0), %o0
+; CHECK-LE-NEXT:    retl
+; CHECK-LE-NEXT:    ld [%o0+%lo(.LCPI10_0)], %f0
+;
+; CHECK-VIS-LABEL: neg_half_float:
+; CHECK-VIS:       ! %bb.0:
+; CHECK-VIS-NEXT:    sethi %h44(.LCPI10_0), %o0
+; CHECK-VIS-NEXT:    add %o0, %m44(.LCPI10_0), %o0
+; CHECK-VIS-NEXT:    sllx %o0, 12, %o0
+; CHECK-VIS-NEXT:    retl
+; CHECK-VIS-NEXT:    ld [%o0+%l44(.LCPI10_0)], %f0
+;
+; CHECK-VIS3-LABEL: neg_half_float:
+; CHECK-VIS3:       ! %bb.0:
+; CHECK-VIS3-NEXT:    sethi 3129344, %o0
+; CHECK-VIS3-NEXT:    retl
+; CHECK-VIS3-NEXT:    movwtos %o0, %f0
+  ret float -0.5
+}
diff --git a/llvm/test/CodeGen/SPARC/float-vis3.ll b/llvm/test/CodeGen/SPARC/float-vis3.ll
index 2352eb0e97332..7b527f608a063 100644
--- a/llvm/test/CodeGen/SPARC/float-vis3.ll
+++ b/llvm/test/CodeGen/SPARC/float-vis3.ll
@@ -129,3 +129,87 @@ entry:
   %fneg = fneg <4 x double> %mul
   ret <4 x double> %fneg
 }
+
+define <4 x float> @vec_fhadds(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: vec_fhadds:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    fhadds %f1, %f9, %f0
+; CHECK-NEXT:    fhadds %f3, %f11, %f1
+; CHECK-NEXT:    fhadds %f5, %f13, %f2
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    fhadds %f7, %f15, %f3
+entry:
+  %add = fadd <4 x float> %a, %b
+  %div = fmul <4 x float> %add, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
+  ret <4 x float> %div
+}
+
+define <4 x double> @vec_fhaddd(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: vec_fhaddd:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    fhaddd %f0, %f8, %f0
+; CHECK-NEXT:    fhaddd %f2, %f10, %f2
+; CHECK-NEXT:    fhaddd %f4, %f12, %f4
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    fhaddd %f6, %f14, %f6
+entry:
+  %add = fadd <4 x double> %a, %b
+  %div = fmul <4 x double> %add, <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
+  ret <4 x double> %div
+}
+
+define <4 x float> @vec_fhsubs(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: vec_fhsubs:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    fhsubs %f1, %f9, %f0
+; CHECK-NEXT:    fhsubs %f3, %f11, %f1
+; CHECK-NEXT:    fhsubs %f5, %f13, %f2
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    fhsubs %f7, %f15, %f3
+entry:
+  %sub = fsub <4 x float> %a, %b
+  %div = fmul <4 x float> %sub, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
+  ret <4 x float> %div
+}
+
+define <4 x double> @vec_fhsubd(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: vec_fhsubd:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    fhsubd %f0, %f8, %f0
+; CHECK-NEXT:    fhsubd %f2, %f10, %f2
+; CHECK-NEXT:    fhsubd %f4, %f12, %f4
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    fhsubd %f6, %f14, %f6
+entry:
+  %sub = fsub <4 x double> %a, %b
+  %div = fmul <4 x double> %sub, <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
+  ret <4 x double> %div
+}
+
+define <4 x float> @vec_fnhadds(<4 x float> %a, <4 x float> %b) nounwind {
+; CHECK-LABEL: vec_fnhadds:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    fnhadds %f1, %f9, %f0
+; CHECK-NEXT:    fnhadds %f3, %f11, %f1
+; CHECK-NEXT:    fnhadds %f5, %f13, %f2
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    fnhadds %f7, %f15, %f3
+entry:
+  %add.i = fadd <4 x float> %a, %b
+  %fneg = fmul <4 x float> %add.i, <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
+  ret <4 x float> %fneg
+}
+
+define <4 x double> @vec_fnhaddd(<4 x double> %a, <4 x double> %b) nounwind {
+; CHECK-LABEL: vec_fnhaddd:
+; CHECK:       ! %bb.0: ! %entry
+; CHECK-NEXT:    fnhaddd %f0, %f8, %f0
+; CHECK-NEXT:    fnhaddd %f2, %f10, %f2
+; CHECK-NEXT:    fnhaddd %f4, %f12, %f4
+; CHECK-NEXT:    retl
+; CHECK-NEXT:    fnhaddd %f6, %f14, %f6
+entry:
+  %add.i = fadd <4 x double> %a, %b
+  %fneg = fmul <4 x double> %add.i, <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>
+  ret <4 x double> %fneg
+}