-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[SPARC] Use op-then-halve instructions when we have VIS3 #135718
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/koachan/spr/main.sparc-use-op-then-halve-instructions-when-we-have-vis3
Are you sure you want to change the base?
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-backend-sparc Author: Koakuma (koachan) Changes Full diff: https://github.com/llvm/llvm-project/pull/135718.diff 4 Files Affected:
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 98fcaba86fee0..ad92ca524af34 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -3574,8 +3574,13 @@ bool SparcTargetLowering::useLoadStackGuardNode(const Module &M) const {
bool SparcTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
- return Subtarget->isVIS() && (VT == MVT::f32 || VT == MVT::f64) &&
- Imm.isZero();
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return false;
+ if (Imm.isZero())
+ return Subtarget->isVIS();
+ if (Imm.isExactlyValue(+0.5) || Imm.isExactlyValue(-0.5))
+ return Subtarget->isVIS3();
+ return false;
}
// Override to disable global variable loading on Linux.
diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td b/llvm/lib/Target/Sparc/SparcInstrVIS.td
index 27f8358576789..148cad8304eb3 100644
--- a/llvm/lib/Target/Sparc/SparcInstrVIS.td
+++ b/llvm/lib/Target/Sparc/SparcInstrVIS.td
@@ -281,6 +281,8 @@ def XMULXHI : VISInst<0b100010110, "xmulxhi", I64Regs>;
// FP immediate patterns.
def fpimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(+0.0);}]>;
def fpnegimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(-0.0);}]>;
+def fpimmhalf : PatLeaf<(fpimm), [{return N->isExactlyValue(+0.5);}]>;
+def fpnegimmhalf : PatLeaf<(fpimm), [{return N->isExactlyValue(-0.5);}]>;
// VIS instruction patterns.
let Predicates = [HasVIS] in {
@@ -293,6 +295,16 @@ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
// VIS3 instruction patterns.
let Predicates = [HasVIS3] in {
+// +/-0.5 immediate.
+// This is needed to enable halving instructions.
+// FIXME: Generalize this to arbitrary immediates.
+// A SET/MOVWTOS or SETX/MOVXTOD pair should let us materialize FP constants
+// faster than loading them from the constant pool.
+def : Pat<(f32 fpimmhalf), (MOVWTOS (SETHIi 0x0FC000))>;
+def : Pat<(f32 fpnegimmhalf), (MOVWTOS (SETHIi 0x2FC000))>;
+def : Pat<(f64 fpimmhalf), (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
+def : Pat<(f64 fpnegimmhalf), (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;
+
def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;
def : Pat<(i64 (mulhu i64:$lhs, i64:$rhs)), (UMULXHI $lhs, $rhs)>;
@@ -329,4 +341,12 @@ def : Pat<(f64 (fmul f64:$rs1, (fneg f64:$rs2))), (FNMULD $rs1, $rs2)>;
def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
def : Pat<(f64 (fmul (fneg (fpextend f32:$rs1)), (fpextend f32:$rs2))), (FNSMULD $rs1, $rs2)>;
def : Pat<(f64 (fmul (fpextend f32:$rs1), (fneg (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
+
+// Op-then-halve FP operations.
+def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)), (FHADDS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)), (FHADDD $rs1, $rs2)>;
+def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)), (FHSUBS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)), (FHSUBD $rs1, $rs2)>;
+def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)), (FNHADDS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)), (FNHADDD $rs1, $rs2)>;
} // Predicates = [HasVIS3]
diff --git a/llvm/test/CodeGen/SPARC/float-constants.ll b/llvm/test/CodeGen/SPARC/float-constants.ll
index 440c75bfca9f9..f8b600c330a31 100644
--- a/llvm/test/CodeGen/SPARC/float-constants.ll
+++ b/llvm/test/CodeGen/SPARC/float-constants.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=sparc | FileCheck %s
; RUN: llc < %s -mtriple=sparcel | FileCheck %s --check-prefix=CHECK-LE
; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis | FileCheck %s --check-prefix=CHECK-VIS
+; RUN: llc < %s -mtriple=sparcv9 -mattr=+vis,+vis3 | FileCheck %s --check-prefix=CHECK-VIS3
;; Bitcast should not do a runtime conversion, but rather emit a
;; constant into integer registers directly.
@@ -24,6 +25,12 @@ define <2 x i32> @bitcast() nounwind {
; CHECK-VIS-NEXT: sethi 1049856, %o0
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: mov %g0, %o1
+;
+; CHECK-VIS3-LABEL: bitcast:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: sethi 1049856, %o0
+; CHECK-VIS3-NEXT: retl
+; CHECK-VIS3-NEXT: mov %g0, %o1
%1 = bitcast double 5.0 to <2 x i32>
ret <2 x i32> %1
}
@@ -61,6 +68,17 @@ define void @test_call() nounwind {
; CHECK-VIS-NEXT: ldd [%i0+%l44(.LCPI1_0)], %f0
; CHECK-VIS-NEXT: ret
; CHECK-VIS-NEXT: restore
+;
+; CHECK-VIS3-LABEL: test_call:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: save %sp, -176, %sp
+; CHECK-VIS3-NEXT: sethi %h44(.LCPI1_0), %i0
+; CHECK-VIS3-NEXT: add %i0, %m44(.LCPI1_0), %i0
+; CHECK-VIS3-NEXT: sllx %i0, 12, %i0
+; CHECK-VIS3-NEXT: call a
+; CHECK-VIS3-NEXT: ldd [%i0+%l44(.LCPI1_0)], %f0
+; CHECK-VIS3-NEXT: ret
+; CHECK-VIS3-NEXT: restore
call void @a(double 5.0)
ret void
}
@@ -106,6 +124,19 @@ define double @test_intrins_call() nounwind {
; CHECK-VIS-NEXT: nop
; CHECK-VIS-NEXT: ret
; CHECK-VIS-NEXT: restore
+;
+; CHECK-VIS3-LABEL: test_intrins_call:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: save %sp, -176, %sp
+; CHECK-VIS3-NEXT: sethi %h44(.LCPI2_0), %i0
+; CHECK-VIS3-NEXT: add %i0, %m44(.LCPI2_0), %i0
+; CHECK-VIS3-NEXT: sllx %i0, 12, %i0
+; CHECK-VIS3-NEXT: ldd [%i0+%l44(.LCPI2_0)], %f0
+; CHECK-VIS3-NEXT: fmovd %f0, %f2
+; CHECK-VIS3-NEXT: call pow
+; CHECK-VIS3-NEXT: nop
+; CHECK-VIS3-NEXT: ret
+; CHECK-VIS3-NEXT: restore
%1 = call double @llvm.pow.f64(double 2.0, double 2.0)
ret double %1
}
@@ -129,6 +160,11 @@ define double @pos_zero_double() nounwind {
; CHECK-VIS: ! %bb.0:
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: fzero %f0
+;
+; CHECK-VIS3-LABEL: pos_zero_double:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: retl
+; CHECK-VIS3-NEXT: fzero %f0
ret double +0.0
}
@@ -150,6 +186,12 @@ define double @neg_zero_double() nounwind {
; CHECK-VIS-NEXT: fzero %f0
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: fnegd %f0, %f0
+;
+; CHECK-VIS3-LABEL: neg_zero_double:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: fzero %f0
+; CHECK-VIS3-NEXT: retl
+; CHECK-VIS3-NEXT: fnegd %f0, %f0
ret double -0.0
}
@@ -170,6 +212,11 @@ define float @pos_zero_float() nounwind {
; CHECK-VIS: ! %bb.0:
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: fzeros %f0
+;
+; CHECK-VIS3-LABEL: pos_zero_float:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: retl
+; CHECK-VIS3-NEXT: fzeros %f0
ret float +0.0
}
@@ -191,5 +238,131 @@ define float @neg_zero_float() nounwind {
; CHECK-VIS-NEXT: fzeros %f0
; CHECK-VIS-NEXT: retl
; CHECK-VIS-NEXT: fnegs %f0, %f0
+;
+; CHECK-VIS3-LABEL: neg_zero_float:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: fzeros %f0
+; CHECK-VIS3-NEXT: retl
+; CHECK-VIS3-NEXT: fnegs %f0, %f0
ret float -0.0
}
+
+;; When we have VIS3, f32/f64 +/-0.5 constants should be materialized from sethi.
+
+define double @pos_half_double() nounwind {
+; CHECK-LABEL: pos_half_double:
+; CHECK: ! %bb.0:
+; CHECK-NEXT: sethi %hi(.LCPI7_0), %o0
+; CHECK-NEXT: retl
+; CHECK-NEXT: ldd [%o0+%lo(.LCPI7_0)], %f0
+;
+; CHECK-LE-LABEL: pos_half_double:
+; CHECK-LE: ! %bb.0:
+; CHECK-LE-NEXT: sethi %hi(.LCPI7_0), %o0
+; CHECK-LE-NEXT: retl
+; CHECK-LE-NEXT: ldd [%o0+%lo(.LCPI7_0)], %f0
+;
+; CHECK-VIS-LABEL: pos_half_double:
+; CHECK-VIS: ! %bb.0:
+; CHECK-VIS-NEXT: sethi %h44(.LCPI7_0), %o0
+; CHECK-VIS-NEXT: add %o0, %m44(.LCPI7_0), %o0
+; CHECK-VIS-NEXT: sllx %o0, 12, %o0
+; CHECK-VIS-NEXT: retl
+; CHECK-VIS-NEXT: ldd [%o0+%l44(.LCPI7_0)], %f0
+;
+; CHECK-VIS3-LABEL: pos_half_double:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: sethi 1046528, %o0
+; CHECK-VIS3-NEXT: sllx %o0, 32, %o0
+; CHECK-VIS3-NEXT: retl
+; CHECK-VIS3-NEXT: movxtod %o0, %f0
+ ret double +0.5
+}
+
+define double @neg_half_double() nounwind {
+; CHECK-LABEL: neg_half_double:
+; CHECK: ! %bb.0:
+; CHECK-NEXT: sethi %hi(.LCPI8_0), %o0
+; CHECK-NEXT: retl
+; CHECK-NEXT: ldd [%o0+%lo(.LCPI8_0)], %f0
+;
+; CHECK-LE-LABEL: neg_half_double:
+; CHECK-LE: ! %bb.0:
+; CHECK-LE-NEXT: sethi %hi(.LCPI8_0), %o0
+; CHECK-LE-NEXT: retl
+; CHECK-LE-NEXT: ldd [%o0+%lo(.LCPI8_0)], %f0
+;
+; CHECK-VIS-LABEL: neg_half_double:
+; CHECK-VIS: ! %bb.0:
+; CHECK-VIS-NEXT: sethi %h44(.LCPI8_0), %o0
+; CHECK-VIS-NEXT: add %o0, %m44(.LCPI8_0), %o0
+; CHECK-VIS-NEXT: sllx %o0, 12, %o0
+; CHECK-VIS-NEXT: retl
+; CHECK-VIS-NEXT: ldd [%o0+%l44(.LCPI8_0)], %f0
+;
+; CHECK-VIS3-LABEL: neg_half_double:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: sethi 3143680, %o0
+; CHECK-VIS3-NEXT: sllx %o0, 32, %o0
+; CHECK-VIS3-NEXT: retl
+; CHECK-VIS3-NEXT: movxtod %o0, %f0
+ ret double -0.5
+}
+
+define float @pos_half_float() nounwind {
+; CHECK-LABEL: pos_half_float:
+; CHECK: ! %bb.0:
+; CHECK-NEXT: sethi %hi(.LCPI9_0), %o0
+; CHECK-NEXT: retl
+; CHECK-NEXT: ld [%o0+%lo(.LCPI9_0)], %f0
+;
+; CHECK-LE-LABEL: pos_half_float:
+; CHECK-LE: ! %bb.0:
+; CHECK-LE-NEXT: sethi %hi(.LCPI9_0), %o0
+; CHECK-LE-NEXT: retl
+; CHECK-LE-NEXT: ld [%o0+%lo(.LCPI9_0)], %f0
+;
+; CHECK-VIS-LABEL: pos_half_float:
+; CHECK-VIS: ! %bb.0:
+; CHECK-VIS-NEXT: sethi %h44(.LCPI9_0), %o0
+; CHECK-VIS-NEXT: add %o0, %m44(.LCPI9_0), %o0
+; CHECK-VIS-NEXT: sllx %o0, 12, %o0
+; CHECK-VIS-NEXT: retl
+; CHECK-VIS-NEXT: ld [%o0+%l44(.LCPI9_0)], %f0
+;
+; CHECK-VIS3-LABEL: pos_half_float:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: sethi 1032192, %o0
+; CHECK-VIS3-NEXT: retl
+; CHECK-VIS3-NEXT: movwtos %o0, %f0
+ ret float +0.5
+}
+
+define float @neg_half_float() nounwind {
+; CHECK-LABEL: neg_half_float:
+; CHECK: ! %bb.0:
+; CHECK-NEXT: sethi %hi(.LCPI10_0), %o0
+; CHECK-NEXT: retl
+; CHECK-NEXT: ld [%o0+%lo(.LCPI10_0)], %f0
+;
+; CHECK-LE-LABEL: neg_half_float:
+; CHECK-LE: ! %bb.0:
+; CHECK-LE-NEXT: sethi %hi(.LCPI10_0), %o0
+; CHECK-LE-NEXT: retl
+; CHECK-LE-NEXT: ld [%o0+%lo(.LCPI10_0)], %f0
+;
+; CHECK-VIS-LABEL: neg_half_float:
+; CHECK-VIS: ! %bb.0:
+; CHECK-VIS-NEXT: sethi %h44(.LCPI10_0), %o0
+; CHECK-VIS-NEXT: add %o0, %m44(.LCPI10_0), %o0
+; CHECK-VIS-NEXT: sllx %o0, 12, %o0
+; CHECK-VIS-NEXT: retl
+; CHECK-VIS-NEXT: ld [%o0+%l44(.LCPI10_0)], %f0
+;
+; CHECK-VIS3-LABEL: neg_half_float:
+; CHECK-VIS3: ! %bb.0:
+; CHECK-VIS3-NEXT: sethi 3129344, %o0
+; CHECK-VIS3-NEXT: retl
+; CHECK-VIS3-NEXT: movwtos %o0, %f0
+ ret float -0.5
+}
diff --git a/llvm/test/CodeGen/SPARC/float-vis3.ll b/llvm/test/CodeGen/SPARC/float-vis3.ll
index bc9904dfa356a..1bae634e0dff8 100644
--- a/llvm/test/CodeGen/SPARC/float-vis3.ll
+++ b/llvm/test/CodeGen/SPARC/float-vis3.ll
@@ -57,3 +57,69 @@ entry:
%fneg = fneg double %mul
ret double %fneg
}
+
+define float @fhadds(float %a, float %b) nounwind {
+; CHECK-LABEL: fhadds:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: retl
+; CHECK-NEXT: fhadds %f1, %f3, %f0
+entry:
+ %add = fadd float %a, %b
+ %div = fmul float %add, 5.000000e-01
+ ret float %div
+}
+
+define double @fhaddd(double %a, double %b) nounwind {
+; CHECK-LABEL: fhaddd:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: retl
+; CHECK-NEXT: fhaddd %f0, %f2, %f0
+entry:
+ %add = fadd double %a, %b
+ %div = fmul double %add, 5.000000e-01
+ ret double %div
+}
+
+define float @fhsubs(float %a, float %b) nounwind {
+; CHECK-LABEL: fhsubs:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: retl
+; CHECK-NEXT: fhsubs %f1, %f3, %f0
+entry:
+ %sub = fsub float %a, %b
+ %div = fmul float %sub, 5.000000e-01
+ ret float %div
+}
+
+define double @fhsubd(double %a, double %b) nounwind {
+; CHECK-LABEL: fhsubd:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: retl
+; CHECK-NEXT: fhsubd %f0, %f2, %f0
+entry:
+ %sub = fsub double %a, %b
+ %div = fmul double %sub, 5.000000e-01
+ ret double %div
+}
+
+define float @fnhadds(float %a, float %b) nounwind {
+; CHECK-LABEL: fnhadds:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: retl
+; CHECK-NEXT: fnhadds %f1, %f3, %f0
+entry:
+ %add.i = fadd float %a, %b
+ %fneg = fmul float %add.i, -5.000000e-01
+ ret float %fneg
+}
+
+define double @fnhaddd(double %a, double %b) nounwind {
+; CHECK-LABEL: fnhaddd:
+; CHECK: ! %bb.0: ! %entry
+; CHECK-NEXT: retl
+; CHECK-NEXT: fnhaddd %f0, %f2, %f0
+entry:
+ %add.i = fadd double %a, %b
+ %fneg = fmul double %add.i, -5.000000e-01
+ ret double %fneg
+}
|
Created using spr 1.3.5
Created using spr 1.3.5
Created using spr 1.3.5
if (VT != MVT::f32 && VT != MVT::f64) | ||
return false; | ||
if (Imm.isZero()) | ||
return Subtarget->isVIS(); | ||
if (Imm.isExactlyValue(+0.5) || Imm.isExactlyValue(-0.5)) | ||
return Subtarget->isVIS3(); | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you do this separately? I'd also expect to do the target check before the value check
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've reordered the check so the target check is done first, but what do you mean by separately?
Created using spr 1.3.5
Ping? |
if (Subtarget->isVIS()) | ||
CanLower = CanLower || Imm.isZero(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (Subtarget->isVIS()) | |
CanLower = CanLower || Imm.isZero(); | |
if (Subtarget->isVIS() && Imm.isZero()) | |
return true; |
Did this mean to include -0?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, +0 and -0.
@@ -3613,13 +3613,15 @@ bool SparcTargetLowering::isFNegFree(EVT VT) const { | ||
|
||
bool SparcTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, | ||
bool ForCodeSize) const { | ||
bool CanLower = false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bool CanLower = false; |
if (Subtarget->isVIS3()) | ||
CanLower = | ||
CanLower || (Imm.isExactlyValue(+0.5) || Imm.isExactlyValue(-0.5)); | ||
return CanLower; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (Subtarget->isVIS3()) | |
CanLower = | |
CanLower || (Imm.isExactlyValue(+0.5) || Imm.isExactlyValue(-0.5)); | |
return CanLower; | |
if (Subtarget->isVIS3()) | |
return Imm.isExactlyValue(+0.5) || Imm.isExactlyValue(-0.5); | |
return false; |
Could also use getExactLog2Abs == -1?
Created using spr 1.3.5
No description provided.