-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[X86][GlobalISel] Support fp80 for G_FPTRUNC and G_FPEXT #141611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
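@llvm/pr-subscribers-llvm-globalisel Author: Evgenii Kudriashov (e-kud) Changes: We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one. Full diff: https://github.com/llvm/llvm-project/pull/141611.diff 4 Files Affected: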
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index f008cb1bea839..58215d4e00202 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -376,9 +376,15 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
Action.legalForTypesWithMemDesc({{s8, p0, s8, 1},
{s16, p0, s16, 1},
{s32, p0, s32, 1},
- {s80, p0, s80, 1},
{p0, p0, p0, 1},
{v4s8, p0, v4s8, 1}});
+
+ if (UseX87)
+ Action.legalForTypesWithMemDesc({{s80, p0, s32, 1},
+ {s80, p0, s64, 1},
+ {s32, p0, s80, 1},
+ {s64, p0, s80, 1},
+ {s80, p0, s80, 1}});
if (Is64Bit)
Action.legalForTypesWithMemDesc(
{{s64, p0, s64, 1}, {v2s32, p0, v2s32, 1}});
@@ -476,18 +482,17 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
.widenScalarToNextPow2(1);
// fp conversions
- getActionDefinitionsBuilder(G_FPEXT).legalIf([=](const LegalityQuery &Query) {
- return (HasSSE2 && typePairInSet(0, 1, {{s64, s32}})(Query)) ||
- (HasAVX && typePairInSet(0, 1, {{v4s64, v4s32}})(Query)) ||
- (HasAVX512 && typePairInSet(0, 1, {{v8s64, v8s32}})(Query));
- });
-
- getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
- [=](const LegalityQuery &Query) {
- return (HasSSE2 && typePairInSet(0, 1, {{s32, s64}})(Query)) ||
- (HasAVX && typePairInSet(0, 1, {{v4s32, v4s64}})(Query)) ||
- (HasAVX512 && typePairInSet(0, 1, {{v8s32, v8s64}})(Query));
- });
+ getActionDefinitionsBuilder(G_FPEXT)
+ .legalFor(HasSSE2, {{s64, s32}})
+ .legalFor(HasAVX, {{v4s64, v4s32}})
+ .legalFor(HasAVX512, {{v8s64, v8s32}})
+ .customFor(UseX87, {{s64, s32}, {s80, s32}, {s80, s64}});
+
+ getActionDefinitionsBuilder(G_FPTRUNC)
+ .legalFor(HasSSE2, {{s32, s64}})
+ .legalFor(HasAVX, {{v4s32, v4s64}})
+ .legalFor(HasAVX512, {{v8s32, v8s64}})
+ .customFor(UseX87, {{s32, s64}, {s32, s80}, {s64, s80}});
getActionDefinitionsBuilder(G_SITOFP)
.legalIf([=](const LegalityQuery &Query) {
@@ -671,6 +676,9 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
return legalizeUITOFP(MI, MRI, Helper);
case TargetOpcode::G_STORE:
return legalizeNarrowingStore(MI, MRI, Helper);
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ return legalizeFPExtAndTrunc(MI, MRI, Helper);
}
llvm_unreachable("expected switch to return");
}
@@ -781,6 +789,33 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
return true;
}
+bool X86LegalizerInfo::legalizeFPExtAndTrunc(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
+ MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
+ "Only G_FPEXT and G_FPTRUNC are expected");
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ MachinePointerInfo PtrInfo;
+ LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? SrcTy : DstTy;
+ Align StackTyAlign = Helper.getStackTemporaryAlignment(StackTy);
+ auto StackTemp = Helper.createStackTemporary(StackTy.getSizeInBytes(),
+ StackTyAlign, PtrInfo);
+
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineFunction &MF = MIRBuilder.getMF();
+ auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ StackTy, StackTyAlign);
+ MIRBuilder.buildStore(SrcReg, StackTemp, *StoreMMO);
+
+ auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ StackTy, StackTyAlign);
+ MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
return true;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 54f776456397b..b224f3f46a2d5 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -48,6 +48,9 @@ class X86LegalizerInfo : public LegalizerInfo {
bool legalizeNarrowingStore(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
+
+ bool legalizeFPExtAndTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
};
} // namespace llvm
#endif
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll
deleted file mode 100644
index 8501009e2915a..0000000000000
--- a/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK
-
-define double @test(float %a) {
-; CHECK-LABEL: test:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
-; CHECK-NEXT: retq
-entry:
- %conv = fpext float %a to double
- ret double %conv
-}
diff --git a/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll
new file mode 100644
index 0000000000000..0ad9b90806ce9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll
@@ -0,0 +1,279 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes X86,FASTSDAG-X86,SDAG-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,SDAG-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,SDAG-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,SDAG-AVX
+; COMM: FastISel has troubles with fp80 type
+; RUN: llc < %s -mtriple=i686-unknown-unknown -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes X86,FASTSDAG-X86,FAST-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,FAST-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX
+; COMM: GlobalISel can't legalize double stores on 32bit platform due to lack of double/integer distinguish during legalization
+; RUN: llc < %s -mtriple=i686-unknown-unknown -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes X86,GLOBAL-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes SSE,GLOBAL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX
+
+define double @fpext_float_to_double(float %f) {
+; X86-LABEL: fpext_float_to_double:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: retl
+;
+; SSE-LABEL: fpext_float_to_double:
+; SSE: # %bb.0:
+; SSE-NEXT: cvtss2sd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fpext_float_to_double:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = fpext float %f to double
+ ret double %1
+}
+
+define x86_fp80 @fpext_float_to_x86_fp80(float %f) {
+; FASTSDAG-X86-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-X86: # %bb.0:
+; FASTSDAG-X86-NEXT: flds {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT: retl
+;
+; FASTSDAG-SSE-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-SSE: # %bb.0:
+; FASTSDAG-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: flds -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: retq
+;
+; FASTSDAG-AVX-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-AVX: # %bb.0:
+; FASTSDAG-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: flds -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: retq
+;
+; GLOBAL-X86-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-X86: # %bb.0:
+; GLOBAL-X86-NEXT: pushl %eax
+; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GLOBAL-X86-NEXT: movl %eax, (%esp)
+; GLOBAL-X86-NEXT: flds (%esp)
+; GLOBAL-X86-NEXT: popl %eax
+; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 4
+; GLOBAL-X86-NEXT: retl
+;
+; GLOBAL-SSE-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-SSE: # %bb.0:
+; GLOBAL-SSE-NEXT: movd %xmm0, %eax
+; GLOBAL-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: flds -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: retq
+;
+; GLOBAL-AVX-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-AVX: # %bb.0:
+; GLOBAL-AVX-NEXT: vmovd %xmm0, %eax
+; GLOBAL-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: flds -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: retq
+ %1 = fpext float %f to x86_fp80
+ ret x86_fp80 %1
+}
+
+define x86_fp80 @fpext_double_to_x86_fp80(double %d) {
+; FASTSDAG-X86-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-X86: # %bb.0:
+; FASTSDAG-X86-NEXT: fldl {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT: retl
+;
+; FASTSDAG-SSE-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-SSE: # %bb.0:
+; FASTSDAG-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: fldl -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: retq
+;
+; FASTSDAG-AVX-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-AVX: # %bb.0:
+; FASTSDAG-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: fldl -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: retq
+;
+; GLOBAL-X86-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-X86: # %bb.0:
+; GLOBAL-X86-NEXT: pushl %ebp
+; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT: .cfi_offset %ebp, -8
+; GLOBAL-X86-NEXT: movl %esp, %ebp
+; GLOBAL-X86-NEXT: .cfi_def_cfa_register %ebp
+; GLOBAL-X86-NEXT: andl $-8, %esp
+; GLOBAL-X86-NEXT: subl $8, %esp
+; GLOBAL-X86-NEXT: leal 8(%ebp), %eax
+; GLOBAL-X86-NEXT: movl 8(%ebp), %ecx
+; GLOBAL-X86-NEXT: movl 4(%eax), %eax
+; GLOBAL-X86-NEXT: movl %esp, %edx
+; GLOBAL-X86-NEXT: movl %ecx, (%esp)
+; GLOBAL-X86-NEXT: movl %eax, 4(%edx)
+; GLOBAL-X86-NEXT: fldl (%esp)
+; GLOBAL-X86-NEXT: movl %ebp, %esp
+; GLOBAL-X86-NEXT: popl %ebp
+; GLOBAL-X86-NEXT: .cfi_def_cfa %esp, 4
+; GLOBAL-X86-NEXT: retl
+;
+; GLOBAL-SSE-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-SSE: # %bb.0:
+; GLOBAL-SSE-NEXT: movq %xmm0, %rax
+; GLOBAL-SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: fldl -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: retq
+;
+; GLOBAL-AVX-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-AVX: # %bb.0:
+; GLOBAL-AVX-NEXT: vmovq %xmm0, %rax
+; GLOBAL-AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: fldl -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: retq
+ %1 = fpext double %d to x86_fp80
+ ret x86_fp80 %1
+}
+
+define float @fptrunc_double_to_float(double %d) {
+; FASTSDAG-X86-LABEL: fptrunc_double_to_float:
+; FASTSDAG-X86: # %bb.0:
+; FASTSDAG-X86-NEXT: pushl %eax
+; FASTSDAG-X86-NEXT: .cfi_def_cfa_offset 8
+; FASTSDAG-X86-NEXT: fldl {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT: fstps (%esp)
+; FASTSDAG-X86-NEXT: flds (%esp)
+; FASTSDAG-X86-NEXT: popl %eax
+; FASTSDAG-X86-NEXT: .cfi_def_cfa_offset 4
+; FASTSDAG-X86-NEXT: retl
+;
+; SSE-LABEL: fptrunc_double_to_float:
+; SSE: # %bb.0:
+; SSE-NEXT: cvtsd2ss %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptrunc_double_to_float:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; GLOBAL-X86-LABEL: fptrunc_double_to_float:
+; GLOBAL-X86: # %bb.0:
+; GLOBAL-X86-NEXT: pushl %eax
+; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; GLOBAL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GLOBAL-X86-NEXT: movl 4(%eax), %eax
+; GLOBAL-X86-NEXT: movl %esp, %edx
+; GLOBAL-X86-NEXT: movl %ecx, (%esp)
+; GLOBAL-X86-NEXT: movl %eax, 4(%edx)
+; GLOBAL-X86-NEXT: flds (%esp)
+; GLOBAL-X86-NEXT: popl %eax
+; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 4
+; GLOBAL-X86-NEXT: retl
+ %1 = fptrunc double %d to float
+ ret float %1
+}
+
+define float @fptrunc_x86_fp80_to_float(x86_fp80 %x) {
+; X86-LABEL: fptrunc_x86_fp80_to_float:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; FASTSDAG-SSE-LABEL: fptrunc_x86_fp80_to_float:
+; FASTSDAG-SSE: # %bb.0:
+; FASTSDAG-SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: fstps -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; FASTSDAG-SSE-NEXT: retq
+;
+; FASTSDAG-AVX-LABEL: fptrunc_x86_fp80_to_float:
+; FASTSDAG-AVX: # %bb.0:
+; FASTSDAG-AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: fstps -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; FASTSDAG-AVX-NEXT: retq
+;
+; GLOBAL-SSE-LABEL: fptrunc_x86_fp80_to_float:
+; GLOBAL-SSE: # %bb.0:
+; GLOBAL-SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: fstps -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; GLOBAL-SSE-NEXT: movd %eax, %xmm0
+; GLOBAL-SSE-NEXT: retq
+;
+; GLOBAL-AVX-LABEL: fptrunc_x86_fp80_to_float:
+; GLOBAL-AVX: # %bb.0:
+; GLOBAL-AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: fstps -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; GLOBAL-AVX-NEXT: vmovd %eax, %xmm0
+; GLOBAL-AVX-NEXT: retq
+ %1 = fptrunc x86_fp80 %x to float
+ ret float %1
+}
+
+define double @fptrunc_x86_fp80_to_double(x86_fp80 %x) {
+; X86-LABEL: fptrunc_x86_fp80_to_double:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: fldt 8(%ebp)
+; X86-NEXT: fstpl (%esp)
+; X86-NEXT: fldl (%esp)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+;
+; FASTSDAG-SSE-LABEL: fptrunc_x86_fp80_to_double:
+; FASTSDAG-SSE: # %bb.0:
+; FASTSDAG-SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: fstpl -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; FASTSDAG-SSE-NEXT: retq
+;
+; FASTSDAG-AVX-LABEL: fptrunc_x86_fp80_to_double:
+; FASTSDAG-AVX: # %bb.0:
+; FASTSDAG-AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: fstpl -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; FASTSDAG-AVX-NEXT: retq
+;
+; GLOBAL-SSE-LABEL: fptrunc_x86_fp80_to_double:
+; GLOBAL-SSE: # %bb.0:
+; GLOBAL-SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: fstpl -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; GLOBAL-SSE-NEXT: movq %rax, %xmm0
+; GLOBAL-SSE-NEXT: retq
+;
+; GLOBAL-AVX-LABEL: fptrunc_x86_fp80_to_double:
+; GLOBAL-AVX: # %bb.0:
+; GLOBAL-AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: fstpl -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; GLOBAL-AVX-NEXT: vmovq %rax, %xmm0
+; GLOBAL-AVX-NEXT: retq
+ %1 = fptrunc x86_fp80 %x to double
+ ret double %1
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FAST-AVX: {{.*}}
+; FAST-SSE: {{.*}}
+; FAST-X86: {{.*}}
+; SDAG-AVX: {{.*}}
+; SDAG-SSE: {{.*}}
+; SDAG-X86: {{.*}}
|
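@llvm/pr-subscribers-backend-x86 Author: Evgenii Kudriashov (e-kud) Changes: We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one. Full diff: https://github.com/llvm/llvm-project/pull/141611.diff 4 Files Affected: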
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index f008cb1bea839..58215d4e00202 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -376,9 +376,15 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
Action.legalForTypesWithMemDesc({{s8, p0, s8, 1},
{s16, p0, s16, 1},
{s32, p0, s32, 1},
- {s80, p0, s80, 1},
{p0, p0, p0, 1},
{v4s8, p0, v4s8, 1}});
+
+ if (UseX87)
+ Action.legalForTypesWithMemDesc({{s80, p0, s32, 1},
+ {s80, p0, s64, 1},
+ {s32, p0, s80, 1},
+ {s64, p0, s80, 1},
+ {s80, p0, s80, 1}});
if (Is64Bit)
Action.legalForTypesWithMemDesc(
{{s64, p0, s64, 1}, {v2s32, p0, v2s32, 1}});
@@ -476,18 +482,17 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
.widenScalarToNextPow2(1);
// fp conversions
- getActionDefinitionsBuilder(G_FPEXT).legalIf([=](const LegalityQuery &Query) {
- return (HasSSE2 && typePairInSet(0, 1, {{s64, s32}})(Query)) ||
- (HasAVX && typePairInSet(0, 1, {{v4s64, v4s32}})(Query)) ||
- (HasAVX512 && typePairInSet(0, 1, {{v8s64, v8s32}})(Query));
- });
-
- getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
- [=](const LegalityQuery &Query) {
- return (HasSSE2 && typePairInSet(0, 1, {{s32, s64}})(Query)) ||
- (HasAVX && typePairInSet(0, 1, {{v4s32, v4s64}})(Query)) ||
- (HasAVX512 && typePairInSet(0, 1, {{v8s32, v8s64}})(Query));
- });
+ getActionDefinitionsBuilder(G_FPEXT)
+ .legalFor(HasSSE2, {{s64, s32}})
+ .legalFor(HasAVX, {{v4s64, v4s32}})
+ .legalFor(HasAVX512, {{v8s64, v8s32}})
+ .customFor(UseX87, {{s64, s32}, {s80, s32}, {s80, s64}});
+
+ getActionDefinitionsBuilder(G_FPTRUNC)
+ .legalFor(HasSSE2, {{s32, s64}})
+ .legalFor(HasAVX, {{v4s32, v4s64}})
+ .legalFor(HasAVX512, {{v8s32, v8s64}})
+ .customFor(UseX87, {{s32, s64}, {s32, s80}, {s64, s80}});
getActionDefinitionsBuilder(G_SITOFP)
.legalIf([=](const LegalityQuery &Query) {
@@ -671,6 +676,9 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
return legalizeUITOFP(MI, MRI, Helper);
case TargetOpcode::G_STORE:
return legalizeNarrowingStore(MI, MRI, Helper);
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ return legalizeFPExtAndTrunc(MI, MRI, Helper);
}
llvm_unreachable("expected switch to return");
}
@@ -781,6 +789,33 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
return true;
}
+bool X86LegalizerInfo::legalizeFPExtAndTrunc(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
+ MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
+ "Only G_FPEXT and G_FPTRUNC are expected");
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ MachinePointerInfo PtrInfo;
+ LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? SrcTy : DstTy;
+ Align StackTyAlign = Helper.getStackTemporaryAlignment(StackTy);
+ auto StackTemp = Helper.createStackTemporary(StackTy.getSizeInBytes(),
+ StackTyAlign, PtrInfo);
+
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineFunction &MF = MIRBuilder.getMF();
+ auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ StackTy, StackTyAlign);
+ MIRBuilder.buildStore(SrcReg, StackTemp, *StoreMMO);
+
+ auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ StackTy, StackTyAlign);
+ MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
return true;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 54f776456397b..b224f3f46a2d5 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -48,6 +48,9 @@ class X86LegalizerInfo : public LegalizerInfo {
bool legalizeNarrowingStore(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
+
+ bool legalizeFPExtAndTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
};
} // namespace llvm
#endif
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll
deleted file mode 100644
index 8501009e2915a..0000000000000
--- a/llvm/test/CodeGen/X86/GlobalISel/fpext-scalar.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=CHECK
-
-define double @test(float %a) {
-; CHECK-LABEL: test:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: cvtss2sd %xmm0, %xmm0
-; CHECK-NEXT: retq
-entry:
- %conv = fpext float %a to double
- ret double %conv
-}
diff --git a/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll
new file mode 100644
index 0000000000000..0ad9b90806ce9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fptrunc-fpext.ll
@@ -0,0 +1,279 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes X86,FASTSDAG-X86,SDAG-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,SDAG-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,SDAG-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,SDAG-AVX
+; COMM: FastISel has troubles with fp80 type
+; RUN: llc < %s -mtriple=i686-unknown-unknown -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes X86,FASTSDAG-X86,FAST-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes SSE,FASTSDAG-SSE,FAST-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=1 -global-isel=0 -fast-isel-abort=0 | FileCheck %s --check-prefixes AVX,FASTSDAG-AVX,FAST-AVX
+; COMM: GlobalISel can't legalize double stores on 32bit platform due to lack of double/integer distinguish during legalization
+; RUN: llc < %s -mtriple=i686-unknown-unknown -fast-isel=0 -global-isel=1 -global-isel-abort=2 | FileCheck %s --check-prefixes X86,GLOBAL-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes SSE,GLOBAL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -fast-isel=0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefixes AVX,GLOBAL-AVX
+
+define double @fpext_float_to_double(float %f) {
+; X86-LABEL: fpext_float_to_double:
+; X86: # %bb.0:
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: retl
+;
+; SSE-LABEL: fpext_float_to_double:
+; SSE: # %bb.0:
+; SSE-NEXT: cvtss2sd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fpext_float_to_double:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = fpext float %f to double
+ ret double %1
+}
+
+define x86_fp80 @fpext_float_to_x86_fp80(float %f) {
+; FASTSDAG-X86-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-X86: # %bb.0:
+; FASTSDAG-X86-NEXT: flds {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT: retl
+;
+; FASTSDAG-SSE-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-SSE: # %bb.0:
+; FASTSDAG-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: flds -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: retq
+;
+; FASTSDAG-AVX-LABEL: fpext_float_to_x86_fp80:
+; FASTSDAG-AVX: # %bb.0:
+; FASTSDAG-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: flds -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: retq
+;
+; GLOBAL-X86-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-X86: # %bb.0:
+; GLOBAL-X86-NEXT: pushl %eax
+; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GLOBAL-X86-NEXT: movl %eax, (%esp)
+; GLOBAL-X86-NEXT: flds (%esp)
+; GLOBAL-X86-NEXT: popl %eax
+; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 4
+; GLOBAL-X86-NEXT: retl
+;
+; GLOBAL-SSE-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-SSE: # %bb.0:
+; GLOBAL-SSE-NEXT: movd %xmm0, %eax
+; GLOBAL-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: flds -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: retq
+;
+; GLOBAL-AVX-LABEL: fpext_float_to_x86_fp80:
+; GLOBAL-AVX: # %bb.0:
+; GLOBAL-AVX-NEXT: vmovd %xmm0, %eax
+; GLOBAL-AVX-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: flds -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: retq
+ %1 = fpext float %f to x86_fp80
+ ret x86_fp80 %1
+}
+
+define x86_fp80 @fpext_double_to_x86_fp80(double %d) {
+; FASTSDAG-X86-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-X86: # %bb.0:
+; FASTSDAG-X86-NEXT: fldl {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT: retl
+;
+; FASTSDAG-SSE-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-SSE: # %bb.0:
+; FASTSDAG-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: fldl -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: retq
+;
+; FASTSDAG-AVX-LABEL: fpext_double_to_x86_fp80:
+; FASTSDAG-AVX: # %bb.0:
+; FASTSDAG-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: fldl -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: retq
+;
+; GLOBAL-X86-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-X86: # %bb.0:
+; GLOBAL-X86-NEXT: pushl %ebp
+; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT: .cfi_offset %ebp, -8
+; GLOBAL-X86-NEXT: movl %esp, %ebp
+; GLOBAL-X86-NEXT: .cfi_def_cfa_register %ebp
+; GLOBAL-X86-NEXT: andl $-8, %esp
+; GLOBAL-X86-NEXT: subl $8, %esp
+; GLOBAL-X86-NEXT: leal 8(%ebp), %eax
+; GLOBAL-X86-NEXT: movl 8(%ebp), %ecx
+; GLOBAL-X86-NEXT: movl 4(%eax), %eax
+; GLOBAL-X86-NEXT: movl %esp, %edx
+; GLOBAL-X86-NEXT: movl %ecx, (%esp)
+; GLOBAL-X86-NEXT: movl %eax, 4(%edx)
+; GLOBAL-X86-NEXT: fldl (%esp)
+; GLOBAL-X86-NEXT: movl %ebp, %esp
+; GLOBAL-X86-NEXT: popl %ebp
+; GLOBAL-X86-NEXT: .cfi_def_cfa %esp, 4
+; GLOBAL-X86-NEXT: retl
+;
+; GLOBAL-SSE-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-SSE: # %bb.0:
+; GLOBAL-SSE-NEXT: movq %xmm0, %rax
+; GLOBAL-SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: fldl -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: retq
+;
+; GLOBAL-AVX-LABEL: fpext_double_to_x86_fp80:
+; GLOBAL-AVX: # %bb.0:
+; GLOBAL-AVX-NEXT: vmovq %xmm0, %rax
+; GLOBAL-AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: fldl -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: retq
+ %1 = fpext double %d to x86_fp80
+ ret x86_fp80 %1
+}
+
+define float @fptrunc_double_to_float(double %d) {
+; FASTSDAG-X86-LABEL: fptrunc_double_to_float:
+; FASTSDAG-X86: # %bb.0:
+; FASTSDAG-X86-NEXT: pushl %eax
+; FASTSDAG-X86-NEXT: .cfi_def_cfa_offset 8
+; FASTSDAG-X86-NEXT: fldl {{[0-9]+}}(%esp)
+; FASTSDAG-X86-NEXT: fstps (%esp)
+; FASTSDAG-X86-NEXT: flds (%esp)
+; FASTSDAG-X86-NEXT: popl %eax
+; FASTSDAG-X86-NEXT: .cfi_def_cfa_offset 4
+; FASTSDAG-X86-NEXT: retl
+;
+; SSE-LABEL: fptrunc_double_to_float:
+; SSE: # %bb.0:
+; SSE-NEXT: cvtsd2ss %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptrunc_double_to_float:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; GLOBAL-X86-LABEL: fptrunc_double_to_float:
+; GLOBAL-X86: # %bb.0:
+; GLOBAL-X86-NEXT: pushl %eax
+; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 8
+; GLOBAL-X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; GLOBAL-X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GLOBAL-X86-NEXT: movl 4(%eax), %eax
+; GLOBAL-X86-NEXT: movl %esp, %edx
+; GLOBAL-X86-NEXT: movl %ecx, (%esp)
+; GLOBAL-X86-NEXT: movl %eax, 4(%edx)
+; GLOBAL-X86-NEXT: flds (%esp)
+; GLOBAL-X86-NEXT: popl %eax
+; GLOBAL-X86-NEXT: .cfi_def_cfa_offset 4
+; GLOBAL-X86-NEXT: retl
+ %1 = fptrunc double %d to float
+ ret float %1
+}
+
+define float @fptrunc_x86_fp80_to_float(x86_fp80 %x) {
+; X86-LABEL: fptrunc_x86_fp80_to_float:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: flds (%esp)
+; X86-NEXT: popl %eax
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+;
+; FASTSDAG-SSE-LABEL: fptrunc_x86_fp80_to_float:
+; FASTSDAG-SSE: # %bb.0:
+; FASTSDAG-SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: fstps -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; FASTSDAG-SSE-NEXT: retq
+;
+; FASTSDAG-AVX-LABEL: fptrunc_x86_fp80_to_float:
+; FASTSDAG-AVX: # %bb.0:
+; FASTSDAG-AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: fstps -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; FASTSDAG-AVX-NEXT: retq
+;
+; GLOBAL-SSE-LABEL: fptrunc_x86_fp80_to_float:
+; GLOBAL-SSE: # %bb.0:
+; GLOBAL-SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: fstps -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; GLOBAL-SSE-NEXT: movd %eax, %xmm0
+; GLOBAL-SSE-NEXT: retq
+;
+; GLOBAL-AVX-LABEL: fptrunc_x86_fp80_to_float:
+; GLOBAL-AVX: # %bb.0:
+; GLOBAL-AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: fstps -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax
+; GLOBAL-AVX-NEXT: vmovd %eax, %xmm0
+; GLOBAL-AVX-NEXT: retq
+ %1 = fptrunc x86_fp80 %x to float
+ ret float %1
+}
+
+define double @fptrunc_x86_fp80_to_double(x86_fp80 %x) {
+; X86-LABEL: fptrunc_x86_fp80_to_double:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %ebp, -8
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: .cfi_def_cfa_register %ebp
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: fldt 8(%ebp)
+; X86-NEXT: fstpl (%esp)
+; X86-NEXT: fldl (%esp)
+; X86-NEXT: movl %ebp, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: .cfi_def_cfa %esp, 4
+; X86-NEXT: retl
+;
+; FASTSDAG-SSE-LABEL: fptrunc_x86_fp80_to_double:
+; FASTSDAG-SSE: # %bb.0:
+; FASTSDAG-SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: fstpl -{{[0-9]+}}(%rsp)
+; FASTSDAG-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; FASTSDAG-SSE-NEXT: retq
+;
+; FASTSDAG-AVX-LABEL: fptrunc_x86_fp80_to_double:
+; FASTSDAG-AVX: # %bb.0:
+; FASTSDAG-AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: fstpl -{{[0-9]+}}(%rsp)
+; FASTSDAG-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; FASTSDAG-AVX-NEXT: retq
+;
+; GLOBAL-SSE-LABEL: fptrunc_x86_fp80_to_double:
+; GLOBAL-SSE: # %bb.0:
+; GLOBAL-SSE-NEXT: fldt {{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: fstpl -{{[0-9]+}}(%rsp)
+; GLOBAL-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; GLOBAL-SSE-NEXT: movq %rax, %xmm0
+; GLOBAL-SSE-NEXT: retq
+;
+; GLOBAL-AVX-LABEL: fptrunc_x86_fp80_to_double:
+; GLOBAL-AVX: # %bb.0:
+; GLOBAL-AVX-NEXT: fldt {{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: fstpl -{{[0-9]+}}(%rsp)
+; GLOBAL-AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; GLOBAL-AVX-NEXT: vmovq %rax, %xmm0
+; GLOBAL-AVX-NEXT: retq
+ %1 = fptrunc x86_fp80 %x to double
+ ret double %1
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FAST-AVX: {{.*}}
+; FAST-SSE: {{.*}}
+; FAST-X86: {{.*}}
+; SDAG-AVX: {{.*}}
+; SDAG-SSE: {{.*}}
+; SDAG-X86: {{.*}}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.
But you shouldn't need this? The FP-ness should be encoded in the load/store directly. i.e. are we missing FP ext load and store?
|
auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
                                        StackTy, StackTyAlign);
MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be a generic lower action that creates an FP extending load / FP truncating store?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it looks quite generic. Let me move it to generic lower actions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm we have a conflicting FP64->FP16 lowering
llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Lines 8010 to 8019 in 59b7b5b
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
  auto [DstTy, SrcTy] = MI.getFirst2LLTs();
  const LLT S64 = LLT::scalar(64);
  const LLT S16 = LLT::scalar(16);
  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
    return lowerFPTRUNC_F64_TO_F16(MI);
  return UnableToLegalize;
}
It doesn't hurt X86, but is it OK for the generic lowering to perform the FP64->FP16 conversion through scalar operations while lowering the other conversions through memory?
Yes, we don't have them:
IIUC, this should be resolved once FP types are added to LLT (https://discourse.llvm.org/t/rfc-globalisel-adding-fp-type-information-to-llt/83349). After that, the LLT type will show whether a store or load is a floating-point or an integer operation, and the MMO will show whether it is extending or truncating.
We intentionally omit s32->s64 and s64->s32 variants of stores and loads for X87 because during legalization we can't determine whether it is a floating point store or an integer one.