From 44f07e02f379ffa7303d52ff03a8ccaadc28873c Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 01:00:46 +0300 Subject: [PATCH 01/14] AMDGPU --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 1 + llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 160 +------------ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 223 ------------------ .../Target/AMDGPU/AMDGPUSelectionDAGInfo.cpp | 33 ++- .../Target/AMDGPU/AMDGPUSelectionDAGInfo.h | 37 ++- llvm/lib/Target/AMDGPU/CMakeLists.txt | 2 + llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 1 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 1 + .../llvm.amdgcn.ds.ordered.add.gfx11.ll | 2 +- ...mdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll | 16 +- .../CodeGen/AMDGPU/sdag-print-divergence.ll | 4 +- 11 files changed, 81 insertions(+), 399 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index f3b9364fdb92b..a538fa70b9b1c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H +#include "AMDGPUSelectionDAGInfo.h" #include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "SIModeRegisterDefaults.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 7ed055e8da2b6..01897eb74c966 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -17,6 +17,7 @@ #include "AMDGPUInstrInfo.h" #include "AMDGPUMachineFunction.h" #include "AMDGPUMemoryUtils.h" +#include "AMDGPUSelectionDAGInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h" @@ -5517,165 +5518,6 @@ uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( return getImplicitParameterOffset(MFI->getExplicitKernArgSize(), Param); } 
-#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; - -const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((AMDGPUISD::NodeType)Opcode) { - case AMDGPUISD::FIRST_NUMBER: break; - // AMDIL DAG nodes - NODE_NAME_CASE(BRANCH_COND); - - // AMDGPU DAG nodes - NODE_NAME_CASE(IF) - NODE_NAME_CASE(ELSE) - NODE_NAME_CASE(LOOP) - NODE_NAME_CASE(CALL) - NODE_NAME_CASE(TC_RETURN) - NODE_NAME_CASE(TC_RETURN_GFX) - NODE_NAME_CASE(TC_RETURN_CHAIN) - NODE_NAME_CASE(TC_RETURN_CHAIN_DVGPR) - NODE_NAME_CASE(TRAP) - NODE_NAME_CASE(RET_GLUE) - NODE_NAME_CASE(WAVE_ADDRESS) - NODE_NAME_CASE(RETURN_TO_EPILOG) - NODE_NAME_CASE(ENDPGM) - NODE_NAME_CASE(ENDPGM_TRAP) - NODE_NAME_CASE(SIMULATED_TRAP) - NODE_NAME_CASE(DWORDADDR) - NODE_NAME_CASE(FRACT) - NODE_NAME_CASE(SETCC) - NODE_NAME_CASE(DENORM_MODE) - NODE_NAME_CASE(FMA_W_CHAIN) - NODE_NAME_CASE(FMUL_W_CHAIN) - NODE_NAME_CASE(CLAMP) - NODE_NAME_CASE(COS_HW) - NODE_NAME_CASE(SIN_HW) - NODE_NAME_CASE(FMAX_LEGACY) - NODE_NAME_CASE(FMIN_LEGACY) - NODE_NAME_CASE(FMAX3) - NODE_NAME_CASE(SMAX3) - NODE_NAME_CASE(UMAX3) - NODE_NAME_CASE(FMIN3) - NODE_NAME_CASE(SMIN3) - NODE_NAME_CASE(UMIN3) - NODE_NAME_CASE(FMED3) - NODE_NAME_CASE(SMED3) - NODE_NAME_CASE(UMED3) - NODE_NAME_CASE(FMAXIMUM3) - NODE_NAME_CASE(FMINIMUM3) - NODE_NAME_CASE(FDOT2) - NODE_NAME_CASE(URECIP) - NODE_NAME_CASE(DIV_SCALE) - NODE_NAME_CASE(DIV_FMAS) - NODE_NAME_CASE(DIV_FIXUP) - NODE_NAME_CASE(FMAD_FTZ) - NODE_NAME_CASE(RCP) - NODE_NAME_CASE(RSQ) - NODE_NAME_CASE(RCP_LEGACY) - NODE_NAME_CASE(RCP_IFLAG) - NODE_NAME_CASE(LOG) - NODE_NAME_CASE(EXP) - NODE_NAME_CASE(FMUL_LEGACY) - NODE_NAME_CASE(RSQ_CLAMP) - NODE_NAME_CASE(FP_CLASS) - NODE_NAME_CASE(DOT4) - NODE_NAME_CASE(CARRY) - NODE_NAME_CASE(BORROW) - NODE_NAME_CASE(BFE_U32) - NODE_NAME_CASE(BFE_I32) - NODE_NAME_CASE(BFI) - NODE_NAME_CASE(BFM) - NODE_NAME_CASE(FFBH_U32) - NODE_NAME_CASE(FFBH_I32) - NODE_NAME_CASE(FFBL_B32) - NODE_NAME_CASE(MUL_U24) - NODE_NAME_CASE(MUL_I24) - 
NODE_NAME_CASE(MULHI_U24) - NODE_NAME_CASE(MULHI_I24) - NODE_NAME_CASE(MAD_U24) - NODE_NAME_CASE(MAD_I24) - NODE_NAME_CASE(MAD_I64_I32) - NODE_NAME_CASE(MAD_U64_U32) - NODE_NAME_CASE(PERM) - NODE_NAME_CASE(TEXTURE_FETCH) - NODE_NAME_CASE(R600_EXPORT) - NODE_NAME_CASE(CONST_ADDRESS) - NODE_NAME_CASE(REGISTER_LOAD) - NODE_NAME_CASE(REGISTER_STORE) - NODE_NAME_CASE(CVT_F32_UBYTE0) - NODE_NAME_CASE(CVT_F32_UBYTE1) - NODE_NAME_CASE(CVT_F32_UBYTE2) - NODE_NAME_CASE(CVT_F32_UBYTE3) - NODE_NAME_CASE(CVT_PKRTZ_F16_F32) - NODE_NAME_CASE(CVT_PKNORM_I16_F32) - NODE_NAME_CASE(CVT_PKNORM_U16_F32) - NODE_NAME_CASE(CVT_PK_I16_I32) - NODE_NAME_CASE(CVT_PK_U16_U32) - NODE_NAME_CASE(FP_TO_FP16) - NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) - NODE_NAME_CASE(CONST_DATA_PTR) - NODE_NAME_CASE(PC_ADD_REL_OFFSET) - NODE_NAME_CASE(LDS) - NODE_NAME_CASE(DUMMY_CHAIN) - NODE_NAME_CASE(LOAD_D16_HI) - NODE_NAME_CASE(LOAD_D16_LO) - NODE_NAME_CASE(LOAD_D16_HI_I8) - NODE_NAME_CASE(LOAD_D16_HI_U8) - NODE_NAME_CASE(LOAD_D16_LO_I8) - NODE_NAME_CASE(LOAD_D16_LO_U8) - NODE_NAME_CASE(STORE_MSKOR) - NODE_NAME_CASE(TBUFFER_STORE_FORMAT) - NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16) - NODE_NAME_CASE(TBUFFER_LOAD_FORMAT) - NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) - NODE_NAME_CASE(DS_ORDERED_COUNT) - NODE_NAME_CASE(ATOMIC_CMP_SWAP) - NODE_NAME_CASE(BUFFER_LOAD) - NODE_NAME_CASE(BUFFER_LOAD_UBYTE) - NODE_NAME_CASE(BUFFER_LOAD_USHORT) - NODE_NAME_CASE(BUFFER_LOAD_BYTE) - NODE_NAME_CASE(BUFFER_LOAD_SHORT) - NODE_NAME_CASE(BUFFER_LOAD_TFE) - NODE_NAME_CASE(BUFFER_LOAD_UBYTE_TFE) - NODE_NAME_CASE(BUFFER_LOAD_USHORT_TFE) - NODE_NAME_CASE(BUFFER_LOAD_BYTE_TFE) - NODE_NAME_CASE(BUFFER_LOAD_SHORT_TFE) - NODE_NAME_CASE(BUFFER_LOAD_FORMAT) - NODE_NAME_CASE(BUFFER_LOAD_FORMAT_TFE) - NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16) - NODE_NAME_CASE(SBUFFER_LOAD) - NODE_NAME_CASE(SBUFFER_LOAD_BYTE) - NODE_NAME_CASE(SBUFFER_LOAD_UBYTE) - NODE_NAME_CASE(SBUFFER_LOAD_SHORT) - NODE_NAME_CASE(SBUFFER_LOAD_USHORT) - 
NODE_NAME_CASE(SBUFFER_PREFETCH_DATA) - NODE_NAME_CASE(BUFFER_STORE) - NODE_NAME_CASE(BUFFER_STORE_BYTE) - NODE_NAME_CASE(BUFFER_STORE_SHORT) - NODE_NAME_CASE(BUFFER_STORE_FORMAT) - NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16) - NODE_NAME_CASE(BUFFER_ATOMIC_SWAP) - NODE_NAME_CASE(BUFFER_ATOMIC_ADD) - NODE_NAME_CASE(BUFFER_ATOMIC_SUB) - NODE_NAME_CASE(BUFFER_ATOMIC_SMIN) - NODE_NAME_CASE(BUFFER_ATOMIC_UMIN) - NODE_NAME_CASE(BUFFER_ATOMIC_SMAX) - NODE_NAME_CASE(BUFFER_ATOMIC_UMAX) - NODE_NAME_CASE(BUFFER_ATOMIC_AND) - NODE_NAME_CASE(BUFFER_ATOMIC_OR) - NODE_NAME_CASE(BUFFER_ATOMIC_XOR) - NODE_NAME_CASE(BUFFER_ATOMIC_INC) - NODE_NAME_CASE(BUFFER_ATOMIC_DEC) - NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) - NODE_NAME_CASE(BUFFER_ATOMIC_CSUB) - NODE_NAME_CASE(BUFFER_ATOMIC_FADD) - NODE_NAME_CASE(BUFFER_ATOMIC_FMIN) - NODE_NAME_CASE(BUFFER_ATOMIC_FMAX) - NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32) - } - return nullptr; -} - SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 0dd2183b72b24..a20a515b6e137 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -280,8 +280,6 @@ class AMDGPUTargetLowering : public TargetLowering { SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const; - const char* getTargetNodeName(unsigned Opcode) const override; - // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for // AMDGPU. Commit r319036, // (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6) @@ -390,227 +388,6 @@ class AMDGPUTargetLowering : public TargetLowering { } }; -namespace AMDGPUISD { - -enum NodeType : unsigned { - // AMDIL ISD Opcodes - FIRST_NUMBER = ISD::BUILTIN_OP_END, - BRANCH_COND, - // End AMDIL ISD Opcodes - - // Function call. 
- CALL, - TC_RETURN, - TC_RETURN_GFX, - TC_RETURN_CHAIN, - TC_RETURN_CHAIN_DVGPR, - TRAP, - - // Masked control flow nodes. - IF, - ELSE, - LOOP, - - // A uniform kernel return that terminates the wavefront. - ENDPGM, - - // s_endpgm, but we may want to insert it in the middle of the block. - ENDPGM_TRAP, - - // "s_trap 2" equivalent on hardware that does not support it. - SIMULATED_TRAP, - - // Return to a shader part's epilog code. - RETURN_TO_EPILOG, - - // Return with values from a non-entry function. - RET_GLUE, - - // Convert a unswizzled wave uniform stack address to an address compatible - // with a vector offset for use in stack access. - WAVE_ADDRESS, - - DWORDADDR, - FRACT, - - /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output - /// modifier behavior with dx10_enable. - CLAMP, - - // This is SETCC with the full mask result which is used for a compare with a - // result bit per item in the wavefront. - SETCC, - - DENORM_MODE, - - // FP ops with input and output chain. - FMA_W_CHAIN, - FMUL_W_CHAIN, - - // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi. - // Denormals handled on some parts. - COS_HW, - SIN_HW, - FMAX_LEGACY, - FMIN_LEGACY, - - FMAX3, - SMAX3, - UMAX3, - FMIN3, - SMIN3, - UMIN3, - FMED3, - SMED3, - UMED3, - FMAXIMUM3, - FMINIMUM3, - FDOT2, - URECIP, - DIV_SCALE, - DIV_FMAS, - DIV_FIXUP, - // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is - // treated as an illegal operation. - FMAD_FTZ, - - // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. - // For f64, max error 2^29 ULP, handles denormals. - RCP, - RSQ, - RCP_LEGACY, - RCP_IFLAG, - - // log2, no denormal handling for f32. - LOG, - - // exp2, no denormal handling for f32. - EXP, - - FMUL_LEGACY, - RSQ_CLAMP, - FP_CLASS, - DOT4, - CARRY, - BORROW, - BFE_U32, // Extract range of bits with zero extension to 32-bits. - BFE_I32, // Extract range of bits with sign extension to 32-bits. 
- BFI, // (src0 & src1) | (~src0 & src2) - BFM, // Insert a range of bits into a 32-bit word. - FFBH_U32, // ctlz with -1 if input is zero. - FFBH_I32, - FFBL_B32, // cttz with -1 if input is zero. - MUL_U24, - MUL_I24, - MULHI_U24, - MULHI_I24, - MAD_U24, - MAD_I24, - MAD_U64_U32, - MAD_I64_I32, - PERM, - TEXTURE_FETCH, - R600_EXPORT, - CONST_ADDRESS, - REGISTER_LOAD, - REGISTER_STORE, - - // These cvt_f32_ubyte* nodes need to remain consecutive and in order. - CVT_F32_UBYTE0, - CVT_F32_UBYTE1, - CVT_F32_UBYTE2, - CVT_F32_UBYTE3, - - // Convert two float 32 numbers into a single register holding two packed f16 - // with round to zero. - CVT_PKRTZ_F16_F32, - CVT_PKNORM_I16_F32, - CVT_PKNORM_U16_F32, - CVT_PK_I16_I32, - CVT_PK_U16_U32, - - // Same as the standard node, except the high bits of the resulting integer - // are known 0. - FP_TO_FP16, - - /// This node is for VLIW targets and it is used to represent a vector - /// that is stored in consecutive registers with the same channel. - /// For example: - /// |X |Y|Z|W| - /// T0|v.x| | | | - /// T1|v.y| | | | - /// T2|v.z| | | | - /// T3|v.w| | | | - BUILD_VERTICAL_VECTOR, - /// Pointer to the start of the shader's constant data. 
- CONST_DATA_PTR, - PC_ADD_REL_OFFSET, - LDS, - - DUMMY_CHAIN, - - FIRST_MEMORY_OPCODE, - LOAD_D16_HI = FIRST_MEMORY_OPCODE, - LOAD_D16_LO, - LOAD_D16_HI_I8, - LOAD_D16_HI_U8, - LOAD_D16_LO_I8, - LOAD_D16_LO_U8, - - STORE_MSKOR, - TBUFFER_STORE_FORMAT, - TBUFFER_STORE_FORMAT_D16, - TBUFFER_LOAD_FORMAT, - TBUFFER_LOAD_FORMAT_D16, - DS_ORDERED_COUNT, - ATOMIC_CMP_SWAP, - BUFFER_LOAD, - BUFFER_LOAD_UBYTE, - BUFFER_LOAD_USHORT, - BUFFER_LOAD_BYTE, - BUFFER_LOAD_SHORT, - BUFFER_LOAD_TFE, - BUFFER_LOAD_UBYTE_TFE, - BUFFER_LOAD_USHORT_TFE, - BUFFER_LOAD_BYTE_TFE, - BUFFER_LOAD_SHORT_TFE, - BUFFER_LOAD_FORMAT, - BUFFER_LOAD_FORMAT_TFE, - BUFFER_LOAD_FORMAT_D16, - SBUFFER_LOAD, - SBUFFER_LOAD_BYTE, - SBUFFER_LOAD_UBYTE, - SBUFFER_LOAD_SHORT, - SBUFFER_LOAD_USHORT, - SBUFFER_PREFETCH_DATA, - BUFFER_STORE, - BUFFER_STORE_BYTE, - BUFFER_STORE_SHORT, - BUFFER_STORE_FORMAT, - BUFFER_STORE_FORMAT_D16, - BUFFER_ATOMIC_SWAP, - BUFFER_ATOMIC_ADD, - BUFFER_ATOMIC_SUB, - BUFFER_ATOMIC_SMIN, - BUFFER_ATOMIC_UMIN, - BUFFER_ATOMIC_SMAX, - BUFFER_ATOMIC_UMAX, - BUFFER_ATOMIC_AND, - BUFFER_ATOMIC_OR, - BUFFER_ATOMIC_XOR, - BUFFER_ATOMIC_INC, - BUFFER_ATOMIC_DEC, - BUFFER_ATOMIC_CMPSWAP, - BUFFER_ATOMIC_CSUB, - BUFFER_ATOMIC_FADD, - BUFFER_ATOMIC_FMIN, - BUFFER_ATOMIC_FMAX, - BUFFER_ATOMIC_COND_SUB_U32, - LAST_MEMORY_OPCODE = BUFFER_ATOMIC_COND_SUB_U32, -}; - -} // End namespace AMDGPUISD - } // End namespace llvm #endif diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.cpp index 2941a48c78d94..7c37f24722a21 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.cpp @@ -7,13 +7,38 @@ //===----------------------------------------------------------------------===// #include "AMDGPUSelectionDAGInfo.h" -#include "AMDGPUISelLowering.h" + +#define GET_SDNODE_DESC +#include "AMDGPUGenSDNodeInfo.inc" using namespace llvm; +AMDGPUSelectionDAGInfo::AMDGPUSelectionDAGInfo() + : 
SelectionDAGGenTargetInfo(AMDGPUGenSDNodeInfo) {} + AMDGPUSelectionDAGInfo::~AMDGPUSelectionDAGInfo() = default; -bool AMDGPUSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= AMDGPUISD::FIRST_MEMORY_OPCODE && - Opcode <= AMDGPUISD::LAST_MEMORY_OPCODE; +const char *AMDGPUSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { +#define NODE_NAME_CASE(node) \ + case AMDGPUISD::node: \ + return #node; + + switch (static_cast<AMDGPUISD::NodeType>(Opcode)) { + // These nodes don't have corresponding entries in *.td files yet. + NODE_NAME_CASE(WAVE_ADDRESS) + NODE_NAME_CASE(MAD_I64_I32) + NODE_NAME_CASE(MAD_U64_U32) + NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) + // These do, but only when compiling R600.td, + // and the enum is generated from AMDGPU.td. + NODE_NAME_CASE(DOT4) + NODE_NAME_CASE(TEXTURE_FETCH) + NODE_NAME_CASE(R600_EXPORT) + NODE_NAME_CASE(CONST_ADDRESS) + NODE_NAME_CASE(DUMMY_CHAIN) + } + +#undef NODE_NAME_CASE + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.h index 3280be73b2fdf..dec91a359a4a6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSelectionDAGInfo.h @@ -11,13 +11,46 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "AMDGPUGenSDNodeInfo.inc" + namespace llvm { +namespace AMDGPUISD { + +enum NodeType : unsigned { + // Convert a unswizzled wave uniform stack address to an address compatible + // with a vector offset for use in stack access. + WAVE_ADDRESS = GENERATED_OPCODE_END, + + DOT4, + MAD_U64_U32, + MAD_I64_I32, + TEXTURE_FETCH, + R600_EXPORT, + CONST_ADDRESS, + + /// This node is for VLIW targets and it is used to represent a vector + /// that is stored in consecutive registers with the same channel. 
+ /// For example: + /// |X |Y|Z|W| + /// T0|v.x| | | | + /// T1|v.y| | | | + /// T2|v.z| | | | + /// T3|v.w| | | | + BUILD_VERTICAL_VECTOR, -class AMDGPUSelectionDAGInfo : public SelectionDAGTargetInfo { + DUMMY_CHAIN, +}; + +} // namespace AMDGPUISD + +class AMDGPUSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + AMDGPUSelectionDAGInfo(); + ~AMDGPUSelectionDAGInfo() override; - bool isTargetMemoryOpcode(unsigned Opcode) const override; + const char *getTargetNodeName(unsigned Opcode) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index c6d70ee39202e..08c377a2259de 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -12,6 +12,7 @@ tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank) tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AMDGPUGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM AMDGPUGenSearchableTables.inc -gen-searchable-tables) tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget) @@ -32,6 +33,7 @@ tablegen(LLVM R600GenDFAPacketizer.inc -gen-dfa-packetizer) tablegen(LLVM R600GenInstrInfo.inc -gen-instr-info) tablegen(LLVM R600GenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM R600GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM R600GenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM R600GenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(AMDGPUCommonTableGen) diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 9c2811006bc1c..6b2043967f4bf 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -13,6 +13,7 @@ #include "R600ISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUSelectionDAGInfo.h" #include 
"MCTargetDesc/R600MCTargetDesc.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ade88a16193b8..eeac3b4a4dab9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -14,6 +14,7 @@ #include "SIISelLowering.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" +#include "AMDGPUSelectionDAGInfo.h" #include "AMDGPUTargetMachine.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll index 30a7235d6a702..e79b79aeb88ad 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s ; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s -; GFX12-ERR: LLVM ERROR: Cannot select: {{.*}} = DS_ORDERED_COUNT +; GFX12-ERR: LLVM ERROR: Cannot select: {{.*}} = AMDGPUISD::DS_ORDERED_COUNT ; FUNC-LABEL: {{^}}ds_ordered_add: ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll index a0f03d020b989..f8838909aa3a9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ptr.buffer.atomic.fadd_rtn_errors.ll @@ -27,7 +27,7 @@ ; Check bf16 buffer fadd does not select on supported subtargets. 
;--- raw-ret-f32-error.ll -; ERR-RAW-F32-SDAG: LLVM ERROR: Cannot select: {{.+}}: f32,ch = BUFFER_ATOMIC_FADD +; ERR-RAW-F32-SDAG: LLVM ERROR: Cannot select: {{.+}}: f32,ch = AMDGPUISD::BUFFER_ATOMIC_FADD ; ERR-RAW-F32-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD define float @raw_ptr_buffer_atomic_fadd_f32_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { @@ -36,7 +36,7 @@ define float @raw_ptr_buffer_atomic_fadd_f32_rtn(float %val, <4 x i32> inreg %rs } ;--- struct-ret-f32-error.ll -; ERR-STRUCT-F32-SDAG: LLVM ERROR: Cannot select: {{.+}}: f32,ch = BUFFER_ATOMIC_FADD +; ERR-STRUCT-F32-SDAG: LLVM ERROR: Cannot select: {{.+}}: f32,ch = AMDGPUISD::BUFFER_ATOMIC_FADD ; ERR-STRUCT-F32-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD define float @struct_ptr_buffer_atomic_fadd_f32_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { @@ -45,7 +45,7 @@ define float @struct_ptr_buffer_atomic_fadd_f32_rtn(float %val, ptr addrspace(8) } ;--- raw-ret-v2f16-error.ll -; ERR-RAW-V2F16-SDAG: LLVM ERROR: Cannot select: {{.+}}: v2f16,ch = BUFFER_ATOMIC_FADD +; ERR-RAW-V2F16-SDAG: LLVM ERROR: Cannot select: {{.+}}: v2f16,ch = AMDGPUISD::BUFFER_ATOMIC_FADD ; ERR-RAW-V2F16-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(<2 x s16>) = G_AMDGPU_BUFFER_ATOMIC_FADD define <2 x half> @raw_ptr_buffer_atomic_fadd_v2f16_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { @@ -54,7 +54,7 @@ define <2 x half> @raw_ptr_buffer_atomic_fadd_v2f16_rtn(<2 x half> %val, <4 x i3 } ;--- struct-ret-v2f16-error.ll -; ERR-STRUCT-V2F16-SDAG: LLVM ERROR: Cannot select: {{.+}}: v2f16,ch = BUFFER_ATOMIC_FADD +; ERR-STRUCT-V2F16-SDAG: LLVM ERROR: Cannot select: {{.+}}: v2f16,ch = AMDGPUISD::BUFFER_ATOMIC_FADD ; ERR-STRUCT-V2F16-GISEL: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(<2 x s16>) = G_AMDGPU_BUFFER_ATOMIC_FADD define <2 x half> 
@struct_ptr_buffer_atomic_fadd_v2f16_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { @@ -63,8 +63,8 @@ define <2 x half> @struct_ptr_buffer_atomic_fadd_v2f16_rtn(<2 x half> %val, ptr } ;--- raw-ret-v2bf16-error.ll -; ERR-RAW-V2BF16-SDAG: LLVM ERROR: Cannot select: {{.+}}: v2bf16,ch = BUFFER_ATOMIC_FADD -; ERR-RAW-V2BF16-GISEL: LLVM ERROR: Cannot select: {{.+}}: v2bf16,ch = BUFFER_ATOMIC_FADD +; ERR-RAW-V2BF16-SDAG: LLVM ERROR: Cannot select: {{.+}}: v2bf16,ch = AMDGPUISD::BUFFER_ATOMIC_FADD +; ERR-RAW-V2BF16-GISEL: LLVM ERROR: Cannot select: {{.+}}: v2bf16,ch = AMDGPUISD::BUFFER_ATOMIC_FADD define <2 x bfloat> @raw_ptr_buffer_atomic_fadd_v2bf16_rtn(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { %ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) @@ -72,8 +72,8 @@ define <2 x bfloat> @raw_ptr_buffer_atomic_fadd_v2bf16_rtn(<2 x bfloat> %val, <4 } ;--- struct-ret-v2bf16-error.ll -; ERR-STRUCT-V2BF16-SDAG: LLVM ERROR: Cannot select: {{.+}}: v2bf16,ch = BUFFER_ATOMIC_FADD -; ERR-STRUCT-V2BF16-GISEL: LLVM ERROR: Cannot select: {{.+}}: v2bf16,ch = BUFFER_ATOMIC_FADD +; ERR-STRUCT-V2BF16-SDAG: LLVM ERROR: Cannot select: {{.+}}: v2bf16,ch = AMDGPUISD::BUFFER_ATOMIC_FADD +; ERR-STRUCT-V2BF16-GISEL: LLVM ERROR: Cannot select: {{.+}}: v2bf16,ch = AMDGPUISD::BUFFER_ATOMIC_FADD define <2 x bfloat> @struct_ptr_buffer_atomic_fadd_v2bf16_rtn(<2 x bfloat> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { %ret = call <2 x bfloat> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll b/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll index e114f1cc44834..70a7e79e3f0af 100644 --- a/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll +++ 
b/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll @@ -15,7 +15,7 @@ ; GCN-DEFAULT: t4: f32,ch = CopyFromReg # D:1 t0, Register:f32 %1 ; GCN-DEFAULT: t6: f32 = fadd # D:1 t5, t4 ; GCN-DEFAULT: t9: ch,glue = CopyToReg # D:1 t0, Register:f32 $vgpr0, t6 -; GCN-DEFAULT: t10: ch = RETURN_TO_EPILOG t9, Register:f32 $vgpr0, t9:1 +; GCN-DEFAULT: t10: ch = AMDGPUISD::RETURN_TO_EPILOG t9, Register:f32 $vgpr0, t9:1 ; GCN-VERBOSE: t0: ch,glue = EntryToken # D:0 ; GCN-VERBOSE: t2: f32,ch = CopyFromReg [ORD=1] # D:0 t0, Register:f32 %0 # D:0 @@ -24,7 +24,7 @@ ; GCN-VERBOSE: t4: f32,ch = CopyFromReg [ORD=1] # D:1 t0, Register:f32 %1 # D:0 ; GCN-VERBOSE: t6: f32 = fadd [ORD=3] # D:1 t5, t4 ; GCN-VERBOSE: t9: ch,glue = CopyToReg [ORD=4] # D:1 t0, Register:f32 $vgpr0 # D:0, t6 -; GCN-VERBOSE: t10: ch = RETURN_TO_EPILOG [ORD=4] # D:0 t9, Register:f32 $vgpr0 # D:0, t9:1 +; GCN-VERBOSE: t10: ch = AMDGPUISD::RETURN_TO_EPILOG [ORD=4] # D:0 t9, Register:f32 $vgpr0 # D:0, t9:1 define amdgpu_ps float @test_sdag_dump(float inreg %scalar, float %vector) { entry: From 28fd3538954b527080408b1831ead7218f552f2f Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 01:00:59 +0300 Subject: [PATCH 02/14] ARM --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 220 +------------- llvm/lib/Target/ARM/ARMISelLowering.h | 315 -------------------- llvm/lib/Target/ARM/ARMInstrInfo.td | 2 +- llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 86 +++++- llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 73 ++++- llvm/lib/Target/ARM/CMakeLists.txt | 1 + 6 files changed, 161 insertions(+), 536 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 1f7ab8ce3a0e0..54253453c05a0 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1714,220 +1714,6 @@ ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, return std::make_pair(RRC, Cost); } -const char 
*ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { -#define MAKE_CASE(V) \ - case V: \ - return #V; - switch ((ARMISD::NodeType)Opcode) { - case ARMISD::FIRST_NUMBER: - break; - MAKE_CASE(ARMISD::Wrapper) - MAKE_CASE(ARMISD::WrapperPIC) - MAKE_CASE(ARMISD::WrapperJT) - MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL) - MAKE_CASE(ARMISD::CALL) - MAKE_CASE(ARMISD::CALL_PRED) - MAKE_CASE(ARMISD::CALL_NOLINK) - MAKE_CASE(ARMISD::tSECALL) - MAKE_CASE(ARMISD::t2CALL_BTI) - MAKE_CASE(ARMISD::BRCOND) - MAKE_CASE(ARMISD::BR_JT) - MAKE_CASE(ARMISD::BR2_JT) - MAKE_CASE(ARMISD::RET_GLUE) - MAKE_CASE(ARMISD::SERET_GLUE) - MAKE_CASE(ARMISD::INTRET_GLUE) - MAKE_CASE(ARMISD::PIC_ADD) - MAKE_CASE(ARMISD::CMP) - MAKE_CASE(ARMISD::CMN) - MAKE_CASE(ARMISD::CMPZ) - MAKE_CASE(ARMISD::CMPFP) - MAKE_CASE(ARMISD::CMPFPE) - MAKE_CASE(ARMISD::CMPFPw0) - MAKE_CASE(ARMISD::CMPFPEw0) - MAKE_CASE(ARMISD::BCC_i64) - MAKE_CASE(ARMISD::FMSTAT) - MAKE_CASE(ARMISD::CMOV) - MAKE_CASE(ARMISD::SSAT) - MAKE_CASE(ARMISD::USAT) - MAKE_CASE(ARMISD::ASRL) - MAKE_CASE(ARMISD::LSRL) - MAKE_CASE(ARMISD::LSLL) - MAKE_CASE(ARMISD::LSLS) - MAKE_CASE(ARMISD::LSRS1) - MAKE_CASE(ARMISD::ASRS1) - MAKE_CASE(ARMISD::RRX) - MAKE_CASE(ARMISD::ADDC) - MAKE_CASE(ARMISD::ADDE) - MAKE_CASE(ARMISD::SUBC) - MAKE_CASE(ARMISD::SUBE) - MAKE_CASE(ARMISD::VMOVRRD) - MAKE_CASE(ARMISD::VMOVDRR) - MAKE_CASE(ARMISD::VMOVhr) - MAKE_CASE(ARMISD::VMOVrh) - MAKE_CASE(ARMISD::VMOVSR) - MAKE_CASE(ARMISD::EH_SJLJ_SETJMP) - MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP) - MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH) - MAKE_CASE(ARMISD::TC_RETURN) - MAKE_CASE(ARMISD::THREAD_POINTER) - MAKE_CASE(ARMISD::DYN_ALLOC) - MAKE_CASE(ARMISD::MEMBARRIER_MCR) - MAKE_CASE(ARMISD::PRELOAD) - MAKE_CASE(ARMISD::LDRD) - MAKE_CASE(ARMISD::STRD) - MAKE_CASE(ARMISD::WIN__CHKSTK) - MAKE_CASE(ARMISD::WIN__DBZCHK) - MAKE_CASE(ARMISD::PREDICATE_CAST) - MAKE_CASE(ARMISD::VECTOR_REG_CAST) - MAKE_CASE(ARMISD::MVESEXT) - MAKE_CASE(ARMISD::MVEZEXT) - MAKE_CASE(ARMISD::MVETRUNC) - 
MAKE_CASE(ARMISD::VCMP) - MAKE_CASE(ARMISD::VCMPZ) - MAKE_CASE(ARMISD::VTST) - MAKE_CASE(ARMISD::VSHLs) - MAKE_CASE(ARMISD::VSHLu) - MAKE_CASE(ARMISD::VSHLIMM) - MAKE_CASE(ARMISD::VSHRsIMM) - MAKE_CASE(ARMISD::VSHRuIMM) - MAKE_CASE(ARMISD::VRSHRsIMM) - MAKE_CASE(ARMISD::VRSHRuIMM) - MAKE_CASE(ARMISD::VRSHRNIMM) - MAKE_CASE(ARMISD::VQSHLsIMM) - MAKE_CASE(ARMISD::VQSHLuIMM) - MAKE_CASE(ARMISD::VQSHLsuIMM) - MAKE_CASE(ARMISD::VQSHRNsIMM) - MAKE_CASE(ARMISD::VQSHRNuIMM) - MAKE_CASE(ARMISD::VQSHRNsuIMM) - MAKE_CASE(ARMISD::VQRSHRNsIMM) - MAKE_CASE(ARMISD::VQRSHRNuIMM) - MAKE_CASE(ARMISD::VQRSHRNsuIMM) - MAKE_CASE(ARMISD::VSLIIMM) - MAKE_CASE(ARMISD::VSRIIMM) - MAKE_CASE(ARMISD::VGETLANEu) - MAKE_CASE(ARMISD::VGETLANEs) - MAKE_CASE(ARMISD::VMOVIMM) - MAKE_CASE(ARMISD::VMVNIMM) - MAKE_CASE(ARMISD::VMOVFPIMM) - MAKE_CASE(ARMISD::VDUP) - MAKE_CASE(ARMISD::VDUPLANE) - MAKE_CASE(ARMISD::VEXT) - MAKE_CASE(ARMISD::VREV64) - MAKE_CASE(ARMISD::VREV32) - MAKE_CASE(ARMISD::VREV16) - MAKE_CASE(ARMISD::VZIP) - MAKE_CASE(ARMISD::VUZP) - MAKE_CASE(ARMISD::VTRN) - MAKE_CASE(ARMISD::VTBL1) - MAKE_CASE(ARMISD::VTBL2) - MAKE_CASE(ARMISD::VMOVN) - MAKE_CASE(ARMISD::VQMOVNs) - MAKE_CASE(ARMISD::VQMOVNu) - MAKE_CASE(ARMISD::VCVTN) - MAKE_CASE(ARMISD::VCVTL) - MAKE_CASE(ARMISD::VIDUP) - MAKE_CASE(ARMISD::VMULLs) - MAKE_CASE(ARMISD::VMULLu) - MAKE_CASE(ARMISD::VQDMULH) - MAKE_CASE(ARMISD::VADDVs) - MAKE_CASE(ARMISD::VADDVu) - MAKE_CASE(ARMISD::VADDVps) - MAKE_CASE(ARMISD::VADDVpu) - MAKE_CASE(ARMISD::VADDLVs) - MAKE_CASE(ARMISD::VADDLVu) - MAKE_CASE(ARMISD::VADDLVAs) - MAKE_CASE(ARMISD::VADDLVAu) - MAKE_CASE(ARMISD::VADDLVps) - MAKE_CASE(ARMISD::VADDLVpu) - MAKE_CASE(ARMISD::VADDLVAps) - MAKE_CASE(ARMISD::VADDLVApu) - MAKE_CASE(ARMISD::VMLAVs) - MAKE_CASE(ARMISD::VMLAVu) - MAKE_CASE(ARMISD::VMLAVps) - MAKE_CASE(ARMISD::VMLAVpu) - MAKE_CASE(ARMISD::VMLALVs) - MAKE_CASE(ARMISD::VMLALVu) - MAKE_CASE(ARMISD::VMLALVps) - MAKE_CASE(ARMISD::VMLALVpu) - MAKE_CASE(ARMISD::VMLALVAs) - 
MAKE_CASE(ARMISD::VMLALVAu) - MAKE_CASE(ARMISD::VMLALVAps) - MAKE_CASE(ARMISD::VMLALVApu) - MAKE_CASE(ARMISD::VMINVu) - MAKE_CASE(ARMISD::VMINVs) - MAKE_CASE(ARMISD::VMAXVu) - MAKE_CASE(ARMISD::VMAXVs) - MAKE_CASE(ARMISD::UMAAL) - MAKE_CASE(ARMISD::UMLAL) - MAKE_CASE(ARMISD::SMLAL) - MAKE_CASE(ARMISD::SMLALBB) - MAKE_CASE(ARMISD::SMLALBT) - MAKE_CASE(ARMISD::SMLALTB) - MAKE_CASE(ARMISD::SMLALTT) - MAKE_CASE(ARMISD::SMULWB) - MAKE_CASE(ARMISD::SMULWT) - MAKE_CASE(ARMISD::SMLALD) - MAKE_CASE(ARMISD::SMLALDX) - MAKE_CASE(ARMISD::SMLSLD) - MAKE_CASE(ARMISD::SMLSLDX) - MAKE_CASE(ARMISD::SMMLAR) - MAKE_CASE(ARMISD::SMMLSR) - MAKE_CASE(ARMISD::QADD16b) - MAKE_CASE(ARMISD::QSUB16b) - MAKE_CASE(ARMISD::QADD8b) - MAKE_CASE(ARMISD::QSUB8b) - MAKE_CASE(ARMISD::UQADD16b) - MAKE_CASE(ARMISD::UQSUB16b) - MAKE_CASE(ARMISD::UQADD8b) - MAKE_CASE(ARMISD::UQSUB8b) - MAKE_CASE(ARMISD::BUILD_VECTOR) - MAKE_CASE(ARMISD::BFI) - MAKE_CASE(ARMISD::VORRIMM) - MAKE_CASE(ARMISD::VBICIMM) - MAKE_CASE(ARMISD::VBSP) - MAKE_CASE(ARMISD::MEMCPY) - MAKE_CASE(ARMISD::VLD1DUP) - MAKE_CASE(ARMISD::VLD2DUP) - MAKE_CASE(ARMISD::VLD3DUP) - MAKE_CASE(ARMISD::VLD4DUP) - MAKE_CASE(ARMISD::VLD1_UPD) - MAKE_CASE(ARMISD::VLD2_UPD) - MAKE_CASE(ARMISD::VLD3_UPD) - MAKE_CASE(ARMISD::VLD4_UPD) - MAKE_CASE(ARMISD::VLD1x2_UPD) - MAKE_CASE(ARMISD::VLD1x3_UPD) - MAKE_CASE(ARMISD::VLD1x4_UPD) - MAKE_CASE(ARMISD::VLD2LN_UPD) - MAKE_CASE(ARMISD::VLD3LN_UPD) - MAKE_CASE(ARMISD::VLD4LN_UPD) - MAKE_CASE(ARMISD::VLD1DUP_UPD) - MAKE_CASE(ARMISD::VLD2DUP_UPD) - MAKE_CASE(ARMISD::VLD3DUP_UPD) - MAKE_CASE(ARMISD::VLD4DUP_UPD) - MAKE_CASE(ARMISD::VST1_UPD) - MAKE_CASE(ARMISD::VST2_UPD) - MAKE_CASE(ARMISD::VST3_UPD) - MAKE_CASE(ARMISD::VST4_UPD) - MAKE_CASE(ARMISD::VST1x2_UPD) - MAKE_CASE(ARMISD::VST1x3_UPD) - MAKE_CASE(ARMISD::VST1x4_UPD) - MAKE_CASE(ARMISD::VST2LN_UPD) - MAKE_CASE(ARMISD::VST3LN_UPD) - MAKE_CASE(ARMISD::VST4LN_UPD) - MAKE_CASE(ARMISD::WLS) - MAKE_CASE(ARMISD::WLSSETUP) - MAKE_CASE(ARMISD::LE) - 
MAKE_CASE(ARMISD::LOOP_DEC) - MAKE_CASE(ARMISD::CSINV) - MAKE_CASE(ARMISD::CSNEG) - MAKE_CASE(ARMISD::CSINC) - MAKE_CASE(ARMISD::MEMCPYLOOP) - MAKE_CASE(ARMISD::MEMSETLOOP) -#undef MAKE_CASE - } - return nullptr; -} - EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) @@ -3463,8 +3249,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return LowerInterruptReturn(RetOps, dl, DAG); } - ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE : - ARMISD::RET_GLUE; + unsigned RetNode = + AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE : ARMISD::RET_GLUE; return DAG.getNode(RetNode, dl, MVT::Other, RetOps); } @@ -4986,7 +4772,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } } - ARMISD::NodeType CompareType; + unsigned CompareType; switch (CondCode) { default: CompareType = ARMISD::CMP; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 9fad056edd3f1..46cb897e4969f 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -50,319 +50,6 @@ class TargetMachine; class TargetRegisterInfo; class VectorType; - namespace ARMISD { - - // ARM Specific DAG Nodes - enum NodeType : unsigned { - // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - Wrapper, // Wrapper - A wrapper node for TargetConstantPool, - // TargetExternalSymbol, and TargetGlobalAddress. - WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in - // PIC mode. - WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable - - // Add pseudo op to model memcpy for struct byval. - COPY_STRUCT_BYVAL, - - CALL, // Function call. - CALL_PRED, // Function call that's predicable. - CALL_NOLINK, // Function call with branch not branch-and-link. - tSECALL, // CMSE non-secure function call. 
- t2CALL_BTI, // Thumb function call followed by BTI instruction. - BRCOND, // Conditional branch. - BR_JT, // Jumptable branch. - BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump). - RET_GLUE, // Return with a flag operand. - SERET_GLUE, // CMSE Entry function return with a flag operand. - INTRET_GLUE, // Interrupt return with an LR-offset and a flag operand. - - PIC_ADD, // Add with a PC operand and a PIC label. - - ASRL, // MVE long arithmetic shift right. - LSRL, // MVE long shift right. - LSLL, // MVE long shift left. - - CMP, // ARM compare instructions. - CMN, // ARM CMN instructions. - CMPZ, // ARM compare that sets only Z flag. - CMPFP, // ARM VFP compare instruction, sets FPSCR. - CMPFPE, // ARM VFP signalling compare instruction, sets FPSCR. - CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. - CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets - // FPSCR. - FMSTAT, // ARM fmstat instruction. - - CMOV, // ARM conditional move instructions. - - SSAT, // Signed saturation - USAT, // Unsigned saturation - - BCC_i64, - - LSLS, // Flag-setting shift left. - LSRS1, // Flag-setting logical shift right by one bit. - ASRS1, // Flag-setting arithmetic shift right by one bit. - RRX, // Shift right one bit with carry in. - - ADDC, // Add with carry - ADDE, // Add using carry - SUBC, // Sub with carry - SUBE, // Sub using carry - - VMOVRRD, // double to two gprs. - VMOVDRR, // Two gprs to double. - VMOVSR, // move gpr to single, used for f32 literal constructed in a gpr - - EH_SJLJ_SETJMP, // SjLj exception handling setjmp. - EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. - EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. - - TC_RETURN, // Tail call return pseudo. - - THREAD_POINTER, - - DYN_ALLOC, // Dynamic allocation on the stack. - - MEMBARRIER_MCR, // Memory barrier (MCR) - - PRELOAD, // Preload - - WIN__CHKSTK, // Windows' __chkstk call to do stack probing. 
- WIN__DBZCHK, // Windows' divide by zero check - - WLS, // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart - WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup. - LOOP_DEC, // Really a part of LE, performs the sub - LE, // Low-overhead loops, Loop End - - PREDICATE_CAST, // Predicate cast for MVE i1 types - VECTOR_REG_CAST, // Reinterpret the current contents of a vector register - - MVESEXT, // Legalization aids for extending a vector into two/four vectors. - MVEZEXT, // or truncating two/four vectors into one. Eventually becomes - MVETRUNC, // stack store/load sequence, if not optimized to anything else. - - VCMP, // Vector compare. - VCMPZ, // Vector compare to zero. - VTST, // Vector test bits. - - // Vector shift by vector - VSHLs, // ...left/right by signed - VSHLu, // ...left/right by unsigned - - // Vector shift by immediate: - VSHLIMM, // ...left - VSHRsIMM, // ...right (signed) - VSHRuIMM, // ...right (unsigned) - - // Vector rounding shift by immediate: - VRSHRsIMM, // ...right (signed) - VRSHRuIMM, // ...right (unsigned) - VRSHRNIMM, // ...right narrow - - // Vector saturating shift by immediate: - VQSHLsIMM, // ...left (signed) - VQSHLuIMM, // ...left (unsigned) - VQSHLsuIMM, // ...left (signed to unsigned) - VQSHRNsIMM, // ...right narrow (signed) - VQSHRNuIMM, // ...right narrow (unsigned) - VQSHRNsuIMM, // ...right narrow (signed to unsigned) - - // Vector saturating rounding shift by immediate: - VQRSHRNsIMM, // ...right narrow (signed) - VQRSHRNuIMM, // ...right narrow (unsigned) - VQRSHRNsuIMM, // ...right narrow (signed to unsigned) - - // Vector shift and insert: - VSLIIMM, // ...left - VSRIIMM, // ...right - - // Vector get lane (VMOV scalar to ARM core register) - // (These are used for 8- and 16-bit element types only.) 
- VGETLANEu, // zero-extend vector extract element - VGETLANEs, // sign-extend vector extract element - - // Vector move immediate and move negated immediate: - VMOVIMM, - VMVNIMM, - - // Vector move f32 immediate: - VMOVFPIMM, - - // Move H <-> R, clearing top 16 bits - VMOVrh, - VMOVhr, - - // Vector duplicate: - VDUP, - VDUPLANE, - - // Vector shuffles: - VEXT, // extract - VREV64, // reverse elements within 64-bit doublewords - VREV32, // reverse elements within 32-bit words - VREV16, // reverse elements within 16-bit halfwords - VZIP, // zip (interleave) - VUZP, // unzip (deinterleave) - VTRN, // transpose - VTBL1, // 1-register shuffle with mask - VTBL2, // 2-register shuffle with mask - VMOVN, // MVE vmovn - - // MVE Saturating truncates - VQMOVNs, // Vector (V) Saturating (Q) Move and Narrow (N), signed (s) - VQMOVNu, // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u) - - // MVE float <> half converts - VCVTN, // MVE vcvt f32 -> f16, truncating into either the bottom or top - // lanes - VCVTL, // MVE vcvt f16 -> f32, extending from either the bottom or top lanes - - // MVE VIDUP instruction, taking a start value and increment. 
- VIDUP, - - // Vector multiply long: - VMULLs, // ...signed - VMULLu, // ...unsigned - - VQDMULH, // MVE vqdmulh instruction - - // MVE reductions - VADDVs, // sign- or zero-extend the elements of a vector to i32, - VADDVu, // add them all together, and return an i32 of their sum - VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask - VADDVpu, - VADDLVs, // sign- or zero-extend elements to i64 and sum, returning - VADDLVu, // the low and high 32-bit halves of the sum - VADDLVAs, // Same as VADDLV[su] but also add an input accumulator - VADDLVAu, // provided as low and high halves - VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask - VADDLVpu, - VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask - VADDLVApu, - VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply - VMLAVu, // them and add the results together, returning an i32 of the sum - VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask - VMLAVpu, - VMLALVs, // Same as VMLAV but with i64, returning the low and - VMLALVu, // high 32-bit halves of the sum - VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask - VMLALVpu, - VMLALVAs, // Same as VMLALV but also add an input accumulator - VMLALVAu, // provided as low and high halves - VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask - VMLALVApu, - VMINVu, // Find minimum unsigned value of a vector and register - VMINVs, // Find minimum signed value of a vector and register - VMAXVu, // Find maximum unsigned value of a vector and register - VMAXVs, // Find maximum signed value of a vector and register - - SMULWB, // Signed multiply word by half word, bottom - SMULWT, // Signed multiply word by half word, top - UMLAL, // 64bit Unsigned Accumulate Multiply - SMLAL, // 64bit Signed Accumulate Multiply - UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply - SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16 - SMLALBT, // 64-bit signed accumulate multiply bottom, top 16 - 
SMLALTB, // 64-bit signed accumulate multiply top, bottom 16 - SMLALTT, // 64-bit signed accumulate multiply top, top 16 - SMLALD, // Signed multiply accumulate long dual - SMLALDX, // Signed multiply accumulate long dual exchange - SMLSLD, // Signed multiply subtract long dual - SMLSLDX, // Signed multiply subtract long dual exchange - SMMLAR, // Signed multiply long, round and add - SMMLSR, // Signed multiply long, subtract and round - - // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b - // stands for. - QADD8b, - QSUB8b, - QADD16b, - QSUB16b, - UQADD8b, - UQSUB8b, - UQADD16b, - UQSUB16b, - - // Operands of the standard BUILD_VECTOR node are not legalized, which - // is fine if BUILD_VECTORs are always lowered to shuffles or other - // operations, but for ARM some BUILD_VECTORs are legal as-is and their - // operands need to be legalized. Define an ARM-specific version of - // BUILD_VECTOR for this purpose. - BUILD_VECTOR, - - // Bit-field insert - BFI, - - // Vector OR with immediate - VORRIMM, - // Vector AND with NOT of immediate - VBICIMM, - - // Pseudo vector bitwise select - VBSP, - - // Pseudo-instruction representing a memory copy using ldm/stm - // instructions. - MEMCPY, - - // Pseudo-instruction representing a memory copy using a tail predicated - // loop - MEMCPYLOOP, - // Pseudo-instruction representing a memset using a tail predicated - // loop - MEMSETLOOP, - - // V8.1MMainline condition select - CSINV, // Conditional select invert. - CSNEG, // Conditional select negate. - CSINC, // Conditional select increment. 
- - // Vector load N-element structure to all lanes: - FIRST_MEMORY_OPCODE, - VLD1DUP = FIRST_MEMORY_OPCODE, - VLD2DUP, - VLD3DUP, - VLD4DUP, - - // NEON loads with post-increment base updates: - VLD1_UPD, - VLD2_UPD, - VLD3_UPD, - VLD4_UPD, - VLD2LN_UPD, - VLD3LN_UPD, - VLD4LN_UPD, - VLD1DUP_UPD, - VLD2DUP_UPD, - VLD3DUP_UPD, - VLD4DUP_UPD, - VLD1x2_UPD, - VLD1x3_UPD, - VLD1x4_UPD, - - // NEON stores with post-increment base updates: - VST1_UPD, - VST2_UPD, - VST3_UPD, - VST4_UPD, - VST2LN_UPD, - VST3LN_UPD, - VST4LN_UPD, - VST1x2_UPD, - VST1x3_UPD, - VST1x4_UPD, - - // Load/Store of dual registers - LDRD, - STRD, - LAST_MEMORY_OPCODE = STRD, - }; - - } // end namespace ARMISD - namespace ARM { /// Possible values of current rounding mode, which is specified in bits /// 23:22 of FPSCR. @@ -424,8 +111,6 @@ class VectorType; void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const override; - const char *getTargetNodeName(unsigned Opcode) const override; - bool isSelectSupported(SelectSupportKind Kind) const override { // ARM does not support scalar condition selects on vectors. 
return (Kind != ScalarCondVectorVal); diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 1f5ba998970fc..811a123616a12 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -40,7 +40,7 @@ def SDT_ARMCMov : SDTypeProfile<1, 4, [ SDTCisVT<4, FlagsVT>, // in flags ]>; -def SDT_ARMBrcond : SDTypeProfile<0, 2, [ +def SDT_ARMBrcond : SDTypeProfile<0, 3, [ SDTCisVT<0, OtherVT>, // target basic block SDTCisVT<1, CondCodeVT>, // condition code SDTCisVT<2, FlagsVT>, // in flags diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 77f4782699c96..a72979536ebd4 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -10,9 +10,14 @@ // //===----------------------------------------------------------------------===// +#include "ARMSelectionDAGInfo.h" #include "ARMTargetTransformInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Support/CommandLine.h" + +#define GET_SDNODE_DESC +#include "ARMGenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "arm-selectiondag-info" @@ -30,9 +35,86 @@ static cl::opt EnableMemtransferTPLoop( "Allow (may be subject to certain conditions) " "conversion of memcpy to TP loop."))); +ARMSelectionDAGInfo::ARMSelectionDAGInfo() + : SelectionDAGGenTargetInfo(ARMGenSDNodeInfo) {} + +const char *ARMSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { +#define MAKE_CASE(V) \ + case V: \ + return #V; + + // These nodes don't have corresponding entries in *.td files yet. 
+ switch (static_cast<ARMISD::NodeType>(Opcode)) { + MAKE_CASE(ARMISD::DYN_ALLOC) + MAKE_CASE(ARMISD::MVESEXT) + MAKE_CASE(ARMISD::MVEZEXT) + MAKE_CASE(ARMISD::MVETRUNC) + MAKE_CASE(ARMISD::UMAAL) + MAKE_CASE(ARMISD::UMLAL) + MAKE_CASE(ARMISD::SMLAL) + MAKE_CASE(ARMISD::BUILD_VECTOR) + MAKE_CASE(ARMISD::VLD1DUP) + MAKE_CASE(ARMISD::VLD2DUP) + MAKE_CASE(ARMISD::VLD3DUP) + MAKE_CASE(ARMISD::VLD4DUP) + MAKE_CASE(ARMISD::VLD1_UPD) + MAKE_CASE(ARMISD::VLD2_UPD) + MAKE_CASE(ARMISD::VLD3_UPD) + MAKE_CASE(ARMISD::VLD4_UPD) + MAKE_CASE(ARMISD::VLD1x2_UPD) + MAKE_CASE(ARMISD::VLD1x3_UPD) + MAKE_CASE(ARMISD::VLD1x4_UPD) + MAKE_CASE(ARMISD::VLD2LN_UPD) + MAKE_CASE(ARMISD::VLD3LN_UPD) + MAKE_CASE(ARMISD::VLD4LN_UPD) + MAKE_CASE(ARMISD::VLD1DUP_UPD) + MAKE_CASE(ARMISD::VLD2DUP_UPD) + MAKE_CASE(ARMISD::VLD3DUP_UPD) + MAKE_CASE(ARMISD::VLD4DUP_UPD) + MAKE_CASE(ARMISD::VST1_UPD) + MAKE_CASE(ARMISD::VST3_UPD) + MAKE_CASE(ARMISD::VST1x2_UPD) + MAKE_CASE(ARMISD::VST1x3_UPD) + MAKE_CASE(ARMISD::VST1x4_UPD) + MAKE_CASE(ARMISD::VST2LN_UPD) + MAKE_CASE(ARMISD::VST3LN_UPD) + MAKE_CASE(ARMISD::VST4LN_UPD) + MAKE_CASE(ARMISD::WLS) + MAKE_CASE(ARMISD::WLSSETUP) + MAKE_CASE(ARMISD::LE) + MAKE_CASE(ARMISD::LOOP_DEC) + } +#undef MAKE_CASE + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= ARMISD::FIRST_MEMORY_OPCODE && - Opcode <= ARMISD::LAST_MEMORY_OPCODE; + // These nodes don't have corresponding entries in *.td files yet. 
+ if (Opcode >= ARMISD::FIRST_MEMORY_OPCODE && + Opcode <= ARMISD::LAST_MEMORY_OPCODE) + return true; + + return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode); +} + +void ARMSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + default: + break; + case ARMISD::WIN__DBZCHK: + // invalid number of results; expected 2, got 1 + case ARMISD::WIN__CHKSTK: + // invalid number of results; expected 1, got 2 + case ARMISD::COPY_STRUCT_BYVAL: + // invalid number of operands; expected 6, got 5 + case ARMISD::MEMCPY: + // invalid number of operands; expected 5, got 4 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } // Emit, if possible, a specialized version of the given Libcall. Typically this diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h index d68150e66567c..58d45346a8c8e 100644 --- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -17,7 +17,71 @@ #include "llvm/CodeGen/RuntimeLibcallUtil.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "ARMGenSDNodeInfo.inc" + namespace llvm { +namespace ARMISD { + +enum NodeType : unsigned { + DYN_ALLOC = GENERATED_OPCODE_END, // Dynamic allocation on the stack. + + WLS, // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart + WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup. + LOOP_DEC, // Really a part of LE, performs the sub + LE, // Low-overhead loops, Loop End + + MVESEXT, // Legalization aids for extending a vector into two/four vectors. + MVEZEXT, // or truncating two/four vectors into one. Eventually becomes + MVETRUNC, // stack store/load sequence, if not optimized to anything else. 
+ + UMLAL, // 64bit Unsigned Accumulate Multiply + SMLAL, // 64bit Signed Accumulate Multiply + UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply + + // Operands of the standard BUILD_VECTOR node are not legalized, which + // is fine if BUILD_VECTORs are always lowered to shuffles or other + // operations, but for ARM some BUILD_VECTORs are legal as-is and their + // operands need to be legalized. Define an ARM-specific version of + // BUILD_VECTOR for this purpose. + BUILD_VECTOR, + + // Vector load N-element structure to all lanes: + FIRST_MEMORY_OPCODE, + VLD1DUP = FIRST_MEMORY_OPCODE, + VLD2DUP, + VLD3DUP, + VLD4DUP, + + // NEON loads with post-increment base updates: + VLD1_UPD, + VLD2_UPD, + VLD3_UPD, + VLD4_UPD, + VLD2LN_UPD, + VLD3LN_UPD, + VLD4LN_UPD, + VLD1DUP_UPD, + VLD2DUP_UPD, + VLD3DUP_UPD, + VLD4DUP_UPD, + VLD1x2_UPD, + VLD1x3_UPD, + VLD1x4_UPD, + + // NEON stores with post-increment base updates: + VST1_UPD, + VST3_UPD, + VST2LN_UPD, + VST3LN_UPD, + VST4LN_UPD, + VST1x2_UPD, + VST1x3_UPD, + VST1x4_UPD, + LAST_MEMORY_OPCODE = VST1x4_UPD, +}; + +} // namespace ARMISD namespace ARM_AM { static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) { @@ -35,10 +99,17 @@ namespace ARM_AM { } } // end namespace ARM_AM -class ARMSelectionDAGInfo : public SelectionDAGTargetInfo { +class ARMSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + ARMSelectionDAGInfo(); + + const char *getTargetNodeName(unsigned Opcode) const override; + bool isTargetMemoryOpcode(unsigned Opcode) const override; + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt index a39629bd8aeb0..d99368e1d3b2b 100644 --- a/llvm/lib/Target/ARM/CMakeLists.txt +++ b/llvm/lib/Target/ARM/CMakeLists.txt @@ -14,6 +14,7 @@ 
tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank) tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM ARMGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM ARMGenSystemRegister.inc -gen-searchable-tables) From 5ed6d5b70bfd3208bb29a9e070e12532027e6e6e Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 01:01:09 +0300 Subject: [PATCH 03/14] BPF --- llvm/lib/Target/BPF/BPFISelLowering.cpp | 20 -------------------- llvm/lib/Target/BPF/BPFISelLowering.h | 14 -------------- llvm/lib/Target/BPF/BPFInstrInfo.td | 5 ++--- llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp | 8 ++++++++ llvm/lib/Target/BPF/BPFSelectionDAGInfo.h | 7 ++++++- llvm/lib/Target/BPF/CMakeLists.txt | 1 + 6 files changed, 17 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp index f4f414d192df0..20f473ed63c6b 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -780,26 +780,6 @@ SDValue BPFTargetLowering::LowerTRAP(SDValue Op, SelectionDAG &DAG) const { return LowerCall(CLI, InVals); } -const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((BPFISD::NodeType)Opcode) { - case BPFISD::FIRST_NUMBER: - break; - case BPFISD::RET_GLUE: - return "BPFISD::RET_GLUE"; - case BPFISD::CALL: - return "BPFISD::CALL"; - case BPFISD::SELECT_CC: - return "BPFISD::SELECT_CC"; - case BPFISD::BR_CC: - return "BPFISD::BR_CC"; - case BPFISD::Wrapper: - return "BPFISD::Wrapper"; - case BPFISD::MEMCPY: - return "BPFISD::MEMCPY"; - } - return nullptr; -} - static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags) { return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); diff --git 
a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h index 23cbce7094e6b..dbdc3d830260e 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -20,17 +20,6 @@ namespace llvm { class BPFSubtarget; -namespace BPFISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - RET_GLUE, - CALL, - SELECT_CC, - BR_CC, - Wrapper, - MEMCPY -}; -} class BPFTargetLowering : public TargetLowering { public: @@ -39,9 +28,6 @@ class BPFTargetLowering : public TargetLowering { // Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - // This method returns the name of a target specific DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - // This method decides whether folding a constant offset // with the given GlobalAddress is legal. bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index b21f1a0eee3b0..2e68bfaa70936 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -41,13 +41,12 @@ def BPFcallseq_start: SDNode<"ISD::CALLSEQ_START", SDT_BPFCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; def BPFcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_BPFCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue]>; +def BPFbrcc : SDNode<"BPFISD::BR_CC", SDT_BPFBrCC, [SDNPHasChain]>; def BPFselectcc : SDNode<"BPFISD::SELECT_CC", SDT_BPFSelectCC>; def BPFWrapper : SDNode<"BPFISD::Wrapper", SDT_BPFWrapper>; def BPFmemcpy : SDNode<"BPFISD::MEMCPY", SDT_BPFMEMCPY, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; def BPFIsLittleEndian : Predicate<"Subtarget->isLittleEndian()">; def BPFIsBigEndian : Predicate<"!Subtarget->isLittleEndian()">; 
diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp index 3e29e6c7ed386..fede6fdc7e14a 100644 --- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp +++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp @@ -10,12 +10,20 @@ // //===----------------------------------------------------------------------===// +#include "BPFSelectionDAGInfo.h" #include "BPFTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" + +#define GET_SDNODE_DESC +#include "BPFGenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "bpf-selectiondag-info" +BPFSelectionDAGInfo::BPFSelectionDAGInfo() + : SelectionDAGGenTargetInfo(BPFGenSDNodeInfo) {} + SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h index 79f05e57bb5cd..0bd267fb934fd 100644 --- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h +++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.h @@ -15,10 +15,15 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "BPFGenSDNodeInfo.inc" + namespace llvm { -class BPFSelectionDAGInfo : public SelectionDAGTargetInfo { +class BPFSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + BPFSelectionDAGInfo(); + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt index eade4cacb7100..105b67db108f8 100644 --- a/llvm/lib/Target/BPF/CMakeLists.txt +++ b/llvm/lib/Target/BPF/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(LLVM BPFGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM BPFGenInstrInfo.inc -gen-instr-info) tablegen(LLVM BPFGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM BPFGenRegisterInfo.inc 
-gen-register-info) +tablegen(LLVM BPFGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM BPFGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM BPFGenGlobalISel.inc -gen-global-isel) tablegen(LLVM BPFGenRegisterBank.inc -gen-register-bank) From c8dbba551aff9b72d109de76430d0aa300dcc8a7 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 01:01:21 +0300 Subject: [PATCH 04/14] Hexagon --- llvm/lib/Target/Hexagon/CMakeLists.txt | 1 + .../Target/Hexagon/HexagonISelLowering.cpp | 60 ------------ llvm/lib/Target/Hexagon/HexagonISelLowering.h | 95 ------------------- .../Hexagon/HexagonSelectionDAGInfo.cpp | 59 ++++++++++++ .../Target/Hexagon/HexagonSelectionDAGInfo.h | 61 +++++++++++- 5 files changed, 119 insertions(+), 157 deletions(-) diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index d758260a8ab5d..998d2f84334df 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -11,6 +11,7 @@ tablegen(LLVM HexagonGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info) tablegen(LLVM HexagonGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM HexagonGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(HexagonCommonTableGen) diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 01efcedebc808..10e25f6050475 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1892,64 +1892,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf"); } -const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((HexagonISD::NodeType)Opcode) { - case HexagonISD::ADDC: return "HexagonISD::ADDC"; - case 
HexagonISD::SUBC: return "HexagonISD::SUBC"; - case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA"; - case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT"; - case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL"; - case HexagonISD::BARRIER: return "HexagonISD::BARRIER"; - case HexagonISD::CALL: return "HexagonISD::CALL"; - case HexagonISD::CALLnr: return "HexagonISD::CALLnr"; - case HexagonISD::CALLR: return "HexagonISD::CALLR"; - case HexagonISD::COMBINE: return "HexagonISD::COMBINE"; - case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; - case HexagonISD::CONST32: return "HexagonISD::CONST32"; - case HexagonISD::CP: return "HexagonISD::CP"; - case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH"; - case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; - case HexagonISD::TSTBIT: return "HexagonISD::TSTBIT"; - case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU"; - case HexagonISD::INSERT: return "HexagonISD::INSERT"; - case HexagonISD::JT: return "HexagonISD::JT"; - case HexagonISD::RET_GLUE: return "HexagonISD::RET_GLUE"; - case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; - case HexagonISD::VASL: return "HexagonISD::VASL"; - case HexagonISD::VASR: return "HexagonISD::VASR"; - case HexagonISD::VLSR: return "HexagonISD::VLSR"; - case HexagonISD::MFSHL: return "HexagonISD::MFSHL"; - case HexagonISD::MFSHR: return "HexagonISD::MFSHR"; - case HexagonISD::SSAT: return "HexagonISD::SSAT"; - case HexagonISD::USAT: return "HexagonISD::USAT"; - case HexagonISD::SMUL_LOHI: return "HexagonISD::SMUL_LOHI"; - case HexagonISD::UMUL_LOHI: return "HexagonISD::UMUL_LOHI"; - case HexagonISD::USMUL_LOHI: return "HexagonISD::USMUL_LOHI"; - case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW"; - case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0"; - case HexagonISD::VROR: return "HexagonISD::VROR"; - case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; - case HexagonISD::READTIMER: return "HexagonISD::READTIMER"; - case 
HexagonISD::PTRUE: return "HexagonISD::PTRUE"; - case HexagonISD::PFALSE: return "HexagonISD::PFALSE"; - case HexagonISD::D2P: return "HexagonISD::D2P"; - case HexagonISD::P2D: return "HexagonISD::P2D"; - case HexagonISD::V2Q: return "HexagonISD::V2Q"; - case HexagonISD::Q2V: return "HexagonISD::Q2V"; - case HexagonISD::QCAT: return "HexagonISD::QCAT"; - case HexagonISD::QTRUE: return "HexagonISD::QTRUE"; - case HexagonISD::QFALSE: return "HexagonISD::QFALSE"; - case HexagonISD::TL_EXTEND: return "HexagonISD::TL_EXTEND"; - case HexagonISD::TL_TRUNCATE: return "HexagonISD::TL_TRUNCATE"; - case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST"; - case HexagonISD::VALIGN: return "HexagonISD::VALIGN"; - case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR"; - case HexagonISD::ISEL: return "HexagonISD::ISEL"; - case HexagonISD::OP_END: break; - } - return nullptr; -} - bool HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign, const SDLoc &dl, SelectionDAG &DAG) const { @@ -3343,8 +3285,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { default: #ifndef NDEBUG Op.getNode()->dumpr(&DAG); - if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END) - errs() << "Error: check for a non-legal type in this operation\n"; #endif llvm_unreachable("Should not custom lower this!"); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 1321bee44a295..be6dd6fc32b3c 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -29,99 +29,6 @@ namespace llvm { -namespace HexagonISD { - -enum NodeType : unsigned { - OP_BEGIN = ISD::BUILTIN_OP_END, - - CONST32 = OP_BEGIN, - CONST32_GP, // For marking data present in GP. - ADDC, // Add with carry: (X, Y, Cin) -> (X+Y, Cout). - SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout). - ALLOCA, - - AT_GOT, // Index in GOT. - AT_PCREL, // Offset relative to PC. 
- - CALL, // Function call. - CALLnr, // Function call that does not return. - CALLR, - - RET_GLUE, // Return with a glue operand. - BARRIER, // Memory barrier. - JT, // Jump table. - CP, // Constant pool. - - COMBINE, - VASL, // Vector shifts by a scalar value - VASR, - VLSR, - MFSHL, // Funnel shifts with the shift amount guaranteed to be - MFSHR, // within the range of the bit width of the element. - - SSAT, // Signed saturate. - USAT, // Unsigned saturate. - SMUL_LOHI, // Same as ISD::SMUL_LOHI, but opaque to the combiner. - UMUL_LOHI, // Same as ISD::UMUL_LOHI, but opaque to the combiner. - // We want to legalize MULH[SU] to [SU]MUL_LOHI, but the - // combiner will keep rewriting it back to MULH[SU]. - USMUL_LOHI, // Like SMUL_LOHI, but unsigned*signed. - - TSTBIT, - INSERT, - EXTRACTU, - VEXTRACTW, - VINSERTW0, - VROR, - TC_RETURN, - EH_RETURN, - DCFETCH, - READCYCLE, - READTIMER, - PTRUE, - PFALSE, - D2P, // Convert 8-byte value to 8-bit predicate register. [*] - P2D, // Convert 8-bit predicate register to 8-byte value. [*] - V2Q, // Convert HVX vector to a vector predicate reg. [*] - Q2V, // Convert vector predicate to an HVX vector. [*] - // [*] The equivalence is defined as "Q <=> (V != 0)", - // where the != operation compares bytes. - // Note: V != 0 is implemented as V >u 0. - QCAT, - QTRUE, - QFALSE, - - TL_EXTEND, // Wrappers for ISD::*_EXTEND and ISD::TRUNCATE to prevent DAG - TL_TRUNCATE, // from auto-folding operations, e.g. - // (i32 ext (i16 ext i8)) would be folded to (i32 ext i8). - // To simplify the type legalization, we want to keep these - // single steps separate during type legalization. - // TL_[EXTEND|TRUNCATE] Inp, i128 _, i32 Opc - // * Inp is the original input to extend/truncate, - // * _ is a dummy operand with an illegal type (can be undef), - // * Opc is the original opcode. - // The legalization process (in Hexagon lowering code) will - // first deal with the "real" types (i.e. 
Inp and the result), - // and once all of them are processed, the wrapper node will - // be replaced with the original ISD node. The dummy illegal - // operand is there to make sure that the legalization hooks - // are called again after everything else is legal, giving - // us the opportunity to undo the wrapping. - - TYPECAST, // No-op that's used to convert between different legal - // types in a register. - VALIGN, // Align two vectors (in Op0, Op1) to one that would have - // been loaded from address in Op2. - VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is - // an address in a vector load, then it's a no-op. - ISEL, // Marker for nodes that were created during ISel, and - // which need explicit selection (would have been left - // unselected otherwise). - OP_END -}; - -} // end namespace HexagonISD - class HexagonSubtarget; class HexagonTargetLowering : public TargetLowering { @@ -182,8 +89,6 @@ class HexagonTargetLowering : public TargetLowering { void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const override; - const char *getTargetNodeName(unsigned Opcode) const override; - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index 1b724e8fcae91..70aaa9ed9f96b 100644 --- a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -10,12 +10,71 @@ // //===----------------------------------------------------------------------===// +#include "HexagonSelectionDAGInfo.h" #include "HexagonTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" + +#define GET_SDNODE_DESC +#include "HexagonGenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "hexagon-selectiondag-info" 
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo() + : SelectionDAGGenTargetInfo(HexagonGenSDNodeInfo) {} + +const char *HexagonSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { + // These nodes don't have corresponding entries in *.td files yet. + switch (static_cast(Opcode)) { + case HexagonISD::ADDC: + return "HexagonISD::ADDC"; + case HexagonISD::SUBC: + return "HexagonISD::SUBC"; + case HexagonISD::CALLR: + return "HexagonISD::CALLR"; + case HexagonISD::SMUL_LOHI: + return "HexagonISD::SMUL_LOHI"; + case HexagonISD::UMUL_LOHI: + return "HexagonISD::UMUL_LOHI"; + case HexagonISD::USMUL_LOHI: + return "HexagonISD::USMUL_LOHI"; + case HexagonISD::VROR: + return "HexagonISD::VROR"; + case HexagonISD::D2P: + return "HexagonISD::D2P"; + case HexagonISD::P2D: + return "HexagonISD::P2D"; + case HexagonISD::V2Q: + return "HexagonISD::V2Q"; + case HexagonISD::Q2V: + return "HexagonISD::Q2V"; + case HexagonISD::TL_EXTEND: + return "HexagonISD::TL_EXTEND"; + case HexagonISD::TL_TRUNCATE: + return "HexagonISD::TL_TRUNCATE"; + case HexagonISD::TYPECAST: + return "HexagonISD::TYPECAST"; + case HexagonISD::ISEL: + return "HexagonISD::ISEL"; + } + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + +void HexagonSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + default: + break; + case HexagonISD::VALIGNADDR: + // invalid number of operands; expected 1, got 2 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); +} + SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, diff --git a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h index 0d3b1725d1bc4..c62be58901034 100644 --- a/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ 
b/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -15,11 +15,68 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "HexagonGenSDNodeInfo.inc" + namespace llvm { +namespace HexagonISD { + +enum NodeType : unsigned { + ADDC = GENERATED_OPCODE_END, // Add with carry: (X, Y, Cin) -> (X+Y, Cout). + SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout). + + CALLR, + + SMUL_LOHI, // Same as ISD::SMUL_LOHI, but opaque to the combiner. + UMUL_LOHI, // Same as ISD::UMUL_LOHI, but opaque to the combiner. + // We want to legalize MULH[SU] to [SU]MUL_LOHI, but the + // combiner will keep rewriting it back to MULH[SU]. + USMUL_LOHI, // Like SMUL_LOHI, but unsigned*signed. + + VROR, + D2P, // Convert 8-byte value to 8-bit predicate register. [*] + P2D, // Convert 8-bit predicate register to 8-byte value. [*] + V2Q, // Convert HVX vector to a vector predicate reg. [*] + Q2V, // Convert vector predicate to an HVX vector. [*] + // [*] The equivalence is defined as "Q <=> (V != 0)", + // where the != operation compares bytes. + // Note: V != 0 is implemented as V >u 0. + + TL_EXTEND, // Wrappers for ISD::*_EXTEND and ISD::TRUNCATE to prevent DAG + TL_TRUNCATE, // from auto-folding operations, e.g. + // (i32 ext (i16 ext i8)) would be folded to (i32 ext i8). + // To simplify the type legalization, we want to keep these + // single steps separate during type legalization. + // TL_[EXTEND|TRUNCATE] Inp, i128 _, i32 Opc + // * Inp is the original input to extend/truncate, + // * _ is a dummy operand with an illegal type (can be undef), + // * Opc is the original opcode. + // The legalization process (in Hexagon lowering code) will + // first deal with the "real" types (i.e. Inp and the result), + // and once all of them are processed, the wrapper node will + // be replaced with the original ISD node. 
The dummy illegal + // operand is there to make sure that the legalization hooks + // are called again after everything else is legal, giving + // us the opportunity to undo the wrapping. -class HexagonSelectionDAGInfo : public SelectionDAGTargetInfo { + TYPECAST, // No-op that's used to convert between different legal + // types in a register. + ISEL, // Marker for nodes that were created during ISel, and + // which need explicit selection (would have been left + // unselected otherwise). + // clang-format on +}; + +} // namespace HexagonISD + +class HexagonSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: - explicit HexagonSelectionDAGInfo() = default; + HexagonSelectionDAGInfo(); + + const char *getTargetNodeName(unsigned Opcode) const override; + + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, From bd12ee6bdd2d9cabc6e4c7dbbdfe55c318d3fbc2 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 03:11:17 +0300 Subject: [PATCH 05/14] LoongArch (ROTL_W, CACOP_D, CACOP_W removed) --- llvm/lib/Target/LoongArch/CMakeLists.txt | 2 + .../Target/LoongArch/LoongArchISelDAGToDAG.h | 1 + .../LoongArch/LoongArchISelLowering.cpp | 102 +----------- .../Target/LoongArch/LoongArchISelLowering.h | 148 ------------------ .../LoongArch/LoongArchSelectionDAGInfo.cpp | 38 +++++ .../LoongArch/LoongArchSelectionDAGInfo.h | 41 +++++ .../Target/LoongArch/LoongArchSubtarget.cpp | 11 +- .../lib/Target/LoongArch/LoongArchSubtarget.h | 10 +- 8 files changed, 100 insertions(+), 253 deletions(-) create mode 100644 llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt index 0f674b1b0fa9e..8689d09140a1e 100644 --- a/llvm/lib/Target/LoongArch/CMakeLists.txt 
+++ b/llvm/lib/Target/LoongArch/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(LLVM LoongArchGenInstrInfo.inc -gen-instr-info) tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM LoongArchGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM LoongArchGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM LoongArchGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM LoongArchGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(LoongArchCommonTableGen) @@ -27,6 +28,7 @@ add_llvm_target(LoongArchCodeGen LoongArchMergeBaseOffset.cpp LoongArchOptWInstrs.cpp LoongArchRegisterInfo.cpp + LoongArchSelectionDAGInfo.cpp LoongArchSubtarget.cpp LoongArchTargetMachine.cpp LoongArchTargetTransformInfo.cpp diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h index e94f249c14be2..47079a8cb96f6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -14,6 +14,7 @@ #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H #include "LoongArch.h" +#include "LoongArchSelectionDAGInfo.h" #include "LoongArchTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 7a9ec9f5e96b3..a56451ddf0078 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -15,6 +15,7 @@ #include "LoongArch.h" #include "LoongArchMachineFunctionInfo.h" #include "LoongArchRegisterInfo.h" +#include "LoongArchSelectionDAGInfo.h" #include "LoongArchSubtarget.h" #include "MCTargetDesc/LoongArchBaseInfo.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" @@ -3564,7 +3565,7 @@ SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, // Returns the opcode of the target-specific SDNode that implements the 32-bit // form of the given Opcode. 
-static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { +static unsigned getLoongArchWOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unexpected opcode"); @@ -3600,7 +3601,7 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc = ISD::ANY_EXTEND) { SDLoc DL(N); - LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode()); + unsigned WOpcode = getLoongArchWOpcode(N->getOpcode()); SDValue NewOp0, NewRes; switch (NumOp) { @@ -5824,103 +5825,6 @@ bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( return true; } -const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((LoongArchISD::NodeType)Opcode) { - case LoongArchISD::FIRST_NUMBER: - break; - -#define NODE_NAME_CASE(node) \ - case LoongArchISD::node: \ - return "LoongArchISD::" #node; - - // TODO: Add more target-dependent nodes later. - NODE_NAME_CASE(CALL) - NODE_NAME_CASE(CALL_MEDIUM) - NODE_NAME_CASE(CALL_LARGE) - NODE_NAME_CASE(RET) - NODE_NAME_CASE(TAIL) - NODE_NAME_CASE(TAIL_MEDIUM) - NODE_NAME_CASE(TAIL_LARGE) - NODE_NAME_CASE(SELECT_CC) - NODE_NAME_CASE(SLL_W) - NODE_NAME_CASE(SRA_W) - NODE_NAME_CASE(SRL_W) - NODE_NAME_CASE(BSTRINS) - NODE_NAME_CASE(BSTRPICK) - NODE_NAME_CASE(MOVGR2FR_W_LA64) - NODE_NAME_CASE(MOVFR2GR_S_LA64) - NODE_NAME_CASE(FTINT) - NODE_NAME_CASE(REVB_2H) - NODE_NAME_CASE(REVB_2W) - NODE_NAME_CASE(BITREV_4B) - NODE_NAME_CASE(BITREV_8B) - NODE_NAME_CASE(BITREV_W) - NODE_NAME_CASE(ROTR_W) - NODE_NAME_CASE(ROTL_W) - NODE_NAME_CASE(DIV_W) - NODE_NAME_CASE(DIV_WU) - NODE_NAME_CASE(MOD_W) - NODE_NAME_CASE(MOD_WU) - NODE_NAME_CASE(CLZ_W) - NODE_NAME_CASE(CTZ_W) - NODE_NAME_CASE(DBAR) - NODE_NAME_CASE(IBAR) - NODE_NAME_CASE(BREAK) - NODE_NAME_CASE(SYSCALL) - NODE_NAME_CASE(CRC_W_B_W) - NODE_NAME_CASE(CRC_W_H_W) - NODE_NAME_CASE(CRC_W_W_W) - NODE_NAME_CASE(CRC_W_D_W) - NODE_NAME_CASE(CRCC_W_B_W) - 
NODE_NAME_CASE(CRCC_W_H_W) - NODE_NAME_CASE(CRCC_W_W_W) - NODE_NAME_CASE(CRCC_W_D_W) - NODE_NAME_CASE(CSRRD) - NODE_NAME_CASE(CSRWR) - NODE_NAME_CASE(CSRXCHG) - NODE_NAME_CASE(IOCSRRD_B) - NODE_NAME_CASE(IOCSRRD_H) - NODE_NAME_CASE(IOCSRRD_W) - NODE_NAME_CASE(IOCSRRD_D) - NODE_NAME_CASE(IOCSRWR_B) - NODE_NAME_CASE(IOCSRWR_H) - NODE_NAME_CASE(IOCSRWR_W) - NODE_NAME_CASE(IOCSRWR_D) - NODE_NAME_CASE(CPUCFG) - NODE_NAME_CASE(MOVGR2FCSR) - NODE_NAME_CASE(MOVFCSR2GR) - NODE_NAME_CASE(CACOP_D) - NODE_NAME_CASE(CACOP_W) - NODE_NAME_CASE(VSHUF) - NODE_NAME_CASE(VPICKEV) - NODE_NAME_CASE(VPICKOD) - NODE_NAME_CASE(VPACKEV) - NODE_NAME_CASE(VPACKOD) - NODE_NAME_CASE(VILVL) - NODE_NAME_CASE(VILVH) - NODE_NAME_CASE(VSHUF4I) - NODE_NAME_CASE(VREPLVEI) - NODE_NAME_CASE(VREPLGR2VR) - NODE_NAME_CASE(XVPERMI) - NODE_NAME_CASE(VPICK_SEXT_ELT) - NODE_NAME_CASE(VPICK_ZEXT_ELT) - NODE_NAME_CASE(VREPLVE) - NODE_NAME_CASE(VALL_ZERO) - NODE_NAME_CASE(VANY_ZERO) - NODE_NAME_CASE(VALL_NONZERO) - NODE_NAME_CASE(VANY_NONZERO) - NODE_NAME_CASE(FRECIPE) - NODE_NAME_CASE(FRSQRTE) - NODE_NAME_CASE(VSLLI) - NODE_NAME_CASE(VSRLI) - NODE_NAME_CASE(VBSLL) - NODE_NAME_CASE(VBSRL) - NODE_NAME_CASE(VLDREPL) - } -#undef NODE_NAME_CASE - return nullptr; -} - //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 6bf295984dfc5..ce1022f1fa3e3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -21,151 +21,6 @@ namespace llvm { class LoongArchSubtarget; -namespace LoongArchISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // TODO: add more LoongArchISDs - CALL, - CALL_MEDIUM, - CALL_LARGE, - RET, - TAIL, - TAIL_MEDIUM, - TAIL_LARGE, - - // Select - 
SELECT_CC, - - // 32-bit shifts, directly matching the semantics of the named LoongArch - // instructions. - SLL_W, - SRA_W, - SRL_W, - - ROTL_W, - ROTR_W, - - // unsigned 32-bit integer division - DIV_W, - MOD_W, - DIV_WU, - MOD_WU, - - // FPR<->GPR transfer operations - MOVGR2FR_W_LA64, - MOVFR2GR_S_LA64, - MOVFCSR2GR, - MOVGR2FCSR, - - FTINT, - - // Bit counting operations - CLZ_W, - CTZ_W, - - BSTRINS, - BSTRPICK, - - // Byte-swapping and bit-reversal - REVB_2H, - REVB_2W, - BITREV_4B, - BITREV_8B, - BITREV_W, - - // Intrinsic operations start ============================================ - BREAK, - CACOP_D, - CACOP_W, - DBAR, - IBAR, - SYSCALL, - - // CRC check operations - CRC_W_B_W, - CRC_W_H_W, - CRC_W_W_W, - CRC_W_D_W, - CRCC_W_B_W, - CRCC_W_H_W, - CRCC_W_W_W, - CRCC_W_D_W, - - CSRRD, - - // Write new value to CSR and return old value. - // Operand 0: A chain pointer. - // Operand 1: The new value to write. - // Operand 2: The address of the required CSR. - // Result 0: The old value of the CSR. - // Result 1: The new chain pointer. - CSRWR, - - // Similar to CSRWR but with a write mask. - // Operand 0: A chain pointer. - // Operand 1: The new value to write. - // Operand 2: The write mask. - // Operand 3: The address of the required CSR. - // Result 0: The old value of the CSR. - // Result 1: The new chain pointer. 
- CSRXCHG, - - // IOCSR access operations - IOCSRRD_B, - IOCSRRD_W, - IOCSRRD_H, - IOCSRRD_D, - IOCSRWR_B, - IOCSRWR_H, - IOCSRWR_W, - IOCSRWR_D, - - // Read CPU configuration information operation - CPUCFG, - - // Vector Shuffle - VREPLVE, - VSHUF, - VPICKEV, - VPICKOD, - VPACKEV, - VPACKOD, - VILVL, - VILVH, - VSHUF4I, - VREPLVEI, - VREPLGR2VR, - XVPERMI, - - // Extended vector element extraction - VPICK_SEXT_ELT, - VPICK_ZEXT_ELT, - - // Vector comparisons - VALL_ZERO, - VANY_ZERO, - VALL_NONZERO, - VANY_NONZERO, - - // Floating point approximate reciprocal operation - FRECIPE, - FRSQRTE, - - // Vector logicial left / right shift by immediate - VSLLI, - VSRLI, - - // Vector byte logicial left / right shift - VBSLL, - VBSRL, - - // Scalar load broadcast to vector - VLDREPL - - // Intrinsic operations end ============================================= -}; -} // end namespace LoongArchISD class LoongArchTargetLowering : public TargetLowering { const LoongArchSubtarget &Subtarget; @@ -185,9 +40,6 @@ class LoongArchTargetLowering : public TargetLowering { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - // This method returns the name of a target specific DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - // Lower incoming arguments, copy physregs into vregs. SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, diff --git a/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..19557954867d6 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.cpp @@ -0,0 +1,38 @@ +//===- LoongArchSelectionDAGInfo.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LoongArchSelectionDAGInfo.h" + +#define GET_SDNODE_DESC +#include "LoongArchGenSDNodeInfo.inc" + +using namespace llvm; + +LoongArchSelectionDAGInfo::LoongArchSelectionDAGInfo() + : SelectionDAGGenTargetInfo(LoongArchGenSDNodeInfo) {} + +LoongArchSelectionDAGInfo::~LoongArchSelectionDAGInfo() = default; + +const char * +LoongArchSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { + switch (static_cast(Opcode)) { + case LoongArchISD::VSHUF4I: + return "LoongArchISD::VSHUF4I"; + } + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + +void LoongArchSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + if (N->getOpcode() == LoongArchISD::VLDREPL) { + // invalid number of operands; expected 2, got 3 + return; + } + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h new file mode 100644 index 0000000000000..c53304df4650b --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchSelectionDAGInfo.h @@ -0,0 +1,41 @@ +//===- LoongArchSelectionDAGInfo.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +#define GET_SDNODE_ENUM +#include "LoongArchGenSDNodeInfo.inc" + +namespace llvm { +namespace LoongArchISD { + +enum NodeType { + // This is skipped by TableGen because it has conflicting SDTypeProfiles. + VSHUF4I = GENERATED_OPCODE_END, +}; + +} // namespace LoongArchISD + +class LoongArchSelectionDAGInfo : public SelectionDAGGenTargetInfo { +public: + LoongArchSelectionDAGInfo(); + + ~LoongArchSelectionDAGInfo() override; + + const char *getTargetNodeName(unsigned Opcode) const override; + + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSELECTIONDAGINFO_H diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp index 3acbe4992273a..3ac3f558c3ce6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp @@ -12,6 +12,7 @@ #include "LoongArchSubtarget.h" #include "LoongArchFrameLowering.h" +#include "LoongArchSelectionDAGInfo.h" #include "MCTargetDesc/LoongArchBaseInfo.h" using namespace llvm; @@ -95,4 +96,12 @@ LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU, : LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS), FrameLowering( initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)), - InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {} + InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) { + TSInfo = std::make_unique(); +} + +LoongArchSubtarget::~LoongArchSubtarget() = default; + +const SelectionDAGTargetInfo *LoongArchSubtarget::getSelectionDAGInfo() const { + return 
TSInfo.get(); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h index 5e12bafebb0d5..16d06dc06766f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h @@ -18,7 +18,6 @@ #include "LoongArchInstrInfo.h" #include "LoongArchRegisterInfo.h" #include "MCTargetDesc/LoongArchBaseInfo.h" -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" @@ -47,7 +46,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { LoongArchInstrInfo InstrInfo; LoongArchRegisterInfo RegInfo; LoongArchTargetLowering TLInfo; - SelectionDAGTargetInfo TSInfo; + std::unique_ptr TSInfo; Align PrefFunctionAlignment; Align PrefLoopAlignment; @@ -69,6 +68,8 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, StringRef ABIName, const TargetMachine &TM); + ~LoongArchSubtarget() override; + // Parses features string setting specified subtarget options. The // definition of this function is auto-generated by tblgen. 
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); @@ -83,9 +84,8 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo { const LoongArchTargetLowering *getTargetLowering() const override { return &TLInfo; } - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } + + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override; #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ bool GETTER() const { return ATTRIBUTE; } From cbad54d0c38965c354f297eb97e0ece63d2934e5 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 01:02:01 +0300 Subject: [PATCH 06/14] Mips --- llvm/lib/Target/Mips/CMakeLists.txt | 1 + llvm/lib/Target/Mips/MipsISelLowering.cpp | 122 ---------- llvm/lib/Target/Mips/MipsISelLowering.h | 216 +----------------- llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp | 35 ++- llvm/lib/Target/Mips/MipsSelectionDAGInfo.h | 26 ++- 5 files changed, 57 insertions(+), 343 deletions(-) diff --git a/llvm/lib/Target/Mips/CMakeLists.txt b/llvm/lib/Target/Mips/CMakeLists.txt index 21d1765107ae6..6d59a21ad137f 100644 --- a/llvm/lib/Target/Mips/CMakeLists.txt +++ b/llvm/lib/Target/Mips/CMakeLists.txt @@ -16,6 +16,7 @@ tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM MipsGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM MipsGenRegisterBank.inc -gen-register-bank) tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM MipsGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM MipsGenExegesis.inc -gen-exegesis) diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index e933e97ea3706..064366f18b903 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -171,128 +171,6 @@ SDValue MipsTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, N->getOffset(), Flag); } -const char 
*MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((MipsISD::NodeType)Opcode) { - case MipsISD::FIRST_NUMBER: break; - case MipsISD::JmpLink: return "MipsISD::JmpLink"; - case MipsISD::TailCall: return "MipsISD::TailCall"; - case MipsISD::Highest: return "MipsISD::Highest"; - case MipsISD::Higher: return "MipsISD::Higher"; - case MipsISD::Hi: return "MipsISD::Hi"; - case MipsISD::Lo: return "MipsISD::Lo"; - case MipsISD::GotHi: return "MipsISD::GotHi"; - case MipsISD::TlsHi: return "MipsISD::TlsHi"; - case MipsISD::GPRel: return "MipsISD::GPRel"; - case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer"; - case MipsISD::Ret: return "MipsISD::Ret"; - case MipsISD::ERet: return "MipsISD::ERet"; - case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN"; - case MipsISD::FAbs: return "MipsISD::FAbs"; - case MipsISD::FMS: return "MipsISD::FMS"; - case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; - case MipsISD::FPCmp: return "MipsISD::FPCmp"; - case MipsISD::FSELECT: return "MipsISD::FSELECT"; - case MipsISD::MTC1_D64: return "MipsISD::MTC1_D64"; - case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T"; - case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F"; - case MipsISD::TruncIntFP: return "MipsISD::TruncIntFP"; - case MipsISD::MFHI: return "MipsISD::MFHI"; - case MipsISD::MFLO: return "MipsISD::MFLO"; - case MipsISD::MTLOHI: return "MipsISD::MTLOHI"; - case MipsISD::Mult: return "MipsISD::Mult"; - case MipsISD::Multu: return "MipsISD::Multu"; - case MipsISD::MAdd: return "MipsISD::MAdd"; - case MipsISD::MAddu: return "MipsISD::MAddu"; - case MipsISD::MSub: return "MipsISD::MSub"; - case MipsISD::MSubu: return "MipsISD::MSubu"; - case MipsISD::DivRem: return "MipsISD::DivRem"; - case MipsISD::DivRemU: return "MipsISD::DivRemU"; - case MipsISD::DivRem16: return "MipsISD::DivRem16"; - case MipsISD::DivRemU16: return "MipsISD::DivRemU16"; - case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; - case MipsISD::ExtractElementF64: return 
"MipsISD::ExtractElementF64"; - case MipsISD::Wrapper: return "MipsISD::Wrapper"; - case MipsISD::DynAlloc: return "MipsISD::DynAlloc"; - case MipsISD::Sync: return "MipsISD::Sync"; - case MipsISD::Ext: return "MipsISD::Ext"; - case MipsISD::Ins: return "MipsISD::Ins"; - case MipsISD::CIns: return "MipsISD::CIns"; - case MipsISD::LWL: return "MipsISD::LWL"; - case MipsISD::LWR: return "MipsISD::LWR"; - case MipsISD::SWL: return "MipsISD::SWL"; - case MipsISD::SWR: return "MipsISD::SWR"; - case MipsISD::LDL: return "MipsISD::LDL"; - case MipsISD::LDR: return "MipsISD::LDR"; - case MipsISD::SDL: return "MipsISD::SDL"; - case MipsISD::SDR: return "MipsISD::SDR"; - case MipsISD::EXTP: return "MipsISD::EXTP"; - case MipsISD::EXTPDP: return "MipsISD::EXTPDP"; - case MipsISD::EXTR_S_H: return "MipsISD::EXTR_S_H"; - case MipsISD::EXTR_W: return "MipsISD::EXTR_W"; - case MipsISD::EXTR_R_W: return "MipsISD::EXTR_R_W"; - case MipsISD::EXTR_RS_W: return "MipsISD::EXTR_RS_W"; - case MipsISD::SHILO: return "MipsISD::SHILO"; - case MipsISD::MTHLIP: return "MipsISD::MTHLIP"; - case MipsISD::MULSAQ_S_W_PH: return "MipsISD::MULSAQ_S_W_PH"; - case MipsISD::MAQ_S_W_PHL: return "MipsISD::MAQ_S_W_PHL"; - case MipsISD::MAQ_S_W_PHR: return "MipsISD::MAQ_S_W_PHR"; - case MipsISD::MAQ_SA_W_PHL: return "MipsISD::MAQ_SA_W_PHL"; - case MipsISD::MAQ_SA_W_PHR: return "MipsISD::MAQ_SA_W_PHR"; - case MipsISD::DOUBLE_SELECT_I: return "MipsISD::DOUBLE_SELECT_I"; - case MipsISD::DOUBLE_SELECT_I64: return "MipsISD::DOUBLE_SELECT_I64"; - case MipsISD::DPAU_H_QBL: return "MipsISD::DPAU_H_QBL"; - case MipsISD::DPAU_H_QBR: return "MipsISD::DPAU_H_QBR"; - case MipsISD::DPSU_H_QBL: return "MipsISD::DPSU_H_QBL"; - case MipsISD::DPSU_H_QBR: return "MipsISD::DPSU_H_QBR"; - case MipsISD::DPAQ_S_W_PH: return "MipsISD::DPAQ_S_W_PH"; - case MipsISD::DPSQ_S_W_PH: return "MipsISD::DPSQ_S_W_PH"; - case MipsISD::DPAQ_SA_L_W: return "MipsISD::DPAQ_SA_L_W"; - case MipsISD::DPSQ_SA_L_W: return "MipsISD::DPSQ_SA_L_W"; - 
case MipsISD::DPA_W_PH: return "MipsISD::DPA_W_PH"; - case MipsISD::DPS_W_PH: return "MipsISD::DPS_W_PH"; - case MipsISD::DPAQX_S_W_PH: return "MipsISD::DPAQX_S_W_PH"; - case MipsISD::DPAQX_SA_W_PH: return "MipsISD::DPAQX_SA_W_PH"; - case MipsISD::DPAX_W_PH: return "MipsISD::DPAX_W_PH"; - case MipsISD::DPSX_W_PH: return "MipsISD::DPSX_W_PH"; - case MipsISD::DPSQX_S_W_PH: return "MipsISD::DPSQX_S_W_PH"; - case MipsISD::DPSQX_SA_W_PH: return "MipsISD::DPSQX_SA_W_PH"; - case MipsISD::MULSA_W_PH: return "MipsISD::MULSA_W_PH"; - case MipsISD::MULT: return "MipsISD::MULT"; - case MipsISD::MULTU: return "MipsISD::MULTU"; - case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP"; - case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP"; - case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP"; - case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP"; - case MipsISD::SHLL_DSP: return "MipsISD::SHLL_DSP"; - case MipsISD::SHRA_DSP: return "MipsISD::SHRA_DSP"; - case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP"; - case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP"; - case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP"; - case MipsISD::VALL_ZERO: return "MipsISD::VALL_ZERO"; - case MipsISD::VANY_ZERO: return "MipsISD::VANY_ZERO"; - case MipsISD::VALL_NONZERO: return "MipsISD::VALL_NONZERO"; - case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO"; - case MipsISD::VCEQ: return "MipsISD::VCEQ"; - case MipsISD::VCLE_S: return "MipsISD::VCLE_S"; - case MipsISD::VCLE_U: return "MipsISD::VCLE_U"; - case MipsISD::VCLT_S: return "MipsISD::VCLT_S"; - case MipsISD::VCLT_U: return "MipsISD::VCLT_U"; - case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; - case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT"; - case MipsISD::VNOR: return "MipsISD::VNOR"; - case MipsISD::VSHF: return "MipsISD::VSHF"; - case MipsISD::SHF: return "MipsISD::SHF"; - case MipsISD::ILVEV: return "MipsISD::ILVEV"; - case MipsISD::ILVOD: return "MipsISD::ILVOD"; - case 
MipsISD::ILVL: return "MipsISD::ILVL"; - case MipsISD::ILVR: return "MipsISD::ILVR"; - case MipsISD::PCKEV: return "MipsISD::PCKEV"; - case MipsISD::PCKOD: return "MipsISD::PCKOD"; - case MipsISD::INSVE: return "MipsISD::INSVE"; - } - return nullptr; -} - MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI) : TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) { diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 241e9343ae384..262939ebcdd8d 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -18,6 +18,7 @@ #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "Mips.h" +#include "MipsSelectionDAGInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -50,217 +51,6 @@ class MipsTargetMachine; class TargetLibraryInfo; class TargetRegisterClass; - namespace MipsISD { - - enum NodeType : unsigned { - // Start the numbering from where ISD NodeType finishes. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // Jump and link (call) - JmpLink, - - // Tail call - TailCall, - - // Get the Highest (63-48) 16 bits from a 64-bit immediate - Highest, - - // Get the Higher (47-32) 16 bits from a 64-bit immediate - Higher, - - // Get the High 16 bits from a 32/64-bit immediate - // No relation with Mips Hi register - Hi, - - // Get the Lower 16 bits from a 32/64-bit immediate - // No relation with Mips Lo register - Lo, - - // Get the High 16 bits from a 32 bit immediate for accessing the GOT. - GotHi, - - // Get the High 16 bits from a 32-bit immediate for accessing TLS. - TlsHi, - - // Handle gp_rel (small data/bss sections) relocation. 
- GPRel, - - // Thread Pointer - ThreadPointer, - - // Vector Floating Point Multiply and Subtract - FMS, - - // Floating Point Branch Conditional - FPBrcond, - - // Floating Point Compare - FPCmp, - - // Floating point Abs - FAbs, - - // Floating point select - FSELECT, - - // Node used to generate an MTC1 i32 to f64 instruction - MTC1_D64, - - // Floating Point Conditional Moves - CMovFP_T, - CMovFP_F, - - // FP-to-int truncation node. - TruncIntFP, - - // Return - Ret, - - // Interrupt, exception, error trap Return - ERet, - - // Software Exception Return. - EH_RETURN, - - // Node used to extract integer from accumulator. - MFHI, - MFLO, - - // Node used to insert integers to accumulator. - MTLOHI, - - // Mult nodes. - Mult, - Multu, - - // MAdd/Sub nodes - MAdd, - MAddu, - MSub, - MSubu, - - // DivRem(u) - DivRem, - DivRemU, - DivRem16, - DivRemU16, - - BuildPairF64, - ExtractElementF64, - - Wrapper, - - DynAlloc, - - Sync, - - Ext, - Ins, - CIns, - - // EXTR.W intrinsic nodes. - EXTP, - EXTPDP, - EXTR_S_H, - EXTR_W, - EXTR_R_W, - EXTR_RS_W, - SHILO, - MTHLIP, - - // DPA.W intrinsic nodes. - MULSAQ_S_W_PH, - MAQ_S_W_PHL, - MAQ_S_W_PHR, - MAQ_SA_W_PHL, - MAQ_SA_W_PHR, - DPAU_H_QBL, - DPAU_H_QBR, - DPSU_H_QBL, - DPSU_H_QBR, - DPAQ_S_W_PH, - DPSQ_S_W_PH, - DPAQ_SA_L_W, - DPSQ_SA_L_W, - DPA_W_PH, - DPS_W_PH, - DPAQX_S_W_PH, - DPAQX_SA_W_PH, - DPAX_W_PH, - DPSX_W_PH, - DPSQX_S_W_PH, - DPSQX_SA_W_PH, - MULSA_W_PH, - - MULT, - MULTU, - MADD_DSP, - MADDU_DSP, - MSUB_DSP, - MSUBU_DSP, - - // DSP shift nodes. - SHLL_DSP, - SHRA_DSP, - SHRL_DSP, - - // DSP setcc and select_cc nodes. - SETCC_DSP, - SELECT_CC_DSP, - - // Vector comparisons. - // These take a vector and return a boolean. - VALL_ZERO, - VANY_ZERO, - VALL_NONZERO, - VANY_NONZERO, - - // These take a vector and return a vector bitmask. - VCEQ, - VCLE_S, - VCLE_U, - VCLT_S, - VCLT_U, - - // Vector Shuffle with mask as an operand - VSHF, // Generic shuffle - SHF, // 4-element set shuffle. 
- ILVEV, // Interleave even elements - ILVOD, // Interleave odd elements - ILVL, // Interleave left elements - ILVR, // Interleave right elements - PCKEV, // Pack even elements - PCKOD, // Pack odd elements - - // Vector Lane Copy - INSVE, // Copy element from one vector to another - - // Combined (XOR (OR $a, $b), -1) - VNOR, - - // Extended vector element extraction - VEXTRACT_SEXT_ELT, - VEXTRACT_ZEXT_ELT, - - // Double select nodes for machines without conditional-move. - DOUBLE_SELECT_I, - DOUBLE_SELECT_I64, - - // Load/Store Left/Right nodes. - FIRST_MEMORY_OPCODE, - LWL = FIRST_MEMORY_OPCODE, - LWR, - SWL, - SWR, - LDL, - LDR, - SDL, - SDR, - LAST_MEMORY_OPCODE = SDR, - }; - - } // ene namespace MipsISD - //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// @@ -331,10 +121,6 @@ class TargetRegisterClass; void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const override; - /// getTargetNodeName - This method returns the name of a target specific - // DAG node. 
- const char *getTargetNodeName(unsigned Opcode) const override; - /// getSetCCResultType - get the ISD::SETCC result ValueType EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; diff --git a/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp index 6497ac5bb2df6..d216624fd3934 100644 --- a/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSelectionDAGInfo.cpp @@ -7,13 +7,40 @@ //===----------------------------------------------------------------------===// #include "MipsSelectionDAGInfo.h" -#include "MipsISelLowering.h" + +#define GET_SDNODE_DESC +#include "MipsGenSDNodeInfo.inc" using namespace llvm; +MipsSelectionDAGInfo::MipsSelectionDAGInfo() + : SelectionDAGGenTargetInfo(MipsGenSDNodeInfo) {} + MipsSelectionDAGInfo::~MipsSelectionDAGInfo() = default; -bool MipsSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= MipsISD::FIRST_MEMORY_OPCODE && - Opcode <= MipsISD::LAST_MEMORY_OPCODE; +const char *MipsSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { + // These nodes don't have corresponding entries in *.td files yet. 
+ switch (static_cast(Opcode)) { + // clang-format off + case MipsISD::FAbs: return "MipsISD::FAbs"; + case MipsISD::DynAlloc: return "MipsISD::DynAlloc"; + case MipsISD::DOUBLE_SELECT_I: return "MipsISD::DOUBLE_SELECT_I"; + case MipsISD::DOUBLE_SELECT_I64: return "MipsISD::DOUBLE_SELECT_I64"; + // clang-format on + } + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + +void MipsSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + default: + break; + case MipsISD::ERet: + // invalid number of operands; expected at most 2, got 3 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } diff --git a/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h b/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h index 934cd2e056595..6b3682648b575 100644 --- a/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h +++ b/llvm/lib/Target/Mips/MipsSelectionDAGInfo.h @@ -11,13 +11,35 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "MipsGenSDNodeInfo.inc" + namespace llvm { +namespace MipsISD { + +enum NodeType : unsigned { + // Floating point Abs + FAbs = GENERATED_OPCODE_END, + + DynAlloc, + + // Double select nodes for machines without conditional-move. 
+ DOUBLE_SELECT_I, + DOUBLE_SELECT_I64, +}; + +} // namespace MipsISD -class MipsSelectionDAGInfo : public SelectionDAGTargetInfo { +class MipsSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + MipsSelectionDAGInfo(); + ~MipsSelectionDAGInfo() override; - bool isTargetMemoryOpcode(unsigned Opcode) const override; + const char *getTargetNodeName(unsigned Opcode) const override; + + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; }; } // namespace llvm From 4485585f4ca33835c6e84eb42925d8ea9649e9a8 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 01:02:11 +0300 Subject: [PATCH 07/14] NVPTX --- llvm/lib/Target/NVPTX/CMakeLists.txt | 1 + llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 1 + llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 93 +------------------ llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 90 ------------------ .../Target/NVPTX/NVPTXSelectionDAGInfo.cpp | 78 +++++++++++++++- llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h | 38 +++++++- 6 files changed, 118 insertions(+), 183 deletions(-) diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt index 693f0d0b35edc..f9c24750c4836 100644 --- a/llvm/lib/Target/NVPTX/CMakeLists.txt +++ b/llvm/lib/Target/NVPTX/CMakeLists.txt @@ -6,6 +6,7 @@ tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel) tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info) tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM NVPTXGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(NVPTXCommonTableGen) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 648e8e239cf78..27ad953b55f0a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -17,6 +17,7 @@ #include "NVPTX.h" #include "NVPTXISelLowering.h" #include 
"NVPTXRegisterInfo.h" +#include "NVPTXSelectionDAGInfo.h" #include "NVPTXTargetMachine.h" #include "llvm/ADT/MapVector.h" #include "llvm/CodeGen/SelectionDAGISel.h" diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index f496e6f92bef2..fbd4a83093def 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -14,6 +14,7 @@ #include "NVPTXISelLowering.h" #include "MCTargetDesc/NVPTXBaseInfo.h" #include "NVPTX.h" +#include "NVPTXSelectionDAGInfo.h" #include "NVPTXSubtarget.h" #include "NVPTXTargetMachine.h" #include "NVPTXTargetObjectFile.h" @@ -1035,92 +1036,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i128, Custom); } -const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { - -#define MAKE_CASE(V) \ - case V: \ - return #V; - - switch ((NVPTXISD::NodeType)Opcode) { - case NVPTXISD::FIRST_NUMBER: - break; - - MAKE_CASE(NVPTXISD::CALL) - MAKE_CASE(NVPTXISD::RET_GLUE) - MAKE_CASE(NVPTXISD::LOAD_PARAM) - MAKE_CASE(NVPTXISD::Wrapper) - MAKE_CASE(NVPTXISD::DeclareParam) - MAKE_CASE(NVPTXISD::DeclareScalarParam) - MAKE_CASE(NVPTXISD::DeclareRet) - MAKE_CASE(NVPTXISD::DeclareScalarRet) - MAKE_CASE(NVPTXISD::DeclareRetParam) - MAKE_CASE(NVPTXISD::PrintCall) - MAKE_CASE(NVPTXISD::PrintConvergentCall) - MAKE_CASE(NVPTXISD::PrintCallUni) - MAKE_CASE(NVPTXISD::PrintConvergentCallUni) - MAKE_CASE(NVPTXISD::LoadParam) - MAKE_CASE(NVPTXISD::LoadParamV2) - MAKE_CASE(NVPTXISD::LoadParamV4) - MAKE_CASE(NVPTXISD::StoreParam) - MAKE_CASE(NVPTXISD::StoreParamV2) - MAKE_CASE(NVPTXISD::StoreParamV4) - MAKE_CASE(NVPTXISD::StoreParamS32) - MAKE_CASE(NVPTXISD::StoreParamU32) - MAKE_CASE(NVPTXISD::CallArgBegin) - MAKE_CASE(NVPTXISD::CallArg) - MAKE_CASE(NVPTXISD::LastCallArg) - MAKE_CASE(NVPTXISD::CallArgEnd) - MAKE_CASE(NVPTXISD::CallVoid) - MAKE_CASE(NVPTXISD::CallVal) - 
MAKE_CASE(NVPTXISD::CallSymbol) - MAKE_CASE(NVPTXISD::Prototype) - MAKE_CASE(NVPTXISD::MoveParam) - MAKE_CASE(NVPTXISD::StoreRetval) - MAKE_CASE(NVPTXISD::StoreRetvalV2) - MAKE_CASE(NVPTXISD::StoreRetvalV4) - MAKE_CASE(NVPTXISD::PseudoUseParam) - MAKE_CASE(NVPTXISD::UNPACK_VECTOR) - MAKE_CASE(NVPTXISD::BUILD_VECTOR) - MAKE_CASE(NVPTXISD::RETURN) - MAKE_CASE(NVPTXISD::CallSeqBegin) - MAKE_CASE(NVPTXISD::CallSeqEnd) - MAKE_CASE(NVPTXISD::CallPrototype) - MAKE_CASE(NVPTXISD::ProxyReg) - MAKE_CASE(NVPTXISD::LoadV2) - MAKE_CASE(NVPTXISD::LoadV4) - MAKE_CASE(NVPTXISD::LoadV8) - MAKE_CASE(NVPTXISD::LDUV2) - MAKE_CASE(NVPTXISD::LDUV4) - MAKE_CASE(NVPTXISD::StoreV2) - MAKE_CASE(NVPTXISD::StoreV4) - MAKE_CASE(NVPTXISD::StoreV8) - MAKE_CASE(NVPTXISD::FSHL_CLAMP) - MAKE_CASE(NVPTXISD::FSHR_CLAMP) - MAKE_CASE(NVPTXISD::BFE) - MAKE_CASE(NVPTXISD::BFI) - MAKE_CASE(NVPTXISD::PRMT) - MAKE_CASE(NVPTXISD::FCOPYSIGN) - MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC) - MAKE_CASE(NVPTXISD::STACKRESTORE) - MAKE_CASE(NVPTXISD::STACKSAVE) - MAKE_CASE(NVPTXISD::SETP_F16X2) - MAKE_CASE(NVPTXISD::SETP_BF16X2) - MAKE_CASE(NVPTXISD::Dummy) - MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED) - MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED) - MAKE_CASE(NVPTXISD::BrxEnd) - MAKE_CASE(NVPTXISD::BrxItem) - MAKE_CASE(NVPTXISD::BrxStart) - MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED) - MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X) - MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y) - MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z) - } - return nullptr; - -#undef MAKE_CASE -} - TargetLoweringBase::LegalizeTypeAction NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const { if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && @@ -1677,7 +1592,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, StoreOperands.push_back(GetStoredValue(J + K, EltVT, PartAlign)); StoreOperands.push_back(InGlue); - 
NVPTXISD::NodeType Op; + unsigned Op; switch (NumElts) { case 1: Op = NVPTXISD::StoreParam; @@ -1914,7 +1829,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector LoadVTs(VectorizedSize, EltType); LoadVTs.append({MVT::Other, MVT::Glue}); - NVPTXISD::NodeType Op; + unsigned Op; switch (VectorizedSize) { case 1: Op = NVPTXISD::LoadParam; @@ -3616,7 +3531,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, for (const unsigned J : llvm::seq(NumElts)) StoreOperands.push_back(GetRetVal(I + J)); - NVPTXISD::NodeType Op; + unsigned Op; switch (NumElts) { case 1: Op = NVPTXISD::StoreRetval; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 8d71022a1f102..497922236f1c2 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -20,94 +20,6 @@ #include "llvm/Support/AtomicOrdering.h" namespace llvm { -namespace NVPTXISD { -enum NodeType : unsigned { - // Start the numbering from where ISD NodeType finishes. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - Wrapper, - CALL, - RET_GLUE, - LOAD_PARAM, - DeclareParam, - DeclareScalarParam, - DeclareRetParam, - DeclareRet, - DeclareScalarRet, - PrintCall, - PrintConvergentCall, - PrintCallUni, - PrintConvergentCallUni, - CallArgBegin, - CallArg, - LastCallArg, - CallArgEnd, - CallVoid, - CallVal, - CallSymbol, - Prototype, - MoveParam, - PseudoUseParam, - RETURN, - CallSeqBegin, - CallSeqEnd, - CallPrototype, - ProxyReg, - FSHL_CLAMP, - FSHR_CLAMP, - MUL_WIDE_SIGNED, - MUL_WIDE_UNSIGNED, - SETP_F16X2, - SETP_BF16X2, - BFE, - BFI, - PRMT, - - /// This node is similar to ISD::BUILD_VECTOR except that the output may be - /// implicitly bitcast to a scalar. This allows for the representation of - /// packing move instructions for vector types which are not legal i.e. v2i32 - BUILD_VECTOR, - - /// This node is the inverse of NVPTX::BUILD_VECTOR. 
It takes a single value - /// which may be a scalar and unpacks it into multiple values by implicitly - /// converting it to a vector. - UNPACK_VECTOR, - - FCOPYSIGN, - DYNAMIC_STACKALLOC, - STACKRESTORE, - STACKSAVE, - BrxStart, - BrxItem, - BrxEnd, - CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED, - CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X, - CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y, - CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z, - Dummy, - - FIRST_MEMORY_OPCODE, - LoadV2 = FIRST_MEMORY_OPCODE, - LoadV4, - LoadV8, - LDUV2, // LDU.v2 - LDUV4, // LDU.v4 - StoreV2, - StoreV4, - StoreV8, - LoadParam, - LoadParamV2, - LoadParamV4, - StoreParam, - StoreParamV2, - StoreParamV4, - StoreParamS32, // to sext and store a <32bit value, not used currently - StoreParamU32, // to zext and store a <32bit value, not used currently - StoreRetval, - StoreRetvalV2, - StoreRetvalV4, - LAST_MEMORY_OPCODE = StoreRetvalV4, -}; -} class NVPTXSubtarget; @@ -122,8 +34,6 @@ class NVPTXTargetLowering : public TargetLowering { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - const char *getTargetNodeName(unsigned Opcode) const override; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override; diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp index d2035c6f8166f..eb5a98665207a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp @@ -7,13 +7,85 @@ //===----------------------------------------------------------------------===// #include "NVPTXSelectionDAGInfo.h" -#include "NVPTXISelLowering.h" + +#define GET_SDNODE_DESC +#include "NVPTXGenSDNodeInfo.inc" using namespace llvm; +NVPTXSelectionDAGInfo::NVPTXSelectionDAGInfo() + : SelectionDAGGenTargetInfo(NVPTXGenSDNodeInfo) {} + NVPTXSelectionDAGInfo::~NVPTXSelectionDAGInfo() = default; +const char 
*NVPTXSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { +#define MAKE_CASE(V) \ + case V: \ + return #V; + + // These nodes don't have corresponding entries in *.td files yet. + switch (static_cast(Opcode)) { + MAKE_CASE(NVPTXISD::LOAD_PARAM) + MAKE_CASE(NVPTXISD::DeclareScalarRet) + MAKE_CASE(NVPTXISD::CallSymbol) + MAKE_CASE(NVPTXISD::CallSeqBegin) + MAKE_CASE(NVPTXISD::CallSeqEnd) + MAKE_CASE(NVPTXISD::LoadV2) + MAKE_CASE(NVPTXISD::LoadV4) + MAKE_CASE(NVPTXISD::LoadV8) + MAKE_CASE(NVPTXISD::LDUV2) + MAKE_CASE(NVPTXISD::LDUV4) + MAKE_CASE(NVPTXISD::StoreV2) + MAKE_CASE(NVPTXISD::StoreV4) + MAKE_CASE(NVPTXISD::StoreV8) + MAKE_CASE(NVPTXISD::SETP_F16X2) + MAKE_CASE(NVPTXISD::SETP_BF16X2) + MAKE_CASE(NVPTXISD::UNPACK_VECTOR) + MAKE_CASE(NVPTXISD::Dummy) + } +#undef MAKE_CASE + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + bool NVPTXSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= NVPTXISD::FIRST_MEMORY_OPCODE && - Opcode <= NVPTXISD::LAST_MEMORY_OPCODE; + // These nodes don't have corresponding entries in *.td files. + if (Opcode >= NVPTXISD::FIRST_MEMORY_OPCODE && + Opcode <= NVPTXISD::LAST_MEMORY_OPCODE) + return true; + + // These nodes lack SDNPMemOperand property in *.td files. 
+ switch (static_cast(Opcode)) { + default: + break; + case NVPTXISD::LoadParam: + case NVPTXISD::LoadParamV2: + case NVPTXISD::LoadParamV4: + case NVPTXISD::StoreParam: + case NVPTXISD::StoreParamV2: + case NVPTXISD::StoreParamV4: + case NVPTXISD::StoreParamS32: + case NVPTXISD::StoreParamU32: + case NVPTXISD::StoreRetval: + case NVPTXISD::StoreRetvalV2: + case NVPTXISD::StoreRetvalV4: + return true; + } + + return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode); +} + +void NVPTXSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + default: + break; + case NVPTXISD::ProxyReg: + // invalid number of results; expected 3, got 1 + case NVPTXISD::BrxEnd: + // invalid number of results; expected 1, got 2 + return; + } + + return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h index 9d69f48026c79..bed06a812eb9a 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h @@ -11,13 +11,49 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "NVPTXGenSDNodeInfo.inc" + namespace llvm { +namespace NVPTXISD { + +enum NodeType : unsigned { + LOAD_PARAM = GENERATED_OPCODE_END, + DeclareScalarRet, + CallSymbol, + CallSeqBegin, + CallSeqEnd, + SETP_F16X2, + SETP_BF16X2, + UNPACK_VECTOR, + Dummy, + + FIRST_MEMORY_OPCODE, + LoadV2 = FIRST_MEMORY_OPCODE, + LoadV4, + LoadV8, + LDUV2, // LDU.v2 + LDUV4, // LDU.v4 + StoreV2, + StoreV4, + StoreV8, + LAST_MEMORY_OPCODE = StoreV8, +}; -class NVPTXSelectionDAGInfo : public SelectionDAGTargetInfo { +} // namespace NVPTXISD + +class NVPTXSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + NVPTXSelectionDAGInfo(); + ~NVPTXSelectionDAGInfo() override; + const char *getTargetNodeName(unsigned Opcode) const override; + bool isTargetMemoryOpcode(unsigned Opcode) const 
override; + + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; }; } // namespace llvm From b9a93e8114bb27305fda88c6024c467af11bed4c Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 01:02:16 +0300 Subject: [PATCH 08/14] PowerPC --- llvm/lib/Target/PowerPC/CMakeLists.txt | 1 + llvm/lib/Target/PowerPC/PPCFastISel.cpp | 1 + llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 1 + llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 186 +----- llvm/lib/Target/PowerPC/PPCISelLowering.h | 589 ------------------ llvm/lib/Target/PowerPC/PPCInstrInfo.td | 36 +- .../Target/PowerPC/PPCSelectionDAGInfo.cpp | 59 +- llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h | 57 +- 8 files changed, 131 insertions(+), 799 deletions(-) diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt index 3808a26a0b92a..d1611e93cbd2e 100644 --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -11,6 +11,7 @@ tablegen(LLVM PPCGenFastISel.inc -gen-fast-isel) tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM PPCGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM PPCGenExegesis.inc -gen-exegesis) tablegen(LLVM PPCGenRegisterBank.inc -gen-register-bank) diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index e92e00f80c552..ae05147594e75 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -17,6 +17,7 @@ #include "PPCCallingConv.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" +#include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp 
b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index f921032356d65..ab2ec2444cf88 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,6 +16,7 @@ #include "PPC.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" +#include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APInt.h" diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0c2a506005604..a44908e659544 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -21,6 +21,7 @@ #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCRegisterInfo.h" +#include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APFloat.h" @@ -1691,191 +1692,6 @@ bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore( return false; } -const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((PPCISD::NodeType)Opcode) { - case PPCISD::FIRST_NUMBER: break; - case PPCISD::FSEL: return "PPCISD::FSEL"; - case PPCISD::XSMAXC: return "PPCISD::XSMAXC"; - case PPCISD::XSMINC: return "PPCISD::XSMINC"; - case PPCISD::FCFID: return "PPCISD::FCFID"; - case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; - case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; - case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; - case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; - case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; - case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; - case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; - case PPCISD::FRE: return "PPCISD::FRE"; - case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; - case PPCISD::FTSQRT: - return "PPCISD::FTSQRT"; - case PPCISD::FSQRT: - return "PPCISD::FSQRT"; - case PPCISD::STFIWX: return "PPCISD::STFIWX"; - case PPCISD::VPERM: return "PPCISD::VPERM"; - case PPCISD::XXSPLT: return 
"PPCISD::XXSPLT"; - case PPCISD::XXSPLTI_SP_TO_DP: - return "PPCISD::XXSPLTI_SP_TO_DP"; - case PPCISD::XXSPLTI32DX: - return "PPCISD::XXSPLTI32DX"; - case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; - case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; - case PPCISD::XXPERM: - return "PPCISD::XXPERM"; - case PPCISD::VECSHL: return "PPCISD::VECSHL"; - case PPCISD::CMPB: return "PPCISD::CMPB"; - case PPCISD::Hi: return "PPCISD::Hi"; - case PPCISD::Lo: return "PPCISD::Lo"; - case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; - case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8"; - case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16"; - case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; - case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; - case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA"; - case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; - case PPCISD::SRL: return "PPCISD::SRL"; - case PPCISD::SRA: return "PPCISD::SRA"; - case PPCISD::SHL: return "PPCISD::SHL"; - case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; - case PPCISD::CALL: return "PPCISD::CALL"; - case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; - case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC"; - case PPCISD::CALL_RM: - return "PPCISD::CALL_RM"; - case PPCISD::CALL_NOP_RM: - return "PPCISD::CALL_NOP_RM"; - case PPCISD::CALL_NOTOC_RM: - return "PPCISD::CALL_NOTOC_RM"; - case PPCISD::MTCTR: return "PPCISD::MTCTR"; - case PPCISD::BCTRL: return "PPCISD::BCTRL"; - case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; - case PPCISD::BCTRL_RM: - return "PPCISD::BCTRL_RM"; - case PPCISD::BCTRL_LOAD_TOC_RM: - return "PPCISD::BCTRL_LOAD_TOC_RM"; - case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE"; - case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; - case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; - case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; - case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; - 
case PPCISD::MFVSR: return "PPCISD::MFVSR"; - case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; - case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; - case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; - case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; - case PPCISD::SCALAR_TO_VECTOR_PERMUTED: - return "PPCISD::SCALAR_TO_VECTOR_PERMUTED"; - case PPCISD::ANDI_rec_1_EQ_BIT: - return "PPCISD::ANDI_rec_1_EQ_BIT"; - case PPCISD::ANDI_rec_1_GT_BIT: - return "PPCISD::ANDI_rec_1_GT_BIT"; - case PPCISD::VCMP: return "PPCISD::VCMP"; - case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec"; - case PPCISD::LBRX: return "PPCISD::LBRX"; - case PPCISD::STBRX: return "PPCISD::STBRX"; - case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; - case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; - case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; - case PPCISD::STXSIX: return "PPCISD::STXSIX"; - case PPCISD::VEXTS: return "PPCISD::VEXTS"; - case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; - case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; - case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE"; - case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE"; - case PPCISD::ST_VSR_SCAL_INT: - return "PPCISD::ST_VSR_SCAL_INT"; - case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; - case PPCISD::BDNZ: return "PPCISD::BDNZ"; - case PPCISD::BDZ: return "PPCISD::BDZ"; - case PPCISD::MFFS: return "PPCISD::MFFS"; - case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; - case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; - case PPCISD::CR6SET: return "PPCISD::CR6SET"; - case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; - case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; - case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; - case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; - case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; - case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; - case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; - case PPCISD::ADDI_TLSGD_L: return 
"PPCISD::ADDI_TLSGD_L"; - case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; - case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX"; - case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER"; - case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; - case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX"; - case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX"; - case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; - case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; - case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; - case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; - case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; - case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; - case PPCISD::PADDI_DTPREL: - return "PPCISD::PADDI_DTPREL"; - case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; - case PPCISD::SC: return "PPCISD::SC"; - case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; - case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; - case PPCISD::RFEBB: return "PPCISD::RFEBB"; - case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; - case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; - case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; - case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64"; - case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE"; - case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; - case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; - case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF"; - case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; - case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR: - return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; - case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR: - return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR"; - case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD"; - case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD"; - case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG"; - case PPCISD::XXMFACC: return "PPCISD::XXMFACC"; - 
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; - case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT"; - case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT"; - case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; - case PPCISD::STRICT_FADDRTZ: - return "PPCISD::STRICT_FADDRTZ"; - case PPCISD::STRICT_FCTIDZ: - return "PPCISD::STRICT_FCTIDZ"; - case PPCISD::STRICT_FCTIWZ: - return "PPCISD::STRICT_FCTIWZ"; - case PPCISD::STRICT_FCTIDUZ: - return "PPCISD::STRICT_FCTIDUZ"; - case PPCISD::STRICT_FCTIWUZ: - return "PPCISD::STRICT_FCTIWUZ"; - case PPCISD::STRICT_FCFID: - return "PPCISD::STRICT_FCFID"; - case PPCISD::STRICT_FCFIDU: - return "PPCISD::STRICT_FCFIDU"; - case PPCISD::STRICT_FCFIDS: - return "PPCISD::STRICT_FCFIDS"; - case PPCISD::STRICT_FCFIDUS: - return "PPCISD::STRICT_FCFIDUS"; - case PPCISD::LXVRZX: return "PPCISD::LXVRZX"; - case PPCISD::STORE_COND: - return "PPCISD::STORE_COND"; - case PPCISD::SETBC: - return "PPCISD::SETBC"; - case PPCISD::SETBCR: - return "PPCISD::SETBCR"; - case PPCISD::ADDC: - return "PPCISD::ADDC"; - case PPCISD::ADDE: - return "PPCISD::ADDE"; - case PPCISD::SUBC: - return "PPCISD::SUBC"; - case PPCISD::SUBE: - return "PPCISD::SUBE"; - } - return nullptr; -} - EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, EVT VT) const { if (!VT.isVector()) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 2c55b5427297a..8f9af04fa42e9 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -34,591 +34,6 @@ namespace llvm { - namespace PPCISD { - - // When adding a NEW PPCISD node please add it to the correct position in - // the enum. The order of elements in this enum matters! - // Values that are added between FIRST_MEMORY_OPCODE and LAST_MEMORY_OPCODE - // are considered memory opcodes and are treated differently than other - // entries. 
- enum NodeType : unsigned { - // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - /// FSEL - Traditional three-operand fsel node. - /// - FSEL, - - /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions. - XSMAXC, - XSMINC, - - /// FCFID - The FCFID instruction, taking an f64 operand and producing - /// and f64 value containing the FP representation of the integer that - /// was temporarily in the f64 operand. - FCFID, - - /// Newer FCFID[US] integer-to-floating-point conversion instructions for - /// unsigned integers and single-precision outputs. - FCFIDU, - FCFIDS, - FCFIDUS, - - /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 - /// operand, producing an f64 value containing the integer representation - /// of that FP value. - FCTIDZ, - FCTIWZ, - - /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for - /// unsigned integers with round toward zero. - FCTIDUZ, - FCTIWUZ, - - /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in - /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer. - VEXTS, - - /// Reciprocal estimate instructions (unary FP ops). - FRE, - FRSQRTE, - - /// Test instruction for software square root. - FTSQRT, - - /// Square root instruction. - FSQRT, - - /// VPERM - The PPC VPERM Instruction. - /// - VPERM, - - /// XXSPLT - The PPC VSX splat instructions - /// - XXSPLT, - - /// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for - /// converting immediate single precision numbers to double precision - /// vector or scalar. - XXSPLTI_SP_TO_DP, - - /// XXSPLTI32DX - The PPC XXSPLTI32DX instruction. 
- /// - XXSPLTI32DX, - - /// VECINSERT - The PPC vector insert instruction - /// - VECINSERT, - - /// VECSHL - The PPC vector shift left instruction - /// - VECSHL, - - /// XXPERMDI - The PPC XXPERMDI instruction - /// - XXPERMDI, - XXPERM, - - /// The CMPB instruction (takes two operands of i32 or i64). - CMPB, - - /// Hi/Lo - These represent the high and low 16-bit parts of a global - /// address respectively. These nodes have two operands, the first of - /// which must be a TargetGlobalAddress, and the second of which must be a - /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', - /// though these are usually folded into other nodes. - Hi, - Lo, - - /// The following two target-specific nodes are used for calls through - /// function pointers in the 64-bit SVR4 ABI. - - /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) - /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to - /// compute an allocation on the stack. - DYNALLOC, - - /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to - /// compute an offset from native SP to the address of the most recent - /// dynamic alloca. - DYNAREAOFFSET, - - /// To avoid stack clash, allocation is performed by block and each block is - /// probed. - PROBED_ALLOCA, - - /// The result of the mflr at function entry, used for PIC code. - GlobalBaseReg, - - /// These nodes represent PPC shifts. - /// - /// For scalar types, only the last `n + 1` bits of the shift amounts - /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc. - /// for exact behaviors. - /// - /// For vector types, only the last n bits are used. See vsld. - SRL, - SRA, - SHL, - - /// These nodes represent PPC arithmetic operations with carry. - ADDC, - ADDE, - SUBC, - SUBE, - - /// FNMSUB - Negated multiply-subtract instruction. - FNMSUB, - - /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign - /// word and shift left immediate. 
- EXTSWSLI, - - /// The combination of sra[wd]i and addze used to implemented signed - /// integer division by a power of 2. The first operand is the dividend, - /// and the second is the constant shift amount (representing the - /// divisor). - SRA_ADDZE, - - /// CALL - A direct function call. - /// CALL_NOP is a call with the special NOP which follows 64-bit - /// CALL_NOTOC the caller does not use the TOC. - /// SVR4 calls and 32-bit/64-bit AIX calls. - CALL, - CALL_NOP, - CALL_NOTOC, - - /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a - /// MTCTR instruction. - MTCTR, - - /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a - /// BCTRL instruction. - BCTRL, - - /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl - /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX - /// and 64-bit AIX. - BCTRL_LOAD_TOC, - - /// The variants that implicitly define rounding mode for calls with - /// strictfp semantics. - CALL_RM, - CALL_NOP_RM, - CALL_NOTOC_RM, - BCTRL_RM, - BCTRL_LOAD_TOC_RM, - - /// Return with a glue operand, matched by 'blr' - RET_GLUE, - - /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. - /// This copies the bits corresponding to the specified CRREG into the - /// resultant GPR. Bits corresponding to other CR regs are undefined. - MFOCRF, - - /// Direct move from a VSX register to a GPR - MFVSR, - - /// Direct move from a GPR to a VSX register (algebraic) - MTVSRA, - - /// Direct move from a GPR to a VSX register (zero) - MTVSRZ, - - /// Direct move of 2 consecutive GPR to a VSX register. - BUILD_FP128, - - /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and - /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is - /// unsupported for this target. - /// Merge 2 GPRs to a single SPE register. - BUILD_SPE64, - - /// Extract SPE register component, second argument is high or low. 
- EXTRACT_SPE, - - /// Extract a subvector from signed integer vector and convert to FP. - /// It is primarily used to convert a (widened) illegal integer vector - /// type to a legal floating point vector type. - /// For example v2i32 -> widened to v4i32 -> v2f64 - SINT_VEC_TO_FP, - - /// Extract a subvector from unsigned integer vector and convert to FP. - /// As with SINT_VEC_TO_FP, used for converting illegal types. - UINT_VEC_TO_FP, - - /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to - /// place the value into the least significant element of the most - /// significant doubleword in the vector. This is not element zero for - /// anything smaller than a doubleword on either endianness. This node has - /// the same semantics as SCALAR_TO_VECTOR except that the value remains in - /// the aforementioned location in the vector register. - SCALAR_TO_VECTOR_PERMUTED, - - // FIXME: Remove these once the ANDI glue bug is fixed: - /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the - /// eq or gt bit of CR0 after executing andi. x, 1. This is used to - /// implement truncation of i32 or i64 to i1. - ANDI_rec_1_EQ_BIT, - ANDI_rec_1_GT_BIT, - - // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit - // target (returns (Lo, Hi)). It takes a chain operand. - READ_TIME_BASE, - - // EH_SJLJ_SETJMP - SjLj exception handling setjmp. - EH_SJLJ_SETJMP, - - // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. - EH_SJLJ_LONGJMP, - - /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* - /// instructions. For lack of better number, we use the opcode number - /// encoding for the OPC field to identify the compare. For example, 838 - /// is VCMPGTSH. - VCMP, - - /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the - /// altivec VCMP*_rec instructions. For lack of better number, we use the - /// opcode number encoding for the OPC field to identify the compare. 
For - /// example, 838 is VCMPGTSH. - VCMP_rec, - - /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This - /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the - /// condition register to branch on, OPC is the branch opcode to use (e.g. - /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is - /// an optional input flag argument. - COND_BRANCH, - - /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based - /// loops. - BDNZ, - BDZ, - - /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding - /// towards zero. Used only as part of the long double-to-int - /// conversion sequence. - FADDRTZ, - - /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. - MFFS, - - /// TC_RETURN - A tail call return. - /// operand #0 chain - /// operand #1 callee (register or absolute) - /// operand #2 stack adjustment - /// operand #3 optional in flag - TC_RETURN, - - /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls - CR6SET, - CR6UNSET, - - /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS - /// for non-position independent code on PPC32. - PPC32_GOT, - - /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and - /// local dynamic TLS and position indendepent code on PPC32. - PPC32_PICGOT, - - /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec - /// TLS model, produces an ADDIS8 instruction that adds the GOT - /// base to sym\@got\@tprel\@ha. - ADDIS_GOT_TPREL_HA, - - /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec - /// TLS model, produces a LD instruction with base register G8RReg - /// and offset sym\@got\@tprel\@l. This completes the addition that - /// finds the offset of "sym" relative to the thread pointer. 
- LD_GOT_TPREL_L, - - /// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec - /// and local-exec TLS models, produces an ADD instruction that adds - /// the contents of G8RReg to the thread pointer. Symbol contains a - /// relocation sym\@tls which is to be replaced by the thread pointer - /// and identifies to the linker that the instruction is part of a - /// TLS sequence. - ADD_TLS, - - /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS - /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym\@got\@tlsgd\@ha. - ADDIS_TLSGD_HA, - - /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by - /// ADDIS_TLSGD_L_ADDR until after register assignment. - ADDI_TLSGD_L, - - /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by - /// ADDIS_TLSGD_L_ADDR until after register assignment. - GET_TLS_ADDR, - - /// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on - /// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread - /// pointer. At the end of the call, the thread pointer is found in R3. - GET_TPOINTER, - - /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that - /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following - /// register assignment. - ADDI_TLSGD_L_ADDR, - - /// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - /// Op that combines two register copies of TOC entries - /// (region handle into R3 and variable offset into R4) followed by a - /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr. - /// This node is used in 64-bit mode as well (in which case the result is - /// G8RC and inputs are X3/X4). 
- TLSGD_AIX, - - /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, - /// produces a call to .__tls_get_mod(_$TLSML\@ml). - GET_TLS_MOD_AIX, - - /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) - /// Op that requires a single input of the module handle TOC entry in R3, - /// and generates a GET_TLS_MOD_AIX node which will be expanded into a call - /// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes. - /// The only difference is the register class. - TLSLD_AIX, - - /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS - /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym\@got\@tlsld\@ha. - ADDIS_TLSLD_HA, - - /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by - /// ADDIS_TLSLD_L_ADDR until after register assignment. - ADDI_TLSLD_L, - - /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by - /// ADDIS_TLSLD_L_ADDR until after register assignment. - GET_TLSLD_ADDR, - - /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that - /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion - /// following register assignment. - ADDI_TLSLD_L_ADDR, - - /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS - /// model, produces an ADDIS8 instruction that adds X3 to - /// sym\@dtprel\@ha. - ADDIS_DTPREL_HA, - - /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@dtprel\@l. - ADDI_DTPREL_L, - - /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS - /// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel. 
- PADDI_DTPREL, - - /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded - /// during instruction selection to optimize a BUILD_VECTOR into - /// operations on splats. This is necessary to avoid losing these - /// optimizations due to constant folding. - VADD_SPLAT, - - /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned - /// operand identifies the operating system entry point. - SC, - - /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer. - CLRBHRB, - - /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch - /// history rolling buffer entry. - MFBHRBE, - - /// CHAIN = RFEBB CHAIN, State - Return from event-based branch. - RFEBB, - - /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little - /// endian. Maps to an xxswapd instruction that corrects an lxvd2x - /// or stxvd2x instruction. The chain is necessary because the - /// sequence replaces a load and needs to provide the same number - /// of outputs. - XXSWAPD, - - /// An SDNode for swaps that are not associated with any loads/stores - /// and thereby have no chain. - SWAP_NO_CHAIN, - - /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or - /// lower (IDX=1) half of v4f32 to v2f64. - FP_EXTEND_HALF, - - /// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done - /// either through an add like PADDI or through a PC Relative load like - /// PLD. - MAT_PCREL_ADDR, - - /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for - /// TLS global address when using dynamic access models. This can be done - /// through an add like PADDI. - TLS_DYNAMIC_MAT_PCREL_ADDR, - - /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address - /// when using local exec access models, and when prefixed instructions are - /// available. This is used with ADD_TLS to produce an add like PADDI. - TLS_LOCAL_EXEC_MAT_ADDR, - - /// ACC_BUILD = Build an accumulator register from 4 VSX registers. 
- ACC_BUILD, - - /// PAIR_BUILD = Build a vector pair register from 2 VSX registers. - PAIR_BUILD, - - /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of - /// an accumulator or pair register. This node is needed because - /// EXTRACT_SUBVECTOR expects the input and output vectors to have the same - /// element type. - EXTRACT_VSX_REG, - - /// XXMFACC = This corresponds to the xxmfacc instruction. - XXMFACC, - - // Constrained conversion from floating point to int - FIRST_STRICTFP_OPCODE, - STRICT_FCTIDZ = FIRST_STRICTFP_OPCODE, - STRICT_FCTIWZ, - STRICT_FCTIDUZ, - STRICT_FCTIWUZ, - - /// Constrained integer-to-floating-point conversion instructions. - STRICT_FCFID, - STRICT_FCFIDU, - STRICT_FCFIDS, - STRICT_FCFIDUS, - - /// Constrained floating point add in round-to-zero mode. - STRICT_FADDRTZ, - LAST_STRICTFP_OPCODE = STRICT_FADDRTZ, - - /// SETBC - The ISA 3.1 (P10) SETBC instruction. - SETBC, - - /// SETBCR - The ISA 3.1 (P10) SETBCR instruction. - SETBCR, - - // NOTE: The nodes below may require PC-Rel specific patterns if the - // address could be PC-Relative. When adding new nodes below, consider - // whether or not the address can be PC-Relative and add the corresponding - // PC-relative patterns and tests. - - /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a - /// byte-swapping store instruction. It byte-swaps the low "Type" bits of - /// the GPRC input, then stores it through Ptr. Type can be either i16 or - /// i32. - FIRST_MEMORY_OPCODE, - STBRX = FIRST_MEMORY_OPCODE, - - /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a - /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, - /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 - /// or i32. - LBRX, - - /// STFIWX - The STFIWX instruction. The first operand is an input token - /// chain, then an f64 value to store, then an address to store it to. 
- STFIWX, - - /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point - /// load which sign-extends from a 32-bit integer value into the - /// destination 64-bit register. - LFIWAX, - - /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point - /// load which zero-extends from a 32-bit integer value into the - /// destination 64-bit register. - LFIWZX, - - /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an - /// integer smaller than 64 bits into a VSR. The integer is zero-extended. - /// This can be used for converting loaded integers to floating point. - LXSIZX, - - /// STXSIX - The STXSI[bh]X instruction. The first operand is an input - /// chain, then an f64 value to store, then an address to store it to, - /// followed by a byte-width for the store. - STXSIX, - - /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian. - /// Maps directly to an lxvd2x instruction that will be followed by - /// an xxswapd. - LXVD2X, - - /// LXVRZX - Load VSX Vector Rightmost and Zero Extend - /// This node represents v1i128 BUILD_VECTOR of a zero extending load - /// instruction from to i128. - /// Allows utilization of the Load VSX Vector Rightmost Instructions. - LXVRZX, - - /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. - /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on - /// the vector type to load vector in big-endian element order. - LOAD_VEC_BE, - - /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a - /// v2f32 value into the lower half of a VSR register. - LD_VSX_LH, - - /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory - /// instructions such as LXVDSX, LXVWSX. - LD_SPLAT, - - /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory - /// that zero-extends. - ZEXT_LD_SPLAT, - - /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory - /// that sign-extends. 
- SEXT_LD_SPLAT, - - /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. - /// Maps directly to an stxvd2x instruction that will be preceded by - /// an xxswapd. - STXVD2X, - - /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian. - /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on - /// the vector type to store vector in big-endian element order. - STORE_VEC_BE, - - /// Store scalar integers from VSR. - ST_VSR_SCAL_INT, - - /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes - /// except they ensure that the compare input is zero-extended for - /// sub-word versions because the atomic loads zero-extend. - ATOMIC_CMP_SWAP_8, - ATOMIC_CMP_SWAP_16, - - /// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr - /// The store conditional instruction ST[BHWD]ARX that produces a glue - /// result to attach it to a conditional branch. - STORE_COND, - - /// GPRC = TOC_ENTRY GA, TOC - /// Loads the entry for GA from the TOC, where the TOC base is given by - /// the last operand. - TOC_ENTRY, - LAST_MEMORY_OPCODE = TOC_ENTRY, - }; - - } // end namespace PPCISD - /// Define some predicates that are used for node matching. namespace PPC { @@ -763,10 +178,6 @@ namespace llvm { explicit PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI); - /// getTargetNodeName() - This method returns the name of a target specific - /// DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - bool isSelectSupported(SelectSupportKind Kind) const override { // PowerPC does not support scalar condition selects on vectors. 
return (Kind != SelectSupportKind::ScalarCondVectorVal); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index b70290df07b1c..d7b1014b55a15 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -157,14 +157,16 @@ def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; -def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS", - SDTFPRoundOp, [SDNPHasChain]>; -def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS", - SDTFPRoundOp, [SDNPHasChain]>; +let IsStrictFP = true in { + def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS", + SDTFPRoundOp, [SDNPHasChain]>; + def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS", + SDTFPRoundOp, [SDNPHasChain]>; +} def PPCany_fcfid : PatFrags<(ops node:$op), [(PPCfcfid node:$op), @@ -201,6 +203,8 @@ def PPCmffs : SDNode<"PPCISD::MFFS", // Perform FADD in round-to-zero mode. 
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; + +let IsStrictFP = true in def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp, [SDNPHasChain]>; @@ -266,14 +270,16 @@ def PPCfnmsub : SDNode<"PPCISD::FNMSUB" , SDTFPTernaryOp>; def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>; -def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ", +let IsStrictFP = true in { + def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ", SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ", + SDTFPUnaryOp, [SDNPHasChain]>; +} def PPCany_fctidz : PatFrags<(ops node:$op), [(PPCstrict_fctidz node:$op), diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp index 95de9f39b86e8..60183a9cd8ee7 100644 --- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -7,18 +7,63 @@ //===----------------------------------------------------------------------===// #include "PPCSelectionDAGInfo.h" -#include "PPCISelLowering.h" + +#define GET_SDNODE_DESC +#include "PPCGenSDNodeInfo.inc" using namespace llvm; +PPCSelectionDAGInfo::PPCSelectionDAGInfo() + : SelectionDAGGenTargetInfo(PPCGenSDNodeInfo) {} + PPCSelectionDAGInfo::~PPCSelectionDAGInfo() = default; -bool PPCSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= PPCISD::FIRST_MEMORY_OPCODE && - Opcode <= PPCISD::LAST_MEMORY_OPCODE; +const char *PPCSelectionDAGInfo::getTargetNodeName(unsigned 
Opcode) const { + switch (static_cast<PPCISD::NodeType>(Opcode)) { + case PPCISD::GlobalBaseReg: + return "PPCISD::GlobalBaseReg"; + case PPCISD::SRA_ADDZE: + return "PPCISD::SRA_ADDZE"; + case PPCISD::READ_TIME_BASE: + return "PPCISD::READ_TIME_BASE"; + case PPCISD::MFOCRF: + return "PPCISD::MFOCRF"; + case PPCISD::ANDI_rec_1_EQ_BIT: + return "PPCISD::ANDI_rec_1_EQ_BIT"; + case PPCISD::ANDI_rec_1_GT_BIT: + return "PPCISD::ANDI_rec_1_GT_BIT"; + case PPCISD::BDNZ: + return "PPCISD::BDNZ"; + case PPCISD::BDZ: + return "PPCISD::BDZ"; + case PPCISD::PPC32_PICGOT: + return "PPCISD::PPC32_PICGOT"; + case PPCISD::VADD_SPLAT: + return "PPCISD::VADD_SPLAT"; + } + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); } -bool PPCSelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const { - return Opcode >= PPCISD::FIRST_STRICTFP_OPCODE && - Opcode <= PPCISD::LAST_STRICTFP_OPCODE; +void PPCSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + default: + break; + case PPCISD::DYNAREAOFFSET: + // invalid number of results; expected 2, got 1 + case PPCISD::TOC_ENTRY: + // invalid number of results; expected 1, got 2 + case PPCISD::STORE_COND: + // invalid number of results; expected 2, got 3 + case PPCISD::LD_SPLAT: + case PPCISD::SEXT_LD_SPLAT: + case PPCISD::ZEXT_LD_SPLAT: + // invalid number of operands; expected 2, got 3 + case PPCISD::ST_VSR_SCAL_INT: + // invalid number of operands; expected 4, got 5 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h index 08e2ddbf1c4ca..2e763629b51a1 100644 --- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -11,15 +11,66 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "PPCGenSDNodeInfo.inc" + namespace llvm { +namespace PPCISD { + +enum NodeType : unsigned { +
/// The result of the mflr at function entry, used for PIC code. + GlobalBaseReg = GENERATED_OPCODE_END, + + /// The combination of sra[wd]i and addze used to implement signed + /// integer division by a power of 2. The first operand is the dividend, + /// and the second is the constant shift amount (representing the + /// divisor). + SRA_ADDZE, + + /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. + /// This copies the bits corresponding to the specified CRREG into the + /// resultant GPR. Bits corresponding to other CR regs are undefined. + MFOCRF, + + // FIXME: Remove these once the ANDI glue bug is fixed: + /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the + /// eq or gt bit of CR0 after executing andi. x, 1. This is used to + /// implement truncation of i32 or i64 to i1. + ANDI_rec_1_EQ_BIT, + ANDI_rec_1_GT_BIT, + + // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit + // target (returns (Lo, Hi)). It takes a chain operand. + READ_TIME_BASE, -class PPCSelectionDAGInfo : public SelectionDAGTargetInfo { + /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based + /// loops. + BDNZ, + BDZ, + + /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and + /// local dynamic TLS and position independent code on PPC32. + PPC32_PICGOT, + + /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded + /// during instruction selection to optimize a BUILD_VECTOR into + /// operations on splats. This is necessary to avoid losing these + /// optimizations due to constant folding.
+ VADD_SPLAT, +}; + +} // namespace PPCISD + +class PPCSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + PPCSelectionDAGInfo(); + ~PPCSelectionDAGInfo() override; - bool isTargetMemoryOpcode(unsigned Opcode) const override; + const char *getTargetNodeName(unsigned Opcode) const override; - bool isTargetStrictFPOpcode(unsigned Opcode) const override; + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; }; } // namespace llvm From 5575449e8268cdbefb27577238c23d879df8809f Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 01:02:30 +0300 Subject: [PATCH 09/14] SystemZ --- llvm/lib/Target/SystemZ/CMakeLists.txt | 1 + .../Target/SystemZ/SystemZISelLowering.cpp | 147 ------- llvm/lib/Target/SystemZ/SystemZISelLowering.h | 385 ------------------ llvm/lib/Target/SystemZ/SystemZOperators.td | 46 ++- .../SystemZ/SystemZSelectionDAGInfo.cpp | 20 +- .../Target/SystemZ/SystemZSelectionDAGInfo.h | 29 +- 6 files changed, 65 insertions(+), 563 deletions(-) diff --git a/llvm/lib/Target/SystemZ/CMakeLists.txt b/llvm/lib/Target/SystemZ/CMakeLists.txt index 0d8f3eac6ee4f..6d94a755322df 100644 --- a/llvm/lib/Target/SystemZ/CMakeLists.txt +++ b/llvm/lib/Target/SystemZ/CMakeLists.txt @@ -11,6 +11,7 @@ tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info) tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM SystemZGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM SystemZGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(SystemZCommonTableGen) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3b86a9bc58790..19412b164542f 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -7378,153 +7378,6 @@ 
SystemZTargetLowering::ReplaceNodeResults(SDNode *N, return LowerOperationWrapper(N, Results, DAG); } -const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { -#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME - switch ((SystemZISD::NodeType)Opcode) { - case SystemZISD::FIRST_NUMBER: break; - OPCODE(RET_GLUE); - OPCODE(CALL); - OPCODE(SIBCALL); - OPCODE(TLS_GDCALL); - OPCODE(TLS_LDCALL); - OPCODE(PCREL_WRAPPER); - OPCODE(PCREL_OFFSET); - OPCODE(ICMP); - OPCODE(FCMP); - OPCODE(STRICT_FCMP); - OPCODE(STRICT_FCMPS); - OPCODE(TM); - OPCODE(BR_CCMASK); - OPCODE(SELECT_CCMASK); - OPCODE(ADJDYNALLOC); - OPCODE(PROBED_ALLOCA); - OPCODE(POPCNT); - OPCODE(SMUL_LOHI); - OPCODE(UMUL_LOHI); - OPCODE(SDIVREM); - OPCODE(UDIVREM); - OPCODE(SADDO); - OPCODE(SSUBO); - OPCODE(UADDO); - OPCODE(USUBO); - OPCODE(ADDCARRY); - OPCODE(SUBCARRY); - OPCODE(GET_CCMASK); - OPCODE(MVC); - OPCODE(NC); - OPCODE(OC); - OPCODE(XC); - OPCODE(CLC); - OPCODE(MEMSET_MVC); - OPCODE(STPCPY); - OPCODE(STRCMP); - OPCODE(SEARCH_STRING); - OPCODE(IPM); - OPCODE(TBEGIN); - OPCODE(TBEGIN_NOFLOAT); - OPCODE(TEND); - OPCODE(BYTE_MASK); - OPCODE(ROTATE_MASK); - OPCODE(REPLICATE); - OPCODE(JOIN_DWORDS); - OPCODE(SPLAT); - OPCODE(MERGE_HIGH); - OPCODE(MERGE_LOW); - OPCODE(SHL_DOUBLE); - OPCODE(PERMUTE_DWORDS); - OPCODE(PERMUTE); - OPCODE(PACK); - OPCODE(PACKS_CC); - OPCODE(PACKLS_CC); - OPCODE(UNPACK_HIGH); - OPCODE(UNPACKL_HIGH); - OPCODE(UNPACK_LOW); - OPCODE(UNPACKL_LOW); - OPCODE(VSHL_BY_SCALAR); - OPCODE(VSRL_BY_SCALAR); - OPCODE(VSRA_BY_SCALAR); - OPCODE(VROTL_BY_SCALAR); - OPCODE(SHL_DOUBLE_BIT); - OPCODE(SHR_DOUBLE_BIT); - OPCODE(VSUM); - OPCODE(VACC); - OPCODE(VSCBI); - OPCODE(VAC); - OPCODE(VSBI); - OPCODE(VACCC); - OPCODE(VSBCBI); - OPCODE(VMAH); - OPCODE(VMALH); - OPCODE(VME); - OPCODE(VMLE); - OPCODE(VMO); - OPCODE(VMLO); - OPCODE(VICMPE); - OPCODE(VICMPH); - OPCODE(VICMPHL); - OPCODE(VICMPES); - OPCODE(VICMPHS); - OPCODE(VICMPHLS); - OPCODE(VFCMPE); - 
OPCODE(STRICT_VFCMPE); - OPCODE(STRICT_VFCMPES); - OPCODE(VFCMPH); - OPCODE(STRICT_VFCMPH); - OPCODE(STRICT_VFCMPHS); - OPCODE(VFCMPHE); - OPCODE(STRICT_VFCMPHE); - OPCODE(STRICT_VFCMPHES); - OPCODE(VFCMPES); - OPCODE(VFCMPHS); - OPCODE(VFCMPHES); - OPCODE(VFTCI); - OPCODE(VEXTEND); - OPCODE(STRICT_VEXTEND); - OPCODE(VROUND); - OPCODE(STRICT_VROUND); - OPCODE(VTM); - OPCODE(SCMP128HI); - OPCODE(UCMP128HI); - OPCODE(VFAE_CC); - OPCODE(VFAEZ_CC); - OPCODE(VFEE_CC); - OPCODE(VFEEZ_CC); - OPCODE(VFENE_CC); - OPCODE(VFENEZ_CC); - OPCODE(VISTR_CC); - OPCODE(VSTRC_CC); - OPCODE(VSTRCZ_CC); - OPCODE(VSTRS_CC); - OPCODE(VSTRSZ_CC); - OPCODE(TDC); - OPCODE(ATOMIC_SWAPW); - OPCODE(ATOMIC_LOADW_ADD); - OPCODE(ATOMIC_LOADW_SUB); - OPCODE(ATOMIC_LOADW_AND); - OPCODE(ATOMIC_LOADW_OR); - OPCODE(ATOMIC_LOADW_XOR); - OPCODE(ATOMIC_LOADW_NAND); - OPCODE(ATOMIC_LOADW_MIN); - OPCODE(ATOMIC_LOADW_MAX); - OPCODE(ATOMIC_LOADW_UMIN); - OPCODE(ATOMIC_LOADW_UMAX); - OPCODE(ATOMIC_CMP_SWAPW); - OPCODE(ATOMIC_CMP_SWAP); - OPCODE(ATOMIC_LOAD_128); - OPCODE(ATOMIC_STORE_128); - OPCODE(ATOMIC_CMP_SWAP_128); - OPCODE(LRV); - OPCODE(STRV); - OPCODE(VLER); - OPCODE(VSTER); - OPCODE(STCKF); - OPCODE(PREFETCH); - OPCODE(ADA_ENTRY); - } - return nullptr; -#undef OPCODE -} - // Return true if VT is a vector whose elements are a whole number of bytes // in width. Also check for presence of vector support. bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index f3536a840fda8..f0b12d0bdaee1 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -22,390 +22,6 @@ #include namespace llvm { -namespace SystemZISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // Return with a glue operand. Operand 0 is the chain operand. - RET_GLUE, - - // Calls a function. 
Operand 0 is the chain operand and operand 1 - // is the target address. The arguments start at operand 2. - // There is an optional glue operand at the end. - CALL, - SIBCALL, - - // TLS calls. Like regular calls, except operand 1 is the TLS symbol. - // (The call target is implicitly __tls_get_offset.) - TLS_GDCALL, - TLS_LDCALL, - - // Wraps a TargetGlobalAddress that should be loaded using PC-relative - // accesses (LARL). Operand 0 is the address. - PCREL_WRAPPER, - - // Used in cases where an offset is applied to a TargetGlobalAddress. - // Operand 0 is the full TargetGlobalAddress and operand 1 is a - // PCREL_WRAPPER for an anchor point. This is used so that we can - // cheaply refer to either the full address or the anchor point - // as a register base. - PCREL_OFFSET, - - // Integer comparisons. There are three operands: the two values - // to compare, and an integer of type SystemZICMP. - ICMP, - - // Floating-point comparisons. The two operands are the values to compare. - FCMP, - - // Test under mask. The first operand is ANDed with the second operand - // and the condition codes are set on the result. The third operand is - // a boolean that is true if the condition codes need to distinguish - // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the - // register forms do but the memory forms don't). - TM, - - // Branches if a condition is true. Operand 0 is the chain operand; - // operand 1 is the 4-bit condition-code mask, with bit N in - // big-endian order meaning "branch if CC=N"; operand 2 is the - // target block and operand 3 is the flag operand. - BR_CCMASK, - - // Selects between operand 0 and operand 1. Operand 2 is the - // mask of condition-code values for which operand 0 should be - // chosen over operand 1; it has the same form as BR_CCMASK. - // Operand 3 is the flag operand. - SELECT_CCMASK, - - // Evaluates to the gap between the stack pointer and the - // base of the dynamically-allocatable area. 
- ADJDYNALLOC, - - // For allocating stack space when using stack clash protector. - // Allocation is performed by block, and each block is probed. - PROBED_ALLOCA, - - // Count number of bits set in operand 0 per byte. - POPCNT, - - // Wrappers around the ISD opcodes of the same name. The output is GR128. - // Input operands may be GR64 or GR32, depending on the instruction. - SMUL_LOHI, - UMUL_LOHI, - SDIVREM, - UDIVREM, - - // Add/subtract with overflow/carry. These have the same operands as - // the corresponding standard operations, except with the carry flag - // replaced by a condition code value. - SADDO, SSUBO, UADDO, USUBO, ADDCARRY, SUBCARRY, - - // Set the condition code from a boolean value in operand 0. - // Operand 1 is a mask of all condition-code values that may result of this - // operation, operand 2 is a mask of condition-code values that may result - // if the boolean is true. - // Note that this operation is always optimized away, we will never - // generate any code for it. - GET_CCMASK, - - // Use a series of MVCs to copy bytes from one memory location to another. - // The operands are: - // - the target address - // - the source address - // - the constant length - // - // This isn't a memory opcode because we'd need to attach two - // MachineMemOperands rather than one. - MVC, - - // Similar to MVC, but for logic operations (AND, OR, XOR). - NC, - OC, - XC, - - // Use CLC to compare two blocks of memory, with the same comments - // as for MVC. - CLC, - - // Use MVC to set a block of memory after storing the first byte. - MEMSET_MVC, - - // Use an MVST-based sequence to implement stpcpy(). - STPCPY, - - // Use a CLST-based sequence to implement strcmp(). The two input operands - // are the addresses of the strings to compare. - STRCMP, - - // Use an SRST-based sequence to search a block of memory. The first - // operand is the end address, the second is the start, and the third - // is the character to search for. 
CC is set to 1 on success and 2 - // on failure. - SEARCH_STRING, - - // Store the CC value in bits 29 and 28 of an integer. - IPM, - - // Transaction begin. The first operand is the chain, the second - // the TDB pointer, and the third the immediate control field. - // Returns CC value and chain. - TBEGIN, - TBEGIN_NOFLOAT, - - // Transaction end. Just the chain operand. Returns CC value and chain. - TEND, - - // Create a vector constant by filling byte N of the result with bit - // 15-N of the single operand. - BYTE_MASK, - - // Create a vector constant by replicating an element-sized RISBG-style mask. - // The first operand specifies the starting set bit and the second operand - // specifies the ending set bit. Both operands count from the MSB of the - // element. - ROTATE_MASK, - - // Replicate a GPR scalar value into all elements of a vector. - REPLICATE, - - // Create a vector from two i64 GPRs. - JOIN_DWORDS, - - // Replicate one element of a vector into all elements. The first operand - // is the vector and the second is the index of the element to replicate. - SPLAT, - - // Interleave elements from the high half of operand 0 and the high half - // of operand 1. - MERGE_HIGH, - - // Likewise for the low halves. - MERGE_LOW, - - // Concatenate the vectors in the first two operands, shift them left - // by the third operand, and take the first half of the result. - SHL_DOUBLE, - - // Take one element of the first v2i64 operand and the one element of - // the second v2i64 operand and concatenate them to form a v2i64 result. - // The third operand is a 4-bit value of the form 0A0B, where A and B - // are the element selectors for the first operand and second operands - // respectively. - PERMUTE_DWORDS, - - // Perform a general vector permute on vector operands 0 and 1. - // Each byte of operand 2 controls the corresponding byte of the result, - // in the same way as a byte-level VECTOR_SHUFFLE mask. 
- PERMUTE, - - // Pack vector operands 0 and 1 into a single vector with half-sized elements. - PACK, - - // Likewise, but saturate the result and set CC. PACKS_CC does signed - // saturation and PACKLS_CC does unsigned saturation. - PACKS_CC, - PACKLS_CC, - - // Unpack the first half of vector operand 0 into double-sized elements. - // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends. - UNPACK_HIGH, - UNPACKL_HIGH, - - // Likewise for the second half. - UNPACK_LOW, - UNPACKL_LOW, - - // Shift/rotate each element of vector operand 0 by the number of bits - // specified by scalar operand 1. - VSHL_BY_SCALAR, - VSRL_BY_SCALAR, - VSRA_BY_SCALAR, - VROTL_BY_SCALAR, - - // Concatenate the vectors in the first two operands, shift them left/right - // bitwise by the third operand, and take the first/last half of the result. - SHL_DOUBLE_BIT, - SHR_DOUBLE_BIT, - - // For each element of the output type, sum across all sub-elements of - // operand 0 belonging to the corresponding element, and add in the - // rightmost sub-element of the corresponding element of operand 1. - VSUM, - - // Compute carry/borrow indication for add/subtract. - VACC, VSCBI, - // Add/subtract with carry/borrow. - VAC, VSBI, - // Compute carry/borrow indication for add/subtract with carry/borrow. - VACCC, VSBCBI, - - // High-word multiply-and-add. - VMAH, VMALH, - // Widen and multiply even/odd vector elements. - VME, VMLE, VMO, VMLO, - - // Compare integer vector operands 0 and 1 to produce the usual 0/-1 - // vector result. VICMPE is for equality, VICMPH for "signed greater than" - // and VICMPHL for "unsigned greater than". - VICMPE, - VICMPH, - VICMPHL, - - // Likewise, but also set the condition codes on the result. - VICMPES, - VICMPHS, - VICMPHLS, - - // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 - // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and - // greater than" and VFCMPHE for "ordered and greater than or equal to". 
- VFCMPE, - VFCMPH, - VFCMPHE, - - // Likewise, but also set the condition codes on the result. - VFCMPES, - VFCMPHS, - VFCMPHES, - - // Test floating-point data class for vectors. - VFTCI, - - // Extend the even f32 elements of vector operand 0 to produce a vector - // of f64 elements. - VEXTEND, - - // Round the f64 elements of vector operand 0 to f32s and store them in the - // even elements of the result. - VROUND, - - // AND the two vector operands together and set CC based on the result. - VTM, - - // i128 high integer comparisons. - SCMP128HI, - UCMP128HI, - - // String operations that set CC as a side-effect. - VFAE_CC, - VFAEZ_CC, - VFEE_CC, - VFEEZ_CC, - VFENE_CC, - VFENEZ_CC, - VISTR_CC, - VSTRC_CC, - VSTRCZ_CC, - VSTRS_CC, - VSTRSZ_CC, - - // Test Data Class. - // - // Operand 0: the value to test - // Operand 1: the bit mask - TDC, - - // z/OS XPLINK ADA Entry - // Wraps a TargetGlobalAddress that should be loaded from a function's - // AssociatedData Area (ADA). Tha ADA is passed to the function by the - // caller in the XPLink ABI defined register R5. - // Operand 0: the GlobalValue/External Symbol - // Operand 1: the ADA register - // Operand 2: the offset (0 for the first and 8 for the second element in the - // function descriptor) - ADA_ENTRY, - - // Strict variants of scalar floating-point comparisons. - // Quiet and signaling versions. - FIRST_STRICTFP_OPCODE, - STRICT_FCMP = FIRST_STRICTFP_OPCODE, - STRICT_FCMPS, - - // Strict variants of vector floating-point comparisons. - // Quiet and signaling versions. - STRICT_VFCMPE, - STRICT_VFCMPH, - STRICT_VFCMPHE, - STRICT_VFCMPES, - STRICT_VFCMPHS, - STRICT_VFCMPHES, - - // Strict variants of VEXTEND and VROUND. - STRICT_VEXTEND, - STRICT_VROUND, - LAST_STRICTFP_OPCODE = STRICT_VROUND, - - // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or - // ATOMIC_LOAD_. 
- // - // Operand 0: the address of the containing 32-bit-aligned field - // Operand 1: the second operand of , in the high bits of an i32 - // for everything except ATOMIC_SWAPW - // Operand 2: how many bits to rotate the i32 left to bring the first - // operand into the high bits - // Operand 3: the negative of operand 2, for rotating the other way - // Operand 4: the width of the field in bits (8 or 16) - FIRST_MEMORY_OPCODE, - ATOMIC_SWAPW = FIRST_MEMORY_OPCODE, - ATOMIC_LOADW_ADD, - ATOMIC_LOADW_SUB, - ATOMIC_LOADW_AND, - ATOMIC_LOADW_OR, - ATOMIC_LOADW_XOR, - ATOMIC_LOADW_NAND, - ATOMIC_LOADW_MIN, - ATOMIC_LOADW_MAX, - ATOMIC_LOADW_UMIN, - ATOMIC_LOADW_UMAX, - - // A wrapper around the inner loop of an ATOMIC_CMP_SWAP. - // - // Operand 0: the address of the containing 32-bit-aligned field - // Operand 1: the compare value, in the low bits of an i32 - // Operand 2: the swap value, in the low bits of an i32 - // Operand 3: how many bits to rotate the i32 left to bring the first - // operand into the high bits - // Operand 4: the negative of operand 2, for rotating the other way - // Operand 5: the width of the field in bits (8 or 16) - ATOMIC_CMP_SWAPW, - - // Atomic compare-and-swap returning CC value. - // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) - ATOMIC_CMP_SWAP, - - // 128-bit atomic load. - // Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr) - ATOMIC_LOAD_128, - - // 128-bit atomic store. - // OUTCHAIN = ATOMIC_STORE_128(INCHAIN, val, ptr) - ATOMIC_STORE_128, - - // 128-bit atomic compare-and-swap. - // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) - ATOMIC_CMP_SWAP_128, - - // Byte swapping load/store. Same operands as regular load/store. - LRV, STRV, - - // Element swapping load/store. Same operands as regular load/store. - VLER, VSTER, - - // Use STORE CLOCK FAST to store current TOD clock value. - STCKF, - - // Prefetch from the second operand using the 4-bit control code in - // the first operand. 
The code is 1 for a load prefetch and 2 for - // a store prefetch. - PREFETCH, - LAST_MEMORY_OPCODE = PREFETCH, -}; - -// Return true if OPCODE is some kind of PC-relative address. -inline bool isPCREL(unsigned Opcode) { - return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET; -} -} // end namespace SystemZISD namespace SystemZICMP { // Describes whether an integer comparison needs to be signed or unsigned, @@ -529,7 +145,6 @@ class SystemZTargetLowering : public TargetLowering { bool shouldExpandCmpUsingSelects(EVT VT) const override { return true; } - const char *getTargetNodeName(unsigned Opcode) const override; std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 39e216b993b11..3630cc4e03784 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -285,10 +285,14 @@ def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", SDT_ZWrapOffset, []>; def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>; def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>; -def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp, - [SDNPHasChain]>; -def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp, - [SDNPHasChain]>; + +let IsStrictFP = true in { + def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp, + [SDNPHasChain]>; + def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp, + [SDNPHasChain]>; +} + def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp>; def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, [SDNPHasChain]>; @@ -382,29 +386,33 @@ def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecCompareCC>; def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecCompareCC>; def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecCompareCC>; def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; -def 
z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE", - SDT_ZVecBinaryConv, [SDNPHasChain]>; -def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES", - SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; -def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH", - SDT_ZVecBinaryConv, [SDNPHasChain]>; -def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS", - SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; -def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE", - SDT_ZVecBinaryConv, [SDNPHasChain]>; -def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES", - SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>; def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>; def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>; def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; -def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND", - SDT_ZVecUnaryConv, [SDNPHasChain]>; def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; -def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND", + +let IsStrictFP = true in { + def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND", + SDT_ZVecUnaryConv, [SDNPHasChain]>; + def z_strict_vround : 
SDNode<"SystemZISD::STRICT_VROUND", SDT_ZVecUnaryConv, [SDNPHasChain]>; +} + def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>; def z_scmp128hi : SDNode<"SystemZISD::SCMP128HI", SDT_ZCmp>; def z_ucmp128hi : SDNode<"SystemZISD::UCMP128HI", SDT_ZCmp>; diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index d76babec73dd4..9dc34e5c237ca 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -10,21 +10,27 @@ // //===----------------------------------------------------------------------===// +#include "SystemZSelectionDAGInfo.h" #include "SystemZTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" +#define GET_SDNODE_DESC +#include "SystemZGenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "systemz-selectiondag-info" -bool SystemZSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= SystemZISD::FIRST_MEMORY_OPCODE && - Opcode <= SystemZISD::LAST_MEMORY_OPCODE; -} +SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() + : SelectionDAGGenTargetInfo(SystemZGenSDNodeInfo) {} + +const char *SystemZSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { + switch (static_cast<SystemZISD::NodeType>(Opcode)) { + case SystemZISD::GET_CCMASK: + return "SystemZISD::GET_CCMASK"; + } -bool SystemZSelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const { - return Opcode >= SystemZISD::FIRST_STRICTFP_OPCODE && - Opcode <= SystemZISD::LAST_STRICTFP_OPCODE; + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); } static unsigned getMemMemLenAdj(unsigned Op) { diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index c928f343e5710..7a7547bc0a8e6 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -15,15 +15,34 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM
+#include "SystemZGenSDNodeInfo.inc" + namespace llvm { +namespace SystemZISD { + +enum NodeType : unsigned { + // Set the condition code from a boolean value in operand 0. + // Operand 1 is a mask of all condition-code values that may result of this + // operation, operand 2 is a mask of condition-code values that may result + // if the boolean is true. + // Note that this operation is always optimized away, we will never + // generate any code for it. + GET_CCMASK = GENERATED_OPCODE_END, +}; -class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo { -public: - explicit SystemZSelectionDAGInfo() = default; +// Return true if OPCODE is some kind of PC-relative address. +inline bool isPCREL(unsigned Opcode) { + return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET; +} - bool isTargetMemoryOpcode(unsigned Opcode) const override; +} // namespace SystemZISD + +class SystemZSelectionDAGInfo : public SelectionDAGGenTargetInfo { +public: + SystemZSelectionDAGInfo(); - bool isTargetStrictFPOpcode(unsigned Opcode) const override; + const char *getTargetNodeName(unsigned Opcode) const override; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, From 1c7e163b6867d20bfc157737f491653bbb3318be Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 03:11:35 +0300 Subject: [PATCH 10/14] VE --- llvm/lib/Target/VE/CMakeLists.txt | 2 + llvm/lib/Target/VE/VECustomDAG.cpp | 1 + llvm/lib/Target/VE/VEISelDAGToDAG.cpp | 1 + llvm/lib/Target/VE/VEISelLowering.cpp | 42 +-------------------- llvm/lib/Target/VE/VEISelLowering.h | 46 ----------------------- llvm/lib/Target/VE/VESelectionDAGInfo.cpp | 44 ++++++++++++++++++++++ llvm/lib/Target/VE/VESelectionDAGInfo.h | 45 ++++++++++++++++++++++ llvm/lib/Target/VE/VESubtarget.cpp | 11 +++++- llvm/lib/Target/VE/VESubtarget.h | 10 ++--- llvm/lib/Target/VE/VVPISelLowering.cpp | 1 + 10 files changed, 110 insertions(+), 93 deletions(-) create mode 100644 
llvm/lib/Target/VE/VESelectionDAGInfo.cpp create mode 100644 llvm/lib/Target/VE/VESelectionDAGInfo.h diff --git a/llvm/lib/Target/VE/CMakeLists.txt b/llvm/lib/Target/VE/CMakeLists.txt index d1bb4f32fcba7..b06072ddf8519 100644 --- a/llvm/lib/Target/VE/CMakeLists.txt +++ b/llvm/lib/Target/VE/CMakeLists.txt @@ -9,6 +9,7 @@ tablegen(LLVM VEGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM VEGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM VEGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM VEGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM VEGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM VEGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM VEGenCallingConv.inc -gen-callingconv) add_public_tablegen_target(VECommonTableGen) @@ -24,6 +25,7 @@ add_llvm_target(VECodeGen VEMachineFunctionInfo.cpp VEMCInstLower.cpp VERegisterInfo.cpp + VESelectionDAGInfo.cpp VESubtarget.cpp VETargetMachine.cpp VVPISelLowering.cpp diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp index 2855a65f654c9..74c21edb3d514 100644 --- a/llvm/lib/Target/VE/VECustomDAG.cpp +++ b/llvm/lib/Target/VE/VECustomDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "VECustomDAG.h" +#include "VESelectionDAGInfo.h" #ifndef DEBUG_TYPE #define DEBUG_TYPE "vecustomdag" diff --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp index 4e1bac0e91734..823bfbcb34a07 100644 --- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp +++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "VE.h" +#include "VESelectionDAGInfo.h" #include "VETargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index 313c894cafa85..db31ffca6a258 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ 
b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -17,6 +17,7 @@ #include "VEInstrBuilder.h" #include "VEMachineFunctionInfo.h" #include "VERegisterInfo.h" +#include "VESelectionDAGInfo.h" #include "VETargetMachine.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -922,47 +923,6 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM, computeRegisterProperties(Subtarget->getRegisterInfo()); } -const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const { -#define TARGET_NODE_CASE(NAME) \ - case VEISD::NAME: \ - return "VEISD::" #NAME; - switch ((VEISD::NodeType)Opcode) { - case VEISD::FIRST_NUMBER: - break; - TARGET_NODE_CASE(CMPI) - TARGET_NODE_CASE(CMPU) - TARGET_NODE_CASE(CMPF) - TARGET_NODE_CASE(CMPQ) - TARGET_NODE_CASE(CMOV) - TARGET_NODE_CASE(CALL) - TARGET_NODE_CASE(EH_SJLJ_LONGJMP) - TARGET_NODE_CASE(EH_SJLJ_SETJMP) - TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH) - TARGET_NODE_CASE(GETFUNPLT) - TARGET_NODE_CASE(GETSTACKTOP) - TARGET_NODE_CASE(GETTLSADDR) - TARGET_NODE_CASE(GLOBAL_BASE_REG) - TARGET_NODE_CASE(Hi) - TARGET_NODE_CASE(Lo) - TARGET_NODE_CASE(RET_GLUE) - TARGET_NODE_CASE(TS1AM) - TARGET_NODE_CASE(VEC_UNPACK_LO) - TARGET_NODE_CASE(VEC_UNPACK_HI) - TARGET_NODE_CASE(VEC_PACK) - TARGET_NODE_CASE(VEC_BROADCAST) - TARGET_NODE_CASE(REPL_I32) - TARGET_NODE_CASE(REPL_F32) - - TARGET_NODE_CASE(LEGALAVL) - - // Register the VVP_* SDNodes. -#define ADD_VVP_OP(VVP_NAME, ...) 
TARGET_NODE_CASE(VVP_NAME) -#include "VVPNodes.def" - } -#undef TARGET_NODE_CASE - return nullptr; -} - EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const { return MVT::i32; diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h index 04274b14baa1f..ad7cf3e902fb6 100644 --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -20,51 +20,6 @@ namespace llvm { class VESubtarget; -namespace VEISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - CMPI, // Compare between two signed integer values. - CMPU, // Compare between two unsigned integer values. - CMPF, // Compare between two floating-point values. - CMPQ, // Compare between two quad floating-point values. - CMOV, // Select between two values using the result of comparison. - - CALL, // A call instruction. - EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. - EH_SJLJ_SETJMP, // SjLj exception handling setjmp. - EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. - GETFUNPLT, // Load function address through %plt insturction. - GETTLSADDR, // Load address for TLS access. - GETSTACKTOP, // Retrieve address of stack top (first address of - // locals and temporaries). - GLOBAL_BASE_REG, // Global base reg for PIC. - Hi, // Hi/Lo operations, typically on a global address. - Lo, // Hi/Lo operations, typically on a global address. - RET_GLUE, // Return with a flag operand. - TS1AM, // A TS1AM instruction used for 1/2 bytes swap. - VEC_UNPACK_LO, // unpack the lo v256 slice of a packed v512 vector. - VEC_UNPACK_HI, // unpack the hi v256 slice of a packed v512 vector. - // 0: v512 vector, 1: AVL - VEC_PACK, // pack a lo and a hi vector into one v512 vector - // 0: v256 lo vector, 1: v256 hi vector, 2: AVL - - VEC_BROADCAST, // A vector broadcast instruction. - // 0: scalar value, 1: VL - REPL_I32, - REPL_F32, // Replicate subregister to other half. - - // Annotation as a wrapper. 
LEGALAVL(VL) means that VL refers to 64bit of - // data, whereas the raw EVL coming in from VP nodes always refers to number - // of elements, regardless of their size. - LEGALAVL, - -// VVP_* nodes. -#define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME, -#include "VVPNodes.def" -}; -} - /// Convert a DAG integer condition code to a VE ICC condition. inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) { switch (CC) { @@ -167,7 +122,6 @@ class VETargetLowering : public TargetLowering { public: VETargetLowering(const TargetMachine &TM, const VESubtarget &STI); - const char *getTargetNodeName(unsigned Opcode) const override; MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } diff --git a/llvm/lib/Target/VE/VESelectionDAGInfo.cpp b/llvm/lib/Target/VE/VESelectionDAGInfo.cpp new file mode 100644 index 0000000000000..fba13eb32830b --- /dev/null +++ b/llvm/lib/Target/VE/VESelectionDAGInfo.cpp @@ -0,0 +1,44 @@ +//===- VESelectionDAGInfo.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "VESelectionDAGInfo.h" + +#define GET_SDNODE_DESC +#include "VEGenSDNodeInfo.inc" + +using namespace llvm; + +VESelectionDAGInfo::VESelectionDAGInfo() + : SelectionDAGGenTargetInfo(VEGenSDNodeInfo) {} + +VESelectionDAGInfo::~VESelectionDAGInfo() = default; + +const char *VESelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { +#define TARGET_NODE_CASE(NAME) \ + case VEISD::NAME: \ + return "VEISD::" #NAME; + + switch (static_cast<VEISD::NodeType>(Opcode)) { + TARGET_NODE_CASE(GLOBAL_BASE_REG) + TARGET_NODE_CASE(LEGALAVL) + } +#undef TARGET_NODE_CASE + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + +void VESelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + case VEISD::GETSTACKTOP: + // result #0 has invalid type; expected ch, got i64 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); +} diff --git a/llvm/lib/Target/VE/VESelectionDAGInfo.h b/llvm/lib/Target/VE/VESelectionDAGInfo.h new file mode 100644 index 0000000000000..c9dded1611398 --- /dev/null +++ b/llvm/lib/Target/VE/VESelectionDAGInfo.h @@ -0,0 +1,45 @@ +//===- VESelectionDAGInfo.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VESELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_VE_VESELECTIONDAGINFO_H + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +#define GET_SDNODE_ENUM +#include "VEGenSDNodeInfo.inc" + +namespace llvm { +namespace VEISD { + +enum NodeType : unsigned { + GLOBAL_BASE_REG = GENERATED_OPCODE_END, // Global base reg for PIC. + + // Annotation as a wrapper. LEGALAVL(VL) means that VL refers to 64bit of + // data, whereas the raw EVL coming in from VP nodes always refers to number + // of elements, regardless of their size. + LEGALAVL, +}; + +} // namespace VEISD + +class VESelectionDAGInfo : public SelectionDAGGenTargetInfo { +public: + VESelectionDAGInfo(); + + ~VESelectionDAGInfo() override; + + const char *getTargetNodeName(unsigned Opcode) const override; + + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_VE_VESELECTIONDAGINFO_H diff --git a/llvm/lib/Target/VE/VESubtarget.cpp b/llvm/lib/Target/VE/VESubtarget.cpp index 197bffe2b55b7..9c9b1b43d1a04 100644 --- a/llvm/lib/Target/VE/VESubtarget.cpp +++ b/llvm/lib/Target/VE/VESubtarget.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "VESubtarget.h" +#include "VESelectionDAGInfo.h" #include "llvm/MC/TargetRegistry.h" using namespace llvm; @@ -43,7 +44,15 @@ VESubtarget::VESubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) : VEGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - FrameLowering(*this) {} + FrameLowering(*this) { + TSInfo = std::make_unique<VESelectionDAGInfo>(); +} + +VESubtarget::~VESubtarget() = default; + +const SelectionDAGTargetInfo *VESubtarget::getSelectionDAGInfo()
const { + return TSInfo.get(); +} uint64_t VESubtarget::getAdjustedFrameSize(uint64_t FrameSize) const { // Calculate adjusted frame size by adding the size of RSA frame, diff --git a/llvm/lib/Target/VE/VESubtarget.h b/llvm/lib/Target/VE/VESubtarget.h index 0c3dc0a080723..bc1c9faca5b5f 100644 --- a/llvm/lib/Target/VE/VESubtarget.h +++ b/llvm/lib/Target/VE/VESubtarget.h @@ -16,7 +16,6 @@ #include "VEFrameLowering.h" #include "VEISelLowering.h" #include "VEInstrInfo.h" -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -41,13 +40,15 @@ class VESubtarget : public VEGenSubtargetInfo { VEInstrInfo InstrInfo; VETargetLowering TLInfo; - SelectionDAGTargetInfo TSInfo; + std::unique_ptr<const SelectionDAGTargetInfo> TSInfo; VEFrameLowering FrameLowering; public: VESubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM); + ~VESubtarget() override; + const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; } const VEFrameLowering *getFrameLowering() const override { return &FrameLowering; @@ -56,9 +57,8 @@ class VESubtarget : public VEGenSubtargetInfo { return &InstrInfo.getRegisterInfo(); } const VETargetLowering *getTargetLowering() const override { return &TLInfo; } - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } + + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override; bool enableMachineScheduler() const override; diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp index f1e2d7f717016..2b84529cf3dd1 100644 --- a/llvm/lib/Target/VE/VVPISelLowering.cpp +++ b/llvm/lib/Target/VE/VVPISelLowering.cpp @@ -13,6 +13,7 @@ #include "VECustomDAG.h" #include "VEISelLowering.h" +#include "VESelectionDAGInfo.h" using namespace llvm; From 890ffdce2db9a1f351f255cd37dc00dd3ba3c7c1 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat,
16 Nov 2024 01:02:43 +0300 Subject: [PATCH 11/14] WebAssembly (BR_IF removed) --- llvm/lib/Target/WebAssembly/CMakeLists.txt | 1 + .../lib/Target/WebAssembly/WebAssemblyISD.def | 62 ------------------- .../WebAssembly/WebAssemblyISelLowering.cpp | 14 ----- .../WebAssembly/WebAssemblyISelLowering.h | 12 ---- .../WebAssemblySelectionDAGInfo.cpp | 35 ++++++++--- .../WebAssembly/WebAssemblySelectionDAGInfo.h | 20 +++++- 6 files changed, 47 insertions(+), 97 deletions(-) delete mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyISD.def diff --git a/llvm/lib/Target/WebAssembly/CMakeLists.txt b/llvm/lib/Target/WebAssembly/CMakeLists.txt index 1e83cbeac50d6..17df119d62709 100644 --- a/llvm/lib/Target/WebAssembly/CMakeLists.txt +++ b/llvm/lib/Target/WebAssembly/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(LLVM WebAssemblyGenFastISel.inc -gen-fast-isel) tablegen(LLVM WebAssemblyGenInstrInfo.inc -gen-instr-info) tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM WebAssemblyGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM WebAssemblyGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM WebAssemblyGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(WebAssemblyCommonTableGen) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def deleted file mode 100644 index 378ef2c8f250e..0000000000000 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ /dev/null @@ -1,62 +0,0 @@ -//- WebAssemblyISD.def - WebAssembly ISD ---------------------------*- C++ -*-// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file describes the various WebAssembly ISD node types. 
-/// -//===----------------------------------------------------------------------===// - -// NOTE: NO INCLUDE GUARD DESIRED! - -HANDLE_NODETYPE(CALL) -HANDLE_NODETYPE(RET_CALL) -HANDLE_NODETYPE(RETURN) -HANDLE_NODETYPE(ARGUMENT) -HANDLE_NODETYPE(LOCAL_GET) -HANDLE_NODETYPE(LOCAL_SET) -// A wrapper node for TargetExternalSymbol, TargetGlobalAddress, and MCSymbol -HANDLE_NODETYPE(Wrapper) -// A special node for TargetGlobalAddress used in PIC code for -// __memory_base/__table_base relative access. -HANDLE_NODETYPE(WrapperREL) -HANDLE_NODETYPE(BR_IF) -HANDLE_NODETYPE(BR_TABLE) -HANDLE_NODETYPE(DOT) -HANDLE_NODETYPE(SHUFFLE) -HANDLE_NODETYPE(SWIZZLE) -HANDLE_NODETYPE(VEC_SHL) -HANDLE_NODETYPE(VEC_SHR_S) -HANDLE_NODETYPE(VEC_SHR_U) -HANDLE_NODETYPE(NARROW_U) -HANDLE_NODETYPE(EXTEND_LOW_S) -HANDLE_NODETYPE(EXTEND_LOW_U) -HANDLE_NODETYPE(EXTEND_HIGH_S) -HANDLE_NODETYPE(EXTEND_HIGH_U) -HANDLE_NODETYPE(CONVERT_LOW_S) -HANDLE_NODETYPE(CONVERT_LOW_U) -HANDLE_NODETYPE(PROMOTE_LOW) -HANDLE_NODETYPE(TRUNC_SAT_ZERO_S) -HANDLE_NODETYPE(TRUNC_SAT_ZERO_U) -HANDLE_NODETYPE(DEMOTE_ZERO) -HANDLE_NODETYPE(I64_ADD128) -HANDLE_NODETYPE(I64_SUB128) -HANDLE_NODETYPE(I64_MUL_WIDE_S) -HANDLE_NODETYPE(I64_MUL_WIDE_U) - -// Memory intrinsics -HANDLE_NODETYPE(GLOBAL_GET) -HANDLE_NODETYPE(GLOBAL_SET) -HANDLE_NODETYPE(TABLE_GET) -HANDLE_NODETYPE(TABLE_SET) - -// Bulk memory instructions. These follow LLVM's expected semantics of -// supporting out-of-bounds pointers if the length is zero, by inserting -// a branch around Wasm's `memory.copy` and `memory.fill`, which would -// otherwise trap. 
-HANDLE_NODETYPE(MEMCPY) -HANDLE_NODETYPE(MEMSET) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index aac3473311192..4a44cfb9958db 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -904,20 +904,6 @@ MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter( } } -const char * -WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) { - case WebAssemblyISD::FIRST_NUMBER: - break; -#define HANDLE_NODETYPE(NODE) \ - case WebAssemblyISD::NODE: \ - return "WebAssemblyISD::" #NODE; -#include "WebAssemblyISD.def" -#undef HANDLE_NODETYPE - } - return nullptr; -} - std::pair<unsigned, const TargetRegisterClass *> WebAssemblyTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index 72401a7a259c0..d6fcb51a80e66 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -19,17 +19,6 @@ namespace llvm { -namespace WebAssemblyISD { - -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, -#define HANDLE_NODETYPE(NODE) NODE, -#include "WebAssemblyISD.def" -#undef HANDLE_NODETYPE -}; - -} // end namespace WebAssemblyISD - class WebAssemblySubtarget; class WebAssemblyTargetLowering final : public TargetLowering { @@ -55,7 +44,6 @@ class WebAssemblyTargetLowering final : public TargetLowering { MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; - const char *getTargetNodeName(unsigned Opcode) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; diff --git
a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp index 2673c81eae40b..c02f8434ee582 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp @@ -11,23 +11,44 @@ /// //===----------------------------------------------------------------------===// +#include "WebAssemblySelectionDAGInfo.h" #include "WebAssemblyTargetMachine.h" + +#define GET_SDNODE_DESC +#include "WebAssemblyGenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "wasm-selectiondag-info" +WebAssemblySelectionDAGInfo::WebAssemblySelectionDAGInfo() + : SelectionDAGGenTargetInfo(WebAssemblyGenSDNodeInfo) {} + WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() = default; // anchor -bool WebAssemblySelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { +const char * +WebAssemblySelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) { + case WebAssemblyISD::CALL: + return "WebAssemblyISD::CALL"; + case WebAssemblyISD::RET_CALL: + return "WebAssemblyISD::RET_CALL"; + } + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + +void WebAssemblySelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { default: - return false; - case WebAssemblyISD::GLOBAL_GET: - case WebAssemblyISD::GLOBAL_SET: - case WebAssemblyISD::TABLE_GET: - case WebAssemblyISD::TABLE_SET: - return true; + break; + case WebAssemblyISD::LOCAL_GET: + // invalid number of results; expected 2, got 1 + return; } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy( diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h index 69c9af0966308..6fcc09a368179 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h @@ -17,13 +17,29 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "WebAssemblyGenSDNodeInfo.inc" + namespace llvm { +namespace WebAssemblyISD { + +enum NodeType : unsigned { + CALL = GENERATED_OPCODE_END, + RET_CALL, +}; -class WebAssemblySelectionDAGInfo final : public SelectionDAGTargetInfo { +} // namespace WebAssemblyISD + +class WebAssemblySelectionDAGInfo final : public SelectionDAGGenTargetInfo { public: + WebAssemblySelectionDAGInfo(); + ~WebAssemblySelectionDAGInfo() override; - bool isTargetMemoryOpcode(unsigned Opcode) const override; + const char *getTargetNodeName(unsigned Opcode) const override; + + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, From 87944924ce6e44dde3c5d1a5af6af3d402a6ce2f Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 01:02:48 +0300 Subject: [PATCH 12/14] X86 --- llvm/lib/Target/X86/CMakeLists.txt | 1 + llvm/lib/Target/X86/X86ISelLowering.cpp | 480 +-------- llvm/lib/Target/X86/X86ISelLowering.h | 983 +------------------ llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 6 +- llvm/lib/Target/X86/X86InstrFragments.td | 11 +- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 52 +- llvm/lib/Target/X86/X86SelectionDAGInfo.cpp | 64 +- llvm/lib/Target/X86/X86SelectionDAGInfo.h | 56 +- 8 files changed, 172 insertions(+), 1481 deletions(-) diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index 6627e97dd0943..fb0240361f6f7 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -17,6 +17,7 @@ tablegen(LLVM X86GenInstrInfo.inc -gen-instr-info tablegen(LLVM X86GenMnemonicTables.inc -gen-x86-mnemonic-tables -asmwriternum=1) tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank) tablegen(LLVM X86GenRegisterInfo.inc 
-gen-register-info) +tablegen(LLVM X86GenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables -asmwriternum=1) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 99a82cab384aa..a910f901e8fa6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19377,9 +19377,9 @@ static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA, } if (!Ret) { - X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC - : LocalDynamic ? X86ISD::TLSBASEADDR - : X86ISD::TLSADDR; + unsigned CallType = UseTLSDESC ? X86ISD::TLSDESC + : LocalDynamic ? X86ISD::TLSBASEADDR + : X86ISD::TLSADDR; Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); if (LoadGlobalBaseReg) { @@ -29260,7 +29260,7 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget, APInt PreferredZero = APInt::getZero(SizeInBits); APInt OppositeZero = PreferredZero; EVT IVT = VT.changeTypeToInteger(); - X86ISD::NodeType MinMaxOp; + unsigned MinMaxOp; if (IsMaxOp) { MinMaxOp = X86ISD::FMAX; OppositeZero.setSignBit(); @@ -34858,478 +34858,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } } -const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((X86ISD::NodeType)Opcode) { - case X86ISD::FIRST_NUMBER: break; -#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE; - NODE_NAME_CASE(BSF) - NODE_NAME_CASE(BSR) - NODE_NAME_CASE(FSHL) - NODE_NAME_CASE(FSHR) - NODE_NAME_CASE(FAND) - NODE_NAME_CASE(FANDN) - NODE_NAME_CASE(FOR) - NODE_NAME_CASE(FXOR) - NODE_NAME_CASE(FILD) - NODE_NAME_CASE(FIST) - NODE_NAME_CASE(FP_TO_INT_IN_MEM) - NODE_NAME_CASE(FLD) - NODE_NAME_CASE(FST) - NODE_NAME_CASE(CALL) - NODE_NAME_CASE(CALL_RVMARKER) - NODE_NAME_CASE(IMP_CALL) - NODE_NAME_CASE(BT) - NODE_NAME_CASE(CMP) - NODE_NAME_CASE(FCMP) - NODE_NAME_CASE(STRICT_FCMP) - 
NODE_NAME_CASE(STRICT_FCMPS) - NODE_NAME_CASE(COMI) - NODE_NAME_CASE(UCOMI) - NODE_NAME_CASE(COMX) - NODE_NAME_CASE(UCOMX) - NODE_NAME_CASE(CMPM) - NODE_NAME_CASE(CMPMM) - NODE_NAME_CASE(STRICT_CMPM) - NODE_NAME_CASE(CMPMM_SAE) - NODE_NAME_CASE(SETCC) - NODE_NAME_CASE(SETCC_CARRY) - NODE_NAME_CASE(FSETCC) - NODE_NAME_CASE(FSETCCM) - NODE_NAME_CASE(FSETCCM_SAE) - NODE_NAME_CASE(CMOV) - NODE_NAME_CASE(BRCOND) - NODE_NAME_CASE(RET_GLUE) - NODE_NAME_CASE(IRET) - NODE_NAME_CASE(REP_STOS) - NODE_NAME_CASE(REP_MOVS) - NODE_NAME_CASE(GlobalBaseReg) - NODE_NAME_CASE(Wrapper) - NODE_NAME_CASE(WrapperRIP) - NODE_NAME_CASE(MOVQ2DQ) - NODE_NAME_CASE(MOVDQ2Q) - NODE_NAME_CASE(MMX_MOVD2W) - NODE_NAME_CASE(MMX_MOVW2D) - NODE_NAME_CASE(PEXTRB) - NODE_NAME_CASE(PEXTRW) - NODE_NAME_CASE(INSERTPS) - NODE_NAME_CASE(PINSRB) - NODE_NAME_CASE(PINSRW) - NODE_NAME_CASE(PSHUFB) - NODE_NAME_CASE(ANDNP) - NODE_NAME_CASE(BLENDI) - NODE_NAME_CASE(BLENDV) - NODE_NAME_CASE(HADD) - NODE_NAME_CASE(HSUB) - NODE_NAME_CASE(FHADD) - NODE_NAME_CASE(FHSUB) - NODE_NAME_CASE(CONFLICT) - NODE_NAME_CASE(FMAX) - NODE_NAME_CASE(FMAXS) - NODE_NAME_CASE(FMAX_SAE) - NODE_NAME_CASE(FMAXS_SAE) - NODE_NAME_CASE(STRICT_FMAX) - NODE_NAME_CASE(FMIN) - NODE_NAME_CASE(FMINS) - NODE_NAME_CASE(FMIN_SAE) - NODE_NAME_CASE(FMINS_SAE) - NODE_NAME_CASE(STRICT_FMIN) - NODE_NAME_CASE(FMAXC) - NODE_NAME_CASE(FMINC) - NODE_NAME_CASE(FRSQRT) - NODE_NAME_CASE(FRCP) - NODE_NAME_CASE(EXTRQI) - NODE_NAME_CASE(INSERTQI) - NODE_NAME_CASE(TLSADDR) - NODE_NAME_CASE(TLSBASEADDR) - NODE_NAME_CASE(TLSCALL) - NODE_NAME_CASE(TLSDESC) - NODE_NAME_CASE(EH_SJLJ_SETJMP) - NODE_NAME_CASE(EH_SJLJ_LONGJMP) - NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH) - NODE_NAME_CASE(EH_RETURN) - NODE_NAME_CASE(TC_RETURN) - NODE_NAME_CASE(FNSTCW16m) - NODE_NAME_CASE(FLDCW16m) - NODE_NAME_CASE(FNSTENVm) - NODE_NAME_CASE(FLDENVm) - NODE_NAME_CASE(LCMPXCHG_DAG) - NODE_NAME_CASE(LCMPXCHG8_DAG) - NODE_NAME_CASE(LCMPXCHG16_DAG) - NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG) - 
NODE_NAME_CASE(LADD) - NODE_NAME_CASE(LSUB) - NODE_NAME_CASE(LOR) - NODE_NAME_CASE(LXOR) - NODE_NAME_CASE(LAND) - NODE_NAME_CASE(LBTS) - NODE_NAME_CASE(LBTC) - NODE_NAME_CASE(LBTR) - NODE_NAME_CASE(LBTS_RM) - NODE_NAME_CASE(LBTC_RM) - NODE_NAME_CASE(LBTR_RM) - NODE_NAME_CASE(AADD) - NODE_NAME_CASE(AOR) - NODE_NAME_CASE(AXOR) - NODE_NAME_CASE(AAND) - NODE_NAME_CASE(VZEXT_MOVL) - NODE_NAME_CASE(VZEXT_LOAD) - NODE_NAME_CASE(VEXTRACT_STORE) - NODE_NAME_CASE(VTRUNC) - NODE_NAME_CASE(VTRUNCS) - NODE_NAME_CASE(VTRUNCUS) - NODE_NAME_CASE(VMTRUNC) - NODE_NAME_CASE(VMTRUNCS) - NODE_NAME_CASE(VMTRUNCUS) - NODE_NAME_CASE(VTRUNCSTORES) - NODE_NAME_CASE(VTRUNCSTOREUS) - NODE_NAME_CASE(VMTRUNCSTORES) - NODE_NAME_CASE(VMTRUNCSTOREUS) - NODE_NAME_CASE(VFPEXT) - NODE_NAME_CASE(STRICT_VFPEXT) - NODE_NAME_CASE(VFPEXT_SAE) - NODE_NAME_CASE(VFPEXTS) - NODE_NAME_CASE(VFPEXTS_SAE) - NODE_NAME_CASE(VFPROUND) - NODE_NAME_CASE(VFPROUND2) - NODE_NAME_CASE(VFPROUND2_RND) - NODE_NAME_CASE(STRICT_VFPROUND) - NODE_NAME_CASE(VMFPROUND) - NODE_NAME_CASE(VFPROUND_RND) - NODE_NAME_CASE(VFPROUNDS) - NODE_NAME_CASE(VFPROUNDS_RND) - NODE_NAME_CASE(VSHLDQ) - NODE_NAME_CASE(VSRLDQ) - NODE_NAME_CASE(VSHL) - NODE_NAME_CASE(VSRL) - NODE_NAME_CASE(VSRA) - NODE_NAME_CASE(VSHLI) - NODE_NAME_CASE(VSRLI) - NODE_NAME_CASE(VSRAI) - NODE_NAME_CASE(VSHLV) - NODE_NAME_CASE(VSRLV) - NODE_NAME_CASE(VSRAV) - NODE_NAME_CASE(VROTLI) - NODE_NAME_CASE(VROTRI) - NODE_NAME_CASE(VPPERM) - NODE_NAME_CASE(CMPP) - NODE_NAME_CASE(STRICT_CMPP) - NODE_NAME_CASE(PCMPEQ) - NODE_NAME_CASE(PCMPGT) - NODE_NAME_CASE(PHMINPOS) - NODE_NAME_CASE(ADD) - NODE_NAME_CASE(SUB) - NODE_NAME_CASE(ADC) - NODE_NAME_CASE(SBB) - NODE_NAME_CASE(SMUL) - NODE_NAME_CASE(UMUL) - NODE_NAME_CASE(OR) - NODE_NAME_CASE(XOR) - NODE_NAME_CASE(AND) - NODE_NAME_CASE(BEXTR) - NODE_NAME_CASE(BEXTRI) - NODE_NAME_CASE(BZHI) - NODE_NAME_CASE(PDEP) - NODE_NAME_CASE(PEXT) - NODE_NAME_CASE(MUL_IMM) - NODE_NAME_CASE(MOVMSK) - NODE_NAME_CASE(PTEST) - NODE_NAME_CASE(TESTP) - 
NODE_NAME_CASE(KORTEST) - NODE_NAME_CASE(KTEST) - NODE_NAME_CASE(KADD) - NODE_NAME_CASE(KSHIFTL) - NODE_NAME_CASE(KSHIFTR) - NODE_NAME_CASE(PACKSS) - NODE_NAME_CASE(PACKUS) - NODE_NAME_CASE(PALIGNR) - NODE_NAME_CASE(VALIGN) - NODE_NAME_CASE(VSHLD) - NODE_NAME_CASE(VSHRD) - NODE_NAME_CASE(VSHLDV) - NODE_NAME_CASE(VSHRDV) - NODE_NAME_CASE(PSHUFD) - NODE_NAME_CASE(PSHUFHW) - NODE_NAME_CASE(PSHUFLW) - NODE_NAME_CASE(SHUFP) - NODE_NAME_CASE(SHUF128) - NODE_NAME_CASE(MOVLHPS) - NODE_NAME_CASE(MOVHLPS) - NODE_NAME_CASE(MOVDDUP) - NODE_NAME_CASE(MOVSHDUP) - NODE_NAME_CASE(MOVSLDUP) - NODE_NAME_CASE(MOVSD) - NODE_NAME_CASE(MOVSS) - NODE_NAME_CASE(MOVSH) - NODE_NAME_CASE(UNPCKL) - NODE_NAME_CASE(UNPCKH) - NODE_NAME_CASE(VBROADCAST) - NODE_NAME_CASE(VBROADCAST_LOAD) - NODE_NAME_CASE(VBROADCASTM) - NODE_NAME_CASE(SUBV_BROADCAST_LOAD) - NODE_NAME_CASE(VPERMILPV) - NODE_NAME_CASE(VPERMILPI) - NODE_NAME_CASE(VPERM2X128) - NODE_NAME_CASE(VPERMV) - NODE_NAME_CASE(VPERMV3) - NODE_NAME_CASE(VPERMI) - NODE_NAME_CASE(VPTERNLOG) - NODE_NAME_CASE(FP_TO_SINT_SAT) - NODE_NAME_CASE(FP_TO_UINT_SAT) - NODE_NAME_CASE(VFIXUPIMM) - NODE_NAME_CASE(VFIXUPIMM_SAE) - NODE_NAME_CASE(VFIXUPIMMS) - NODE_NAME_CASE(VFIXUPIMMS_SAE) - NODE_NAME_CASE(VRANGE) - NODE_NAME_CASE(VRANGE_SAE) - NODE_NAME_CASE(VRANGES) - NODE_NAME_CASE(VRANGES_SAE) - NODE_NAME_CASE(PMULUDQ) - NODE_NAME_CASE(PMULDQ) - NODE_NAME_CASE(PSADBW) - NODE_NAME_CASE(DBPSADBW) - NODE_NAME_CASE(VASTART_SAVE_XMM_REGS) - NODE_NAME_CASE(VAARG_64) - NODE_NAME_CASE(VAARG_X32) - NODE_NAME_CASE(DYN_ALLOCA) - NODE_NAME_CASE(MFENCE) - NODE_NAME_CASE(SEG_ALLOCA) - NODE_NAME_CASE(PROBED_ALLOCA) - NODE_NAME_CASE(RDRAND) - NODE_NAME_CASE(RDSEED) - NODE_NAME_CASE(RDPKRU) - NODE_NAME_CASE(WRPKRU) - NODE_NAME_CASE(VPMADDUBSW) - NODE_NAME_CASE(VPMADDWD) - NODE_NAME_CASE(VPSHA) - NODE_NAME_CASE(VPSHL) - NODE_NAME_CASE(VPCOM) - NODE_NAME_CASE(VPCOMU) - NODE_NAME_CASE(VPERMIL2) - NODE_NAME_CASE(FMSUB) - NODE_NAME_CASE(STRICT_FMSUB) - NODE_NAME_CASE(FNMADD) - 
NODE_NAME_CASE(STRICT_FNMADD) - NODE_NAME_CASE(FNMSUB) - NODE_NAME_CASE(STRICT_FNMSUB) - NODE_NAME_CASE(FMADDSUB) - NODE_NAME_CASE(FMSUBADD) - NODE_NAME_CASE(FMADD_RND) - NODE_NAME_CASE(FNMADD_RND) - NODE_NAME_CASE(FMSUB_RND) - NODE_NAME_CASE(FNMSUB_RND) - NODE_NAME_CASE(FMADDSUB_RND) - NODE_NAME_CASE(FMSUBADD_RND) - NODE_NAME_CASE(VFMADDC) - NODE_NAME_CASE(VFMADDC_RND) - NODE_NAME_CASE(VFCMADDC) - NODE_NAME_CASE(VFCMADDC_RND) - NODE_NAME_CASE(VFMULC) - NODE_NAME_CASE(VFMULC_RND) - NODE_NAME_CASE(VFCMULC) - NODE_NAME_CASE(VFCMULC_RND) - NODE_NAME_CASE(VFMULCSH) - NODE_NAME_CASE(VFMULCSH_RND) - NODE_NAME_CASE(VFCMULCSH) - NODE_NAME_CASE(VFCMULCSH_RND) - NODE_NAME_CASE(VFMADDCSH) - NODE_NAME_CASE(VFMADDCSH_RND) - NODE_NAME_CASE(VFCMADDCSH) - NODE_NAME_CASE(VFCMADDCSH_RND) - NODE_NAME_CASE(VPMADD52H) - NODE_NAME_CASE(VPMADD52L) - NODE_NAME_CASE(VRNDSCALE) - NODE_NAME_CASE(STRICT_VRNDSCALE) - NODE_NAME_CASE(VRNDSCALE_SAE) - NODE_NAME_CASE(VRNDSCALES) - NODE_NAME_CASE(VRNDSCALES_SAE) - NODE_NAME_CASE(VREDUCE) - NODE_NAME_CASE(VREDUCE_SAE) - NODE_NAME_CASE(VREDUCES) - NODE_NAME_CASE(VREDUCES_SAE) - NODE_NAME_CASE(VGETMANT) - NODE_NAME_CASE(VGETMANT_SAE) - NODE_NAME_CASE(VGETMANTS) - NODE_NAME_CASE(VGETMANTS_SAE) - NODE_NAME_CASE(PCMPESTR) - NODE_NAME_CASE(PCMPISTR) - NODE_NAME_CASE(XTEST) - NODE_NAME_CASE(COMPRESS) - NODE_NAME_CASE(EXPAND) - NODE_NAME_CASE(SELECTS) - NODE_NAME_CASE(ADDSUB) - NODE_NAME_CASE(RCP14) - NODE_NAME_CASE(RCP14S) - NODE_NAME_CASE(RSQRT14) - NODE_NAME_CASE(RSQRT14S) - NODE_NAME_CASE(FADD_RND) - NODE_NAME_CASE(FADDS) - NODE_NAME_CASE(FADDS_RND) - NODE_NAME_CASE(FSUB_RND) - NODE_NAME_CASE(FSUBS) - NODE_NAME_CASE(FSUBS_RND) - NODE_NAME_CASE(FMUL_RND) - NODE_NAME_CASE(FMULS) - NODE_NAME_CASE(FMULS_RND) - NODE_NAME_CASE(FDIV_RND) - NODE_NAME_CASE(FDIVS) - NODE_NAME_CASE(FDIVS_RND) - NODE_NAME_CASE(FSQRT_RND) - NODE_NAME_CASE(FSQRTS) - NODE_NAME_CASE(FSQRTS_RND) - NODE_NAME_CASE(FGETEXP) - NODE_NAME_CASE(FGETEXP_SAE) - NODE_NAME_CASE(FGETEXPS) - 
NODE_NAME_CASE(FGETEXPS_SAE) - NODE_NAME_CASE(SCALEF) - NODE_NAME_CASE(SCALEF_RND) - NODE_NAME_CASE(SCALEFS) - NODE_NAME_CASE(SCALEFS_RND) - NODE_NAME_CASE(MULHRS) - NODE_NAME_CASE(SINT_TO_FP_RND) - NODE_NAME_CASE(UINT_TO_FP_RND) - NODE_NAME_CASE(CVTTP2SI) - NODE_NAME_CASE(CVTTP2UI) - NODE_NAME_CASE(STRICT_CVTTP2SI) - NODE_NAME_CASE(STRICT_CVTTP2UI) - NODE_NAME_CASE(MCVTTP2SI) - NODE_NAME_CASE(MCVTTP2UI) - NODE_NAME_CASE(CVTTP2SI_SAE) - NODE_NAME_CASE(CVTTP2UI_SAE) - NODE_NAME_CASE(CVTTS2SI) - NODE_NAME_CASE(CVTTS2UI) - NODE_NAME_CASE(CVTTS2SI_SAE) - NODE_NAME_CASE(CVTTS2UI_SAE) - NODE_NAME_CASE(CVTSI2P) - NODE_NAME_CASE(CVTUI2P) - NODE_NAME_CASE(STRICT_CVTSI2P) - NODE_NAME_CASE(STRICT_CVTUI2P) - NODE_NAME_CASE(MCVTSI2P) - NODE_NAME_CASE(MCVTUI2P) - NODE_NAME_CASE(VFPCLASS) - NODE_NAME_CASE(VFPCLASSS) - NODE_NAME_CASE(MULTISHIFT) - NODE_NAME_CASE(SCALAR_SINT_TO_FP) - NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND) - NODE_NAME_CASE(SCALAR_UINT_TO_FP) - NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND) - NODE_NAME_CASE(CVTPS2PH) - NODE_NAME_CASE(STRICT_CVTPS2PH) - NODE_NAME_CASE(CVTPS2PH_SAE) - NODE_NAME_CASE(MCVTPS2PH) - NODE_NAME_CASE(MCVTPS2PH_SAE) - NODE_NAME_CASE(CVTPH2PS) - NODE_NAME_CASE(STRICT_CVTPH2PS) - NODE_NAME_CASE(CVTPH2PS_SAE) - NODE_NAME_CASE(CVTP2SI) - NODE_NAME_CASE(CVTP2UI) - NODE_NAME_CASE(MCVTP2SI) - NODE_NAME_CASE(MCVTP2UI) - NODE_NAME_CASE(CVTP2SI_RND) - NODE_NAME_CASE(CVTP2UI_RND) - NODE_NAME_CASE(CVTS2SI) - NODE_NAME_CASE(CVTS2UI) - NODE_NAME_CASE(CVTS2SI_RND) - NODE_NAME_CASE(CVTS2UI_RND) - NODE_NAME_CASE(CVTNEPS2BF16) - NODE_NAME_CASE(MCVTNEPS2BF16) - NODE_NAME_CASE(DPBF16PS) - NODE_NAME_CASE(DPFP16PS) - NODE_NAME_CASE(MPSADBW) - NODE_NAME_CASE(LWPINS) - NODE_NAME_CASE(MGATHER) - NODE_NAME_CASE(MSCATTER) - NODE_NAME_CASE(VPDPBUSD) - NODE_NAME_CASE(VPDPBUSDS) - NODE_NAME_CASE(VPDPWSSD) - NODE_NAME_CASE(VPDPWSSDS) - NODE_NAME_CASE(VPSHUFBITQMB) - NODE_NAME_CASE(GF2P8MULB) - NODE_NAME_CASE(GF2P8AFFINEQB) - NODE_NAME_CASE(GF2P8AFFINEINVQB) - NODE_NAME_CASE(NT_CALL) 
- NODE_NAME_CASE(NT_BRIND) - NODE_NAME_CASE(UMWAIT) - NODE_NAME_CASE(TPAUSE) - NODE_NAME_CASE(ENQCMD) - NODE_NAME_CASE(ENQCMDS) - NODE_NAME_CASE(VP2INTERSECT) - NODE_NAME_CASE(VPDPBSUD) - NODE_NAME_CASE(VPDPBSUDS) - NODE_NAME_CASE(VPDPBUUD) - NODE_NAME_CASE(VPDPBUUDS) - NODE_NAME_CASE(VPDPBSSD) - NODE_NAME_CASE(VPDPBSSDS) - NODE_NAME_CASE(VPDPWSUD) - NODE_NAME_CASE(VPDPWSUDS) - NODE_NAME_CASE(VPDPWUSD) - NODE_NAME_CASE(VPDPWUSDS) - NODE_NAME_CASE(VPDPWUUD) - NODE_NAME_CASE(VPDPWUUDS) - NODE_NAME_CASE(VMINMAX) - NODE_NAME_CASE(VMINMAX_SAE) - NODE_NAME_CASE(VMINMAXS) - NODE_NAME_CASE(VMINMAXS_SAE) - NODE_NAME_CASE(CVTP2IBS) - NODE_NAME_CASE(CVTP2IUBS) - NODE_NAME_CASE(CVTP2IBS_RND) - NODE_NAME_CASE(CVTP2IUBS_RND) - NODE_NAME_CASE(CVTTP2IBS) - NODE_NAME_CASE(CVTTP2IUBS) - NODE_NAME_CASE(CVTTP2IBS_SAE) - NODE_NAME_CASE(CVTTP2IUBS_SAE) - NODE_NAME_CASE(VCVT2PH2BF8) - NODE_NAME_CASE(VCVT2PH2BF8S) - NODE_NAME_CASE(VCVT2PH2HF8) - NODE_NAME_CASE(VCVT2PH2HF8S) - NODE_NAME_CASE(VCVTBIASPH2BF8) - NODE_NAME_CASE(VCVTBIASPH2BF8S) - NODE_NAME_CASE(VCVTBIASPH2HF8) - NODE_NAME_CASE(VCVTBIASPH2HF8S) - NODE_NAME_CASE(VCVTPH2BF8) - NODE_NAME_CASE(VCVTPH2BF8S) - NODE_NAME_CASE(VCVTPH2HF8) - NODE_NAME_CASE(VCVTPH2HF8S) - NODE_NAME_CASE(VMCVTBIASPH2BF8) - NODE_NAME_CASE(VMCVTBIASPH2BF8S) - NODE_NAME_CASE(VMCVTBIASPH2HF8) - NODE_NAME_CASE(VMCVTBIASPH2HF8S) - NODE_NAME_CASE(VMCVTPH2BF8) - NODE_NAME_CASE(VMCVTPH2BF8S) - NODE_NAME_CASE(VMCVTPH2HF8) - NODE_NAME_CASE(VMCVTPH2HF8S) - NODE_NAME_CASE(VCVTHF82PH) - NODE_NAME_CASE(AESENC128KL) - NODE_NAME_CASE(AESDEC128KL) - NODE_NAME_CASE(AESENC256KL) - NODE_NAME_CASE(AESDEC256KL) - NODE_NAME_CASE(AESENCWIDE128KL) - NODE_NAME_CASE(AESDECWIDE128KL) - NODE_NAME_CASE(AESENCWIDE256KL) - NODE_NAME_CASE(AESDECWIDE256KL) - NODE_NAME_CASE(CMPCCXADD) - NODE_NAME_CASE(TESTUI) - NODE_NAME_CASE(FP80_ADD) - NODE_NAME_CASE(STRICT_FP80_ADD) - NODE_NAME_CASE(CCMP) - NODE_NAME_CASE(CTEST) - NODE_NAME_CASE(CLOAD) - NODE_NAME_CASE(CSTORE) - NODE_NAME_CASE(CVTTS2SIS) 
- NODE_NAME_CASE(CVTTS2UIS) - NODE_NAME_CASE(CVTTS2SIS_SAE) - NODE_NAME_CASE(CVTTS2UIS_SAE) - NODE_NAME_CASE(CVTTP2SIS) - NODE_NAME_CASE(MCVTTP2SIS) - NODE_NAME_CASE(CVTTP2UIS_SAE) - NODE_NAME_CASE(CVTTP2SIS_SAE) - NODE_NAME_CASE(CVTTP2UIS) - NODE_NAME_CASE(MCVTTP2UIS) - NODE_NAME_CASE(POP_FROM_X87_REG) - } - return nullptr; -#undef NODE_NAME_CASE -} - /// Return true if the addressing mode represented by AM is legal for this /// target, for a load/store of the specified type. bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 359f24768b3da..7435d03b9f63a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H +#include "X86SelectionDAGInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetLowering.h" @@ -21,985 +22,6 @@ namespace llvm { class X86Subtarget; class X86TargetMachine; - namespace X86ISD { - // X86 Specific DAG Nodes - enum NodeType : unsigned { - // Start the numbering where the builtin ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - /// Bit scan forward. - BSF, - /// Bit scan reverse. - BSR, - - /// X86 funnel/double shift i16 instructions. These correspond to - /// X86::SHLDW and X86::SHRDW instructions which have different amt - /// modulo rules to generic funnel shifts. - /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. - FSHL, - FSHR, - - /// Bitwise logical AND of floating point values. This corresponds - /// to X86::ANDPS or X86::ANDPD. - FAND, - - /// Bitwise logical OR of floating point values. This corresponds - /// to X86::ORPS or X86::ORPD. - FOR, - - /// Bitwise logical XOR of floating point values. This corresponds - /// to X86::XORPS or X86::XORPD. - FXOR, - - /// Bitwise logical ANDNOT of floating point values. 
This - /// corresponds to X86::ANDNPS or X86::ANDNPD. - FANDN, - - /// These operations represent an abstract X86 call - /// instruction, which includes a bunch of information. In particular the - /// operands of these node are: - /// - /// #0 - The incoming token chain - /// #1 - The callee - /// #2 - The number of arg bytes the caller pushes on the stack. - /// #3 - The number of arg bytes the callee pops off the stack. - /// #4 - The value to pass in AL/AX/EAX (optional) - /// #5 - The value to pass in DL/DX/EDX (optional) - /// - /// The result values of these nodes are: - /// - /// #0 - The outgoing token chain - /// #1 - The first register result value (optional) - /// #2 - The second register result value (optional) - /// - CALL, - - /// Same as call except it adds the NoTrack prefix. - NT_CALL, - - // Pseudo for a OBJC call that gets emitted together with a special - // marker instruction. - CALL_RVMARKER, - - /// The same as ISD::CopyFromReg except that this node makes it explicit - /// that it may lower to an x87 FPU stack pop. Optimizations should be more - /// cautious when handling this node than a normal CopyFromReg to avoid - /// removing a required FPU stack pop. A key requirement is optimizations - /// should not optimize any users of a chain that contains a - /// POP_FROM_X87_REG to use a chain from a point earlier than the - /// POP_FROM_X87_REG (which may remove a required FPU stack pop). - POP_FROM_X87_REG, - - // Pseudo for a call to an imported function to ensure the correct machine - // instruction is emitted for Import Call Optimization. - IMP_CALL, - - /// X86 compare and logical compare instructions. - CMP, - FCMP, - COMI, - UCOMI, - - // X86 compare with Intrinsics similar to COMI. - COMX, - UCOMX, - - /// X86 bit-test instructions. - BT, - - /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS - /// operand, usually produced by a CMP instruction. 
- SETCC, - - /// X86 Select - SELECTS, - - // Same as SETCC except it's materialized with a sbb and the value is all - // one's or all zero's. - SETCC_CARRY, // R = carry_bit ? ~0 : 0 - - /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. - /// Operands are two FP values to compare; result is a mask of - /// 0s or 1s. Generally DTRT for C/C++ with NaNs. - FSETCC, - - /// X86 FP SETCC, similar to above, but with output as an i1 mask and - /// and a version with SAE. - FSETCCM, - FSETCCM_SAE, - - /// X86 conditional moves. Operand 0 and operand 1 are the two values - /// to select from. Operand 2 is the condition code, and operand 3 is the - /// flag operand produced by a CMP or TEST instruction. - CMOV, - - /// X86 conditional branches. Operand 0 is the chain operand, operand 1 - /// is the block to branch if condition is true, operand 2 is the - /// condition code, and operand 3 is the flag operand produced by a CMP - /// or TEST instruction. - BRCOND, - - /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and - /// operand 1 is the target address. - NT_BRIND, - - /// Return with a glue operand. Operand 0 is the chain operand, operand - /// 1 is the number of bytes of stack to pop. - RET_GLUE, - - /// Return from interrupt. Operand 0 is the number of bytes to pop. - IRET, - - /// Repeat fill, corresponds to X86::REP_STOSx. - REP_STOS, - - /// Repeat move, corresponds to X86::REP_MOVSx. - REP_MOVS, - - /// On Darwin, this node represents the result of the popl - /// at function entry, used for PIC code. - GlobalBaseReg, - - /// A wrapper node for TargetConstantPool, TargetJumpTable, - /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, - /// MCSymbol and TargetBlockAddress. - Wrapper, - - /// Special wrapper used under X86-64 PIC mode for RIP - /// relative displacements. - WrapperRIP, - - /// Copies a 64-bit value from an MMX vector to the low word - /// of an XMM vector, with the high word zero filled. 
- MOVQ2DQ, - - /// Copies a 64-bit value from the low word of an XMM vector - /// to an MMX vector. - MOVDQ2Q, - - /// Copies a 32-bit value from the low word of a MMX - /// vector to a GPR. - MMX_MOVD2W, - - /// Copies a GPR into the low 32-bit word of a MMX vector - /// and zero out the high word. - MMX_MOVW2D, - - /// Extract an 8-bit value from a vector and zero extend it to - /// i32, corresponds to X86::PEXTRB. - PEXTRB, - - /// Extract a 16-bit value from a vector and zero extend it to - /// i32, corresponds to X86::PEXTRW. - PEXTRW, - - /// Insert any element of a 4 x float vector into any element - /// of a destination 4 x floatvector. - INSERTPS, - - /// Insert the lower 8-bits of a 32-bit value to a vector, - /// corresponds to X86::PINSRB. - PINSRB, - - /// Insert the lower 16-bits of a 32-bit value to a vector, - /// corresponds to X86::PINSRW. - PINSRW, - - /// Shuffle 16 8-bit values within a vector. - PSHUFB, - - /// Compute Sum of Absolute Differences. - PSADBW, - /// Compute Double Block Packed Sum-Absolute-Differences - DBPSADBW, - - /// Bitwise Logical AND NOT of Packed FP values. - ANDNP, - - /// Blend where the selector is an immediate. - BLENDI, - - /// Dynamic (non-constant condition) vector blend where only the sign bits - /// of the condition elements are used. This is used to enforce that the - /// condition mask is not valid for generic VSELECT optimizations. This - /// is also used to implement the intrinsics. - /// Operands are in VSELECT order: MASK, TRUE, FALSE - BLENDV, - - /// Combined add and sub on an FP vector. - ADDSUB, - - // FP vector ops with rounding mode. - FADD_RND, - FADDS, - FADDS_RND, - FSUB_RND, - FSUBS, - FSUBS_RND, - FMUL_RND, - FMULS, - FMULS_RND, - FDIV_RND, - FDIVS, - FDIVS_RND, - FMAX_SAE, - FMAXS_SAE, - FMIN_SAE, - FMINS_SAE, - FSQRT_RND, - FSQRTS, - FSQRTS_RND, - - // FP vector get exponent. - FGETEXP, - FGETEXP_SAE, - FGETEXPS, - FGETEXPS_SAE, - // Extract Normalized Mantissas. 
- VGETMANT, - VGETMANT_SAE, - VGETMANTS, - VGETMANTS_SAE, - // FP Scale. - SCALEF, - SCALEF_RND, - SCALEFS, - SCALEFS_RND, - - /// Integer horizontal add/sub. - HADD, - HSUB, - - /// Floating point horizontal add/sub. - FHADD, - FHSUB, - - // Detect Conflicts Within a Vector - CONFLICT, - - /// Floating point max and min. - FMAX, - FMIN, - - /// Commutative FMIN and FMAX. - FMAXC, - FMINC, - - /// Scalar intrinsic floating point max and min. - FMAXS, - FMINS, - - /// Floating point reciprocal-sqrt and reciprocal approximation. - /// Note that these typically require refinement - /// in order to obtain suitable precision. - FRSQRT, - FRCP, - - // AVX-512 reciprocal approximations with a little more precision. - RSQRT14, - RSQRT14S, - RCP14, - RCP14S, - - // Thread Local Storage. - TLSADDR, - - // Thread Local Storage. A call to get the start address - // of the TLS block for the current module. - TLSBASEADDR, - - // Thread Local Storage. When calling to an OS provided - // thunk at the address from an earlier relocation. - TLSCALL, - - // Thread Local Storage. A descriptor containing pointer to - // code and to argument to get the TLS offset for the symbol. - TLSDESC, - - // Exception Handling helpers. - EH_RETURN, - - // SjLj exception handling setjmp. - EH_SJLJ_SETJMP, - - // SjLj exception handling longjmp. - EH_SJLJ_LONGJMP, - - // SjLj exception handling dispatch. - EH_SJLJ_SETUP_DISPATCH, - - /// Tail call return. See X86TargetLowering::LowerCall for - /// the list of operands. - TC_RETURN, - - // Vector move to low scalar and zero higher vector elements. - VZEXT_MOVL, - - // Vector integer truncate. - VTRUNC, - // Vector integer truncate with unsigned/signed saturation. - VTRUNCUS, - VTRUNCS, - - // Masked version of the above. Used when less than a 128-bit result is - // produced since the mask only applies to the lower elements and can't - // be represented by a select. - // SRC, PASSTHRU, MASK - VMTRUNC, - VMTRUNCUS, - VMTRUNCS, - - // Vector FP extend. 
- VFPEXT, - VFPEXT_SAE, - VFPEXTS, - VFPEXTS_SAE, - - // Vector FP round. - VFPROUND, - // Convert TWO packed single data to one packed data - VFPROUND2, - VFPROUND2_RND, - VFPROUND_RND, - VFPROUNDS, - VFPROUNDS_RND, - - // Masked version of above. Used for v2f64->v4f32. - // SRC, PASSTHRU, MASK - VMFPROUND, - - // 128-bit vector logical left / right shift - VSHLDQ, - VSRLDQ, - - // Vector shift elements - VSHL, - VSRL, - VSRA, - - // Vector variable shift - VSHLV, - VSRLV, - VSRAV, - - // Vector shift elements by immediate - VSHLI, - VSRLI, - VSRAI, - - // Shifts of mask registers. - KSHIFTL, - KSHIFTR, - - // Bit rotate by immediate - VROTLI, - VROTRI, - - // Vector packed double/float comparison. - CMPP, - - // Vector integer comparisons. - PCMPEQ, - PCMPGT, - - // v8i16 Horizontal minimum and position. - PHMINPOS, - - MULTISHIFT, - - /// Vector comparison generating mask bits for fp and - /// integer signed and unsigned data types. - CMPM, - // Vector mask comparison generating mask bits for FP values. - CMPMM, - // Vector mask comparison with SAE for FP values. - CMPMM_SAE, - - // Arithmetic operations with FLAGS results. - ADD, - SUB, - ADC, - SBB, - SMUL, - UMUL, - OR, - XOR, - AND, - - // Bit field extract. - BEXTR, - BEXTRI, - - // Zero High Bits Starting with Specified Bit Position. - BZHI, - - // Parallel extract and deposit. - PDEP, - PEXT, - - // X86-specific multiply by immediate. - MUL_IMM, - - // Vector sign bit extraction. - MOVMSK, - - // Vector bitwise comparisons. - PTEST, - - // Vector packed fp sign bitwise comparisons. - TESTP, - - // OR/AND test for masks. - KORTEST, - KTEST, - - // ADD for masks. - KADD, - - // Several flavors of instructions with vector shuffle behaviors. - // Saturated signed/unnsigned packing. - PACKSS, - PACKUS, - // Intra-lane alignr. - PALIGNR, - // AVX512 inter-lane alignr. - VALIGN, - PSHUFD, - PSHUFHW, - PSHUFLW, - SHUFP, - // VBMI2 Concat & Shift. 
- VSHLD, - VSHRD, - VSHLDV, - VSHRDV, - // Shuffle Packed Values at 128-bit granularity. - SHUF128, - MOVDDUP, - MOVSHDUP, - MOVSLDUP, - MOVLHPS, - MOVHLPS, - MOVSD, - MOVSS, - MOVSH, - UNPCKL, - UNPCKH, - VPERMILPV, - VPERMILPI, - VPERMI, - VPERM2X128, - - // Variable Permute (VPERM). - // Res = VPERMV MaskV, V0 - VPERMV, - - // 3-op Variable Permute (VPERMT2). - // Res = VPERMV3 V0, MaskV, V1 - VPERMV3, - - // Bitwise ternary logic. - VPTERNLOG, - // Fix Up Special Packed Float32/64 values. - VFIXUPIMM, - VFIXUPIMM_SAE, - VFIXUPIMMS, - VFIXUPIMMS_SAE, - // Range Restriction Calculation For Packed Pairs of Float32/64 values. - VRANGE, - VRANGE_SAE, - VRANGES, - VRANGES_SAE, - // Reduce - Perform Reduction Transformation on scalar\packed FP. - VREDUCE, - VREDUCE_SAE, - VREDUCES, - VREDUCES_SAE, - // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. - // Also used by the legacy (V)ROUND intrinsics where we mask out the - // scaling part of the immediate. - VRNDSCALE, - VRNDSCALE_SAE, - VRNDSCALES, - VRNDSCALES_SAE, - // Tests Types Of a FP Values for packed types. - VFPCLASS, - // Tests Types Of a FP Values for scalar types. - VFPCLASSS, - - // Broadcast (splat) scalar or element 0 of a vector. If the operand is - // a vector, this node may change the vector length as part of the splat. - VBROADCAST, - // Broadcast mask to vector. - VBROADCASTM, - - /// SSE4A Extraction and Insertion. - EXTRQI, - INSERTQI, - - // XOP arithmetic/logical shifts. - VPSHA, - VPSHL, - // XOP signed/unsigned integer comparisons. - VPCOM, - VPCOMU, - // XOP packed permute bytes. - VPPERM, - // XOP two source permutation. - VPERMIL2, - - // Vector multiply packed unsigned doubleword integers. - PMULUDQ, - // Vector multiply packed signed doubleword integers. - PMULDQ, - // Vector Multiply Packed UnsignedIntegers with Round and Scale. - MULHRS, - - // Multiply and Add Packed Integers. - VPMADDUBSW, - VPMADDWD, - - // AVX512IFMA multiply and add. 
- // NOTE: These are different than the instruction and perform - // op0 x op1 + op2. - VPMADD52L, - VPMADD52H, - - // VNNI - VPDPBUSD, - VPDPBUSDS, - VPDPWSSD, - VPDPWSSDS, - - // FMA nodes. - // We use the target independent ISD::FMA for the non-inverted case. - FNMADD, - FMSUB, - FNMSUB, - FMADDSUB, - FMSUBADD, - - // FMA with rounding mode. - FMADD_RND, - FNMADD_RND, - FMSUB_RND, - FNMSUB_RND, - FMADDSUB_RND, - FMSUBADD_RND, - - // AVX512-FP16 complex addition and multiplication. - VFMADDC, - VFMADDC_RND, - VFCMADDC, - VFCMADDC_RND, - - VFMULC, - VFMULC_RND, - VFCMULC, - VFCMULC_RND, - - VFMADDCSH, - VFMADDCSH_RND, - VFCMADDCSH, - VFCMADDCSH_RND, - - VFMULCSH, - VFMULCSH_RND, - VFCMULCSH, - VFCMULCSH_RND, - - VPDPBSUD, - VPDPBSUDS, - VPDPBUUD, - VPDPBUUDS, - VPDPBSSD, - VPDPBSSDS, - - VPDPWSUD, - VPDPWSUDS, - VPDPWUSD, - VPDPWUSDS, - VPDPWUUD, - VPDPWUUDS, - - VMINMAX, - VMINMAX_SAE, - VMINMAXS, - VMINMAXS_SAE, - - CVTP2IBS, - CVTP2IUBS, - CVTP2IBS_RND, - CVTP2IUBS_RND, - CVTTP2IBS, - CVTTP2IUBS, - CVTTP2IBS_SAE, - CVTTP2IUBS_SAE, - - MPSADBW, - - VCVT2PH2BF8, - VCVT2PH2BF8S, - VCVT2PH2HF8, - VCVT2PH2HF8S, - VCVTBIASPH2BF8, - VCVTBIASPH2BF8S, - VCVTBIASPH2HF8, - VCVTBIASPH2HF8S, - VCVTPH2BF8, - VCVTPH2BF8S, - VCVTPH2HF8, - VCVTPH2HF8S, - VMCVTBIASPH2BF8, - VMCVTBIASPH2BF8S, - VMCVTBIASPH2HF8, - VMCVTBIASPH2HF8S, - VMCVTPH2BF8, - VMCVTPH2BF8S, - VMCVTPH2HF8, - VMCVTPH2HF8S, - VCVTHF82PH, - - // Compress and expand. - COMPRESS, - EXPAND, - - // Bits shuffle - VPSHUFBITQMB, - - // Convert Unsigned/Integer to Floating-Point Value with rounding mode. - SINT_TO_FP_RND, - UINT_TO_FP_RND, - SCALAR_SINT_TO_FP, - SCALAR_UINT_TO_FP, - SCALAR_SINT_TO_FP_RND, - SCALAR_UINT_TO_FP_RND, - - // Vector float/double to signed/unsigned integer. - CVTP2SI, - CVTP2UI, - CVTP2SI_RND, - CVTP2UI_RND, - // Scalar float/double to signed/unsigned integer. - CVTS2SI, - CVTS2UI, - CVTS2SI_RND, - CVTS2UI_RND, - - // Vector float/double to signed/unsigned integer with truncation. 
- CVTTP2SI, - CVTTP2UI, - CVTTP2SI_SAE, - CVTTP2UI_SAE, - - // Saturation enabled Vector float/double to signed/unsigned - // integer with truncation. - CVTTP2SIS, - CVTTP2UIS, - CVTTP2SIS_SAE, - CVTTP2UIS_SAE, - // Masked versions of above. Used for v2f64 to v4i32. - // SRC, PASSTHRU, MASK - MCVTTP2SIS, - MCVTTP2UIS, - - // Scalar float/double to signed/unsigned integer with truncation. - CVTTS2SI, - CVTTS2UI, - CVTTS2SI_SAE, - CVTTS2UI_SAE, - - // Vector signed/unsigned integer to float/double. - CVTSI2P, - CVTUI2P, - - // Scalar float/double to signed/unsigned integer with saturation. - CVTTS2SIS, - CVTTS2UIS, - CVTTS2SIS_SAE, - CVTTS2UIS_SAE, - - // Masked versions of above. Used for v2f64->v4f32. - // SRC, PASSTHRU, MASK - MCVTP2SI, - MCVTP2UI, - MCVTTP2SI, - MCVTTP2UI, - MCVTSI2P, - MCVTUI2P, - - // Custom handling for FP_TO_xINT_SAT - FP_TO_SINT_SAT, - FP_TO_UINT_SAT, - - // Vector float to bfloat16. - // Convert packed single data to packed BF16 data - CVTNEPS2BF16, - // Masked version of above. - // SRC, PASSTHRU, MASK - MCVTNEPS2BF16, - - // Dot product of BF16/FP16 pairs to accumulated into - // packed single precision. - DPBF16PS, - DPFP16PS, - - // A stack checking function call. On Windows it's _chkstk call. - DYN_ALLOCA, - - // For allocating variable amounts of stack space when using - // segmented stacks. Check if the current stacklet has enough space, and - // falls back to heap allocation if not. - SEG_ALLOCA, - - // For allocating stack space when using stack clash protector. - // Allocation is performed by block, and each block is probed. - PROBED_ALLOCA, - - // Memory barriers. - MFENCE, - - // Get a random integer and indicate whether it is valid in CF. - RDRAND, - - // Get a NIST SP800-90B & C compliant random integer and - // indicate whether it is valid in CF. - RDSEED, - - // Protection keys - // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. - // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. 
Operand 2 is - // value for ECX. - RDPKRU, - WRPKRU, - - // SSE42 string comparisons. - // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG - // will emit one or two instructions based on which results are used. If - // flags and index/mask this allows us to use a single instruction since - // we won't have to pick and opcode for flags. Instead we can rely on the - // DAG to CSE everything and decide at isel. - PCMPISTR, - PCMPESTR, - - // Test if in transactional execution. - XTEST, - - // Conversions between float and half-float. - CVTPS2PH, - CVTPS2PH_SAE, - CVTPH2PS, - CVTPH2PS_SAE, - - // Masked version of above. - // SRC, RND, PASSTHRU, MASK - MCVTPS2PH, - MCVTPS2PH_SAE, - - // Galois Field Arithmetic Instructions - GF2P8AFFINEINVQB, - GF2P8AFFINEQB, - GF2P8MULB, - - // LWP insert record. - LWPINS, - - // User level wait - UMWAIT, - TPAUSE, - - // Enqueue Stores Instructions - ENQCMD, - ENQCMDS, - - // For avx512-vp2intersect - VP2INTERSECT, - - // User level interrupts - testui - TESTUI, - - // Perform an FP80 add after changing precision control in FPCW. - FP80_ADD, - - // Conditional compare instructions - CCMP, - CTEST, - - /// X86 strict FP compare instructions. - FIRST_STRICTFP_OPCODE, - STRICT_FCMP = FIRST_STRICTFP_OPCODE, - STRICT_FCMPS, - - // Vector packed double/float comparison. - STRICT_CMPP, - - /// Vector comparison generating mask bits for fp and - /// integer signed and unsigned data types. - STRICT_CMPM, - - // Vector float/double to signed/unsigned integer with truncation. - STRICT_CVTTP2SI, - STRICT_CVTTP2UI, - - // Vector FP extend. - STRICT_VFPEXT, - - // Vector FP round. - STRICT_VFPROUND, - - // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. - // Also used by the legacy (V)ROUND intrinsics where we mask out the - // scaling part of the immediate. - STRICT_VRNDSCALE, - - // Vector signed/unsigned integer to float/double. - STRICT_CVTSI2P, - STRICT_CVTUI2P, - - // Strict FMA nodes. 
- STRICT_FNMADD, - STRICT_FMSUB, - STRICT_FNMSUB, - - // Conversions between float and half-float. - STRICT_CVTPS2PH, - STRICT_CVTPH2PS, - - // Perform an FP80 add after changing precision control in FPCW. - STRICT_FP80_ADD, - - /// Floating point max and min. - STRICT_FMAX, - STRICT_FMIN, - LAST_STRICTFP_OPCODE = STRICT_FMIN, - - // Compare and swap. - FIRST_MEMORY_OPCODE, - LCMPXCHG_DAG = FIRST_MEMORY_OPCODE, - LCMPXCHG8_DAG, - LCMPXCHG16_DAG, - LCMPXCHG16_SAVE_RBX_DAG, - - /// LOCK-prefixed arithmetic read-modify-write instructions. - /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) - LADD, - LSUB, - LOR, - LXOR, - LAND, - LBTS, - LBTC, - LBTR, - LBTS_RM, - LBTC_RM, - LBTR_RM, - - /// RAO arithmetic instructions. - /// OUTCHAIN = AADD(INCHAIN, PTR, RHS) - AADD, - AOR, - AXOR, - AAND, - - // Load, scalar_to_vector, and zero extend. - VZEXT_LOAD, - - // extract_vector_elt, store. - VEXTRACT_STORE, - - // scalar broadcast from memory. - VBROADCAST_LOAD, - - // subvector broadcast from memory. - SUBV_BROADCAST_LOAD, - - // Store FP control word into i16 memory. - FNSTCW16m, - - // Load FP control word from i16 memory. - FLDCW16m, - - // Store x87 FPU environment into memory. - FNSTENVm, - - // Load x87 FPU environment from memory. - FLDENVm, - - /// This instruction implements FP_TO_SINT with the - /// integer destination in memory and a FP reg source. This corresponds - /// to the X86::FIST*m instructions and the rounding mode change stuff. It - /// has two inputs (token chain and address) and two outputs (int value - /// and token chain). Memory VT specifies the type to store to. - FP_TO_INT_IN_MEM, - - /// This instruction implements SINT_TO_FP with the - /// integer source in memory and FP reg result. This corresponds to the - /// X86::FILD*m instructions. It has two inputs (token chain and address) - /// and two outputs (FP value and token chain). The integer source type is - /// specified by the memory VT. 
- FILD, - - /// This instruction implements a fp->int store from FP stack - /// slots. This corresponds to the fist instruction. It takes a - /// chain operand, value to store, address, and glue. The memory VT - /// specifies the type to store as. - FIST, - - /// This instruction implements an extending load to FP stack slots. - /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain - /// operand, and ptr to load from. The memory VT specifies the type to - /// load from. - FLD, - - /// This instruction implements a truncating store from FP stack - /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a - /// chain operand, value to store, address, and glue. The memory VT - /// specifies the type to store as. - FST, - - /// These instructions grab the address of the next argument - /// from a va_list. (reads and modifies the va_list in memory) - VAARG_64, - VAARG_X32, - - // Vector truncating store with unsigned/signed saturation - VTRUNCSTOREUS, - VTRUNCSTORES, - // Vector truncating masked store with unsigned/signed saturation - VMTRUNCSTOREUS, - VMTRUNCSTORES, - - // X86 specific gather and scatter - MGATHER, - MSCATTER, - - // Key locker nodes that produce flags. - AESENC128KL, - AESDEC128KL, - AESENC256KL, - AESDEC256KL, - AESENCWIDE128KL, - AESDECWIDE128KL, - AESENCWIDE256KL, - AESDECWIDE256KL, - - /// Compare and Add if Condition is Met. Compare value in operand 2 with - /// value in memory of operand 1. If condition of operand 4 is met, add - /// value operand 3 to m32 and write new value in operand 1. Operand 2 is - /// always updated with the original value from operand 1. - CMPCCXADD, - - // Save xmm argument registers to the stack, according to %al. An operator - // is needed so that this can be expanded with control flow. 
- VASTART_SAVE_XMM_REGS, - - // Conditional load/store instructions - CLOAD, - CSTORE, - LAST_MEMORY_OPCODE = CSTORE, - }; - } // end namespace X86ISD - namespace X86 { /// Current rounding mode is represented in bits 11:10 of FPSR. These /// values are same as corresponding constants for rounding mode used @@ -1182,9 +204,6 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; - /// This method returns the name of a target specific DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - /// Do not merge vector stores after legalization because that may conflict /// with x86-specific store splitting optimizations. bool mergeStoresAfterLegalization(EVT MemVT) const override { diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index 1aa00d4f09f75..169f6e0764caf 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -942,10 +942,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (Glue.getNode()) RetOps.push_back(Glue); - X86ISD::NodeType opcode = X86ISD::RET_GLUE; + unsigned RetOpcode = X86ISD::RET_GLUE; if (CallConv == CallingConv::X86_INTR) - opcode = X86ISD::IRET; - return DAG.getNode(opcode, dl, MVT::Other, RetOps); + RetOpcode = X86ISD::IRET; + return DAG.getNode(RetOpcode, dl, MVT::Other, RetOps); } bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index fe95b8c20a8ff..33bc579a5bff5 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -141,8 +141,12 @@ def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>; def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>; def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>; -def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, 
[SDNPHasChain]>; -def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>; + +let IsStrictFP = true in { + def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>; + def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>; +} + def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>; def X86ccmp : SDNode<"X86ISD::CCMP", SDTX86Ccmp>; @@ -790,8 +794,11 @@ def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def X86fp80_add : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>; + +let IsStrictFP = true in def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp, [SDNPHasChain,SDNPCommutative]>; + def any_X86fp80_add : PatFrags<(ops node:$lhs, node:$rhs), [(X86strict_fp80_add node:$lhs, node:$rhs), (X86fp80_add node:$lhs, node:$rhs)]>; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 0c20ffed77e77..bfbde19d9f55b 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -46,10 +46,12 @@ def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp, def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; -def X86strict_fmin : SDNode<"X86ISD::STRICT_FMIN", SDTFPBinOp, - [SDNPHasChain]>; -def X86strict_fmax : SDNode<"X86ISD::STRICT_FMAX", SDTFPBinOp, - [SDNPHasChain]>; +let IsStrictFP = true in { + def X86strict_fmin : SDNode<"X86ISD::STRICT_FMIN", SDTFPBinOp, + [SDNPHasChain]>; + def X86strict_fmax : SDNode<"X86ISD::STRICT_FMAX", SDTFPBinOp, + [SDNPHasChain]>; +} def X86any_fmin : PatFrags<(ops node:$src1, node:$src2), [(X86strict_fmin node:$src1, node:$src2), @@ -146,6 +148,7 @@ def X86vfpext : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>]>>; +let IsStrictFP = true in def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT", SDTypeProfile<1, 
1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>]>, @@ -165,6 +168,7 @@ def X86vfpround2 : SDNode<"X86ISD::VFPROUND2", SDTCisSameAs<1, 2>, SDTCisOpSmallerThanOp<0, 1>]>>; +let IsStrictFP = true in def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>, @@ -215,7 +219,10 @@ def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>; def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>; def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; + +let IsStrictFP = true in def X86strict_cmpp : SDNode<"X86ISD::STRICT_CMPP", SDTX86VFCMP, [SDNPHasChain]>; + def X86any_cmpp : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_cmpp node:$src1, node:$src2, node:$src3), (X86cmpp node:$src1, node:$src2, node:$src3)]>; @@ -235,7 +242,10 @@ def X86CmpMaskCCScalar : def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; def X86cmpmm : SDNode<"X86ISD::CMPMM", X86MaskCmpMaskCC>; + +let IsStrictFP = true in def X86strict_cmpm : SDNode<"X86ISD::STRICT_CMPM", X86CmpMaskCC, [SDNPHasChain]>; + def X86any_cmpm : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_cmpm node:$src1, node:$src2, node:$src3), (X86cmpm node:$src1, node:$src2, node:$src3)]>; @@ -504,8 +514,11 @@ def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>; def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>; def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>; def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>; + +let IsStrictFP = true in def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm, [SDNPHasChain]>; + def X86any_VRndScale : PatFrags<(ops node:$src1, node:$src2), [(X86strict_VRndScale node:$src1, node:$src2), (X86VRndScale node:$src1, node:$src2)]>; @@ -564,17 +577,26 @@ def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>; def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", 
SDTFPTernaryOp, [SDNPCommutative]>; + +let IsStrictFP = true in def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; + def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_Fnmadd node:$src1, node:$src2, node:$src3), (X86Fnmadd node:$src1, node:$src2, node:$src3)]>; def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>; + +let IsStrictFP = true in def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; + def X86any_Fmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_Fmsub node:$src1, node:$src2, node:$src3), (X86Fmsub node:$src1, node:$src2, node:$src3)]>; def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>; + +let IsStrictFP = true in def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; + def X86any_Fnmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_Fnmsub node:$src1, node:$src2, node:$src3), (X86Fnmsub node:$src1, node:$src2, node:$src3)]>; @@ -719,8 +741,12 @@ def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>; // cvtt fp-to-int staff def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>; def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>; -def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>; -def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>; + +let IsStrictFP = true in { + def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>; + def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>; +} + def X86any_cvttp2si : PatFrags<(ops node:$src), [(X86strict_cvttp2si node:$src), (X86cvttp2si node:$src)]>; @@ -730,8 +756,12 @@ def X86any_cvttp2ui : PatFrags<(ops node:$src), def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>; 
def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>; -def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>; -def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>; + +let IsStrictFP = true in { + def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>; + def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>; +} + def X86any_VSintToFP : PatFrags<(ops node:$src), [(X86strict_VSintToFP node:$src), (X86VSintToFP node:$src)]>; @@ -771,8 +801,11 @@ def X86mcvttp2uis : SDNode<"X86ISD::MCVTTP2UIS", SDTMFloatToInt>; def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, i16>]>; def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>; + +let IsStrictFP = true in def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps, [SDNPHasChain]>; + def X86any_cvtph2ps : PatFrags<(ops node:$src), [(X86strict_cvtph2ps node:$src), (X86cvtph2ps node:$src)]>; @@ -783,8 +816,11 @@ def SDTcvtps2ph : SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>, SDTCVecEltisVT<1, f32>, SDTCisVT<2, i32>]>; def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTcvtps2ph>; + +let IsStrictFP = true in def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph, [SDNPHasChain]>; + def X86any_cvtps2ph : PatFrags<(ops node:$src1, node:$src2), [(X86strict_cvtps2ph node:$src1, node:$src2), (X86cvtps2ph node:$src1, node:$src2)]>; diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp index aba62c36546f9..d318dffda9462 100644 --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "X86SelectionDAGInfo.h" -#include "X86ISelLowering.h" #include "X86InstrInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" @@ -19,6 +18,9 @@ #include
"llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" +#define GET_SDNODE_DESC +#include "X86GenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "x86-selectiondag-info" @@ -27,14 +29,64 @@ static cl::opt<bool> UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false), cl::desc("Use fast short rep mov in memcpy lowering")); +X86SelectionDAGInfo::X86SelectionDAGInfo() + : SelectionDAGGenTargetInfo(X86GenSDNodeInfo) {} + +const char *X86SelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { +#define NODE_NAME_CASE(NODE) \ + case X86ISD::NODE: \ + return "X86ISD::" #NODE; + + // These nodes don't have corresponding entries in *.td files yet. + switch (static_cast<X86ISD::NodeType>(Opcode)) { + NODE_NAME_CASE(POP_FROM_X87_REG) + NODE_NAME_CASE(GlobalBaseReg) + NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG) + NODE_NAME_CASE(PCMPESTR) + NODE_NAME_CASE(PCMPISTR) + NODE_NAME_CASE(MGATHER) + NODE_NAME_CASE(MSCATTER) + NODE_NAME_CASE(AESENCWIDE128KL) + NODE_NAME_CASE(AESDECWIDE128KL) + NODE_NAME_CASE(AESENCWIDE256KL) + NODE_NAME_CASE(AESDECWIDE256KL) + } +#undef NODE_NAME_CASE + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + bool X86SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= X86ISD::FIRST_MEMORY_OPCODE && - Opcode <= X86ISD::LAST_MEMORY_OPCODE; + // These nodes don't have corresponding entries in *.td files yet.
+ if (Opcode >= X86ISD::FIRST_MEMORY_OPCODE && + Opcode <= X86ISD::LAST_MEMORY_OPCODE) + return true; + + return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode); } -bool X86SelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const { - return Opcode >= X86ISD::FIRST_STRICTFP_OPCODE && - Opcode <= X86ISD::LAST_STRICTFP_OPCODE; +void X86SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + default: + break; + case X86ISD::VP2INTERSECT: + // invalid number of results; expected 1, got 2 + case X86ISD::VTRUNCSTOREUS: + case X86ISD::VTRUNCSTORES: + case X86ISD::FSETCCM_SAE: + // invalid number of operands; expected 3, got 4 + case X86ISD::CVTPH2PS: + case X86ISD::CVTTP2SI_SAE: + case X86ISD::CVTTP2UI_SAE: + case X86ISD::CVTTP2IBS_SAE: + // invalid number of operands; expected 1, got 2 + case X86ISD::CMPMM_SAE: + // invalid number of operands; expected 4, got 5 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } /// Returns the best type to use with repmovs/repstos depending on alignment. diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/llvm/lib/Target/X86/X86SelectionDAGInfo.h index e77e16bab830d..19c5986982614 100644 --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.h +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.h @@ -15,20 +15,68 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "X86GenSDNodeInfo.inc" + namespace llvm { +namespace X86ISD { + +enum NodeType : unsigned { + /// The same as ISD::CopyFromReg except that this node makes it explicit + /// that it may lower to an x87 FPU stack pop. Optimizations should be more + /// cautious when handling this node than a normal CopyFromReg to avoid + /// removing a required FPU stack pop. 
A key requirement is optimizations + /// should not optimize any users of a chain that contains a + /// POP_FROM_X87_REG to use a chain from a point earlier than the + /// POP_FROM_X87_REG (which may remove a required FPU stack pop). + POP_FROM_X87_REG = X86ISD::GENERATED_OPCODE_END, + + /// On Darwin, this node represents the result of the popl + /// at function entry, used for PIC code. + GlobalBaseReg, + + // SSE42 string comparisons. + // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG + // will emit one or two instructions based on which results are used. If + // flags and index/mask this allows us to use a single instruction since + // we won't have to pick and opcode for flags. Instead we can rely on the + // DAG to CSE everything and decide at isel. + PCMPISTR, + PCMPESTR, + + // Compare and swap. + FIRST_MEMORY_OPCODE, + LCMPXCHG16_SAVE_RBX_DAG = FIRST_MEMORY_OPCODE, -class X86SelectionDAGInfo : public SelectionDAGTargetInfo { + // X86 specific gather and scatter + MGATHER, + MSCATTER, + + // Key locker nodes that produce flags. + AESENCWIDE128KL, + AESDECWIDE128KL, + AESENCWIDE256KL, + AESDECWIDE256KL, + LAST_MEMORY_OPCODE = AESDECWIDE256KL, +}; + +} // namespace X86ISD + +class X86SelectionDAGInfo : public SelectionDAGGenTargetInfo { /// Returns true if it is possible for the base register to conflict with the /// given set of clobbers for a memory intrinsic. 
bool isBaseRegConflictPossible(SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const; public: - explicit X86SelectionDAGInfo() = default; + X86SelectionDAGInfo(); + + const char *getTargetNodeName(unsigned Opcode) const override; bool isTargetMemoryOpcode(unsigned Opcode) const override; - bool isTargetStrictFPOpcode(unsigned Opcode) const override; + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, @@ -44,6 +92,6 @@ class X86SelectionDAGInfo : public SelectionDAGTargetInfo { MachinePointerInfo SrcPtrInfo) const override; }; -} +} // namespace llvm #endif From 0523ff2894cf3073f06d4bd0834f4d800b05904d Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 03:11:40 +0300 Subject: [PATCH 13/14] Xtensa --- llvm/lib/Target/Xtensa/CMakeLists.txt | 2 + llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp | 1 + llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 29 +------------ llvm/lib/Target/Xtensa/XtensaISelLowering.h | 41 ------------------- .../Target/Xtensa/XtensaSelectionDAGInfo.cpp | 19 +++++++++ .../Target/Xtensa/XtensaSelectionDAGInfo.h | 28 +++++++++++++ llvm/lib/Target/Xtensa/XtensaSubtarget.cpp | 11 ++++- llvm/lib/Target/Xtensa/XtensaSubtarget.h | 10 ++--- 8 files changed, 66 insertions(+), 75 deletions(-) create mode 100644 llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.cpp create mode 100644 llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.h diff --git a/llvm/lib/Target/Xtensa/CMakeLists.txt b/llvm/lib/Target/Xtensa/CMakeLists.txt index 4fc1ba6dfa650..39f510c56d6bc 100644 --- a/llvm/lib/Target/Xtensa/CMakeLists.txt +++ b/llvm/lib/Target/Xtensa/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(LLVM XtensaGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM XtensaGenInstrInfo.inc -gen-instr-info) tablegen(LLVM XtensaGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM XtensaGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM
XtensaGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM XtensaGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(XtensaCommonTableGen) @@ -22,6 +23,7 @@ add_llvm_target(XtensaCodeGen XtensaISelDAGToDAG.cpp XtensaISelLowering.cpp XtensaRegisterInfo.cpp + XtensaSelectionDAGInfo.cpp XtensaSubtarget.cpp XtensaTargetMachine.cpp diff --git a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp index 06cccd4831bfc..d6bec2ed99e19 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelDAGToDAG.cpp @@ -12,6 +12,7 @@ #include "MCTargetDesc/XtensaMCTargetDesc.h" #include "Xtensa.h" +#include "XtensaSelectionDAGInfo.h" #include "XtensaTargetMachine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index b17840aad9b4d..8e16252b1fa7e 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -15,6 +15,7 @@ #include "XtensaConstantPoolValue.h" #include "XtensaInstrInfo.h" #include "XtensaMachineFunctionInfo.h" +#include "XtensaSelectionDAGInfo.h" #include "XtensaSubtarget.h" #include "XtensaTargetMachine.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -1287,34 +1288,6 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op, } } -const char *XtensaTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - case XtensaISD::BR_JT: - return "XtensaISD::BR_JT"; - case XtensaISD::CALL: - return "XtensaISD::CALL"; - case XtensaISD::CALLW8: - return "XtensaISD::CALLW8"; - case XtensaISD::EXTUI: - return "XtensaISD::EXTUI"; - case XtensaISD::MOVSP: - return "XtensaISD::MOVSP"; - case XtensaISD::PCREL_WRAPPER: - return "XtensaISD::PCREL_WRAPPER"; - case XtensaISD::RET: - return "XtensaISD::RET"; - case XtensaISD::RETW: - return "XtensaISD::RETW"; - case 
XtensaISD::SELECT_CC: - return "XtensaISD::SELECT_CC"; - case XtensaISD::SRCL: - return "XtensaISD::SRCL"; - case XtensaISD::SRCR: - return "XtensaISD::SRCR"; - } - return nullptr; -} - //===----------------------------------------------------------------------===// // Custom insertion //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index c7d4f41b1f08e..53e208cbb9c97 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -20,45 +20,6 @@ namespace llvm { -namespace XtensaISD { -enum { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - BR_JT, - - // Calls a function. Operand 0 is the chain operand and operand 1 - // is the target address. The arguments start at operand 2. - // There is an optional glue operand at the end. - CALL, - // Call with rotation window by 8 registers - CALLW8, - - // Extract unsigned immediate. Operand 0 is value, operand 1 - // is bit position of the field [0..31], operand 2 is bit size - // of the field [1..16] - EXTUI, - - MOVSP, - - // Wraps a TargetGlobalAddress that should be loaded using PC-relative - // accesses. Operand 0 is the address. 
- PCREL_WRAPPER, - RET, - RETW, - - // Select with condition operator - This selects between a true value and - // a false value (ops #2 and #3) based on the boolean result of comparing - // the lhs and rhs (ops #0 and #1) of a conditional expression with the - // condition code in op #4 - SELECT_CC, - - // SRCL(R) performs shift left(right) of the concatenation of 2 registers - // and returns high(low) 32-bit part of 64-bit result - SRCL, - // Shift Right Combined - SRCR, -}; -} - class XtensaSubtarget; class XtensaTargetLowering : public TargetLowering { @@ -79,8 +40,6 @@ class XtensaTargetLowering : public TargetLowering { bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - const char *getTargetNodeName(unsigned Opcode) const override; - std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; diff --git a/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.cpp b/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..3f0ba044a7f6c --- /dev/null +++ b/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.cpp @@ -0,0 +1,19 @@ +//===- XtensaSelectionDAGInfo.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "XtensaSelectionDAGInfo.h" + +#define GET_SDNODE_DESC +#include "XtensaGenSDNodeInfo.inc" + +using namespace llvm; + +XtensaSelectionDAGInfo::XtensaSelectionDAGInfo() + : SelectionDAGGenTargetInfo(XtensaGenSDNodeInfo) {} + +XtensaSelectionDAGInfo::~XtensaSelectionDAGInfo() = default; diff --git a/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.h b/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.h new file mode 100644 index 0000000000000..16a9ad2e85912 --- /dev/null +++ b/llvm/lib/Target/Xtensa/XtensaSelectionDAGInfo.h @@ -0,0 +1,28 @@ +//===- XtensaSelectionDAGInfo.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_XTENSA_XTENSASELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_XTENSA_XTENSASELECTIONDAGINFO_H + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +#define GET_SDNODE_ENUM +#include "XtensaGenSDNodeInfo.inc" + +namespace llvm { + +class XtensaSelectionDAGInfo : public SelectionDAGGenTargetInfo { +public: + XtensaSelectionDAGInfo(); + + ~XtensaSelectionDAGInfo() override; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_XTENSA_XTENSASELECTIONDAGINFO_H diff --git a/llvm/lib/Target/Xtensa/XtensaSubtarget.cpp b/llvm/lib/Target/Xtensa/XtensaSubtarget.cpp index 6b1d3255db247..c1a1efc85eb55 100644 --- a/llvm/lib/Target/Xtensa/XtensaSubtarget.cpp +++ b/llvm/lib/Target/Xtensa/XtensaSubtarget.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "XtensaSubtarget.h" +#include "XtensaSelectionDAGInfo.h" #include "llvm/IR/GlobalValue.h" 
#include "llvm/Support/Debug.h" @@ -39,4 +40,12 @@ XtensaSubtarget::XtensaSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : XtensaGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(), FrameLowering(*this) {} + FrameLowering(*this) { + TSInfo = std::make_unique<XtensaSelectionDAGInfo>(); +} + +XtensaSubtarget::~XtensaSubtarget() = default; + +const SelectionDAGTargetInfo *XtensaSubtarget::getSelectionDAGInfo() const { + return TSInfo.get(); +} diff --git a/llvm/lib/Target/Xtensa/XtensaSubtarget.h b/llvm/lib/Target/Xtensa/XtensaSubtarget.h index 227ce2134b33b..2f8da0413b0d9 100644 --- a/llvm/lib/Target/Xtensa/XtensaSubtarget.h +++ b/llvm/lib/Target/Xtensa/XtensaSubtarget.h @@ -17,7 +17,6 @@ #include "XtensaISelLowering.h" #include "XtensaInstrInfo.h" #include "XtensaRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" @@ -38,7 +37,7 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo { const Triple &TargetTriple; XtensaInstrInfo InstrInfo; XtensaTargetLowering TLInfo; - SelectionDAGTargetInfo TSInfo; + std::unique_ptr<const SelectionDAGTargetInfo> TSInfo; XtensaFrameLowering FrameLowering; XtensaSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); @@ -47,6 +46,8 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo { XtensaSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM); + ~XtensaSubtarget() override; + const Triple &getTargetTriple() const { return TargetTriple; } const TargetFrameLowering *getFrameLowering() const override { @@ -60,9 +61,8 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo { const XtensaTargetLowering *getTargetLowering() const override { return &TLInfo; } - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } + + const SelectionDAGTargetInfo 
*getSelectionDAGInfo() const override; bool hasDensity() const { return HasDensity; } bool hasMAC16() const { return HasMAC16; } From eec0335b24dafd649273b48141f710f2465534e9 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Thu, 12 Dec 2024 01:59:14 +0300 Subject: [PATCH 14/14] Generic bugs --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1506bc4ee187d..bb2f71828347b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1953,7 +1953,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL) { EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout()); - return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain, + return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Other, Chain, getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 693a7f59629cb..c2346a98dfdb7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3107,8 +3107,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, Guard, GuardVal, ISD::SETNE); // If the guard/stackslot do not equal, branch to failure MBB. - SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, GuardVal.getOperand(0), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. 
SDValue Br = DAG.getNode(ISD::BR, dl, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 8abe29a41faa7..0e86428486a21 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2670,7 +2670,7 @@ getSimpleVT(const unsigned char *MatcherTable, unsigned &MatcherIndex) { void SelectionDAGISel::Select_JUMP_TABLE_DEBUG_INFO(SDNode *N) { SDLoc dl(N); - CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Glue, + CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Other, CurDAG->getTargetConstant(N->getConstantOperandVal(1), dl, MVT::i64, true)); }