diff --git a/llvm/include/llvm/Transforms/CFGuard.h b/llvm/include/llvm/Transforms/CFGuard.h index caf822a2ec9fb..b81db8f487965 100644 --- a/llvm/include/llvm/Transforms/CFGuard.h +++ b/llvm/include/llvm/Transforms/CFGuard.h @@ -16,6 +16,7 @@ namespace llvm { class FunctionPass; +class GlobalValue; class CFGuardPass : public PassInfoMixin { public: @@ -34,6 +35,8 @@ FunctionPass *createCFGuardCheckPass(); /// Insert Control FLow Guard dispatches on indirect function calls. FunctionPass *createCFGuardDispatchPass(); +bool isCFGuardFunction(const GlobalValue *GV); + } // namespace llvm #endif diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index ab7552ca01061..9ad56aaf05bc5 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -599,6 +599,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { if (T.getArch() == Triple::aarch64) { ImportCallSection = Ctx->getCOFFSection(".impcall", COFF::IMAGE_SCN_LNK_INFO); + } else if (T.getArch() == Triple::x86_64) { + // Import Call Optimization on x64 leverages the same metadata as the + // retpoline mitigation, hence the unusual section name. + ImportCallSection = + Ctx->getCOFFSection(".retplne", COFF::IMAGE_SCN_LNK_INFO); } // Debug info. diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index ce9a7c42d963c..80722ebf9123a 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -464,7 +464,8 @@ static bool isIndirectBranchOrTailCall(const MachineInstr &MI) { Opc == X86::TAILJMPr64 || Opc == X86::TAILJMPm64 || Opc == X86::TCRETURNri || Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNmi64 || - Opc == X86::TAILJMPr64_REX || Opc == X86::TAILJMPm64_REX; + Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TAILJMPr64_REX || + Opc == X86::TAILJMPm64_REX; } void X86AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { @@ -912,6 +913,9 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) { if (TT.isOSBinFormatCOFF()) { emitCOFFFeatureSymbol(M); emitCOFFReplaceableFunctionData(M); + + if (M.getModuleFlag("import-call-optimization")) + EnableImportCallOptimization = true; } OutStreamer->emitSyntaxDirective(); @@ -1013,6 +1017,35 @@ void X86AsmPrinter::emitEndOfAsmFile(Module &M) { // safe to set. OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols); } else if (TT.isOSBinFormatCOFF()) { + // If import call optimization is enabled, emit the appropriate section. + // We do this whether or not we recorded any items. + if (EnableImportCallOptimization) { + OutStreamer->switchSection(getObjFileLowering().getImportCallSection()); + + // Section always starts with some magic. + constexpr char ImpCallMagic[12] = "RetpolineV1"; + OutStreamer->emitBytes(StringRef{ImpCallMagic, sizeof(ImpCallMagic)}); + + // Layout of this section is: + // Per section that contains an item to record: + // uint32_t SectionSize: Size in bytes for information in this section. + // uint32_t Section Number + // Per call to imported function in section: + // uint32_t Kind: the kind of item. + // uint32_t InstOffset: the offset of the instr in its parent section. 
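+      //
+      // Illustratively, a section "tc_sect" containing a single recorded tail
+      // jump through an import would be encoded as (cf. the MC test below):
+      //   .long 16             # SectionSize = 4 * (2 + 2 * 1)
+      //   .secnum tc_sect
+      //   .long 2              # IMAGE_RETPOLINE_AMD64_IMPORT_BR
+      //   .secoffset .Limpcall1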
+ for (auto &[Section, CallsToImportedFuncs] : + SectionToImportedFunctionCalls) { + unsigned SectionSize = + sizeof(uint32_t) * (2 + 2 * CallsToImportedFuncs.size()); + OutStreamer->emitInt32(SectionSize); + OutStreamer->emitCOFFSecNumber(Section->getBeginSymbol()); + for (auto &[CallsiteSymbol, Kind] : CallsToImportedFuncs) { + OutStreamer->emitInt32(Kind); + OutStreamer->emitCOFFSecOffset(CallsiteSymbol); + } + } + } + if (usesMSVCFloatingPoint(TT, M)) { // In Windows' libcmt.lib, there is a file which is linked in only if the // symbol _fltused is referenced. Linking this in causes some diff --git a/llvm/lib/Target/X86/X86AsmPrinter.h b/llvm/lib/Target/X86/X86AsmPrinter.h index 61d8f45501ab1..efb951b73532f 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.h +++ b/llvm/lib/Target/X86/X86AsmPrinter.h @@ -35,6 +35,26 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { bool EmitFPOData = false; bool ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = false; bool IndCSPrefix = false; + bool EnableImportCallOptimization = false; + + enum ImportCallKind : unsigned { + IMAGE_RETPOLINE_AMD64_IMPORT_BR = 0x02, + IMAGE_RETPOLINE_AMD64_IMPORT_CALL = 0x03, + IMAGE_RETPOLINE_AMD64_INDIR_BR = 0x04, + IMAGE_RETPOLINE_AMD64_INDIR_CALL = 0x05, + IMAGE_RETPOLINE_AMD64_INDIR_BR_REX = 0x06, + IMAGE_RETPOLINE_AMD64_CFG_BR = 0x08, + IMAGE_RETPOLINE_AMD64_CFG_CALL = 0x09, + IMAGE_RETPOLINE_AMD64_CFG_BR_REX = 0x0A, + IMAGE_RETPOLINE_AMD64_SWITCHTABLE_FIRST = 0x010, + IMAGE_RETPOLINE_AMD64_SWITCHTABLE_LAST = 0x01F, + }; + struct ImportCallInfo { + MCSymbol *CalleeSymbol; + ImportCallKind Kind; + }; + DenseMap> + SectionToImportedFunctionCalls; // This utility class tracks the length of a stackmap instruction's 'shadow'. // It is used by the X86AsmPrinter to ensure that the stackmap shadow @@ -49,7 +69,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void startFunction(MachineFunction &MF) { this->MF = &MF; } - void count(MCInst &Inst, const MCSubtargetInfo &STI, + void count(const MCInst &Inst, const MCSubtargetInfo &STI, MCCodeEmitter *CodeEmitter); // Called to signal the start of a shadow of RequiredSize bytes. @@ -130,6 +150,12 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void emitMachOIFuncStubHelperBody(Module &M, const GlobalIFunc &GI, MCSymbol *LazyPointer) override; + void emitCallInstruction(const llvm::MCInst &MCI); + + // Emits a label to mark the next instruction as being relevant to Import Call + // Optimization. 
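+  // The label and its kind are recorded in SectionToImportedFunctionCalls so
+  // that emitEndOfAsmFile can later emit one (Kind, offset) record per call
+  // site into the .retplne section.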
+ void emitLabelAndRecordForImportCallOptimization(ImportCallKind Kind); + public: X86AsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index 398b738b85697..8ba6ed357d143 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -274,6 +274,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, case X86::TCRETURNdi64: case X86::TCRETURNdi64cc: case X86::TCRETURNri64: + case X86::TCRETURNri64_ImpCall: case X86::TCRETURNmi64: { bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64; MachineOperand &JumpTarget = MBBI->getOperand(0); @@ -345,12 +346,14 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op)); for (unsigned i = 0; i != X86::AddrNumOperands; ++i) MIB.add(MBBI->getOperand(i)); - } else if (Opcode == X86::TCRETURNri64) { + } else if ((Opcode == X86::TCRETURNri64) || + (Opcode == X86::TCRETURNri64_ImpCall)) { JumpTarget.setIsKill(); BuildMI(MBB, MBBI, DL, TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64)) .add(JumpTarget); } else { + assert(!IsWin64 && "Win64 requires REX for indirect jumps."); JumpTarget.setIsKill(); BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr)) .add(JumpTarget); @@ -875,6 +878,9 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, case X86::CALL64m_RVMARKER: expandCALL_RVMARKER(MBB, MBBI); return true; + case X86::CALL64r_ImpCall: + MI.setDesc(TII->get(X86::CALL64r)); + return true; case X86::ADD32mi_ND: case X86::ADD64mi32_ND: case X86::SUB32mi_ND: diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index d7cb93bdb7376..0ff7f235ed392 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsX86.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" @@ -3316,6 +3317,11 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { if (Flag.isSwiftError() || Flag.isPreallocated()) return false; + // Can't handle import call optimization. 
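+  // Import Call Optimization needs indirect calls funnelled through RAX and
+  // annotated with labels/NOP padding, which only the SelectionDAG path
+  // implements, so bail out and let it handle the call.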
+ if (Is64Bit && + MF->getFunction().getParent()->getModuleFlag("import-call-optimization")) + return false; + SmallVector OutVTs; SmallVector ArgRegs; diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 7e960c6420d3b..75f49beee27c6 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -2399,7 +2399,8 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { static bool isTailCallOpcode(unsigned Opc) { return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi || Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 || - Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64; + Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 || + Opc == X86::TCRETURNmi64; } void X86FrameLowering::emitEpilogue(MachineFunction &MF, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9be3b39ce16fa..2af5385b199da 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19166,7 +19166,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { - return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false); + return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr); } SDValue @@ -19194,7 +19194,8 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { /// Creates target global address or external symbol nodes for calls or /// other uses. SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, - bool ForCall) const { + bool ForCall, + bool *IsImpCall) const { // Unpack the global address or external symbol. SDLoc dl(Op); const GlobalValue *GV = nullptr; @@ -19244,6 +19245,16 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0) return Result; + // If Import Call Optimization is enabled and this is an imported function + // then make a note of it and return the global address without wrapping. + if (IsImpCall && (OpFlags == X86II::MO_DLLIMPORT) && + Mod.getModuleFlag("import-call-optimization")) { + assert(ForCall && "Should only enable import call optimization if we are " + "lowering a call"); + *IsImpCall = true; + return Result; + } + Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result); // With PIC, the address is actually $g + Offset. 
@@ -19269,7 +19280,7 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, SDValue X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false); + return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr); } static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA, @@ -34809,6 +34820,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FST) NODE_NAME_CASE(CALL) NODE_NAME_CASE(CALL_RVMARKER) + NODE_NAME_CASE(IMP_CALL) NODE_NAME_CASE(BT) NODE_NAME_CASE(CMP) NODE_NAME_CASE(FCMP) @@ -62041,6 +62053,7 @@ X86TargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, Register TargetReg; switch (MBBI->getOpcode()) { case X86::CALL64r: + case X86::CALL64r_ImpCall: case X86::CALL64r_NT: case X86::TAILJMPr64: case X86::TAILJMPr64_REX: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 662552a972249..419111adfcf2b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -90,6 +90,10 @@ namespace llvm { /// POP_FROM_X87_REG (which may remove a required FPU stack pop). POP_FROM_X87_REG, + // Pseudo for a call to an imported function to ensure the correct machine + // instruction is emitted for Import Call Optimization. + IMP_CALL, + /// X86 compare and logical compare instructions. CMP, FCMP, @@ -1746,8 +1750,8 @@ namespace llvm { /// Creates target global address or external symbol nodes for calls or /// other uses. - SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, - bool ForCall) const; + SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall, + bool *IsImpCall) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index 03165311dfef8..1aa00d4f09f75 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -2050,6 +2050,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (CallConv == CallingConv::X86_INTR) report_fatal_error("X86 interrupts may not be called directly"); + if (IsIndirectCall && !IsWin64 && + M->getModuleFlag("import-call-optimization")) + errorUnsupported(DAG, dl, + "Indirect calls must have a normal calling convention if " + "Import Call Optimization is enabled"); + // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); @@ -2421,6 +2427,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InGlue = Chain.getValue(1); } + bool IsImpCall = false; if (DAG.getTarget().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls @@ -2433,7 +2440,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // ForCall to true here has the effect of removing WrapperRIP when possible // to allow direct calls to be selected without first materializing the // address into a register. 
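+    // Passing IsImpCall also lets LowerGlobalOrExternal report a dllimport
+    // callee when Import Call Optimization is enabled; such calls are emitted
+    // as X86ISD::IMP_CALL below.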
- Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true); + Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true, &IsImpCall); } else if (Subtarget.isTarget64BitILP32() && Callee.getValueType() == MVT::i32) { // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI @@ -2555,7 +2562,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Returns a chain & a glue for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - if (IsNoTrackIndirectCall) { + if (IsImpCall) { + Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops); + } else if (IsNoTrackIndirectCall) { Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops); } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) { // Calls with a "clang.arc.attachedcall" bundle are special. They should be diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index efa1e8bd7f3e3..927b2c8b22f05 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1313,6 +1313,8 @@ def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)), def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)), (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>; +def : Pat<(X86imp_call (i64 tglobaladdr:$dst)), + (CALL64pcrel32 tglobaladdr:$dst)>; // Tailcall stuff. The TCRETURN instructions execute after the epilog, so they // can never use callee-saved registers. That is the purpose of the GR64_TC @@ -1344,7 +1346,11 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), timm:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, timm:$off)>, - Requires<[In64BitMode, NotUseIndirectThunkCalls]>; + Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationDisabled]>; + +def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off), + (TCRETURNri64_ImpCall ptr_rc_tailcall:$dst, timm:$off)>, + Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationEnabled]>; // Don't fold loads into X86tcret requiring more than 6 regs. // There wouldn't be enough scratch registers for base+index. 
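For reference, a minimal IR sketch (illustrative only, not part of this patch; the function and symbol names are made up) of a module that opts in to Import Call Optimization. With the module flag set, the direct call to the dllimport function is lowered through X86ISD::IMP_CALL to CALL64pcrel32, and the indirect call selects CALL64r_ImpCall, constraining the target to RAX, as exercised by the CodeGen tests below:

declare dllimport void @ext()

define void @caller(ptr %fp) {
entry:
  call void @ext()
  call void %fp()
  ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"import-call-optimization", i32 1}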
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td index 4907105e6b8cc..22253bf0413a4 100644 --- a/llvm/lib/Target/X86/X86InstrControl.td +++ b/llvm/lib/Target/X86/X86InstrControl.td @@ -327,7 +327,7 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { Requires<[In64BitMode]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst), "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, - Requires<[In64BitMode,NotUseIndirectThunkCalls]>; + Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationDisabled]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, Requires<[In64BitMode,FavorMemIndirectCall, @@ -357,6 +357,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, def TCRETURNri64 : PseudoI<(outs), (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, Sched<[WriteJump]>; + def TCRETURNri64_ImpCall : PseudoI<(outs), + (ins GR64_A:$dst, i32imm:$offset), + []>, Sched<[WriteJump]>; + let mayLoad = 1 in def TCRETURNmi64 : PseudoI<(outs), (ins i64mem_TC:$dst, i32imm:$offset), @@ -418,6 +422,10 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1, def CALL64pcrel32_RVMARKER : PseudoI<(outs), (ins i64imm:$rvfunc, i64i32imm_brtarget:$dst), []>, Requires<[In64BitMode]>; + + def CALL64r_ImpCall : + PseudoI<(outs), (ins GR64_A:$dst), [(X86call GR64_A:$dst)]>, + Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationEnabled]>; } // Conditional tail calls are similar to the above, but they are branches diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index f9d70d1bb5d85..fe95b8c20a8ff 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -210,6 +210,9 @@ def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +def X86imp_call : SDNode<"X86ISD::IMP_CALL", SDT_X86Call, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; def X86NoTrackCall : SDNode<"X86ISD::NT_CALL", SDT_X86Call, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 963a2bb84e185..7d9fa759ad81e 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3714,6 +3714,7 @@ bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const { case X86::TCRETURNmi: case X86::TCRETURNdi64: case X86::TCRETURNri64: + case X86::TCRETURNri64_ImpCall: case X86::TCRETURNmi64: return true; default: @@ -7458,7 +7459,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( // do not fold loads into calls or pushes, unless optimizing for size // aggressively. 
if (isSlowTwoMemOps && !MF.getFunction().hasMinSize() && - (Opc == X86::CALL32r || Opc == X86::CALL64r || Opc == X86::PUSH16r || + (Opc == X86::CALL32r || Opc == X86::CALL64r || + Opc == X86::CALL64r_ImpCall || Opc == X86::PUSH16r || Opc == X86::PUSH32r || Opc == X86::PUSH64r)) return nullptr; diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index 5bdcf51be9dd8..307c03c8ef541 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -233,6 +233,8 @@ let RecomputePerFunction = 1 in { "shouldOptForSize(MF)">; def NoSSE41_Or_OptForSize : Predicate<"shouldOptForSize(MF) || " "!Subtarget->hasSSE41()">; + def ImportCallOptimizationEnabled : Predicate<"MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">; + def ImportCallOptimizationDisabled : Predicate<"!MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">; } def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 165bcb0ba9647..55d57d15f8d42 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -48,6 +48,7 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/CFGuard.h" #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" #include @@ -113,7 +114,7 @@ struct NoAutoPaddingScope { static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, const X86Subtarget *Subtarget); -void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, +void X86AsmPrinter::StackMapShadowTracker::count(const MCInst &Inst, const MCSubtargetInfo &STI, MCCodeEmitter *CodeEmitter) { if (InShadow) { @@ -2214,6 +2215,31 @@ static void addConstantComments(const MachineInstr *MI, } } +// Does the given operand refer to a DLLIMPORT function? +bool isImportedFunction(const MachineOperand &MO) { + return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_DLLIMPORT); +} + +// Is the given instruction a call to a CFGuard function? +bool isCallToCFGuardFunction(const MachineInstr *MI) { + assert(MI->getOpcode() == X86::TAILJMPm64_REX || + MI->getOpcode() == X86::CALL64m); + const MachineOperand &MO = MI->getOperand(3); + return MO.isGlobal() && (MO.getTargetFlags() == X86II::MO_NO_FLAG) && + isCFGuardFunction(MO.getGlobal()); +} + +// Does the containing block for the given instruction contain any jump table +// info (indicating that the block is a dispatch for a jump table)? +bool hasJumpTableInfoInBlock(const llvm::MachineInstr *MI) { + const MachineBasicBlock &MBB = *MI->getParent(); + for (auto I = MBB.instr_rbegin(), E = MBB.instr_rend(); I != E; ++I) + if (I->isJumpTableDebugInfo()) + return true; + + return false; +} + void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { // FIXME: Enable feature predicate checks once all the test pass. 
// X86_MC::verifyInstructionPredicates(MI->getOpcode(), @@ -2292,7 +2318,16 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { case X86::TAILJMPd64: if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11)) EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX)); - [[fallthrough]]; + + if (EnableImportCallOptimization && isImportedFunction(MI->getOperand(0))) { + emitLabelAndRecordForImportCallOptimization( + IMAGE_RETPOLINE_AMD64_IMPORT_BR); + } + + // Lower this as normal, but add a comment. + OutStreamer->AddComment("TAILCALL"); + break; + case X86::TAILJMPr: case X86::TAILJMPm: case X86::TAILJMPd: @@ -2300,12 +2335,58 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { case X86::TAILJMPr64: case X86::TAILJMPm64: case X86::TAILJMPd64_CC: - case X86::TAILJMPr64_REX: - case X86::TAILJMPm64_REX: + if (EnableImportCallOptimization) + report_fatal_error("Unexpected TAILJMP instruction was emitted when " + "import call optimization was enabled"); + // Lower these as normal, but add some comments. OutStreamer->AddComment("TAILCALL"); break; + case X86::TAILJMPm64_REX: + if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) { + emitLabelAndRecordForImportCallOptimization( + IMAGE_RETPOLINE_AMD64_CFG_BR_REX); + } + + OutStreamer->AddComment("TAILCALL"); + break; + + case X86::TAILJMPr64_REX: { + if (EnableImportCallOptimization) { + assert(MI->getOperand(0).getReg() == X86::RAX && + "Indirect tail calls with impcall enabled must go through RAX (as " + "enforced by TCRETURNImpCallri64)"); + emitLabelAndRecordForImportCallOptimization( + IMAGE_RETPOLINE_AMD64_INDIR_BR); + } + + OutStreamer->AddComment("TAILCALL"); + break; + } + + case X86::JMP64r: + if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI)) { + uint16_t EncodedReg = + this->getSubtarget().getRegisterInfo()->getEncodingValue( + MI->getOperand(0).getReg().asMCReg()); + emitLabelAndRecordForImportCallOptimization( + (ImportCallKind)(IMAGE_RETPOLINE_AMD64_SWITCHTABLE_FIRST + + EncodedReg)); + } + break; + + case X86::JMP16r: + case X86::JMP16m: + case X86::JMP32r: + case X86::JMP32m: + case X86::JMP64m: + if (EnableImportCallOptimization && hasJumpTableInfoInBlock(MI)) + report_fatal_error( + "Unexpected JMP instruction was emitted for a jump-table when import " + "call optimization was enabled"); + break; + case X86::TLS_addr32: case X86::TLS_addr64: case X86::TLS_addrX32: @@ -2492,7 +2573,50 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { case X86::CALL64pcrel32: if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11)) EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX)); + + if (EnableImportCallOptimization && isImportedFunction(MI->getOperand(0))) { + emitLabelAndRecordForImportCallOptimization( + IMAGE_RETPOLINE_AMD64_IMPORT_CALL); + + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + + // For Import Call Optimization to work, we need a the call instruction + // with a rex prefix, and a 5-byte nop after the call instruction. 
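+      // The emitted sequence then looks like (illustrative symbol names,
+      // matching the CodeGen test below):
+      //   .Limpcall0:
+      //     rex64
+      //     callq   __imp_a
+      //     nopl    8(%rax,%rax)         # 5-byte NOP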
+ EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX)); + emitCallInstruction(TmpInst); + emitNop(*OutStreamer, 5, Subtarget); + return; + } + + break; + + case X86::CALL64r: + if (EnableImportCallOptimization) { + assert(MI->getOperand(0).getReg() == X86::RAX && + "Indirect calls with impcall enabled must go through RAX (as " + "enforced by CALL64r_ImpCall)"); + + emitLabelAndRecordForImportCallOptimization( + IMAGE_RETPOLINE_AMD64_INDIR_CALL); + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + emitCallInstruction(TmpInst); + + // For Import Call Optimization to work, we need a 3-byte nop after the + // call instruction. + emitNop(*OutStreamer, 3, Subtarget); + return; + } + break; + + case X86::CALL64m: + if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) { + emitLabelAndRecordForImportCallOptimization( + IMAGE_RETPOLINE_AMD64_CFG_CALL); + } break; + case X86::JCC_1: // Two instruction prefixes (2EH for branch not-taken and 3EH for branch // taken) are used as branch hints. Here we add branch taken prefix for @@ -2513,20 +2637,36 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - // Stackmap shadows cannot include branch targets, so we can count the bytes - // in a call towards the shadow, but must ensure that the no thread returns - // in to the stackmap shadow. The only way to achieve this is if the call - // is at the end of the shadow. if (MI->isCall()) { - // Count then size of the call towards the shadow - SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get()); - // Then flush the shadow so that we fill with nops before the call, not - // after it. - SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); - // Then emit the call - OutStreamer->emitInstruction(TmpInst, getSubtargetInfo()); + emitCallInstruction(TmpInst); return; } EmitAndCountInstruction(TmpInst); } + +void X86AsmPrinter::emitCallInstruction(const llvm::MCInst &MCI) { + // Stackmap shadows cannot include branch targets, so we can count the bytes + // in a call towards the shadow, but must ensure that the no thread returns + // in to the stackmap shadow. The only way to achieve this is if the call + // is at the end of the shadow. + + // Count then size of the call towards the shadow + SMShadowTracker.count(MCI, getSubtargetInfo(), CodeEmitter.get()); + // Then flush the shadow so that we fill with nops before the call, not + // after it. 
+ SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); + // Then emit the call + OutStreamer->emitInstruction(MCI, getSubtargetInfo()); +} + +void X86AsmPrinter::emitLabelAndRecordForImportCallOptimization( + ImportCallKind Kind) { + assert(EnableImportCallOptimization); + + MCSymbol *CallSiteSymbol = MMI->getContext().createNamedTempSymbol("impcall"); + OutStreamer->emitLabel(CallSiteSymbol); + + SectionToImportedFunctionCalls[OutStreamer->getCurrentSectionOnly()] + .push_back({CallSiteSymbol, Kind}); +} diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index c192e8892995b..71d36594afaeb 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -999,6 +999,7 @@ unsigned X86RegisterInfo::findDeadCallerSavedReg( case X86::TCRETURNmi: case X86::TCRETURNdi64: case X86::TCRETURNri64: + case X86::TCRETURNri64_ImpCall: case X86::TCRETURNmi64: case X86::EH_RETURN: case X86::EH_RETURN64: { diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index 48459b3aca508..3f9af5639a686 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -737,6 +737,10 @@ def GR32_SIDI : RegisterClass<"X86", [i32], 32, (add ESI, EDI)>; def GR32_DIBP : RegisterClass<"X86", [i32], 32, (add EDI, EBP)>; def GR32_BPSP : RegisterClass<"X86", [i32], 32, (add EBP, ESP)>; +// Class to support Windows Import Call Optimization: all indirect jumps must +// happen through RAX. +def GR64_A : RegisterClass<"X86", [i64], 64, (add RAX)>; + // Scalar SSE2 floating point registers. def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>; diff --git a/llvm/lib/Transforms/CFGuard/CFGuard.cpp b/llvm/lib/Transforms/CFGuard/CFGuard.cpp index 45c2a3394da02..b73a0ce2e9ff4 100644 --- a/llvm/lib/Transforms/CFGuard/CFGuard.cpp +++ b/llvm/lib/Transforms/CFGuard/CFGuard.cpp @@ -31,6 +31,9 @@ using OperandBundleDef = OperandBundleDefT; STATISTIC(CFGuardCounter, "Number of Control Flow Guard checks added"); +constexpr StringRef GuardCheckFunctionName = "__guard_check_icall_fptr"; +constexpr StringRef GuardDispatchFunctionName = "__guard_dispatch_icall_fptr"; + namespace { /// Adds Control Flow Guard (CFG) checks on indirect function calls/invokes. @@ -45,10 +48,10 @@ class CFGuardImpl { // Get or insert the guard check or dispatch global symbols. 
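+    // The names come from the shared constants above so that
+    // llvm::isCFGuardFunction (defined below) can match them; the X86 asm
+    // printer uses that to recognize calls through the CF Guard check and
+    // dispatch pointers when recording Import Call Optimization metadata.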
switch (GuardMechanism) { case Mechanism::Check: - GuardFnName = "__guard_check_icall_fptr"; + GuardFnName = GuardCheckFunctionName; break; case Mechanism::Dispatch: - GuardFnName = "__guard_dispatch_icall_fptr"; + GuardFnName = GuardDispatchFunctionName; break; } } @@ -318,3 +321,11 @@ FunctionPass *llvm::createCFGuardCheckPass() { FunctionPass *llvm::createCFGuardDispatchPass() { return new CFGuard(CFGuardPass::Mechanism::Dispatch); } + +bool llvm::isCFGuardFunction(const GlobalValue *GV) { + if (GV->getLinkage() != GlobalValue::ExternalLinkage) + return false; + + StringRef Name = GV->getName(); + return Name == GuardCheckFunctionName || Name == GuardDispatchFunctionName; +} diff --git a/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll b/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll new file mode 100644 index 0000000000000..12be910d68ee9 --- /dev/null +++ b/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll @@ -0,0 +1,34 @@ +; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK + +define dso_local void @normal_call(ptr noundef readonly %func_ptr) local_unnamed_addr section "nc_sect" { +entry: + call void %func_ptr() + ret void +} +; CHECK-LABEL: normal_call: +; CHECK: .Limpcall0: +; CHECK-NEXT: callq *__guard_dispatch_icall_fptr(%rip) + +define dso_local void @tail_call_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" { +entry: + tail call void %func_ptr() + ret void +} +; CHECK-LABEL: tail_call_fp: +; CHECK: .Limpcall1: +; CHECK-NEXT: rex64 jmpq *__guard_dispatch_icall_fptr(%rip) + +; CHECK-LABEL .section .retplne,"yi" +; CHECK-NEXT .asciz "RetpolineV1" +; CHECK-NEXT .long 16 +; CHECK-NEXT .secnum tc_sect +; CHECK-NEXT .long 10 +; CHECK-NEXT .secoffset .Limpcall1 +; CHECK-NEXT .long 16 +; CHECK-NEXT .secnum nc_sect +; CHECK-NEXT .long 9 +; CHECK-NEXT .secoffset .Limpcall0 + +!llvm.module.flags = !{!0, !1} +!0 = !{i32 1, !"import-call-optimization", i32 1} +!1 = !{i32 2, !"cfguard", i32 2} diff --git a/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll b/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll new file mode 100644 index 0000000000000..fe22b251685e6 --- /dev/null +++ b/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll @@ -0,0 +1,83 @@ +; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s + +; CHECK-LABEL: uses_rax: +; CHECK: .Limpcall0: +; CHECK-NEXT: jmpq *%rax + +define void @uses_rax(i32 %x) { +entry: + switch i32 %x, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb: + tail call void @g(i32 0) #2 + br label %sw.epilog + +sw.bb1: + tail call void @g(i32 1) #2 + br label %sw.epilog + +sw.bb2: + tail call void @g(i32 2) #2 + br label %sw.epilog + +sw.bb3: + tail call void @g(i32 3) #2 + br label %sw.epilog + +sw.epilog: + tail call void @g(i32 10) #2 + ret void +} + +; CHECK-LABEL: uses_rcx: +; CHECK: .Limpcall1: +; CHECK-NEXT: jmpq *%rcx + +define void @uses_rcx(i32 %x) { +entry: + switch i32 %x, label %sw.epilog [ + i32 10, label %sw.bb + i32 11, label %sw.bb1 + i32 12, label %sw.bb2 + i32 13, label %sw.bb3 + ] + +sw.bb: + tail call void @g(i32 0) #2 + br label %sw.epilog + +sw.bb1: + tail call void @g(i32 1) #2 + br label %sw.epilog + +sw.bb2: + tail call void @g(i32 2) #2 + br label %sw.epilog + +sw.bb3: + tail call void @g(i32 3) #2 + br label %sw.epilog + +sw.epilog: + tail call void @g(i32 10) #2 + ret void +} + +declare void @g(i32) + +; CHECK-LABEL: 
.section .retplne,"yi" +; CHECK-NEXT: .asciz "RetpolineV1" +; CHECK-NEXT: .long 24 +; CHECK-NEXT: .secnum .text +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .secoffset .Limpcall0 +; CHECK-NEXT: .long 17 +; CHECK-NEXT: .secoffset .Limpcall1 + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"import-call-optimization", i32 1} diff --git a/llvm/test/CodeGen/X86/win-import-call-optimization-nocalls.ll b/llvm/test/CodeGen/X86/win-import-call-optimization-nocalls.ll new file mode 100644 index 0000000000000..4ca7b85282f2e --- /dev/null +++ b/llvm/test/CodeGen/X86/win-import-call-optimization-nocalls.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s + +define dso_local void @normal_call() local_unnamed_addr { +entry: + call void @a() + ret void +} +; CHECK-LABEL: normal_call: +; CHECK: callq a + +declare void @a() local_unnamed_addr + +; Even if there are no calls to imported functions, we still need to emit the +; .impcall section. + +; CHECK-LABEL .section .retplne,"yi" +; CHECK-NEXT .asciz "RetpolineV1" +; CHECK-NOT .secnum + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"import-call-optimization", i32 1} diff --git a/llvm/test/CodeGen/X86/win-import-call-optimization.ll b/llvm/test/CodeGen/X86/win-import-call-optimization.ll new file mode 100644 index 0000000000000..cc7e1a9f81e34 --- /dev/null +++ b/llvm/test/CodeGen/X86/win-import-call-optimization.ll @@ -0,0 +1,67 @@ +; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc --fast-isel -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc --global-isel --global-isel-abort=2 -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK + +define dso_local void @normal_call(ptr noundef readonly %func_ptr) local_unnamed_addr section "nc_sect" { +entry: + call void @a() + call void @a() + call void %func_ptr() + ret void +} +; CHECK-LABEL: normal_call: +; CHECK: .Limpcall0: +; CHECK-NEXT: rex64 +; CHECK-NEXT: callq __imp_a +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Limpcall1: +; CHECK-NEXT: rex64 +; CHECK-NEXT: callq __imp_a +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: .Limpcall2: +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: nopl (%rax) +; CHECK-NEXT: nop + +define dso_local void @tail_call() local_unnamed_addr section "tc_sect" { +entry: + tail call void @b() + ret void +} +; CHECK-LABEL: tail_call: +; CHECK: .Limpcall3: +; CHECK-NEXT: jmp __imp_b + +define dso_local void @tail_call_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" { +entry: + tail call void %func_ptr() + ret void +} +; CHECK-LABEL: tail_call_fp: +; CHECK: movq %rcx, %rax +; CHECK-NEXT: .Limpcall4: +; CHECK-NEXT: rex64 jmpq *%rax + +declare dllimport void @a() local_unnamed_addr +declare dllimport void @b() local_unnamed_addr + +; CHECK-LABEL .section .retplne,"yi" +; CHECK-NEXT .asciz "RetpolineV1" +; CHECK-NEXT .long 24 +; CHECK-NEXT .secnum tc_sect +; CHECK-NEXT .long 3 +; CHECK-NEXT .secoffset .Limpcall3 +; CHECK-NEXT .long 5 +; CHECK-NEXT .secoffset .Limpcall4 +; CHECK-NEXT .long 32 +; CHECK-NEXT .secnum nc_sect +; CHECK-NEXT .long 3 +; CHECK-NEXT .secoffset .Limpcall0 +; CHECK-NEXT .long 3 +; CHECK-NEXT .secoffset .Limpcall1 +; CHECK-NEXT .long 5 +; CHECK-NEXT .secoffset .Limpcall2 + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"import-call-optimization", i32 1} diff --git a/llvm/test/MC/X86/win-import-call-optimization.s b/llvm/test/MC/X86/win-import-call-optimization.s new file mode 100644 index 
0000000000000..4f839a2bc6011 --- /dev/null +++ b/llvm/test/MC/X86/win-import-call-optimization.s @@ -0,0 +1,69 @@ +// RUN: llvm-mc -triple x86_64-windows-msvc -filetype obj -o %t.obj %s +// RUN: llvm-readobj --sections --sd --relocs %t.obj | FileCheck %s + +.section nc_sect,"xr" +normal_call: +.seh_proc normal_call +# %bb.0: # %entry + subq $40, %rsp + .seh_stackalloc 40 + .seh_endprologue +.Limpcall0: + rex64 + callq *__imp_a(%rip) + nopl 8(%rax,%rax) + nop + addq $40, %rsp + retq + .seh_endproc + +.section tc_sect,"xr" +tail_call: +.Limpcall1: + rex64 + jmp *__imp_b(%rip) + +.section .retplne,"yi" +.asciz "RetpolineV1" +.long 16 +.secnum tc_sect +.long 2 +.secoffset .Limpcall1 +.long 16 +.secnum nc_sect +.long 3 +.secoffset .Limpcall0 + +// CHECK-LABEL: Name: .retplne (2E 72 65 74 70 6C 6E 65) +// CHECK-NEXT: VirtualSize: 0x0 +// CHECK-NEXT: VirtualAddress: 0x0 +// CHECK-NEXT: RawDataSize: 44 +// CHECK-NEXT: PointerToRawData: +// CHECK-NEXT: PointerToRelocations: +// CHECK-NEXT: PointerToLineNumbers: +// CHECK-NEXT: RelocationCount: 0 +// CHECK-NEXT: LineNumberCount: 0 +// CHECK-NEXT: Characteristics [ +// CHECK-NEXT: IMAGE_SCN_ALIGN_1BYTES +// CHECK-NEXT: IMAGE_SCN_LNK_INFO +// CHECK-NEXT: ] +// CHECK-NEXT: SectionData ( +// CHECK-NEXT: 52657470 6F6C696E 65563100 10000000 |RetpolineV1.....| +// CHECK-NEXT: 0010: +// CHECK-SAME: [[#%.2X,TCSECT:]]000000 +// CHECK-SAME: 02000000 +// CHECK-SAME: [[#%.2X,TCOFFSET:]]000000 +// CHECK-SAME: 10000000 +// CHECK-NEXT: 0020: +// CHECK-SAME: [[#%.2X,NCSECT:]]000000 +// CHECK-SAME: 03000000 +// CHECK-SAME: [[#%.2X,NCOFFSET:]]000000 +// CHECK-NEXT: ) + +// CHECK-LABEL: Relocations [ +// CHECK-NEXT: Section ([[#%u,NCSECT]]) nc_sect { +// CHECK-NEXT: 0x[[#%x,NCOFFSET + 3]] IMAGE_REL_AMD64_REL32 __imp_a +// CHECK-NEXT: } +// CHECK-NEXT: Section ([[#%u,TCSECT]]) tc_sect { +// CHECK-NEXT: 0x[[#%x,TCOFFSET + 3]] IMAGE_REL_AMD64_REL32 __imp_b +// CHECK-NEXT: }