Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit f18fa07

Browse files
committed
Implement Import Call Optimization for x64
1 parent db38cc2 commit f18fa07
Copy full SHA for f18fa07
Expand file tree / Collapse file tree

24 files changed

+569
-30
lines changed

‎llvm/include/llvm/Transforms/CFGuard.h

Copy file name to clipboardExpand all lines: llvm/include/llvm/Transforms/CFGuard.h
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
namespace llvm {
1717

1818
class FunctionPass;
19+
class GlobalValue;
1920

2021
class CFGuardPass : public PassInfoMixin<CFGuardPass> {
2122
public:
@@ -34,6 +35,8 @@ FunctionPass *createCFGuardCheckPass();
3435
/// Insert Control Flow Guard dispatches on indirect function calls.
3536
FunctionPass *createCFGuardDispatchPass();
3637

38+
bool isCFGuardFunction(const GlobalValue *GV);
39+
3740
} // namespace llvm
3841

3942
#endif

‎llvm/lib/MC/MCObjectFileInfo.cpp

Copy file name to clipboardExpand all lines: llvm/lib/MC/MCObjectFileInfo.cpp
+5Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
599599
if (T.getArch() == Triple::aarch64) {
600600
ImportCallSection =
601601
Ctx->getCOFFSection(".impcall", COFF::IMAGE_SCN_LNK_INFO);
602+
} else if (T.getArch() == Triple::x86_64) {
603+
// Import Call Optimization on x64 leverages the same metadata as the
604+
// retpoline mitigation, hence the unusual section name.
605+
ImportCallSection =
606+
Ctx->getCOFFSection(".retplne", COFF::IMAGE_SCN_LNK_INFO);
602607
}
603608

604609
// Debug info.

‎llvm/lib/Target/X86/X86AsmPrinter.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86AsmPrinter.cpp
+34-1Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,8 @@ static bool isIndirectBranchOrTailCall(const MachineInstr &MI) {
464464
Opc == X86::TAILJMPr64 || Opc == X86::TAILJMPm64 ||
465465
Opc == X86::TCRETURNri || Opc == X86::TCRETURNmi ||
466466
Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNmi64 ||
467-
Opc == X86::TAILJMPr64_REX || Opc == X86::TAILJMPm64_REX;
467+
Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TAILJMPr64_REX ||
468+
Opc == X86::TAILJMPm64_REX;
468469
}
469470

470471
void X86AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
@@ -945,6 +946,9 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
945946
OutStreamer->emitSymbolAttribute(S, MCSA_Global);
946947
OutStreamer->emitAssignment(
947948
S, MCConstantExpr::create(Feat00Value, MMI->getContext()));
949+
950+
if (M.getModuleFlag("import-call-optimization"))
951+
EnableImportCallOptimization = true;
948952
}
949953
OutStreamer->emitSyntaxDirective();
950954

@@ -1046,6 +1050,35 @@ void X86AsmPrinter::emitEndOfAsmFile(Module &M) {
10461050
// safe to set.
10471051
OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols);
10481052
} else if (TT.isOSBinFormatCOFF()) {
1053+
// If import call optimization is enabled, emit the appropriate section.
1054+
// We do this whether or not we recorded any items.
1055+
if (EnableImportCallOptimization) {
1056+
OutStreamer->switchSection(getObjFileLowering().getImportCallSection());
1057+
1058+
// Section always starts with some magic.
1059+
constexpr char ImpCallMagic[12] = "RetpolineV1";
1060+
OutStreamer->emitBytes(StringRef{ImpCallMagic, sizeof(ImpCallMagic)});
1061+
1062+
// Layout of this section is:
1063+
// Per section that contains an item to record:
1064+
// uint32_t SectionSize: Size in bytes for information in this section.
1065+
// uint32_t SectionNumber: COFF section number of that section.
1066+
// Per call to imported function in section:
1067+
// uint32_t Kind: the kind of item.
1068+
// uint32_t InstOffset: the offset of the instr in its parent section.
1069+
for (auto &[Section, CallsToImportedFuncs] :
1070+
SectionToImportedFunctionCalls) {
1071+
unsigned SectionSize =
1072+
sizeof(uint32_t) * (2 + 2 * CallsToImportedFuncs.size());
1073+
OutStreamer->emitInt32(SectionSize);
1074+
OutStreamer->emitCOFFSecNumber(Section->getBeginSymbol());
1075+
for (auto &[CallsiteSymbol, Kind] : CallsToImportedFuncs) {
1076+
OutStreamer->emitInt32(Kind);
1077+
OutStreamer->emitCOFFSecOffset(CallsiteSymbol);
1078+
}
1079+
}
1080+
}
1081+
10491082
if (usesMSVCFloatingPoint(TT, M)) {
10501083
// In Windows' libcmt.lib, there is a file which is linked in only if the
10511084
// symbol _fltused is referenced. Linking this in causes some

‎llvm/lib/Target/X86/X86AsmPrinter.h

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86AsmPrinter.h
+27-1Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,26 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
3535
bool EmitFPOData = false;
3636
bool ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = false;
3737
bool IndCSPrefix = false;
38+
bool EnableImportCallOptimization = false;
39+
40+
// Kind tag attached to every item recorded for Import Call Optimization.
// The numeric value is written unchanged as the 32-bit `Kind` field of each
// record when the import call section is emitted at the end of the file, so
// the values are part of the emitted format and must not be renumbered.
// NOTE(review): the IMAGE_RETPOLINE_AMD64_* names presumably mirror the
// Windows retpoline metadata constants (the x64 section reuses the ".retplne"
// name) — confirm against the platform definitions before editing a value.
enum ImportCallKind : unsigned {
41+
IMAGE_RETPOLINE_AMD64_IMPORT_BR = 0x02,
42+
IMAGE_RETPOLINE_AMD64_IMPORT_CALL = 0x03,
43+
IMAGE_RETPOLINE_AMD64_INDIR_BR = 0x04,
44+
IMAGE_RETPOLINE_AMD64_INDIR_CALL = 0x05,
45+
IMAGE_RETPOLINE_AMD64_INDIR_BR_REX = 0x06,
46+
IMAGE_RETPOLINE_AMD64_CFG_BR = 0x08,
47+
IMAGE_RETPOLINE_AMD64_CFG_CALL = 0x09,
48+
IMAGE_RETPOLINE_AMD64_CFG_BR_REX = 0x0A,
49+
// NOTE(review): FIRST/LAST look like the inclusive bounds of a range of
// switch-table kinds (0x10..0x1F) rather than two distinct kinds — confirm.
IMAGE_RETPOLINE_AMD64_SWITCHTABLE_FIRST = 0x010,
50+
IMAGE_RETPOLINE_AMD64_SWITCHTABLE_LAST = 0x01F,
51+
};
52+
// One recorded item for Import Call Optimization: a symbol plus the kind of
// item it marks. Items are grouped per MCSection in
// SectionToImportedFunctionCalls and written out as (Kind, section offset of
// the symbol) pairs when the import call section is emitted.
struct ImportCallInfo {
53+
// Symbol whose section offset is emitted for this item.
// NOTE(review): despite the name, the emission loop binds this member as
// `CallsiteSymbol` and emits it as the instruction offset, so it presumably
// labels the call site rather than the callee — confirm and consider
// renaming.
MCSymbol *CalleeSymbol;
54+
// Kind tag emitted as a 32-bit value ahead of the symbol's offset.
ImportCallKind Kind;
55+
};
56+
DenseMap<MCSection *, std::vector<ImportCallInfo>>
57+
SectionToImportedFunctionCalls;
3858

3959
// This utility class tracks the length of a stackmap instruction's 'shadow'.
4060
// It is used by the X86AsmPrinter to ensure that the stackmap shadow
@@ -49,7 +69,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
4969
void startFunction(MachineFunction &MF) {
5070
this->MF = &MF;
5171
}
52-
void count(MCInst &Inst, const MCSubtargetInfo &STI,
72+
void count(const MCInst &Inst, const MCSubtargetInfo &STI,
5373
MCCodeEmitter *CodeEmitter);
5474

5575
// Called to signal the start of a shadow of RequiredSize bytes.
@@ -130,6 +150,12 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
130150
void emitMachOIFuncStubHelperBody(Module &M, const GlobalIFunc &GI,
131151
MCSymbol *LazyPointer) override;
132152

153+
void emitCallInstruction(const llvm::MCInst &MCI);
154+
155+
// Emits a label to mark the next instruction as being relevant to Import Call
156+
// Optimization.
157+
void emitLabelAndRecordForImportCallOptimization(ImportCallKind Kind);
158+
133159
public:
134160
X86AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
135161

‎llvm/lib/Target/X86/X86ExpandPseudo.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86ExpandPseudo.cpp
+7-1Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
274274
case X86::TCRETURNdi64:
275275
case X86::TCRETURNdi64cc:
276276
case X86::TCRETURNri64:
277+
case X86::TCRETURNri64_ImpCall:
277278
case X86::TCRETURNmi64: {
278279
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
279280
MachineOperand &JumpTarget = MBBI->getOperand(0);
@@ -345,12 +346,14 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
345346
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
346347
for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
347348
MIB.add(MBBI->getOperand(i));
348-
} else if (Opcode == X86::TCRETURNri64) {
349+
} else if ((Opcode == X86::TCRETURNri64) ||
350+
(Opcode == X86::TCRETURNri64_ImpCall)) {
349351
JumpTarget.setIsKill();
350352
BuildMI(MBB, MBBI, DL,
351353
TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
352354
.add(JumpTarget);
353355
} else {
356+
assert(!IsWin64 && "Win64 requires REX for indirect jumps.");
354357
JumpTarget.setIsKill();
355358
BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
356359
.add(JumpTarget);
@@ -875,6 +878,9 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
875878
case X86::CALL64m_RVMARKER:
876879
expandCALL_RVMARKER(MBB, MBBI);
877880
return true;
881+
case X86::CALL64r_ImpCall:
882+
MI.setDesc(TII->get(X86::CALL64r));
883+
return true;
878884
case X86::ADD32mi_ND:
879885
case X86::ADD64mi32_ND:
880886
case X86::SUB32mi_ND:

‎llvm/lib/Target/X86/X86FastISel.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86FastISel.cpp
+6Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "llvm/IR/Instructions.h"
3535
#include "llvm/IR/IntrinsicInst.h"
3636
#include "llvm/IR/IntrinsicsX86.h"
37+
#include "llvm/IR/Module.h"
3738
#include "llvm/IR/Operator.h"
3839
#include "llvm/MC/MCAsmInfo.h"
3940
#include "llvm/MC/MCSymbol.h"
@@ -3316,6 +3317,11 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
33163317
if (Flag.isSwiftError() || Flag.isPreallocated())
33173318
return false;
33183319

3320+
// Can't handle import call optimization.
3321+
if (Is64Bit &&
3322+
MF->getFunction().getParent()->getModuleFlag("import-call-optimization"))
3323+
return false;
3324+
33193325
SmallVector<MVT, 16> OutVTs;
33203326
SmallVector<Register, 16> ArgRegs;
33213327

‎llvm/lib/Target/X86/X86FrameLowering.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86FrameLowering.cpp
+2-1Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2399,7 +2399,8 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
23992399
static bool isTailCallOpcode(unsigned Opc) {
24002400
return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
24012401
Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2402-
Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2402+
Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
2403+
Opc == X86::TCRETURNmi64;
24032404
}
24042405

24052406
void X86FrameLowering::emitEpilogue(MachineFunction &MF,

‎llvm/lib/Target/X86/X86ISelLowering.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86ISelLowering.cpp
+16-3Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19166,7 +19166,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1916619166

1916719167
SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op,
1916819168
SelectionDAG &DAG) const {
19169-
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
19169+
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr);
1917019170
}
1917119171

1917219172
SDValue
@@ -19194,7 +19194,8 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1919419194
/// Creates target global address or external symbol nodes for calls or
1919519195
/// other uses.
1919619196
SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
19197-
bool ForCall) const {
19197+
bool ForCall,
19198+
bool *IsImpCall) const {
1919819199
// Unpack the global address or external symbol.
1919919200
SDLoc dl(Op);
1920019201
const GlobalValue *GV = nullptr;
@@ -19244,6 +19245,16 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1924419245
if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0)
1924519246
return Result;
1924619247

19248+
// If Import Call Optimization is enabled and this is an imported function
19249+
// then make a note of it and return the global address without wrapping.
19250+
if (IsImpCall && (OpFlags == X86II::MO_DLLIMPORT) &&
19251+
Mod.getModuleFlag("import-call-optimization")) {
19252+
assert(ForCall && "Should only enable import call optimization if we are "
19253+
"lowering a call");
19254+
*IsImpCall = true;
19255+
return Result;
19256+
}
19257+
1924719258
Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result);
1924819259

1924919260
// With PIC, the address is actually $g + Offset.
@@ -19269,7 +19280,7 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1926919280

1927019281
SDValue
1927119282
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
19272-
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
19283+
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr);
1927319284
}
1927419285

1927519286
static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA,
@@ -34807,6 +34818,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3480734818
NODE_NAME_CASE(FST)
3480834819
NODE_NAME_CASE(CALL)
3480934820
NODE_NAME_CASE(CALL_RVMARKER)
34821+
NODE_NAME_CASE(IMP_CALL)
3481034822
NODE_NAME_CASE(BT)
3481134823
NODE_NAME_CASE(CMP)
3481234824
NODE_NAME_CASE(FCMP)
@@ -62003,6 +62015,7 @@ X86TargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
6200362015
Register TargetReg;
6200462016
switch (MBBI->getOpcode()) {
6200562017
case X86::CALL64r:
62018+
case X86::CALL64r_ImpCall:
6200662019
case X86::CALL64r_NT:
6200762020
case X86::TAILJMPr64:
6200862021
case X86::TAILJMPr64_REX:

‎llvm/lib/Target/X86/X86ISelLowering.h

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86ISelLowering.h
+6-2Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ namespace llvm {
9090
/// POP_FROM_X87_REG (which may remove a required FPU stack pop).
9191
POP_FROM_X87_REG,
9292

93+
// Pseudo for a call to an imported function to ensure the correct machine
94+
// instruction is emitted for Import Call Optimization.
95+
IMP_CALL,
96+
9397
/// X86 compare and logical compare instructions.
9498
CMP,
9599
FCMP,
@@ -1746,8 +1750,8 @@ namespace llvm {
17461750

17471751
/// Creates target global address or external symbol nodes for calls or
17481752
/// other uses.
1749-
SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1750-
bool ForCall) const;
1753+
SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall,
1754+
bool *IsImpCall) const;
17511755

17521756
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
17531757
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

‎llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+11-2Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2050,6 +2050,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
20502050
if (CallConv == CallingConv::X86_INTR)
20512051
report_fatal_error("X86 interrupts may not be called directly");
20522052

2053+
if (IsIndirectCall && !IsWin64 &&
2054+
M->getModuleFlag("import-call-optimization"))
2055+
errorUnsupported(DAG, dl,
2056+
"Indirect calls must have a normal calling convention if "
2057+
"Import Call Optimization is enabled");
2058+
20532059
// Analyze operands of the call, assigning locations to each operand.
20542060
SmallVector<CCValAssign, 16> ArgLocs;
20552061
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
@@ -2421,6 +2427,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24212427
InGlue = Chain.getValue(1);
24222428
}
24232429

2430+
bool IsImpCall = false;
24242431
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
24252432
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
24262433
// In the 64-bit large code model, we have to make all calls
@@ -2433,7 +2440,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24332440
// ForCall to true here has the effect of removing WrapperRIP when possible
24342441
// to allow direct calls to be selected without first materializing the
24352442
// address into a register.
2436-
Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2443+
Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true, &IsImpCall);
24372444
} else if (Subtarget.isTarget64BitILP32() &&
24382445
Callee.getValueType() == MVT::i32) {
24392446
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
@@ -2555,7 +2562,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
25552562

25562563
// Returns a chain & a glue for retval copy to use.
25572564
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2558-
if (IsNoTrackIndirectCall) {
2565+
if (IsImpCall) {
2566+
Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops);
2567+
} else if (IsNoTrackIndirectCall) {
25592568
Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
25602569
} else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
25612570
// Calls with a "clang.arc.attachedcall" bundle are special. They should be

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.