Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 29c2892

Browse files
committed
Implement Import Call Optimization for x64
1 parent db9caf6 commit 29c2892
Copy full SHA for 29c2892
Expand file tree / Collapse file tree

15 files changed

+506
-27
lines changed

‎llvm/include/llvm/Transforms/CFGuard.h

Copy file name to clipboardExpand all lines: llvm/include/llvm/Transforms/CFGuard.h
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
namespace llvm {
1717

1818
class FunctionPass;
19+
class GlobalValue;
1920

2021
class CFGuardPass : public PassInfoMixin<CFGuardPass> {
2122
public:
@@ -34,6 +35,8 @@ FunctionPass *createCFGuardCheckPass();
3435
/// Insert Control Flow Guard dispatches on indirect function calls.
3536
FunctionPass *createCFGuardDispatchPass();
3637

38+
bool isCFGuardFunction(const GlobalValue *GV);
39+
3740
} // namespace llvm
3841

3942
#endif

‎llvm/lib/MC/MCObjectFileInfo.cpp

Copy file name to clipboardExpand all lines: llvm/lib/MC/MCObjectFileInfo.cpp
+5Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
599599
if (T.getArch() == Triple::aarch64) {
600600
ImportCallSection =
601601
Ctx->getCOFFSection(".impcall", COFF::IMAGE_SCN_LNK_INFO);
602+
} else if (T.getArch() == Triple::x86_64) {
603+
// Import Call Optimization on x64 leverages the same metadata as the
604+
// retpoline mitigation, hence the unusual section name.
605+
ImportCallSection =
606+
Ctx->getCOFFSection(".retplne", COFF::IMAGE_SCN_LNK_INFO);
602607
}
603608

604609
// Debug info.

‎llvm/lib/Target/X86/X86AsmPrinter.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86AsmPrinter.cpp
+32Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,9 @@ void X86AsmPrinter::emitStartOfAsmFile(Module &M) {
920920
OutStreamer->emitSymbolAttribute(S, MCSA_Global);
921921
OutStreamer->emitAssignment(
922922
S, MCConstantExpr::create(Feat00Value, MMI->getContext()));
923+
924+
if (M.getModuleFlag("import-call-optimization"))
925+
EnableImportCallOptimization = true;
923926
}
924927
OutStreamer->emitSyntaxDirective();
925928

@@ -1021,6 +1024,35 @@ void X86AsmPrinter::emitEndOfAsmFile(Module &M) {
10211024
// safe to set.
10221025
OutStreamer->emitAssemblerFlag(MCAF_SubsectionsViaSymbols);
10231026
} else if (TT.isOSBinFormatCOFF()) {
1027+
// If import call optimization is enabled, emit the appropriate section.
1028+
// We do this whether or not we recorded any items.
1029+
if (EnableImportCallOptimization) {
1030+
OutStreamer->switchSection(getObjFileLowering().getImportCallSection());
1031+
1032+
// Section always starts with some magic.
1033+
constexpr char ImpCallMagic[12] = "RetpolineV1";
1034+
OutStreamer->emitBytes(StringRef{ImpCallMagic, sizeof(ImpCallMagic)});
1035+
1036+
// Layout of this section is:
1037+
// Per section that contains an item to record:
1038+
// uint32_t SectionSize: Size in bytes of this section's record, including these two header fields.
1039+
// uint32_t SectionNumber: the COFF section number of the section containing the calls.
1040+
// Per call to imported function in section:
1041+
// uint32_t Kind: the kind of item.
1042+
// uint32_t InstOffset: the offset of the instr in its parent section.
1043+
for (auto &[Section, CallsToImportedFuncs] :
1044+
SectionToImportedFunctionCalls) {
1045+
unsigned SectionSize =
1046+
sizeof(uint32_t) * (2 + 2 * CallsToImportedFuncs.size());
1047+
OutStreamer->emitInt32(SectionSize);
1048+
OutStreamer->emitCOFFSecNumber(Section->getBeginSymbol());
1049+
for (auto &[CallsiteSymbol, Kind] : CallsToImportedFuncs) {
1050+
OutStreamer->emitInt32(Kind);
1051+
OutStreamer->emitCOFFSecOffset(CallsiteSymbol);
1052+
}
1053+
}
1054+
}
1055+
10241056
if (usesMSVCFloatingPoint(TT, M)) {
10251057
// In Windows' libcmt.lib, there is a file which is linked in only if the
10261058
// symbol _fltused is referenced. Linking this in causes some

‎llvm/lib/Target/X86/X86AsmPrinter.h

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86AsmPrinter.h
+32-1Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,26 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
3131
bool EmitFPOData = false;
3232
bool ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = false;
3333
bool IndCSPrefix = false;
34+
bool EnableImportCallOptimization = false;
35+
36+
enum ImportCallKind : unsigned {
37+
IMAGE_RETPOLINE_AMD64_IMPORT_BR = 0x02,
38+
IMAGE_RETPOLINE_AMD64_IMPORT_CALL = 0x03,
39+
IMAGE_RETPOLINE_AMD64_INDIR_BR = 0x04,
40+
IMAGE_RETPOLINE_AMD64_INDIR_CALL = 0x05,
41+
IMAGE_RETPOLINE_AMD64_INDIR_BR_REX = 0x06,
42+
IMAGE_RETPOLINE_AMD64_CFG_BR = 0x08,
43+
IMAGE_RETPOLINE_AMD64_CFG_CALL = 0x09,
44+
IMAGE_RETPOLINE_AMD64_CFG_BR_REX = 0x0A,
45+
IMAGE_RETPOLINE_AMD64_SWITCHTABLE_FIRST = 0x010,
46+
IMAGE_RETPOLINE_AMD64_SWITCHTABLE_LAST = 0x01F,
47+
};
48+
struct ImportCallInfo {
49+
MCSymbol *CalleeSymbol;
50+
ImportCallKind Kind;
51+
};
52+
DenseMap<MCSection *, std::vector<ImportCallInfo>>
53+
SectionToImportedFunctionCalls;
3454

3555
// This utility class tracks the length of a stackmap instruction's 'shadow'.
3656
// It is used by the X86AsmPrinter to ensure that the stackmap shadow
@@ -45,7 +65,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
4565
void startFunction(MachineFunction &MF) {
4666
this->MF = &MF;
4767
}
48-
void count(MCInst &Inst, const MCSubtargetInfo &STI,
68+
void count(const MCInst &Inst, const MCSubtargetInfo &STI,
4969
MCCodeEmitter *CodeEmitter);
5070

5171
// Called to signal the start of a shadow of RequiredSize bytes.
@@ -126,6 +146,17 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
126146
void emitMachOIFuncStubHelperBody(Module &M, const GlobalIFunc &GI,
127147
MCSymbol *LazyPointer) override;
128148

149+
void emitCallInstruction(const llvm::MCInst &MCI);
150+
151+
// Emits a label to mark the next instruction as being relevant to Import Call
152+
// Optimization.
153+
void emitLabelAndRecordForImportCallOptimization(ImportCallKind Kind);
154+
155+
// Ensure that rax is used as the operand for the given instruction.
156+
//
157+
// NOTE: This assumes that it is safe to clobber rax.
158+
void ensureRaxUsedForOperand(MCInst &TmpInst);
159+
129160
public:
130161
X86AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
131162

‎llvm/lib/Target/X86/X86ISelLowering.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86ISelLowering.cpp
+15-3Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18922,7 +18922,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1892218922

1892318923
SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op,
1892418924
SelectionDAG &DAG) const {
18925-
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
18925+
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr);
1892618926
}
1892718927

1892818928
SDValue
@@ -18950,7 +18950,8 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1895018950
/// Creates target global address or external symbol nodes for calls or
1895118951
/// other uses.
1895218952
SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
18953-
bool ForCall) const {
18953+
bool ForCall,
18954+
bool *IsImpCall) const {
1895418955
// Unpack the global address or external symbol.
1895518956
SDLoc dl(Op);
1895618957
const GlobalValue *GV = nullptr;
@@ -19000,6 +19001,16 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1900019001
if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0)
1900119002
return Result;
1900219003

19004+
// If Import Call Optimization is enabled and this is an imported function
19005+
// then make a note of it and return the global address without wrapping.
19006+
if (IsImpCall && (OpFlags == X86II::MO_DLLIMPORT) &&
19007+
Mod.getModuleFlag("import-call-optimization")) {
19008+
assert(ForCall && "Should only enable import call optimization if we are "
19009+
"lowering a call");
19010+
*IsImpCall = true;
19011+
return Result;
19012+
}
19013+
1900319014
Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result);
1900419015

1900519016
// With PIC, the address is actually $g + Offset.
@@ -19025,7 +19036,7 @@ SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1902519036

1902619037
SDValue
1902719038
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
19028-
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
19039+
return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false, nullptr);
1902919040
}
1903019041

1903119042
static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA,
@@ -34562,6 +34573,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3456234573
NODE_NAME_CASE(FST)
3456334574
NODE_NAME_CASE(CALL)
3456434575
NODE_NAME_CASE(CALL_RVMARKER)
34576+
NODE_NAME_CASE(IMP_CALL)
3456534577
NODE_NAME_CASE(BT)
3456634578
NODE_NAME_CASE(CMP)
3456734579
NODE_NAME_CASE(FCMP)

‎llvm/lib/Target/X86/X86ISelLowering.h

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86ISelLowering.h
+6-2Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ namespace llvm {
8181
// marker instruction.
8282
CALL_RVMARKER,
8383

84+
// Pseudo for a call to an imported function to ensure the correct machine
85+
// instruction is emitted for Import Call Optimization.
86+
IMP_CALL,
87+
8488
/// X86 compare and logical compare instructions.
8589
CMP,
8690
FCMP,
@@ -1733,8 +1737,8 @@ namespace llvm {
17331737

17341738
/// Creates target global address or external symbol nodes for calls or
17351739
/// other uses.
1736-
SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1737-
bool ForCall) const;
1740+
SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall,
1741+
bool *IsImpCall) const;
17381742

17391743
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
17401744
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

‎llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+5-2Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2402,6 +2402,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24022402
InGlue = Chain.getValue(1);
24032403
}
24042404

2405+
bool IsImpCall = false;
24052406
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
24062407
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
24072408
// In the 64-bit large code model, we have to make all calls
@@ -2414,7 +2415,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
24142415
// ForCall to true here has the effect of removing WrapperRIP when possible
24152416
// to allow direct calls to be selected without first materializing the
24162417
// address into a register.
2417-
Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2418+
Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true, &IsImpCall);
24182419
} else if (Subtarget.isTarget64BitILP32() &&
24192420
Callee.getValueType() == MVT::i32) {
24202421
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
@@ -2536,7 +2537,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
25362537

25372538
// Returns a chain & a glue for retval copy to use.
25382539
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2539-
if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2540+
if (IsImpCall) {
2541+
Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops);
2542+
} else if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
25402543
Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
25412544
} else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
25422545
// Calls with a "clang.arc.attachedcall" bundle are special. They should be

‎llvm/lib/Target/X86/X86InstrCompiler.td

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86InstrCompiler.td
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,6 +1309,8 @@ def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)),
13091309
def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)),
13101310
(CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>;
13111311

1312+
def : Pat<(X86imp_call (i64 tglobaladdr:$dst)),
1313+
(CALL64pcrel32 tglobaladdr:$dst)>;
13121314

13131315
// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
13141316
// can never use callee-saved registers. That is the purpose of the GR64_TC

‎llvm/lib/Target/X86/X86InstrFragments.td

Copy file name to clipboardExpand all lines: llvm/lib/Target/X86/X86InstrFragments.td
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,9 @@ def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call,
210210
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
211211
SDNPVariadic]>;
212212

213+
def X86imp_call : SDNode<"X86ISD::IMP_CALL", SDT_X86Call,
214+
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
215+
SDNPVariadic]>;
213216

214217
def X86NoTrackCall : SDNode<"X86ISD::NT_CALL", SDT_X86Call,
215218
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.