diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 77fe7c58e92cd..54607e8415f92 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -77,9 +77,12 @@ #include "InputFiles.h" #include "LinkerScript.h" #include "OutputSections.h" +#include "Relocations.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" +#include "Target.h" +#include "llvm/ADT/EquivalenceClasses.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELF.h" #include "llvm/Support/Parallel.h" @@ -104,18 +107,23 @@ template class ICF { void segregate(size_t begin, size_t end, uint32_t eqClassBase, bool constant); template - bool constantEq(const InputSection *a, Relocs relsA, - const InputSection *b, Relocs relsB); + bool constantEq(InputSection *a, Relocs relsA, InputSection *b, + Relocs relsB); template - bool variableEq(const InputSection *a, Relocs relsA, - const InputSection *b, Relocs relsB); + bool variableEq(InputSection *a, Relocs relsA, InputSection *b, + Relocs relsB); - bool equalsConstant(const InputSection *a, const InputSection *b); - bool equalsVariable(const InputSection *a, const InputSection *b); + bool equalsConstant(InputSection *a, InputSection *b); + bool equalsVariable(InputSection *a, InputSection *b); size_t findBoundary(size_t begin, size_t end); + // A relocation with side-effects is considered non-trivial. Eg: relocation + // creates GOT entry or TLS slot. + template + bool isTrivialRelocation(InputSection *a, Symbol &s, RelTy reloc); + void forEachClassRange(size_t begin, size_t end, llvm::function_ref fn); @@ -234,11 +242,33 @@ void ICF::segregate(size_t begin, size_t end, uint32_t eqClassBase, } } +template +template +bool ICF::isTrivialRelocation(InputSection *a, Symbol &s, RelTy reloc) { + // For our use cases, we can get by without calculating exact location within + // the section, and just use fake location array. We need to ensure validity + // for loc[-1] to loc[3] as various targets' getRelExpr() reference them. 
+ std::array<uint8_t, 5> fakeLocArray = {}; // Zeroed: getRelExpr() may read it. + uint8_t *fakeLoc = fakeLocArray.data() + 1; + RelExpr expr = ctx.target->getRelExpr(reloc.getType(ctx.arg.isMips64EL), s, + fakeLoc); + + if (needsGot(expr) || needsTls(s, expr)) + return false; + return true; +} + +// Two symbols referenced by relocations can be merged together safely +// when their addends are same. +static bool canMergeSymbols(uint64_t addA, uint64_t addB) { + return addA == addB; +} + // Compare two lists of relocations. template template -bool ICF::constantEq(const InputSection *secA, Relocs ra, - const InputSection *secB, Relocs rb) { +bool ICF::constantEq(InputSection *secA, Relocs ra, + InputSection *secB, Relocs rb) { if (ra.size() != rb.size()) return false; auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); @@ -286,9 +316,14 @@ bool ICF::constantEq(const InputSection *secA, Relocs ra, // Relocations referring to InputSections are constant-equal if their // section offsets are equal. if (isa(da->section)) { - if (da->value + addA == db->value + addB) + if (da->value + addA == db->value + addB) { + // For non-trivial relocations, if we cannot merge symbols together, + // we must not merge sections either. + if (!isTrivialRelocation(secA, sa, *rai) && + !canMergeSymbols(addA, addB)) + return false; continue; - return false; + } } // Relocations referring to MergeInputSections are constant-equal if their @@ -314,7 +349,7 @@ bool ICF::constantEq(const InputSection *secA, Relocs ra, // Compare "non-moving" part of two InputSections, namely everything // except relocation targets. 
template -bool ICF::equalsConstant(const InputSection *a, const InputSection *b) { +bool ICF::equalsConstant(InputSection *a, InputSection *b) { if (a->flags != b->flags || a->getSize() != b->getSize() || a->content() != b->content()) return false; @@ -333,12 +368,35 @@ bool ICF::equalsConstant(const InputSection *a, const InputSection *b) { : constantEq(a, ra.relas, b, rb.relas); } +template +static SmallVector> +getReloc(const InputSection *sec, Relocs relocs) { + SmallVector> syms; + for (auto ri = relocs.begin(), re = relocs.end(); ri != re; ++ri) { + Symbol &sym = sec->file->getRelocTargetSym(*ri); + syms.emplace_back(&sym, getAddend(*ri)); + } + return syms; +} + +template +static SmallVector> +getRelocTargetSyms(const InputSection *sec) { + const RelsOrRelas rel = sec->template relsOrRelas(); + if (rel.areRelocsCrel()) + return getReloc(sec, rel.crels); + if (rel.areRelocsRel()) + return getReloc(sec, rel.rels); + + return getReloc(sec, rel.relas); +} + // Compare two lists of relocations. Returns true if all pairs of // relocations point to the same section in terms of ICF. template template -bool ICF::variableEq(const InputSection *secA, Relocs ra, - const InputSection *secB, Relocs rb) { +bool ICF::variableEq(InputSection *secA, Relocs ra, + InputSection *secB, Relocs rb) { assert(ra.size() == rb.size()); auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin(); @@ -352,6 +410,15 @@ bool ICF::variableEq(const InputSection *secA, Relocs ra, auto *da = cast(&sa); auto *db = cast(&sb); + // Prevent sections containing local symbols from merging into sections with + // global symbols, or vice-versa. This is to prevent local-global symbols + // getting merged into each other (done later in ICF). We do this as + // post-ICF passes cannot handle duplicates when iterating over local + // symbols. There are also assertions that prevent this. 
+ if ((da->isLocal() || db->isLocal()) && + !isTrivialRelocation(secA, sa, *rai)) + return false; + // We already dealt with absolute and non-InputSection symbols in // constantEq, and for InputSections we have already checked everything // except the equivalence class. @@ -375,7 +442,7 @@ bool ICF::variableEq(const InputSection *secA, Relocs ra, // Compare "moving" part of two InputSections, namely relocation targets. template -bool ICF::equalsVariable(const InputSection *a, const InputSection *b) { +bool ICF::equalsVariable(InputSection *a, InputSection *b) { const RelsOrRelas ra = a->template relsOrRelas(); const RelsOrRelas rb = b->template relsOrRelas(); if (ra.areRelocsCrel() || rb.areRelocsCrel()) @@ -537,14 +604,29 @@ template void ICF::run() { auto print = [&ctx = ctx]() -> ELFSyncStream { return {ctx, ctx.arg.printIcfSections ? DiagLevel::Msg : DiagLevel::None}; }; + + EquivalenceClasses symbolEquivalence; // Merge sections by the equivalence class. + // Merge symbols identified as equivalent during ICF. 
forEachClassRange(0, sections.size(), [&](size_t begin, size_t end) { if (end - begin == 1) return; print() << "selected section " << sections[begin]; + SmallVector> syms = + getRelocTargetSyms(sections[begin]); for (size_t i = begin + 1; i < end; ++i) { print() << " removing identical section " << sections[i]; sections[begin]->replace(sections[i]); + SmallVector> replacedSyms = + getRelocTargetSyms(sections[i]); + assert(syms.size() == replacedSyms.size() && + "Should have same number of syms!"); + for (size_t j = 0; j < syms.size(); j++) { + if (!syms[j].first->isGlobal() || !replacedSyms[j].first->isGlobal() || + !canMergeSymbols(syms[j].second, replacedSyms[j].second)) + continue; + symbolEquivalence.unionSets(syms[j].first, replacedSyms[j].first); + } // At this point we know sections merged are fully identical and hence // we want to remove duplicate implicit dependencies such as link order @@ -563,11 +645,26 @@ template void ICF::run() { d->folded = true; } }; - for (Symbol *sym : ctx.symtab->getSymbols()) + for (Symbol *sym : ctx.symtab->getSymbols()) { fold(sym); + auto it = symbolEquivalence.findLeader(sym); + if (it != symbolEquivalence.member_end() && *it != sym) { + print() << "redirecting '" << sym->getName() << "' in symtab to '" + << (*it)->getName() << "'"; + ctx.symtab->redirect(sym, *it); + } + } parallelForEach(ctx.objectFiles, [&](ELFFileBase *file) { for (Symbol *sym : file->getLocalSymbols()) fold(sym); + for (Symbol *&sym : file->getMutableGlobalSymbols()) { + auto it = symbolEquivalence.findLeader(sym); + if (it != symbolEquivalence.member_end() && *it != sym) { + print() << "redirecting '" << sym->getName() << "' to '" + << (*it)->getName() << "'"; + sym = *it; + } + } }); // InputSectionDescription::sections is populated by processSectionCommands(). 
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 277acb26987bc..3f62f419f694a 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -127,29 +127,6 @@ void elf::reportRangeError(Ctx &ctx, uint8_t *loc, int64_t v, int n, } } -// Build a bitmask with one bit set for each 64 subset of RelExpr. -static constexpr uint64_t buildMask() { return 0; } - -template -static constexpr uint64_t buildMask(int head, Tails... tails) { - return (0 <= head && head < 64 ? uint64_t(1) << head : 0) | - buildMask(tails...); -} - -// Return true if `Expr` is one of `Exprs`. -// There are more than 64 but less than 128 RelExprs, so we divide the set of -// exprs into [0, 64) and [64, 128) and represent each range as a constant -// 64-bit mask. Then we decide which mask to test depending on the value of -// expr and use a simple shift and bitwise-and to test for membership. -template static bool oneof(RelExpr expr) { - assert(0 <= expr && (int)expr < 128 && - "RelExpr is too large for 128-bit mask!"); - - if (expr >= 64) - return (uint64_t(1) << (expr - 64)) & buildMask((Exprs - 64)...); - return (uint64_t(1) << expr) & buildMask(Exprs...); -} - static RelType getMipsPairType(RelType type, bool isLocal) { switch (type) { case R_MIPS_HI16: @@ -196,15 +173,6 @@ static bool needsPlt(RelExpr expr) { RE_PPC64_CALL_PLT>(expr); } -bool lld::elf::needsGot(RelExpr expr) { - return oneof( - expr); -} - // True if this expression is of the form Sym - X, where X is a position in the // file (PC, or GOT for example). static bool isRelExpr(RelExpr expr) { @@ -1593,7 +1561,7 @@ void RelocationScanner::scanOne(typename Relocs::const_iterator &i) { // // Some RISCV TLSDESC relocations reference a local NOTYPE symbol, // but we need to process them in handleTlsRelocation. 
- if (sym.isTls() || oneof(expr)) { + if (needsTls(sym, expr)) { if (unsigned processed = handleTlsRelocation(expr, type, offset, sym, addend)) { i += processed - 1; diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index d2a77bc953109..2cefe3fd1f41d 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -9,6 +9,7 @@ #ifndef LLD_ELF_RELOCATIONS_H #define LLD_ELF_RELOCATIONS_H +#include "Symbols.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -18,7 +19,6 @@ namespace lld::elf { struct Ctx; class Defined; -class Symbol; class InputSection; class InputSectionBase; class OutputSection; @@ -355,6 +355,29 @@ inline Relocs sortRels(Relocs rels, return rels; } +// Build a bitmask with one bit set for each 64 subset of RelExpr. +constexpr uint64_t buildMask() { return 0; } + +template +constexpr uint64_t buildMask(int head, Tails... tails) { + return (0 <= head && head < 64 ? uint64_t(1) << head : 0) | + buildMask(tails...); +} + +// Return true if `Expr` is one of `Exprs`. +// There are more than 64 but less than 128 RelExprs, so we divide the set of +// exprs into [0, 64) and [64, 128) and represent each range as a constant +// 64-bit mask. Then we decide which mask to test depending on the value of +// expr and use a simple shift and bitwise-and to test for membership. +template static bool oneof(RelExpr expr) { + assert(0 <= expr && (int)expr < 128 && + "RelExpr is too large for 128-bit mask!"); + + if (expr >= 64) + return (uint64_t(1) << (expr - 64)) & buildMask((Exprs - 64)...); + return (uint64_t(1) << expr) & buildMask(Exprs...); +} + template inline Relocs> sortRels(Relocs> rels, @@ -367,7 +390,19 @@ RelocationBaseSection &getIRelativeSection(Ctx &ctx); // Returns true if Expr refers a GOT entry. Note that this function returns // false for TLS variables even though they need GOT, because TLS variables uses // GOT differently than the regular variables. 
-bool needsGot(RelExpr expr); +inline bool needsGot(RelExpr expr) { + return oneof( + expr); +} + +inline bool needsTls(Symbol &s, RelExpr expr) { + return s.isTls() || oneof(expr); +} + } // namespace lld::elf #endif diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index b8a70d4e898fc..91e47e15b01a4 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -29,6 +29,12 @@ using namespace llvm::ELF; using namespace lld; using namespace lld::elf; +void SymbolTable::redirect(Symbol *from, Symbol *to) { + int &fromIdx = symMap[CachedHashStringRef(from->getName())]; + const int toIdx = symMap[CachedHashStringRef(to->getName())]; + fromIdx = toIdx; +} + void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { // Redirect __real_foo to the original foo and foo to the original __wrap_foo. int &idx1 = symMap[CachedHashStringRef(sym->getName())]; diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index d6443742f7baa..e3a39bac85f97 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -41,6 +41,7 @@ class SymbolTable { SymbolTable(Ctx &ctx) : ctx(ctx) {} ArrayRef getSymbols() const { return symVector; } + void redirect(Symbol *from, Symbol *to); void wrap(Symbol *sym, Symbol *real, Symbol *wrap); Symbol *insert(StringRef name); diff --git a/lld/test/ELF/aarch64-got-merging-icf.s b/lld/test/ELF/aarch64-got-merging-icf.s new file mode 100644 index 0000000000000..773bacf3102a1 --- /dev/null +++ b/lld/test/ELF/aarch64-got-merging-icf.s @@ -0,0 +1,118 @@ +## REQUIRES: aarch64 +## Check that symbols that ICF assumes to be the same get a single GOT entry + +# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t +# RUN: llvm-mc -filetype=obj -crel -triple=aarch64 %s -o %tcrel +# RUN: ld.lld %t -o %t2 --icf=all --print-icf-sections | FileCheck %s +# RUN: ld.lld %tcrel -o %tcrel2 --icf=all + +# RUN: llvm-objdump --section-headers %t2 | FileCheck %s --check-prefix=EXE +# RUN: llvm-objdump --section-headers %tcrel2 | FileCheck %s --check-prefix=EXE 
+ +# RUN: ld.lld -shared %t -o %t3 --icf=all +# RUN: ld.lld -shared %tcrel -o %tcrel3 --icf=all + +# RUN: llvm-objdump --section-headers %t3 | FileCheck %s --check-prefix=DSO +# RUN: llvm-objdump --section-headers %tcrel3 | FileCheck %s --check-prefix=DSO + +## All global g* symbols should merge into a single GOT entry while non-global +## gets its own GOT entry. +# EXE: {{.*}}.got 00000018{{.*}} + +## When symbols are preemptible in DSO mode, GOT entries wouldn't be merged +# DSO: {{.*}}.got 00000030{{.*}} + +# 1. Sections containing local symbols (f4, f5) are not merged together. +# 2. Sections containing global symbols are merged together. + +# CHECK: selected section {{.*}}:(.rodata.g0) +# CHECK-NEXT: removing identical section {{.*}}:(.rodata.g1) +# CHECK-NEXT: removing identical section {{.*}}:(.rodata.g2) +# CHECK-NEXT: removing identical section {{.*}}:(.rodata.g3) +# CHECK-NEXT: removing identical section {{.*}}:(.rodata.g4) +# CHECK-NEXT: removing identical section {{.*}}:(.rodata.g5) +# CHECK-NEXT: selected section {{.*}}:(.text.t2_0) +# CHECK-NEXT: removing identical section {{.*}}:(.text.t2_1) +# CHECK-NEXT: selected section {{.*}}:(.text.f2_0) +# CHECK-NEXT: removing identical section {{.*}}:(.text.f2_1) +# CHECK-NEXT: removing identical section {{.*}}:(.text.f2_2) +# CHECK-NEXT: removing identical section {{.*}}:(.text.f2_3) +# CHECK-NEXT: redirecting 'g1' in symtab to 'g0' +# CHECK-NEXT: redirecting 'g2' in symtab to 'g0' +# CHECK-NEXT: redirecting 'g3' in symtab to 'g0' +# CHECK-NEXT: redirecting 'g1' to 'g0' +# CHECK-NEXT: redirecting 'g2' to 'g0' +# CHECK-NEXT: redirecting 'g3' to 'g0' + +.addrsig + +callee: +ret + +.macro f, index, isglobal + +# (Kept unique) first instruction of the GOT code sequence +.section .text.f1_\index,"ax",@progbits +f1_\index: +adrp x0, :got:g\index +mov x1, #\index +b f2_\index + +# Folded, second instruction of the GOT code sequence +.section .text.f2_\index,"ax",@progbits +f2_\index: +ldr x0, [x0, :got_lo12:g\index] 
+b callee + +# Folded +.ifnb \isglobal +.globl g\index +.endif +.section .rodata.g\index,"a",@progbits +g_\index: +.long 111 +.long 122 + +g\index: +.byte 123 + +.section .text._start,"ax",@progbits +bl f1_\index + +.endm + +## Another set of sections merging: g1 <- g2. Linker should be able to +## resolve both g1 and g2 to g0 based on ICF on previous sections. + +.section .text.t1_0,"ax",@progbits +t1_0: +adrp x2, :got:g1 +mov x3, #1 +b t2_0 + +.section .text.t2_0,"ax",@progbits +t2_0: +ldr x2, [x2, :got_lo12:g1] +b callee + +.section .text.t1_1,"ax",@progbits +t1_1: +adrp x2, :got:g2 +mov x3, #2 +b t2_1 + +.section .text.t2_1,"ax",@progbits +t2_1: +ldr x2, [x2, :got_lo12:g2] +b callee + +.section .text._start,"ax",@progbits +.globl _start +_start: + +f 0 1 +f 1 1 +f 2 1 +f 3 1 +f 4 +f 5 diff --git a/lld/test/ELF/icf-addend.s b/lld/test/ELF/icf-addend.s new file mode 100644 index 0000000000000..8f0b6148a37d6 --- /dev/null +++ b/lld/test/ELF/icf-addend.s @@ -0,0 +1,106 @@ +# REQUIRES: x86 +# RUN: rm -rf %t && split-file %s %t && cd %t + +#--- trivial-relocation.s +# Tests following for trivial relocations: +# 1. Merging two equivalent sections is allowed but we must not merge their symbols if addends are different. +# 2. Local symbols should not be merged together even though their sections can be merged together. 
+ +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux trivial-relocation.s -o trivial.o +# RUN: ld.lld trivial.o -o /dev/null --icf=all --print-icf-sections | FileCheck %s --check-prefix=TRIVIAL + +# TRIVIAL: selected section {{.*}}:(.rodata.sec1) +# TRIVIAL-NEXT: removing identical section {{.*}}:(.rodata.sec2) +# TRIVIAL-NEXT: selected section {{.*}}:(.text.f1) +# TRIVIAL-NEXT: removing identical section {{.*}}:(.text.f2) +# TRIVIAL-NEXT: removing identical section {{.*}}:(.text.f1_local) +# TRIVIAL-NEXT: removing identical section {{.*}}:(.text.f2_local) + +.addrsig + +.globl x_glob, y_glob + +.section .rodata.sec1,"a",@progbits +x_glob: +.long 11 +y_glob: +.long 12 + +.section .rodata.sec2,"a",@progbits +x: +.long 11 +y: +.long 12 + +.section .text.f1,"ax",@progbits +f1: +movq x_glob+4(%rip), %rax + +.section .text.f2,"ax",@progbits +f2: +movq y_glob(%rip), %rax + +.section .text.f1_local,"ax",@progbits +f1_local: +movq x+4(%rip), %rax + +.section .text.f2_local,"ax",@progbits +f2_local: +movq y(%rip), %rax + +.section .text._start,"ax",@progbits +.globl _start +_start: +call f1 +call f2 + +#--- non-trivial-relocation.s +# Tests following for non-trivial relocations: +# 1. We must not merge sections if addends are different. +# 2. We must not merge sections pointing to local and global symbols. 
+ +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux non-trivial-relocation.s -o non-trivial.o +# RUN: ld.lld non-trivial.o -o /dev/null --icf=all --print-icf-sections | FileCheck %s --check-prefix=NONTRIVIAL + +# NONTRIVIAL: selected section {{.*}}:(.rodata.sec1) +# NONTRIVIAL-NEXT: removing identical section {{.*}}:(.rodata.sec2) +# NONTRIVIAL-NEXT: selected section {{.*}}:(.text.f1_local) +# NONTRIVIAL-NEXT: removing identical section {{.*}}:(.text.f2_local) + +.addrsig + +.globl x_glob, y_glob + +.section .rodata.sec1,"a",@progbits +x_glob: +.long 11 +y_glob: +.long 12 + +.section .rodata.sec2,"a",@progbits +x: +.long 11 +y: +.long 12 + +.section .text.f1,"ax",@progbits +f1: +movq x_glob+4@GOTPCREL(%rip), %rax + +.section .text.f2,"ax",@progbits +f2: +movq y_glob@GOTPCREL(%rip), %rax + +.section .text.f1_local,"ax",@progbits +f1_local: +movq x+4(%rip), %rax + +.section .text.f2_local,"ax",@progbits +f2_local: +movq y(%rip), %rax + +.section .text._start,"ax",@progbits +.globl _start +_start: +call f1 +call f2 diff --git a/lld/test/ELF/icf-preemptible.s b/lld/test/ELF/icf-preemptible.s index 4bd1eca438b19..9352493600695 100644 --- a/lld/test/ELF/icf-preemptible.s +++ b/lld/test/ELF/icf-preemptible.s @@ -17,6 +17,12 @@ # EXE-NEXT: selected section {{.*}}:(.text.h1) # EXE-NEXT: removing identical section {{.*}}:(.text.h2) # EXE-NEXT: removing identical section {{.*}}:(.text.h3) +# EXE-NEXT: redirecting 'f2' in symtab to 'f1' +# EXE-NEXT: redirecting 'g2' in symtab to 'g1' +# EXE-NEXT: redirecting 'g3' in symtab to 'g1' +# EXE-NEXT: redirecting 'f2' to 'f1' +# EXE-NEXT: redirecting 'g2' to 'g1' +# EXE-NEXT: redirecting 'g3' to 'g1' # EXE-NOT: {{.}} ## Definitions are preemptible in a DSO. Only leaf functions can be folded.