diff --git a/bolt/docs/Heatmaps.md b/bolt/docs/Heatmaps.md index bf68232ef7fee..6cf9c4da533b1 100644 --- a/bolt/docs/Heatmaps.md +++ b/bolt/docs/Heatmaps.md @@ -89,7 +89,13 @@ For the generation, the default bucket size was used with a line size of 128. Some useful options are: ``` --line-size= - number of entries per line (default 256) +-line-size= - number of entries per line (default 256). + Use a smaller value (e.g. 128) if the heatmap doesn't fit + the screen horizontally. +-block-size=[,,...] - heatmap bucket size, + optionally followed by zoom-out sizes to produce coarse- + grained heatmaps. Size can be specified in human-readable + format with [kKmMgG][i][B] suffix. Default 64B, 4K, 256K. -max-address= - maximum address considered valid for heatmap (default 4GB) -print-mappings - print mappings in the legend, between characters/blocks and text sections (default false) ``` diff --git a/bolt/include/bolt/Profile/Heatmap.h b/bolt/include/bolt/Profile/Heatmap.h index 9813e7fed486d..268b02c7d093c 100644 --- a/bolt/include/bolt/Profile/Heatmap.h +++ b/bolt/include/bolt/Profile/Heatmap.h @@ -85,6 +85,9 @@ class Heatmap { void printSectionHotness(raw_ostream &OS) const; size_t size() const { return Map.size(); } + + /// Increase bucket size to \p NewSize, recomputing the heatmap. + void resizeBucket(uint64_t NewSize); }; } // namespace bolt diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index ef4081769d2ad..4acce5a3e8320 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -23,6 +23,15 @@ enum HeatmapModeKind { HM_Optional // perf2bolt --heatmap }; +using HeatmapBlockSizes = std::vector; +struct HeatmapBlockSpecParser : public llvm::cl::parser { + explicit HeatmapBlockSpecParser(llvm::cl::Option &O) + : llvm::cl::parser(O) {} + // Return true on error. 
+ bool parse(llvm::cl::Option &O, llvm::StringRef ArgName, llvm::StringRef Arg, + HeatmapBlockSizes &Val); +}; + extern HeatmapModeKind HeatmapMode; extern bool BinaryAnalysisMode; @@ -47,7 +56,8 @@ extern llvm::cl::opt EqualizeBBCounts; extern llvm::cl::opt ForcePatch; extern llvm::cl::opt RemoveSymtab; extern llvm::cl::opt ExecutionCountThreshold; -extern llvm::cl::opt HeatmapBlock; +extern llvm::cl::opt + HeatmapBlock; extern llvm::cl::opt HeatmapMaxAddress; extern llvm::cl::opt HeatmapMinAddress; extern llvm::cl::opt HeatmapPrintMappings; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 6beb60741406e..d0620b64cad2c 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -1314,8 +1314,9 @@ std::error_code DataAggregator::printLBRHeatMap() { opts::HeatmapMaxAddress = 0xffffffffffffffff; opts::HeatmapMinAddress = KernelBaseAddr; } - Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, - opts::HeatmapMaxAddress, getTextSections(BC)); + opts::HeatmapBlockSizes &HMBS = opts::HeatmapBlock; + Heatmap HM(HMBS[0], opts::HeatmapMinAddress, opts::HeatmapMaxAddress, + getTextSections(BC)); auto getSymbolValue = [&](const MCSymbol *Symbol) -> uint64_t { if (Symbol) if (ErrorOr SymValue = BC->getSymbolValue(*Symbol)) @@ -1365,6 +1366,14 @@ std::error_code DataAggregator::printLBRHeatMap() { HM.printCDF(opts::HeatmapOutput + ".csv"); HM.printSectionHotness(opts::HeatmapOutput + "-section-hotness.csv"); } + // Provide coarse-grained heatmaps if requested via zoom-out scales + for (const uint64_t NewBucketSize : ArrayRef(HMBS).drop_front()) { + HM.resizeBucket(NewBucketSize); + if (opts::HeatmapOutput == "-") + HM.print(opts::HeatmapOutput); + else + HM.print(formatv("{0}-{1}", opts::HeatmapOutput, NewBucketSize).str()); + } return std::error_code(); } diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp index c66c2e5487613..6da5264d0e597 100644 --- 
a/bolt/lib/Profile/Heatmap.cpp
+++ b/bolt/lib/Profile/Heatmap.cpp
@@ -55,6 +55,8 @@ void Heatmap::print(StringRef FileName) const {
     errs() << "error opening output file: " << EC.message() << '\n';
     exit(1);
   }
+  outs() << "HEATMAP: dumping heatmap with bucket size " << BucketSize << " to "
+         << FileName << '\n';
   print(OS);
 }
 
@@ -364,5 +366,17 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const {
     OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0, 0\n",
                   100.0 * UnmappedHotness / NumTotalCounts);
 }
+
+void Heatmap::resizeBucket(uint64_t NewSize) {
+  // NewSize is used as a divisor and must be non-zero. The command-line parser
+  // only admits zoom-out sizes that are multiples of the preceding size, so
+  // each existing bucket folds entirely into a single coarser bucket.
+  decltype(Map) NewMap;
+  for (const auto &[Bucket, Count] : Map)
+    NewMap[Bucket * BucketSize / NewSize] += Count;
+  // Swap instead of copy-assigning: the old contents are discarded anyway.
+  Map.swap(NewMap);
+  BucketSize = NewSize;
+}
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index add2f94864907..5635da476451d 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -12,6 +12,7 @@
 
 #include "bolt/Utils/CommandLineOpts.h"
 #include "VCSVersion.inc"
+#include "llvm/Support/Regex.h"
 
 using namespace llvm;
 
@@ -103,10 +104,74 @@ ExecutionCountThreshold("execution-count-threshold",
   cl::Hidden,
   cl::cat(BoltOptCategory));
 
-cl::opt
-    HeatmapBlock("block-size",
-                 cl::desc("size of a heat map block in bytes (default 64)"),
-                 cl::init(64), cl::cat(HeatmapCategory));
+/// Parses a comma-separated list of heatmap bucket sizes, e.g. "64,4KB,1MiB".
+/// Each entry is a decimal integer with an optional [kKmMgG][i][bB] suffix and
+/// must be a non-zero value that fits in `unsigned`; every entry after the
+/// first must be larger than, and a multiple of, the one before it.
+/// Accepted sizes are appended to \p Val. Returns true on error, after
+/// reporting it through \p O.
+bool HeatmapBlockSpecParser::parse(cl::Option &O, StringRef ArgName,
+                                   StringRef Arg, HeatmapBlockSizes &Val) {
+  // Parses a human-readable suffix into a shift amount or nullopt on error.
+  auto parseSuffix = [](StringRef Suffix) -> std::optional<unsigned> {
+    if (Suffix.empty())
+      return 0;
+    if (!Regex{"^[kKmMgG]i?[bB]?$"}.match(Suffix))
+      return std::nullopt;
+    // clang-format off
+    switch (Suffix.front()) {
+      case 'k': case 'K': return 10;
+      case 'm': case 'M': return 20;
+      case 'g': case 'G': return 30;
+    }
+    // clang-format on
+    llvm_unreachable("Unexpected suffix");
+  };
+
+  SmallVector<StringRef> Sizes;
+  Arg.split(Sizes, ',');
+  unsigned PreviousSize = 0;
+  for (StringRef Size : Sizes) {
+    StringRef OrigSize = Size;
+    // Parse and scale in 64 bits so that a large value combined with a
+    // suffix (e.g. "5G") is diagnosed instead of silently wrapping around.
+    uint64_t Parsed = 0;
+    if (Size.consumeInteger(10, Parsed)) {
+      O.error("'" + OrigSize + "' value can't be parsed as an integer");
+      return true;
+    }
+    const std::optional<unsigned> ShiftAmt = parseSuffix(Size);
+    if (!ShiftAmt) {
+      O.error("'" + Size + "' value can't be parsed as a suffix");
+      return true;
+    }
+    const uint64_t Scaled = Parsed << *ShiftAmt;
+    const unsigned SizeVal = static_cast<unsigned>(Scaled);
+    if ((Scaled >> *ShiftAmt) != Parsed || SizeVal != Scaled) {
+      O.error("'" + OrigSize + "' value is too large");
+      return true;
+    }
+    if (!SizeVal) {
+      O.error("'" + OrigSize + "' must be a positive value");
+      return true;
+    }
+    if (PreviousSize && (SizeVal <= PreviousSize || SizeVal % PreviousSize)) {
+      O.error("'" + OrigSize + "' must be a multiple of previous value");
+      return true;
+    }
+    Val.push_back(SizeVal);
+    PreviousSize = SizeVal;
+  }
+  return false;
+}
+
+cl::opt<HeatmapBlockSizes, false, HeatmapBlockSpecParser>
+    HeatmapBlock(
+        "block-size", cl::value_desc("initial_size{,zoom-out_size,...}"),
+        cl::desc("heatmap bucket size, optionally followed by zoom-out sizes "
+                 "for coarse-grained heatmaps (default 64B, 4K, 256K)."),
+        cl::init(HeatmapBlockSizes{/*Initial*/ 64, /*Zoom-out*/ 4096, 262144}),
+        cl::cat(HeatmapCategory));
 
 cl::opt HeatmapMaxAddress(
     "max-address", cl::init(0xffffffff),
diff --git a/bolt/test/X86/heatmap-preagg.test b/bolt/test/X86/heatmap-preagg.test
index 306e74800a353..493101664c4fd 100644
--- a/bolt/test/X86/heatmap-preagg.test
+++ b/bolt/test/X86/heatmap-preagg.test
@@ -3,20 +3,41 @@
 RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
 ## Non-BOLTed input binary
 RUN: llvm-bolt-heatmap %t.exe -o %t --pa -p %p/Inputs/blarge_new.preagg.txt \
+# Heatmaps for 64B, 128B, 1K buckets
+RUN: --block-size=64,128,1K --line-size 64 \
 RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP %s
 RUN:
FileCheck %s --check-prefix CHECK-SEC-HOT --input-file %t-section-hotness.csv +RUN: FileCheck %s --check-prefix CHECK-HM-64 --input-file %t +RUN: FileCheck %s --check-prefix CHECK-HM-128 --input-file %t-128 +RUN: FileCheck %s --check-prefix CHECK-HM-1024 --input-file %t-1024 ## BOLTed input binary RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt \ RUN: --reorder-blocks=ext-tsp --split-functions --split-strategy=cdsplit \ RUN: --reorder-functions=cdsort --enable-bat --dyno-stats --skip-funcs=main +# Heatmaps for 64B, 4K, 16K, 1M buckets RUN: llvm-bolt-heatmap %t.out -o %t2 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \ -RUN: 2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s +RUN: --block-size=64,4KB,16kb,1MiB 2>&1 | FileCheck --check-prefix CHECK-HEATMAP-BAT %s RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotness.csv RUN: llvm-nm -n %t.out | FileCheck %s --check-prefix=CHECK-HOT-SYMS +RUN: FileCheck %s --check-prefix CHECK-BAT-HM-64 --input-file %t2 +# Identical hottest range for 4K, 16K, 1M heatmaps +RUN: FileCheck %s --check-prefix CHECK-BAT-HM-4K --input-file %t2-4096 +RUN: FileCheck %s --check-prefix CHECK-BAT-HM-4K --input-file %t2-16384 +RUN: FileCheck %s --check-prefix CHECK-BAT-HM-4K --input-file %t2-1048576 + +# No zoomed-out heatmaps +RUN: llvm-bolt-heatmap %t.out -o %t3 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \ +RUN: --block-size=1024 | FileCheck --check-prefix CHECK-HEATMAP-BAT-1K %s +CHECK-HEATMAP-BAT-1K: HEATMAP: dumping heatmap with bucket size 1024 +CHECK-HEATMAP-BAT-1K-NOT: HEATMAP: dumping heatmap with bucket size CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries CHECK-HEATMAP: HEATMAP: invalid traces: 1 +CHECK-HEATMAP: HEATMAP: dumping heatmap with bucket size 64 +CHECK-HEATMAP: HEATMAP: dumping heatmap with bucket size 128 +CHECK-HEATMAP: HEATMAP: dumping heatmap with bucket size 1024 +CHECK-HEATMAP-NOT: HEATMAP: dumping heatmap with bucket size CHECK-SEC-HOT: Section Name, Begin 
Address, End Address, Percentage Hotness, Utilization Pct, Partition Score CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100.0000, 0.1685 @@ -24,8 +45,39 @@ CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667, 0.0317 CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064, 0.6671 CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000, 0.0000 +# Only check x scales – can't check colors, and FileCheck doesn't strip color +# codes by default. +CHECK-HM-64: (299, 937] +CHECK-HM-64-NEXT: 0 +CHECK-HM-64-NEXT: 0 +CHECK-HM-64-NEXT: 0 1 2 3 4 5 6 7 8 9 a b c d e f +CHECK-HM-64-NEXT: 048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c +CHECK-HM-64-NEXT: 0 + +CHECK-HM-128: (299, 937] +CHECK-HM-128-NEXT: 0 +CHECK-HM-128-NEXT: 0 1 +CHECK-HM-128-NEXT: 0 1 2 3 4 5 6 7 8 9 a b c d e f 0 1 2 3 4 5 6 7 8 9 a b c d e f +CHECK-HM-128-NEXT: 0808080808080808080808080808080808080808080808080808080808080808 +CHECK-HM-128-NEXT: 0 + +CHECK-HM-1024: (483, 1663] +CHECK-HM-1024-NEXT: 0 +CHECK-HM-1024-NEXT: 0 1 2 3 4 5 6 7 8 9 a b c d e f +CHECK-HM-1024-NEXT: 048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c048c +CHECK-HM-1024-NEXT: 0 +CHECK-HM-1024-NEXT: 0 + +CHECK-BAT-HM-64: (349, 1126] +CHECK-BAT-HM-4K: (605, 2182] + CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2 +CHECK-HEATMAP-BAT: HEATMAP: dumping heatmap with bucket size 64 +CHECK-HEATMAP-BAT: HEATMAP: dumping heatmap with bucket size 4096 +CHECK-HEATMAP-BAT: HEATMAP: dumping heatmap with bucket size 16384 +CHECK-HEATMAP-BAT: HEATMAP: dumping heatmap with bucket size 1048576 +CHECK-HEATMAP-BAT-NOT: HEATMAP: dumping heatmap with bucket size CHECK-SEC-HOT-BAT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct, Partition Score CHECK-SEC-HOT-BAT-NEXT: .init, 0x401000, 0x40101b, 17.2888, 100.0000, 0.1729 diff --git a/bolt/tools/heatmap/heatmap.cpp b/bolt/tools/heatmap/heatmap.cpp index 
b2dd586a4c6f7..43167558b6758 100644 --- a/bolt/tools/heatmap/heatmap.cpp +++ b/bolt/tools/heatmap/heatmap.cpp @@ -59,7 +59,26 @@ static std::string GetExecutablePath(const char *Argv0) { int main(int argc, char **argv) { cl::HideUnrelatedOptions(ArrayRef(opts::HeatmapCategories)); - cl::ParseCommandLineOptions(argc, argv, ""); + cl::ParseCommandLineOptions( + argc, argv, + " BOLT Code Heatmap tool\n\n" + " Produces code heatmaps using sampled profile\n\n" + + " Inputs:\n" + " - Binary (supports BOLT-optimized binaries),\n" + " - Sampled profile collected from the binary:\n" + " - perf data or pre-aggregated profile data (instrumentation profile " + "not supported)\n" + " - perf data can have basic (IP) or branch-stack (LBR) samples\n\n" + + " Outputs:\n" + " - Heatmaps: colored ASCII (requires a color-capable terminal or a" + " conversion tool like `aha`)\n" + " Multiple heatmaps are produced by default with different " + "granularities (set by `block-size` option)\n" + " - Section hotness: per-section samples% and utilization%\n" + " - Cumulative distribution: working set size corresponding to a " + "given percentile of samples\n"); if (opts::PerfData.empty()) { errs() << ToolName << ": expected -perfdata= option.\n";