1//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Replaces repeated sequences of instructions with function calls.
11///
12/// This works by placing every instruction from every basic block in a
13/// suffix tree, and repeatedly querying that tree for repeated sequences of
14/// instructions. If a sequence of instructions appears often, then it ought
15/// to be beneficial to pull out into a function.
16///
17/// The MachineOutliner communicates with a given target using hooks defined in
18/// TargetInstrInfo.h. The target supplies the outliner with information on how
19/// a specific sequence of instructions should be outlined. This information
20/// is used to deduce the number of instructions necessary to
21///
22/// * Create an outlined function
23/// * Call that outlined function
24///
25/// Targets must implement
26/// * getOutliningCandidateInfo
27/// * buildOutlinedFrame
28/// * insertOutlinedCall
29/// * isFunctionSafeToOutlineFrom
30///
31/// in order to make use of the MachineOutliner.
32///
33/// This was originally presented at the 2016 LLVM Developers' Meeting in the
34/// talk "Reducing Code Size Using Outlining". For a high-level overview of
35/// how this pass works, the talk is available on YouTube at
36///
37/// https://www.youtube.com/watch?v=yorld-WSOeU
38///
39/// The slides for the talk are available at
40///
41/// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
42///
43/// The talk provides an overview of how the outliner finds candidates and
44/// ultimately outlines them. It describes how the main data structure for this
45/// pass, the suffix tree, is queried and purged for candidates. It also gives
46/// a simplified suffix tree construction algorithm for suffix trees based off
47/// of the algorithm actually used here, Ukkonen's algorithm.
48///
49/// For the original RFC for this pass, please see
50///
51/// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
52///
53/// For more information on the suffix tree data structure, please see
54/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
55///
56//===----------------------------------------------------------------------===//
57#include "llvm/CodeGen/MachineOutliner.h"
58#include "llvm/ADT/DenseMap.h"
59#include "llvm/ADT/SmallSet.h"
60#include "llvm/ADT/Statistic.h"
61#include "llvm/ADT/Twine.h"
62#include "llvm/Analysis/BlockFrequencyInfo.h"
63#include "llvm/Analysis/ModuleSummaryAnalysis.h"
64#include "llvm/Analysis/OptimizationRemarkEmitter.h"
65#include "llvm/Analysis/ProfileSummaryInfo.h"
66#include "llvm/CGData/CodeGenDataReader.h"
67#include "llvm/CodeGen/LivePhysRegs.h"
68#include "llvm/CodeGen/MachineModuleInfo.h"
69#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
70#include "llvm/CodeGen/Passes.h"
71#include "llvm/CodeGen/TargetInstrInfo.h"
72#include "llvm/CodeGen/TargetPassConfig.h"
73#include "llvm/CodeGen/TargetSubtargetInfo.h"
74#include "llvm/IR/DIBuilder.h"
75#include "llvm/IR/IRBuilder.h"
76#include "llvm/IR/Mangler.h"
77#include "llvm/IR/Module.h"
78#include "llvm/InitializePasses.h"
79#include "llvm/Support/CommandLine.h"
80#include "llvm/Support/Debug.h"
81#include "llvm/Support/SuffixTree.h"
82#include "llvm/Support/raw_ostream.h"
83#include "llvm/Target/TargetMachine.h"
84#include "llvm/Transforms/Utils/ModuleUtils.h"
85#include <tuple>
86#include <vector>
87
88#define DEBUG_TYPE "machine-outliner"
89
90using namespace llvm;
91using namespace ore;
92using namespace outliner;
93
94// Statistics for outlined functions.
95STATISTIC(NumOutlined, "Number of candidates outlined");
96STATISTIC(FunctionsCreated, "Number of functions created");
97
98// Statistics for instruction mapping.
99STATISTIC(NumLegalInUnsignedVec, "Outlinable instructions mapped");
100STATISTIC(NumIllegalInUnsignedVec,
101 "Unoutlinable instructions mapped + number of sentinel values");
102STATISTIC(NumSentinels, "Sentinel values inserted during mapping");
103STATISTIC(NumInvisible,
104 "Invisible instructions skipped during mapping");
105STATISTIC(UnsignedVecSize,
106 "Total number of instructions mapped and saved to mapping vector");
107STATISTIC(StableHashAttempts,
108 "Count of hashing attempts made for outlined functions");
109STATISTIC(StableHashDropped,
110 "Count of unsuccessful hashing attempts for outlined functions");
111STATISTIC(NumRemovedLOHs, "Total number of Linker Optimization Hints removed");
112STATISTIC(NumPGOBlockedOutlined,
113 "Number of times outlining was blocked by PGO");
114STATISTIC(NumPGOAllowedCold,
115 "Number of times outlining was allowed from cold functions");
116STATISTIC(NumPGOConservativeBlockedOutlined,
117 "Number of times outlining was blocked conservatively when profile "
118 "counts were missing");
119STATISTIC(NumPGOOptimisticOutlined,
120 "Number of times outlining was allowed optimistically when profile "
121 "counts were missing");
122
123// Set to true if the user wants the outliner to run on linkonceodr linkage
124// functions. This is false by default because the linker can dedupe linkonceodr
125// functions. Since the outliner is confined to a single module (modulo LTO),
126// this is off by default. It should, however, be the default behaviour in
127// LTO.
128static cl::opt<bool> EnableLinkOnceODROutlining(
129 "enable-linkonceodr-outlining", cl::Hidden,
130 cl::desc("Enable the machine outliner on linkonceodr functions"),
131 cl::init(Val: false));
132
133/// Number of times to re-run the outliner. This is not the total number of runs
134/// as the outliner will run at least one time. The default value is set to 0,
135/// meaning the outliner will run one time and rerun zero times after that.
136static cl::opt<unsigned> OutlinerReruns(
137 "machine-outliner-reruns", cl::init(Val: 0), cl::Hidden,
138 cl::desc(
139 "Number of times to rerun the outliner after the initial outline"));
140
141static cl::opt<unsigned> OutlinerBenefitThreshold(
142 "outliner-benefit-threshold", cl::init(Val: 1), cl::Hidden,
143 cl::desc(
144 "The minimum size in bytes before an outlining candidate is accepted"));
145
146static cl::opt<bool> OutlinerLeafDescendants(
147 "outliner-leaf-descendants", cl::init(Val: true), cl::Hidden,
148 cl::desc("Consider all leaf descendants of internal nodes of the suffix "
149 "tree as candidates for outlining (if false, only leaf children "
150 "are considered)"));
151
152static cl::opt<bool>
153 DisableGlobalOutlining("disable-global-outlining", cl::Hidden,
154 cl::desc("Disable global outlining only by ignoring "
155 "the codegen data generation or use"),
156 cl::init(Val: false));
157
158static cl::opt<bool> AppendContentHashToOutlinedName(
159 "append-content-hash-outlined-name", cl::Hidden,
160 cl::desc("This appends the content hash to the globally outlined function "
161 "name. It's beneficial for enhancing the precision of the stable "
162 "hash and for ordering the outlined functions."),
163 cl::init(Val: true));
164
165namespace {
166
167/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
168struct InstructionMapper {
169 const MachineModuleInfo &MMI;
170
171 /// The next available integer to assign to a \p MachineInstr that
172 /// cannot be outlined.
173 ///
174 /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
175 unsigned IllegalInstrNumber = -3;
176
177 /// The next available integer to assign to a \p MachineInstr that can
178 /// be outlined.
179 unsigned LegalInstrNumber = 0;
180
181 /// Correspondence from \p MachineInstrs to unsigned integers.
182 DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
183 InstructionIntegerMap;
184
185 /// Correspondence between \p MachineBasicBlocks and target-defined flags.
186 DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
187
188 /// The vector of unsigned integers that the module is mapped to.
189 SmallVector<unsigned> UnsignedVec;
190
191 /// Stores the location of the instruction associated with the integer
192 /// at index i in \p UnsignedVec for each index i.
193 SmallVector<MachineBasicBlock::iterator> InstrList;
194
195 // Set if we added an illegal number in the previous step.
196 // Since each illegal number is unique, we only need one of them between
197 // each range of legal numbers. This lets us make sure we don't add more
198 // than one illegal number per range.
199 bool AddedIllegalLastTime = false;
200
201 /// Maps \p *It to a legal integer.
202 ///
203 /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
204 /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
205 ///
206 /// \returns The integer that \p *It was mapped to.
207 unsigned mapToLegalUnsigned(
208 MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
209 bool &HaveLegalRange, unsigned &NumLegalInBlock,
210 SmallVector<unsigned> &UnsignedVecForMBB,
211 SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
212 // We added something legal, so we should unset the AddedLegalLastTime
213 // flag.
214 AddedIllegalLastTime = false;
215
216 // If we have at least two adjacent legal instructions (which may have
217 // invisible instructions in between), remember that.
218 if (CanOutlineWithPrevInstr)
219 HaveLegalRange = true;
220 CanOutlineWithPrevInstr = true;
221
222 // Keep track of the number of legal instructions we insert.
223 NumLegalInBlock++;
224
225 // Get the integer for this instruction or give it the current
226 // LegalInstrNumber.
227 InstrListForMBB.push_back(Elt: It);
228 MachineInstr &MI = *It;
229 bool WasInserted;
230 DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
231 ResultIt;
232 std::tie(args&: ResultIt, args&: WasInserted) =
233 InstructionIntegerMap.insert(KV: std::make_pair(x: &MI, y&: LegalInstrNumber));
234 unsigned MINumber = ResultIt->second;
235
236 // There was an insertion.
237 if (WasInserted)
238 LegalInstrNumber++;
239
240 UnsignedVecForMBB.push_back(Elt: MINumber);
241
242 // Make sure we don't overflow or use any integers reserved by the DenseMap.
243 if (LegalInstrNumber >= IllegalInstrNumber)
244 report_fatal_error(reason: "Instruction mapping overflow!");
245
246 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
247 "Tried to assign DenseMap tombstone or empty key to instruction.");
248 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
249 "Tried to assign DenseMap tombstone or empty key to instruction.");
250
251 // Statistics.
252 ++NumLegalInUnsignedVec;
253 return MINumber;
254 }
255
256 /// Maps \p *It to an illegal integer.
257 ///
258 /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
259 /// IllegalInstrNumber.
260 ///
261 /// \returns The integer that \p *It was mapped to.
262 unsigned mapToIllegalUnsigned(
263 MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
264 SmallVector<unsigned> &UnsignedVecForMBB,
265 SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
266 // Can't outline an illegal instruction. Set the flag.
267 CanOutlineWithPrevInstr = false;
268
269 // Only add one illegal number per range of legal numbers.
270 if (AddedIllegalLastTime)
271 return IllegalInstrNumber;
272
273 // Remember that we added an illegal number last time.
274 AddedIllegalLastTime = true;
275 unsigned MINumber = IllegalInstrNumber;
276
277 InstrListForMBB.push_back(Elt: It);
278 UnsignedVecForMBB.push_back(Elt: IllegalInstrNumber);
279 IllegalInstrNumber--;
280 // Statistics.
281 ++NumIllegalInUnsignedVec;
282
283 assert(LegalInstrNumber < IllegalInstrNumber &&
284 "Instruction mapping overflow!");
285
286 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
287 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
288
289 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
290 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
291
292 return MINumber;
293 }
294
295 /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
296 /// and appends it to \p UnsignedVec and \p InstrList.
297 ///
298 /// Two instructions are assigned the same integer if they are identical.
299 /// If an instruction is deemed unsafe to outline, then it will be assigned an
300 /// unique integer. The resulting mapping is placed into a suffix tree and
301 /// queried for candidates.
302 ///
303 /// \param MBB The \p MachineBasicBlock to be translated into integers.
304 /// \param TII \p TargetInstrInfo for the function.
305 void convertToUnsignedVec(MachineBasicBlock &MBB,
306 const TargetInstrInfo &TII) {
307 LLVM_DEBUG(dbgs() << "*** Converting MBB '" << MBB.getName()
308 << "' to unsigned vector ***\n");
309 unsigned Flags = 0;
310
311 // Don't even map in this case.
312 if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
313 return;
314
315 auto OutlinableRanges = TII.getOutlinableRanges(MBB, Flags);
316 LLVM_DEBUG(dbgs() << MBB.getName() << ": " << OutlinableRanges.size()
317 << " outlinable range(s)\n");
318 if (OutlinableRanges.empty())
319 return;
320
321 // Store info for the MBB for later outlining.
322 MBBFlagsMap[&MBB] = Flags;
323
324 MachineBasicBlock::iterator It = MBB.begin();
325
326 // The number of instructions in this block that will be considered for
327 // outlining.
328 unsigned NumLegalInBlock = 0;
329
330 // True if we have at least two legal instructions which aren't separated
331 // by an illegal instruction.
332 bool HaveLegalRange = false;
333
334 // True if we can perform outlining given the last mapped (non-invisible)
335 // instruction. This lets us know if we have a legal range.
336 bool CanOutlineWithPrevInstr = false;
337
338 // FIXME: Should this all just be handled in the target, rather than using
339 // repeated calls to getOutliningType?
340 SmallVector<unsigned> UnsignedVecForMBB;
341 SmallVector<MachineBasicBlock::iterator> InstrListForMBB;
342
343 LLVM_DEBUG(dbgs() << "*** Mapping outlinable ranges ***\n");
344 for (auto &OutlinableRange : OutlinableRanges) {
345 auto OutlinableRangeBegin = OutlinableRange.first;
346 auto OutlinableRangeEnd = OutlinableRange.second;
347#ifndef NDEBUG
348 LLVM_DEBUG(
349 dbgs() << "Mapping "
350 << std::distance(OutlinableRangeBegin, OutlinableRangeEnd)
351 << " instruction range\n");
352 // Everything outside of an outlinable range is illegal.
353 unsigned NumSkippedInRange = 0;
354#endif
355 for (; It != OutlinableRangeBegin; ++It) {
356#ifndef NDEBUG
357 ++NumSkippedInRange;
358#endif
359 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
360 InstrListForMBB);
361 }
362#ifndef NDEBUG
363 LLVM_DEBUG(dbgs() << "Skipped " << NumSkippedInRange
364 << " instructions outside outlinable range\n");
365#endif
366 assert(It != MBB.end() && "Should still have instructions?");
367 // `It` is now positioned at the beginning of a range of instructions
368 // which may be outlinable. Check if each instruction is known to be safe.
369 for (; It != OutlinableRangeEnd; ++It) {
370 // Keep track of where this instruction is in the module.
371 switch (TII.getOutliningType(MMI, MIT&: It, Flags)) {
372 case InstrType::Illegal:
373 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
374 InstrListForMBB);
375 break;
376
377 case InstrType::Legal:
378 mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
379 NumLegalInBlock, UnsignedVecForMBB,
380 InstrListForMBB);
381 break;
382
383 case InstrType::LegalTerminator:
384 mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
385 NumLegalInBlock, UnsignedVecForMBB,
386 InstrListForMBB);
387 // The instruction also acts as a terminator, so we have to record
388 // that in the string.
389 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
390 InstrListForMBB);
391 break;
392
393 case InstrType::Invisible:
394 // Normally this is set by mapTo(Blah)Unsigned, but we just want to
395 // skip this instruction. So, unset the flag here.
396 ++NumInvisible;
397 AddedIllegalLastTime = false;
398 break;
399 }
400 }
401 }
402
403 LLVM_DEBUG(dbgs() << "HaveLegalRange = " << HaveLegalRange << "\n");
404
405 // Are there enough legal instructions in the block for outlining to be
406 // possible?
407 if (HaveLegalRange) {
408 // After we're done every insertion, uniquely terminate this part of the
409 // "string". This makes sure we won't match across basic block or function
410 // boundaries since the "end" is encoded uniquely and thus appears in no
411 // repeated substring.
412 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
413 InstrListForMBB);
414 ++NumSentinels;
415 append_range(C&: InstrList, R&: InstrListForMBB);
416 append_range(C&: UnsignedVec, R&: UnsignedVecForMBB);
417 }
418 }
419
420 InstructionMapper(const MachineModuleInfo &MMI_) : MMI(MMI_) {
421 // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
422 // changed.
423 static_assert(DenseMapInfo<unsigned>::getEmptyKey() ==
424 static_cast<unsigned>(-1));
425 static_assert(DenseMapInfo<unsigned>::getTombstoneKey() ==
426 static_cast<unsigned>(-2));
427 }
428};
429
430/// An interprocedural pass which finds repeated sequences of
431/// instructions and replaces them with calls to functions.
432///
433/// Each instruction is mapped to an unsigned integer and placed in a string.
434/// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
435/// is then repeatedly queried for repeated sequences of instructions. Each
436/// non-overlapping repeated sequence is then placed in its own
437/// \p MachineFunction and each instance is then replaced with a call to that
438/// function.
439struct MachineOutliner : public ModulePass {
440
441 static char ID;
442
443 MachineModuleInfo *MMI = nullptr;
444 const TargetMachine *TM = nullptr;
445
446 /// Set to true if the outliner should consider functions with
447 /// linkonceodr linkage.
448 bool OutlineFromLinkOnceODRs = false;
449
450 /// The current repeat number of machine outlining.
451 unsigned OutlineRepeatedNum = 0;
452
453 /// The mode for whether to run the outliner
454 /// Set to always-outline by default for compatibility with llc's -run-pass
455 /// option.
456 RunOutliner RunOutlinerMode = RunOutliner::AlwaysOutline;
457
458 /// This is a compact representation of hash sequences of outlined functions.
459 /// It is used when OutlinerMode = CGDataMode::Write.
460 /// The resulting hash tree will be emitted into __llvm_outlined section
461 /// which will be dead-stripped not going to the final binary.
462 /// A post-process using llvm-cgdata, lld, or ThinLTO can merge them into
463 /// a global oulined hash tree for the subsequent codegen.
464 std::unique_ptr<OutlinedHashTree> LocalHashTree;
465
466 /// The mode of the outliner.
467 /// When is's CGDataMode::None, candidates are populated with the suffix tree
468 /// within a module and outlined.
469 /// When it's CGDataMode::Write, in addition to CGDataMode::None, the hash
470 /// sequences of outlined functions are published into LocalHashTree.
471 /// When it's CGDataMode::Read, candidates are populated with the global
472 /// outlined hash tree that has been built by the previous codegen.
473 CGDataMode OutlinerMode = CGDataMode::None;
474
475 StringRef getPassName() const override { return "Machine Outliner"; }
476
477 void getAnalysisUsage(AnalysisUsage &AU) const override {
478 AU.addRequired<MachineModuleInfoWrapperPass>();
479 AU.addRequired<TargetPassConfig>();
480 AU.addPreserved<MachineModuleInfoWrapperPass>();
481 AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
482 if (RunOutlinerMode == RunOutliner::OptimisticPGO ||
483 RunOutlinerMode == RunOutliner::ConservativePGO) {
484 AU.addRequired<BlockFrequencyInfoWrapperPass>();
485 AU.addRequired<ProfileSummaryInfoWrapperPass>();
486 }
487 AU.setPreservesAll();
488 ModulePass::getAnalysisUsage(AU);
489 }
490
491 MachineOutliner() : ModulePass(ID) {}
492
493 /// Remark output explaining that not outlining a set of candidates would be
494 /// better than outlining that set.
495 void emitNotOutliningCheaperRemark(
496 unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
497 OutlinedFunction &OF);
498
499 /// Remark output explaining that a function was outlined.
500 void emitOutlinedFunctionRemark(OutlinedFunction &OF);
501
502 /// Find all repeated substrings that satisfy the outlining cost model by
503 /// constructing a suffix tree.
504 ///
505 /// If a substring appears at least twice, then it must be represented by
506 /// an internal node which appears in at least two suffixes. Each suffix
507 /// is represented by a leaf node. To do this, we visit each internal node
508 /// in the tree, using the leaf children of each internal node. If an
509 /// internal node represents a beneficial substring, then we use each of
510 /// its leaf children to find the locations of its substring.
511 ///
512 /// \param Mapper Contains outlining mapping information.
513 /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
514 /// each type of candidate.
515 void
516 findCandidates(InstructionMapper &Mapper,
517 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
518
519 /// Find all repeated substrings that match in the global outlined hash
520 /// tree built from the previous codegen.
521 ///
522 /// \param Mapper Contains outlining mapping information.
523 /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
524 /// each type of candidate.
525 void findGlobalCandidates(
526 InstructionMapper &Mapper,
527 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
528
529 /// Replace the sequences of instructions represented by \p OutlinedFunctions
530 /// with calls to functions.
531 ///
532 /// \param M The module we are outlining from.
533 /// \param FunctionList A list of functions to be inserted into the module.
534 /// \param Mapper Contains the instruction mappings for the module.
535 /// \param[out] OutlinedFunctionNum The outlined function number.
536 bool outline(Module &M,
537 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
538 InstructionMapper &Mapper, unsigned &OutlinedFunctionNum);
539
540 /// Creates a function for \p OF and inserts it into the module.
541 MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
542 InstructionMapper &Mapper,
543 unsigned Name);
544
545 /// Compute and publish the stable hash sequence of instructions in the
546 /// outlined function, \p MF. The parameter \p CandSize represents the number
547 /// of candidates that have identical instruction sequences to \p MF.
548 void computeAndPublishHashSequence(MachineFunction &MF, unsigned CandSize);
549
550 /// Initialize the outliner mode.
551 void initializeOutlinerMode(const Module &M);
552
553 /// Emit the outlined hash tree into __llvm_outline section.
554 void emitOutlinedHashTree(Module &M);
555
556 /// Calls 'doOutline()' 1 + OutlinerReruns times.
557 bool runOnModule(Module &M) override;
558
559 /// Construct a suffix tree on the instructions in \p M and outline repeated
560 /// strings from that tree.
561 bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
562
563 /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
564 /// function for remark emission.
565 DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
566 for (const Candidate &C : OF.Candidates)
567 if (MachineFunction *MF = C.getMF())
568 if (DISubprogram *SP = MF->getFunction().getSubprogram())
569 return SP;
570 return nullptr;
571 }
572
573 /// Populate and \p InstructionMapper with instruction-to-integer mappings.
574 /// These are used to construct a suffix tree.
575 void populateMapper(InstructionMapper &Mapper, Module &M);
576
577 /// Initialize information necessary to output a size remark.
578 /// FIXME: This should be handled by the pass manager, not the outliner.
579 /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
580 /// pass manager.
581 void initSizeRemarkInfo(const Module &M,
582 StringMap<unsigned> &FunctionToInstrCount);
583
584 /// Emit the remark.
585 // FIXME: This should be handled by the pass manager, not the outliner.
586 void
587 emitInstrCountChangedRemark(const Module &M,
588 const StringMap<unsigned> &FunctionToInstrCount);
589};
590} // Anonymous namespace.
591
592char MachineOutliner::ID = 0;
593
594ModulePass *llvm::createMachineOutlinerPass(RunOutliner RunOutlinerMode) {
595 MachineOutliner *OL = new MachineOutliner();
596 OL->RunOutlinerMode = RunOutlinerMode;
597 return OL;
598}
599
600INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
601 false)
602
603void MachineOutliner::emitNotOutliningCheaperRemark(
604 unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
605 OutlinedFunction &OF) {
606 // FIXME: Right now, we arbitrarily choose some Candidate from the
607 // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
608 // We should probably sort these by function name or something to make sure
609 // the remarks are stable.
610 Candidate &C = CandidatesForRepeatedSeq.front();
611 MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
612 MORE.emit(RemarkBuilder: [&]() {
613 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
614 C.front().getDebugLoc(), C.getMBB());
615 R << "Did not outline " << NV("Length", StringLen) << " instructions"
616 << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
617 << " locations."
618 << " Bytes from outlining all occurrences ("
619 << NV("OutliningCost", OF.getOutliningCost()) << ")"
620 << " >= Unoutlined instruction bytes ("
621 << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
622 << " (Also found at: ";
623
624 // Tell the user the other places the candidate was found.
625 for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
626 R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
627 CandidatesForRepeatedSeq[i].front().getDebugLoc());
628 if (i != e - 1)
629 R << ", ";
630 }
631
632 R << ")";
633 return R;
634 });
635}
636
637void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
638 MachineBasicBlock *MBB = &*OF.MF->begin();
639 MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
640 MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
641 MBB->findDebugLoc(MBBI: MBB->begin()), MBB);
642 R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
643 << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
644 << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
645 << " locations. "
646 << "(Found at: ";
647
648 // Tell the user the other places the candidate was found.
649 for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
650
651 R << NV((Twine("StartLoc") + Twine(i)).str(),
652 OF.Candidates[i].front().getDebugLoc());
653 if (i != e - 1)
654 R << ", ";
655 }
656
657 R << ")";
658
659 MORE.emit(OptDiag&: R);
660}
661
662struct MatchedEntry {
663 unsigned StartIdx;
664 unsigned EndIdx;
665 unsigned Count;
666 MatchedEntry(unsigned StartIdx, unsigned EndIdx, unsigned Count)
667 : StartIdx(StartIdx), EndIdx(EndIdx), Count(Count) {}
668 MatchedEntry() = delete;
669};
670
671// Find all matches in the global outlined hash tree.
672// It's quadratic complexity in theory, but it's nearly linear in practice
673// since the length of outlined sequences are small within a block.
674static SmallVector<MatchedEntry> getMatchedEntries(InstructionMapper &Mapper) {
675 auto &InstrList = Mapper.InstrList;
676 auto &UnsignedVec = Mapper.UnsignedVec;
677
678 SmallVector<MatchedEntry> MatchedEntries;
679 auto Size = UnsignedVec.size();
680
681 // Get the global outlined hash tree built from the previous run.
682 assert(cgdata::hasOutlinedHashTree());
683 const auto *RootNode = cgdata::getOutlinedHashTree()->getRoot();
684
685 auto getValidInstr = [&](unsigned Index) -> const MachineInstr * {
686 if (UnsignedVec[Index] >= Mapper.LegalInstrNumber)
687 return nullptr;
688 return &(*InstrList[Index]);
689 };
690
691 auto getStableHashAndFollow =
692 [](const MachineInstr &MI, const HashNode *CurrNode) -> const HashNode * {
693 stable_hash StableHash = stableHashValue(MI);
694 if (!StableHash)
695 return nullptr;
696 auto It = CurrNode->Successors.find(x: StableHash);
697 return (It == CurrNode->Successors.end()) ? nullptr : It->second.get();
698 };
699
700 for (unsigned I = 0; I < Size; ++I) {
701 const MachineInstr *MI = getValidInstr(I);
702 if (!MI || MI->isDebugInstr())
703 continue;
704 const HashNode *CurrNode = getStableHashAndFollow(*MI, RootNode);
705 if (!CurrNode)
706 continue;
707
708 for (unsigned J = I + 1; J < Size; ++J) {
709 const MachineInstr *MJ = getValidInstr(J);
710 if (!MJ)
711 break;
712 // Skip debug instructions as we did for the outlined function.
713 if (MJ->isDebugInstr())
714 continue;
715 CurrNode = getStableHashAndFollow(*MJ, CurrNode);
716 if (!CurrNode)
717 break;
718 // Even with a match ending with a terminal, we continue finding
719 // matches to populate all candidates.
720 if (auto Count = CurrNode->Terminals)
721 MatchedEntries.emplace_back(Args&: I, Args&: J, Args&: *Count);
722 }
723 }
724
725 return MatchedEntries;
726}
727
728void MachineOutliner::findGlobalCandidates(
729 InstructionMapper &Mapper,
730 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
731 FunctionList.clear();
732 auto &InstrList = Mapper.InstrList;
733 auto &MBBFlagsMap = Mapper.MBBFlagsMap;
734
735 std::vector<Candidate> CandidatesForRepeatedSeq;
736 for (auto &ME : getMatchedEntries(Mapper)) {
737 CandidatesForRepeatedSeq.clear();
738 MachineBasicBlock::iterator StartIt = InstrList[ME.StartIdx];
739 MachineBasicBlock::iterator EndIt = InstrList[ME.EndIdx];
740 auto Length = ME.EndIdx - ME.StartIdx + 1;
741 MachineBasicBlock *MBB = StartIt->getParent();
742 CandidatesForRepeatedSeq.emplace_back(args&: ME.StartIdx, args&: Length, args&: StartIt, args&: EndIt,
743 args&: MBB, args: FunctionList.size(),
744 args&: MBBFlagsMap[MBB]);
745 const TargetInstrInfo *TII =
746 MBB->getParent()->getSubtarget().getInstrInfo();
747 unsigned MinRepeats = 1;
748 std::optional<std::unique_ptr<OutlinedFunction>> OF =
749 TII->getOutliningCandidateInfo(MMI: *MMI, RepeatedSequenceLocs&: CandidatesForRepeatedSeq,
750 MinRepeats);
751 if (!OF.has_value() || OF.value()->Candidates.empty())
752 continue;
753 // We create a global candidate for each match.
754 assert(OF.value()->Candidates.size() == MinRepeats);
755 FunctionList.emplace_back(args: std::make_unique<GlobalOutlinedFunction>(
756 args: std::move(OF.value()), args&: ME.Count));
757 }
758}
759
760void MachineOutliner::findCandidates(
761 InstructionMapper &Mapper,
762 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
763 FunctionList.clear();
764 SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
765
766 // First, find all of the repeated substrings in the tree of minimum length
767 // 2.
768 std::vector<Candidate> CandidatesForRepeatedSeq;
769 LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n");
770 LLVM_DEBUG(
771 dbgs() << "Searching for overlaps in all repeated sequences...\n");
772 for (SuffixTree::RepeatedSubstring &RS : ST) {
773 CandidatesForRepeatedSeq.clear();
774 unsigned StringLen = RS.Length;
775 LLVM_DEBUG(dbgs() << " Sequence length: " << StringLen << "\n");
776 // Debug code to keep track of how many candidates we removed.
777#ifndef NDEBUG
778 unsigned NumDiscarded = 0;
779 unsigned NumKept = 0;
780#endif
781 // Sort the start indices so that we can efficiently check if candidates
782 // overlap with the ones we've already found for this sequence.
783 llvm::sort(C&: RS.StartIndices);
784 for (const unsigned &StartIdx : RS.StartIndices) {
785 // Trick: Discard some candidates that would be incompatible with the
786 // ones we've already found for this sequence. This will save us some
787 // work in candidate selection.
788 //
789 // If two candidates overlap, then we can't outline them both. This
790 // happens when we have candidates that look like, say
791 //
792 // AA (where each "A" is an instruction).
793 //
794 // We might have some portion of the module that looks like this:
795 // AAAAAA (6 A's)
796 //
797 // In this case, there are 5 different copies of "AA" in this range, but
798 // at most 3 can be outlined. If only outlining 3 of these is going to
799 // be unbeneficial, then we ought to not bother.
800 //
801 // Note that two things DON'T overlap when they look like this:
802 // start1...end1 .... start2...end2
803 // That is, one must either
804 // * End before the other starts
805 // * Start after the other ends
806 unsigned EndIdx = StartIdx + StringLen - 1;
807 if (!CandidatesForRepeatedSeq.empty() &&
808 StartIdx <= CandidatesForRepeatedSeq.back().getEndIdx()) {
809#ifndef NDEBUG
810 ++NumDiscarded;
811 LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx << ", "
812 << EndIdx << "]; overlaps with candidate @ ["
813 << CandidatesForRepeatedSeq.back().getStartIdx()
814 << ", " << CandidatesForRepeatedSeq.back().getEndIdx()
815 << "]\n");
816#endif
817 continue;
818 }
819 // It doesn't overlap with anything, so we can outline it.
820 // Each sequence is over [StartIt, EndIt].
821 // Save the candidate and its location.
822#ifndef NDEBUG
823 ++NumKept;
824#endif
825 MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
826 MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
827 MachineBasicBlock *MBB = StartIt->getParent();
828 CandidatesForRepeatedSeq.emplace_back(args: StartIdx, args&: StringLen, args&: StartIt, args&: EndIt,
829 args&: MBB, args: FunctionList.size(),
830 args&: Mapper.MBBFlagsMap[MBB]);
831 }
832#ifndef NDEBUG
833 LLVM_DEBUG(dbgs() << " Candidates discarded: " << NumDiscarded
834 << "\n");
835 LLVM_DEBUG(dbgs() << " Candidates kept: " << NumKept << "\n\n");
836#endif
837 unsigned MinRepeats = 2;
838
839 // We've found something we might want to outline.
840 // Create an OutlinedFunction to store it and check if it'd be beneficial
841 // to outline.
842 if (CandidatesForRepeatedSeq.size() < MinRepeats)
843 continue;
844
845 // Arbitrarily choose a TII from the first candidate.
846 // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
847 const TargetInstrInfo *TII =
848 CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
849
850 std::optional<std::unique_ptr<OutlinedFunction>> OF =
851 TII->getOutliningCandidateInfo(MMI: *MMI, RepeatedSequenceLocs&: CandidatesForRepeatedSeq,
852 MinRepeats);
853
854 // If we deleted too many candidates, then there's nothing worth outlining.
855 // FIXME: This should take target-specified instruction sizes into account.
856 if (!OF.has_value() || OF.value()->Candidates.size() < MinRepeats)
857 continue;
858
859 // Is it better to outline this candidate than not?
860 if (OF.value()->getBenefit() < OutlinerBenefitThreshold) {
861 emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq,
862 OF&: *OF.value());
863 continue;
864 }
865
866 FunctionList.emplace_back(args: std::move(OF.value()));
867 }
868}
869
870void MachineOutliner::computeAndPublishHashSequence(MachineFunction &MF,
871 unsigned CandSize) {
872 // Compute the hash sequence for the outlined function.
873 SmallVector<stable_hash> OutlinedHashSequence;
874 for (auto &MBB : MF) {
875 for (auto &NewMI : MBB) {
876 stable_hash Hash = stableHashValue(MI: NewMI);
877 if (!Hash) {
878 OutlinedHashSequence.clear();
879 break;
880 }
881 OutlinedHashSequence.push_back(Elt: Hash);
882 }
883 }
884
885 // Append a unique name based on the non-empty hash sequence.
886 if (AppendContentHashToOutlinedName && !OutlinedHashSequence.empty()) {
887 auto CombinedHash = stable_hash_combine(Buffer: OutlinedHashSequence);
888 auto NewName =
889 MF.getName().str() + ".content." + std::to_string(val: CombinedHash);
890 MF.getFunction().setName(NewName);
891 }
892
893 // Publish the non-empty hash sequence to the local hash tree.
894 if (OutlinerMode == CGDataMode::Write) {
895 StableHashAttempts++;
896 if (!OutlinedHashSequence.empty())
897 LocalHashTree->insert(SequencePair: {OutlinedHashSequence, CandSize});
898 else
899 StableHashDropped++;
900 }
901}
902
903MachineFunction *MachineOutliner::createOutlinedFunction(
904 Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
905
906 // Create the function name. This should be unique.
907 // FIXME: We should have a better naming scheme. This should be stable,
908 // regardless of changes to the outliner's cost model/traversal order.
909 std::string FunctionName = "OUTLINED_FUNCTION_";
910 if (OutlineRepeatedNum > 0)
911 FunctionName += std::to_string(val: OutlineRepeatedNum + 1) + "_";
912 FunctionName += std::to_string(val: Name);
913 LLVM_DEBUG(dbgs() << "NEW FUNCTION: " << FunctionName << "\n");
914
915 // Create the function using an IR-level function.
916 LLVMContext &C = M.getContext();
917 Function *F = Function::Create(Ty: FunctionType::get(Result: Type::getVoidTy(C), isVarArg: false),
918 Linkage: Function::ExternalLinkage, N: FunctionName, M);
919
920 // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
921 // which gives us better results when we outline from linkonceodr functions.
922 F->setLinkage(GlobalValue::InternalLinkage);
923 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
924
925 // Set optsize/minsize, so we don't insert padding between outlined
926 // functions.
927 F->addFnAttr(Kind: Attribute::OptimizeForSize);
928 F->addFnAttr(Kind: Attribute::MinSize);
929
930 Candidate &FirstCand = OF.Candidates.front();
931 const TargetInstrInfo &TII =
932 *FirstCand.getMF()->getSubtarget().getInstrInfo();
933
934 TII.mergeOutliningCandidateAttributes(F&: *F, Candidates&: OF.Candidates);
935
936 // Set uwtable, so we generate eh_frame.
937 UWTableKind UW = std::accumulate(
938 first: OF.Candidates.cbegin(), last: OF.Candidates.cend(), init: UWTableKind::None,
939 binary_op: [](UWTableKind K, const outliner::Candidate &C) {
940 return std::max(a: K, b: C.getMF()->getFunction().getUWTableKind());
941 });
942 F->setUWTableKind(UW);
943
944 BasicBlock *EntryBB = BasicBlock::Create(Context&: C, Name: "entry", Parent: F);
945 IRBuilder<> Builder(EntryBB);
946 Builder.CreateRetVoid();
947
948 MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
949 MachineFunction &MF = MMI.getOrCreateMachineFunction(F&: *F);
950 MF.setIsOutlined(true);
951 MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
952
953 // Insert the new function into the module.
954 MF.insert(MBBI: MF.begin(), MBB: &MBB);
955
956 MachineFunction *OriginalMF = FirstCand.front().getMF();
957 const std::vector<MCCFIInstruction> &Instrs =
958 OriginalMF->getFrameInstructions();
959 for (auto &MI : FirstCand) {
960 if (MI.isDebugInstr())
961 continue;
962
963 // Don't keep debug information for outlined instructions.
964 auto DL = DebugLoc();
965 if (MI.isCFIInstruction()) {
966 unsigned CFIIndex = MI.getOperand(i: 0).getCFIIndex();
967 MCCFIInstruction CFI = Instrs[CFIIndex];
968 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
969 .addCFIIndex(CFIIndex: MF.addFrameInst(Inst: CFI));
970 } else {
971 MachineInstr &NewMI = TII.duplicate(MBB, InsertBefore: MBB.end(), Orig: MI);
972 NewMI.dropMemRefs(MF);
973 NewMI.setDebugLoc(DL);
974 }
975 }
976
977 if (OutlinerMode != CGDataMode::None)
978 computeAndPublishHashSequence(MF, CandSize: OF.Candidates.size());
979
980 // Set normal properties for a late MachineFunction.
981 MF.getProperties().resetIsSSA();
982 MF.getProperties().setNoPHIs();
983 MF.getProperties().setNoVRegs();
984 MF.getProperties().setTracksLiveness();
985 MF.getRegInfo().freezeReservedRegs();
986
987 // Compute live-in set for outlined fn
988 const MachineRegisterInfo &MRI = MF.getRegInfo();
989 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
990 LivePhysRegs LiveIns(TRI);
991 for (auto &Cand : OF.Candidates) {
992 // Figure out live-ins at the first instruction.
993 MachineBasicBlock &OutlineBB = *Cand.front().getParent();
994 LivePhysRegs CandLiveIns(TRI);
995 CandLiveIns.addLiveOuts(MBB: OutlineBB);
996 for (const MachineInstr &MI :
997 reverse(C: make_range(x: Cand.begin(), y: OutlineBB.end())))
998 CandLiveIns.stepBackward(MI);
999
1000 // The live-in set for the outlined function is the union of the live-ins
1001 // from all the outlining points.
1002 for (MCPhysReg Reg : CandLiveIns)
1003 LiveIns.addReg(Reg);
1004 }
1005 addLiveIns(MBB, LiveRegs: LiveIns);
1006
1007 TII.buildOutlinedFrame(MBB, MF, OF);
1008
1009 // If there's a DISubprogram associated with this outlined function, then
1010 // emit debug info for the outlined function.
1011 if (DISubprogram *SP = getSubprogramOrNull(OF)) {
1012 // We have a DISubprogram. Get its DICompileUnit.
1013 DICompileUnit *CU = SP->getUnit();
1014 DIBuilder DB(M, true, CU);
1015 DIFile *Unit = SP->getFile();
1016 Mangler Mg;
1017 // Get the mangled name of the function for the linkage name.
1018 std::string Dummy;
1019 raw_string_ostream MangledNameStream(Dummy);
1020 Mg.getNameWithPrefix(OS&: MangledNameStream, GV: F, CannotUsePrivateLabel: false);
1021
1022 DISubprogram *OutlinedSP = DB.createFunction(
1023 Scope: Unit /* Context */, Name: F->getName(), LinkageName: StringRef(Dummy), File: Unit /* File */,
1024 LineNo: 0 /* Line 0 is reserved for compiler-generated code. */,
1025 Ty: DB.createSubroutineType(ParameterTypes: DB.getOrCreateTypeArray(Elements: {})), /* void type */
1026 ScopeLine: 0, /* Line 0 is reserved for compiler-generated code. */
1027 Flags: DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
1028 /* Outlined code is optimized code by definition. */
1029 SPFlags: DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
1030
1031 // Attach subprogram to the function.
1032 F->setSubprogram(OutlinedSP);
1033 // We're done with the DIBuilder.
1034 DB.finalize();
1035 }
1036
1037 return &MF;
1038}
1039
1040bool MachineOutliner::outline(
1041 Module &M, std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
1042 InstructionMapper &Mapper, unsigned &OutlinedFunctionNum) {
1043 LLVM_DEBUG(dbgs() << "*** Outlining ***\n");
1044 LLVM_DEBUG(dbgs() << "NUMBER OF POTENTIAL FUNCTIONS: " << FunctionList.size()
1045 << "\n");
1046 bool OutlinedSomething = false;
1047
1048 // Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
1049 // The function with highest priority should be outlined first.
1050 stable_sort(Range&: FunctionList, C: [](const std::unique_ptr<OutlinedFunction> &LHS,
1051 const std::unique_ptr<OutlinedFunction> &RHS) {
1052 return LHS->getNotOutlinedCost() * RHS->getOutliningCost() >
1053 RHS->getNotOutlinedCost() * LHS->getOutliningCost();
1054 });
1055
1056 // Walk over each function, outlining them as we go along. Functions are
1057 // outlined greedily, based off the sort above.
1058 auto *UnsignedVecBegin = Mapper.UnsignedVec.begin();
1059 LLVM_DEBUG(dbgs() << "WALKING FUNCTION LIST\n");
1060 for (auto &OF : FunctionList) {
1061#ifndef NDEBUG
1062 auto NumCandidatesBefore = OF->Candidates.size();
1063#endif
1064 // If we outlined something that overlapped with a candidate in a previous
1065 // step, then we can't outline from it.
1066 erase_if(C&: OF->Candidates, P: [&UnsignedVecBegin](Candidate &C) {
1067 return std::any_of(first: UnsignedVecBegin + C.getStartIdx(),
1068 last: UnsignedVecBegin + C.getEndIdx() + 1, pred: [](unsigned I) {
1069 return I == static_cast<unsigned>(-1);
1070 });
1071 });
1072
1073#ifndef NDEBUG
1074 auto NumCandidatesAfter = OF->Candidates.size();
1075 LLVM_DEBUG(dbgs() << "PRUNED: " << NumCandidatesBefore - NumCandidatesAfter
1076 << "/" << NumCandidatesBefore << " candidates\n");
1077#endif
1078
1079 // If we made it unbeneficial to outline this function, skip it.
1080 if (OF->getBenefit() < OutlinerBenefitThreshold) {
1081 LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF->getBenefit()
1082 << " B) < threshold (" << OutlinerBenefitThreshold
1083 << " B)\n");
1084 continue;
1085 }
1086
1087 LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF->getBenefit()
1088 << " B) > threshold (" << OutlinerBenefitThreshold
1089 << " B)\n");
1090
1091 // Remove all Linker Optimization Hints from the candidates.
1092 // TODO: The intersection of the LOHs from all candidates should be legal in
1093 // the outlined function.
1094 SmallPtrSet<MachineInstr *, 2> MIs;
1095 for (Candidate &C : OF->Candidates) {
1096 for (MachineInstr &MI : C)
1097 MIs.insert(Ptr: &MI);
1098 NumRemovedLOHs += TM->clearLinkerOptimizationHints(MIs);
1099 MIs.clear();
1100 }
1101
1102 // It's beneficial. Create the function and outline its sequence's
1103 // occurrences.
1104 OF->MF = createOutlinedFunction(M, OF&: *OF, Mapper, Name: OutlinedFunctionNum);
1105 emitOutlinedFunctionRemark(OF&: *OF);
1106 FunctionsCreated++;
1107 OutlinedFunctionNum++; // Created a function, move to the next name.
1108 MachineFunction *MF = OF->MF;
1109 const TargetSubtargetInfo &STI = MF->getSubtarget();
1110 const TargetInstrInfo &TII = *STI.getInstrInfo();
1111
1112 // Replace occurrences of the sequence with calls to the new function.
1113 LLVM_DEBUG(dbgs() << "CREATE OUTLINED CALLS\n");
1114 for (Candidate &C : OF->Candidates) {
1115 MachineBasicBlock &MBB = *C.getMBB();
1116 MachineBasicBlock::iterator StartIt = C.begin();
1117 MachineBasicBlock::iterator EndIt = std::prev(x: C.end());
1118
1119 // Insert the call.
1120 auto CallInst = TII.insertOutlinedCall(M, MBB, It&: StartIt, MF&: *MF, C);
1121// Insert the call.
1122#ifndef NDEBUG
1123 auto MBBBeingOutlinedFromName =
1124 MBB.getName().empty() ? "<unknown>" : MBB.getName().str();
1125 auto MFBeingOutlinedFromName = MBB.getParent()->getName().empty()
1126 ? "<unknown>"
1127 : MBB.getParent()->getName().str();
1128 LLVM_DEBUG(dbgs() << " CALL: " << MF->getName() << " in "
1129 << MFBeingOutlinedFromName << ":"
1130 << MBBBeingOutlinedFromName << "\n");
1131 LLVM_DEBUG(dbgs() << " .. " << *CallInst);
1132#endif
1133
1134 // If the caller tracks liveness, then we need to make sure that
1135 // anything we outline doesn't break liveness assumptions. The outlined
1136 // functions themselves currently don't track liveness, but we should
1137 // make sure that the ranges we yank things out of aren't wrong.
1138 if (MBB.getParent()->getProperties().hasTracksLiveness()) {
1139 // The following code is to add implicit def operands to the call
1140 // instruction. It also updates call site information for moved
1141 // code.
1142 SmallSet<Register, 2> UseRegs, DefRegs;
1143 // Copy over the defs in the outlined range.
1144 // First inst in outlined range <-- Anything that's defined in this
1145 // ... .. range has to be added as an
1146 // implicit Last inst in outlined range <-- def to the call
1147 // instruction. Also remove call site information for outlined block
1148 // of code. The exposed uses need to be copied in the outlined range.
1149 for (MachineBasicBlock::reverse_iterator
1150 Iter = EndIt.getReverse(),
1151 Last = std::next(x: CallInst.getReverse());
1152 Iter != Last; Iter++) {
1153 MachineInstr *MI = &*Iter;
1154 SmallSet<Register, 2> InstrUseRegs;
1155 for (MachineOperand &MOP : MI->operands()) {
1156 // Skip over anything that isn't a register.
1157 if (!MOP.isReg())
1158 continue;
1159
1160 if (MOP.isDef()) {
1161 // Introduce DefRegs set to skip the redundant register.
1162 DefRegs.insert(V: MOP.getReg());
1163 if (UseRegs.count(V: MOP.getReg()) &&
1164 !InstrUseRegs.count(V: MOP.getReg()))
1165 // Since the regiester is modeled as defined,
1166 // it is not necessary to be put in use register set.
1167 UseRegs.erase(V: MOP.getReg());
1168 } else if (!MOP.isUndef()) {
1169 // Any register which is not undefined should
1170 // be put in the use register set.
1171 UseRegs.insert(V: MOP.getReg());
1172 InstrUseRegs.insert(V: MOP.getReg());
1173 }
1174 }
1175 if (MI->isCandidateForAdditionalCallInfo())
1176 MI->getMF()->eraseAdditionalCallInfo(MI);
1177 }
1178
1179 for (const Register &I : DefRegs)
1180 // If it's a def, add it to the call instruction.
1181 CallInst->addOperand(
1182 Op: MachineOperand::CreateReg(Reg: I, isDef: true, /* isDef = true */
1183 isImp: true /* isImp = true */));
1184
1185 for (const Register &I : UseRegs)
1186 // If it's a exposed use, add it to the call instruction.
1187 CallInst->addOperand(
1188 Op: MachineOperand::CreateReg(Reg: I, isDef: false, /* isDef = false */
1189 isImp: true /* isImp = true */));
1190 }
1191
1192 // Erase from the point after where the call was inserted up to, and
1193 // including, the final instruction in the sequence.
1194 // Erase needs one past the end, so we need std::next there too.
1195 MBB.erase(I: std::next(x: StartIt), E: std::next(x: EndIt));
1196
1197 // Keep track of what we removed by marking them all as -1.
1198 for (unsigned &I : make_range(x: UnsignedVecBegin + C.getStartIdx(),
1199 y: UnsignedVecBegin + C.getEndIdx() + 1))
1200 I = static_cast<unsigned>(-1);
1201 OutlinedSomething = true;
1202
1203 // Statistics.
1204 NumOutlined++;
1205 }
1206 }
1207
1208 LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n");
1209 return OutlinedSomething;
1210}
1211
1212static bool allowPGOOutlining(RunOutliner RunOutlinerMode,
1213 const ProfileSummaryInfo *PSI,
1214 const BlockFrequencyInfo *BFI,
1215 MachineBasicBlock &MBB) {
1216 if (RunOutlinerMode != RunOutliner::OptimisticPGO &&
1217 RunOutlinerMode != RunOutliner::ConservativePGO)
1218 return true;
1219 auto *MF = MBB.getParent();
1220 if (MF->getFunction().hasFnAttribute(Kind: Attribute::Cold)) {
1221 ++NumPGOAllowedCold;
1222 return true;
1223 }
1224
1225 auto *BB = MBB.getBasicBlock();
1226 if (BB && PSI && BFI)
1227 if (auto Count = BFI->getBlockProfileCount(BB))
1228 return *Count <= PSI->getOrCompColdCountThreshold();
1229
1230 if (RunOutlinerMode == RunOutliner::OptimisticPGO) {
1231 auto *TII = MF->getSubtarget().getInstrInfo();
1232 if (TII->shouldOutlineFromFunctionByDefault(MF&: *MF)) {
1233 // Profile data is unavailable, but we optimistically allow outlining
1234 ++NumPGOOptimisticOutlined;
1235 return true;
1236 }
1237 return false;
1238 }
1239 assert(RunOutlinerMode == RunOutliner::ConservativePGO);
1240 // Profile data is unavailable, so we conservatively block outlining
1241 ++NumPGOConservativeBlockedOutlined;
1242 return false;
1243}
1244
1245void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
1246 // Build instruction mappings for each function in the module. Start by
1247 // iterating over each Function in M.
1248 LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
1249 bool EnableProfileGuidedOutlining =
1250 RunOutlinerMode == RunOutliner::OptimisticPGO ||
1251 RunOutlinerMode == RunOutliner::ConservativePGO;
1252 ProfileSummaryInfo *PSI = nullptr;
1253 if (EnableProfileGuidedOutlining)
1254 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1255 for (Function &F : M) {
1256 LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
1257
1258 if (F.hasFnAttribute(Kind: "nooutline")) {
1259 LLVM_DEBUG(dbgs() << "SKIP: Function has nooutline attribute\n");
1260 continue;
1261 }
1262
1263 // There's something in F. Check if it has a MachineFunction associated with
1264 // it.
1265 MachineFunction *MF = MMI->getMachineFunction(F);
1266
1267 // If it doesn't, then there's nothing to outline from. Move to the next
1268 // Function.
1269 if (!MF) {
1270 LLVM_DEBUG(dbgs() << "SKIP: Function does not have a MachineFunction\n");
1271 continue;
1272 }
1273
1274 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1275 BlockFrequencyInfo *BFI = nullptr;
1276 if (EnableProfileGuidedOutlining && F.hasProfileData())
1277 BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
1278 if (RunOutlinerMode == RunOutliner::TargetDefault &&
1279 !TII->shouldOutlineFromFunctionByDefault(MF&: *MF)) {
1280 LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
1281 "function by default\n");
1282 continue;
1283 }
1284
1285 // We have a MachineFunction. Ask the target if it's suitable for outlining.
1286 // If it isn't, then move on to the next Function in the module.
1287 if (!TII->isFunctionSafeToOutlineFrom(MF&: *MF, OutlineFromLinkOnceODRs)) {
1288 LLVM_DEBUG(dbgs() << "SKIP: " << MF->getName()
1289 << ": unsafe to outline from\n");
1290 continue;
1291 }
1292
1293 // We have a function suitable for outlining. Iterate over every
1294 // MachineBasicBlock in MF and try to map its instructions to a list of
1295 // unsigned integers.
1296 const unsigned MinMBBSize = 2;
1297
1298 for (MachineBasicBlock &MBB : *MF) {
1299 LLVM_DEBUG(dbgs() << " MAPPING MBB: '" << MBB.getName() << "'\n");
1300 // If there isn't anything in MBB, then there's no point in outlining from
1301 // it.
1302 // If there are fewer than 2 instructions in the MBB, then it can't ever
1303 // contain something worth outlining.
1304 // FIXME: This should be based off of the maximum size in B of an outlined
1305 // call versus the size in B of the MBB.
1306 if (MBB.size() < MinMBBSize) {
1307 LLVM_DEBUG(dbgs() << " SKIP: MBB size less than minimum size of "
1308 << MinMBBSize << "\n");
1309 continue;
1310 }
1311
1312 // Check if MBB could be the target of an indirect branch. If it is, then
1313 // we don't want to outline from it.
1314 if (MBB.hasAddressTaken()) {
1315 LLVM_DEBUG(dbgs() << " SKIP: MBB's address is taken\n");
1316 continue;
1317 }
1318
1319 if (!allowPGOOutlining(RunOutlinerMode, PSI, BFI, MBB)) {
1320 ++NumPGOBlockedOutlined;
1321 continue;
1322 }
1323
1324 // MBB is suitable for outlining. Map it to a list of unsigneds.
1325 Mapper.convertToUnsignedVec(MBB, TII: *TII);
1326 }
1327 }
1328 // Statistics.
1329 UnsignedVecSize = Mapper.UnsignedVec.size();
1330}
1331
1332void MachineOutliner::initSizeRemarkInfo(
1333 const Module &M, StringMap<unsigned> &FunctionToInstrCount) {
1334 // Collect instruction counts for every function. We'll use this to emit
1335 // per-function size remarks later.
1336 for (const Function &F : M) {
1337 MachineFunction *MF = MMI->getMachineFunction(F);
1338
1339 // We only care about MI counts here. If there's no MachineFunction at this
1340 // point, then there won't be after the outliner runs, so let's move on.
1341 if (!MF)
1342 continue;
1343 FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
1344 }
1345}
1346
1347void MachineOutliner::emitInstrCountChangedRemark(
1348 const Module &M, const StringMap<unsigned> &FunctionToInstrCount) {
1349 // Iterate over each function in the module and emit remarks.
1350 // Note that we won't miss anything by doing this, because the outliner never
1351 // deletes functions.
1352 for (const Function &F : M) {
1353 MachineFunction *MF = MMI->getMachineFunction(F);
1354
1355 // The outliner never deletes functions. If we don't have a MF here, then we
1356 // didn't have one prior to outlining either.
1357 if (!MF)
1358 continue;
1359
1360 std::string Fname = std::string(F.getName());
1361 unsigned FnCountAfter = MF->getInstructionCount();
1362 unsigned FnCountBefore = 0;
1363
1364 // Check if the function was recorded before.
1365 auto It = FunctionToInstrCount.find(Key: Fname);
1366
1367 // Did we have a previously-recorded size? If yes, then set FnCountBefore
1368 // to that.
1369 if (It != FunctionToInstrCount.end())
1370 FnCountBefore = It->second;
1371
1372 // Compute the delta and emit a remark if there was a change.
1373 int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
1374 static_cast<int64_t>(FnCountBefore);
1375 if (FnDelta == 0)
1376 continue;
1377
1378 MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
1379 MORE.emit(RemarkBuilder: [&]() {
1380 MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
1381 DiagnosticLocation(), &MF->front());
1382 R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
1383 << ": Function: "
1384 << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
1385 << ": MI instruction count changed from "
1386 << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
1387 FnCountBefore)
1388 << " to "
1389 << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
1390 FnCountAfter)
1391 << "; Delta: "
1392 << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
1393 return R;
1394 });
1395 }
1396}
1397
1398void MachineOutliner::initializeOutlinerMode(const Module &M) {
1399 if (DisableGlobalOutlining)
1400 return;
1401
1402 if (auto *IndexWrapperPass =
1403 getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) {
1404 auto *TheIndex = IndexWrapperPass->getIndex();
1405 // (Full)LTO module does not have functions added to the index.
1406 // In this case, we run the outliner without using codegen data as usual.
1407 if (TheIndex && !TheIndex->hasExportedFunctions(M))
1408 return;
1409 }
1410
1411 // When codegen data write is enabled, we want to write the local outlined
1412 // hash tree to the custom section, `__llvm_outline`.
1413 // When the outlined hash tree is available from the previous codegen data,
1414 // we want to read it to optimistically create global outlining candidates.
1415 if (cgdata::emitCGData()) {
1416 OutlinerMode = CGDataMode::Write;
1417 // Create a local outlined hash tree to be published.
1418 LocalHashTree = std::make_unique<OutlinedHashTree>();
1419 // We don't need to read the outlined hash tree from the previous codegen
1420 } else if (cgdata::hasOutlinedHashTree())
1421 OutlinerMode = CGDataMode::Read;
1422}
1423
1424void MachineOutliner::emitOutlinedHashTree(Module &M) {
1425 assert(LocalHashTree);
1426 if (!LocalHashTree->empty()) {
1427 LLVM_DEBUG({
1428 dbgs() << "Emit outlined hash tree. Size: " << LocalHashTree->size()
1429 << "\n";
1430 });
1431 SmallVector<char> Buf;
1432 raw_svector_ostream OS(Buf);
1433
1434 OutlinedHashTreeRecord HTR(std::move(LocalHashTree));
1435 HTR.serialize(OS);
1436
1437 llvm::StringRef Data(Buf.data(), Buf.size());
1438 std::unique_ptr<MemoryBuffer> Buffer =
1439 MemoryBuffer::getMemBuffer(InputData: Data, BufferName: "in-memory outlined hash tree", RequiresNullTerminator: false);
1440
1441 Triple TT(M.getTargetTriple());
1442 embedBufferInModule(
1443 M, Buf: *Buffer,
1444 SectionName: getCodeGenDataSectionName(CGSK: CG_outline, OF: TT.getObjectFormat()));
1445 }
1446}
1447
1448bool MachineOutliner::runOnModule(Module &M) {
1449 if (skipModule(M))
1450 return false;
1451
1452 // Check if there's anything in the module. If it's empty, then there's
1453 // nothing to outline.
1454 if (M.empty())
1455 return false;
1456
1457 // Initialize the outliner mode.
1458 initializeOutlinerMode(M);
1459
1460 MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
1461 TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1462
1463 // Number to append to the current outlined function.
1464 unsigned OutlinedFunctionNum = 0;
1465
1466 OutlineRepeatedNum = 0;
1467 if (!doOutline(M, OutlinedFunctionNum))
1468 return false;
1469
1470 for (unsigned I = 0; I < OutlinerReruns; ++I) {
1471 OutlinedFunctionNum = 0;
1472 OutlineRepeatedNum++;
1473 if (!doOutline(M, OutlinedFunctionNum)) {
1474 LLVM_DEBUG({
1475 dbgs() << "Did not outline on iteration " << I + 2 << " out of "
1476 << OutlinerReruns + 1 << "\n";
1477 });
1478 break;
1479 }
1480 }
1481
1482 if (OutlinerMode == CGDataMode::Write)
1483 emitOutlinedHashTree(M);
1484
1485 return true;
1486}
1487
1488bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
1489 // If the user passed -enable-machine-outliner=always or
1490 // -enable-machine-outliner, the pass will run on all functions in the module.
1491 // Otherwise, if the target supports default outlining, it will run on all
1492 // functions deemed by the target to be worth outlining from by default. Tell
1493 // the user how the outliner is running.
1494 LLVM_DEBUG({
1495 dbgs() << "Machine Outliner: Running on ";
1496 switch (RunOutlinerMode) {
1497 case RunOutliner::AlwaysOutline:
1498 dbgs() << "all functions";
1499 break;
1500 case RunOutliner::OptimisticPGO:
1501 dbgs() << "optimistically cold functions";
1502 break;
1503 case RunOutliner::ConservativePGO:
1504 dbgs() << "conservatively cold functions";
1505 break;
1506 case RunOutliner::TargetDefault:
1507 dbgs() << "target-default functions";
1508 break;
1509 case RunOutliner::NeverOutline:
1510 llvm_unreachable("should not outline");
1511 }
1512 dbgs() << "\n";
1513 });
1514
1515 // If the user specifies that they want to outline from linkonceodrs, set
1516 // it here.
1517 OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
1518 InstructionMapper Mapper(*MMI);
1519
1520 // Prepare instruction mappings for the suffix tree.
1521 populateMapper(Mapper, M);
1522 std::vector<std::unique_ptr<OutlinedFunction>> FunctionList;
1523
1524 // Find all of the outlining candidates.
1525 if (OutlinerMode == CGDataMode::Read)
1526 findGlobalCandidates(Mapper, FunctionList);
1527 else
1528 findCandidates(Mapper, FunctionList);
1529
1530 // If we've requested size remarks, then collect the MI counts of every
1531 // function before outlining, and the MI counts after outlining.
1532 // FIXME: This shouldn't be in the outliner at all; it should ultimately be
1533 // the pass manager's responsibility.
1534 // This could pretty easily be placed in outline instead, but because we
1535 // really ultimately *don't* want this here, it's done like this for now
1536 // instead.
1537
1538 // Check if we want size remarks.
1539 bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
1540 StringMap<unsigned> FunctionToInstrCount;
1541 if (ShouldEmitSizeRemarks)
1542 initSizeRemarkInfo(M, FunctionToInstrCount);
1543
1544 // Outline each of the candidates and return true if something was outlined.
1545 bool OutlinedSomething =
1546 outline(M, FunctionList, Mapper, OutlinedFunctionNum);
1547
1548 // If we outlined something, we definitely changed the MI count of the
1549 // module. If we've asked for size remarks, then output them.
1550 // FIXME: This should be in the pass manager.
1551 if (ShouldEmitSizeRemarks && OutlinedSomething)
1552 emitInstrCountChangedRemark(M, FunctionToInstrCount);
1553
1554 LLVM_DEBUG({
1555 if (!OutlinedSomething)
1556 dbgs() << "Stopped outlining at iteration " << OutlineRepeatedNum
1557 << " because no changes were found.\n";
1558 });
1559
1560 return OutlinedSomething;
1561}
1562