1//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Replaces repeated sequences of instructions with function calls.
11///
12/// This works by placing every instruction from every basic block in a
13/// suffix tree, and repeatedly querying that tree for repeated sequences of
14/// instructions. If a sequence of instructions appears often, then it ought
15/// to be beneficial to pull out into a function.
16///
17/// The MachineOutliner communicates with a given target using hooks defined in
18/// TargetInstrInfo.h. The target supplies the outliner with information on how
19/// a specific sequence of instructions should be outlined. This information
20/// is used to deduce the number of instructions necessary to
21///
22/// * Create an outlined function
23/// * Call that outlined function
24///
25/// Targets must implement
26/// * getOutliningCandidateInfo
27/// * buildOutlinedFrame
28/// * insertOutlinedCall
29/// * isFunctionSafeToOutlineFrom
30///
31/// in order to make use of the MachineOutliner.
32///
33/// This was originally presented at the 2016 LLVM Developers' Meeting in the
34/// talk "Reducing Code Size Using Outlining". For a high-level overview of
35/// how this pass works, the talk is available on YouTube at
36///
37/// https://www.youtube.com/watch?v=yorld-WSOeU
38///
39/// The slides for the talk are available at
40///
41/// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
42///
43/// The talk provides an overview of how the outliner finds candidates and
44/// ultimately outlines them. It describes how the main data structure for this
45/// pass, the suffix tree, is queried and purged for candidates. It also gives
46/// a simplified suffix tree construction algorithm for suffix trees based off
47/// of the algorithm actually used here, Ukkonen's algorithm.
48///
49/// For the original RFC for this pass, please see
50///
51/// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
52///
53/// For more information on the suffix tree data structure, please see
54/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
55///
56//===----------------------------------------------------------------------===//
57#include "llvm/CodeGen/MachineOutliner.h"
58#include "llvm/ADT/DenseMap.h"
59#include "llvm/ADT/SmallSet.h"
60#include "llvm/ADT/Statistic.h"
61#include "llvm/ADT/Twine.h"
62#include "llvm/Analysis/BlockFrequencyInfo.h"
63#include "llvm/Analysis/ModuleSummaryAnalysis.h"
64#include "llvm/Analysis/OptimizationRemarkEmitter.h"
65#include "llvm/Analysis/ProfileSummaryInfo.h"
66#include "llvm/CGData/CodeGenDataReader.h"
67#include "llvm/CodeGen/LivePhysRegs.h"
68#include "llvm/CodeGen/MachineInstrBundle.h"
69#include "llvm/CodeGen/MachineModuleInfo.h"
70#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
71#include "llvm/CodeGen/Passes.h"
72#include "llvm/CodeGen/TargetInstrInfo.h"
73#include "llvm/CodeGen/TargetPassConfig.h"
74#include "llvm/CodeGen/TargetSubtargetInfo.h"
75#include "llvm/IR/DIBuilder.h"
76#include "llvm/IR/IRBuilder.h"
77#include "llvm/IR/Mangler.h"
78#include "llvm/IR/Module.h"
79#include "llvm/InitializePasses.h"
80#include "llvm/Support/CommandLine.h"
81#include "llvm/Support/Debug.h"
82#include "llvm/Support/SuffixTree.h"
83#include "llvm/Support/raw_ostream.h"
84#include "llvm/Target/TargetMachine.h"
85#include "llvm/Transforms/Utils/ModuleUtils.h"
86#include <tuple>
87#include <vector>
88
89#define DEBUG_TYPE "machine-outliner"
90
91using namespace llvm;
92using namespace ore;
93using namespace outliner;
94
95// Statistics for outlined functions.
96STATISTIC(NumOutlined, "Number of candidates outlined");
97STATISTIC(FunctionsCreated, "Number of functions created");
98
99// Statistics for instruction mapping.
100STATISTIC(NumLegalInUnsignedVec, "Outlinable instructions mapped");
101STATISTIC(NumIllegalInUnsignedVec,
102 "Unoutlinable instructions mapped + number of sentinel values");
103STATISTIC(NumSentinels, "Sentinel values inserted during mapping");
104STATISTIC(NumInvisible,
105 "Invisible instructions skipped during mapping");
106STATISTIC(UnsignedVecSize,
107 "Total number of instructions mapped and saved to mapping vector");
108STATISTIC(StableHashAttempts,
109 "Count of hashing attempts made for outlined functions");
110STATISTIC(StableHashDropped,
111 "Count of unsuccessful hashing attempts for outlined functions");
112STATISTIC(NumRemovedLOHs, "Total number of Linker Optimization Hints removed");
113STATISTIC(NumPGOBlockedOutlined,
114 "Number of times outlining was blocked by PGO");
115STATISTIC(NumPGOAllowedCold,
116 "Number of times outlining was allowed from cold functions");
117STATISTIC(NumPGOConservativeBlockedOutlined,
118 "Number of times outlining was blocked conservatively when profile "
119 "counts were missing");
120STATISTIC(NumPGOOptimisticOutlined,
121 "Number of times outlining was allowed optimistically when profile "
122 "counts were missing");
123
124// Set to true if the user wants the outliner to run on linkonceodr linkage
125// functions. This is false by default because the linker can dedupe linkonceodr
126// functions. Since the outliner is confined to a single module (modulo LTO),
127// this is off by default. It should, however, be the default behaviour in
128// LTO.
129static cl::opt<bool> EnableLinkOnceODROutlining(
130 "enable-linkonceodr-outlining", cl::Hidden,
131 cl::desc("Enable the machine outliner on linkonceodr functions"),
132 cl::init(Val: false));
133
134/// Number of times to re-run the outliner. This is not the total number of runs
135/// as the outliner will run at least one time. The default value is set to 0,
136/// meaning the outliner will run one time and rerun zero times after that.
137static cl::opt<unsigned> OutlinerReruns(
138 "machine-outliner-reruns", cl::init(Val: 0), cl::Hidden,
139 cl::desc(
140 "Number of times to rerun the outliner after the initial outline"));
141
142static cl::opt<unsigned> OutlinerBenefitThreshold(
143 "outliner-benefit-threshold", cl::init(Val: 1), cl::Hidden,
144 cl::desc(
145 "The minimum size in bytes before an outlining candidate is accepted"));
146
147static cl::opt<bool> OutlinerLeafDescendants(
148 "outliner-leaf-descendants", cl::init(Val: true), cl::Hidden,
149 cl::desc("Consider all leaf descendants of internal nodes of the suffix "
150 "tree as candidates for outlining (if false, only leaf children "
151 "are considered)"));
152
153static cl::opt<bool>
154 DisableGlobalOutlining("disable-global-outlining", cl::Hidden,
155 cl::desc("Disable global outlining only by ignoring "
156 "the codegen data generation or use"),
157 cl::init(Val: false));
158
159static cl::opt<bool> AppendContentHashToOutlinedName(
160 "append-content-hash-outlined-name", cl::Hidden,
161 cl::desc("This appends the content hash to the globally outlined function "
162 "name. It's beneficial for enhancing the precision of the stable "
163 "hash and for ordering the outlined functions."),
164 cl::init(Val: true));
165
166namespace {
167
168/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
169struct InstructionMapper {
170 const MachineModuleInfo &MMI;
171
172 /// The next available integer to assign to a \p MachineInstr that
173 /// cannot be outlined.
174 ///
175 /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
176 unsigned IllegalInstrNumber = -3;
177
178 /// The next available integer to assign to a \p MachineInstr that can
179 /// be outlined.
180 unsigned LegalInstrNumber = 0;
181
182 /// Correspondence from \p MachineInstrs to unsigned integers.
183 DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
184 InstructionIntegerMap;
185
186 /// Correspondence between \p MachineBasicBlocks and target-defined flags.
187 DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
188
189 /// The vector of unsigned integers that the module is mapped to.
190 SmallVector<unsigned> UnsignedVec;
191
192 /// Stores the location of the instruction associated with the integer
193 /// at index i in \p UnsignedVec for each index i.
194 SmallVector<MachineBasicBlock::iterator> InstrList;
195
196 // Set if we added an illegal number in the previous step.
197 // Since each illegal number is unique, we only need one of them between
198 // each range of legal numbers. This lets us make sure we don't add more
199 // than one illegal number per range.
200 bool AddedIllegalLastTime = false;
201
202 /// Maps \p *It to a legal integer.
203 ///
204 /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
205 /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
206 ///
207 /// \returns The integer that \p *It was mapped to.
208 unsigned mapToLegalUnsigned(
209 MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
210 bool &HaveLegalRange, unsigned &NumLegalInBlock,
211 SmallVector<unsigned> &UnsignedVecForMBB,
212 SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
213 // We added something legal, so we should unset the AddedLegalLastTime
214 // flag.
215 AddedIllegalLastTime = false;
216
217 // If we have at least two adjacent legal instructions (which may have
218 // invisible instructions in between), remember that.
219 if (CanOutlineWithPrevInstr)
220 HaveLegalRange = true;
221 CanOutlineWithPrevInstr = true;
222
223 // Keep track of the number of legal instructions we insert.
224 NumLegalInBlock++;
225
226 // Get the integer for this instruction or give it the current
227 // LegalInstrNumber.
228 InstrListForMBB.push_back(Elt: It);
229 MachineInstr &MI = *It;
230 bool WasInserted;
231 DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
232 ResultIt;
233 std::tie(args&: ResultIt, args&: WasInserted) =
234 InstructionIntegerMap.insert(KV: std::make_pair(x: &MI, y&: LegalInstrNumber));
235 unsigned MINumber = ResultIt->second;
236
237 // There was an insertion.
238 if (WasInserted)
239 LegalInstrNumber++;
240
241 UnsignedVecForMBB.push_back(Elt: MINumber);
242
243 // Make sure we don't overflow or use any integers reserved by the DenseMap.
244 if (LegalInstrNumber >= IllegalInstrNumber)
245 report_fatal_error(reason: "Instruction mapping overflow!");
246
247 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
248 "Tried to assign DenseMap tombstone or empty key to instruction.");
249 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
250 "Tried to assign DenseMap tombstone or empty key to instruction.");
251
252 // Statistics.
253 ++NumLegalInUnsignedVec;
254 return MINumber;
255 }
256
257 /// Maps \p *It to an illegal integer.
258 ///
259 /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
260 /// IllegalInstrNumber.
261 ///
262 /// \returns The integer that \p *It was mapped to.
263 unsigned mapToIllegalUnsigned(
264 MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
265 SmallVector<unsigned> &UnsignedVecForMBB,
266 SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
267 // Can't outline an illegal instruction. Set the flag.
268 CanOutlineWithPrevInstr = false;
269
270 // Only add one illegal number per range of legal numbers.
271 if (AddedIllegalLastTime)
272 return IllegalInstrNumber;
273
274 // Remember that we added an illegal number last time.
275 AddedIllegalLastTime = true;
276 unsigned MINumber = IllegalInstrNumber;
277
278 InstrListForMBB.push_back(Elt: It);
279 UnsignedVecForMBB.push_back(Elt: IllegalInstrNumber);
280 IllegalInstrNumber--;
281 // Statistics.
282 ++NumIllegalInUnsignedVec;
283
284 assert(LegalInstrNumber < IllegalInstrNumber &&
285 "Instruction mapping overflow!");
286
287 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
288 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
289
290 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
291 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
292
293 return MINumber;
294 }
295
296 /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
297 /// and appends it to \p UnsignedVec and \p InstrList.
298 ///
299 /// Two instructions are assigned the same integer if they are identical.
300 /// If an instruction is deemed unsafe to outline, then it will be assigned an
301 /// unique integer. The resulting mapping is placed into a suffix tree and
302 /// queried for candidates.
303 ///
304 /// \param MBB The \p MachineBasicBlock to be translated into integers.
305 /// \param TII \p TargetInstrInfo for the function.
306 void convertToUnsignedVec(MachineBasicBlock &MBB,
307 const TargetInstrInfo &TII) {
308 LLVM_DEBUG(dbgs() << "*** Converting MBB '" << MBB.getName()
309 << "' to unsigned vector ***\n");
310 unsigned Flags = 0;
311
312 // Don't even map in this case.
313 if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
314 return;
315
316 auto OutlinableRanges = TII.getOutlinableRanges(MBB, Flags);
317 LLVM_DEBUG(dbgs() << MBB.getName() << ": " << OutlinableRanges.size()
318 << " outlinable range(s)\n");
319 if (OutlinableRanges.empty())
320 return;
321
322 // Store info for the MBB for later outlining.
323 MBBFlagsMap[&MBB] = Flags;
324
325 MachineBasicBlock::iterator It = MBB.begin();
326
327 // The number of instructions in this block that will be considered for
328 // outlining.
329 unsigned NumLegalInBlock = 0;
330
331 // True if we have at least two legal instructions which aren't separated
332 // by an illegal instruction.
333 bool HaveLegalRange = false;
334
335 // True if we can perform outlining given the last mapped (non-invisible)
336 // instruction. This lets us know if we have a legal range.
337 bool CanOutlineWithPrevInstr = false;
338
339 // FIXME: Should this all just be handled in the target, rather than using
340 // repeated calls to getOutliningType?
341 SmallVector<unsigned> UnsignedVecForMBB;
342 SmallVector<MachineBasicBlock::iterator> InstrListForMBB;
343
344 LLVM_DEBUG(dbgs() << "*** Mapping outlinable ranges ***\n");
345 for (auto &OutlinableRange : OutlinableRanges) {
346 auto OutlinableRangeBegin = OutlinableRange.first;
347 auto OutlinableRangeEnd = OutlinableRange.second;
348#ifndef NDEBUG
349 LLVM_DEBUG(
350 dbgs() << "Mapping "
351 << std::distance(OutlinableRangeBegin, OutlinableRangeEnd)
352 << " instruction range\n");
353 // Everything outside of an outlinable range is illegal.
354 unsigned NumSkippedInRange = 0;
355#endif
356 for (; It != OutlinableRangeBegin; ++It) {
357#ifndef NDEBUG
358 ++NumSkippedInRange;
359#endif
360 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
361 InstrListForMBB);
362 }
363#ifndef NDEBUG
364 LLVM_DEBUG(dbgs() << "Skipped " << NumSkippedInRange
365 << " instructions outside outlinable range\n");
366#endif
367 assert(It != MBB.end() && "Should still have instructions?");
368 // `It` is now positioned at the beginning of a range of instructions
369 // which may be outlinable. Check if each instruction is known to be safe.
370 for (; It != OutlinableRangeEnd; ++It) {
371 // Keep track of where this instruction is in the module.
372 switch (TII.getOutliningType(MMI, MIT&: It, Flags)) {
373 case InstrType::Illegal:
374 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
375 InstrListForMBB);
376 break;
377
378 case InstrType::Legal:
379 mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
380 NumLegalInBlock, UnsignedVecForMBB,
381 InstrListForMBB);
382 break;
383
384 case InstrType::LegalTerminator:
385 mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
386 NumLegalInBlock, UnsignedVecForMBB,
387 InstrListForMBB);
388 // The instruction also acts as a terminator, so we have to record
389 // that in the string.
390 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
391 InstrListForMBB);
392 break;
393
394 case InstrType::Invisible:
395 // Normally this is set by mapTo(Blah)Unsigned, but we just want to
396 // skip this instruction. So, unset the flag here.
397 ++NumInvisible;
398 AddedIllegalLastTime = false;
399 break;
400 }
401 }
402 }
403
404 LLVM_DEBUG(dbgs() << "HaveLegalRange = " << HaveLegalRange << "\n");
405
406 // Are there enough legal instructions in the block for outlining to be
407 // possible?
408 if (HaveLegalRange) {
409 // After we're done every insertion, uniquely terminate this part of the
410 // "string". This makes sure we won't match across basic block or function
411 // boundaries since the "end" is encoded uniquely and thus appears in no
412 // repeated substring.
413 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
414 InstrListForMBB);
415 ++NumSentinels;
416 append_range(C&: InstrList, R&: InstrListForMBB);
417 append_range(C&: UnsignedVec, R&: UnsignedVecForMBB);
418 }
419 }
420
421 InstructionMapper(const MachineModuleInfo &MMI_) : MMI(MMI_) {
422 // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
423 // changed.
424 static_assert(DenseMapInfo<unsigned>::getEmptyKey() ==
425 static_cast<unsigned>(-1));
426 static_assert(DenseMapInfo<unsigned>::getTombstoneKey() ==
427 static_cast<unsigned>(-2));
428 }
429};
430
431/// An interprocedural pass which finds repeated sequences of
432/// instructions and replaces them with calls to functions.
433///
434/// Each instruction is mapped to an unsigned integer and placed in a string.
435/// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
436/// is then repeatedly queried for repeated sequences of instructions. Each
437/// non-overlapping repeated sequence is then placed in its own
438/// \p MachineFunction and each instance is then replaced with a call to that
439/// function.
440struct MachineOutliner : public ModulePass {
441
442 static char ID;
443
444 MachineModuleInfo *MMI = nullptr;
445 const TargetMachine *TM = nullptr;
446
447 /// Set to true if the outliner should consider functions with
448 /// linkonceodr linkage.
449 bool OutlineFromLinkOnceODRs = false;
450
451 /// The current repeat number of machine outlining.
452 unsigned OutlineRepeatedNum = 0;
453
454 /// The mode for whether to run the outliner
455 /// Set to always-outline by default for compatibility with llc's -run-pass
456 /// option.
457 RunOutliner RunOutlinerMode = RunOutliner::AlwaysOutline;
458
459 /// This is a compact representation of hash sequences of outlined functions.
460 /// It is used when OutlinerMode = CGDataMode::Write.
461 /// The resulting hash tree will be emitted into __llvm_outlined section
462 /// which will be dead-stripped not going to the final binary.
463 /// A post-process using llvm-cgdata, lld, or ThinLTO can merge them into
464 /// a global oulined hash tree for the subsequent codegen.
465 std::unique_ptr<OutlinedHashTree> LocalHashTree;
466
467 /// The mode of the outliner.
468 /// When is's CGDataMode::None, candidates are populated with the suffix tree
469 /// within a module and outlined.
470 /// When it's CGDataMode::Write, in addition to CGDataMode::None, the hash
471 /// sequences of outlined functions are published into LocalHashTree.
472 /// When it's CGDataMode::Read, candidates are populated with the global
473 /// outlined hash tree that has been built by the previous codegen.
474 CGDataMode OutlinerMode = CGDataMode::None;
475
476 StringRef getPassName() const override { return "Machine Outliner"; }
477
478 void getAnalysisUsage(AnalysisUsage &AU) const override {
479 AU.addRequired<MachineModuleInfoWrapperPass>();
480 AU.addRequired<TargetPassConfig>();
481 AU.addPreserved<MachineModuleInfoWrapperPass>();
482 AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
483 if (RunOutlinerMode == RunOutliner::OptimisticPGO ||
484 RunOutlinerMode == RunOutliner::ConservativePGO) {
485 AU.addRequired<BlockFrequencyInfoWrapperPass>();
486 AU.addRequired<ProfileSummaryInfoWrapperPass>();
487 }
488 AU.setPreservesAll();
489 ModulePass::getAnalysisUsage(AU);
490 }
491
492 MachineOutliner() : ModulePass(ID) {}
493
494 /// Remark output explaining that not outlining a set of candidates would be
495 /// better than outlining that set.
496 void emitNotOutliningCheaperRemark(
497 unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
498 OutlinedFunction &OF);
499
500 /// Remark output explaining that a function was outlined.
501 void emitOutlinedFunctionRemark(OutlinedFunction &OF);
502
503 /// Find all repeated substrings that satisfy the outlining cost model by
504 /// constructing a suffix tree.
505 ///
506 /// If a substring appears at least twice, then it must be represented by
507 /// an internal node which appears in at least two suffixes. Each suffix
508 /// is represented by a leaf node. To do this, we visit each internal node
509 /// in the tree, using the leaf children of each internal node. If an
510 /// internal node represents a beneficial substring, then we use each of
511 /// its leaf children to find the locations of its substring.
512 ///
513 /// \param Mapper Contains outlining mapping information.
514 /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
515 /// each type of candidate.
516 void
517 findCandidates(InstructionMapper &Mapper,
518 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
519
520 /// Find all repeated substrings that match in the global outlined hash
521 /// tree built from the previous codegen.
522 ///
523 /// \param Mapper Contains outlining mapping information.
524 /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
525 /// each type of candidate.
526 void findGlobalCandidates(
527 InstructionMapper &Mapper,
528 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
529
530 /// Replace the sequences of instructions represented by \p OutlinedFunctions
531 /// with calls to functions.
532 ///
533 /// \param M The module we are outlining from.
534 /// \param FunctionList A list of functions to be inserted into the module.
535 /// \param Mapper Contains the instruction mappings for the module.
536 /// \param[out] OutlinedFunctionNum The outlined function number.
537 bool outline(Module &M,
538 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
539 InstructionMapper &Mapper, unsigned &OutlinedFunctionNum);
540
541 /// Creates a function for \p OF and inserts it into the module.
542 MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
543 InstructionMapper &Mapper,
544 unsigned Name);
545
546 /// Compute and publish the stable hash sequence of instructions in the
547 /// outlined function, \p MF. The parameter \p CandSize represents the number
548 /// of candidates that have identical instruction sequences to \p MF.
549 void computeAndPublishHashSequence(MachineFunction &MF, unsigned CandSize);
550
551 /// Initialize the outliner mode.
552 void initializeOutlinerMode(const Module &M);
553
554 /// Emit the outlined hash tree into __llvm_outline section.
555 void emitOutlinedHashTree(Module &M);
556
557 /// Calls 'doOutline()' 1 + OutlinerReruns times.
558 bool runOnModule(Module &M) override;
559
560 /// Construct a suffix tree on the instructions in \p M and outline repeated
561 /// strings from that tree.
562 bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
563
564 /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
565 /// function for remark emission.
566 DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
567 for (const Candidate &C : OF.Candidates)
568 if (MachineFunction *MF = C.getMF())
569 if (DISubprogram *SP = MF->getFunction().getSubprogram())
570 return SP;
571 return nullptr;
572 }
573
574 /// Populate and \p InstructionMapper with instruction-to-integer mappings.
575 /// These are used to construct a suffix tree.
576 void populateMapper(InstructionMapper &Mapper, Module &M);
577
578 /// Initialize information necessary to output a size remark.
579 /// FIXME: This should be handled by the pass manager, not the outliner.
580 /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
581 /// pass manager.
582 void initSizeRemarkInfo(const Module &M,
583 StringMap<unsigned> &FunctionToInstrCount);
584
585 /// Emit the remark.
586 // FIXME: This should be handled by the pass manager, not the outliner.
587 void
588 emitInstrCountChangedRemark(const Module &M,
589 const StringMap<unsigned> &FunctionToInstrCount);
590};
591} // Anonymous namespace.
592
593char MachineOutliner::ID = 0;
594
595ModulePass *llvm::createMachineOutlinerPass(RunOutliner RunOutlinerMode) {
596 MachineOutliner *OL = new MachineOutliner();
597 OL->RunOutlinerMode = RunOutlinerMode;
598 return OL;
599}
600
601INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
602 false)
603
604void MachineOutliner::emitNotOutliningCheaperRemark(
605 unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
606 OutlinedFunction &OF) {
607 // FIXME: Right now, we arbitrarily choose some Candidate from the
608 // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
609 // We should probably sort these by function name or something to make sure
610 // the remarks are stable.
611 Candidate &C = CandidatesForRepeatedSeq.front();
612 MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
613 MORE.emit(RemarkBuilder: [&]() {
614 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
615 C.front().getDebugLoc(), C.getMBB());
616 R << "Did not outline " << NV("Length", StringLen) << " instructions"
617 << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
618 << " locations."
619 << " Bytes from outlining all occurrences ("
620 << NV("OutliningCost", OF.getOutliningCost()) << ")"
621 << " >= Unoutlined instruction bytes ("
622 << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
623 << " (Also found at: ";
624
625 // Tell the user the other places the candidate was found.
626 for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
627 R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
628 CandidatesForRepeatedSeq[i].front().getDebugLoc());
629 if (i != e - 1)
630 R << ", ";
631 }
632
633 R << ")";
634 return R;
635 });
636}
637
638void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
639 MachineBasicBlock *MBB = &*OF.MF->begin();
640 MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
641 MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
642 MBB->findDebugLoc(MBBI: MBB->begin()), MBB);
643 R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
644 << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
645 << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
646 << " locations. "
647 << "(Found at: ";
648
649 // Tell the user the other places the candidate was found.
650 for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
651
652 R << NV((Twine("StartLoc") + Twine(i)).str(),
653 OF.Candidates[i].front().getDebugLoc());
654 if (i != e - 1)
655 R << ", ";
656 }
657
658 R << ")";
659
660 MORE.emit(OptDiag&: R);
661}
662
663struct MatchedEntry {
664 unsigned StartIdx;
665 unsigned EndIdx;
666 unsigned Count;
667 MatchedEntry(unsigned StartIdx, unsigned EndIdx, unsigned Count)
668 : StartIdx(StartIdx), EndIdx(EndIdx), Count(Count) {}
669 MatchedEntry() = delete;
670};
671
672// Find all matches in the global outlined hash tree.
673// It's quadratic complexity in theory, but it's nearly linear in practice
674// since the length of outlined sequences are small within a block.
675static SmallVector<MatchedEntry> getMatchedEntries(InstructionMapper &Mapper) {
676 auto &InstrList = Mapper.InstrList;
677 auto &UnsignedVec = Mapper.UnsignedVec;
678
679 SmallVector<MatchedEntry> MatchedEntries;
680 auto Size = UnsignedVec.size();
681
682 // Get the global outlined hash tree built from the previous run.
683 assert(cgdata::hasOutlinedHashTree());
684 const auto *RootNode = cgdata::getOutlinedHashTree()->getRoot();
685
686 auto getValidInstr = [&](unsigned Index) -> const MachineInstr * {
687 if (UnsignedVec[Index] >= Mapper.LegalInstrNumber)
688 return nullptr;
689 return &(*InstrList[Index]);
690 };
691
692 auto getStableHashAndFollow =
693 [](const MachineInstr &MI, const HashNode *CurrNode) -> const HashNode * {
694 stable_hash StableHash = stableHashValue(MI);
695 if (!StableHash)
696 return nullptr;
697 auto It = CurrNode->Successors.find(x: StableHash);
698 return (It == CurrNode->Successors.end()) ? nullptr : It->second.get();
699 };
700
701 for (unsigned I = 0; I < Size; ++I) {
702 const MachineInstr *MI = getValidInstr(I);
703 if (!MI || MI->isDebugInstr())
704 continue;
705 const HashNode *CurrNode = getStableHashAndFollow(*MI, RootNode);
706 if (!CurrNode)
707 continue;
708
709 for (unsigned J = I + 1; J < Size; ++J) {
710 const MachineInstr *MJ = getValidInstr(J);
711 if (!MJ)
712 break;
713 // Skip debug instructions as we did for the outlined function.
714 if (MJ->isDebugInstr())
715 continue;
716 CurrNode = getStableHashAndFollow(*MJ, CurrNode);
717 if (!CurrNode)
718 break;
719 // Even with a match ending with a terminal, we continue finding
720 // matches to populate all candidates.
721 if (auto Count = CurrNode->Terminals)
722 MatchedEntries.emplace_back(Args&: I, Args&: J, Args&: *Count);
723 }
724 }
725
726 return MatchedEntries;
727}
728
729void MachineOutliner::findGlobalCandidates(
730 InstructionMapper &Mapper,
731 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
732 FunctionList.clear();
733 auto &InstrList = Mapper.InstrList;
734 auto &MBBFlagsMap = Mapper.MBBFlagsMap;
735
736 std::vector<Candidate> CandidatesForRepeatedSeq;
737 for (auto &ME : getMatchedEntries(Mapper)) {
738 CandidatesForRepeatedSeq.clear();
739 MachineBasicBlock::iterator StartIt = InstrList[ME.StartIdx];
740 MachineBasicBlock::iterator EndIt = InstrList[ME.EndIdx];
741 auto Length = ME.EndIdx - ME.StartIdx + 1;
742 MachineBasicBlock *MBB = StartIt->getParent();
743 CandidatesForRepeatedSeq.emplace_back(args&: ME.StartIdx, args&: Length, args&: StartIt, args&: EndIt,
744 args&: MBB, args: FunctionList.size(),
745 args&: MBBFlagsMap[MBB]);
746 const TargetInstrInfo *TII =
747 MBB->getParent()->getSubtarget().getInstrInfo();
748 unsigned MinRepeats = 1;
749 std::optional<std::unique_ptr<OutlinedFunction>> OF =
750 TII->getOutliningCandidateInfo(MMI: *MMI, RepeatedSequenceLocs&: CandidatesForRepeatedSeq,
751 MinRepeats);
752 if (!OF.has_value() || OF.value()->Candidates.empty())
753 continue;
754 // We create a global candidate for each match.
755 assert(OF.value()->Candidates.size() == MinRepeats);
756 FunctionList.emplace_back(args: std::make_unique<GlobalOutlinedFunction>(
757 args: std::move(OF.value()), args&: ME.Count));
758 }
759}
760
761void MachineOutliner::findCandidates(
762 InstructionMapper &Mapper,
763 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
764 FunctionList.clear();
765 SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
766
767 // First, find all of the repeated substrings in the tree of minimum length
768 // 2.
769 std::vector<Candidate> CandidatesForRepeatedSeq;
770 LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n");
771 LLVM_DEBUG(
772 dbgs() << "Searching for overlaps in all repeated sequences...\n");
773 for (SuffixTree::RepeatedSubstring &RS : ST) {
774 CandidatesForRepeatedSeq.clear();
775 unsigned StringLen = RS.Length;
776 LLVM_DEBUG(dbgs() << " Sequence length: " << StringLen << "\n");
777 // Debug code to keep track of how many candidates we removed.
778#ifndef NDEBUG
779 unsigned NumDiscarded = 0;
780 unsigned NumKept = 0;
781#endif
782 // Sort the start indices so that we can efficiently check if candidates
783 // overlap with the ones we've already found for this sequence.
784 llvm::sort(C&: RS.StartIndices);
785 for (const unsigned &StartIdx : RS.StartIndices) {
786 // Trick: Discard some candidates that would be incompatible with the
787 // ones we've already found for this sequence. This will save us some
788 // work in candidate selection.
789 //
790 // If two candidates overlap, then we can't outline them both. This
791 // happens when we have candidates that look like, say
792 //
793 // AA (where each "A" is an instruction).
794 //
795 // We might have some portion of the module that looks like this:
796 // AAAAAA (6 A's)
797 //
798 // In this case, there are 5 different copies of "AA" in this range, but
799 // at most 3 can be outlined. If only outlining 3 of these is going to
800 // be unbeneficial, then we ought to not bother.
801 //
802 // Note that two things DON'T overlap when they look like this:
803 // start1...end1 .... start2...end2
804 // That is, one must either
805 // * End before the other starts
806 // * Start after the other ends
807 unsigned EndIdx = StartIdx + StringLen - 1;
808 if (!CandidatesForRepeatedSeq.empty() &&
809 StartIdx <= CandidatesForRepeatedSeq.back().getEndIdx()) {
810#ifndef NDEBUG
811 ++NumDiscarded;
812 LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx << ", "
813 << EndIdx << "]; overlaps with candidate @ ["
814 << CandidatesForRepeatedSeq.back().getStartIdx()
815 << ", " << CandidatesForRepeatedSeq.back().getEndIdx()
816 << "]\n");
817#endif
818 continue;
819 }
820 // It doesn't overlap with anything, so we can outline it.
821 // Each sequence is over [StartIt, EndIt].
822 // Save the candidate and its location.
823#ifndef NDEBUG
824 ++NumKept;
825#endif
826 MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
827 MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
828 MachineBasicBlock *MBB = StartIt->getParent();
829 CandidatesForRepeatedSeq.emplace_back(args: StartIdx, args&: StringLen, args&: StartIt, args&: EndIt,
830 args&: MBB, args: FunctionList.size(),
831 args&: Mapper.MBBFlagsMap[MBB]);
832 }
833#ifndef NDEBUG
834 LLVM_DEBUG(dbgs() << " Candidates discarded: " << NumDiscarded
835 << "\n");
836 LLVM_DEBUG(dbgs() << " Candidates kept: " << NumKept << "\n\n");
837#endif
838 unsigned MinRepeats = 2;
839
840 // We've found something we might want to outline.
841 // Create an OutlinedFunction to store it and check if it'd be beneficial
842 // to outline.
843 if (CandidatesForRepeatedSeq.size() < MinRepeats)
844 continue;
845
846 // Arbitrarily choose a TII from the first candidate.
847 // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
848 const TargetInstrInfo *TII =
849 CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
850
851 std::optional<std::unique_ptr<OutlinedFunction>> OF =
852 TII->getOutliningCandidateInfo(MMI: *MMI, RepeatedSequenceLocs&: CandidatesForRepeatedSeq,
853 MinRepeats);
854
855 // If we deleted too many candidates, then there's nothing worth outlining.
856 // FIXME: This should take target-specified instruction sizes into account.
857 if (!OF.has_value() || OF.value()->Candidates.size() < MinRepeats)
858 continue;
859
860 // Is it better to outline this candidate than not?
861 if (OF.value()->getBenefit() < OutlinerBenefitThreshold) {
862 emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq,
863 OF&: *OF.value());
864 continue;
865 }
866
867 FunctionList.emplace_back(args: std::move(OF.value()));
868 }
869}
870
871void MachineOutliner::computeAndPublishHashSequence(MachineFunction &MF,
872 unsigned CandSize) {
873 // Compute the hash sequence for the outlined function.
874 SmallVector<stable_hash> OutlinedHashSequence;
875 for (auto &MBB : MF) {
876 for (auto &NewMI : MBB) {
877 stable_hash Hash = stableHashValue(MI: NewMI);
878 if (!Hash) {
879 OutlinedHashSequence.clear();
880 break;
881 }
882 OutlinedHashSequence.push_back(Elt: Hash);
883 }
884 }
885
886 // Append a unique name based on the non-empty hash sequence.
887 if (AppendContentHashToOutlinedName && !OutlinedHashSequence.empty()) {
888 auto CombinedHash = stable_hash_combine(Buffer: OutlinedHashSequence);
889 auto NewName =
890 MF.getName().str() + ".content." + std::to_string(val: CombinedHash);
891 MF.getFunction().setName(NewName);
892 }
893
894 // Publish the non-empty hash sequence to the local hash tree.
895 if (OutlinerMode == CGDataMode::Write) {
896 StableHashAttempts++;
897 if (!OutlinedHashSequence.empty())
898 LocalHashTree->insert(SequencePair: {OutlinedHashSequence, CandSize});
899 else
900 StableHashDropped++;
901 }
902}
903
904MachineFunction *MachineOutliner::createOutlinedFunction(
905 Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
906
907 // Create the function name. This should be unique.
908 // FIXME: We should have a better naming scheme. This should be stable,
909 // regardless of changes to the outliner's cost model/traversal order.
910 std::string FunctionName = "OUTLINED_FUNCTION_";
911 if (OutlineRepeatedNum > 0)
912 FunctionName += std::to_string(val: OutlineRepeatedNum + 1) + "_";
913 FunctionName += std::to_string(val: Name);
914 LLVM_DEBUG(dbgs() << "NEW FUNCTION: " << FunctionName << "\n");
915
916 // Create the function using an IR-level function.
917 LLVMContext &C = M.getContext();
918 Function *F = Function::Create(Ty: FunctionType::get(Result: Type::getVoidTy(C), isVarArg: false),
919 Linkage: Function::ExternalLinkage, N: FunctionName, M);
920
921 // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
922 // which gives us better results when we outline from linkonceodr functions.
923 F->setLinkage(GlobalValue::InternalLinkage);
924 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
925
926 // Set optsize/minsize, so we don't insert padding between outlined
927 // functions.
928 F->addFnAttr(Kind: Attribute::OptimizeForSize);
929 F->addFnAttr(Kind: Attribute::MinSize);
930
931 Candidate &FirstCand = OF.Candidates.front();
932 const TargetInstrInfo &TII =
933 *FirstCand.getMF()->getSubtarget().getInstrInfo();
934
935 TII.mergeOutliningCandidateAttributes(F&: *F, Candidates&: OF.Candidates);
936
937 // Set uwtable, so we generate eh_frame.
938 UWTableKind UW = std::accumulate(
939 first: OF.Candidates.cbegin(), last: OF.Candidates.cend(), init: UWTableKind::None,
940 binary_op: [](UWTableKind K, const outliner::Candidate &C) {
941 return std::max(a: K, b: C.getMF()->getFunction().getUWTableKind());
942 });
943 F->setUWTableKind(UW);
944
945 BasicBlock *EntryBB = BasicBlock::Create(Context&: C, Name: "entry", Parent: F);
946 IRBuilder<> Builder(EntryBB);
947 Builder.CreateRetVoid();
948
949 MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
950 MachineFunction &MF = MMI.getOrCreateMachineFunction(F&: *F);
951 MF.setIsOutlined(true);
952 MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
953
954 // Insert the new function into the module.
955 MF.insert(MBBI: MF.begin(), MBB: &MBB);
956
957 MachineFunction *OriginalMF = FirstCand.front().getMF();
958 const std::vector<MCCFIInstruction> &Instrs =
959 OriginalMF->getFrameInstructions();
960 for (auto &MI : FirstCand) {
961 if (MI.isDebugInstr())
962 continue;
963
964 // Don't keep debug information for outlined instructions.
965 auto DL = DebugLoc();
966 if (MI.isCFIInstruction()) {
967 unsigned CFIIndex = MI.getOperand(i: 0).getCFIIndex();
968 MCCFIInstruction CFI = Instrs[CFIIndex];
969 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
970 .addCFIIndex(CFIIndex: MF.addFrameInst(Inst: CFI));
971 } else {
972 MachineInstr &NewMI = TII.duplicate(MBB, InsertBefore: MBB.end(), Orig: MI);
973 NewMI.dropMemRefs(MF);
974 NewMI.setDebugLoc(DL);
975 // Also clear debug locations on any bundled instructions.
976 if (NewMI.isBundledWithSucc()) {
977 auto BundleEnd = getBundleEnd(I: NewMI.getIterator());
978 for (auto I = std::next(x: NewMI.getIterator()); I != BundleEnd; ++I)
979 I->setDebugLoc(DL);
980 }
981 }
982 }
983
984 if (OutlinerMode != CGDataMode::None)
985 computeAndPublishHashSequence(MF, CandSize: OF.Candidates.size());
986
987 // Set normal properties for a late MachineFunction.
988 MF.getProperties().resetIsSSA();
989 MF.getProperties().setNoPHIs();
990 MF.getProperties().setNoVRegs();
991 MF.getProperties().setTracksLiveness();
992 MF.getRegInfo().freezeReservedRegs();
993
994 // Compute live-in set for outlined fn
995 const MachineRegisterInfo &MRI = MF.getRegInfo();
996 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
997 LivePhysRegs LiveIns(TRI);
998 for (auto &Cand : OF.Candidates) {
999 // Figure out live-ins at the first instruction.
1000 MachineBasicBlock &OutlineBB = *Cand.front().getParent();
1001 LivePhysRegs CandLiveIns(TRI);
1002 CandLiveIns.addLiveOuts(MBB: OutlineBB);
1003 for (const MachineInstr &MI :
1004 reverse(C: make_range(x: Cand.begin(), y: OutlineBB.end())))
1005 CandLiveIns.stepBackward(MI);
1006
1007 // The live-in set for the outlined function is the union of the live-ins
1008 // from all the outlining points.
1009 for (MCPhysReg Reg : CandLiveIns)
1010 LiveIns.addReg(Reg);
1011 }
1012 addLiveIns(MBB, LiveRegs: LiveIns);
1013
1014 TII.buildOutlinedFrame(MBB, MF, OF);
1015
1016 // If there's a DISubprogram associated with this outlined function, then
1017 // emit debug info for the outlined function.
1018 if (DISubprogram *SP = getSubprogramOrNull(OF)) {
1019 // We have a DISubprogram. Get its DICompileUnit.
1020 DICompileUnit *CU = SP->getUnit();
1021 DIBuilder DB(M, true, CU);
1022 DIFile *Unit = SP->getFile();
1023 Mangler Mg;
1024 // Get the mangled name of the function for the linkage name.
1025 std::string Dummy;
1026 raw_string_ostream MangledNameStream(Dummy);
1027 Mg.getNameWithPrefix(OS&: MangledNameStream, GV: F, CannotUsePrivateLabel: false);
1028
1029 DISubprogram *OutlinedSP = DB.createFunction(
1030 Scope: Unit /* Context */, Name: F->getName(), LinkageName: StringRef(Dummy), File: Unit /* File */,
1031 LineNo: 0 /* Line 0 is reserved for compiler-generated code. */,
1032 Ty: DB.createSubroutineType(ParameterTypes: DB.getOrCreateTypeArray(Elements: {})), /* void type */
1033 ScopeLine: 0, /* Line 0 is reserved for compiler-generated code. */
1034 Flags: DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
1035 /* Outlined code is optimized code by definition. */
1036 SPFlags: DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
1037
1038 // Attach subprogram to the function.
1039 F->setSubprogram(OutlinedSP);
1040 // We're done with the DIBuilder.
1041 DB.finalize();
1042 }
1043
1044 return &MF;
1045}
1046
1047bool MachineOutliner::outline(
1048 Module &M, std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
1049 InstructionMapper &Mapper, unsigned &OutlinedFunctionNum) {
1050 LLVM_DEBUG(dbgs() << "*** Outlining ***\n");
1051 LLVM_DEBUG(dbgs() << "NUMBER OF POTENTIAL FUNCTIONS: " << FunctionList.size()
1052 << "\n");
1053 bool OutlinedSomething = false;
1054
1055 // Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
1056 // The function with highest priority should be outlined first.
1057 stable_sort(Range&: FunctionList, C: [](const std::unique_ptr<OutlinedFunction> &LHS,
1058 const std::unique_ptr<OutlinedFunction> &RHS) {
1059 return LHS->getNotOutlinedCost() * RHS->getOutliningCost() >
1060 RHS->getNotOutlinedCost() * LHS->getOutliningCost();
1061 });
1062
1063 // Walk over each function, outlining them as we go along. Functions are
1064 // outlined greedily, based off the sort above.
1065 auto *UnsignedVecBegin = Mapper.UnsignedVec.begin();
1066 LLVM_DEBUG(dbgs() << "WALKING FUNCTION LIST\n");
1067 for (auto &OF : FunctionList) {
1068#ifndef NDEBUG
1069 auto NumCandidatesBefore = OF->Candidates.size();
1070#endif
1071 // If we outlined something that overlapped with a candidate in a previous
1072 // step, then we can't outline from it.
1073 erase_if(C&: OF->Candidates, P: [&UnsignedVecBegin](Candidate &C) {
1074 return std::any_of(first: UnsignedVecBegin + C.getStartIdx(),
1075 last: UnsignedVecBegin + C.getEndIdx() + 1, pred: [](unsigned I) {
1076 return I == static_cast<unsigned>(-1);
1077 });
1078 });
1079
1080#ifndef NDEBUG
1081 auto NumCandidatesAfter = OF->Candidates.size();
1082 LLVM_DEBUG(dbgs() << "PRUNED: " << NumCandidatesBefore - NumCandidatesAfter
1083 << "/" << NumCandidatesBefore << " candidates\n");
1084#endif
1085
1086 // If we made it unbeneficial to outline this function, skip it.
1087 if (OF->getBenefit() < OutlinerBenefitThreshold) {
1088 LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF->getBenefit()
1089 << " B) < threshold (" << OutlinerBenefitThreshold
1090 << " B)\n");
1091 continue;
1092 }
1093
1094 LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF->getBenefit()
1095 << " B) > threshold (" << OutlinerBenefitThreshold
1096 << " B)\n");
1097
1098 // Remove all Linker Optimization Hints from the candidates.
1099 // TODO: The intersection of the LOHs from all candidates should be legal in
1100 // the outlined function.
1101 SmallPtrSet<MachineInstr *, 2> MIs;
1102 for (Candidate &C : OF->Candidates) {
1103 for (MachineInstr &MI : C)
1104 MIs.insert(Ptr: &MI);
1105 NumRemovedLOHs += TM->clearLinkerOptimizationHints(MIs);
1106 MIs.clear();
1107 }
1108
1109 // It's beneficial. Create the function and outline its sequence's
1110 // occurrences.
1111 OF->MF = createOutlinedFunction(M, OF&: *OF, Mapper, Name: OutlinedFunctionNum);
1112 emitOutlinedFunctionRemark(OF&: *OF);
1113 FunctionsCreated++;
1114 OutlinedFunctionNum++; // Created a function, move to the next name.
1115 MachineFunction *MF = OF->MF;
1116 const TargetSubtargetInfo &STI = MF->getSubtarget();
1117 const TargetInstrInfo &TII = *STI.getInstrInfo();
1118
1119 // Replace occurrences of the sequence with calls to the new function.
1120 LLVM_DEBUG(dbgs() << "CREATE OUTLINED CALLS\n");
1121 for (Candidate &C : OF->Candidates) {
1122 MachineBasicBlock &MBB = *C.getMBB();
1123 MachineBasicBlock::iterator StartIt = C.begin();
1124 MachineBasicBlock::iterator EndIt = std::prev(x: C.end());
1125
1126 // Insert the call.
1127 auto CallInst = TII.insertOutlinedCall(M, MBB, It&: StartIt, MF&: *MF, C);
1128// Insert the call.
1129#ifndef NDEBUG
1130 auto MBBBeingOutlinedFromName =
1131 MBB.getName().empty() ? "<unknown>" : MBB.getName().str();
1132 auto MFBeingOutlinedFromName = MBB.getParent()->getName().empty()
1133 ? "<unknown>"
1134 : MBB.getParent()->getName().str();
1135 LLVM_DEBUG(dbgs() << " CALL: " << MF->getName() << " in "
1136 << MFBeingOutlinedFromName << ":"
1137 << MBBBeingOutlinedFromName << "\n");
1138 LLVM_DEBUG(dbgs() << " .. " << *CallInst);
1139#endif
1140
1141 // If the caller tracks liveness, then we need to make sure that
1142 // anything we outline doesn't break liveness assumptions. The outlined
1143 // functions themselves currently don't track liveness, but we should
1144 // make sure that the ranges we yank things out of aren't wrong.
1145 if (MBB.getParent()->getProperties().hasTracksLiveness()) {
1146 // The following code is to add implicit def operands to the call
1147 // instruction. It also updates call site information for moved
1148 // code.
1149 SmallSet<Register, 2> UseRegs, DefRegs;
1150 // Copy over the defs in the outlined range.
1151 // First inst in outlined range <-- Anything that's defined in this
1152 // ... .. range has to be added as an
1153 // implicit Last inst in outlined range <-- def to the call
1154 // instruction. Also remove call site information for outlined block
1155 // of code. The exposed uses need to be copied in the outlined range.
1156 for (MachineBasicBlock::reverse_iterator
1157 Iter = EndIt.getReverse(),
1158 Last = std::next(x: CallInst.getReverse());
1159 Iter != Last; Iter++) {
1160 MachineInstr *MI = &*Iter;
1161 SmallSet<Register, 2> InstrUseRegs;
1162 for (MachineOperand &MOP : MI->operands()) {
1163 // Skip over anything that isn't a register.
1164 if (!MOP.isReg())
1165 continue;
1166
1167 if (MOP.isDef()) {
1168 // Introduce DefRegs set to skip the redundant register.
1169 DefRegs.insert(V: MOP.getReg());
1170 if (UseRegs.count(V: MOP.getReg()) &&
1171 !InstrUseRegs.count(V: MOP.getReg()))
1172 // Since the regiester is modeled as defined,
1173 // it is not necessary to be put in use register set.
1174 UseRegs.erase(V: MOP.getReg());
1175 } else if (!MOP.isUndef()) {
1176 // Any register which is not undefined should
1177 // be put in the use register set.
1178 UseRegs.insert(V: MOP.getReg());
1179 InstrUseRegs.insert(V: MOP.getReg());
1180 }
1181 }
1182 if (MI->isCandidateForAdditionalCallInfo())
1183 MI->getMF()->eraseAdditionalCallInfo(MI);
1184 }
1185
1186 for (const Register &I : DefRegs)
1187 // If it's a def, add it to the call instruction.
1188 CallInst->addOperand(
1189 Op: MachineOperand::CreateReg(Reg: I, isDef: true, /* isDef = true */
1190 isImp: true /* isImp = true */));
1191
1192 for (const Register &I : UseRegs)
1193 // If it's a exposed use, add it to the call instruction.
1194 CallInst->addOperand(
1195 Op: MachineOperand::CreateReg(Reg: I, isDef: false, /* isDef = false */
1196 isImp: true /* isImp = true */));
1197 }
1198
1199 // Erase from the point after where the call was inserted up to, and
1200 // including, the final instruction in the sequence.
1201 // Erase needs one past the end, so we need std::next there too.
1202 MBB.erase(I: std::next(x: StartIt), E: std::next(x: EndIt));
1203
1204 // Keep track of what we removed by marking them all as -1.
1205 for (unsigned &I : make_range(x: UnsignedVecBegin + C.getStartIdx(),
1206 y: UnsignedVecBegin + C.getEndIdx() + 1))
1207 I = static_cast<unsigned>(-1);
1208 OutlinedSomething = true;
1209
1210 // Statistics.
1211 NumOutlined++;
1212 }
1213 }
1214
1215 LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n");
1216 return OutlinedSomething;
1217}
1218
1219static bool allowPGOOutlining(RunOutliner RunOutlinerMode,
1220 const ProfileSummaryInfo *PSI,
1221 const BlockFrequencyInfo *BFI,
1222 MachineBasicBlock &MBB) {
1223 if (RunOutlinerMode != RunOutliner::OptimisticPGO &&
1224 RunOutlinerMode != RunOutliner::ConservativePGO)
1225 return true;
1226 auto *MF = MBB.getParent();
1227 if (MF->getFunction().hasFnAttribute(Kind: Attribute::Cold)) {
1228 ++NumPGOAllowedCold;
1229 return true;
1230 }
1231
1232 auto *BB = MBB.getBasicBlock();
1233 if (BB && PSI && BFI)
1234 if (auto Count = BFI->getBlockProfileCount(BB))
1235 return *Count <= PSI->getOrCompColdCountThreshold();
1236
1237 if (RunOutlinerMode == RunOutliner::OptimisticPGO) {
1238 auto *TII = MF->getSubtarget().getInstrInfo();
1239 if (TII->shouldOutlineFromFunctionByDefault(MF&: *MF)) {
1240 // Profile data is unavailable, but we optimistically allow outlining
1241 ++NumPGOOptimisticOutlined;
1242 return true;
1243 }
1244 return false;
1245 }
1246 assert(RunOutlinerMode == RunOutliner::ConservativePGO);
1247 // Profile data is unavailable, so we conservatively block outlining
1248 ++NumPGOConservativeBlockedOutlined;
1249 return false;
1250}
1251
1252void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
1253 // Build instruction mappings for each function in the module. Start by
1254 // iterating over each Function in M.
1255 LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
1256 bool EnableProfileGuidedOutlining =
1257 RunOutlinerMode == RunOutliner::OptimisticPGO ||
1258 RunOutlinerMode == RunOutliner::ConservativePGO;
1259 ProfileSummaryInfo *PSI = nullptr;
1260 if (EnableProfileGuidedOutlining)
1261 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1262 for (Function &F : M) {
1263 LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
1264
1265 if (F.hasFnAttribute(Kind: Attribute::NoOutline)) {
1266 LLVM_DEBUG(dbgs() << "SKIP: Function has nooutline attribute\n");
1267 continue;
1268 }
1269
1270 // There's something in F. Check if it has a MachineFunction associated with
1271 // it.
1272 MachineFunction *MF = MMI->getMachineFunction(F);
1273
1274 // If it doesn't, then there's nothing to outline from. Move to the next
1275 // Function.
1276 if (!MF) {
1277 LLVM_DEBUG(dbgs() << "SKIP: Function does not have a MachineFunction\n");
1278 continue;
1279 }
1280
1281 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1282 BlockFrequencyInfo *BFI = nullptr;
1283 if (EnableProfileGuidedOutlining && F.hasProfileData())
1284 BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
1285 if (RunOutlinerMode == RunOutliner::TargetDefault &&
1286 !TII->shouldOutlineFromFunctionByDefault(MF&: *MF)) {
1287 LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
1288 "function by default\n");
1289 continue;
1290 }
1291
1292 // We have a MachineFunction. Ask the target if it's suitable for outlining.
1293 // If it isn't, then move on to the next Function in the module.
1294 if (!TII->isFunctionSafeToOutlineFrom(MF&: *MF, OutlineFromLinkOnceODRs)) {
1295 LLVM_DEBUG(dbgs() << "SKIP: " << MF->getName()
1296 << ": unsafe to outline from\n");
1297 continue;
1298 }
1299
1300 // We have a function suitable for outlining. Iterate over every
1301 // MachineBasicBlock in MF and try to map its instructions to a list of
1302 // unsigned integers.
1303 const unsigned MinMBBSize = 2;
1304
1305 for (MachineBasicBlock &MBB : *MF) {
1306 LLVM_DEBUG(dbgs() << " MAPPING MBB: '" << MBB.getName() << "'\n");
1307 // If there isn't anything in MBB, then there's no point in outlining from
1308 // it.
1309 // If there are fewer than 2 instructions in the MBB, then it can't ever
1310 // contain something worth outlining.
1311 // FIXME: This should be based off of the maximum size in B of an outlined
1312 // call versus the size in B of the MBB.
1313 if (MBB.size() < MinMBBSize) {
1314 LLVM_DEBUG(dbgs() << " SKIP: MBB size less than minimum size of "
1315 << MinMBBSize << "\n");
1316 continue;
1317 }
1318
1319 // Check if MBB could be the target of an indirect branch. If it is, then
1320 // we don't want to outline from it.
1321 if (MBB.hasAddressTaken()) {
1322 LLVM_DEBUG(dbgs() << " SKIP: MBB's address is taken\n");
1323 continue;
1324 }
1325
1326 if (!allowPGOOutlining(RunOutlinerMode, PSI, BFI, MBB)) {
1327 ++NumPGOBlockedOutlined;
1328 continue;
1329 }
1330
1331 // MBB is suitable for outlining. Map it to a list of unsigneds.
1332 Mapper.convertToUnsignedVec(MBB, TII: *TII);
1333 }
1334 }
1335 // Statistics.
1336 UnsignedVecSize = Mapper.UnsignedVec.size();
1337}
1338
1339void MachineOutliner::initSizeRemarkInfo(
1340 const Module &M, StringMap<unsigned> &FunctionToInstrCount) {
1341 // Collect instruction counts for every function. We'll use this to emit
1342 // per-function size remarks later.
1343 for (const Function &F : M) {
1344 MachineFunction *MF = MMI->getMachineFunction(F);
1345
1346 // We only care about MI counts here. If there's no MachineFunction at this
1347 // point, then there won't be after the outliner runs, so let's move on.
1348 if (!MF)
1349 continue;
1350 FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
1351 }
1352}
1353
1354void MachineOutliner::emitInstrCountChangedRemark(
1355 const Module &M, const StringMap<unsigned> &FunctionToInstrCount) {
1356 // Iterate over each function in the module and emit remarks.
1357 // Note that we won't miss anything by doing this, because the outliner never
1358 // deletes functions.
1359 for (const Function &F : M) {
1360 MachineFunction *MF = MMI->getMachineFunction(F);
1361
1362 // The outliner never deletes functions. If we don't have a MF here, then we
1363 // didn't have one prior to outlining either.
1364 if (!MF)
1365 continue;
1366
1367 std::string Fname = std::string(F.getName());
1368 unsigned FnCountAfter = MF->getInstructionCount();
1369 unsigned FnCountBefore = 0;
1370
1371 // Check if the function was recorded before.
1372 auto It = FunctionToInstrCount.find(Key: Fname);
1373
1374 // Did we have a previously-recorded size? If yes, then set FnCountBefore
1375 // to that.
1376 if (It != FunctionToInstrCount.end())
1377 FnCountBefore = It->second;
1378
1379 // Compute the delta and emit a remark if there was a change.
1380 int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
1381 static_cast<int64_t>(FnCountBefore);
1382 if (FnDelta == 0)
1383 continue;
1384
1385 MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
1386 MORE.emit(RemarkBuilder: [&]() {
1387 MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
1388 DiagnosticLocation(), &MF->front());
1389 R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
1390 << ": Function: "
1391 << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
1392 << ": MI instruction count changed from "
1393 << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
1394 FnCountBefore)
1395 << " to "
1396 << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
1397 FnCountAfter)
1398 << "; Delta: "
1399 << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
1400 return R;
1401 });
1402 }
1403}
1404
1405void MachineOutliner::initializeOutlinerMode(const Module &M) {
1406 if (DisableGlobalOutlining)
1407 return;
1408
1409 if (auto *IndexWrapperPass =
1410 getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) {
1411 auto *TheIndex = IndexWrapperPass->getIndex();
1412 // (Full)LTO module does not have functions added to the index.
1413 // In this case, we run the outliner without using codegen data as usual.
1414 if (TheIndex && !TheIndex->hasExportedFunctions(M))
1415 return;
1416 }
1417
1418 // When codegen data write is enabled, we want to write the local outlined
1419 // hash tree to the custom section, `__llvm_outline`.
1420 // When the outlined hash tree is available from the previous codegen data,
1421 // we want to read it to optimistically create global outlining candidates.
1422 if (cgdata::emitCGData()) {
1423 OutlinerMode = CGDataMode::Write;
1424 // Create a local outlined hash tree to be published.
1425 LocalHashTree = std::make_unique<OutlinedHashTree>();
1426 // We don't need to read the outlined hash tree from the previous codegen
1427 } else if (cgdata::hasOutlinedHashTree())
1428 OutlinerMode = CGDataMode::Read;
1429}
1430
1431void MachineOutliner::emitOutlinedHashTree(Module &M) {
1432 assert(LocalHashTree);
1433 if (!LocalHashTree->empty()) {
1434 LLVM_DEBUG({
1435 dbgs() << "Emit outlined hash tree. Size: " << LocalHashTree->size()
1436 << "\n";
1437 });
1438 SmallVector<char> Buf;
1439 raw_svector_ostream OS(Buf);
1440
1441 OutlinedHashTreeRecord HTR(std::move(LocalHashTree));
1442 HTR.serialize(OS);
1443
1444 llvm::StringRef Data(Buf.data(), Buf.size());
1445 std::unique_ptr<MemoryBuffer> Buffer =
1446 MemoryBuffer::getMemBuffer(InputData: Data, BufferName: "in-memory outlined hash tree", RequiresNullTerminator: false);
1447
1448 Triple TT(M.getTargetTriple());
1449 embedBufferInModule(
1450 M, Buf: *Buffer,
1451 SectionName: getCodeGenDataSectionName(CGSK: CG_outline, OF: TT.getObjectFormat()));
1452 }
1453}
1454
1455bool MachineOutliner::runOnModule(Module &M) {
1456 if (skipModule(M))
1457 return false;
1458
1459 // Check if there's anything in the module. If it's empty, then there's
1460 // nothing to outline.
1461 if (M.empty())
1462 return false;
1463
1464 // Initialize the outliner mode.
1465 initializeOutlinerMode(M);
1466
1467 MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
1468 TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1469
1470 // Number to append to the current outlined function.
1471 unsigned OutlinedFunctionNum = 0;
1472
1473 OutlineRepeatedNum = 0;
1474 if (!doOutline(M, OutlinedFunctionNum))
1475 return false;
1476
1477 for (unsigned I = 0; I < OutlinerReruns; ++I) {
1478 OutlinedFunctionNum = 0;
1479 OutlineRepeatedNum++;
1480 if (!doOutline(M, OutlinedFunctionNum)) {
1481 LLVM_DEBUG({
1482 dbgs() << "Did not outline on iteration " << I + 2 << " out of "
1483 << OutlinerReruns + 1 << "\n";
1484 });
1485 break;
1486 }
1487 }
1488
1489 if (OutlinerMode == CGDataMode::Write)
1490 emitOutlinedHashTree(M);
1491
1492 return true;
1493}
1494
1495bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
1496 // If the user passed -enable-machine-outliner=always or
1497 // -enable-machine-outliner, the pass will run on all functions in the module.
1498 // Otherwise, if the target supports default outlining, it will run on all
1499 // functions deemed by the target to be worth outlining from by default. Tell
1500 // the user how the outliner is running.
1501 LLVM_DEBUG({
1502 dbgs() << "Machine Outliner: Running on ";
1503 switch (RunOutlinerMode) {
1504 case RunOutliner::AlwaysOutline:
1505 dbgs() << "all functions";
1506 break;
1507 case RunOutliner::OptimisticPGO:
1508 dbgs() << "optimistically cold functions";
1509 break;
1510 case RunOutliner::ConservativePGO:
1511 dbgs() << "conservatively cold functions";
1512 break;
1513 case RunOutliner::TargetDefault:
1514 dbgs() << "target-default functions";
1515 break;
1516 case RunOutliner::NeverOutline:
1517 llvm_unreachable("should not outline");
1518 }
1519 dbgs() << "\n";
1520 });
1521
1522 // If the user specifies that they want to outline from linkonceodrs, set
1523 // it here.
1524 OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
1525 InstructionMapper Mapper(*MMI);
1526
1527 // Prepare instruction mappings for the suffix tree.
1528 populateMapper(Mapper, M);
1529 std::vector<std::unique_ptr<OutlinedFunction>> FunctionList;
1530
1531 // Find all of the outlining candidates.
1532 if (OutlinerMode == CGDataMode::Read)
1533 findGlobalCandidates(Mapper, FunctionList);
1534 else
1535 findCandidates(Mapper, FunctionList);
1536
1537 // If we've requested size remarks, then collect the MI counts of every
1538 // function before outlining, and the MI counts after outlining.
1539 // FIXME: This shouldn't be in the outliner at all; it should ultimately be
1540 // the pass manager's responsibility.
1541 // This could pretty easily be placed in outline instead, but because we
1542 // really ultimately *don't* want this here, it's done like this for now
1543 // instead.
1544
1545 // Check if we want size remarks.
1546 bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
1547 StringMap<unsigned> FunctionToInstrCount;
1548 if (ShouldEmitSizeRemarks)
1549 initSizeRemarkInfo(M, FunctionToInstrCount);
1550
1551 // Outline each of the candidates and return true if something was outlined.
1552 bool OutlinedSomething =
1553 outline(M, FunctionList, Mapper, OutlinedFunctionNum);
1554
1555 // If we outlined something, we definitely changed the MI count of the
1556 // module. If we've asked for size remarks, then output them.
1557 // FIXME: This should be in the pass manager.
1558 if (ShouldEmitSizeRemarks && OutlinedSomething)
1559 emitInstrCountChangedRemark(M, FunctionToInstrCount);
1560
1561 LLVM_DEBUG({
1562 if (!OutlinedSomething)
1563 dbgs() << "Stopped outlining at iteration " << OutlineRepeatedNum
1564 << " because no changes were found.\n";
1565 });
1566
1567 return OutlinedSomething;
1568}
1569