1//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Replaces repeated sequences of instructions with function calls.
11///
12/// This works by placing every instruction from every basic block in a
13/// suffix tree, and repeatedly querying that tree for repeated sequences of
14/// instructions. If a sequence of instructions appears often, then it ought
15/// to be beneficial to pull out into a function.
16///
17/// The MachineOutliner communicates with a given target using hooks defined in
18/// TargetInstrInfo.h. The target supplies the outliner with information on how
19/// a specific sequence of instructions should be outlined. This information
20/// is used to deduce the number of instructions necessary to
21///
22/// * Create an outlined function
23/// * Call that outlined function
24///
25/// Targets must implement
26/// * getOutliningCandidateInfo
27/// * buildOutlinedFrame
28/// * insertOutlinedCall
29/// * isFunctionSafeToOutlineFrom
30///
31/// in order to make use of the MachineOutliner.
32///
33/// This was originally presented at the 2016 LLVM Developers' Meeting in the
34/// talk "Reducing Code Size Using Outlining". For a high-level overview of
35/// how this pass works, the talk is available on YouTube at
36///
37/// https://www.youtube.com/watch?v=yorld-WSOeU
38///
39/// The slides for the talk are available at
40///
41/// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
42///
43/// The talk provides an overview of how the outliner finds candidates and
44/// ultimately outlines them. It describes how the main data structure for this
45/// pass, the suffix tree, is queried and purged for candidates. It also gives
46/// a simplified suffix tree construction algorithm for suffix trees based off
47/// of the algorithm actually used here, Ukkonen's algorithm.
48///
49/// For the original RFC for this pass, please see
50///
51/// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
52///
53/// For more information on the suffix tree data structure, please see
54/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
55///
56//===----------------------------------------------------------------------===//
57#include "llvm/CodeGen/MachineOutliner.h"
58#include "llvm/ADT/DenseMap.h"
59#include "llvm/ADT/SmallSet.h"
60#include "llvm/ADT/Statistic.h"
61#include "llvm/ADT/Twine.h"
62#include "llvm/Analysis/ModuleSummaryAnalysis.h"
63#include "llvm/Analysis/OptimizationRemarkEmitter.h"
64#include "llvm/CGData/CodeGenDataReader.h"
65#include "llvm/CodeGen/LivePhysRegs.h"
66#include "llvm/CodeGen/MachineModuleInfo.h"
67#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
68#include "llvm/CodeGen/Passes.h"
69#include "llvm/CodeGen/TargetInstrInfo.h"
70#include "llvm/CodeGen/TargetPassConfig.h"
71#include "llvm/CodeGen/TargetSubtargetInfo.h"
72#include "llvm/IR/DIBuilder.h"
73#include "llvm/IR/IRBuilder.h"
74#include "llvm/IR/Mangler.h"
75#include "llvm/IR/Module.h"
76#include "llvm/InitializePasses.h"
77#include "llvm/Support/CommandLine.h"
78#include "llvm/Support/Debug.h"
79#include "llvm/Support/SuffixTree.h"
80#include "llvm/Support/raw_ostream.h"
81#include "llvm/Target/TargetMachine.h"
82#include "llvm/Transforms/Utils/ModuleUtils.h"
83#include <tuple>
84#include <vector>
85
86#define DEBUG_TYPE "machine-outliner"
87
88using namespace llvm;
89using namespace ore;
90using namespace outliner;
91
92// Statistics for outlined functions.
93STATISTIC(NumOutlined, "Number of candidates outlined");
94STATISTIC(FunctionsCreated, "Number of functions created");
95
96// Statistics for instruction mapping.
97STATISTIC(NumLegalInUnsignedVec, "Outlinable instructions mapped");
98STATISTIC(NumIllegalInUnsignedVec,
99 "Unoutlinable instructions mapped + number of sentinel values");
100STATISTIC(NumSentinels, "Sentinel values inserted during mapping");
101STATISTIC(NumInvisible,
102 "Invisible instructions skipped during mapping");
103STATISTIC(UnsignedVecSize,
104 "Total number of instructions mapped and saved to mapping vector");
105STATISTIC(StableHashAttempts,
106 "Count of hashing attempts made for outlined functions");
107STATISTIC(StableHashDropped,
108 "Count of unsuccessful hashing attempts for outlined functions");
109STATISTIC(NumRemovedLOHs, "Total number of Linker Optimization Hints removed");
110
111// Set to true if the user wants the outliner to run on linkonceodr linkage
112// functions. This is false by default because the linker can dedupe linkonceodr
113// functions. Since the outliner is confined to a single module (modulo LTO),
114// this is off by default. It should, however, be the default behaviour in
115// LTO.
116static cl::opt<bool> EnableLinkOnceODROutlining(
117 "enable-linkonceodr-outlining", cl::Hidden,
118 cl::desc("Enable the machine outliner on linkonceodr functions"),
119 cl::init(Val: false));
120
121/// Number of times to re-run the outliner. This is not the total number of runs
122/// as the outliner will run at least one time. The default value is set to 0,
123/// meaning the outliner will run one time and rerun zero times after that.
124static cl::opt<unsigned> OutlinerReruns(
125 "machine-outliner-reruns", cl::init(Val: 0), cl::Hidden,
126 cl::desc(
127 "Number of times to rerun the outliner after the initial outline"));
128
129static cl::opt<unsigned> OutlinerBenefitThreshold(
130 "outliner-benefit-threshold", cl::init(Val: 1), cl::Hidden,
131 cl::desc(
132 "The minimum size in bytes before an outlining candidate is accepted"));
133
134static cl::opt<bool> OutlinerLeafDescendants(
135 "outliner-leaf-descendants", cl::init(Val: true), cl::Hidden,
136 cl::desc("Consider all leaf descendants of internal nodes of the suffix "
137 "tree as candidates for outlining (if false, only leaf children "
138 "are considered)"));
139
140static cl::opt<bool>
141 DisableGlobalOutlining("disable-global-outlining", cl::Hidden,
142 cl::desc("Disable global outlining only by ignoring "
143 "the codegen data generation or use"),
144 cl::init(Val: false));
145
146static cl::opt<bool> AppendContentHashToOutlinedName(
147 "append-content-hash-outlined-name", cl::Hidden,
148 cl::desc("This appends the content hash to the globally outlined function "
149 "name. It's beneficial for enhancing the precision of the stable "
150 "hash and for ordering the outlined functions."),
151 cl::init(Val: true));
152
153namespace {
154
155/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
156struct InstructionMapper {
157 const MachineModuleInfo &MMI;
158
159 /// The next available integer to assign to a \p MachineInstr that
160 /// cannot be outlined.
161 ///
162 /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
163 unsigned IllegalInstrNumber = -3;
164
165 /// The next available integer to assign to a \p MachineInstr that can
166 /// be outlined.
167 unsigned LegalInstrNumber = 0;
168
169 /// Correspondence from \p MachineInstrs to unsigned integers.
170 DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
171 InstructionIntegerMap;
172
173 /// Correspondence between \p MachineBasicBlocks and target-defined flags.
174 DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
175
176 /// The vector of unsigned integers that the module is mapped to.
177 SmallVector<unsigned> UnsignedVec;
178
179 /// Stores the location of the instruction associated with the integer
180 /// at index i in \p UnsignedVec for each index i.
181 SmallVector<MachineBasicBlock::iterator> InstrList;
182
183 // Set if we added an illegal number in the previous step.
184 // Since each illegal number is unique, we only need one of them between
185 // each range of legal numbers. This lets us make sure we don't add more
186 // than one illegal number per range.
187 bool AddedIllegalLastTime = false;
188
189 /// Maps \p *It to a legal integer.
190 ///
191 /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
192 /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
193 ///
194 /// \returns The integer that \p *It was mapped to.
195 unsigned mapToLegalUnsigned(
196 MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
197 bool &HaveLegalRange, unsigned &NumLegalInBlock,
198 SmallVector<unsigned> &UnsignedVecForMBB,
199 SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
200 // We added something legal, so we should unset the AddedLegalLastTime
201 // flag.
202 AddedIllegalLastTime = false;
203
204 // If we have at least two adjacent legal instructions (which may have
205 // invisible instructions in between), remember that.
206 if (CanOutlineWithPrevInstr)
207 HaveLegalRange = true;
208 CanOutlineWithPrevInstr = true;
209
210 // Keep track of the number of legal instructions we insert.
211 NumLegalInBlock++;
212
213 // Get the integer for this instruction or give it the current
214 // LegalInstrNumber.
215 InstrListForMBB.push_back(Elt: It);
216 MachineInstr &MI = *It;
217 bool WasInserted;
218 DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
219 ResultIt;
220 std::tie(args&: ResultIt, args&: WasInserted) =
221 InstructionIntegerMap.insert(KV: std::make_pair(x: &MI, y&: LegalInstrNumber));
222 unsigned MINumber = ResultIt->second;
223
224 // There was an insertion.
225 if (WasInserted)
226 LegalInstrNumber++;
227
228 UnsignedVecForMBB.push_back(Elt: MINumber);
229
230 // Make sure we don't overflow or use any integers reserved by the DenseMap.
231 if (LegalInstrNumber >= IllegalInstrNumber)
232 report_fatal_error(reason: "Instruction mapping overflow!");
233
234 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
235 "Tried to assign DenseMap tombstone or empty key to instruction.");
236 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
237 "Tried to assign DenseMap tombstone or empty key to instruction.");
238
239 // Statistics.
240 ++NumLegalInUnsignedVec;
241 return MINumber;
242 }
243
244 /// Maps \p *It to an illegal integer.
245 ///
246 /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
247 /// IllegalInstrNumber.
248 ///
249 /// \returns The integer that \p *It was mapped to.
250 unsigned mapToIllegalUnsigned(
251 MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
252 SmallVector<unsigned> &UnsignedVecForMBB,
253 SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
254 // Can't outline an illegal instruction. Set the flag.
255 CanOutlineWithPrevInstr = false;
256
257 // Only add one illegal number per range of legal numbers.
258 if (AddedIllegalLastTime)
259 return IllegalInstrNumber;
260
261 // Remember that we added an illegal number last time.
262 AddedIllegalLastTime = true;
263 unsigned MINumber = IllegalInstrNumber;
264
265 InstrListForMBB.push_back(Elt: It);
266 UnsignedVecForMBB.push_back(Elt: IllegalInstrNumber);
267 IllegalInstrNumber--;
268 // Statistics.
269 ++NumIllegalInUnsignedVec;
270
271 assert(LegalInstrNumber < IllegalInstrNumber &&
272 "Instruction mapping overflow!");
273
274 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
275 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
276
277 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
278 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
279
280 return MINumber;
281 }
282
283 /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
284 /// and appends it to \p UnsignedVec and \p InstrList.
285 ///
286 /// Two instructions are assigned the same integer if they are identical.
287 /// If an instruction is deemed unsafe to outline, then it will be assigned an
288 /// unique integer. The resulting mapping is placed into a suffix tree and
289 /// queried for candidates.
290 ///
291 /// \param MBB The \p MachineBasicBlock to be translated into integers.
292 /// \param TII \p TargetInstrInfo for the function.
293 void convertToUnsignedVec(MachineBasicBlock &MBB,
294 const TargetInstrInfo &TII) {
295 LLVM_DEBUG(dbgs() << "*** Converting MBB '" << MBB.getName()
296 << "' to unsigned vector ***\n");
297 unsigned Flags = 0;
298
299 // Don't even map in this case.
300 if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
301 return;
302
303 auto OutlinableRanges = TII.getOutlinableRanges(MBB, Flags);
304 LLVM_DEBUG(dbgs() << MBB.getName() << ": " << OutlinableRanges.size()
305 << " outlinable range(s)\n");
306 if (OutlinableRanges.empty())
307 return;
308
309 // Store info for the MBB for later outlining.
310 MBBFlagsMap[&MBB] = Flags;
311
312 MachineBasicBlock::iterator It = MBB.begin();
313
314 // The number of instructions in this block that will be considered for
315 // outlining.
316 unsigned NumLegalInBlock = 0;
317
318 // True if we have at least two legal instructions which aren't separated
319 // by an illegal instruction.
320 bool HaveLegalRange = false;
321
322 // True if we can perform outlining given the last mapped (non-invisible)
323 // instruction. This lets us know if we have a legal range.
324 bool CanOutlineWithPrevInstr = false;
325
326 // FIXME: Should this all just be handled in the target, rather than using
327 // repeated calls to getOutliningType?
328 SmallVector<unsigned> UnsignedVecForMBB;
329 SmallVector<MachineBasicBlock::iterator> InstrListForMBB;
330
331 LLVM_DEBUG(dbgs() << "*** Mapping outlinable ranges ***\n");
332 for (auto &OutlinableRange : OutlinableRanges) {
333 auto OutlinableRangeBegin = OutlinableRange.first;
334 auto OutlinableRangeEnd = OutlinableRange.second;
335#ifndef NDEBUG
336 LLVM_DEBUG(
337 dbgs() << "Mapping "
338 << std::distance(OutlinableRangeBegin, OutlinableRangeEnd)
339 << " instruction range\n");
340 // Everything outside of an outlinable range is illegal.
341 unsigned NumSkippedInRange = 0;
342#endif
343 for (; It != OutlinableRangeBegin; ++It) {
344#ifndef NDEBUG
345 ++NumSkippedInRange;
346#endif
347 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
348 InstrListForMBB);
349 }
350#ifndef NDEBUG
351 LLVM_DEBUG(dbgs() << "Skipped " << NumSkippedInRange
352 << " instructions outside outlinable range\n");
353#endif
354 assert(It != MBB.end() && "Should still have instructions?");
355 // `It` is now positioned at the beginning of a range of instructions
356 // which may be outlinable. Check if each instruction is known to be safe.
357 for (; It != OutlinableRangeEnd; ++It) {
358 // Keep track of where this instruction is in the module.
359 switch (TII.getOutliningType(MMI, MIT&: It, Flags)) {
360 case InstrType::Illegal:
361 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
362 InstrListForMBB);
363 break;
364
365 case InstrType::Legal:
366 mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
367 NumLegalInBlock, UnsignedVecForMBB,
368 InstrListForMBB);
369 break;
370
371 case InstrType::LegalTerminator:
372 mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
373 NumLegalInBlock, UnsignedVecForMBB,
374 InstrListForMBB);
375 // The instruction also acts as a terminator, so we have to record
376 // that in the string.
377 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
378 InstrListForMBB);
379 break;
380
381 case InstrType::Invisible:
382 // Normally this is set by mapTo(Blah)Unsigned, but we just want to
383 // skip this instruction. So, unset the flag here.
384 ++NumInvisible;
385 AddedIllegalLastTime = false;
386 break;
387 }
388 }
389 }
390
391 LLVM_DEBUG(dbgs() << "HaveLegalRange = " << HaveLegalRange << "\n");
392
393 // Are there enough legal instructions in the block for outlining to be
394 // possible?
395 if (HaveLegalRange) {
396 // After we're done every insertion, uniquely terminate this part of the
397 // "string". This makes sure we won't match across basic block or function
398 // boundaries since the "end" is encoded uniquely and thus appears in no
399 // repeated substring.
400 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
401 InstrListForMBB);
402 ++NumSentinels;
403 append_range(C&: InstrList, R&: InstrListForMBB);
404 append_range(C&: UnsignedVec, R&: UnsignedVecForMBB);
405 }
406 }
407
408 InstructionMapper(const MachineModuleInfo &MMI_) : MMI(MMI_) {
409 // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
410 // changed.
411 assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
412 "DenseMapInfo<unsigned>'s empty key isn't -1!");
413 assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
414 "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
415 }
416};
417
418/// An interprocedural pass which finds repeated sequences of
419/// instructions and replaces them with calls to functions.
420///
421/// Each instruction is mapped to an unsigned integer and placed in a string.
422/// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
423/// is then repeatedly queried for repeated sequences of instructions. Each
424/// non-overlapping repeated sequence is then placed in its own
425/// \p MachineFunction and each instance is then replaced with a call to that
426/// function.
427struct MachineOutliner : public ModulePass {
428
429 static char ID;
430
431 MachineModuleInfo *MMI = nullptr;
432 const TargetMachine *TM = nullptr;
433
434 /// Set to true if the outliner should consider functions with
435 /// linkonceodr linkage.
436 bool OutlineFromLinkOnceODRs = false;
437
438 /// The current repeat number of machine outlining.
439 unsigned OutlineRepeatedNum = 0;
440
441 /// Set to true if the outliner should run on all functions in the module
442 /// considered safe for outlining.
443 /// Set to true by default for compatibility with llc's -run-pass option.
444 /// Set when the pass is constructed in TargetPassConfig.
445 bool RunOnAllFunctions = true;
446
447 /// This is a compact representation of hash sequences of outlined functions.
448 /// It is used when OutlinerMode = CGDataMode::Write.
449 /// The resulting hash tree will be emitted into __llvm_outlined section
450 /// which will be dead-stripped not going to the final binary.
451 /// A post-process using llvm-cgdata, lld, or ThinLTO can merge them into
452 /// a global oulined hash tree for the subsequent codegen.
453 std::unique_ptr<OutlinedHashTree> LocalHashTree;
454
455 /// The mode of the outliner.
456 /// When is's CGDataMode::None, candidates are populated with the suffix tree
457 /// within a module and outlined.
458 /// When it's CGDataMode::Write, in addition to CGDataMode::None, the hash
459 /// sequences of outlined functions are published into LocalHashTree.
460 /// When it's CGDataMode::Read, candidates are populated with the global
461 /// outlined hash tree that has been built by the previous codegen.
462 CGDataMode OutlinerMode = CGDataMode::None;
463
464 StringRef getPassName() const override { return "Machine Outliner"; }
465
466 void getAnalysisUsage(AnalysisUsage &AU) const override {
467 AU.addRequired<MachineModuleInfoWrapperPass>();
468 AU.addRequired<TargetPassConfig>();
469 AU.addPreserved<MachineModuleInfoWrapperPass>();
470 AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
471 AU.setPreservesAll();
472 ModulePass::getAnalysisUsage(AU);
473 }
474
475 MachineOutliner() : ModulePass(ID) {
476 initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
477 }
478
479 /// Remark output explaining that not outlining a set of candidates would be
480 /// better than outlining that set.
481 void emitNotOutliningCheaperRemark(
482 unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
483 OutlinedFunction &OF);
484
485 /// Remark output explaining that a function was outlined.
486 void emitOutlinedFunctionRemark(OutlinedFunction &OF);
487
488 /// Find all repeated substrings that satisfy the outlining cost model by
489 /// constructing a suffix tree.
490 ///
491 /// If a substring appears at least twice, then it must be represented by
492 /// an internal node which appears in at least two suffixes. Each suffix
493 /// is represented by a leaf node. To do this, we visit each internal node
494 /// in the tree, using the leaf children of each internal node. If an
495 /// internal node represents a beneficial substring, then we use each of
496 /// its leaf children to find the locations of its substring.
497 ///
498 /// \param Mapper Contains outlining mapping information.
499 /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
500 /// each type of candidate.
501 void
502 findCandidates(InstructionMapper &Mapper,
503 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
504
505 /// Find all repeated substrings that match in the global outlined hash
506 /// tree built from the previous codegen.
507 ///
508 /// \param Mapper Contains outlining mapping information.
509 /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
510 /// each type of candidate.
511 void findGlobalCandidates(
512 InstructionMapper &Mapper,
513 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
514
515 /// Replace the sequences of instructions represented by \p OutlinedFunctions
516 /// with calls to functions.
517 ///
518 /// \param M The module we are outlining from.
519 /// \param FunctionList A list of functions to be inserted into the module.
520 /// \param Mapper Contains the instruction mappings for the module.
521 /// \param[out] OutlinedFunctionNum The outlined function number.
522 bool outline(Module &M,
523 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
524 InstructionMapper &Mapper, unsigned &OutlinedFunctionNum);
525
526 /// Creates a function for \p OF and inserts it into the module.
527 MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
528 InstructionMapper &Mapper,
529 unsigned Name);
530
531 /// Compute and publish the stable hash sequence of instructions in the
532 /// outlined function, \p MF. The parameter \p CandSize represents the number
533 /// of candidates that have identical instruction sequences to \p MF.
534 void computeAndPublishHashSequence(MachineFunction &MF, unsigned CandSize);
535
536 /// Initialize the outliner mode.
537 void initializeOutlinerMode(const Module &M);
538
539 /// Emit the outlined hash tree into __llvm_outline section.
540 void emitOutlinedHashTree(Module &M);
541
542 /// Calls 'doOutline()' 1 + OutlinerReruns times.
543 bool runOnModule(Module &M) override;
544
545 /// Construct a suffix tree on the instructions in \p M and outline repeated
546 /// strings from that tree.
547 bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
548
549 /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
550 /// function for remark emission.
551 DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
552 for (const Candidate &C : OF.Candidates)
553 if (MachineFunction *MF = C.getMF())
554 if (DISubprogram *SP = MF->getFunction().getSubprogram())
555 return SP;
556 return nullptr;
557 }
558
559 /// Populate and \p InstructionMapper with instruction-to-integer mappings.
560 /// These are used to construct a suffix tree.
561 void populateMapper(InstructionMapper &Mapper, Module &M);
562
563 /// Initialize information necessary to output a size remark.
564 /// FIXME: This should be handled by the pass manager, not the outliner.
565 /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
566 /// pass manager.
567 void initSizeRemarkInfo(const Module &M,
568 StringMap<unsigned> &FunctionToInstrCount);
569
570 /// Emit the remark.
571 // FIXME: This should be handled by the pass manager, not the outliner.
572 void
573 emitInstrCountChangedRemark(const Module &M,
574 const StringMap<unsigned> &FunctionToInstrCount);
575};
576} // Anonymous namespace.
577
578char MachineOutliner::ID = 0;
579
580namespace llvm {
581ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
582 MachineOutliner *OL = new MachineOutliner();
583 OL->RunOnAllFunctions = RunOnAllFunctions;
584 return OL;
585}
586
587} // namespace llvm
588
589INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
590 false)
591
592void MachineOutliner::emitNotOutliningCheaperRemark(
593 unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
594 OutlinedFunction &OF) {
595 // FIXME: Right now, we arbitrarily choose some Candidate from the
596 // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
597 // We should probably sort these by function name or something to make sure
598 // the remarks are stable.
599 Candidate &C = CandidatesForRepeatedSeq.front();
600 MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
601 MORE.emit(RemarkBuilder: [&]() {
602 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
603 C.front().getDebugLoc(), C.getMBB());
604 R << "Did not outline " << NV("Length", StringLen) << " instructions"
605 << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
606 << " locations."
607 << " Bytes from outlining all occurrences ("
608 << NV("OutliningCost", OF.getOutliningCost()) << ")"
609 << " >= Unoutlined instruction bytes ("
610 << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
611 << " (Also found at: ";
612
613 // Tell the user the other places the candidate was found.
614 for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
615 R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
616 CandidatesForRepeatedSeq[i].front().getDebugLoc());
617 if (i != e - 1)
618 R << ", ";
619 }
620
621 R << ")";
622 return R;
623 });
624}
625
626void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
627 MachineBasicBlock *MBB = &*OF.MF->begin();
628 MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
629 MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
630 MBB->findDebugLoc(MBBI: MBB->begin()), MBB);
631 R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
632 << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
633 << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
634 << " locations. "
635 << "(Found at: ";
636
637 // Tell the user the other places the candidate was found.
638 for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
639
640 R << NV((Twine("StartLoc") + Twine(i)).str(),
641 OF.Candidates[i].front().getDebugLoc());
642 if (i != e - 1)
643 R << ", ";
644 }
645
646 R << ")";
647
648 MORE.emit(OptDiag&: R);
649}
650
651struct MatchedEntry {
652 unsigned StartIdx;
653 unsigned EndIdx;
654 unsigned Count;
655 MatchedEntry(unsigned StartIdx, unsigned EndIdx, unsigned Count)
656 : StartIdx(StartIdx), EndIdx(EndIdx), Count(Count) {}
657 MatchedEntry() = delete;
658};
659
660// Find all matches in the global outlined hash tree.
661// It's quadratic complexity in theory, but it's nearly linear in practice
662// since the length of outlined sequences are small within a block.
663static SmallVector<MatchedEntry> getMatchedEntries(InstructionMapper &Mapper) {
664 auto &InstrList = Mapper.InstrList;
665 auto &UnsignedVec = Mapper.UnsignedVec;
666
667 SmallVector<MatchedEntry> MatchedEntries;
668 auto Size = UnsignedVec.size();
669
670 // Get the global outlined hash tree built from the previous run.
671 assert(cgdata::hasOutlinedHashTree());
672 const auto *RootNode = cgdata::getOutlinedHashTree()->getRoot();
673
674 auto getValidInstr = [&](unsigned Index) -> const MachineInstr * {
675 if (UnsignedVec[Index] >= Mapper.LegalInstrNumber)
676 return nullptr;
677 return &(*InstrList[Index]);
678 };
679
680 auto getStableHashAndFollow =
681 [](const MachineInstr &MI, const HashNode *CurrNode) -> const HashNode * {
682 stable_hash StableHash = stableHashValue(MI);
683 if (!StableHash)
684 return nullptr;
685 auto It = CurrNode->Successors.find(x: StableHash);
686 return (It == CurrNode->Successors.end()) ? nullptr : It->second.get();
687 };
688
689 for (unsigned I = 0; I < Size; ++I) {
690 const MachineInstr *MI = getValidInstr(I);
691 if (!MI || MI->isDebugInstr())
692 continue;
693 const HashNode *CurrNode = getStableHashAndFollow(*MI, RootNode);
694 if (!CurrNode)
695 continue;
696
697 for (unsigned J = I + 1; J < Size; ++J) {
698 const MachineInstr *MJ = getValidInstr(J);
699 if (!MJ)
700 break;
701 // Skip debug instructions as we did for the outlined function.
702 if (MJ->isDebugInstr())
703 continue;
704 CurrNode = getStableHashAndFollow(*MJ, CurrNode);
705 if (!CurrNode)
706 break;
707 // Even with a match ending with a terminal, we continue finding
708 // matches to populate all candidates.
709 if (auto Count = CurrNode->Terminals)
710 MatchedEntries.emplace_back(Args&: I, Args&: J, Args&: *Count);
711 }
712 }
713
714 return MatchedEntries;
715}
716
717void MachineOutliner::findGlobalCandidates(
718 InstructionMapper &Mapper,
719 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
720 FunctionList.clear();
721 auto &InstrList = Mapper.InstrList;
722 auto &MBBFlagsMap = Mapper.MBBFlagsMap;
723
724 std::vector<Candidate> CandidatesForRepeatedSeq;
725 for (auto &ME : getMatchedEntries(Mapper)) {
726 CandidatesForRepeatedSeq.clear();
727 MachineBasicBlock::iterator StartIt = InstrList[ME.StartIdx];
728 MachineBasicBlock::iterator EndIt = InstrList[ME.EndIdx];
729 auto Length = ME.EndIdx - ME.StartIdx + 1;
730 MachineBasicBlock *MBB = StartIt->getParent();
731 CandidatesForRepeatedSeq.emplace_back(args&: ME.StartIdx, args&: Length, args&: StartIt, args&: EndIt,
732 args&: MBB, args: FunctionList.size(),
733 args&: MBBFlagsMap[MBB]);
734 const TargetInstrInfo *TII =
735 MBB->getParent()->getSubtarget().getInstrInfo();
736 unsigned MinRepeats = 1;
737 std::optional<std::unique_ptr<OutlinedFunction>> OF =
738 TII->getOutliningCandidateInfo(MMI: *MMI, RepeatedSequenceLocs&: CandidatesForRepeatedSeq,
739 MinRepeats);
740 if (!OF.has_value() || OF.value()->Candidates.empty())
741 continue;
742 // We create a global candidate for each match.
743 assert(OF.value()->Candidates.size() == MinRepeats);
744 FunctionList.emplace_back(args: std::make_unique<GlobalOutlinedFunction>(
745 args: std::move(OF.value()), args&: ME.Count));
746 }
747}
748
749void MachineOutliner::findCandidates(
750 InstructionMapper &Mapper,
751 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
752 FunctionList.clear();
753 SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
754
755 // First, find all of the repeated substrings in the tree of minimum length
756 // 2.
757 std::vector<Candidate> CandidatesForRepeatedSeq;
758 LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n");
759 LLVM_DEBUG(
760 dbgs() << "Searching for overlaps in all repeated sequences...\n");
761 for (SuffixTree::RepeatedSubstring &RS : ST) {
762 CandidatesForRepeatedSeq.clear();
763 unsigned StringLen = RS.Length;
764 LLVM_DEBUG(dbgs() << " Sequence length: " << StringLen << "\n");
765 // Debug code to keep track of how many candidates we removed.
766#ifndef NDEBUG
767 unsigned NumDiscarded = 0;
768 unsigned NumKept = 0;
769#endif
770 // Sort the start indices so that we can efficiently check if candidates
771 // overlap with the ones we've already found for this sequence.
772 llvm::sort(C&: RS.StartIndices);
773 for (const unsigned &StartIdx : RS.StartIndices) {
774 // Trick: Discard some candidates that would be incompatible with the
775 // ones we've already found for this sequence. This will save us some
776 // work in candidate selection.
777 //
778 // If two candidates overlap, then we can't outline them both. This
779 // happens when we have candidates that look like, say
780 //
781 // AA (where each "A" is an instruction).
782 //
783 // We might have some portion of the module that looks like this:
784 // AAAAAA (6 A's)
785 //
786 // In this case, there are 5 different copies of "AA" in this range, but
787 // at most 3 can be outlined. If only outlining 3 of these is going to
788 // be unbeneficial, then we ought to not bother.
789 //
790 // Note that two things DON'T overlap when they look like this:
791 // start1...end1 .... start2...end2
792 // That is, one must either
793 // * End before the other starts
794 // * Start after the other ends
795 unsigned EndIdx = StartIdx + StringLen - 1;
796 if (!CandidatesForRepeatedSeq.empty() &&
797 StartIdx <= CandidatesForRepeatedSeq.back().getEndIdx()) {
798#ifndef NDEBUG
799 ++NumDiscarded;
800 LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx << ", "
801 << EndIdx << "]; overlaps with candidate @ ["
802 << CandidatesForRepeatedSeq.back().getStartIdx()
803 << ", " << CandidatesForRepeatedSeq.back().getEndIdx()
804 << "]\n");
805#endif
806 continue;
807 }
808 // It doesn't overlap with anything, so we can outline it.
809 // Each sequence is over [StartIt, EndIt].
810 // Save the candidate and its location.
811#ifndef NDEBUG
812 ++NumKept;
813#endif
814 MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
815 MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
816 MachineBasicBlock *MBB = StartIt->getParent();
817 CandidatesForRepeatedSeq.emplace_back(args: StartIdx, args&: StringLen, args&: StartIt, args&: EndIt,
818 args&: MBB, args: FunctionList.size(),
819 args&: Mapper.MBBFlagsMap[MBB]);
820 }
821#ifndef NDEBUG
822 LLVM_DEBUG(dbgs() << " Candidates discarded: " << NumDiscarded
823 << "\n");
824 LLVM_DEBUG(dbgs() << " Candidates kept: " << NumKept << "\n\n");
825#endif
826 unsigned MinRepeats = 2;
827
828 // We've found something we might want to outline.
829 // Create an OutlinedFunction to store it and check if it'd be beneficial
830 // to outline.
831 if (CandidatesForRepeatedSeq.size() < MinRepeats)
832 continue;
833
834 // Arbitrarily choose a TII from the first candidate.
835 // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
836 const TargetInstrInfo *TII =
837 CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
838
839 std::optional<std::unique_ptr<OutlinedFunction>> OF =
840 TII->getOutliningCandidateInfo(MMI: *MMI, RepeatedSequenceLocs&: CandidatesForRepeatedSeq,
841 MinRepeats);
842
843 // If we deleted too many candidates, then there's nothing worth outlining.
844 // FIXME: This should take target-specified instruction sizes into account.
845 if (!OF.has_value() || OF.value()->Candidates.size() < MinRepeats)
846 continue;
847
848 // Is it better to outline this candidate than not?
849 if (OF.value()->getBenefit() < OutlinerBenefitThreshold) {
850 emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq,
851 OF&: *OF.value());
852 continue;
853 }
854
855 FunctionList.emplace_back(args: std::move(OF.value()));
856 }
857}
858
859void MachineOutliner::computeAndPublishHashSequence(MachineFunction &MF,
860 unsigned CandSize) {
861 // Compute the hash sequence for the outlined function.
862 SmallVector<stable_hash> OutlinedHashSequence;
863 for (auto &MBB : MF) {
864 for (auto &NewMI : MBB) {
865 stable_hash Hash = stableHashValue(MI: NewMI);
866 if (!Hash) {
867 OutlinedHashSequence.clear();
868 break;
869 }
870 OutlinedHashSequence.push_back(Elt: Hash);
871 }
872 }
873
874 // Append a unique name based on the non-empty hash sequence.
875 if (AppendContentHashToOutlinedName && !OutlinedHashSequence.empty()) {
876 auto CombinedHash = stable_hash_combine(Buffer: OutlinedHashSequence);
877 auto NewName =
878 MF.getName().str() + ".content." + std::to_string(val: CombinedHash);
879 MF.getFunction().setName(NewName);
880 }
881
882 // Publish the non-empty hash sequence to the local hash tree.
883 if (OutlinerMode == CGDataMode::Write) {
884 StableHashAttempts++;
885 if (!OutlinedHashSequence.empty())
886 LocalHashTree->insert(SequencePair: {OutlinedHashSequence, CandSize});
887 else
888 StableHashDropped++;
889 }
890}
891
892MachineFunction *MachineOutliner::createOutlinedFunction(
893 Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
894
895 // Create the function name. This should be unique.
896 // FIXME: We should have a better naming scheme. This should be stable,
897 // regardless of changes to the outliner's cost model/traversal order.
898 std::string FunctionName = "OUTLINED_FUNCTION_";
899 if (OutlineRepeatedNum > 0)
900 FunctionName += std::to_string(val: OutlineRepeatedNum + 1) + "_";
901 FunctionName += std::to_string(val: Name);
902 LLVM_DEBUG(dbgs() << "NEW FUNCTION: " << FunctionName << "\n");
903
904 // Create the function using an IR-level function.
905 LLVMContext &C = M.getContext();
906 Function *F = Function::Create(Ty: FunctionType::get(Result: Type::getVoidTy(C), isVarArg: false),
907 Linkage: Function::ExternalLinkage, N: FunctionName, M);
908
909 // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
910 // which gives us better results when we outline from linkonceodr functions.
911 F->setLinkage(GlobalValue::InternalLinkage);
912 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
913
914 // Set optsize/minsize, so we don't insert padding between outlined
915 // functions.
916 F->addFnAttr(Kind: Attribute::OptimizeForSize);
917 F->addFnAttr(Kind: Attribute::MinSize);
918
919 Candidate &FirstCand = OF.Candidates.front();
920 const TargetInstrInfo &TII =
921 *FirstCand.getMF()->getSubtarget().getInstrInfo();
922
923 TII.mergeOutliningCandidateAttributes(F&: *F, Candidates&: OF.Candidates);
924
925 // Set uwtable, so we generate eh_frame.
926 UWTableKind UW = std::accumulate(
927 first: OF.Candidates.cbegin(), last: OF.Candidates.cend(), init: UWTableKind::None,
928 binary_op: [](UWTableKind K, const outliner::Candidate &C) {
929 return std::max(a: K, b: C.getMF()->getFunction().getUWTableKind());
930 });
931 F->setUWTableKind(UW);
932
933 BasicBlock *EntryBB = BasicBlock::Create(Context&: C, Name: "entry", Parent: F);
934 IRBuilder<> Builder(EntryBB);
935 Builder.CreateRetVoid();
936
937 MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
938 MachineFunction &MF = MMI.getOrCreateMachineFunction(F&: *F);
939 MF.setIsOutlined(true);
940 MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
941
942 // Insert the new function into the module.
943 MF.insert(MBBI: MF.begin(), MBB: &MBB);
944
945 MachineFunction *OriginalMF = FirstCand.front().getMF();
946 const std::vector<MCCFIInstruction> &Instrs =
947 OriginalMF->getFrameInstructions();
948 for (auto &MI : FirstCand) {
949 if (MI.isDebugInstr())
950 continue;
951
952 // Don't keep debug information for outlined instructions.
953 auto DL = DebugLoc();
954 if (MI.isCFIInstruction()) {
955 unsigned CFIIndex = MI.getOperand(i: 0).getCFIIndex();
956 MCCFIInstruction CFI = Instrs[CFIIndex];
957 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
958 .addCFIIndex(CFIIndex: MF.addFrameInst(Inst: CFI));
959 } else {
960 MachineInstr &NewMI = TII.duplicate(MBB, InsertBefore: MBB.end(), Orig: MI);
961 NewMI.dropMemRefs(MF);
962 NewMI.setDebugLoc(DL);
963 }
964 }
965
966 if (OutlinerMode != CGDataMode::None)
967 computeAndPublishHashSequence(MF, CandSize: OF.Candidates.size());
968
969 // Set normal properties for a late MachineFunction.
970 MF.getProperties().resetIsSSA();
971 MF.getProperties().setNoPHIs();
972 MF.getProperties().setNoVRegs();
973 MF.getProperties().setTracksLiveness();
974 MF.getRegInfo().freezeReservedRegs();
975
976 // Compute live-in set for outlined fn
977 const MachineRegisterInfo &MRI = MF.getRegInfo();
978 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
979 LivePhysRegs LiveIns(TRI);
980 for (auto &Cand : OF.Candidates) {
981 // Figure out live-ins at the first instruction.
982 MachineBasicBlock &OutlineBB = *Cand.front().getParent();
983 LivePhysRegs CandLiveIns(TRI);
984 CandLiveIns.addLiveOuts(MBB: OutlineBB);
985 for (const MachineInstr &MI :
986 reverse(C: make_range(x: Cand.begin(), y: OutlineBB.end())))
987 CandLiveIns.stepBackward(MI);
988
989 // The live-in set for the outlined function is the union of the live-ins
990 // from all the outlining points.
991 for (MCPhysReg Reg : CandLiveIns)
992 LiveIns.addReg(Reg);
993 }
994 addLiveIns(MBB, LiveRegs: LiveIns);
995
996 TII.buildOutlinedFrame(MBB, MF, OF);
997
998 // If there's a DISubprogram associated with this outlined function, then
999 // emit debug info for the outlined function.
1000 if (DISubprogram *SP = getSubprogramOrNull(OF)) {
1001 // We have a DISubprogram. Get its DICompileUnit.
1002 DICompileUnit *CU = SP->getUnit();
1003 DIBuilder DB(M, true, CU);
1004 DIFile *Unit = SP->getFile();
1005 Mangler Mg;
1006 // Get the mangled name of the function for the linkage name.
1007 std::string Dummy;
1008 raw_string_ostream MangledNameStream(Dummy);
1009 Mg.getNameWithPrefix(OS&: MangledNameStream, GV: F, CannotUsePrivateLabel: false);
1010
1011 DISubprogram *OutlinedSP = DB.createFunction(
1012 Scope: Unit /* Context */, Name: F->getName(), LinkageName: StringRef(Dummy), File: Unit /* File */,
1013 LineNo: 0 /* Line 0 is reserved for compiler-generated code. */,
1014 Ty: DB.createSubroutineType(ParameterTypes: DB.getOrCreateTypeArray(Elements: {})), /* void type */
1015 ScopeLine: 0, /* Line 0 is reserved for compiler-generated code. */
1016 Flags: DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
1017 /* Outlined code is optimized code by definition. */
1018 SPFlags: DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
1019
1020 // Don't add any new variables to the subprogram.
1021 DB.finalizeSubprogram(SP: OutlinedSP);
1022
1023 // Attach subprogram to the function.
1024 F->setSubprogram(OutlinedSP);
1025 // We're done with the DIBuilder.
1026 DB.finalize();
1027 }
1028
1029 return &MF;
1030}
1031
1032bool MachineOutliner::outline(
1033 Module &M, std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
1034 InstructionMapper &Mapper, unsigned &OutlinedFunctionNum) {
1035 LLVM_DEBUG(dbgs() << "*** Outlining ***\n");
1036 LLVM_DEBUG(dbgs() << "NUMBER OF POTENTIAL FUNCTIONS: " << FunctionList.size()
1037 << "\n");
1038 bool OutlinedSomething = false;
1039
1040 // Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
1041 // The function with highest priority should be outlined first.
1042 stable_sort(Range&: FunctionList, C: [](const std::unique_ptr<OutlinedFunction> &LHS,
1043 const std::unique_ptr<OutlinedFunction> &RHS) {
1044 return LHS->getNotOutlinedCost() * RHS->getOutliningCost() >
1045 RHS->getNotOutlinedCost() * LHS->getOutliningCost();
1046 });
1047
1048 // Walk over each function, outlining them as we go along. Functions are
1049 // outlined greedily, based off the sort above.
1050 auto *UnsignedVecBegin = Mapper.UnsignedVec.begin();
1051 LLVM_DEBUG(dbgs() << "WALKING FUNCTION LIST\n");
1052 for (auto &OF : FunctionList) {
1053#ifndef NDEBUG
1054 auto NumCandidatesBefore = OF->Candidates.size();
1055#endif
1056 // If we outlined something that overlapped with a candidate in a previous
1057 // step, then we can't outline from it.
1058 erase_if(C&: OF->Candidates, P: [&UnsignedVecBegin](Candidate &C) {
1059 return std::any_of(first: UnsignedVecBegin + C.getStartIdx(),
1060 last: UnsignedVecBegin + C.getEndIdx() + 1, pred: [](unsigned I) {
1061 return I == static_cast<unsigned>(-1);
1062 });
1063 });
1064
1065#ifndef NDEBUG
1066 auto NumCandidatesAfter = OF->Candidates.size();
1067 LLVM_DEBUG(dbgs() << "PRUNED: " << NumCandidatesBefore - NumCandidatesAfter
1068 << "/" << NumCandidatesBefore << " candidates\n");
1069#endif
1070
1071 // If we made it unbeneficial to outline this function, skip it.
1072 if (OF->getBenefit() < OutlinerBenefitThreshold) {
1073 LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF->getBenefit()
1074 << " B) < threshold (" << OutlinerBenefitThreshold
1075 << " B)\n");
1076 continue;
1077 }
1078
1079 LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF->getBenefit()
1080 << " B) > threshold (" << OutlinerBenefitThreshold
1081 << " B)\n");
1082
1083 // Remove all Linker Optimization Hints from the candidates.
1084 // TODO: The intersection of the LOHs from all candidates should be legal in
1085 // the outlined function.
1086 SmallPtrSet<MachineInstr *, 2> MIs;
1087 for (Candidate &C : OF->Candidates) {
1088 for (MachineInstr &MI : C)
1089 MIs.insert(Ptr: &MI);
1090 NumRemovedLOHs += TM->clearLinkerOptimizationHints(MIs);
1091 MIs.clear();
1092 }
1093
1094 // It's beneficial. Create the function and outline its sequence's
1095 // occurrences.
1096 OF->MF = createOutlinedFunction(M, OF&: *OF, Mapper, Name: OutlinedFunctionNum);
1097 emitOutlinedFunctionRemark(OF&: *OF);
1098 FunctionsCreated++;
1099 OutlinedFunctionNum++; // Created a function, move to the next name.
1100 MachineFunction *MF = OF->MF;
1101 const TargetSubtargetInfo &STI = MF->getSubtarget();
1102 const TargetInstrInfo &TII = *STI.getInstrInfo();
1103
1104 // Replace occurrences of the sequence with calls to the new function.
1105 LLVM_DEBUG(dbgs() << "CREATE OUTLINED CALLS\n");
1106 for (Candidate &C : OF->Candidates) {
1107 MachineBasicBlock &MBB = *C.getMBB();
1108 MachineBasicBlock::iterator StartIt = C.begin();
1109 MachineBasicBlock::iterator EndIt = std::prev(x: C.end());
1110
1111 // Insert the call.
1112 auto CallInst = TII.insertOutlinedCall(M, MBB, It&: StartIt, MF&: *MF, C);
1113// Insert the call.
1114#ifndef NDEBUG
1115 auto MBBBeingOutlinedFromName =
1116 MBB.getName().empty() ? "<unknown>" : MBB.getName().str();
1117 auto MFBeingOutlinedFromName = MBB.getParent()->getName().empty()
1118 ? "<unknown>"
1119 : MBB.getParent()->getName().str();
1120 LLVM_DEBUG(dbgs() << " CALL: " << MF->getName() << " in "
1121 << MFBeingOutlinedFromName << ":"
1122 << MBBBeingOutlinedFromName << "\n");
1123 LLVM_DEBUG(dbgs() << " .. " << *CallInst);
1124#endif
1125
1126 // If the caller tracks liveness, then we need to make sure that
1127 // anything we outline doesn't break liveness assumptions. The outlined
1128 // functions themselves currently don't track liveness, but we should
1129 // make sure that the ranges we yank things out of aren't wrong.
1130 if (MBB.getParent()->getProperties().hasTracksLiveness()) {
1131 // The following code is to add implicit def operands to the call
1132 // instruction. It also updates call site information for moved
1133 // code.
1134 SmallSet<Register, 2> UseRegs, DefRegs;
1135 // Copy over the defs in the outlined range.
1136 // First inst in outlined range <-- Anything that's defined in this
1137 // ... .. range has to be added as an
1138 // implicit Last inst in outlined range <-- def to the call
1139 // instruction. Also remove call site information for outlined block
1140 // of code. The exposed uses need to be copied in the outlined range.
1141 for (MachineBasicBlock::reverse_iterator
1142 Iter = EndIt.getReverse(),
1143 Last = std::next(x: CallInst.getReverse());
1144 Iter != Last; Iter++) {
1145 MachineInstr *MI = &*Iter;
1146 SmallSet<Register, 2> InstrUseRegs;
1147 for (MachineOperand &MOP : MI->operands()) {
1148 // Skip over anything that isn't a register.
1149 if (!MOP.isReg())
1150 continue;
1151
1152 if (MOP.isDef()) {
1153 // Introduce DefRegs set to skip the redundant register.
1154 DefRegs.insert(V: MOP.getReg());
1155 if (UseRegs.count(V: MOP.getReg()) &&
1156 !InstrUseRegs.count(V: MOP.getReg()))
1157 // Since the regiester is modeled as defined,
1158 // it is not necessary to be put in use register set.
1159 UseRegs.erase(V: MOP.getReg());
1160 } else if (!MOP.isUndef()) {
1161 // Any register which is not undefined should
1162 // be put in the use register set.
1163 UseRegs.insert(V: MOP.getReg());
1164 InstrUseRegs.insert(V: MOP.getReg());
1165 }
1166 }
1167 if (MI->isCandidateForAdditionalCallInfo())
1168 MI->getMF()->eraseAdditionalCallInfo(MI);
1169 }
1170
1171 for (const Register &I : DefRegs)
1172 // If it's a def, add it to the call instruction.
1173 CallInst->addOperand(
1174 Op: MachineOperand::CreateReg(Reg: I, isDef: true, /* isDef = true */
1175 isImp: true /* isImp = true */));
1176
1177 for (const Register &I : UseRegs)
1178 // If it's a exposed use, add it to the call instruction.
1179 CallInst->addOperand(
1180 Op: MachineOperand::CreateReg(Reg: I, isDef: false, /* isDef = false */
1181 isImp: true /* isImp = true */));
1182 }
1183
1184 // Erase from the point after where the call was inserted up to, and
1185 // including, the final instruction in the sequence.
1186 // Erase needs one past the end, so we need std::next there too.
1187 MBB.erase(I: std::next(x: StartIt), E: std::next(x: EndIt));
1188
1189 // Keep track of what we removed by marking them all as -1.
1190 for (unsigned &I : make_range(x: UnsignedVecBegin + C.getStartIdx(),
1191 y: UnsignedVecBegin + C.getEndIdx() + 1))
1192 I = static_cast<unsigned>(-1);
1193 OutlinedSomething = true;
1194
1195 // Statistics.
1196 NumOutlined++;
1197 }
1198 }
1199
1200 LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n");
1201 return OutlinedSomething;
1202}
1203
1204void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
1205 // Build instruction mappings for each function in the module. Start by
1206 // iterating over each Function in M.
1207 LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
1208 for (Function &F : M) {
1209 LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
1210
1211 if (F.hasFnAttribute(Kind: "nooutline")) {
1212 LLVM_DEBUG(dbgs() << "SKIP: Function has nooutline attribute\n");
1213 continue;
1214 }
1215
1216 // There's something in F. Check if it has a MachineFunction associated with
1217 // it.
1218 MachineFunction *MF = MMI->getMachineFunction(F);
1219
1220 // If it doesn't, then there's nothing to outline from. Move to the next
1221 // Function.
1222 if (!MF) {
1223 LLVM_DEBUG(dbgs() << "SKIP: Function does not have a MachineFunction\n");
1224 continue;
1225 }
1226
1227 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1228 if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(MF&: *MF)) {
1229 LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
1230 "function by default\n");
1231 continue;
1232 }
1233
1234 // We have a MachineFunction. Ask the target if it's suitable for outlining.
1235 // If it isn't, then move on to the next Function in the module.
1236 if (!TII->isFunctionSafeToOutlineFrom(MF&: *MF, OutlineFromLinkOnceODRs)) {
1237 LLVM_DEBUG(dbgs() << "SKIP: " << MF->getName()
1238 << ": unsafe to outline from\n");
1239 continue;
1240 }
1241
1242 // We have a function suitable for outlining. Iterate over every
1243 // MachineBasicBlock in MF and try to map its instructions to a list of
1244 // unsigned integers.
1245 const unsigned MinMBBSize = 2;
1246
1247 for (MachineBasicBlock &MBB : *MF) {
1248 LLVM_DEBUG(dbgs() << " MAPPING MBB: '" << MBB.getName() << "'\n");
1249 // If there isn't anything in MBB, then there's no point in outlining from
1250 // it.
1251 // If there are fewer than 2 instructions in the MBB, then it can't ever
1252 // contain something worth outlining.
1253 // FIXME: This should be based off of the maximum size in B of an outlined
1254 // call versus the size in B of the MBB.
1255 if (MBB.size() < MinMBBSize) {
1256 LLVM_DEBUG(dbgs() << " SKIP: MBB size less than minimum size of "
1257 << MinMBBSize << "\n");
1258 continue;
1259 }
1260
1261 // Check if MBB could be the target of an indirect branch. If it is, then
1262 // we don't want to outline from it.
1263 if (MBB.hasAddressTaken()) {
1264 LLVM_DEBUG(dbgs() << " SKIP: MBB's address is taken\n");
1265 continue;
1266 }
1267
1268 // MBB is suitable for outlining. Map it to a list of unsigneds.
1269 Mapper.convertToUnsignedVec(MBB, TII: *TII);
1270 }
1271 }
1272 // Statistics.
1273 UnsignedVecSize = Mapper.UnsignedVec.size();
1274}
1275
1276void MachineOutliner::initSizeRemarkInfo(
1277 const Module &M, StringMap<unsigned> &FunctionToInstrCount) {
1278 // Collect instruction counts for every function. We'll use this to emit
1279 // per-function size remarks later.
1280 for (const Function &F : M) {
1281 MachineFunction *MF = MMI->getMachineFunction(F);
1282
1283 // We only care about MI counts here. If there's no MachineFunction at this
1284 // point, then there won't be after the outliner runs, so let's move on.
1285 if (!MF)
1286 continue;
1287 FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
1288 }
1289}
1290
1291void MachineOutliner::emitInstrCountChangedRemark(
1292 const Module &M, const StringMap<unsigned> &FunctionToInstrCount) {
1293 // Iterate over each function in the module and emit remarks.
1294 // Note that we won't miss anything by doing this, because the outliner never
1295 // deletes functions.
1296 for (const Function &F : M) {
1297 MachineFunction *MF = MMI->getMachineFunction(F);
1298
1299 // The outliner never deletes functions. If we don't have a MF here, then we
1300 // didn't have one prior to outlining either.
1301 if (!MF)
1302 continue;
1303
1304 std::string Fname = std::string(F.getName());
1305 unsigned FnCountAfter = MF->getInstructionCount();
1306 unsigned FnCountBefore = 0;
1307
1308 // Check if the function was recorded before.
1309 auto It = FunctionToInstrCount.find(Key: Fname);
1310
1311 // Did we have a previously-recorded size? If yes, then set FnCountBefore
1312 // to that.
1313 if (It != FunctionToInstrCount.end())
1314 FnCountBefore = It->second;
1315
1316 // Compute the delta and emit a remark if there was a change.
1317 int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
1318 static_cast<int64_t>(FnCountBefore);
1319 if (FnDelta == 0)
1320 continue;
1321
1322 MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
1323 MORE.emit(RemarkBuilder: [&]() {
1324 MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
1325 DiagnosticLocation(), &MF->front());
1326 R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
1327 << ": Function: "
1328 << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
1329 << ": MI instruction count changed from "
1330 << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
1331 FnCountBefore)
1332 << " to "
1333 << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
1334 FnCountAfter)
1335 << "; Delta: "
1336 << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
1337 return R;
1338 });
1339 }
1340}
1341
1342void MachineOutliner::initializeOutlinerMode(const Module &M) {
1343 if (DisableGlobalOutlining)
1344 return;
1345
1346 if (auto *IndexWrapperPass =
1347 getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) {
1348 auto *TheIndex = IndexWrapperPass->getIndex();
1349 // (Full)LTO module does not have functions added to the index.
1350 // In this case, we run the outliner without using codegen data as usual.
1351 if (TheIndex && !TheIndex->hasExportedFunctions(M))
1352 return;
1353 }
1354
1355 // When codegen data write is enabled, we want to write the local outlined
1356 // hash tree to the custom section, `__llvm_outline`.
1357 // When the outlined hash tree is available from the previous codegen data,
1358 // we want to read it to optimistically create global outlining candidates.
1359 if (cgdata::emitCGData()) {
1360 OutlinerMode = CGDataMode::Write;
1361 // Create a local outlined hash tree to be published.
1362 LocalHashTree = std::make_unique<OutlinedHashTree>();
1363 // We don't need to read the outlined hash tree from the previous codegen
1364 } else if (cgdata::hasOutlinedHashTree())
1365 OutlinerMode = CGDataMode::Read;
1366}
1367
1368void MachineOutliner::emitOutlinedHashTree(Module &M) {
1369 assert(LocalHashTree);
1370 if (!LocalHashTree->empty()) {
1371 LLVM_DEBUG({
1372 dbgs() << "Emit outlined hash tree. Size: " << LocalHashTree->size()
1373 << "\n";
1374 });
1375 SmallVector<char> Buf;
1376 raw_svector_ostream OS(Buf);
1377
1378 OutlinedHashTreeRecord HTR(std::move(LocalHashTree));
1379 HTR.serialize(OS);
1380
1381 llvm::StringRef Data(Buf.data(), Buf.size());
1382 std::unique_ptr<MemoryBuffer> Buffer =
1383 MemoryBuffer::getMemBuffer(InputData: Data, BufferName: "in-memory outlined hash tree", RequiresNullTerminator: false);
1384
1385 Triple TT(M.getTargetTriple());
1386 embedBufferInModule(
1387 M, Buf: *Buffer,
1388 SectionName: getCodeGenDataSectionName(CGSK: CG_outline, OF: TT.getObjectFormat()));
1389 }
1390}
1391
1392bool MachineOutliner::runOnModule(Module &M) {
1393 if (skipModule(M))
1394 return false;
1395
1396 // Check if there's anything in the module. If it's empty, then there's
1397 // nothing to outline.
1398 if (M.empty())
1399 return false;
1400
1401 // Initialize the outliner mode.
1402 initializeOutlinerMode(M);
1403
1404 MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
1405 TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1406
1407 // Number to append to the current outlined function.
1408 unsigned OutlinedFunctionNum = 0;
1409
1410 OutlineRepeatedNum = 0;
1411 if (!doOutline(M, OutlinedFunctionNum))
1412 return false;
1413
1414 for (unsigned I = 0; I < OutlinerReruns; ++I) {
1415 OutlinedFunctionNum = 0;
1416 OutlineRepeatedNum++;
1417 if (!doOutline(M, OutlinedFunctionNum)) {
1418 LLVM_DEBUG({
1419 dbgs() << "Did not outline on iteration " << I + 2 << " out of "
1420 << OutlinerReruns + 1 << "\n";
1421 });
1422 break;
1423 }
1424 }
1425
1426 if (OutlinerMode == CGDataMode::Write)
1427 emitOutlinedHashTree(M);
1428
1429 return true;
1430}
1431
1432bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
1433 // If the user passed -enable-machine-outliner=always or
1434 // -enable-machine-outliner, the pass will run on all functions in the module.
1435 // Otherwise, if the target supports default outlining, it will run on all
1436 // functions deemed by the target to be worth outlining from by default. Tell
1437 // the user how the outliner is running.
1438 LLVM_DEBUG({
1439 dbgs() << "Machine Outliner: Running on ";
1440 if (RunOnAllFunctions)
1441 dbgs() << "all functions";
1442 else
1443 dbgs() << "target-default functions";
1444 dbgs() << "\n";
1445 });
1446
1447 // If the user specifies that they want to outline from linkonceodrs, set
1448 // it here.
1449 OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
1450 InstructionMapper Mapper(*MMI);
1451
1452 // Prepare instruction mappings for the suffix tree.
1453 populateMapper(Mapper, M);
1454 std::vector<std::unique_ptr<OutlinedFunction>> FunctionList;
1455
1456 // Find all of the outlining candidates.
1457 if (OutlinerMode == CGDataMode::Read)
1458 findGlobalCandidates(Mapper, FunctionList);
1459 else
1460 findCandidates(Mapper, FunctionList);
1461
1462 // If we've requested size remarks, then collect the MI counts of every
1463 // function before outlining, and the MI counts after outlining.
1464 // FIXME: This shouldn't be in the outliner at all; it should ultimately be
1465 // the pass manager's responsibility.
1466 // This could pretty easily be placed in outline instead, but because we
1467 // really ultimately *don't* want this here, it's done like this for now
1468 // instead.
1469
1470 // Check if we want size remarks.
1471 bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
1472 StringMap<unsigned> FunctionToInstrCount;
1473 if (ShouldEmitSizeRemarks)
1474 initSizeRemarkInfo(M, FunctionToInstrCount);
1475
1476 // Outline each of the candidates and return true if something was outlined.
1477 bool OutlinedSomething =
1478 outline(M, FunctionList, Mapper, OutlinedFunctionNum);
1479
1480 // If we outlined something, we definitely changed the MI count of the
1481 // module. If we've asked for size remarks, then output them.
1482 // FIXME: This should be in the pass manager.
1483 if (ShouldEmitSizeRemarks && OutlinedSomething)
1484 emitInstrCountChangedRemark(M, FunctionToInstrCount);
1485
1486 LLVM_DEBUG({
1487 if (!OutlinedSomething)
1488 dbgs() << "Stopped outlining at iteration " << OutlineRepeatedNum
1489 << " because no changes were found.\n";
1490 });
1491
1492 return OutlinedSomething;
1493}
1494