1//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Replaces repeated sequences of instructions with function calls.
11///
12/// This works by placing every instruction from every basic block in a
13/// suffix tree, and repeatedly querying that tree for repeated sequences of
14/// instructions. If a sequence of instructions appears often, then it ought
15/// to be beneficial to pull out into a function.
16///
17/// The MachineOutliner communicates with a given target using hooks defined in
18/// TargetInstrInfo.h. The target supplies the outliner with information on how
19/// a specific sequence of instructions should be outlined. This information
20/// is used to deduce the number of instructions necessary to
21///
22/// * Create an outlined function
23/// * Call that outlined function
24///
25/// Targets must implement
26/// * getOutliningCandidateInfo
27/// * buildOutlinedFrame
28/// * insertOutlinedCall
29/// * isFunctionSafeToOutlineFrom
30///
31/// in order to make use of the MachineOutliner.
32///
33/// This was originally presented at the 2016 LLVM Developers' Meeting in the
34/// talk "Reducing Code Size Using Outlining". For a high-level overview of
35/// how this pass works, the talk is available on YouTube at
36///
37/// https://www.youtube.com/watch?v=yorld-WSOeU
38///
39/// The slides for the talk are available at
40///
41/// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
42///
43/// The talk provides an overview of how the outliner finds candidates and
44/// ultimately outlines them. It describes how the main data structure for this
45/// pass, the suffix tree, is queried and purged for candidates. It also gives
46/// a simplified suffix tree construction algorithm for suffix trees based off
47/// of the algorithm actually used here, Ukkonen's algorithm.
48///
49/// For the original RFC for this pass, please see
50///
51/// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
52///
53/// For more information on the suffix tree data structure, please see
54/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
55///
56//===----------------------------------------------------------------------===//
57#include "llvm/CodeGen/MachineOutliner.h"
58#include "llvm/ADT/DenseMap.h"
59#include "llvm/ADT/SmallSet.h"
60#include "llvm/ADT/Statistic.h"
61#include "llvm/ADT/Twine.h"
62#include "llvm/Analysis/BlockFrequencyInfo.h"
63#include "llvm/Analysis/ModuleSummaryAnalysis.h"
64#include "llvm/Analysis/OptimizationRemarkEmitter.h"
65#include "llvm/Analysis/ProfileSummaryInfo.h"
66#include "llvm/CGData/CodeGenDataReader.h"
67#include "llvm/CodeGen/LivePhysRegs.h"
68#include "llvm/CodeGen/MachineInstrBundle.h"
69#include "llvm/CodeGen/MachineModuleInfo.h"
70#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
71#include "llvm/CodeGen/Passes.h"
72#include "llvm/CodeGen/TargetInstrInfo.h"
73#include "llvm/CodeGen/TargetPassConfig.h"
74#include "llvm/CodeGen/TargetSubtargetInfo.h"
75#include "llvm/IR/DIBuilder.h"
76#include "llvm/IR/IRBuilder.h"
77#include "llvm/IR/Mangler.h"
78#include "llvm/IR/Module.h"
79#include "llvm/InitializePasses.h"
80#include "llvm/Support/CommandLine.h"
81#include "llvm/Support/Debug.h"
82#include "llvm/Support/SuffixTree.h"
83#include "llvm/Support/raw_ostream.h"
84#include "llvm/Target/TargetMachine.h"
85#include "llvm/Transforms/Utils/ModuleUtils.h"
86#include <tuple>
87#include <vector>
88
89#define DEBUG_TYPE "machine-outliner"
90
91using namespace llvm;
92using namespace ore;
93using namespace outliner;
94
95// Statistics for outlined functions.
96STATISTIC(NumOutlined, "Number of candidates outlined");
97STATISTIC(FunctionsCreated, "Number of functions created");
98
99// Statistics for instruction mapping.
100STATISTIC(NumLegalInUnsignedVec, "Outlinable instructions mapped");
101STATISTIC(NumIllegalInUnsignedVec,
102 "Unoutlinable instructions mapped + number of sentinel values");
103STATISTIC(NumSentinels, "Sentinel values inserted during mapping");
104STATISTIC(NumInvisible,
105 "Invisible instructions skipped during mapping");
106STATISTIC(UnsignedVecSize,
107 "Total number of instructions mapped and saved to mapping vector");
108STATISTIC(StableHashAttempts,
109 "Count of hashing attempts made for outlined functions");
110STATISTIC(StableHashDropped,
111 "Count of unsuccessful hashing attempts for outlined functions");
112STATISTIC(NumRemovedLOHs, "Total number of Linker Optimization Hints removed");
113STATISTIC(NumPGOBlockedOutlined,
114 "Number of times outlining was blocked by PGO");
115STATISTIC(NumPGOAllowedCold,
116 "Number of times outlining was allowed from cold functions");
117STATISTIC(NumPGOConservativeBlockedOutlined,
118 "Number of times outlining was blocked conservatively when profile "
119 "counts were missing");
120STATISTIC(NumPGOOptimisticOutlined,
121 "Number of times outlining was allowed optimistically when profile "
122 "counts were missing");
123
124// Set to true if the user wants the outliner to run on linkonceodr linkage
125// functions. This is false by default because the linker can dedupe linkonceodr
126// functions. Since the outliner is confined to a single module (modulo LTO),
127// this is off by default. It should, however, be the default behaviour in
128// LTO.
129static cl::opt<bool> EnableLinkOnceODROutlining(
130 "enable-linkonceodr-outlining", cl::Hidden,
131 cl::desc("Enable the machine outliner on linkonceodr functions"),
132 cl::init(Val: false));
133
134/// Number of times to re-run the outliner. This is not the total number of runs
135/// as the outliner will run at least one time. The default value is set to 0,
136/// meaning the outliner will run one time and rerun zero times after that.
137static cl::opt<unsigned> OutlinerReruns(
138 "machine-outliner-reruns", cl::init(Val: 0), cl::Hidden,
139 cl::desc(
140 "Number of times to rerun the outliner after the initial outline"));
141
142static cl::opt<unsigned> OutlinerBenefitThreshold(
143 "outliner-benefit-threshold", cl::init(Val: 1), cl::Hidden,
144 cl::desc(
145 "The minimum size in bytes before an outlining candidate is accepted"));
146
147static cl::opt<bool> OutlinerLeafDescendants(
148 "outliner-leaf-descendants", cl::init(Val: true), cl::Hidden,
149 cl::desc("Consider all leaf descendants of internal nodes of the suffix "
150 "tree as candidates for outlining (if false, only leaf children "
151 "are considered)"));
152
153static cl::opt<bool>
154 DisableGlobalOutlining("disable-global-outlining", cl::Hidden,
155 cl::desc("Disable global outlining only by ignoring "
156 "the codegen data generation or use"),
157 cl::init(Val: false));
158
159static cl::opt<bool> AppendContentHashToOutlinedName(
160 "append-content-hash-outlined-name", cl::Hidden,
161 cl::desc("This appends the content hash to the globally outlined function "
162 "name. It's beneficial for enhancing the precision of the stable "
163 "hash and for ordering the outlined functions."),
164 cl::init(Val: true));
165
166namespace {
167
168/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
169struct InstructionMapper {
170 const MachineModuleInfo &MMI;
171
172 /// The next available integer to assign to a \p MachineInstr that
173 /// cannot be outlined.
174 ///
175 /// Set to -3 for compatability with \p DenseMapInfo<unsigned>.
176 unsigned IllegalInstrNumber = -3;
177
178 /// The next available integer to assign to a \p MachineInstr that can
179 /// be outlined.
180 unsigned LegalInstrNumber = 0;
181
182 /// Correspondence from \p MachineInstrs to unsigned integers.
183 DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
184 InstructionIntegerMap;
185
186 /// Correspondence between \p MachineBasicBlocks and target-defined flags.
187 DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
188
189 /// The vector of unsigned integers that the module is mapped to.
190 SmallVector<unsigned> UnsignedVec;
191
192 /// Stores the location of the instruction associated with the integer
193 /// at index i in \p UnsignedVec for each index i.
194 SmallVector<MachineBasicBlock::iterator> InstrList;
195
196 // Set if we added an illegal number in the previous step.
197 // Since each illegal number is unique, we only need one of them between
198 // each range of legal numbers. This lets us make sure we don't add more
199 // than one illegal number per range.
200 bool AddedIllegalLastTime = false;
201
202 /// Maps \p *It to a legal integer.
203 ///
204 /// Updates \p CanOutlineWithPrevInstr, \p HaveLegalRange, \p InstrListForMBB,
205 /// \p UnsignedVecForMBB, \p InstructionIntegerMap, and \p LegalInstrNumber.
206 ///
207 /// \returns The integer that \p *It was mapped to.
208 unsigned mapToLegalUnsigned(
209 MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
210 bool &HaveLegalRange, unsigned &NumLegalInBlock,
211 SmallVector<unsigned> &UnsignedVecForMBB,
212 SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
213 // We added something legal, so we should unset the AddedLegalLastTime
214 // flag.
215 AddedIllegalLastTime = false;
216
217 // If we have at least two adjacent legal instructions (which may have
218 // invisible instructions in between), remember that.
219 if (CanOutlineWithPrevInstr)
220 HaveLegalRange = true;
221 CanOutlineWithPrevInstr = true;
222
223 // Keep track of the number of legal instructions we insert.
224 NumLegalInBlock++;
225
226 // Get the integer for this instruction or give it the current
227 // LegalInstrNumber.
228 InstrListForMBB.push_back(Elt: It);
229 MachineInstr &MI = *It;
230 bool WasInserted;
231 DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
232 ResultIt;
233 std::tie(args&: ResultIt, args&: WasInserted) =
234 InstructionIntegerMap.insert(KV: std::make_pair(x: &MI, y&: LegalInstrNumber));
235 unsigned MINumber = ResultIt->second;
236
237 // There was an insertion.
238 if (WasInserted)
239 LegalInstrNumber++;
240
241 UnsignedVecForMBB.push_back(Elt: MINumber);
242
243 // Make sure we don't overflow or use any integers reserved by the DenseMap.
244 if (LegalInstrNumber >= IllegalInstrNumber)
245 report_fatal_error(reason: "Instruction mapping overflow!");
246
247 // Statistics.
248 ++NumLegalInUnsignedVec;
249 return MINumber;
250 }
251
252 /// Maps \p *It to an illegal integer.
253 ///
254 /// Updates \p InstrListForMBB, \p UnsignedVecForMBB, and \p
255 /// IllegalInstrNumber.
256 ///
257 /// \returns The integer that \p *It was mapped to.
258 unsigned mapToIllegalUnsigned(
259 MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
260 SmallVector<unsigned> &UnsignedVecForMBB,
261 SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
262 // Can't outline an illegal instruction. Set the flag.
263 CanOutlineWithPrevInstr = false;
264
265 // Only add one illegal number per range of legal numbers.
266 if (AddedIllegalLastTime)
267 return IllegalInstrNumber;
268
269 // Remember that we added an illegal number last time.
270 AddedIllegalLastTime = true;
271 unsigned MINumber = IllegalInstrNumber;
272
273 InstrListForMBB.push_back(Elt: It);
274 UnsignedVecForMBB.push_back(Elt: IllegalInstrNumber);
275 IllegalInstrNumber--;
276 // Statistics.
277 ++NumIllegalInUnsignedVec;
278
279 assert(LegalInstrNumber < IllegalInstrNumber &&
280 "Instruction mapping overflow!");
281
282 return MINumber;
283 }
284
285 /// Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
286 /// and appends it to \p UnsignedVec and \p InstrList.
287 ///
288 /// Two instructions are assigned the same integer if they are identical.
289 /// If an instruction is deemed unsafe to outline, then it will be assigned an
290 /// unique integer. The resulting mapping is placed into a suffix tree and
291 /// queried for candidates.
292 ///
293 /// \param MBB The \p MachineBasicBlock to be translated into integers.
294 /// \param TII \p TargetInstrInfo for the function.
295 void convertToUnsignedVec(MachineBasicBlock &MBB,
296 const TargetInstrInfo &TII) {
297 LLVM_DEBUG(dbgs() << "*** Converting MBB '" << MBB.getName()
298 << "' to unsigned vector ***\n");
299 unsigned Flags = 0;
300
301 // Don't even map in this case.
302 if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
303 return;
304
305 auto OutlinableRanges = TII.getOutlinableRanges(MBB, Flags);
306 LLVM_DEBUG(dbgs() << MBB.getName() << ": " << OutlinableRanges.size()
307 << " outlinable range(s)\n");
308 if (OutlinableRanges.empty())
309 return;
310
311 // Store info for the MBB for later outlining.
312 MBBFlagsMap[&MBB] = Flags;
313
314 MachineBasicBlock::iterator It = MBB.begin();
315
316 // The number of instructions in this block that will be considered for
317 // outlining.
318 unsigned NumLegalInBlock = 0;
319
320 // True if we have at least two legal instructions which aren't separated
321 // by an illegal instruction.
322 bool HaveLegalRange = false;
323
324 // True if we can perform outlining given the last mapped (non-invisible)
325 // instruction. This lets us know if we have a legal range.
326 bool CanOutlineWithPrevInstr = false;
327
328 // FIXME: Should this all just be handled in the target, rather than using
329 // repeated calls to getOutliningType?
330 SmallVector<unsigned> UnsignedVecForMBB;
331 SmallVector<MachineBasicBlock::iterator> InstrListForMBB;
332
333 LLVM_DEBUG(dbgs() << "*** Mapping outlinable ranges ***\n");
334 for (auto &OutlinableRange : OutlinableRanges) {
335 auto OutlinableRangeBegin = OutlinableRange.first;
336 auto OutlinableRangeEnd = OutlinableRange.second;
337#ifndef NDEBUG
338 LLVM_DEBUG(
339 dbgs() << "Mapping "
340 << std::distance(OutlinableRangeBegin, OutlinableRangeEnd)
341 << " instruction range\n");
342 // Everything outside of an outlinable range is illegal.
343 unsigned NumSkippedInRange = 0;
344#endif
345 for (; It != OutlinableRangeBegin; ++It) {
346#ifndef NDEBUG
347 ++NumSkippedInRange;
348#endif
349 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
350 InstrListForMBB);
351 }
352#ifndef NDEBUG
353 LLVM_DEBUG(dbgs() << "Skipped " << NumSkippedInRange
354 << " instructions outside outlinable range\n");
355#endif
356 assert(It != MBB.end() && "Should still have instructions?");
357 // `It` is now positioned at the beginning of a range of instructions
358 // which may be outlinable. Check if each instruction is known to be safe.
359 for (; It != OutlinableRangeEnd; ++It) {
360 // Keep track of where this instruction is in the module.
361 switch (TII.getOutliningType(MMI, MIT&: It, Flags)) {
362 case InstrType::Illegal:
363 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
364 InstrListForMBB);
365 break;
366
367 case InstrType::Legal:
368 mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
369 NumLegalInBlock, UnsignedVecForMBB,
370 InstrListForMBB);
371 break;
372
373 case InstrType::LegalTerminator:
374 mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
375 NumLegalInBlock, UnsignedVecForMBB,
376 InstrListForMBB);
377 // The instruction also acts as a terminator, so we have to record
378 // that in the string.
379 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
380 InstrListForMBB);
381 break;
382
383 case InstrType::Invisible:
384 // Normally this is set by mapTo(Blah)Unsigned, but we just want to
385 // skip this instruction. So, unset the flag here.
386 ++NumInvisible;
387 AddedIllegalLastTime = false;
388 break;
389 }
390 }
391 }
392
393 LLVM_DEBUG(dbgs() << "HaveLegalRange = " << HaveLegalRange << "\n");
394
395 // Are there enough legal instructions in the block for outlining to be
396 // possible?
397 if (HaveLegalRange) {
398 // After we're done every insertion, uniquely terminate this part of the
399 // "string". This makes sure we won't match across basic block or function
400 // boundaries since the "end" is encoded uniquely and thus appears in no
401 // repeated substring.
402 mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
403 InstrListForMBB);
404 ++NumSentinels;
405 append_range(C&: InstrList, R&: InstrListForMBB);
406 append_range(C&: UnsignedVec, R&: UnsignedVecForMBB);
407 }
408 }
409
410 InstructionMapper(const MachineModuleInfo &MMI_) : MMI(MMI_) {}
411};
412
413/// An interprocedural pass which finds repeated sequences of
414/// instructions and replaces them with calls to functions.
415///
416/// Each instruction is mapped to an unsigned integer and placed in a string.
417/// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
418/// is then repeatedly queried for repeated sequences of instructions. Each
419/// non-overlapping repeated sequence is then placed in its own
420/// \p MachineFunction and each instance is then replaced with a call to that
421/// function.
422struct MachineOutliner : public ModulePass {
423
424 static char ID;
425
426 MachineModuleInfo *MMI = nullptr;
427 const TargetMachine *TM = nullptr;
428
429 /// Set to true if the outliner should consider functions with
430 /// linkonceodr linkage.
431 bool OutlineFromLinkOnceODRs = false;
432
433 /// The current repeat number of machine outlining.
434 unsigned OutlineRepeatedNum = 0;
435
436 /// The mode for whether to run the outliner
437 /// Set to always-outline by default for compatibility with llc's -run-pass
438 /// option.
439 RunOutliner RunOutlinerMode = RunOutliner::AlwaysOutline;
440
441 /// This is a compact representation of hash sequences of outlined functions.
442 /// It is used when OutlinerMode = CGDataMode::Write.
443 /// The resulting hash tree will be emitted into __llvm_outlined section
444 /// which will be dead-stripped not going to the final binary.
445 /// A post-process using llvm-cgdata, lld, or ThinLTO can merge them into
446 /// a global oulined hash tree for the subsequent codegen.
447 std::unique_ptr<OutlinedHashTree> LocalHashTree;
448
449 /// The mode of the outliner.
450 /// When is's CGDataMode::None, candidates are populated with the suffix tree
451 /// within a module and outlined.
452 /// When it's CGDataMode::Write, in addition to CGDataMode::None, the hash
453 /// sequences of outlined functions are published into LocalHashTree.
454 /// When it's CGDataMode::Read, candidates are populated with the global
455 /// outlined hash tree that has been built by the previous codegen.
456 CGDataMode OutlinerMode = CGDataMode::None;
457
458 StringRef getPassName() const override { return "Machine Outliner"; }
459
460 void getAnalysisUsage(AnalysisUsage &AU) const override {
461 AU.addRequired<MachineModuleInfoWrapperPass>();
462 AU.addRequired<TargetPassConfig>();
463 AU.addPreserved<MachineModuleInfoWrapperPass>();
464 AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
465 if (RunOutlinerMode == RunOutliner::OptimisticPGO ||
466 RunOutlinerMode == RunOutliner::ConservativePGO) {
467 AU.addRequired<BlockFrequencyInfoWrapperPass>();
468 AU.addRequired<ProfileSummaryInfoWrapperPass>();
469 }
470 AU.setPreservesAll();
471 ModulePass::getAnalysisUsage(AU);
472 }
473
474 MachineOutliner() : ModulePass(ID) {}
475
476 /// Remark output explaining that not outlining a set of candidates would be
477 /// better than outlining that set.
478 void emitNotOutliningCheaperRemark(
479 unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
480 OutlinedFunction &OF);
481
482 /// Remark output explaining that a function was outlined.
483 void emitOutlinedFunctionRemark(OutlinedFunction &OF);
484
485 /// Find all repeated substrings that satisfy the outlining cost model by
486 /// constructing a suffix tree.
487 ///
488 /// If a substring appears at least twice, then it must be represented by
489 /// an internal node which appears in at least two suffixes. Each suffix
490 /// is represented by a leaf node. To do this, we visit each internal node
491 /// in the tree, using the leaf children of each internal node. If an
492 /// internal node represents a beneficial substring, then we use each of
493 /// its leaf children to find the locations of its substring.
494 ///
495 /// \param Mapper Contains outlining mapping information.
496 /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
497 /// each type of candidate.
498 void
499 findCandidates(InstructionMapper &Mapper,
500 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
501
502 /// Find all repeated substrings that match in the global outlined hash
503 /// tree built from the previous codegen.
504 ///
505 /// \param Mapper Contains outlining mapping information.
506 /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
507 /// each type of candidate.
508 void findGlobalCandidates(
509 InstructionMapper &Mapper,
510 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
511
512 /// Replace the sequences of instructions represented by \p OutlinedFunctions
513 /// with calls to functions.
514 ///
515 /// \param M The module we are outlining from.
516 /// \param FunctionList A list of functions to be inserted into the module.
517 /// \param Mapper Contains the instruction mappings for the module.
518 /// \param[out] OutlinedFunctionNum The outlined function number.
519 bool outline(Module &M,
520 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
521 InstructionMapper &Mapper, unsigned &OutlinedFunctionNum);
522
523 /// Creates a function for \p OF and inserts it into the module.
524 MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
525 InstructionMapper &Mapper,
526 unsigned Name);
527
528 /// Compute and publish the stable hash sequence of instructions in the
529 /// outlined function, \p MF. The parameter \p CandSize represents the number
530 /// of candidates that have identical instruction sequences to \p MF.
531 void computeAndPublishHashSequence(MachineFunction &MF, unsigned CandSize);
532
533 /// Initialize the outliner mode.
534 void initializeOutlinerMode(const Module &M);
535
536 /// Emit the outlined hash tree into __llvm_outline section.
537 void emitOutlinedHashTree(Module &M);
538
539 /// Calls 'doOutline()' 1 + OutlinerReruns times.
540 bool runOnModule(Module &M) override;
541
542 /// Construct a suffix tree on the instructions in \p M and outline repeated
543 /// strings from that tree.
544 bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
545
546 /// Return a DISubprogram for OF if one exists, and null otherwise. Helper
547 /// function for remark emission.
548 DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
549 for (const Candidate &C : OF.Candidates)
550 if (MachineFunction *MF = C.getMF())
551 if (DISubprogram *SP = MF->getFunction().getSubprogram())
552 return SP;
553 return nullptr;
554 }
555
556 /// Populate and \p InstructionMapper with instruction-to-integer mappings.
557 /// These are used to construct a suffix tree.
558 void populateMapper(InstructionMapper &Mapper, Module &M);
559
560 /// Initialize information necessary to output a size remark.
561 /// FIXME: This should be handled by the pass manager, not the outliner.
562 /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
563 /// pass manager.
564 void initSizeRemarkInfo(const Module &M,
565 StringMap<unsigned> &FunctionToInstrCount);
566
567 /// Emit the remark.
568 // FIXME: This should be handled by the pass manager, not the outliner.
569 void
570 emitInstrCountChangedRemark(const Module &M,
571 const StringMap<unsigned> &FunctionToInstrCount);
572};
573} // Anonymous namespace.
574
575char MachineOutliner::ID = 0;
576
577ModulePass *llvm::createMachineOutlinerPass(RunOutliner RunOutlinerMode) {
578 MachineOutliner *OL = new MachineOutliner();
579 OL->RunOutlinerMode = RunOutlinerMode;
580 return OL;
581}
582
583INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false,
584 false)
585
586void MachineOutliner::emitNotOutliningCheaperRemark(
587 unsigned StringLen, std::vector<Candidate> &CandidatesForRepeatedSeq,
588 OutlinedFunction &OF) {
589 // FIXME: Right now, we arbitrarily choose some Candidate from the
590 // OutlinedFunction. This isn't necessarily fixed, nor does it have to be.
591 // We should probably sort these by function name or something to make sure
592 // the remarks are stable.
593 Candidate &C = CandidatesForRepeatedSeq.front();
594 MachineOptimizationRemarkEmitter MORE(*(C.getMF()), nullptr);
595 MORE.emit(RemarkBuilder: [&]() {
596 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper",
597 C.front().getDebugLoc(), C.getMBB());
598 R << "Did not outline " << NV("Length", StringLen) << " instructions"
599 << " from " << NV("NumOccurrences", CandidatesForRepeatedSeq.size())
600 << " locations."
601 << " Bytes from outlining all occurrences ("
602 << NV("OutliningCost", OF.getOutliningCost()) << ")"
603 << " >= Unoutlined instruction bytes ("
604 << NV("NotOutliningCost", OF.getNotOutlinedCost()) << ")"
605 << " (Also found at: ";
606
607 // Tell the user the other places the candidate was found.
608 for (unsigned i = 1, e = CandidatesForRepeatedSeq.size(); i < e; i++) {
609 R << NV((Twine("OtherStartLoc") + Twine(i)).str(),
610 CandidatesForRepeatedSeq[i].front().getDebugLoc());
611 if (i != e - 1)
612 R << ", ";
613 }
614
615 R << ")";
616 return R;
617 });
618}
619
620void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
621 MachineBasicBlock *MBB = &*OF.MF->begin();
622 MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr);
623 MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction",
624 MBB->findDebugLoc(MBBI: MBB->begin()), MBB);
625 R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) << " bytes by "
626 << "outlining " << NV("Length", OF.getNumInstrs()) << " instructions "
627 << "from " << NV("NumOccurrences", OF.getOccurrenceCount())
628 << " locations. "
629 << "(Found at: ";
630
631 // Tell the user the other places the candidate was found.
632 for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) {
633
634 R << NV((Twine("StartLoc") + Twine(i)).str(),
635 OF.Candidates[i].front().getDebugLoc());
636 if (i != e - 1)
637 R << ", ";
638 }
639
640 R << ")";
641
642 MORE.emit(OptDiag&: R);
643}
644
645struct MatchedEntry {
646 unsigned StartIdx;
647 unsigned EndIdx;
648 unsigned Count;
649 MatchedEntry(unsigned StartIdx, unsigned EndIdx, unsigned Count)
650 : StartIdx(StartIdx), EndIdx(EndIdx), Count(Count) {}
651 MatchedEntry() = delete;
652};
653
654// Find all matches in the global outlined hash tree.
655// It's quadratic complexity in theory, but it's nearly linear in practice
656// since the length of outlined sequences are small within a block.
657static SmallVector<MatchedEntry> getMatchedEntries(InstructionMapper &Mapper) {
658 auto &InstrList = Mapper.InstrList;
659 auto &UnsignedVec = Mapper.UnsignedVec;
660
661 SmallVector<MatchedEntry> MatchedEntries;
662 auto Size = UnsignedVec.size();
663
664 // Get the global outlined hash tree built from the previous run.
665 assert(cgdata::hasOutlinedHashTree());
666 const auto *RootNode = cgdata::getOutlinedHashTree()->getRoot();
667
668 auto getValidInstr = [&](unsigned Index) -> const MachineInstr * {
669 if (UnsignedVec[Index] >= Mapper.LegalInstrNumber)
670 return nullptr;
671 return &(*InstrList[Index]);
672 };
673
674 auto getStableHashAndFollow =
675 [](const MachineInstr &MI, const HashNode *CurrNode) -> const HashNode * {
676 stable_hash StableHash = stableHashValue(MI);
677 if (!StableHash)
678 return nullptr;
679 auto It = CurrNode->Successors.find(Val: StableHash);
680 return (It == CurrNode->Successors.end()) ? nullptr : It->second.get();
681 };
682
683 for (unsigned I = 0; I < Size; ++I) {
684 const MachineInstr *MI = getValidInstr(I);
685 if (!MI || MI->isDebugInstr())
686 continue;
687 const HashNode *CurrNode = getStableHashAndFollow(*MI, RootNode);
688 if (!CurrNode)
689 continue;
690
691 for (unsigned J = I + 1; J < Size; ++J) {
692 const MachineInstr *MJ = getValidInstr(J);
693 if (!MJ)
694 break;
695 // Skip debug instructions as we did for the outlined function.
696 if (MJ->isDebugInstr())
697 continue;
698 CurrNode = getStableHashAndFollow(*MJ, CurrNode);
699 if (!CurrNode)
700 break;
701 // Even with a match ending with a terminal, we continue finding
702 // matches to populate all candidates.
703 if (auto Count = CurrNode->Terminals)
704 MatchedEntries.emplace_back(Args&: I, Args&: J, Args&: *Count);
705 }
706 }
707
708 return MatchedEntries;
709}
710
711void MachineOutliner::findGlobalCandidates(
712 InstructionMapper &Mapper,
713 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
714 FunctionList.clear();
715 auto &InstrList = Mapper.InstrList;
716 auto &MBBFlagsMap = Mapper.MBBFlagsMap;
717
718 std::vector<Candidate> CandidatesForRepeatedSeq;
719 for (auto &ME : getMatchedEntries(Mapper)) {
720 CandidatesForRepeatedSeq.clear();
721 MachineBasicBlock::iterator StartIt = InstrList[ME.StartIdx];
722 MachineBasicBlock::iterator EndIt = InstrList[ME.EndIdx];
723 auto Length = ME.EndIdx - ME.StartIdx + 1;
724 MachineBasicBlock *MBB = StartIt->getParent();
725 CandidatesForRepeatedSeq.emplace_back(args&: ME.StartIdx, args&: Length, args&: StartIt, args&: EndIt,
726 args&: MBB, args: FunctionList.size(),
727 args&: MBBFlagsMap[MBB]);
728 const TargetInstrInfo *TII =
729 MBB->getParent()->getSubtarget().getInstrInfo();
730 unsigned MinRepeats = 1;
731 std::optional<std::unique_ptr<OutlinedFunction>> OF =
732 TII->getOutliningCandidateInfo(MMI: *MMI, RepeatedSequenceLocs&: CandidatesForRepeatedSeq,
733 MinRepeats);
734 if (!OF.has_value() || OF.value()->Candidates.empty())
735 continue;
736 // We create a global candidate for each match.
737 assert(OF.value()->Candidates.size() == MinRepeats);
738 FunctionList.emplace_back(args: std::make_unique<GlobalOutlinedFunction>(
739 args: std::move(OF.value()), args&: ME.Count));
740 }
741}
742
743void MachineOutliner::findCandidates(
744 InstructionMapper &Mapper,
745 std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
746 FunctionList.clear();
747 SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
748
749 // First, find all of the repeated substrings in the tree of minimum length
750 // 2.
751 std::vector<Candidate> CandidatesForRepeatedSeq;
752 LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n");
753 LLVM_DEBUG(
754 dbgs() << "Searching for overlaps in all repeated sequences...\n");
755 for (SuffixTree::RepeatedSubstring &RS : ST) {
756 CandidatesForRepeatedSeq.clear();
757 unsigned StringLen = RS.Length;
758 LLVM_DEBUG(dbgs() << " Sequence length: " << StringLen << "\n");
759 // Debug code to keep track of how many candidates we removed.
760#ifndef NDEBUG
761 unsigned NumDiscarded = 0;
762 unsigned NumKept = 0;
763#endif
764 // Sort the start indices so that we can efficiently check if candidates
765 // overlap with the ones we've already found for this sequence.
766 llvm::sort(C&: RS.StartIndices);
767 for (const unsigned &StartIdx : RS.StartIndices) {
768 // Trick: Discard some candidates that would be incompatible with the
769 // ones we've already found for this sequence. This will save us some
770 // work in candidate selection.
771 //
772 // If two candidates overlap, then we can't outline them both. This
773 // happens when we have candidates that look like, say
774 //
775 // AA (where each "A" is an instruction).
776 //
777 // We might have some portion of the module that looks like this:
778 // AAAAAA (6 A's)
779 //
780 // In this case, there are 5 different copies of "AA" in this range, but
781 // at most 3 can be outlined. If only outlining 3 of these is going to
782 // be unbeneficial, then we ought to not bother.
783 //
784 // Note that two things DON'T overlap when they look like this:
785 // start1...end1 .... start2...end2
786 // That is, one must either
787 // * End before the other starts
788 // * Start after the other ends
789 unsigned EndIdx = StartIdx + StringLen - 1;
790 if (!CandidatesForRepeatedSeq.empty() &&
791 StartIdx <= CandidatesForRepeatedSeq.back().getEndIdx()) {
792#ifndef NDEBUG
793 ++NumDiscarded;
794 LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx << ", "
795 << EndIdx << "]; overlaps with candidate @ ["
796 << CandidatesForRepeatedSeq.back().getStartIdx()
797 << ", " << CandidatesForRepeatedSeq.back().getEndIdx()
798 << "]\n");
799#endif
800 continue;
801 }
802 // It doesn't overlap with anything, so we can outline it.
803 // Each sequence is over [StartIt, EndIt].
804 // Save the candidate and its location.
805#ifndef NDEBUG
806 ++NumKept;
807#endif
808 MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
809 MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
810 MachineBasicBlock *MBB = StartIt->getParent();
811 CandidatesForRepeatedSeq.emplace_back(args: StartIdx, args&: StringLen, args&: StartIt, args&: EndIt,
812 args&: MBB, args: FunctionList.size(),
813 args&: Mapper.MBBFlagsMap[MBB]);
814 }
815#ifndef NDEBUG
816 LLVM_DEBUG(dbgs() << " Candidates discarded: " << NumDiscarded
817 << "\n");
818 LLVM_DEBUG(dbgs() << " Candidates kept: " << NumKept << "\n\n");
819#endif
820 unsigned MinRepeats = 2;
821
822 // We've found something we might want to outline.
823 // Create an OutlinedFunction to store it and check if it'd be beneficial
824 // to outline.
825 if (CandidatesForRepeatedSeq.size() < MinRepeats)
826 continue;
827
828 // Arbitrarily choose a TII from the first candidate.
829 // FIXME: Should getOutliningCandidateInfo move to TargetMachine?
830 const TargetInstrInfo *TII =
831 CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
832
833 std::optional<std::unique_ptr<OutlinedFunction>> OF =
834 TII->getOutliningCandidateInfo(MMI: *MMI, RepeatedSequenceLocs&: CandidatesForRepeatedSeq,
835 MinRepeats);
836
837 // If we deleted too many candidates, then there's nothing worth outlining.
838 // FIXME: This should take target-specified instruction sizes into account.
839 if (!OF.has_value() || OF.value()->Candidates.size() < MinRepeats)
840 continue;
841
842 // Is it better to outline this candidate than not?
843 if (OF.value()->getBenefit() < OutlinerBenefitThreshold) {
844 emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq,
845 OF&: *OF.value());
846 continue;
847 }
848
849 FunctionList.emplace_back(args: std::move(OF.value()));
850 }
851}
852
853void MachineOutliner::computeAndPublishHashSequence(MachineFunction &MF,
854 unsigned CandSize) {
855 // Compute the hash sequence for the outlined function.
856 SmallVector<stable_hash> OutlinedHashSequence;
857 for (auto &MBB : MF) {
858 for (auto &NewMI : MBB) {
859 stable_hash Hash = stableHashValue(MI: NewMI);
860 if (!Hash) {
861 OutlinedHashSequence.clear();
862 break;
863 }
864 OutlinedHashSequence.push_back(Elt: Hash);
865 }
866 }
867
868 // Append a unique name based on the non-empty hash sequence.
869 if (AppendContentHashToOutlinedName && !OutlinedHashSequence.empty()) {
870 auto CombinedHash = stable_hash_combine(Buffer: OutlinedHashSequence);
871 auto NewName =
872 MF.getName().str() + ".content." + std::to_string(val: CombinedHash);
873 MF.getFunction().setName(NewName);
874 }
875
876 // Publish the non-empty hash sequence to the local hash tree.
877 if (OutlinerMode == CGDataMode::Write) {
878 StableHashAttempts++;
879 if (!OutlinedHashSequence.empty())
880 LocalHashTree->insert(SequencePair: {OutlinedHashSequence, CandSize});
881 else
882 StableHashDropped++;
883 }
884}
885
886MachineFunction *MachineOutliner::createOutlinedFunction(
887 Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
888
889 // Create the function name. This should be unique.
890 // FIXME: We should have a better naming scheme. This should be stable,
891 // regardless of changes to the outliner's cost model/traversal order.
892 std::string FunctionName = "OUTLINED_FUNCTION_";
893 if (OutlineRepeatedNum > 0)
894 FunctionName += std::to_string(val: OutlineRepeatedNum + 1) + "_";
895 FunctionName += std::to_string(val: Name);
896 LLVM_DEBUG(dbgs() << "NEW FUNCTION: " << FunctionName << "\n");
897
898 // Create the function using an IR-level function.
899 LLVMContext &C = M.getContext();
900 Function *F = Function::Create(Ty: FunctionType::get(Result: Type::getVoidTy(C), isVarArg: false),
901 Linkage: Function::ExternalLinkage, N: FunctionName, M);
902
903 // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
904 // which gives us better results when we outline from linkonceodr functions.
905 F->setLinkage(GlobalValue::InternalLinkage);
906 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
907
908 // Set optsize/minsize, so we don't insert padding between outlined
909 // functions.
910 F->addFnAttr(Kind: Attribute::OptimizeForSize);
911 F->addFnAttr(Kind: Attribute::MinSize);
912
913 Candidate &FirstCand = OF.Candidates.front();
914 const TargetInstrInfo &TII =
915 *FirstCand.getMF()->getSubtarget().getInstrInfo();
916
917 TII.mergeOutliningCandidateAttributes(F&: *F, Candidates&: OF.Candidates);
918
919 // Set uwtable, so we generate eh_frame.
920 UWTableKind UW = std::accumulate(
921 first: OF.Candidates.cbegin(), last: OF.Candidates.cend(), init: UWTableKind::None,
922 binary_op: [](UWTableKind K, const outliner::Candidate &C) {
923 return std::max(a: K, b: C.getMF()->getFunction().getUWTableKind());
924 });
925 F->setUWTableKind(UW);
926
927 BasicBlock *EntryBB = BasicBlock::Create(Context&: C, Name: "entry", Parent: F);
928 IRBuilder<> Builder(EntryBB);
929 Builder.CreateRetVoid();
930
931 MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
932 MachineFunction &MF = MMI.getOrCreateMachineFunction(F&: *F);
933 MF.setIsOutlined(true);
934 MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
935
936 // Insert the new function into the module.
937 MF.insert(MBBI: MF.begin(), MBB: &MBB);
938
939 MachineFunction *OriginalMF = FirstCand.front().getMF();
940 const std::vector<MCCFIInstruction> &Instrs =
941 OriginalMF->getFrameInstructions();
942 for (auto &MI : FirstCand) {
943 if (MI.isDebugInstr())
944 continue;
945
946 // Don't keep debug information for outlined instructions.
947 auto DL = DebugLoc();
948 if (MI.isCFIInstruction()) {
949 unsigned CFIIndex = MI.getOperand(i: 0).getCFIIndex();
950 MCCFIInstruction CFI = Instrs[CFIIndex];
951 BuildMI(BB&: MBB, I: MBB.end(), MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION))
952 .addCFIIndex(CFIIndex: MF.addFrameInst(Inst: CFI));
953 } else {
954 MachineInstr &NewMI = TII.duplicate(MBB, InsertBefore: MBB.end(), Orig: MI);
955 NewMI.dropMemRefs(MF);
956 NewMI.setDebugLoc(DL);
957 // Also clear debug locations on any bundled instructions.
958 if (NewMI.isBundledWithSucc()) {
959 auto BundleEnd = getBundleEnd(I: NewMI.getIterator());
960 for (auto I = std::next(x: NewMI.getIterator()); I != BundleEnd; ++I)
961 I->setDebugLoc(DL);
962 }
963 }
964 }
965
966 if (OutlinerMode != CGDataMode::None)
967 computeAndPublishHashSequence(MF, CandSize: OF.Candidates.size());
968
969 // Set normal properties for a late MachineFunction.
970 MF.getProperties().resetIsSSA();
971 MF.getProperties().setNoPHIs();
972 MF.getProperties().setNoVRegs();
973 MF.getProperties().setTracksLiveness();
974 MF.getRegInfo().freezeReservedRegs();
975
976 // Compute live-in set for outlined fn
977 const MachineRegisterInfo &MRI = MF.getRegInfo();
978 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
979 LivePhysRegs LiveIns(TRI);
980 for (auto &Cand : OF.Candidates) {
981 // Figure out live-ins at the first instruction.
982 MachineBasicBlock &OutlineBB = *Cand.front().getParent();
983 LivePhysRegs CandLiveIns(TRI);
984 CandLiveIns.addLiveOuts(MBB: OutlineBB);
985 for (const MachineInstr &MI :
986 reverse(C: make_range(x: Cand.begin(), y: OutlineBB.end())))
987 CandLiveIns.stepBackward(MI);
988
989 // The live-in set for the outlined function is the union of the live-ins
990 // from all the outlining points.
991 for (MCPhysReg Reg : CandLiveIns)
992 LiveIns.addReg(Reg);
993 }
994 addLiveIns(MBB, LiveRegs: LiveIns);
995
996 TII.buildOutlinedFrame(MBB, MF, OF);
997
998 // If there's a DISubprogram associated with this outlined function, then
999 // emit debug info for the outlined function.
1000 if (DISubprogram *SP = getSubprogramOrNull(OF)) {
1001 // We have a DISubprogram. Get its DICompileUnit.
1002 DICompileUnit *CU = SP->getUnit();
1003 DIBuilder DB(M, true, CU);
1004 DIFile *Unit = SP->getFile();
1005 Mangler Mg;
1006 // Get the mangled name of the function for the linkage name.
1007 std::string Dummy;
1008 raw_string_ostream MangledNameStream(Dummy);
1009 Mg.getNameWithPrefix(OS&: MangledNameStream, GV: F, CannotUsePrivateLabel: false);
1010
1011 DISubprogram *OutlinedSP = DB.createFunction(
1012 Scope: Unit /* Context */, Name: F->getName(), LinkageName: StringRef(Dummy), File: Unit /* File */,
1013 LineNo: 0 /* Line 0 is reserved for compiler-generated code. */,
1014 Ty: DB.createSubroutineType(ParameterTypes: DB.getOrCreateTypeArray(Elements: {})), /* void type */
1015 ScopeLine: 0, /* Line 0 is reserved for compiler-generated code. */
1016 Flags: DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
1017 /* Outlined code is optimized code by definition. */
1018 SPFlags: DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
1019
1020 // Attach subprogram to the function.
1021 F->setSubprogram(OutlinedSP);
1022 // We're done with the DIBuilder.
1023 DB.finalize();
1024 }
1025
1026 return &MF;
1027}
1028
1029bool MachineOutliner::outline(
1030 Module &M, std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
1031 InstructionMapper &Mapper, unsigned &OutlinedFunctionNum) {
1032 LLVM_DEBUG(dbgs() << "*** Outlining ***\n");
1033 LLVM_DEBUG(dbgs() << "NUMBER OF POTENTIAL FUNCTIONS: " << FunctionList.size()
1034 << "\n");
1035 bool OutlinedSomething = false;
1036
1037 // Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
1038 // The function with highest priority should be outlined first.
1039 stable_sort(Range&: FunctionList, C: [](const std::unique_ptr<OutlinedFunction> &LHS,
1040 const std::unique_ptr<OutlinedFunction> &RHS) {
1041 return LHS->getNotOutlinedCost() * RHS->getOutliningCost() >
1042 RHS->getNotOutlinedCost() * LHS->getOutliningCost();
1043 });
1044
1045 // Walk over each function, outlining them as we go along. Functions are
1046 // outlined greedily, based off the sort above.
1047 auto *UnsignedVecBegin = Mapper.UnsignedVec.begin();
1048 LLVM_DEBUG(dbgs() << "WALKING FUNCTION LIST\n");
1049 for (auto &OF : FunctionList) {
1050#ifndef NDEBUG
1051 auto NumCandidatesBefore = OF->Candidates.size();
1052#endif
1053 // If we outlined something that overlapped with a candidate in a previous
1054 // step, then we can't outline from it.
1055 erase_if(C&: OF->Candidates, P: [&UnsignedVecBegin](Candidate &C) {
1056 return std::any_of(first: UnsignedVecBegin + C.getStartIdx(),
1057 last: UnsignedVecBegin + C.getEndIdx() + 1, pred: [](unsigned I) {
1058 return I == static_cast<unsigned>(-1);
1059 });
1060 });
1061
1062#ifndef NDEBUG
1063 auto NumCandidatesAfter = OF->Candidates.size();
1064 LLVM_DEBUG(dbgs() << "PRUNED: " << NumCandidatesBefore - NumCandidatesAfter
1065 << "/" << NumCandidatesBefore << " candidates\n");
1066#endif
1067
1068 // If we made it unbeneficial to outline this function, skip it.
1069 if (OF->getBenefit() < OutlinerBenefitThreshold) {
1070 LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF->getBenefit()
1071 << " B) < threshold (" << OutlinerBenefitThreshold
1072 << " B)\n");
1073 continue;
1074 }
1075
1076 LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF->getBenefit()
1077 << " B) > threshold (" << OutlinerBenefitThreshold
1078 << " B)\n");
1079
1080 // Remove all Linker Optimization Hints from the candidates.
1081 // TODO: The intersection of the LOHs from all candidates should be legal in
1082 // the outlined function.
1083 SmallPtrSet<MachineInstr *, 2> MIs;
1084 for (Candidate &C : OF->Candidates) {
1085 for (MachineInstr &MI : C)
1086 MIs.insert(Ptr: &MI);
1087 NumRemovedLOHs += TM->clearLinkerOptimizationHints(MIs);
1088 MIs.clear();
1089 }
1090
1091 // It's beneficial. Create the function and outline its sequence's
1092 // occurrences.
1093 OF->MF = createOutlinedFunction(M, OF&: *OF, Mapper, Name: OutlinedFunctionNum);
1094 emitOutlinedFunctionRemark(OF&: *OF);
1095 FunctionsCreated++;
1096 OutlinedFunctionNum++; // Created a function, move to the next name.
1097 MachineFunction *MF = OF->MF;
1098 const TargetSubtargetInfo &STI = MF->getSubtarget();
1099 const TargetInstrInfo &TII = *STI.getInstrInfo();
1100
1101 // Replace occurrences of the sequence with calls to the new function.
1102 LLVM_DEBUG(dbgs() << "CREATE OUTLINED CALLS\n");
1103 for (Candidate &C : OF->Candidates) {
1104 MachineBasicBlock &MBB = *C.getMBB();
1105 MachineBasicBlock::iterator StartIt = C.begin();
1106 MachineBasicBlock::iterator EndIt = std::prev(x: C.end());
1107
1108 // Insert the call.
1109 auto CallInst = TII.insertOutlinedCall(M, MBB, It&: StartIt, MF&: *MF, C);
1110// Insert the call.
1111#ifndef NDEBUG
1112 auto MBBBeingOutlinedFromName =
1113 MBB.getName().empty() ? "<unknown>" : MBB.getName().str();
1114 auto MFBeingOutlinedFromName = MBB.getParent()->getName().empty()
1115 ? "<unknown>"
1116 : MBB.getParent()->getName().str();
1117 LLVM_DEBUG(dbgs() << " CALL: " << MF->getName() << " in "
1118 << MFBeingOutlinedFromName << ":"
1119 << MBBBeingOutlinedFromName << "\n");
1120 LLVM_DEBUG(dbgs() << " .. " << *CallInst);
1121#endif
1122
1123 // If the caller tracks liveness, then we need to make sure that
1124 // anything we outline doesn't break liveness assumptions. The outlined
1125 // functions themselves currently don't track liveness, but we should
1126 // make sure that the ranges we yank things out of aren't wrong.
1127 if (MBB.getParent()->getProperties().hasTracksLiveness()) {
1128 // The following code is to add implicit def operands to the call
1129 // instruction. It also updates call site information for moved
1130 // code.
1131 SmallSet<Register, 2> UseRegs, DefRegs;
1132 // Copy over the defs in the outlined range.
1133 // First inst in outlined range <-- Anything that's defined in this
1134 // ... .. range has to be added as an
1135 // implicit Last inst in outlined range <-- def to the call
1136 // instruction. Also remove call site information for outlined block
1137 // of code. The exposed uses need to be copied in the outlined range.
1138 for (MachineBasicBlock::reverse_iterator
1139 Iter = EndIt.getReverse(),
1140 Last = std::next(x: CallInst.getReverse());
1141 Iter != Last; Iter++) {
1142 MachineInstr *MI = &*Iter;
1143 if (MI->isDebugInstr())
1144 continue;
1145 SmallSet<Register, 2> InstrUseRegs;
1146 for (MachineOperand &MOP : MI->operands()) {
1147 // Skip over anything that isn't a register.
1148 if (!MOP.isReg())
1149 continue;
1150
1151 if (MOP.isDef()) {
1152 // Introduce DefRegs set to skip the redundant register.
1153 DefRegs.insert(V: MOP.getReg());
1154 if (UseRegs.count(V: MOP.getReg()) &&
1155 !InstrUseRegs.count(V: MOP.getReg()))
1156 // Since the regiester is modeled as defined,
1157 // it is not necessary to be put in use register set.
1158 UseRegs.erase(V: MOP.getReg());
1159 } else if (!MOP.isUndef()) {
1160 // Any register which is not undefined should
1161 // be put in the use register set.
1162 UseRegs.insert(V: MOP.getReg());
1163 InstrUseRegs.insert(V: MOP.getReg());
1164 }
1165 }
1166 if (MI->isCandidateForAdditionalCallInfo())
1167 MI->getMF()->eraseAdditionalCallInfo(MI);
1168 }
1169
1170 for (const Register &I : DefRegs)
1171 // If it's a def, add it to the call instruction.
1172 CallInst->addOperand(
1173 Op: MachineOperand::CreateReg(Reg: I, isDef: true, /* isDef = true */
1174 isImp: true /* isImp = true */));
1175
1176 for (const Register &I : UseRegs)
1177 // If it's a exposed use, add it to the call instruction.
1178 CallInst->addOperand(
1179 Op: MachineOperand::CreateReg(Reg: I, isDef: false, /* isDef = false */
1180 isImp: true /* isImp = true */));
1181 }
1182
1183 // Erase from the point after where the call was inserted up to, and
1184 // including, the final instruction in the sequence.
1185 // Erase needs one past the end, so we need std::next there too.
1186 MBB.erase(I: std::next(x: StartIt), E: std::next(x: EndIt));
1187
1188 // Keep track of what we removed by marking them all as -1.
1189 for (unsigned &I : make_range(x: UnsignedVecBegin + C.getStartIdx(),
1190 y: UnsignedVecBegin + C.getEndIdx() + 1))
1191 I = static_cast<unsigned>(-1);
1192 OutlinedSomething = true;
1193
1194 // Statistics.
1195 NumOutlined++;
1196 }
1197 }
1198
1199 LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n");
1200 return OutlinedSomething;
1201}
1202
1203static bool allowPGOOutlining(RunOutliner RunOutlinerMode,
1204 const ProfileSummaryInfo *PSI,
1205 const BlockFrequencyInfo *BFI,
1206 MachineBasicBlock &MBB) {
1207 if (RunOutlinerMode != RunOutliner::OptimisticPGO &&
1208 RunOutlinerMode != RunOutliner::ConservativePGO)
1209 return true;
1210 auto *MF = MBB.getParent();
1211 if (MF->getFunction().hasFnAttribute(Kind: Attribute::Cold)) {
1212 ++NumPGOAllowedCold;
1213 return true;
1214 }
1215
1216 auto *BB = MBB.getBasicBlock();
1217 if (BB && PSI && BFI)
1218 if (auto Count = BFI->getBlockProfileCount(BB))
1219 return *Count <= PSI->getOrCompColdCountThreshold();
1220
1221 if (RunOutlinerMode == RunOutliner::OptimisticPGO) {
1222 auto *TII = MF->getSubtarget().getInstrInfo();
1223 if (TII->shouldOutlineFromFunctionByDefault(MF&: *MF)) {
1224 // Profile data is unavailable, but we optimistically allow outlining
1225 ++NumPGOOptimisticOutlined;
1226 return true;
1227 }
1228 return false;
1229 }
1230 assert(RunOutlinerMode == RunOutliner::ConservativePGO);
1231 // Profile data is unavailable, so we conservatively block outlining
1232 ++NumPGOConservativeBlockedOutlined;
1233 return false;
1234}
1235
1236void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
1237 // Build instruction mappings for each function in the module. Start by
1238 // iterating over each Function in M.
1239 LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
1240 bool EnableProfileGuidedOutlining =
1241 RunOutlinerMode == RunOutliner::OptimisticPGO ||
1242 RunOutlinerMode == RunOutliner::ConservativePGO;
1243 ProfileSummaryInfo *PSI = nullptr;
1244 if (EnableProfileGuidedOutlining)
1245 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1246 for (Function &F : M) {
1247 LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
1248
1249 if (F.hasFnAttribute(Kind: Attribute::NoOutline)) {
1250 LLVM_DEBUG(dbgs() << "SKIP: Function has nooutline attribute\n");
1251 continue;
1252 }
1253
1254 // There's something in F. Check if it has a MachineFunction associated with
1255 // it.
1256 MachineFunction *MF = MMI->getMachineFunction(F);
1257
1258 // If it doesn't, then there's nothing to outline from. Move to the next
1259 // Function.
1260 if (!MF) {
1261 LLVM_DEBUG(dbgs() << "SKIP: Function does not have a MachineFunction\n");
1262 continue;
1263 }
1264
1265 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1266 BlockFrequencyInfo *BFI = nullptr;
1267 if (EnableProfileGuidedOutlining && F.hasProfileData())
1268 BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
1269 if (RunOutlinerMode == RunOutliner::TargetDefault &&
1270 !TII->shouldOutlineFromFunctionByDefault(MF&: *MF)) {
1271 LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
1272 "function by default\n");
1273 continue;
1274 }
1275
1276 // We have a MachineFunction. Ask the target if it's suitable for outlining.
1277 // If it isn't, then move on to the next Function in the module.
1278 if (!TII->isFunctionSafeToOutlineFrom(MF&: *MF, OutlineFromLinkOnceODRs)) {
1279 LLVM_DEBUG(dbgs() << "SKIP: " << MF->getName()
1280 << ": unsafe to outline from\n");
1281 continue;
1282 }
1283
1284 // We have a function suitable for outlining. Iterate over every
1285 // MachineBasicBlock in MF and try to map its instructions to a list of
1286 // unsigned integers.
1287 const unsigned MinMBBSize = 2;
1288
1289 for (MachineBasicBlock &MBB : *MF) {
1290 LLVM_DEBUG(dbgs() << " MAPPING MBB: '" << MBB.getName() << "'\n");
1291 // If there isn't anything in MBB, then there's no point in outlining from
1292 // it.
1293 // If there are fewer than 2 instructions in the MBB, then it can't ever
1294 // contain something worth outlining.
1295 // FIXME: This should be based off of the maximum size in B of an outlined
1296 // call versus the size in B of the MBB.
1297 if (MBB.size() < MinMBBSize) {
1298 LLVM_DEBUG(dbgs() << " SKIP: MBB size less than minimum size of "
1299 << MinMBBSize << "\n");
1300 continue;
1301 }
1302
1303 // Check if MBB could be the target of an indirect branch. If it is, then
1304 // we don't want to outline from it.
1305 if (MBB.hasAddressTaken()) {
1306 LLVM_DEBUG(dbgs() << " SKIP: MBB's address is taken\n");
1307 continue;
1308 }
1309
1310 if (!allowPGOOutlining(RunOutlinerMode, PSI, BFI, MBB)) {
1311 ++NumPGOBlockedOutlined;
1312 continue;
1313 }
1314
1315 // MBB is suitable for outlining. Map it to a list of unsigneds.
1316 Mapper.convertToUnsignedVec(MBB, TII: *TII);
1317 }
1318 }
1319 // Statistics.
1320 UnsignedVecSize = Mapper.UnsignedVec.size();
1321}
1322
1323void MachineOutliner::initSizeRemarkInfo(
1324 const Module &M, StringMap<unsigned> &FunctionToInstrCount) {
1325 // Collect instruction counts for every function. We'll use this to emit
1326 // per-function size remarks later.
1327 for (const Function &F : M) {
1328 MachineFunction *MF = MMI->getMachineFunction(F);
1329
1330 // We only care about MI counts here. If there's no MachineFunction at this
1331 // point, then there won't be after the outliner runs, so let's move on.
1332 if (!MF)
1333 continue;
1334 FunctionToInstrCount[F.getName().str()] = MF->getInstructionCount();
1335 }
1336}
1337
1338void MachineOutliner::emitInstrCountChangedRemark(
1339 const Module &M, const StringMap<unsigned> &FunctionToInstrCount) {
1340 // Iterate over each function in the module and emit remarks.
1341 // Note that we won't miss anything by doing this, because the outliner never
1342 // deletes functions.
1343 for (const Function &F : M) {
1344 MachineFunction *MF = MMI->getMachineFunction(F);
1345
1346 // The outliner never deletes functions. If we don't have a MF here, then we
1347 // didn't have one prior to outlining either.
1348 if (!MF)
1349 continue;
1350
1351 std::string Fname = std::string(F.getName());
1352 unsigned FnCountAfter = MF->getInstructionCount();
1353 unsigned FnCountBefore = 0;
1354
1355 // Check if the function was recorded before.
1356 auto It = FunctionToInstrCount.find(Key: Fname);
1357
1358 // Did we have a previously-recorded size? If yes, then set FnCountBefore
1359 // to that.
1360 if (It != FunctionToInstrCount.end())
1361 FnCountBefore = It->second;
1362
1363 // Compute the delta and emit a remark if there was a change.
1364 int64_t FnDelta = static_cast<int64_t>(FnCountAfter) -
1365 static_cast<int64_t>(FnCountBefore);
1366 if (FnDelta == 0)
1367 continue;
1368
1369 MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
1370 MORE.emit(RemarkBuilder: [&]() {
1371 MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
1372 DiagnosticLocation(), &MF->front());
1373 R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
1374 << ": Function: "
1375 << DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
1376 << ": MI instruction count changed from "
1377 << DiagnosticInfoOptimizationBase::Argument("MIInstrsBefore",
1378 FnCountBefore)
1379 << " to "
1380 << DiagnosticInfoOptimizationBase::Argument("MIInstrsAfter",
1381 FnCountAfter)
1382 << "; Delta: "
1383 << DiagnosticInfoOptimizationBase::Argument("Delta", FnDelta);
1384 return R;
1385 });
1386 }
1387}
1388
1389void MachineOutliner::initializeOutlinerMode(const Module &M) {
1390 if (DisableGlobalOutlining)
1391 return;
1392
1393 if (auto *IndexWrapperPass =
1394 getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) {
1395 auto *TheIndex = IndexWrapperPass->getIndex();
1396 // (Full)LTO module does not have functions added to the index.
1397 // In this case, we run the outliner without using codegen data as usual.
1398 if (TheIndex && !TheIndex->hasExportedFunctions(M))
1399 return;
1400 }
1401
1402 // When codegen data write is enabled, we want to write the local outlined
1403 // hash tree to the custom section, `__llvm_outline`.
1404 // When the outlined hash tree is available from the previous codegen data,
1405 // we want to read it to optimistically create global outlining candidates.
1406 if (cgdata::emitCGData()) {
1407 OutlinerMode = CGDataMode::Write;
1408 // Create a local outlined hash tree to be published.
1409 LocalHashTree = std::make_unique<OutlinedHashTree>();
1410 // We don't need to read the outlined hash tree from the previous codegen
1411 } else if (cgdata::hasOutlinedHashTree())
1412 OutlinerMode = CGDataMode::Read;
1413}
1414
1415void MachineOutliner::emitOutlinedHashTree(Module &M) {
1416 assert(LocalHashTree);
1417 if (!LocalHashTree->empty()) {
1418 LLVM_DEBUG({
1419 dbgs() << "Emit outlined hash tree. Size: " << LocalHashTree->size()
1420 << "\n";
1421 });
1422 SmallVector<char> Buf;
1423 raw_svector_ostream OS(Buf);
1424
1425 OutlinedHashTreeRecord HTR(std::move(LocalHashTree));
1426 HTR.serialize(OS);
1427
1428 llvm::StringRef Data(Buf.data(), Buf.size());
1429 std::unique_ptr<MemoryBuffer> Buffer =
1430 MemoryBuffer::getMemBuffer(InputData: Data, BufferName: "in-memory outlined hash tree", RequiresNullTerminator: false);
1431
1432 Triple TT(M.getTargetTriple());
1433 embedBufferInModule(
1434 M, Buf: *Buffer,
1435 SectionName: getCodeGenDataSectionName(CGSK: CG_outline, OF: TT.getObjectFormat()));
1436 }
1437}
1438
1439bool MachineOutliner::runOnModule(Module &M) {
1440 if (skipModule(M))
1441 return false;
1442
1443 // Check if there's anything in the module. If it's empty, then there's
1444 // nothing to outline.
1445 if (M.empty())
1446 return false;
1447
1448 // Initialize the outliner mode.
1449 initializeOutlinerMode(M);
1450
1451 MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
1452 TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
1453
1454 // Number to append to the current outlined function.
1455 unsigned OutlinedFunctionNum = 0;
1456
1457 OutlineRepeatedNum = 0;
1458 if (!doOutline(M, OutlinedFunctionNum))
1459 return false;
1460
1461 for (unsigned I = 0; I < OutlinerReruns; ++I) {
1462 OutlinedFunctionNum = 0;
1463 OutlineRepeatedNum++;
1464 if (!doOutline(M, OutlinedFunctionNum)) {
1465 LLVM_DEBUG({
1466 dbgs() << "Did not outline on iteration " << I + 2 << " out of "
1467 << OutlinerReruns + 1 << "\n";
1468 });
1469 break;
1470 }
1471 }
1472
1473 if (OutlinerMode == CGDataMode::Write)
1474 emitOutlinedHashTree(M);
1475
1476 return true;
1477}
1478
1479bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
1480 // If the user passed -enable-machine-outliner=always or
1481 // -enable-machine-outliner, the pass will run on all functions in the module.
1482 // Otherwise, if the target supports default outlining, it will run on all
1483 // functions deemed by the target to be worth outlining from by default. Tell
1484 // the user how the outliner is running.
1485 LLVM_DEBUG({
1486 dbgs() << "Machine Outliner: Running on ";
1487 switch (RunOutlinerMode) {
1488 case RunOutliner::AlwaysOutline:
1489 dbgs() << "all functions";
1490 break;
1491 case RunOutliner::OptimisticPGO:
1492 dbgs() << "optimistically cold functions";
1493 break;
1494 case RunOutliner::ConservativePGO:
1495 dbgs() << "conservatively cold functions";
1496 break;
1497 case RunOutliner::TargetDefault:
1498 dbgs() << "target-default functions";
1499 break;
1500 case RunOutliner::NeverOutline:
1501 llvm_unreachable("should not outline");
1502 }
1503 dbgs() << "\n";
1504 });
1505
1506 // If the user specifies that they want to outline from linkonceodrs, set
1507 // it here.
1508 OutlineFromLinkOnceODRs = EnableLinkOnceODROutlining;
1509 InstructionMapper Mapper(*MMI);
1510
1511 // Prepare instruction mappings for the suffix tree.
1512 populateMapper(Mapper, M);
1513 std::vector<std::unique_ptr<OutlinedFunction>> FunctionList;
1514
1515 // Find all of the outlining candidates.
1516 if (OutlinerMode == CGDataMode::Read)
1517 findGlobalCandidates(Mapper, FunctionList);
1518 else
1519 findCandidates(Mapper, FunctionList);
1520
1521 // If we've requested size remarks, then collect the MI counts of every
1522 // function before outlining, and the MI counts after outlining.
1523 // FIXME: This shouldn't be in the outliner at all; it should ultimately be
1524 // the pass manager's responsibility.
1525 // This could pretty easily be placed in outline instead, but because we
1526 // really ultimately *don't* want this here, it's done like this for now
1527 // instead.
1528
1529 // Check if we want size remarks.
1530 bool ShouldEmitSizeRemarks = M.shouldEmitInstrCountChangedRemark();
1531 StringMap<unsigned> FunctionToInstrCount;
1532 if (ShouldEmitSizeRemarks)
1533 initSizeRemarkInfo(M, FunctionToInstrCount);
1534
1535 // Outline each of the candidates and return true if something was outlined.
1536 bool OutlinedSomething =
1537 outline(M, FunctionList, Mapper, OutlinedFunctionNum);
1538
1539 // If we outlined something, we definitely changed the MI count of the
1540 // module. If we've asked for size remarks, then output them.
1541 // FIXME: This should be in the pass manager.
1542 if (ShouldEmitSizeRemarks && OutlinedSomething)
1543 emitInstrCountChangedRemark(M, FunctionToInstrCount);
1544
1545 LLVM_DEBUG({
1546 if (!OutlinedSomething)
1547 dbgs() << "Stopped outlining at iteration " << OutlineRepeatedNum
1548 << " because no changes were found.\n";
1549 });
1550
1551 return OutlinedSomething;
1552}
1553