1//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10// Implementation for the IROutliner which is used by the IROutliner Pass.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/IROutliner.h"
15#include "llvm/Analysis/IRSimilarityIdentifier.h"
16#include "llvm/Analysis/OptimizationRemarkEmitter.h"
17#include "llvm/Analysis/TargetTransformInfo.h"
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/DIBuilder.h"
20#include "llvm/IR/DebugInfo.h"
21#include "llvm/IR/DebugInfoMetadata.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/Mangler.h"
24#include "llvm/IR/PassManager.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Transforms/IPO.h"
27#include "llvm/Transforms/Utils/ValueMapper.h"
28#include <optional>
29#include <vector>
30
31#define DEBUG_TYPE "iroutliner"
32
33using namespace llvm;
34using namespace IRSimilarity;
35
36// A command flag to be used for debugging to exclude branches from similarity
37// matching and outlining.
38namespace llvm {
39extern cl::opt<bool> DisableBranches;
40
41// A command flag to be used for debugging to indirect calls from similarity
42// matching and outlining.
43extern cl::opt<bool> DisableIndirectCalls;
44
45// A command flag to be used for debugging to exclude intrinsics from similarity
46// matching and outlining.
47extern cl::opt<bool> DisableIntrinsics;
48
49} // namespace llvm
50
51// Set to true if the user wants the ir outliner to run on linkonceodr linkage
52// functions. This is false by default because the linker can dedupe linkonceodr
53// functions. Since the outliner is confined to a single module (modulo LTO),
54// this is off by default. It should, however, be the default behavior in
55// LTO.
56static cl::opt<bool> EnableLinkOnceODRIROutlining(
57 "enable-linkonceodr-ir-outlining", cl::Hidden,
58 cl::desc("Enable the IR outliner on linkonceodr functions"),
59 cl::init(Val: false));
60
61// This is a debug option to test small pieces of code to ensure that outlining
62// works correctly.
63static cl::opt<bool> NoCostModel(
64 "ir-outlining-no-cost", cl::init(Val: false), cl::ReallyHidden,
65 cl::desc("Debug option to outline greedily, without restriction that "
66 "calculated benefit outweighs cost"));
67
68/// The OutlinableGroup holds all the overarching information for outlining
69/// a set of regions that are structurally similar to one another, such as the
70/// types of the overall function, the output blocks, the sets of stores needed
71/// and a list of the different regions. This information is used in the
72/// deduplication of extracted regions with the same structure.
73struct OutlinableGroup {
74 /// The sections that could be outlined
75 std::vector<OutlinableRegion *> Regions;
76
77 /// The argument types for the function created as the overall function to
78 /// replace the extracted function for each region.
79 std::vector<Type *> ArgumentTypes;
80 /// The FunctionType for the overall function.
81 FunctionType *OutlinedFunctionType = nullptr;
82 /// The Function for the collective overall function.
83 Function *OutlinedFunction = nullptr;
84
85 /// Flag for whether we should not consider this group of OutlinableRegions
86 /// for extraction.
87 bool IgnoreGroup = false;
88
89 /// The return blocks for the overall function.
90 DenseMap<Value *, BasicBlock *> EndBBs;
91
92 /// The PHIBlocks with their corresponding return block based on the return
93 /// value as the key.
94 DenseMap<Value *, BasicBlock *> PHIBlocks;
95
96 /// A set containing the different GVN store sets needed. Each array contains
97 /// a sorted list of the different values that need to be stored into output
98 /// registers.
99 DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
100
101 /// Flag for whether the \ref ArgumentTypes have been defined after the
102 /// extraction of the first region.
103 bool InputTypesSet = false;
104
105 /// The number of input values in \ref ArgumentTypes. Anything after this
106 /// index in ArgumentTypes is an output argument.
107 unsigned NumAggregateInputs = 0;
108
109 /// The mapping of the canonical numbering of the values in outlined sections
110 /// to specific arguments.
111 DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
112
113 /// The number of branches in the region target a basic block that is outside
114 /// of the region.
115 unsigned BranchesToOutside = 0;
116
117 /// Tracker counting backwards from the highest unsigned value possible to
118 /// avoid conflicting with the GVNs of assigned values. We start at -3 since
119 /// -2 and -1 are assigned by the DenseMap.
120 unsigned PHINodeGVNTracker = -3;
121
122 DenseMap<unsigned,
123 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
124 PHINodeGVNToGVNs;
125 DenseMap<hash_code, unsigned> GVNsToPHINodeGVN;
126
127 /// The number of instructions that will be outlined by extracting \ref
128 /// Regions.
129 InstructionCost Benefit = 0;
130 /// The number of added instructions needed for the outlining of the \ref
131 /// Regions.
132 InstructionCost Cost = 0;
133
134 /// The argument that needs to be marked with the swifterr attribute. If not
135 /// needed, there is no value.
136 std::optional<unsigned> SwiftErrorArgument;
137
138 /// For the \ref Regions, we look at every Value. If it is a constant,
139 /// we check whether it is the same in Region.
140 ///
141 /// \param [in,out] NotSame contains the global value numbers where the
142 /// constant is not always the same, and must be passed in as an argument.
143 void findSameConstants(DenseSet<unsigned> &NotSame);
144
145 /// For the regions, look at each set of GVN stores needed and account for
146 /// each combination. Add an argument to the argument types if there is
147 /// more than one combination.
148 ///
149 /// \param [in] M - The module we are outlining from.
150 void collectGVNStoreSets(Module &M);
151};
152
153/// Move the contents of \p SourceBB to before the last instruction of \p
154/// TargetBB.
155/// \param SourceBB - the BasicBlock to pull Instructions from.
156/// \param TargetBB - the BasicBlock to put Instruction into.
157static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
158 TargetBB.splice(ToIt: TargetBB.end(), FromBB: &SourceBB);
159}
160
161/// A function to sort the keys of \p Map, which must be a mapping of constant
162/// values to basic blocks and return it in \p SortedKeys
163///
164/// \param SortedKeys - The vector the keys will be return in and sorted.
165/// \param Map - The DenseMap containing keys to sort.
166static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
167 DenseMap<Value *, BasicBlock *> &Map) {
168 for (auto &VtoBB : Map)
169 SortedKeys.push_back(x: VtoBB.first);
170
171 // Here we expect to have either 1 value that is void (nullptr) or multiple
172 // values that are all constant integers.
173 if (SortedKeys.size() == 1) {
174 assert(!SortedKeys[0] && "Expected a single void value.");
175 return;
176 }
177
178 stable_sort(Range&: SortedKeys, C: [](const Value *LHS, const Value *RHS) {
179 assert(LHS && RHS && "Expected non void values.");
180 const ConstantInt *LHSC = cast<ConstantInt>(Val: LHS);
181 const ConstantInt *RHSC = cast<ConstantInt>(Val: RHS);
182
183 return LHSC->getLimitedValue() < RHSC->getLimitedValue();
184 });
185}
186
187Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
188 Value *V) {
189 std::optional<unsigned> GVN = Candidate->getGVN(V);
190 assert(GVN && "No GVN for incoming value");
191 std::optional<unsigned> CanonNum = Candidate->getCanonicalNum(N: *GVN);
192 std::optional<unsigned> FirstGVN =
193 Other.Candidate->fromCanonicalNum(N: *CanonNum);
194 std::optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(Num: *FirstGVN);
195 return FoundValueOpt.value_or(u: nullptr);
196}
197
198BasicBlock *
199OutlinableRegion::findCorrespondingBlockIn(const OutlinableRegion &Other,
200 BasicBlock *BB) {
201 Instruction *FirstNonPHI = &*BB->getFirstNonPHIOrDbg();
202 assert(FirstNonPHI && "block is empty?");
203 Value *CorrespondingVal = findCorrespondingValueIn(Other, V: FirstNonPHI);
204 if (!CorrespondingVal)
205 return nullptr;
206 BasicBlock *CorrespondingBlock =
207 cast<Instruction>(Val: CorrespondingVal)->getParent();
208 return CorrespondingBlock;
209}
210
211/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
212/// in \p Included to branch to BasicBlock \p Replace if they currently branch
213/// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
214/// when PHINodes are included in outlined regions.
215///
216/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
217/// checked.
218/// \param Find - The successor block to be replaced.
219/// \param Replace - The new succesor block to branch to.
220/// \param Included - The set of blocks about to be outlined.
221static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
222 BasicBlock *Replace,
223 DenseSet<BasicBlock *> &Included) {
224 for (PHINode &PN : PHIBlock->phis()) {
225 for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
226 ++Idx) {
227 // Check if the incoming block is included in the set of blocks being
228 // outlined.
229 BasicBlock *Incoming = PN.getIncomingBlock(i: Idx);
230 if (!Included.contains(V: Incoming))
231 continue;
232
233 BranchInst *BI = dyn_cast<BranchInst>(Val: Incoming->getTerminator());
234 assert(BI && "Not a branch instruction?");
235 // Look over the branching instructions into this block to see if we
236 // used to branch to Find in this outlined block.
237 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
238 Succ++) {
239 // If we have found the block to replace, we do so here.
240 if (BI->getSuccessor(i: Succ) != Find)
241 continue;
242 BI->setSuccessor(idx: Succ, NewSucc: Replace);
243 }
244 }
245 }
246}
247
248
249void OutlinableRegion::splitCandidate() {
250 assert(!CandidateSplit && "Candidate already split!");
251
252 Instruction *BackInst = Candidate->backInstruction();
253
254 Instruction *EndInst = nullptr;
255 // Check whether the last instruction is a terminator, if it is, we do
256 // not split on the following instruction. We leave the block as it is. We
257 // also check that this is not the last instruction in the Module, otherwise
258 // the check for whether the current following instruction matches the
259 // previously recorded instruction will be incorrect.
260 if (!BackInst->isTerminator() ||
261 BackInst->getParent() != &BackInst->getFunction()->back()) {
262 EndInst = Candidate->end()->Inst;
263 assert(EndInst && "Expected an end instruction?");
264 }
265
266 // We check if the current instruction following the last instruction in the
267 // region is the same as the recorded instruction following the last
268 // instruction. If they do not match, there could be problems in rewriting
269 // the program after outlining, so we ignore it.
270 if (!BackInst->isTerminator() && EndInst != BackInst->getNextNode())
271 return;
272
273 Instruction *StartInst = (*Candidate->begin()).Inst;
274 assert(StartInst && "Expected a start instruction?");
275 StartBB = StartInst->getParent();
276 PrevBB = StartBB;
277
278 DenseSet<BasicBlock *> BBSet;
279 Candidate->getBasicBlocks(BBSet);
280
281 // We iterate over the instructions in the region, if we find a PHINode, we
282 // check if there are predecessors outside of the region, if there are,
283 // we ignore this region since we are unable to handle the severing of the
284 // phi node right now.
285
286 // TODO: Handle extraneous inputs for PHINodes through variable number of
287 // inputs, similar to how outputs are handled.
288 BasicBlock::iterator It = StartInst->getIterator();
289 EndBB = BackInst->getParent();
290 BasicBlock *IBlock;
291 BasicBlock *PHIPredBlock = nullptr;
292 bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
293 while (PHINode *PN = dyn_cast<PHINode>(Val: &*It)) {
294 unsigned NumPredsOutsideRegion = 0;
295 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
296 if (!BBSet.contains(V: PN->getIncomingBlock(i))) {
297 PHIPredBlock = PN->getIncomingBlock(i);
298 ++NumPredsOutsideRegion;
299 continue;
300 }
301
302 // We must consider the case there the incoming block to the PHINode is
303 // the same as the final block of the OutlinableRegion. If this is the
304 // case, the branch from this block must also be outlined to be valid.
305 IBlock = PN->getIncomingBlock(i);
306 if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
307 PHIPredBlock = PN->getIncomingBlock(i);
308 ++NumPredsOutsideRegion;
309 }
310 }
311
312 if (NumPredsOutsideRegion > 1)
313 return;
314
315 It++;
316 }
317
318 // If the region starts with a PHINode, but is not the initial instruction of
319 // the BasicBlock, we ignore this region for now.
320 if (isa<PHINode>(Val: StartInst) && StartInst != &*StartBB->begin())
321 return;
322
323 // If the region ends with a PHINode, but does not contain all of the phi node
324 // instructions of the region, we ignore it for now.
325 if (isa<PHINode>(Val: BackInst) &&
326 BackInst != &*std::prev(x: EndBB->getFirstInsertionPt()))
327 return;
328
329 // The basic block gets split like so:
330 // block: block:
331 // inst1 inst1
332 // inst2 inst2
333 // region1 br block_to_outline
334 // region2 block_to_outline:
335 // region3 -> region1
336 // region4 region2
337 // inst3 region3
338 // inst4 region4
339 // br block_after_outline
340 // block_after_outline:
341 // inst3
342 // inst4
343
344 std::string OriginalName = PrevBB->getName().str();
345
346 StartBB = PrevBB->splitBasicBlock(I: StartInst, BBName: OriginalName + "_to_outline");
347 PrevBB->replaceSuccessorsPhiUsesWith(Old: PrevBB, New: StartBB);
348 // If there was a PHINode with an incoming block outside the region,
349 // make sure is correctly updated in the newly split block.
350 if (PHIPredBlock)
351 PrevBB->replaceSuccessorsPhiUsesWith(Old: PHIPredBlock, New: PrevBB);
352
353 CandidateSplit = true;
354 if (!BackInst->isTerminator()) {
355 EndBB = EndInst->getParent();
356 FollowBB = EndBB->splitBasicBlock(I: EndInst, BBName: OriginalName + "_after_outline");
357 EndBB->replaceSuccessorsPhiUsesWith(Old: EndBB, New: FollowBB);
358 FollowBB->replaceSuccessorsPhiUsesWith(Old: PrevBB, New: FollowBB);
359 } else {
360 EndBB = BackInst->getParent();
361 EndsInBranch = true;
362 FollowBB = nullptr;
363 }
364
365 // Refind the basic block set.
366 BBSet.clear();
367 Candidate->getBasicBlocks(BBSet);
368 // For the phi nodes in the new starting basic block of the region, we
369 // reassign the targets of the basic blocks branching instructions.
370 replaceTargetsFromPHINode(PHIBlock: StartBB, Find: PrevBB, Replace: StartBB, Included&: BBSet);
371 if (FollowBB)
372 replaceTargetsFromPHINode(PHIBlock: FollowBB, Find: EndBB, Replace: FollowBB, Included&: BBSet);
373}
374
375void OutlinableRegion::reattachCandidate() {
376 assert(CandidateSplit && "Candidate is not split!");
377
378 // The basic block gets reattached like so:
379 // block: block:
380 // inst1 inst1
381 // inst2 inst2
382 // br block_to_outline region1
383 // block_to_outline: -> region2
384 // region1 region3
385 // region2 region4
386 // region3 inst3
387 // region4 inst4
388 // br block_after_outline
389 // block_after_outline:
390 // inst3
391 // inst4
392 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
393
394 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
395 // Make sure PHINode references to the block we are merging into are
396 // updated to be incoming blocks from the predecessor to the current block.
397
398 // NOTE: If this is updated such that the outlined block can have more than
399 // one incoming block to a PHINode, this logic will have to updated
400 // to handle multiple precessors instead.
401
402 // We only need to update this if the outlined section contains a PHINode, if
403 // it does not, then the incoming block was never changed in the first place.
404 // On the other hand, if PrevBB has no predecessors, it means that all
405 // incoming blocks to the first block are contained in the region, and there
406 // will be nothing to update.
407 Instruction *StartInst = (*Candidate->begin()).Inst;
408 if (isa<PHINode>(Val: StartInst) && !PrevBB->hasNPredecessors(N: 0)) {
409 assert(!PrevBB->hasNPredecessorsOrMore(2) &&
410 "PrevBB has more than one predecessor. Should be 0 or 1.");
411 BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
412 PrevBB->replaceSuccessorsPhiUsesWith(Old: PrevBB, New: BeforePrevBB);
413 }
414 PrevBB->getTerminator()->eraseFromParent();
415
416 // If we reattaching after outlining, we iterate over the phi nodes to
417 // the initial block, and reassign the branch instructions of the incoming
418 // blocks to the block we are remerging into.
419 if (!ExtractedFunction) {
420 DenseSet<BasicBlock *> BBSet;
421 Candidate->getBasicBlocks(BBSet);
422
423 replaceTargetsFromPHINode(PHIBlock: StartBB, Find: StartBB, Replace: PrevBB, Included&: BBSet);
424 if (!EndsInBranch)
425 replaceTargetsFromPHINode(PHIBlock: FollowBB, Find: FollowBB, Replace: EndBB, Included&: BBSet);
426 }
427
428 moveBBContents(SourceBB&: *StartBB, TargetBB&: *PrevBB);
429
430 BasicBlock *PlacementBB = PrevBB;
431 if (StartBB != EndBB)
432 PlacementBB = EndBB;
433 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
434 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
435 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
436 PlacementBB->getTerminator()->eraseFromParent();
437 moveBBContents(SourceBB&: *FollowBB, TargetBB&: *PlacementBB);
438 PlacementBB->replaceSuccessorsPhiUsesWith(Old: FollowBB, New: PlacementBB);
439 FollowBB->eraseFromParent();
440 }
441
442 PrevBB->replaceSuccessorsPhiUsesWith(Old: StartBB, New: PrevBB);
443 StartBB->eraseFromParent();
444
445 // Make sure to save changes back to the StartBB.
446 StartBB = PrevBB;
447 EndBB = nullptr;
448 PrevBB = nullptr;
449 FollowBB = nullptr;
450
451 CandidateSplit = false;
452}
453
454/// Find whether \p V matches the Constants previously found for the \p GVN.
455///
456/// \param V - The value to check for consistency.
457/// \param GVN - The global value number assigned to \p V.
458/// \param GVNToConstant - The mapping of global value number to Constants.
459/// \returns true if the Value matches the Constant mapped to by V and false if
460/// it \p V is a Constant but does not match.
461/// \returns std::nullopt if \p V is not a Constant.
462static std::optional<bool>
463constantMatches(Value *V, unsigned GVN,
464 DenseMap<unsigned, Constant *> &GVNToConstant) {
465 // See if we have a constants
466 Constant *CST = dyn_cast<Constant>(Val: V);
467 if (!CST)
468 return std::nullopt;
469
470 // Holds a mapping from a global value number to a Constant.
471 DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
472 bool Inserted;
473
474
475 // If we have a constant, try to make a new entry in the GVNToConstant.
476 std::tie(args&: GVNToConstantIt, args&: Inserted) =
477 GVNToConstant.insert(KV: std::make_pair(x&: GVN, y&: CST));
478 // If it was found and is not equal, it is not the same. We do not
479 // handle this case yet, and exit early.
480 if (Inserted || (GVNToConstantIt->second == CST))
481 return true;
482
483 return false;
484}
485
486InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
487 InstructionCost Benefit = 0;
488
489 // Estimate the benefit of outlining a specific sections of the program. We
490 // delegate mostly this task to the TargetTransformInfo so that if the target
491 // has specific changes, we can have a more accurate estimate.
492
493 // However, getInstructionCost delegates the code size calculation for
494 // arithmetic instructions to getArithmeticInstrCost in
495 // include/Analysis/TargetTransformImpl.h, where it always estimates that the
496 // code size for a division and remainder instruction to be equal to 4, and
497 // everything else to 1. This is not an accurate representation of the
498 // division instruction for targets that have a native division instruction.
499 // To be overly conservative, we only add 1 to the number of instructions for
500 // each division instruction.
501 for (IRInstructionData &ID : *Candidate) {
502 Instruction *I = ID.Inst;
503 switch (I->getOpcode()) {
504 case Instruction::FDiv:
505 case Instruction::FRem:
506 case Instruction::SDiv:
507 case Instruction::SRem:
508 case Instruction::UDiv:
509 case Instruction::URem:
510 Benefit += 1;
511 break;
512 default:
513 Benefit += TTI.getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_CodeSize);
514 break;
515 }
516 }
517
518 return Benefit;
519}
520
521/// Check the \p OutputMappings structure for value \p Input, if it exists
522/// it has been used as an output for outlining, and has been renamed, and we
523/// return the new value, otherwise, we return the same value.
524///
525/// \param OutputMappings [in] - The mapping of values to their renamed value
526/// after being used as an output for an outlined region.
527/// \param Input [in] - The value to find the remapped value of, if it exists.
528/// \return The remapped value if it has been renamed, and the same value if has
529/// not.
530static Value *findOutputMapping(const DenseMap<Value *, Value *> OutputMappings,
531 Value *Input) {
532 DenseMap<Value *, Value *>::const_iterator OutputMapping =
533 OutputMappings.find(Val: Input);
534 if (OutputMapping != OutputMappings.end())
535 return OutputMapping->second;
536 return Input;
537}
538
539/// Find whether \p Region matches the global value numbering to Constant
540/// mapping found so far.
541///
542/// \param Region - The OutlinableRegion we are checking for constants
543/// \param GVNToConstant - The mapping of global value number to Constants.
544/// \param NotSame - The set of global value numbers that do not have the same
545/// constant in each region.
546/// \returns true if all Constants are the same in every use of a Constant in \p
547/// Region and false if not
548static bool
549collectRegionsConstants(OutlinableRegion &Region,
550 DenseMap<unsigned, Constant *> &GVNToConstant,
551 DenseSet<unsigned> &NotSame) {
552 bool ConstantsTheSame = true;
553
554 IRSimilarityCandidate &C = *Region.Candidate;
555 for (IRInstructionData &ID : C) {
556
557 // Iterate over the operands in an instruction. If the global value number,
558 // assigned by the IRSimilarityCandidate, has been seen before, we check if
559 // the number has been found to be not the same value in each instance.
560 for (Value *V : ID.OperVals) {
561 std::optional<unsigned> GVNOpt = C.getGVN(V);
562 assert(GVNOpt && "Expected a GVN for operand?");
563 unsigned GVN = *GVNOpt;
564
565 // Check if this global value has been found to not be the same already.
566 if (NotSame.contains(V: GVN)) {
567 if (isa<Constant>(Val: V))
568 ConstantsTheSame = false;
569 continue;
570 }
571
572 // If it has been the same so far, we check the value for if the
573 // associated Constant value match the previous instances of the same
574 // global value number. If the global value does not map to a Constant,
575 // it is considered to not be the same value.
576 std::optional<bool> ConstantMatches =
577 constantMatches(V, GVN, GVNToConstant);
578 if (ConstantMatches) {
579 if (*ConstantMatches)
580 continue;
581 else
582 ConstantsTheSame = false;
583 }
584
585 // While this value is a register, it might not have been previously,
586 // make sure we don't already have a constant mapped to this global value
587 // number.
588 if (GVNToConstant.contains(Val: GVN))
589 ConstantsTheSame = false;
590
591 NotSame.insert(V: GVN);
592 }
593 }
594
595 return ConstantsTheSame;
596}
597
598void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
599 DenseMap<unsigned, Constant *> GVNToConstant;
600
601 for (OutlinableRegion *Region : Regions)
602 collectRegionsConstants(Region&: *Region, GVNToConstant, NotSame);
603}
604
605void OutlinableGroup::collectGVNStoreSets(Module &M) {
606 for (OutlinableRegion *OS : Regions)
607 OutputGVNCombinations.insert(V: OS->GVNStores);
608
609 // We are adding an extracted argument to decide between which output path
610 // to use in the basic block. It is used in a switch statement and only
611 // needs to be an integer.
612 if (OutputGVNCombinations.size() > 1)
613 ArgumentTypes.push_back(x: Type::getInt32Ty(C&: M.getContext()));
614}
615
616/// Get the subprogram if it exists for one of the outlined regions.
617///
618/// \param [in] Group - The set of regions to find a subprogram for.
619/// \returns the subprogram if it exists, or nullptr.
620static DISubprogram *getSubprogramOrNull(OutlinableGroup &Group) {
621 for (OutlinableRegion *OS : Group.Regions)
622 if (Function *F = OS->Call->getFunction())
623 if (DISubprogram *SP = F->getSubprogram())
624 return SP;
625
626 return nullptr;
627}
628
629Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
630 unsigned FunctionNameSuffix) {
631 assert(!Group.OutlinedFunction && "Function is already defined!");
632
633 Type *RetTy = Type::getVoidTy(C&: M.getContext());
634 // All extracted functions _should_ have the same return type at this point
635 // since the similarity identifier ensures that all branches outside of the
636 // region occur in the same place.
637
638 // NOTE: Should we ever move to the model that uses a switch at every point
639 // needed, meaning that we could branch within the region or out, it is
640 // possible that we will need to switch to using the most general case all of
641 // the time.
642 for (OutlinableRegion *R : Group.Regions) {
643 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
644 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
645 (RetTy->isIntegerTy(Bitwidth: 1) && ExtractedFuncType->isIntegerTy(Bitwidth: 16)))
646 RetTy = ExtractedFuncType;
647 }
648
649 Group.OutlinedFunctionType = FunctionType::get(
650 Result: RetTy, Params: Group.ArgumentTypes, isVarArg: false);
651
652 // These functions will only be called from within the same module, so
653 // we can set an internal linkage.
654 Group.OutlinedFunction = Function::Create(
655 Ty: Group.OutlinedFunctionType, Linkage: GlobalValue::InternalLinkage,
656 N: "outlined_ir_func_" + std::to_string(val: FunctionNameSuffix), M);
657
658 // Transfer the swifterr attribute to the correct function parameter.
659 if (Group.SwiftErrorArgument)
660 Group.OutlinedFunction->addParamAttr(ArgNo: *Group.SwiftErrorArgument,
661 Kind: Attribute::SwiftError);
662
663 Group.OutlinedFunction->addFnAttr(Kind: Attribute::OptimizeForSize);
664 Group.OutlinedFunction->addFnAttr(Kind: Attribute::MinSize);
665
666 // If there's a DISubprogram associated with this outlined function, then
667 // emit debug info for the outlined function.
668 if (DISubprogram *SP = getSubprogramOrNull(Group)) {
669 Function *F = Group.OutlinedFunction;
670 // We have a DISubprogram. Get its DICompileUnit.
671 DICompileUnit *CU = SP->getUnit();
672 DIBuilder DB(M, true, CU);
673 DIFile *Unit = SP->getFile();
674 Mangler Mg;
675 // Get the mangled name of the function for the linkage name.
676 std::string Dummy;
677 llvm::raw_string_ostream MangledNameStream(Dummy);
678 Mg.getNameWithPrefix(OS&: MangledNameStream, GV: F, CannotUsePrivateLabel: false);
679
680 DISubprogram *OutlinedSP = DB.createFunction(
681 Scope: Unit /* Context */, Name: F->getName(), LinkageName: Dummy, File: Unit /* File */,
682 LineNo: 0 /* Line 0 is reserved for compiler-generated code. */,
683 Ty: DB.createSubroutineType(ParameterTypes: DB.getOrCreateTypeArray(Elements: {})), /* void type */
684 ScopeLine: 0, /* Line 0 is reserved for compiler-generated code. */
685 Flags: DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
686 /* Outlined code is optimized code by definition. */
687 SPFlags: DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
688
689 // Attach subprogram to the function.
690 F->setSubprogram(OutlinedSP);
691 // We're done with the DIBuilder.
692 DB.finalize();
693 }
694
695 return Group.OutlinedFunction;
696}
697
698/// Move each BasicBlock in \p Old to \p New.
699///
700/// \param [in] Old - The function to move the basic blocks from.
701/// \param [in] New - The function to move the basic blocks to.
702/// \param [out] NewEnds - The return blocks of the new overall function.
703static void moveFunctionData(Function &Old, Function &New,
704 DenseMap<Value *, BasicBlock *> &NewEnds) {
705 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Range&: Old)) {
706 CurrBB.removeFromParent();
707 CurrBB.insertInto(Parent: &New);
708 Instruction *I = CurrBB.getTerminator();
709
710 // For each block we find a return instruction is, it is a potential exit
711 // path for the function. We keep track of each block based on the return
712 // value here.
713 if (ReturnInst *RI = dyn_cast<ReturnInst>(Val: I))
714 NewEnds.insert(KV: std::make_pair(x: RI->getReturnValue(), y: &CurrBB));
715
716 for (Instruction &Val : CurrBB) {
717 // Since debug-info originates from many different locations in the
718 // program, it will cause incorrect reporting from a debugger if we keep
719 // the same debug instructions. Drop non-intrinsic DbgVariableRecords
720 // here, collect intrinsics for removal later.
721 Val.dropDbgRecords();
722
723 // We must handle the scoping of called functions differently than
724 // other outlined instructions.
725 if (!isa<CallInst>(Val: &Val)) {
726 // Remove the debug information for outlined functions.
727 Val.setDebugLoc(DebugLoc::getDropped());
728
729 // Loop info metadata may contain line locations. Update them to have no
730 // value in the new subprogram since the outlined code could be from
731 // several locations.
732 auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
733 if (DISubprogram *SP = New.getSubprogram())
734 if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD))
735 return DILocation::get(Context&: New.getContext(), Line: Loc->getLine(),
736 Column: Loc->getColumn(), Scope: SP, InlinedAt: nullptr);
737 return MD;
738 };
739 updateLoopMetadataDebugLocations(I&: Val, Updater: updateLoopInfoLoc);
740 continue;
741 }
742
743 // Edit the scope of called functions inside of outlined functions.
744 if (DISubprogram *SP = New.getSubprogram()) {
745 DILocation *DI = DILocation::get(Context&: New.getContext(), Line: 0, Column: 0, Scope: SP);
746 Val.setDebugLoc(DI);
747 }
748 }
749 }
750}
751
752/// Find the constants that will need to be lifted into arguments
753/// as they are not the same in each instance of the region.
754///
755/// \param [in] C - The IRSimilarityCandidate containing the region we are
756/// analyzing.
757/// \param [in] NotSame - The set of global value numbers that do not have a
758/// single Constant across all OutlinableRegions similar to \p C.
759/// \param [out] Inputs - The list containing the global value numbers of the
760/// arguments needed for the region of code.
761static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
762 std::vector<unsigned> &Inputs) {
763 DenseSet<unsigned> Seen;
764 // Iterate over the instructions, and find what constants will need to be
765 // extracted into arguments.
766 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
767 IDIt != EndIDIt; IDIt++) {
768 for (Value *V : (*IDIt).OperVals) {
769 // Since these are stored before any outlining, they will be in the
770 // global value numbering.
771 unsigned GVN = *C.getGVN(V);
772 if (isa<Constant>(Val: V))
773 if (NotSame.contains(V: GVN) && Seen.insert(V: GVN).second)
774 Inputs.push_back(x: GVN);
775 }
776 }
777}
778
779/// Find the GVN for the inputs that have been found by the CodeExtractor.
780///
781/// \param [in] C - The IRSimilarityCandidate containing the region we are
782/// analyzing.
783/// \param [in] CurrentInputs - The set of inputs found by the
784/// CodeExtractor.
785/// \param [in] OutputMappings - The mapping of values that have been replaced
786/// by a new output value.
787/// \param [out] EndInputNumbers - The global value numbers for the extracted
788/// arguments.
789static void mapInputsToGVNs(IRSimilarityCandidate &C,
790 SetVector<Value *> &CurrentInputs,
791 const DenseMap<Value *, Value *> &OutputMappings,
792 std::vector<unsigned> &EndInputNumbers) {
793 // Get the Global Value Number for each input. We check if the Value has been
794 // replaced by a different value at output, and use the original value before
795 // replacement.
796 for (Value *Input : CurrentInputs) {
797 assert(Input && "Have a nullptr as an input");
798 auto It = OutputMappings.find(Val: Input);
799 if (It != OutputMappings.end())
800 Input = It->second;
801 assert(C.getGVN(Input) && "Could not find a numbering for the given input");
802 EndInputNumbers.push_back(x: *C.getGVN(V: Input));
803 }
804}
805
806/// Find the original value for the \p ArgInput values if any one of them was
807/// replaced during a previous extraction.
808///
809/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
810/// \param [in] OutputMappings - The mapping of values that have been replaced
811/// by a new output value.
812/// \param [out] RemappedArgInputs - The remapped values according to
813/// \p OutputMappings that will be extracted.
814static void
815remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
816 const DenseMap<Value *, Value *> &OutputMappings,
817 SetVector<Value *> &RemappedArgInputs) {
818 // Get the global value number for each input that will be extracted as an
819 // argument by the code extractor, remapping if needed for reloaded values.
820 for (Value *Input : ArgInputs) {
821 auto It = OutputMappings.find(Val: Input);
822 if (It != OutputMappings.end())
823 Input = It->second;
824 RemappedArgInputs.insert(X: Input);
825 }
826}
827
828/// Find the input GVNs and the output values for a region of Instructions.
829/// Using the code extractor, we collect the inputs to the extracted function.
830///
831/// The \p Region can be identified as needing to be ignored in this function.
832/// It should be checked whether it should be ignored after a call to this
833/// function.
834///
835/// \param [in,out] Region - The region of code to be analyzed.
836/// \param [out] InputGVNs - The global value numbers for the extracted
837/// arguments.
838/// \param [in] NotSame - The global value numbers in the region that do not
839/// have the same constant value in the regions structurally similar to
840/// \p Region.
841/// \param [in] OutputMappings - The mapping of values that have been replaced
842/// by a new output value after extraction.
843/// \param [out] ArgInputs - The values of the inputs to the extracted function.
844/// \param [out] Outputs - The set of values extracted by the CodeExtractor
845/// as outputs.
846static void getCodeExtractorArguments(
847 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
848 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
849 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
850 IRSimilarityCandidate &C = *Region.Candidate;
851
852 // OverallInputs are the inputs to the region found by the CodeExtractor,
853 // SinkCands and HoistCands are used by the CodeExtractor to find sunken
854 // allocas of values whose lifetimes are contained completely within the
855 // outlined region. PremappedInputs are the arguments found by the
856 // CodeExtractor, removing conditions such as sunken allocas, but that
857 // may need to be remapped due to the extracted output values replacing
858 // the original values. We use DummyOutputs for this first run of finding
859 // inputs and outputs since the outputs could change during findAllocas,
860 // the correct set of extracted outputs will be in the final Outputs ValueSet.
861 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
862 DummyOutputs;
863
864 // Use the code extractor to get the inputs and outputs, without sunken
865 // allocas or removing llvm.assumes.
866 CodeExtractor *CE = Region.CE;
867 CE->findInputsOutputs(Inputs&: OverallInputs, Outputs&: DummyOutputs, Allocas: SinkCands);
868 assert(Region.StartBB && "Region must have a start BasicBlock!");
869 Function *OrigF = Region.StartBB->getParent();
870 CodeExtractorAnalysisCache CEAC(*OrigF);
871 BasicBlock *Dummy = nullptr;
872
873 // The region may be ineligible due to VarArgs in the parent function. In this
874 // case we ignore the region.
875 if (!CE->isEligible()) {
876 Region.IgnoreRegion = true;
877 return;
878 }
879
880 // Find if any values are going to be sunk into the function when extracted
881 CE->findAllocas(CEAC, SinkCands, HoistCands, ExitBlock&: Dummy);
882 CE->findInputsOutputs(Inputs&: PremappedInputs, Outputs, Allocas: SinkCands);
883
884 // TODO: Support regions with sunken allocas: values whose lifetimes are
885 // contained completely within the outlined region. These are not guaranteed
886 // to be the same in every region, so we must elevate them all to arguments
887 // when they appear. If these values are not equal, it means there is some
888 // Input in OverallInputs that was removed for ArgInputs.
889 if (OverallInputs.size() != PremappedInputs.size()) {
890 Region.IgnoreRegion = true;
891 return;
892 }
893
894 findConstants(C, NotSame, Inputs&: InputGVNs);
895
896 mapInputsToGVNs(C, CurrentInputs&: OverallInputs, OutputMappings, EndInputNumbers&: InputGVNs);
897
898 remapExtractedInputs(ArgInputs: PremappedInputs.getArrayRef(), OutputMappings,
899 RemappedArgInputs&: ArgInputs);
900
901 // Sort the GVNs, since we now have constants included in the \ref InputGVNs
902 // we need to make sure they are in a deterministic order.
903 stable_sort(Range&: InputGVNs);
904}
905
906/// Look over the inputs and map each input argument to an argument in the
907/// overall function for the OutlinableRegions. This creates a way to replace
908/// the arguments of the extracted function with the arguments of the new
909/// overall function.
910///
911/// \param [in,out] Region - The region of code to be analyzed.
912/// \param [in] InputGVNs - The global value numbering of the input values
913/// collected.
914/// \param [in] ArgInputs - The values of the arguments to the extracted
915/// function.
916static void
917findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
918 std::vector<unsigned> &InputGVNs,
919 SetVector<Value *> &ArgInputs) {
920
921 IRSimilarityCandidate &C = *Region.Candidate;
922 OutlinableGroup &Group = *Region.Parent;
923
924 // This counts the argument number in the overall function.
925 unsigned TypeIndex = 0;
926
927 // This counts the argument number in the extracted function.
928 unsigned OriginalIndex = 0;
929
930 // Find the mapping of the extracted arguments to the arguments for the
931 // overall function. Since there may be extra arguments in the overall
932 // function to account for the extracted constants, we have two different
933 // counters as we find extracted arguments, and as we come across overall
934 // arguments.
935
936 // Additionally, in our first pass, for the first extracted function,
937 // we find argument locations for the canonical value numbering. This
938 // numbering overrides any discovered location for the extracted code.
939 for (unsigned InputVal : InputGVNs) {
940 std::optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(N: InputVal);
941 assert(CanonicalNumberOpt && "Canonical number not found?");
942 unsigned CanonicalNumber = *CanonicalNumberOpt;
943
944 std::optional<Value *> InputOpt = C.fromGVN(Num: InputVal);
945 assert(InputOpt && "Global value number not found?");
946 Value *Input = *InputOpt;
947
948 DenseMap<unsigned, unsigned>::iterator AggArgIt =
949 Group.CanonicalNumberToAggArg.find(Val: CanonicalNumber);
950
951 if (!Group.InputTypesSet) {
952 Group.ArgumentTypes.push_back(x: Input->getType());
953 // If the input value has a swifterr attribute, make sure to mark the
954 // argument in the overall function.
955 if (Input->isSwiftError()) {
956 assert(
957 !Group.SwiftErrorArgument &&
958 "Argument already marked with swifterr for this OutlinableGroup!");
959 Group.SwiftErrorArgument = TypeIndex;
960 }
961 }
962
963 // Check if we have a constant. If we do add it to the overall argument
964 // number to Constant map for the region, and continue to the next input.
965 if (Constant *CST = dyn_cast<Constant>(Val: Input)) {
966 if (AggArgIt != Group.CanonicalNumberToAggArg.end())
967 Region.AggArgToConstant.insert(KV: std::make_pair(x&: AggArgIt->second, y&: CST));
968 else {
969 Group.CanonicalNumberToAggArg.insert(
970 KV: std::make_pair(x&: CanonicalNumber, y&: TypeIndex));
971 Region.AggArgToConstant.insert(KV: std::make_pair(x&: TypeIndex, y&: CST));
972 }
973 TypeIndex++;
974 continue;
975 }
976
977 // It is not a constant, we create the mapping from extracted argument list
978 // to the overall argument list, using the canonical location, if it exists.
979 assert(ArgInputs.count(Input) && "Input cannot be found!");
980
981 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
982 if (OriginalIndex != AggArgIt->second)
983 Region.ChangedArgOrder = true;
984 Region.ExtractedArgToAgg.insert(
985 KV: std::make_pair(x&: OriginalIndex, y&: AggArgIt->second));
986 Region.AggArgToExtracted.insert(
987 KV: std::make_pair(x&: AggArgIt->second, y&: OriginalIndex));
988 } else {
989 Group.CanonicalNumberToAggArg.insert(
990 KV: std::make_pair(x&: CanonicalNumber, y&: TypeIndex));
991 Region.ExtractedArgToAgg.insert(KV: std::make_pair(x&: OriginalIndex, y&: TypeIndex));
992 Region.AggArgToExtracted.insert(KV: std::make_pair(x&: TypeIndex, y&: OriginalIndex));
993 }
994 OriginalIndex++;
995 TypeIndex++;
996 }
997
998 // If the function type definitions for the OutlinableGroup holding the region
999 // have not been set, set the length of the inputs here. We should have the
1000 // same inputs for all of the different regions contained in the
1001 // OutlinableGroup since they are all structurally similar to one another.
1002 if (!Group.InputTypesSet) {
1003 Group.NumAggregateInputs = TypeIndex;
1004 Group.InputTypesSet = true;
1005 }
1006
1007 Region.NumExtractedInputs = OriginalIndex;
1008}
1009
1010/// Check if the \p V has any uses outside of the region other than \p PN.
1011///
1012/// \param V [in] - The value to check.
1013/// \param PHILoc [in] - The location in the PHINode of \p V.
1014/// \param PN [in] - The PHINode using \p V.
1015/// \param Exits [in] - The potential blocks we exit to from the outlined
1016/// region.
1017/// \param BlocksInRegion [in] - The basic blocks contained in the region.
1018/// \returns true if \p V has any use soutside its region other than \p PN.
1019static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1020 SmallPtrSet<BasicBlock *, 1> &Exits,
1021 DenseSet<BasicBlock *> &BlocksInRegion) {
1022 // We check to see if the value is used by the PHINode from some other
1023 // predecessor not included in the region. If it is, we make sure
1024 // to keep it as an output.
1025 if (any_of(Range: llvm::seq<unsigned>(Begin: 0, End: PN.getNumIncomingValues()),
1026 P: [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1027 return (Idx != PHILoc && V == PN.getIncomingValue(i: Idx) &&
1028 !BlocksInRegion.contains(V: PN.getIncomingBlock(i: Idx)));
1029 }))
1030 return true;
1031
1032 // Check if the value is used by any other instructions outside the region.
1033 return any_of(Range: V->users(), P: [&Exits, &BlocksInRegion](User *U) {
1034 Instruction *I = dyn_cast<Instruction>(Val: U);
1035 if (!I)
1036 return false;
1037
1038 // If the use of the item is inside the region, we skip it. Uses
1039 // inside the region give us useful information about how the item could be
1040 // used as an output.
1041 BasicBlock *Parent = I->getParent();
1042 if (BlocksInRegion.contains(V: Parent))
1043 return false;
1044
1045 // If it's not a PHINode then we definitely know the use matters. This
1046 // output value will not completely combined with another item in a PHINode
1047 // as it is directly reference by another non-phi instruction
1048 if (!isa<PHINode>(Val: I))
1049 return true;
1050
1051 // If we have a PHINode outside one of the exit locations, then it
1052 // can be considered an outside use as well. If there is a PHINode
1053 // contained in the Exit where this values use matters, it will be
1054 // caught when we analyze that PHINode.
1055 if (!Exits.contains(Ptr: Parent))
1056 return true;
1057
1058 return false;
1059 });
1060}
1061
1062/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1063/// considered outputs. A PHINodes is an output when more than one incoming
1064/// value has been marked by the CodeExtractor as an output.
1065///
1066/// \param CurrentExitFromRegion [in] - The block to analyze.
1067/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1068/// region.
1069/// \param RegionBlocks [in] - The basic blocks in the region.
1070/// \param Outputs [in, out] - The existing outputs for the region, we may add
1071/// PHINodes to this as we find that they replace output values.
1072/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1073/// totally replaced by a PHINode.
1074/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1075/// in PHINodes, but have other uses, and should still be considered outputs.
1076static void analyzeExitPHIsForOutputUses(
1077 BasicBlock *CurrentExitFromRegion,
1078 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1079 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1080 DenseSet<Value *> &OutputsReplacedByPHINode,
1081 DenseSet<Value *> &OutputsWithNonPhiUses) {
1082 for (PHINode &PN : CurrentExitFromRegion->phis()) {
1083 // Find all incoming values from the outlining region.
1084 SmallVector<unsigned, 2> IncomingVals;
1085 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1086 if (RegionBlocks.contains(V: PN.getIncomingBlock(i: I)))
1087 IncomingVals.push_back(Elt: I);
1088
1089 // Do not process PHI if there are no predecessors from region.
1090 unsigned NumIncomingVals = IncomingVals.size();
1091 if (NumIncomingVals == 0)
1092 continue;
1093
1094 // If there is one predecessor, we mark it as a value that needs to be kept
1095 // as an output.
1096 if (NumIncomingVals == 1) {
1097 Value *V = PN.getIncomingValue(i: *IncomingVals.begin());
1098 OutputsWithNonPhiUses.insert(V);
1099 OutputsReplacedByPHINode.erase(V);
1100 continue;
1101 }
1102
1103 // This PHINode will be used as an output value, so we add it to our list.
1104 Outputs.insert(X: &PN);
1105
1106 // Not all of the incoming values should be ignored as other inputs and
1107 // outputs may have uses in outlined region. If they have other uses
1108 // outside of the single PHINode we should not skip over it.
1109 for (unsigned Idx : IncomingVals) {
1110 Value *V = PN.getIncomingValue(i: Idx);
1111 if (!isa<Constant>(Val: V) &&
1112 outputHasNonPHI(V, PHILoc: Idx, PN, Exits&: PotentialExitsFromRegion, BlocksInRegion&: RegionBlocks)) {
1113 OutputsWithNonPhiUses.insert(V);
1114 OutputsReplacedByPHINode.erase(V);
1115 continue;
1116 }
1117 if (!OutputsWithNonPhiUses.contains(V))
1118 OutputsReplacedByPHINode.insert(V);
1119 }
1120 }
1121}
1122
1123// Represents the type for the unsigned number denoting the output number for
1124// phi node, along with the canonical number for the exit block.
1125using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1126// The list of canonical numbers for the incoming values to a PHINode.
1127using CanonList = SmallVector<unsigned, 2>;
1128// The pair type representing the set of canonical values being combined in the
1129// PHINode, along with the location data for the PHINode.
1130using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1131
1132/// Encode \p PND as an integer for easy lookup based on the argument location,
1133/// the parent BasicBlock canonical numbering, and the canonical numbering of
1134/// the values stored in the PHINode.
1135///
1136/// \param PND - The data to hash.
1137/// \returns The hash code of \p PND.
1138static hash_code encodePHINodeData(PHINodeData &PND) {
1139 return llvm::hash_combine(args: llvm::hash_value(value: PND.first.first),
1140 args: llvm::hash_value(value: PND.first.second),
1141 args: llvm::hash_combine_range(R&: PND.second));
1142}
1143
1144/// Create a special GVN for PHINodes that will be used outside of
1145/// the region. We create a hash code based on the Canonical number of the
1146/// parent BasicBlock, the canonical numbering of the values stored in the
1147/// PHINode and the aggregate argument location. This is used to find whether
1148/// this PHINode type has been given a canonical numbering already. If not, we
1149/// assign it a value and store it for later use. The value is returned to
1150/// identify different output schemes for the set of regions.
1151///
1152/// \param Region - The region that \p PN is an output for.
1153/// \param PN - The PHINode we are analyzing.
1154/// \param Blocks - The blocks for the region we are analyzing.
1155/// \param AggArgIdx - The argument \p PN will be stored into.
1156/// \returns An optional holding the assigned canonical number, or std::nullopt
1157/// if there is some attribute of the PHINode blocking it from being used.
1158static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1159 PHINode *PN,
1160 DenseSet<BasicBlock *> &Blocks,
1161 unsigned AggArgIdx) {
1162 OutlinableGroup &Group = *Region.Parent;
1163 IRSimilarityCandidate &Cand = *Region.Candidate;
1164 BasicBlock *PHIBB = PN->getParent();
1165 CanonList PHIGVNs;
1166 Value *Incoming;
1167 BasicBlock *IncomingBlock;
1168 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1169 Incoming = PN->getIncomingValue(i: Idx);
1170 IncomingBlock = PN->getIncomingBlock(i: Idx);
1171 // If the incoming block isn't in the region, we don't have to worry about
1172 // this incoming value.
1173 if (!Blocks.contains(V: IncomingBlock))
1174 continue;
1175
1176 // If we cannot find a GVN, and the incoming block is included in the region
1177 // this means that the input to the PHINode is not included in the region we
1178 // are trying to analyze, meaning, that if it was outlined, we would be
1179 // adding an extra input. We ignore this case for now, and so ignore the
1180 // region.
1181 std::optional<unsigned> OGVN = Cand.getGVN(V: Incoming);
1182 if (!OGVN) {
1183 Region.IgnoreRegion = true;
1184 return std::nullopt;
1185 }
1186
1187 // Collect the canonical numbers of the values in the PHINode.
1188 unsigned GVN = *OGVN;
1189 OGVN = Cand.getCanonicalNum(N: GVN);
1190 assert(OGVN && "No GVN found for incoming value?");
1191 PHIGVNs.push_back(Elt: *OGVN);
1192
1193 // Find the incoming block and use the canonical numbering as well to define
1194 // the hash for the PHINode.
1195 OGVN = Cand.getGVN(V: IncomingBlock);
1196
1197 // If there is no number for the incoming block, it is because we have
1198 // split the candidate basic blocks. So we use the previous block that it
1199 // was split from to find the valid global value numbering for the PHINode.
1200 if (!OGVN) {
1201 assert(Cand.getStartBB() == IncomingBlock &&
1202 "Unknown basic block used in exit path PHINode.");
1203
1204 BasicBlock *PrevBlock = nullptr;
1205 // Iterate over the predecessors to the incoming block of the
1206 // PHINode, when we find a block that is not contained in the region
1207 // we know that this is the first block that we split from, and should
1208 // have a valid global value numbering.
1209 for (BasicBlock *Pred : predecessors(BB: IncomingBlock))
1210 if (!Blocks.contains(V: Pred)) {
1211 PrevBlock = Pred;
1212 break;
1213 }
1214 assert(PrevBlock && "Expected a predecessor not in the reigon!");
1215 OGVN = Cand.getGVN(V: PrevBlock);
1216 }
1217 GVN = *OGVN;
1218 OGVN = Cand.getCanonicalNum(N: GVN);
1219 assert(OGVN && "No GVN found for incoming block?");
1220 PHIGVNs.push_back(Elt: *OGVN);
1221 }
1222
1223 // Now that we have the GVNs for the incoming values, we are going to combine
1224 // them with the GVN of the incoming bock, and the output location of the
1225 // PHINode to generate a hash value representing this instance of the PHINode.
1226 DenseMap<hash_code, unsigned>::iterator GVNToPHIIt;
1227 DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt;
1228 std::optional<unsigned> BBGVN = Cand.getGVN(V: PHIBB);
1229 assert(BBGVN && "Could not find GVN for the incoming block!");
1230
1231 BBGVN = Cand.getCanonicalNum(N: *BBGVN);
1232 assert(BBGVN && "Could not find canonical number for the incoming block!");
1233 // Create a pair of the exit block canonical value, and the aggregate
1234 // argument location, connected to the canonical numbers stored in the
1235 // PHINode.
1236 PHINodeData TemporaryPair =
1237 std::make_pair(x: std::make_pair(x&: *BBGVN, y&: AggArgIdx), y&: PHIGVNs);
1238 hash_code PHINodeDataHash = encodePHINodeData(PND&: TemporaryPair);
1239
1240 // Look for and create a new entry in our connection between canonical
1241 // numbers for PHINodes, and the set of objects we just created.
1242 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(Val: PHINodeDataHash);
1243 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1244 bool Inserted = false;
1245 std::tie(args&: PHIToGVNIt, args&: Inserted) = Group.PHINodeGVNToGVNs.insert(
1246 KV: std::make_pair(x&: Group.PHINodeGVNTracker, y&: TemporaryPair));
1247 std::tie(args&: GVNToPHIIt, args&: Inserted) = Group.GVNsToPHINodeGVN.insert(
1248 KV: std::make_pair(x&: PHINodeDataHash, y: Group.PHINodeGVNTracker--));
1249 }
1250
1251 return GVNToPHIIt->second;
1252}
1253
1254/// Create a mapping of the output arguments for the \p Region to the output
1255/// arguments of the overall outlined function.
1256///
1257/// \param [in,out] Region - The region of code to be analyzed.
1258/// \param [in] Outputs - The values found by the code extractor.
1259static void
1260findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region,
1261 SetVector<Value *> &Outputs) {
1262 OutlinableGroup &Group = *Region.Parent;
1263 IRSimilarityCandidate &C = *Region.Candidate;
1264
1265 SmallVector<BasicBlock *> BE;
1266 DenseSet<BasicBlock *> BlocksInRegion;
1267 C.getBasicBlocks(BBSet&: BlocksInRegion, BBList&: BE);
1268
1269 // Find the exits to the region.
1270 SmallPtrSet<BasicBlock *, 1> Exits;
1271 for (BasicBlock *Block : BE)
1272 for (BasicBlock *Succ : successors(BB: Block))
1273 if (!BlocksInRegion.contains(V: Succ))
1274 Exits.insert(Ptr: Succ);
1275
1276 // After determining which blocks exit to PHINodes, we add these PHINodes to
1277 // the set of outputs to be processed. We also check the incoming values of
1278 // the PHINodes for whether they should no longer be considered outputs.
1279 DenseSet<Value *> OutputsReplacedByPHINode;
1280 DenseSet<Value *> OutputsWithNonPhiUses;
1281 for (BasicBlock *ExitBB : Exits)
1282 analyzeExitPHIsForOutputUses(CurrentExitFromRegion: ExitBB, PotentialExitsFromRegion&: Exits, RegionBlocks&: BlocksInRegion, Outputs,
1283 OutputsReplacedByPHINode,
1284 OutputsWithNonPhiUses);
1285
1286 // This counts the argument number in the extracted function.
1287 unsigned OriginalIndex = Region.NumExtractedInputs;
1288
1289 // This counts the argument number in the overall function.
1290 unsigned TypeIndex = Group.NumAggregateInputs;
1291 bool TypeFound;
1292 DenseSet<unsigned> AggArgsUsed;
1293
1294 // Iterate over the output types and identify if there is an aggregate pointer
1295 // type whose base type matches the current output type. If there is, we mark
1296 // that we will use this output register for this value. If not we add another
1297 // type to the overall argument type list. We also store the GVNs used for
1298 // stores to identify which values will need to be moved into an special
1299 // block that holds the stores to the output registers.
1300 for (Value *Output : Outputs) {
1301 TypeFound = false;
1302 // We can do this since it is a result value, and will have a number
1303 // that is necessarily the same. BUT if in the future, the instructions
1304 // do not have to be in same order, but are functionally the same, we will
1305 // have to use a different scheme, as one-to-one correspondence is not
1306 // guaranteed.
1307 unsigned ArgumentSize = Group.ArgumentTypes.size();
1308
1309 // If the output is combined in a PHINode, we make sure to skip over it.
1310 if (OutputsReplacedByPHINode.contains(V: Output))
1311 continue;
1312
1313 unsigned AggArgIdx = 0;
1314 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1315 if (!isa<PointerType>(Val: Group.ArgumentTypes[Jdx]))
1316 continue;
1317
1318 if (!AggArgsUsed.insert(V: Jdx).second)
1319 continue;
1320
1321 TypeFound = true;
1322 Region.ExtractedArgToAgg.insert(KV: std::make_pair(x&: OriginalIndex, y&: Jdx));
1323 Region.AggArgToExtracted.insert(KV: std::make_pair(x&: Jdx, y&: OriginalIndex));
1324 AggArgIdx = Jdx;
1325 break;
1326 }
1327
1328 // We were unable to find an unused type in the output type set that matches
1329 // the output, so we add a pointer type to the argument types of the overall
1330 // function to handle this output and create a mapping to it.
1331 if (!TypeFound) {
1332 Group.ArgumentTypes.push_back(x: PointerType::get(C&: Output->getContext(),
1333 AddressSpace: M.getDataLayout().getAllocaAddrSpace()));
1334 // Mark the new pointer type as the last value in the aggregate argument
1335 // list.
1336 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1337 AggArgsUsed.insert(V: ArgTypeIdx);
1338 Region.ExtractedArgToAgg.insert(
1339 KV: std::make_pair(x&: OriginalIndex, y&: ArgTypeIdx));
1340 Region.AggArgToExtracted.insert(
1341 KV: std::make_pair(x&: ArgTypeIdx, y&: OriginalIndex));
1342 AggArgIdx = ArgTypeIdx;
1343 }
1344
1345 // TODO: Adapt to the extra input from the PHINode.
1346 PHINode *PN = dyn_cast<PHINode>(Val: Output);
1347
1348 std::optional<unsigned> GVN;
1349 if (PN && !BlocksInRegion.contains(V: PN->getParent())) {
1350 // Values outside the region can be combined into PHINode when we
1351 // have multiple exits. We collect both of these into a list to identify
1352 // which values are being used in the PHINode. Each list identifies a
1353 // different PHINode, and a different output. We store the PHINode as it's
1354 // own canonical value. These canonical values are also dependent on the
1355 // output argument it is saved to.
1356
1357 // If two PHINodes have the same canonical values, but different aggregate
1358 // argument locations, then they will have distinct Canonical Values.
1359 GVN = getGVNForPHINode(Region, PN, Blocks&: BlocksInRegion, AggArgIdx);
1360 if (!GVN)
1361 return;
1362 } else {
1363 // If we do not have a PHINode we use the global value numbering for the
1364 // output value, to find the canonical number to add to the set of stored
1365 // values.
1366 GVN = C.getGVN(V: Output);
1367 GVN = C.getCanonicalNum(N: *GVN);
1368 }
1369
1370 // Each region has a potentially unique set of outputs. We save which
1371 // values are output in a list of canonical values so we can differentiate
1372 // among the different store schemes.
1373 Region.GVNStores.push_back(Elt: *GVN);
1374
1375 OriginalIndex++;
1376 TypeIndex++;
1377 }
1378
1379 // We sort the stored values to make sure that we are not affected by analysis
1380 // order when determining what combination of items were stored.
1381 stable_sort(Range&: Region.GVNStores);
1382}
1383
1384void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1385 DenseSet<unsigned> &NotSame) {
1386 std::vector<unsigned> Inputs;
1387 SetVector<Value *> ArgInputs, Outputs;
1388
1389 getCodeExtractorArguments(Region, InputGVNs&: Inputs, NotSame, OutputMappings, ArgInputs,
1390 Outputs);
1391
1392 if (Region.IgnoreRegion)
1393 return;
1394
1395 // Map the inputs found by the CodeExtractor to the arguments found for
1396 // the overall function.
1397 findExtractedInputToOverallInputMapping(Region, InputGVNs&: Inputs, ArgInputs);
1398
1399 // Map the outputs found by the CodeExtractor to the arguments found for
1400 // the overall function.
1401 findExtractedOutputToOverallOutputMapping(M, Region, Outputs);
1402}
1403
1404/// Replace the extracted function in the Region with a call to the overall
1405/// function constructed from the deduplicated similar regions, replacing and
1406/// remapping the values passed to the extracted function as arguments to the
1407/// new arguments of the overall function.
1408///
1409/// \param [in] M - The module to outline from.
1410/// \param [in] Region - The regions of extracted code to be replaced with a new
1411/// function.
1412/// \returns a call instruction with the replaced function.
1413CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
1414 std::vector<Value *> NewCallArgs;
1415 DenseMap<unsigned, unsigned>::iterator ArgPair;
1416
1417 OutlinableGroup &Group = *Region.Parent;
1418 CallInst *Call = Region.Call;
1419 assert(Call && "Call to replace is nullptr?");
1420 Function *AggFunc = Group.OutlinedFunction;
1421 assert(AggFunc && "Function to replace with is nullptr?");
1422
1423 // If the arguments are the same size, there are not values that need to be
1424 // made into an argument, the argument ordering has not been change, or
1425 // different output registers to handle. We can simply replace the called
1426 // function in this case.
1427 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1428 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1429 << *AggFunc << " with same number of arguments\n");
1430 Call->setCalledFunction(AggFunc);
1431 return Call;
1432 }
1433
1434 // We have a different number of arguments than the new function, so
1435 // we need to use our previously mappings off extracted argument to overall
1436 // function argument, and constants to overall function argument to create the
1437 // new argument list.
1438 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1439
1440 if (AggArgIdx == AggFunc->arg_size() - 1 &&
1441 Group.OutputGVNCombinations.size() > 1) {
1442 // If we are on the last argument, and we need to differentiate between
1443 // output blocks, add an integer to the argument list to determine
1444 // what block to take
1445 LLVM_DEBUG(dbgs() << "Set switch block argument to "
1446 << Region.OutputBlockNum << "\n");
1447 NewCallArgs.push_back(x: ConstantInt::get(Ty: Type::getInt32Ty(C&: M.getContext()),
1448 V: Region.OutputBlockNum));
1449 continue;
1450 }
1451
1452 ArgPair = Region.AggArgToExtracted.find(Val: AggArgIdx);
1453 if (ArgPair != Region.AggArgToExtracted.end()) {
1454 Value *ArgumentValue = Call->getArgOperand(i: ArgPair->second);
1455 // If we found the mapping from the extracted function to the overall
1456 // function, we simply add it to the argument list. We use the same
1457 // value, it just needs to honor the new order of arguments.
1458 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1459 << *ArgumentValue << "\n");
1460 NewCallArgs.push_back(x: ArgumentValue);
1461 continue;
1462 }
1463
1464 // If it is a constant, we simply add it to the argument list as a value.
1465 if (auto It = Region.AggArgToConstant.find(Val: AggArgIdx);
1466 It != Region.AggArgToConstant.end()) {
1467 Constant *CST = It->second;
1468 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1469 << *CST << "\n");
1470 NewCallArgs.push_back(x: CST);
1471 continue;
1472 }
1473
1474 // Add a nullptr value if the argument is not found in the extracted
1475 // function. If we cannot find a value, it means it is not in use
1476 // for the region, so we should not pass anything to it.
1477 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1478 NewCallArgs.push_back(x: ConstantPointerNull::get(
1479 T: static_cast<PointerType *>(AggFunc->getArg(i: AggArgIdx)->getType())));
1480 }
1481
1482 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1483 << *AggFunc << " with new set of arguments\n");
1484 // Create the new call instruction and erase the old one.
1485 Call = CallInst::Create(Ty: AggFunc->getFunctionType(), Func: AggFunc, Args: NewCallArgs, NameStr: "",
1486 InsertBefore: Call->getIterator());
1487
1488 // It is possible that the call to the outlined function is either the first
1489 // instruction is in the new block, the last instruction, or both. If either
1490 // of these is the case, we need to make sure that we replace the instruction
1491 // in the IRInstructionData struct with the new call.
1492 CallInst *OldCall = Region.Call;
1493 if (Region.NewFront->Inst == OldCall)
1494 Region.NewFront->Inst = Call;
1495 if (Region.NewBack->Inst == OldCall)
1496 Region.NewBack->Inst = Call;
1497
1498 // Transfer any debug information.
1499 Call->setDebugLoc(Region.Call->getDebugLoc());
1500 // Since our output may determine which branch we go to, we make sure to
1501 // propagate this new call value through the module.
1502 OldCall->replaceAllUsesWith(V: Call);
1503
1504 // Remove the old instruction.
1505 OldCall->eraseFromParent();
1506 Region.Call = Call;
1507
1508 // Make sure that the argument in the new function has the SwiftError
1509 // argument.
1510 if (Group.SwiftErrorArgument)
1511 Call->addParamAttr(ArgNo: *Group.SwiftErrorArgument, Kind: Attribute::SwiftError);
1512
1513 return Call;
1514}
1515
1516/// Find or create a BasicBlock in the outlined function containing PhiBlocks
1517/// for \p RetVal.
1518///
1519/// \param Group - The OutlinableGroup containing the information about the
1520/// overall outlined function.
1521/// \param RetVal - The return value or exit option that we are currently
1522/// evaluating.
1523/// \returns The found or newly created BasicBlock to contain the needed
1524/// PHINodes to be used as outputs.
1525static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) {
1526 // Find if a PHIBlock exists for this return value already. If it is
1527 // the first time we are analyzing this, we will not, so we record it.
1528 auto [PhiBlockForRetVal, Inserted] = Group.PHIBlocks.try_emplace(Key: RetVal);
1529 if (!Inserted)
1530 return PhiBlockForRetVal->second;
1531
1532 auto ReturnBlockForRetVal = Group.EndBBs.find(Val: RetVal);
1533 assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1534 "Could not find output value!");
1535 BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1536
1537 // If we did not find a block, we create one, and insert it into the
1538 // overall function and record it.
1539 BasicBlock *PHIBlock = BasicBlock::Create(Context&: ReturnBB->getContext(), Name: "phi_block",
1540 Parent: ReturnBB->getParent());
1541 PhiBlockForRetVal->second = PHIBlock;
1542
1543 // We find the predecessors of the return block in the newly created outlined
1544 // function in order to point them to the new PHIBlock rather than the already
1545 // existing return block.
1546 SmallVector<BranchInst *, 2> BranchesToChange;
1547 for (BasicBlock *Pred : predecessors(BB: ReturnBB))
1548 BranchesToChange.push_back(Elt: cast<BranchInst>(Val: Pred->getTerminator()));
1549
1550 // Now we mark the branch instructions found, and change the references of the
1551 // return block to the newly created PHIBlock.
1552 for (BranchInst *BI : BranchesToChange)
1553 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
1554 if (BI->getSuccessor(i: Succ) != ReturnBB)
1555 continue;
1556 BI->setSuccessor(idx: Succ, NewSucc: PHIBlock);
1557 }
1558
1559 BranchInst::Create(IfTrue: ReturnBB, InsertBefore: PHIBlock);
1560
1561 return PhiBlockForRetVal->second;
1562}
1563
1564/// For the function call now representing the \p Region, find the passed value
1565/// to that call that represents Argument \p A at the call location if the
1566/// call has already been replaced with a call to the overall, aggregate
1567/// function.
1568///
1569/// \param A - The Argument to get the passed value for.
1570/// \param Region - The extracted Region corresponding to the outlined function.
1571/// \returns The Value representing \p A at the call site.
1572static Value *
1573getPassedArgumentInAlreadyOutlinedFunction(const Argument *A,
1574 const OutlinableRegion &Region) {
1575 // If we don't need to adjust the argument number at all (since the call
1576 // has already been replaced by a call to the overall outlined function)
1577 // we can just get the specified argument.
1578 return Region.Call->getArgOperand(i: A->getArgNo());
1579}
1580
1581/// For the function call now representing the \p Region, find the passed value
1582/// to that call that represents Argument \p A at the call location if the
1583/// call has only been replaced by the call to the aggregate function.
1584///
1585/// \param A - The Argument to get the passed value for.
1586/// \param Region - The extracted Region corresponding to the outlined function.
1587/// \returns The Value representing \p A at the call site.
1588static Value *
1589getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
1590 const OutlinableRegion &Region) {
1591 unsigned ArgNum = A->getArgNo();
1592
1593 // If it is a constant, we can look at our mapping from when we created
1594 // the outputs to figure out what the constant value is.
1595 if (auto It = Region.AggArgToConstant.find(Val: ArgNum);
1596 It != Region.AggArgToConstant.end())
1597 return It->second;
1598
1599 // If it is not a constant, and we are not looking at the overall function, we
1600 // need to adjust which argument we are looking at.
1601 ArgNum = Region.AggArgToExtracted.find(Val: ArgNum)->second;
1602 return Region.Call->getArgOperand(i: ArgNum);
1603}
1604
1605/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1606///
1607/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1608/// \param Region [in] - The OutlinableRegion containing \p PN.
1609/// \param OutputMappings [in] - The mapping of output values from outlined
1610/// region to their original values.
1611/// \param CanonNums [out] - The canonical numbering for the incoming values to
1612/// \p PN paired with their incoming block.
1613/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1614/// of \p Region rather than the overall function's call.
1615static void findCanonNumsForPHI(
1616 PHINode *PN, OutlinableRegion &Region,
1617 const DenseMap<Value *, Value *> &OutputMappings,
1618 SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1619 bool ReplacedWithOutlinedCall = true) {
1620 // Iterate over the incoming values.
1621 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1622 Value *IVal = PN->getIncomingValue(i: Idx);
1623 BasicBlock *IBlock = PN->getIncomingBlock(i: Idx);
1624 // If we have an argument as incoming value, we need to grab the passed
1625 // value from the call itself.
1626 if (Argument *A = dyn_cast<Argument>(Val: IVal)) {
1627 if (ReplacedWithOutlinedCall)
1628 IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region);
1629 else
1630 IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region);
1631 }
1632
1633 // Get the original value if it has been replaced by an output value.
1634 IVal = findOutputMapping(OutputMappings, Input: IVal);
1635
1636 // Find and add the canonical number for the incoming value.
1637 std::optional<unsigned> GVN = Region.Candidate->getGVN(V: IVal);
1638 assert(GVN && "No GVN for incoming value");
1639 std::optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(N: *GVN);
1640 assert(CanonNum && "No Canonical Number for GVN");
1641 CanonNums.push_back(Elt: std::make_pair(x&: *CanonNum, y&: IBlock));
1642 }
1643}
1644
1645/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1646/// in order to condense the number of instructions added to the outlined
1647/// function.
1648///
1649/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1650/// \param Region [in] - The OutlinableRegion containing \p PN.
1651/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1652/// \p PN in.
1653/// \param OutputMappings [in] - The mapping of output values from outlined
1654/// region to their original values.
1655/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1656/// matched.
1657/// \return the newly found or created PHINode in \p OverallPhiBlock.
1658static PHINode*
1659findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
1660 BasicBlock *OverallPhiBlock,
1661 const DenseMap<Value *, Value *> &OutputMappings,
1662 DenseSet<PHINode *> &UsedPHIs) {
1663 OutlinableGroup &Group = *Region.Parent;
1664
1665
1666 // A list of the canonical numbering assigned to each incoming value, paired
1667 // with the incoming block for the PHINode passed into this function.
1668 SmallVector<std::pair<unsigned, BasicBlock *>> PNCanonNums;
1669
1670 // We have to use the extracted function since we have merged this region into
1671 // the overall function yet. We make sure to reassign the argument numbering
1672 // since it is possible that the argument ordering is different between the
1673 // functions.
1674 findCanonNumsForPHI(PN: &PN, Region, OutputMappings, CanonNums&: PNCanonNums,
1675 /* ReplacedWithOutlinedCall = */ false);
1676
1677 OutlinableRegion *FirstRegion = Group.Regions[0];
1678
1679 // A list of the canonical numbering assigned to each incoming value, paired
1680 // with the incoming block for the PHINode that we are currently comparing
1681 // the passed PHINode to.
1682 SmallVector<std::pair<unsigned, BasicBlock *>> CurrentCanonNums;
1683
1684 // Find the Canonical Numbering for each PHINode, if it matches, we replace
1685 // the uses of the PHINode we are searching for, with the found PHINode.
1686 for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1687 // If this PHINode has already been matched to another PHINode to be merged,
1688 // we skip it.
1689 if (UsedPHIs.contains(V: &CurrPN))
1690 continue;
1691
1692 CurrentCanonNums.clear();
1693 findCanonNumsForPHI(PN: &CurrPN, Region&: *FirstRegion, OutputMappings, CanonNums&: CurrentCanonNums,
1694 /* ReplacedWithOutlinedCall = */ true);
1695
1696 // If the list of incoming values is not the same length, then they cannot
1697 // match since there is not an analogue for each incoming value.
1698 if (PNCanonNums.size() != CurrentCanonNums.size())
1699 continue;
1700
1701 bool FoundMatch = true;
1702
1703 // We compare the canonical value for each incoming value in the passed
1704 // in PHINode to one already present in the outlined region. If the
1705 // incoming values do not match, then the PHINodes do not match.
1706
1707 // We also check to make sure that the incoming block matches as well by
1708 // finding the corresponding incoming block in the combined outlined region
1709 // for the current outlined region.
1710 for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1711 std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1712 std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1713 if (ToCompareTo.first != ToAdd.first) {
1714 FoundMatch = false;
1715 break;
1716 }
1717
1718 BasicBlock *CorrespondingBlock =
1719 Region.findCorrespondingBlockIn(Other: *FirstRegion, BB: ToAdd.second);
1720 assert(CorrespondingBlock && "Found block is nullptr");
1721 if (CorrespondingBlock != ToCompareTo.second) {
1722 FoundMatch = false;
1723 break;
1724 }
1725 }
1726
1727 // If all incoming values and branches matched, then we can merge
1728 // into the found PHINode.
1729 if (FoundMatch) {
1730 UsedPHIs.insert(V: &CurrPN);
1731 return &CurrPN;
1732 }
1733 }
1734
1735 // If we've made it here, it means we weren't able to replace the PHINode, so
1736 // we must insert it ourselves.
1737 PHINode *NewPN = cast<PHINode>(Val: PN.clone());
1738 NewPN->insertBefore(InsertPos: OverallPhiBlock->begin());
1739 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1740 Idx++) {
1741 Value *IncomingVal = NewPN->getIncomingValue(i: Idx);
1742 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(i: Idx);
1743
1744 // Find corresponding basic block in the overall function for the incoming
1745 // block.
1746 BasicBlock *BlockToUse =
1747 Region.findCorrespondingBlockIn(Other: *FirstRegion, BB: IncomingBlock);
1748 NewPN->setIncomingBlock(i: Idx, BB: BlockToUse);
1749
1750 // If we have an argument we make sure we replace using the argument from
1751 // the correct function.
1752 if (Argument *A = dyn_cast<Argument>(Val: IncomingVal)) {
1753 Value *Val = Group.OutlinedFunction->getArg(i: A->getArgNo());
1754 NewPN->setIncomingValue(i: Idx, V: Val);
1755 continue;
1756 }
1757
1758 // Find the corresponding value in the overall function.
1759 IncomingVal = findOutputMapping(OutputMappings, Input: IncomingVal);
1760 Value *Val = Region.findCorrespondingValueIn(Other: *FirstRegion, V: IncomingVal);
1761 assert(Val && "Value is nullptr?");
1762 DenseMap<Value *, Value *>::iterator RemappedIt =
1763 FirstRegion->RemappedArguments.find(Val);
1764 if (RemappedIt != FirstRegion->RemappedArguments.end())
1765 Val = RemappedIt->second;
1766 NewPN->setIncomingValue(i: Idx, V: Val);
1767 }
1768 return NewPN;
1769}
1770
1771// Within an extracted function, replace the argument uses of the extracted
1772// region with the arguments of the function for an OutlinableGroup.
1773//
1774/// \param [in] Region - The region of extracted code to be changed.
1775/// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1776/// region.
1777/// \param [in] FirstFunction - A flag to indicate whether we are using this
1778/// function to define the overall outlined function for all the regions, or
1779/// if we are operating on one of the following regions.
1780static void
1781replaceArgumentUses(OutlinableRegion &Region,
1782 DenseMap<Value *, BasicBlock *> &OutputBBs,
1783 const DenseMap<Value *, Value *> &OutputMappings,
1784 bool FirstFunction = false) {
1785 OutlinableGroup &Group = *Region.Parent;
1786 assert(Region.ExtractedFunction && "Region has no extracted function?");
1787
1788 Function *DominatingFunction = Region.ExtractedFunction;
1789 if (FirstFunction)
1790 DominatingFunction = Group.OutlinedFunction;
1791 DominatorTree DT(*DominatingFunction);
1792 DenseSet<PHINode *> UsedPHIs;
1793
1794 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1795 ArgIdx++) {
1796 assert(Region.ExtractedArgToAgg.contains(ArgIdx) &&
1797 "No mapping from extracted to outlined?");
1798 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(Val: ArgIdx)->second;
1799 Argument *AggArg = Group.OutlinedFunction->getArg(i: AggArgIdx);
1800 Argument *Arg = Region.ExtractedFunction->getArg(i: ArgIdx);
1801 // The argument is an input, so we can simply replace it with the overall
1802 // argument value
1803 if (ArgIdx < Region.NumExtractedInputs) {
1804 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1805 << *Region.ExtractedFunction << " with " << *AggArg
1806 << " in function " << *Group.OutlinedFunction << "\n");
1807 Arg->replaceAllUsesWith(V: AggArg);
1808 Value *V = Region.Call->getArgOperand(i: ArgIdx);
1809 Region.RemappedArguments.insert(KV: std::make_pair(x&: V, y&: AggArg));
1810 continue;
1811 }
1812
1813 // If we are replacing an output, we place the store value in its own
1814 // block inside the overall function before replacing the use of the output
1815 // in the function.
1816 assert(Arg->hasOneUse() && "Output argument can only have one use");
1817 User *InstAsUser = Arg->user_back();
1818 assert(InstAsUser && "User is nullptr!");
1819
1820 Instruction *I = cast<Instruction>(Val: InstAsUser);
1821 BasicBlock *BB = I->getParent();
1822 SmallVector<BasicBlock *, 4> Descendants;
1823 DT.getDescendants(R: BB, Result&: Descendants);
1824 bool EdgeAdded = false;
1825 if (Descendants.size() == 0) {
1826 EdgeAdded = true;
1827 DT.insertEdge(From: &DominatingFunction->getEntryBlock(), To: BB);
1828 DT.getDescendants(R: BB, Result&: Descendants);
1829 }
1830
1831 // Iterate over the following blocks, looking for return instructions,
1832 // if we find one, find the corresponding output block for the return value
1833 // and move our store instruction there.
1834 for (BasicBlock *DescendBB : Descendants) {
1835 ReturnInst *RI = dyn_cast<ReturnInst>(Val: DescendBB->getTerminator());
1836 if (!RI)
1837 continue;
1838 Value *RetVal = RI->getReturnValue();
1839 auto VBBIt = OutputBBs.find(Val: RetVal);
1840 assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1841
1842 // If this is storing a PHINode, we must make sure it is included in the
1843 // overall function.
1844 StoreInst *SI = cast<StoreInst>(Val: I);
1845
1846 Value *ValueOperand = SI->getValueOperand();
1847
1848 StoreInst *NewI = cast<StoreInst>(Val: I->clone());
1849 NewI->setDebugLoc(DebugLoc::getDropped());
1850 BasicBlock *OutputBB = VBBIt->second;
1851 NewI->insertInto(ParentBB: OutputBB, It: OutputBB->end());
1852 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1853 << *OutputBB << "\n");
1854
1855 // If this is storing a PHINode, we must make sure it is included in the
1856 // overall function.
1857 if (!isa<PHINode>(Val: ValueOperand) ||
1858 Region.Candidate->getGVN(V: ValueOperand).has_value()) {
1859 if (FirstFunction)
1860 continue;
1861 Value *CorrVal =
1862 Region.findCorrespondingValueIn(Other: *Group.Regions[0], V: ValueOperand);
1863 assert(CorrVal && "Value is nullptr?");
1864 NewI->setOperand(i_nocapture: 0, Val_nocapture: CorrVal);
1865 continue;
1866 }
1867 PHINode *PN = cast<PHINode>(Val: SI->getValueOperand());
1868 // If it has a value, it was not split by the code extractor, which
1869 // is what we are looking for.
1870 if (Region.Candidate->getGVN(V: PN))
1871 continue;
1872
1873 // We record the parent block for the PHINode in the Region so that
1874 // we can exclude it from checks later on.
1875 Region.PHIBlocks.insert(KV: std::make_pair(x&: RetVal, y: PN->getParent()));
1876
1877 // If this is the first function, we do not need to worry about mergiing
1878 // this with any other block in the overall outlined function, so we can
1879 // just continue.
1880 if (FirstFunction) {
1881 BasicBlock *PHIBlock = PN->getParent();
1882 Group.PHIBlocks.insert(KV: std::make_pair(x&: RetVal, y&: PHIBlock));
1883 continue;
1884 }
1885
1886 // We look for the aggregate block that contains the PHINodes leading into
1887 // this exit path. If we can't find one, we create one.
1888 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1889
1890 // For our PHINode, we find the combined canonical numbering, and
1891 // attempt to find a matching PHINode in the overall PHIBlock. If we
1892 // cannot, we copy the PHINode and move it into this new block.
1893 PHINode *NewPN = findOrCreatePHIInBlock(PN&: *PN, Region, OverallPhiBlock,
1894 OutputMappings, UsedPHIs);
1895 NewI->setOperand(i_nocapture: 0, Val_nocapture: NewPN);
1896 }
1897
1898 // If we added an edge for basic blocks without a predecessor, we remove it
1899 // here.
1900 if (EdgeAdded)
1901 DT.deleteEdge(From: &DominatingFunction->getEntryBlock(), To: BB);
1902 I->eraseFromParent();
1903
1904 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1905 << *Region.ExtractedFunction << " with " << *AggArg
1906 << " in function " << *Group.OutlinedFunction << "\n");
1907 Arg->replaceAllUsesWith(V: AggArg);
1908 }
1909}
1910
1911/// Within an extracted function, replace the constants that need to be lifted
1912/// into arguments with the actual argument.
1913///
1914/// \param Region [in] - The region of extracted code to be changed.
1915void replaceConstants(OutlinableRegion &Region) {
1916 OutlinableGroup &Group = *Region.Parent;
1917 Function *OutlinedFunction = Group.OutlinedFunction;
1918 ValueToValueMapTy VMap;
1919
1920 // Iterate over the constants that need to be elevated into arguments
1921 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1922 unsigned AggArgIdx = Const.first;
1923 assert(OutlinedFunction && "Overall Function is not defined?");
1924 Constant *CST = Const.second;
1925 Argument *Arg = Group.OutlinedFunction->getArg(i: AggArgIdx);
1926 // Identify the argument it will be elevated to, and replace instances of
1927 // that constant in the function.
1928 VMap[CST] = Arg;
1929 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1930 << " in function " << *OutlinedFunction << " with "
1931 << *Arg << '\n');
1932 }
1933
1934 RemapFunction(F&: *OutlinedFunction, VM&: VMap,
1935 Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1936}
1937
1938/// It is possible that there is a basic block that already performs the same
1939/// stores. This returns a duplicate block, if it exists
1940///
1941/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1942/// \param OutputStoreBBs [in] The existing output blocks.
1943/// \returns an optional value with the number output block if there is a match.
1944std::optional<unsigned> findDuplicateOutputBlock(
1945 DenseMap<Value *, BasicBlock *> &OutputBBs,
1946 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1947
1948 bool Mismatch = false;
1949 unsigned MatchingNum = 0;
1950 // We compare the new set output blocks to the other sets of output blocks.
1951 // If they are the same number, and have identical instructions, they are
1952 // considered to be the same.
1953 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1954 Mismatch = false;
1955 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1956 DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
1957 OutputBBs.find(Val: VToB.first);
1958 if (OutputBBIt == OutputBBs.end()) {
1959 Mismatch = true;
1960 break;
1961 }
1962
1963 BasicBlock *CompBB = VToB.second;
1964 BasicBlock *OutputBB = OutputBBIt->second;
1965 if (CompBB->size() - 1 != OutputBB->size()) {
1966 Mismatch = true;
1967 break;
1968 }
1969
1970 BasicBlock::iterator NIt = OutputBB->begin();
1971 for (Instruction &I : *CompBB) {
1972 if (isa<BranchInst>(Val: &I))
1973 continue;
1974
1975 if (!I.isIdenticalTo(I: &(*NIt))) {
1976 Mismatch = true;
1977 break;
1978 }
1979
1980 NIt++;
1981 }
1982 }
1983
1984 if (!Mismatch)
1985 return MatchingNum;
1986
1987 MatchingNum++;
1988 }
1989
1990 return std::nullopt;
1991}
1992
1993/// Remove empty output blocks from the outlined region.
1994///
1995/// \param BlocksToPrune - Mapping of return values output blocks for the \p
1996/// Region.
1997/// \param Region - The OutlinableRegion we are analyzing.
1998static bool
1999analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
2000 OutlinableRegion &Region) {
2001 bool AllRemoved = true;
2002 Value *RetValueForBB;
2003 BasicBlock *NewBB;
2004 SmallVector<Value *, 4> ToRemove;
2005 // Iterate over the output blocks created in the outlined section.
2006 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
2007 RetValueForBB = VtoBB.first;
2008 NewBB = VtoBB.second;
2009
2010 // If there are no instructions, we remove it from the module, and also
2011 // mark the value for removal from the return value to output block mapping.
2012 if (NewBB->size() == 0) {
2013 NewBB->eraseFromParent();
2014 ToRemove.push_back(Elt: RetValueForBB);
2015 continue;
2016 }
2017
2018 // Mark that we could not remove all the blocks since they were not all
2019 // empty.
2020 AllRemoved = false;
2021 }
2022
2023 // Remove the return value from the mapping.
2024 for (Value *V : ToRemove)
2025 BlocksToPrune.erase(Val: V);
2026
2027 // Mark the region as having the no output scheme.
2028 if (AllRemoved)
2029 Region.OutputBlockNum = -1;
2030
2031 return AllRemoved;
2032}
2033
2034/// For the outlined section, move needed the StoreInsts for the output
2035/// registers into their own block. Then, determine if there is a duplicate
2036/// output block already created.
2037///
2038/// \param [in] OG - The OutlinableGroup of regions to be outlined.
2039/// \param [in] Region - The OutlinableRegion that is being analyzed.
2040/// \param [in,out] OutputBBs - the blocks that stores for this region will be
2041/// placed in.
2042/// \param [in] EndBBs - the final blocks of the extracted function.
2043/// \param [in] OutputMappings - OutputMappings the mapping of values that have
2044/// been replaced by a new output value.
2045/// \param [in,out] OutputStoreBBs - The existing output blocks.
2046static void alignOutputBlockWithAggFunc(
2047 OutlinableGroup &OG, OutlinableRegion &Region,
2048 DenseMap<Value *, BasicBlock *> &OutputBBs,
2049 DenseMap<Value *, BasicBlock *> &EndBBs,
2050 const DenseMap<Value *, Value *> &OutputMappings,
2051 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2052 // If none of the output blocks have any instructions, this means that we do
2053 // not have to determine if it matches any of the other output schemes, and we
2054 // don't have to do anything else.
2055 if (analyzeAndPruneOutputBlocks(BlocksToPrune&: OutputBBs, Region))
2056 return;
2057
2058 // Determine is there is a duplicate set of blocks.
2059 std::optional<unsigned> MatchingBB =
2060 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2061
2062 // If there is, we remove the new output blocks. If it does not,
2063 // we add it to our list of sets of output blocks.
2064 if (MatchingBB) {
2065 LLVM_DEBUG(dbgs() << "Set output block for region in function"
2066 << Region.ExtractedFunction << " to " << *MatchingBB);
2067
2068 Region.OutputBlockNum = *MatchingBB;
2069 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2070 VtoBB.second->eraseFromParent();
2071 return;
2072 }
2073
2074 Region.OutputBlockNum = OutputStoreBBs.size();
2075
2076 Value *RetValueForBB;
2077 BasicBlock *NewBB;
2078 OutputStoreBBs.push_back(x: DenseMap<Value *, BasicBlock *>());
2079 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2080 RetValueForBB = VtoBB.first;
2081 NewBB = VtoBB.second;
2082 DenseMap<Value *, BasicBlock *>::iterator VBBIt =
2083 EndBBs.find(Val: RetValueForBB);
2084 LLVM_DEBUG(dbgs() << "Create output block for region in"
2085 << Region.ExtractedFunction << " to "
2086 << *NewBB);
2087 BranchInst::Create(IfTrue: VBBIt->second, InsertBefore: NewBB);
2088 OutputStoreBBs.back().insert(KV: std::make_pair(x&: RetValueForBB, y&: NewBB));
2089 }
2090}
2091
2092/// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2093/// before creating a basic block for each \p NewMap, and inserting into the new
2094/// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2095///
2096/// \param OldMap [in] - The mapping to base the new mapping off of.
2097/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2098/// \param ParentFunc [in] - The function to put the new basic block in.
2099/// \param BaseName [in] - The start of the BasicBlock names to be appended to
2100/// by an index value.
2101static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
2102 DenseMap<Value *, BasicBlock *> &NewMap,
2103 Function *ParentFunc, Twine BaseName) {
2104 unsigned Idx = 0;
2105 std::vector<Value *> SortedKeys;
2106
2107 getSortedConstantKeys(SortedKeys, Map&: OldMap);
2108
2109 for (Value *RetVal : SortedKeys) {
2110 BasicBlock *NewBB = BasicBlock::Create(
2111 Context&: ParentFunc->getContext(), Name: Twine(BaseName) + Twine("_") + Twine(Idx++),
2112 Parent: ParentFunc);
2113 NewMap.insert(KV: std::make_pair(x&: RetVal, y&: NewBB));
2114 }
2115}
2116
2117/// Create the switch statement for outlined function to differentiate between
2118/// all the output blocks.
2119///
2120/// For the outlined section, determine if an outlined block already exists that
2121/// matches the needed stores for the extracted section.
2122/// \param [in] M - The module we are outlining from.
2123/// \param [in] OG - The group of regions to be outlined.
2124/// \param [in] EndBBs - The final blocks of the extracted function.
2125/// \param [in,out] OutputStoreBBs - The existing output blocks.
2126void createSwitchStatement(
2127 Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
2128 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2129 // We only need the switch statement if there is more than one store
2130 // combination, or there is more than one set of output blocks. The first
2131 // will occur when we store different sets of values for two different
2132 // regions. The second will occur when we have two outputs that are combined
2133 // in a PHINode outside of the region in one outlined instance, and are used
2134 // seaparately in another. This will create the same set of OutputGVNs, but
2135 // will generate two different output schemes.
2136 if (OG.OutputGVNCombinations.size() > 1) {
2137 Function *AggFunc = OG.OutlinedFunction;
2138 // Create a final block for each different return block.
2139 DenseMap<Value *, BasicBlock *> ReturnBBs;
2140 createAndInsertBasicBlocks(OldMap&: OG.EndBBs, NewMap&: ReturnBBs, ParentFunc: AggFunc, BaseName: "final_block");
2141
2142 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2143 std::pair<Value *, BasicBlock *> &OutputBlock =
2144 *OG.EndBBs.find(Val: RetBlockPair.first);
2145 BasicBlock *ReturnBlock = RetBlockPair.second;
2146 BasicBlock *EndBB = OutputBlock.second;
2147 Instruction *Term = EndBB->getTerminator();
2148 // Move the return value to the final block instead of the original exit
2149 // stub.
2150 Term->moveBefore(BB&: *ReturnBlock, I: ReturnBlock->end());
2151 // Put the switch statement in the old end basic block for the function
2152 // with a fall through to the new return block.
2153 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2154 << OutputStoreBBs.size() << "\n");
2155 SwitchInst *SwitchI =
2156 SwitchInst::Create(Value: AggFunc->getArg(i: AggFunc->arg_size() - 1),
2157 Default: ReturnBlock, NumCases: OutputStoreBBs.size(), InsertBefore: EndBB);
2158
2159 unsigned Idx = 0;
2160 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2161 DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
2162 OutputStoreBB.find(Val: OutputBlock.first);
2163
2164 if (OSBBIt == OutputStoreBB.end())
2165 continue;
2166
2167 BasicBlock *BB = OSBBIt->second;
2168 SwitchI->addCase(
2169 OnVal: ConstantInt::get(Ty: Type::getInt32Ty(C&: M.getContext()), V: Idx), Dest: BB);
2170 Term = BB->getTerminator();
2171 Term->setSuccessor(Idx: 0, BB: ReturnBlock);
2172 Idx++;
2173 }
2174 }
2175 return;
2176 }
2177
2178 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2179
2180 // If there needs to be stores, move them from the output blocks to their
2181 // corresponding ending block. We do not check that the OutputGVNCombinations
2182 // is equal to 1 here since that could just been the case where there are 0
2183 // outputs. Instead, we check whether there is more than one set of output
2184 // blocks since this is the only case where we would have to move the
2185 // stores, and erase the extraneous blocks.
2186 if (OutputStoreBBs.size() == 1) {
2187 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2188 << *OG.OutlinedFunction << "\n");
2189 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2190 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2191 DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
2192 EndBBs.find(Val: VBPair.first);
2193 assert(EndBBIt != EndBBs.end() && "Could not find end block");
2194 BasicBlock *EndBB = EndBBIt->second;
2195 BasicBlock *OutputBB = VBPair.second;
2196 Instruction *Term = OutputBB->getTerminator();
2197 Term->eraseFromParent();
2198 Term = EndBB->getTerminator();
2199 moveBBContents(SourceBB&: *OutputBB, TargetBB&: *EndBB);
2200 Term->moveBefore(BB&: *EndBB, I: EndBB->end());
2201 OutputBB->eraseFromParent();
2202 }
2203 }
2204}
2205
2206/// Fill the new function that will serve as the replacement function for all of
2207/// the extracted regions of a certain structure from the first region in the
2208/// list of regions. Replace this first region's extracted function with the
2209/// new overall function.
2210///
2211/// \param [in] M - The module we are outlining from.
2212/// \param [in] CurrentGroup - The group of regions to be outlined.
2213/// \param [in,out] OutputStoreBBs - The output blocks for each different
2214/// set of stores needed for the different functions.
2215/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
2216/// once outlining is complete.
2217/// \param [in] OutputMappings - Extracted functions to erase from module
2218/// once outlining is complete.
2219static void fillOverallFunction(
2220 Module &M, OutlinableGroup &CurrentGroup,
2221 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2222 std::vector<Function *> &FuncsToRemove,
2223 const DenseMap<Value *, Value *> &OutputMappings) {
2224 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2225
2226 // Move first extracted function's instructions into new function.
2227 LLVM_DEBUG(dbgs() << "Move instructions from "
2228 << *CurrentOS->ExtractedFunction << " to instruction "
2229 << *CurrentGroup.OutlinedFunction << "\n");
2230 moveFunctionData(Old&: *CurrentOS->ExtractedFunction,
2231 New&: *CurrentGroup.OutlinedFunction, NewEnds&: CurrentGroup.EndBBs);
2232
2233 // Transfer the attributes from the function to the new function.
2234 for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
2235 CurrentGroup.OutlinedFunction->addFnAttr(Attr: A);
2236
2237 // Create a new set of output blocks for the first extracted function.
2238 DenseMap<Value *, BasicBlock *> NewBBs;
2239 createAndInsertBasicBlocks(OldMap&: CurrentGroup.EndBBs, NewMap&: NewBBs,
2240 ParentFunc: CurrentGroup.OutlinedFunction, BaseName: "output_block_0");
2241 CurrentOS->OutputBlockNum = 0;
2242
2243 replaceArgumentUses(Region&: *CurrentOS, OutputBBs&: NewBBs, OutputMappings, FirstFunction: true);
2244 replaceConstants(Region&: *CurrentOS);
2245
2246 // We first identify if any output blocks are empty, if they are we remove
2247 // them. We then create a branch instruction to the basic block to the return
2248 // block for the function for each non empty output block.
2249 if (!analyzeAndPruneOutputBlocks(BlocksToPrune&: NewBBs, Region&: *CurrentOS)) {
2250 OutputStoreBBs.push_back(x: DenseMap<Value *, BasicBlock *>());
2251 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2252 DenseMap<Value *, BasicBlock *>::iterator VBBIt =
2253 CurrentGroup.EndBBs.find(Val: VToBB.first);
2254 BasicBlock *EndBB = VBBIt->second;
2255 BranchInst::Create(IfTrue: EndBB, InsertBefore: VToBB.second);
2256 OutputStoreBBs.back().insert(KV: VToBB);
2257 }
2258 }
2259
2260 // Replace the call to the extracted function with the outlined function.
2261 CurrentOS->Call = replaceCalledFunction(M, Region&: *CurrentOS);
2262
2263 // We only delete the extracted functions at the end since we may need to
2264 // reference instructions contained in them for mapping purposes.
2265 FuncsToRemove.push_back(x: CurrentOS->ExtractedFunction);
2266}
2267
2268void IROutliner::deduplicateExtractedSections(
2269 Module &M, OutlinableGroup &CurrentGroup,
2270 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2271 createFunction(M, Group&: CurrentGroup, FunctionNameSuffix: OutlinedFunctionNum);
2272
2273 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2274
2275 OutlinableRegion *CurrentOS;
2276
2277 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
2278 OutputMappings);
2279
2280 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2281 CurrentOS = CurrentGroup.Regions[Idx];
2282 AttributeFuncs::mergeAttributesForOutlining(Base&: *CurrentGroup.OutlinedFunction,
2283 ToMerge: *CurrentOS->ExtractedFunction);
2284
2285 // Create a set of BasicBlocks, one for each return block, to hold the
2286 // needed store instructions.
2287 DenseMap<Value *, BasicBlock *> NewBBs;
2288 createAndInsertBasicBlocks(OldMap&: CurrentGroup.EndBBs, NewMap&: NewBBs,
2289 ParentFunc: CurrentGroup.OutlinedFunction,
2290 BaseName: "output_block_" + Twine(Idx));
2291 replaceArgumentUses(Region&: *CurrentOS, OutputBBs&: NewBBs, OutputMappings);
2292 alignOutputBlockWithAggFunc(OG&: CurrentGroup, Region&: *CurrentOS, OutputBBs&: NewBBs,
2293 EndBBs&: CurrentGroup.EndBBs, OutputMappings,
2294 OutputStoreBBs);
2295
2296 CurrentOS->Call = replaceCalledFunction(M, Region&: *CurrentOS);
2297 FuncsToRemove.push_back(x: CurrentOS->ExtractedFunction);
2298 }
2299
2300 // Create a switch statement to handle the different output schemes.
2301 createSwitchStatement(M, OG&: CurrentGroup, EndBBs&: CurrentGroup.EndBBs, OutputStoreBBs);
2302
2303 OutlinedFunctionNum++;
2304}
2305
2306/// Checks that the next instruction in the InstructionDataList matches the
2307/// next instruction in the module. If they do not, there could be the
2308/// possibility that extra code has been inserted, and we must ignore it.
2309///
2310/// \param ID - The IRInstructionData to check the next instruction of.
2311/// \returns true if the InstructionDataList and actual instruction match.
2312static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
2313 // We check if there is a discrepancy between the InstructionDataList
2314 // and the actual next instruction in the module. If there is, it means
2315 // that an extra instruction was added, likely by the CodeExtractor.
2316
2317 // Since we do not have any similarity data about this particular
2318 // instruction, we cannot confidently outline it, and must discard this
2319 // candidate.
2320 IRInstructionDataList::iterator NextIDIt = std::next(x: ID.getIterator());
2321 Instruction *NextIDLInst = NextIDIt->Inst;
2322 Instruction *NextModuleInst = nullptr;
2323 if (!ID.Inst->isTerminator())
2324 NextModuleInst = ID.Inst->getNextNode();
2325 else if (NextIDLInst != nullptr)
2326 NextModuleInst =
2327 &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
2328
2329 if (NextIDLInst && NextIDLInst != NextModuleInst)
2330 return false;
2331
2332 return true;
2333}
2334
2335bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2336 const OutlinableRegion &Region) {
2337 IRSimilarityCandidate *IRSC = Region.Candidate;
2338 unsigned StartIdx = IRSC->getStartIdx();
2339 unsigned EndIdx = IRSC->getEndIdx();
2340
2341 // A check to make sure that we are not about to attempt to outline something
2342 // that has already been outlined.
2343 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2344 if (Outlined.contains(V: Idx))
2345 return false;
2346
2347 // We check if the recorded instruction matches the actual next instruction,
2348 // if it does not, we fix it in the InstructionDataList.
2349 if (!Region.Candidate->backInstruction()->isTerminator()) {
2350 Instruction *NewEndInst =
2351 Region.Candidate->backInstruction()->getNextNode();
2352 assert(NewEndInst && "Next instruction is a nullptr?");
2353 if (Region.Candidate->end()->Inst != NewEndInst) {
2354 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2355 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2356 IRInstructionData(*NewEndInst,
2357 InstructionClassifier.visit(I&: *NewEndInst), *IDL);
2358
2359 // Insert the first IRInstructionData of the new region after the
2360 // last IRInstructionData of the IRSimilarityCandidate.
2361 IDL->insert(I: Region.Candidate->end(), Node&: *NewEndIRID);
2362 }
2363 }
2364
2365 return none_of(Range&: *IRSC, P: [this](IRInstructionData &ID) {
2366 if (!nextIRInstructionDataMatchesNextInst(ID))
2367 return true;
2368
2369 return !this->InstructionClassifier.visit(I: ID.Inst);
2370 });
2371}
2372
2373void IROutliner::pruneIncompatibleRegions(
2374 std::vector<IRSimilarityCandidate> &CandidateVec,
2375 OutlinableGroup &CurrentGroup) {
2376 bool PreviouslyOutlined;
2377
2378 // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2379 stable_sort(Range&: CandidateVec, C: [](const IRSimilarityCandidate &LHS,
2380 const IRSimilarityCandidate &RHS) {
2381 return LHS.getStartIdx() < RHS.getStartIdx();
2382 });
2383
2384 IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2385 // Since outlining a call and a branch instruction will be the same as only
2386 // outlinining a call instruction, we ignore it as a space saving.
2387 if (FirstCandidate.getLength() == 2) {
2388 if (isa<CallInst>(Val: FirstCandidate.front()->Inst) &&
2389 isa<BranchInst>(Val: FirstCandidate.back()->Inst))
2390 return;
2391 }
2392
2393 unsigned CurrentEndIdx = 0;
2394 for (IRSimilarityCandidate &IRSC : CandidateVec) {
2395 PreviouslyOutlined = false;
2396 unsigned StartIdx = IRSC.getStartIdx();
2397 unsigned EndIdx = IRSC.getEndIdx();
2398 const Function &FnForCurrCand = *IRSC.getFunction();
2399
2400 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2401 if (Outlined.contains(V: Idx)) {
2402 PreviouslyOutlined = true;
2403 break;
2404 }
2405
2406 if (PreviouslyOutlined)
2407 continue;
2408
2409 // Check over the instructions, and if the basic block has its address
2410 // taken for use somewhere else, we do not outline that block.
2411 bool BBHasAddressTaken = any_of(Range&: IRSC, P: [](IRInstructionData &ID){
2412 return ID.Inst->getParent()->hasAddressTaken();
2413 });
2414
2415 if (BBHasAddressTaken)
2416 continue;
2417
2418 if (FnForCurrCand.hasOptNone())
2419 continue;
2420
2421 if (FnForCurrCand.hasFnAttribute(Kind: "nooutline")) {
2422 LLVM_DEBUG({
2423 dbgs() << "... Skipping function with nooutline attribute: "
2424 << FnForCurrCand.getName() << "\n";
2425 });
2426 continue;
2427 }
2428
2429 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2430 !OutlineFromLinkODRs)
2431 continue;
2432
2433 // Greedily prune out any regions that will overlap with already chosen
2434 // regions.
2435 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2436 continue;
2437
2438 bool BadInst = any_of(Range&: IRSC, P: [this](IRInstructionData &ID) {
2439 if (!nextIRInstructionDataMatchesNextInst(ID))
2440 return true;
2441
2442 return !this->InstructionClassifier.visit(I: ID.Inst);
2443 });
2444
2445 if (BadInst)
2446 continue;
2447
2448 OutlinableRegion *OS = new (RegionAllocator.Allocate())
2449 OutlinableRegion(IRSC, CurrentGroup);
2450 CurrentGroup.Regions.push_back(x: OS);
2451
2452 CurrentEndIdx = EndIdx;
2453 }
2454}
2455
2456InstructionCost
2457IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2458 InstructionCost RegionBenefit = 0;
2459 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2460 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2461 // We add the number of instructions in the region to the benefit as an
2462 // estimate as to how much will be removed.
2463 RegionBenefit += Region->getBenefit(TTI);
2464 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2465 << " saved instructions to overfall benefit.\n");
2466 }
2467
2468 return RegionBenefit;
2469}
2470
2471/// For the \p OutputCanon number passed in find the value represented by this
2472/// canonical number. If it is from a PHINode, we pick the first incoming
2473/// value and return that Value instead.
2474///
2475/// \param Region - The OutlinableRegion to get the Value from.
2476/// \param OutputCanon - The canonical number to find the Value from.
2477/// \returns The Value represented by a canonical number \p OutputCanon in \p
2478/// Region.
2479static Value *findOutputValueInRegion(OutlinableRegion &Region,
2480 unsigned OutputCanon) {
2481 OutlinableGroup &CurrentGroup = *Region.Parent;
2482 // If the value is greater than the value in the tracker, we have a
2483 // PHINode and will instead use one of the incoming values to find the
2484 // type.
2485 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2486 auto It = CurrentGroup.PHINodeGVNToGVNs.find(Val: OutputCanon);
2487 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2488 "Could not find GVN set for PHINode number!");
2489 assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2490 OutputCanon = *It->second.second.begin();
2491 }
2492 std::optional<unsigned> OGVN =
2493 Region.Candidate->fromCanonicalNum(N: OutputCanon);
2494 assert(OGVN && "Could not find GVN for Canonical Number?");
2495 std::optional<Value *> OV = Region.Candidate->fromGVN(Num: *OGVN);
2496 assert(OV && "Could not find value for GVN?");
2497 return *OV;
2498}
2499
2500InstructionCost
2501IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2502 InstructionCost OverallCost = 0;
2503 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2504 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2505
2506 // Each output incurs a load after the call, so we add that to the cost.
2507 for (unsigned OutputCanon : Region->GVNStores) {
2508 Value *V = findOutputValueInRegion(Region&: *Region, OutputCanon);
2509 InstructionCost LoadCost =
2510 TTI.getMemoryOpCost(Opcode: Instruction::Load, Src: V->getType(), Alignment: Align(1), AddressSpace: 0,
2511 CostKind: TargetTransformInfo::TCK_CodeSize);
2512
2513 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2514 << " instructions to cost for output of type "
2515 << *V->getType() << "\n");
2516 OverallCost += LoadCost;
2517 }
2518 }
2519
2520 return OverallCost;
2521}
2522
2523/// Find the extra instructions needed to handle any output values for the
2524/// region.
2525///
2526/// \param [in] M - The Module to outline from.
2527/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2528/// \param [in] TTI - The TargetTransformInfo used to collect information for
2529/// new instruction costs.
2530/// \returns the additional cost to handle the outputs.
2531static InstructionCost findCostForOutputBlocks(Module &M,
2532 OutlinableGroup &CurrentGroup,
2533 TargetTransformInfo &TTI) {
2534 InstructionCost OutputCost = 0;
2535 unsigned NumOutputBranches = 0;
2536
2537 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2538 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2539 DenseSet<BasicBlock *> CandidateBlocks;
2540 Candidate.getBasicBlocks(BBSet&: CandidateBlocks);
2541
2542 // Count the number of different output branches that point to blocks outside
2543 // of the region.
2544 DenseSet<BasicBlock *> FoundBlocks;
2545 for (IRInstructionData &ID : Candidate) {
2546 if (!isa<BranchInst>(Val: ID.Inst))
2547 continue;
2548
2549 for (Value *V : ID.OperVals) {
2550 BasicBlock *BB = static_cast<BasicBlock *>(V);
2551 if (!CandidateBlocks.contains(V: BB) && FoundBlocks.insert(V: BB).second)
2552 NumOutputBranches++;
2553 }
2554 }
2555
2556 CurrentGroup.BranchesToOutside = NumOutputBranches;
2557
2558 for (const ArrayRef<unsigned> &OutputUse :
2559 CurrentGroup.OutputGVNCombinations) {
2560 for (unsigned OutputCanon : OutputUse) {
2561 Value *V = findOutputValueInRegion(Region&: FirstRegion, OutputCanon);
2562 InstructionCost StoreCost =
2563 TTI.getMemoryOpCost(Opcode: Instruction::Load, Src: V->getType(), Alignment: Align(1), AddressSpace: 0,
2564 CostKind: TargetTransformInfo::TCK_CodeSize);
2565
2566 // An instruction cost is added for each store set that needs to occur for
2567 // various output combinations inside the function, plus a branch to
2568 // return to the exit block.
2569 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2570 << " instructions to cost for output of type "
2571 << *V->getType() << "\n");
2572 OutputCost += StoreCost * NumOutputBranches;
2573 }
2574
2575 InstructionCost BranchCost =
2576 TTI.getCFInstrCost(Opcode: Instruction::Br, CostKind: TargetTransformInfo::TCK_CodeSize);
2577 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2578 << " a branch instruction\n");
2579 OutputCost += BranchCost * NumOutputBranches;
2580 }
2581
2582 // If there is more than one output scheme, we must have a comparison and
2583 // branch for each different item in the switch statement.
2584 if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2585 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2586 Opcode: Instruction::ICmp, ValTy: Type::getInt32Ty(C&: M.getContext()),
2587 CondTy: Type::getInt32Ty(C&: M.getContext()), VecPred: CmpInst::BAD_ICMP_PREDICATE,
2588 CostKind: TargetTransformInfo::TCK_CodeSize);
2589 InstructionCost BranchCost =
2590 TTI.getCFInstrCost(Opcode: Instruction::Br, CostKind: TargetTransformInfo::TCK_CodeSize);
2591
2592 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2593 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2594
2595 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2596 << " instructions for each switch case for each different"
2597 << " output path in a function\n");
2598 OutputCost += TotalCost * NumOutputBranches;
2599 }
2600
2601 return OutputCost;
2602}
2603
2604void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2605 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2606 CurrentGroup.Benefit += RegionBenefit;
2607 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2608
2609 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2610 CurrentGroup.Cost += OutputReloadCost;
2611 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2612
2613 InstructionCost AverageRegionBenefit =
2614 RegionBenefit / CurrentGroup.Regions.size();
2615 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2616 unsigned NumRegions = CurrentGroup.Regions.size();
2617 TargetTransformInfo &TTI =
2618 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2619
2620 // We add one region to the cost once, to account for the instructions added
2621 // inside of the newly created function.
2622 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2623 << " instructions to cost for body of new function.\n");
2624 CurrentGroup.Cost += AverageRegionBenefit;
2625 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2626
2627 // For each argument, we must add an instruction for loading the argument
2628 // out of the register and into a value inside of the newly outlined function.
2629 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2630 << " instructions to cost for each argument in the new"
2631 << " function.\n");
2632 CurrentGroup.Cost +=
2633 OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2634 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2635
2636 // Each argument needs to either be loaded into a register or onto the stack.
2637 // Some arguments will only be loaded into the stack once the argument
2638 // registers are filled.
2639 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2640 << " instructions to cost for each argument in the new"
2641 << " function " << NumRegions << " times for the "
2642 << "needed argument handling at the call site.\n");
2643 CurrentGroup.Cost +=
2644 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2645 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2646
2647 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2648 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2649}
2650
2651void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2652 ArrayRef<Value *> Outputs,
2653 LoadInst *LI) {
2654 // For and load instructions following the call
2655 Value *Operand = LI->getPointerOperand();
2656 std::optional<unsigned> OutputIdx;
2657 // Find if the operand it is an output register.
2658 for (unsigned ArgIdx = Region.NumExtractedInputs;
2659 ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2660 if (Operand == Region.Call->getArgOperand(i: ArgIdx)) {
2661 OutputIdx = ArgIdx - Region.NumExtractedInputs;
2662 break;
2663 }
2664 }
2665
2666 // If we found an output register, place a mapping of the new value
2667 // to the original in the mapping.
2668 if (!OutputIdx)
2669 return;
2670
2671 auto It = OutputMappings.find(Val: Outputs[*OutputIdx]);
2672 if (It == OutputMappings.end()) {
2673 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2674 << *Outputs[*OutputIdx] << "\n");
2675 OutputMappings.insert(KV: std::make_pair(x&: LI, y: Outputs[*OutputIdx]));
2676 } else {
2677 Value *Orig = It->second;
2678 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2679 << *Outputs[*OutputIdx] << "\n");
2680 OutputMappings.insert(KV: std::make_pair(x&: LI, y&: Orig));
2681 }
2682}
2683
2684bool IROutliner::extractSection(OutlinableRegion &Region) {
2685 SetVector<Value *> ArgInputs, Outputs;
2686 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2687 BasicBlock *InitialStart = Region.StartBB;
2688 Function *OrigF = Region.StartBB->getParent();
2689 CodeExtractorAnalysisCache CEAC(*OrigF);
2690 Region.ExtractedFunction =
2691 Region.CE->extractCodeRegion(CEAC, Inputs&: ArgInputs, Outputs);
2692
2693 // If the extraction was successful, find the BasicBlock, and reassign the
2694 // OutlinableRegion blocks
2695 if (!Region.ExtractedFunction) {
2696 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2697 << "\n");
2698 Region.reattachCandidate();
2699 return false;
2700 }
2701
2702 // Get the block containing the called branch, and reassign the blocks as
2703 // necessary. If the original block still exists, it is because we ended on
2704 // a branch instruction, and so we move the contents into the block before
2705 // and assign the previous block correctly.
2706 User *InstAsUser = Region.ExtractedFunction->user_back();
2707 BasicBlock *RewrittenBB = cast<Instruction>(Val: InstAsUser)->getParent();
2708 Region.PrevBB = RewrittenBB->getSinglePredecessor();
2709 assert(Region.PrevBB && "PrevBB is nullptr?");
2710 if (Region.PrevBB == InitialStart) {
2711 BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2712 Instruction *BI = NewPrev->getTerminator();
2713 BI->eraseFromParent();
2714 moveBBContents(SourceBB&: *InitialStart, TargetBB&: *NewPrev);
2715 Region.PrevBB = NewPrev;
2716 InitialStart->eraseFromParent();
2717 }
2718
2719 Region.StartBB = RewrittenBB;
2720 Region.EndBB = RewrittenBB;
2721
2722 // The sequences of outlinable regions has now changed. We must fix the
2723 // IRInstructionDataList for consistency. Although they may not be illegal
2724 // instructions, they should not be compared with anything else as they
2725 // should not be outlined in this round. So marking these as illegal is
2726 // allowed.
2727 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2728 Instruction *BeginRewritten = &*RewrittenBB->begin();
2729 Instruction *EndRewritten = &*RewrittenBB->begin();
2730 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2731 *BeginRewritten, InstructionClassifier.visit(I&: *BeginRewritten), *IDL);
2732 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2733 *EndRewritten, InstructionClassifier.visit(I&: *EndRewritten), *IDL);
2734
2735 // Insert the first IRInstructionData of the new region in front of the
2736 // first IRInstructionData of the IRSimilarityCandidate.
2737 IDL->insert(I: Region.Candidate->begin(), Node&: *Region.NewFront);
2738 // Insert the first IRInstructionData of the new region after the
2739 // last IRInstructionData of the IRSimilarityCandidate.
2740 IDL->insert(I: Region.Candidate->end(), Node&: *Region.NewBack);
2741 // Remove the IRInstructionData from the IRSimilarityCandidate.
2742 IDL->erase(First: Region.Candidate->begin(), Last: std::prev(x: Region.Candidate->end()));
2743
2744 assert(RewrittenBB != nullptr &&
2745 "Could not find a predecessor after extraction!");
2746
2747 // Iterate over the new set of instructions to find the new call
2748 // instruction.
2749 for (Instruction &I : *RewrittenBB)
2750 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
2751 if (Region.ExtractedFunction == CI->getCalledFunction())
2752 Region.Call = CI;
2753 } else if (LoadInst *LI = dyn_cast<LoadInst>(Val: &I))
2754 updateOutputMapping(Region, Outputs: Outputs.getArrayRef(), LI);
2755 Region.reattachCandidate();
2756 return true;
2757}
2758
2759unsigned IROutliner::doOutline(Module &M) {
2760 // Find the possible similarity sections.
2761 InstructionClassifier.EnableBranches = !DisableBranches;
2762 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2763 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2764
2765 IRSimilarityIdentifier &Identifier = getIRSI(M);
2766 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2767
2768 // Sort them by size of extracted sections
2769 unsigned OutlinedFunctionNum = 0;
2770 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2771 // to sort them by the potential number of instructions to be outlined
2772 if (SimilarityCandidates.size() > 1)
2773 llvm::stable_sort(Range&: SimilarityCandidates,
2774 C: [](const std::vector<IRSimilarityCandidate> &LHS,
2775 const std::vector<IRSimilarityCandidate> &RHS) {
2776 return LHS[0].getLength() * LHS.size() >
2777 RHS[0].getLength() * RHS.size();
2778 });
2779 // Creating OutlinableGroups for each SimilarityCandidate to be used in
2780 // each of the following for loops to avoid making an allocator.
2781 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2782
2783 DenseSet<unsigned> NotSame;
2784 std::vector<OutlinableGroup *> NegativeCostGroups;
2785 std::vector<OutlinableRegion *> OutlinedRegions;
2786 // Iterate over the possible sets of similarity.
2787 unsigned PotentialGroupIdx = 0;
2788 for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2789 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2790
2791 // Remove entries that were previously outlined
2792 pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2793
2794 // We pruned the number of regions to 0 to 1, meaning that it's not worth
2795 // trying to outlined since there is no compatible similar instance of this
2796 // code.
2797 if (CurrentGroup.Regions.size() < 2)
2798 continue;
2799
2800 // Determine if there are any values that are the same constant throughout
2801 // each section in the set.
2802 NotSame.clear();
2803 CurrentGroup.findSameConstants(NotSame);
2804
2805 if (CurrentGroup.IgnoreGroup)
2806 continue;
2807
2808 // Create a CodeExtractor for each outlinable region. Identify inputs and
2809 // outputs for each section using the code extractor and create the argument
2810 // types for the Aggregate Outlining Function.
2811 OutlinedRegions.clear();
2812 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2813 // Break the outlinable region out of its parent BasicBlock into its own
2814 // BasicBlocks (see function implementation).
2815 OS->splitCandidate();
2816
2817 // There's a chance that when the region is split, extra instructions are
2818 // added to the region. This makes the region no longer viable
2819 // to be split, so we ignore it for outlining.
2820 if (!OS->CandidateSplit)
2821 continue;
2822
2823 SmallVector<BasicBlock *> BE;
2824 DenseSet<BasicBlock *> BlocksInRegion;
2825 OS->Candidate->getBasicBlocks(BBSet&: BlocksInRegion, BBList&: BE);
2826 OS->CE = new (ExtractorAllocator.Allocate())
2827 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2828 false, nullptr, "outlined");
2829 findAddInputsOutputs(M, Region&: *OS, NotSame);
2830 if (!OS->IgnoreRegion)
2831 OutlinedRegions.push_back(x: OS);
2832
2833 // We recombine the blocks together now that we have gathered all the
2834 // needed information.
2835 OS->reattachCandidate();
2836 }
2837
2838 CurrentGroup.Regions = std::move(OutlinedRegions);
2839
2840 if (CurrentGroup.Regions.empty())
2841 continue;
2842
2843 CurrentGroup.collectGVNStoreSets(M);
2844
2845 if (CostModel)
2846 findCostBenefit(M, CurrentGroup);
2847
2848 // If we are adhering to the cost model, skip those groups where the cost
2849 // outweighs the benefits.
2850 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2851 OptimizationRemarkEmitter &ORE =
2852 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2853 ORE.emit(RemarkBuilder: [&]() {
2854 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2855 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2856 C->frontInstruction());
2857 R << "did not outline "
2858 << ore::NV(std::to_string(val: CurrentGroup.Regions.size()))
2859 << " regions due to estimated increase of "
2860 << ore::NV("InstructionIncrease",
2861 CurrentGroup.Cost - CurrentGroup.Benefit)
2862 << " instructions at locations ";
2863 interleave(
2864 begin: CurrentGroup.Regions.begin(), end: CurrentGroup.Regions.end(),
2865 each_fn: [&R](OutlinableRegion *Region) {
2866 R << ore::NV(
2867 "DebugLoc",
2868 Region->Candidate->frontInstruction()->getDebugLoc());
2869 },
2870 between_fn: [&R]() { R << " "; });
2871 return R;
2872 });
2873 continue;
2874 }
2875
2876 NegativeCostGroups.push_back(x: &CurrentGroup);
2877 }
2878
2879 ExtractorAllocator.DestroyAll();
2880
2881 if (NegativeCostGroups.size() > 1)
2882 stable_sort(Range&: NegativeCostGroups,
2883 C: [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2884 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2885 });
2886
2887 std::vector<Function *> FuncsToRemove;
2888 for (OutlinableGroup *CG : NegativeCostGroups) {
2889 OutlinableGroup &CurrentGroup = *CG;
2890
2891 OutlinedRegions.clear();
2892 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2893 // We check whether our region is compatible with what has already been
2894 // outlined, and whether we need to ignore this item.
2895 if (!isCompatibleWithAlreadyOutlinedCode(Region: *Region))
2896 continue;
2897 OutlinedRegions.push_back(x: Region);
2898 }
2899
2900 if (OutlinedRegions.size() < 2)
2901 continue;
2902
2903 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2904 // we are still outlining enough regions to make up for the added cost.
2905 CurrentGroup.Regions = std::move(OutlinedRegions);
2906 if (CostModel) {
2907 CurrentGroup.Benefit = 0;
2908 CurrentGroup.Cost = 0;
2909 findCostBenefit(M, CurrentGroup);
2910 if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2911 continue;
2912 }
2913 OutlinedRegions.clear();
2914 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2915 Region->splitCandidate();
2916 if (!Region->CandidateSplit)
2917 continue;
2918 OutlinedRegions.push_back(x: Region);
2919 }
2920
2921 CurrentGroup.Regions = std::move(OutlinedRegions);
2922 if (CurrentGroup.Regions.size() < 2) {
2923 for (OutlinableRegion *R : CurrentGroup.Regions)
2924 R->reattachCandidate();
2925 continue;
2926 }
2927
2928 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2929 << " and benefit " << CurrentGroup.Benefit << "\n");
2930
2931 // Create functions out of all the sections, and mark them as outlined.
2932 OutlinedRegions.clear();
2933 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2934 SmallVector<BasicBlock *> BE;
2935 DenseSet<BasicBlock *> BlocksInRegion;
2936 OS->Candidate->getBasicBlocks(BBSet&: BlocksInRegion, BBList&: BE);
2937 OS->CE = new (ExtractorAllocator.Allocate())
2938 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2939 false, nullptr, "outlined");
2940 bool FunctionOutlined = extractSection(Region&: *OS);
2941 if (FunctionOutlined) {
2942 unsigned StartIdx = OS->Candidate->getStartIdx();
2943 unsigned EndIdx = OS->Candidate->getEndIdx();
2944 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2945 Outlined.insert(V: Idx);
2946
2947 OutlinedRegions.push_back(x: OS);
2948 }
2949 }
2950
2951 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2952 << " with benefit " << CurrentGroup.Benefit
2953 << " and cost " << CurrentGroup.Cost << "\n");
2954
2955 CurrentGroup.Regions = std::move(OutlinedRegions);
2956
2957 if (CurrentGroup.Regions.empty())
2958 continue;
2959
2960 OptimizationRemarkEmitter &ORE =
2961 getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2962 ORE.emit(RemarkBuilder: [&]() {
2963 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2964 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2965 R << "outlined " << ore::NV(std::to_string(val: CurrentGroup.Regions.size()))
2966 << " regions with decrease of "
2967 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2968 << " instructions at locations ";
2969 interleave(
2970 begin: CurrentGroup.Regions.begin(), end: CurrentGroup.Regions.end(),
2971 each_fn: [&R](OutlinableRegion *Region) {
2972 R << ore::NV("DebugLoc",
2973 Region->Candidate->frontInstruction()->getDebugLoc());
2974 },
2975 between_fn: [&R]() { R << " "; });
2976 return R;
2977 });
2978
2979 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2980 OutlinedFunctionNum);
2981 }
2982
2983 for (Function *F : FuncsToRemove)
2984 F->eraseFromParent();
2985
2986 return OutlinedFunctionNum;
2987}
2988
2989bool IROutliner::run(Module &M) {
2990 CostModel = !NoCostModel;
2991 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2992
2993 return doOutline(M) > 0;
2994}
2995
2996PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
2997 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
2998
2999 std::function<TargetTransformInfo &(Function &)> GTTI =
3000 [&FAM](Function &F) -> TargetTransformInfo & {
3001 return FAM.getResult<TargetIRAnalysis>(IR&: F);
3002 };
3003
3004 std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
3005 [&AM](Module &M) -> IRSimilarityIdentifier & {
3006 return AM.getResult<IRSimilarityAnalysis>(IR&: M);
3007 };
3008
3009 std::unique_ptr<OptimizationRemarkEmitter> ORE;
3010 std::function<OptimizationRemarkEmitter &(Function &)> GORE =
3011 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
3012 ORE.reset(p: new OptimizationRemarkEmitter(&F));
3013 return *ORE;
3014 };
3015
3016 if (IROutliner(GTTI, GIRSI, GORE).run(M))
3017 return PreservedAnalyses::none();
3018 return PreservedAnalyses::all();
3019}
3020