1//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10// Implementation for the IROutliner which is used by the IROutliner Pass.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/IROutliner.h"
15#include "llvm/Analysis/IRSimilarityIdentifier.h"
16#include "llvm/Analysis/OptimizationRemarkEmitter.h"
17#include "llvm/Analysis/TargetTransformInfo.h"
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/DIBuilder.h"
20#include "llvm/IR/DebugInfo.h"
21#include "llvm/IR/DebugInfoMetadata.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/Mangler.h"
24#include "llvm/IR/PassManager.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Transforms/IPO.h"
27#include "llvm/Transforms/Utils/ValueMapper.h"
28#include <optional>
29#include <vector>
30
31#define DEBUG_TYPE "iroutliner"
32
33using namespace llvm;
34using namespace IRSimilarity;
35
36// A command flag to be used for debugging to exclude branches from similarity
37// matching and outlining.
38namespace llvm {
39extern cl::opt<bool> DisableBranches;
40
41// A command flag to be used for debugging to indirect calls from similarity
42// matching and outlining.
43extern cl::opt<bool> DisableIndirectCalls;
44
45// A command flag to be used for debugging to exclude intrinsics from similarity
46// matching and outlining.
47extern cl::opt<bool> DisableIntrinsics;
48
49} // namespace llvm
50
51// Set to true if the user wants the ir outliner to run on linkonceodr linkage
52// functions. This is false by default because the linker can dedupe linkonceodr
53// functions. Since the outliner is confined to a single module (modulo LTO),
54// this is off by default. It should, however, be the default behavior in
55// LTO.
56static cl::opt<bool> EnableLinkOnceODRIROutlining(
57 "enable-linkonceodr-ir-outlining", cl::Hidden,
58 cl::desc("Enable the IR outliner on linkonceodr functions"),
59 cl::init(Val: false));
60
61// This is a debug option to test small pieces of code to ensure that outlining
62// works correctly.
63static cl::opt<bool> NoCostModel(
64 "ir-outlining-no-cost", cl::init(Val: false), cl::ReallyHidden,
65 cl::desc("Debug option to outline greedily, without restriction that "
66 "calculated benefit outweighs cost"));
67
68/// The OutlinableGroup holds all the overarching information for outlining
69/// a set of regions that are structurally similar to one another, such as the
70/// types of the overall function, the output blocks, the sets of stores needed
71/// and a list of the different regions. This information is used in the
72/// deduplication of extracted regions with the same structure.
73struct OutlinableGroup {
74 /// The sections that could be outlined
75 std::vector<OutlinableRegion *> Regions;
76
77 /// The argument types for the function created as the overall function to
78 /// replace the extracted function for each region.
79 std::vector<Type *> ArgumentTypes;
80 /// The FunctionType for the overall function.
81 FunctionType *OutlinedFunctionType = nullptr;
82 /// The Function for the collective overall function.
83 Function *OutlinedFunction = nullptr;
84
85 /// Flag for whether we should not consider this group of OutlinableRegions
86 /// for extraction.
87 bool IgnoreGroup = false;
88
89 /// The return blocks for the overall function.
90 DenseMap<Value *, BasicBlock *> EndBBs;
91
92 /// The PHIBlocks with their corresponding return block based on the return
93 /// value as the key.
94 DenseMap<Value *, BasicBlock *> PHIBlocks;
95
96 /// A set containing the different GVN store sets needed. Each array contains
97 /// a sorted list of the different values that need to be stored into output
98 /// registers.
99 DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
100
101 /// Flag for whether the \ref ArgumentTypes have been defined after the
102 /// extraction of the first region.
103 bool InputTypesSet = false;
104
105 /// The number of input values in \ref ArgumentTypes. Anything after this
106 /// index in ArgumentTypes is an output argument.
107 unsigned NumAggregateInputs = 0;
108
109 /// The mapping of the canonical numbering of the values in outlined sections
110 /// to specific arguments.
111 DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
112
113 /// The number of branches in the region target a basic block that is outside
114 /// of the region.
115 unsigned BranchesToOutside = 0;
116
117 /// Tracker counting backwards from the highest unsigned value possible to
118 /// avoid conflicting with the GVNs of assigned values. We start at -3 since
119 /// -2 and -1 are assigned by the DenseMap.
120 unsigned PHINodeGVNTracker = -3;
121
122 DenseMap<unsigned,
123 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
124 PHINodeGVNToGVNs;
125 DenseMap<hash_code, unsigned> GVNsToPHINodeGVN;
126
127 /// The number of instructions that will be outlined by extracting \ref
128 /// Regions.
129 InstructionCost Benefit = 0;
130 /// The number of added instructions needed for the outlining of the \ref
131 /// Regions.
132 InstructionCost Cost = 0;
133
134 /// The argument that needs to be marked with the swifterr attribute. If not
135 /// needed, there is no value.
136 std::optional<unsigned> SwiftErrorArgument;
137
138 /// For the \ref Regions, we look at every Value. If it is a constant,
139 /// we check whether it is the same in Region.
140 ///
141 /// \param [in,out] NotSame contains the global value numbers where the
142 /// constant is not always the same, and must be passed in as an argument.
143 void findSameConstants(DenseSet<unsigned> &NotSame);
144
145 /// For the regions, look at each set of GVN stores needed and account for
146 /// each combination. Add an argument to the argument types if there is
147 /// more than one combination.
148 ///
149 /// \param [in] M - The module we are outlining from.
150 void collectGVNStoreSets(Module &M);
151};
152
153/// Move the contents of \p SourceBB to before the last instruction of \p
154/// TargetBB.
155/// \param SourceBB - the BasicBlock to pull Instructions from.
156/// \param TargetBB - the BasicBlock to put Instruction into.
157static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
158 TargetBB.splice(ToIt: TargetBB.end(), FromBB: &SourceBB);
159}
160
161/// A function to sort the keys of \p Map, which must be a mapping of constant
162/// values to basic blocks and return it in \p SortedKeys
163///
164/// \param SortedKeys - The vector the keys will be return in and sorted.
165/// \param Map - The DenseMap containing keys to sort.
166static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
167 DenseMap<Value *, BasicBlock *> &Map) {
168 for (auto &VtoBB : Map)
169 SortedKeys.push_back(x: VtoBB.first);
170
171 // Here we expect to have either 1 value that is void (nullptr) or multiple
172 // values that are all constant integers.
173 if (SortedKeys.size() == 1) {
174 assert(!SortedKeys[0] && "Expected a single void value.");
175 return;
176 }
177
178 stable_sort(Range&: SortedKeys, C: [](const Value *LHS, const Value *RHS) {
179 assert(LHS && RHS && "Expected non void values.");
180 const ConstantInt *LHSC = cast<ConstantInt>(Val: LHS);
181 const ConstantInt *RHSC = cast<ConstantInt>(Val: RHS);
182
183 return LHSC->getLimitedValue() < RHSC->getLimitedValue();
184 });
185}
186
187Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
188 Value *V) {
189 std::optional<unsigned> GVN = Candidate->getGVN(V);
190 assert(GVN && "No GVN for incoming value");
191 std::optional<unsigned> CanonNum = Candidate->getCanonicalNum(N: *GVN);
192 std::optional<unsigned> FirstGVN =
193 Other.Candidate->fromCanonicalNum(N: *CanonNum);
194 std::optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(Num: *FirstGVN);
195 return FoundValueOpt.value_or(u: nullptr);
196}
197
198BasicBlock *
199OutlinableRegion::findCorrespondingBlockIn(const OutlinableRegion &Other,
200 BasicBlock *BB) {
201 Instruction *FirstNonPHI = &*BB->getFirstNonPHIOrDbg();
202 assert(FirstNonPHI && "block is empty?");
203 Value *CorrespondingVal = findCorrespondingValueIn(Other, V: FirstNonPHI);
204 if (!CorrespondingVal)
205 return nullptr;
206 BasicBlock *CorrespondingBlock =
207 cast<Instruction>(Val: CorrespondingVal)->getParent();
208 return CorrespondingBlock;
209}
210
211/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
212/// in \p Included to branch to BasicBlock \p Replace if they currently branch
213/// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
214/// when PHINodes are included in outlined regions.
215///
216/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
217/// checked.
218/// \param Find - The successor block to be replaced.
219/// \param Replace - The new succesor block to branch to.
220/// \param Included - The set of blocks about to be outlined.
221static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
222 BasicBlock *Replace,
223 DenseSet<BasicBlock *> &Included) {
224 for (PHINode &PN : PHIBlock->phis())
225 for (BasicBlock *Incoming : PN.blocks())
226 // Check if the incoming block is included in the set of blocks being
227 // outlined.
228 if (Included.contains(V: Incoming))
229 Incoming->getTerminator()->replaceSuccessorWith(OldBB: Find, NewBB: Replace);
230}
231
232
233void OutlinableRegion::splitCandidate() {
234 assert(!CandidateSplit && "Candidate already split!");
235
236 Instruction *BackInst = Candidate->backInstruction();
237
238 Instruction *EndInst = nullptr;
239 // Check whether the last instruction is a terminator, if it is, we do
240 // not split on the following instruction. We leave the block as it is. We
241 // also check that this is not the last instruction in the Module, otherwise
242 // the check for whether the current following instruction matches the
243 // previously recorded instruction will be incorrect.
244 if (!BackInst->isTerminator() ||
245 BackInst->getParent() != &BackInst->getFunction()->back()) {
246 EndInst = Candidate->end()->Inst;
247 assert(EndInst && "Expected an end instruction?");
248 }
249
250 // We check if the current instruction following the last instruction in the
251 // region is the same as the recorded instruction following the last
252 // instruction. If they do not match, there could be problems in rewriting
253 // the program after outlining, so we ignore it.
254 if (!BackInst->isTerminator() && EndInst != BackInst->getNextNode())
255 return;
256
257 Instruction *StartInst = (*Candidate->begin()).Inst;
258 assert(StartInst && "Expected a start instruction?");
259 StartBB = StartInst->getParent();
260 PrevBB = StartBB;
261
262 DenseSet<BasicBlock *> BBSet;
263 Candidate->getBasicBlocks(BBSet);
264
265 // We iterate over the instructions in the region, if we find a PHINode, we
266 // check if there are predecessors outside of the region, if there are,
267 // we ignore this region since we are unable to handle the severing of the
268 // phi node right now.
269
270 // TODO: Handle extraneous inputs for PHINodes through variable number of
271 // inputs, similar to how outputs are handled.
272 BasicBlock::iterator It = StartInst->getIterator();
273 EndBB = BackInst->getParent();
274 BasicBlock *IBlock;
275 BasicBlock *PHIPredBlock = nullptr;
276 bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
277 while (PHINode *PN = dyn_cast<PHINode>(Val: &*It)) {
278 unsigned NumPredsOutsideRegion = 0;
279 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
280 if (!BBSet.contains(V: PN->getIncomingBlock(i))) {
281 PHIPredBlock = PN->getIncomingBlock(i);
282 ++NumPredsOutsideRegion;
283 continue;
284 }
285
286 // We must consider the case there the incoming block to the PHINode is
287 // the same as the final block of the OutlinableRegion. If this is the
288 // case, the branch from this block must also be outlined to be valid.
289 IBlock = PN->getIncomingBlock(i);
290 if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
291 PHIPredBlock = PN->getIncomingBlock(i);
292 ++NumPredsOutsideRegion;
293 }
294 }
295
296 if (NumPredsOutsideRegion > 1)
297 return;
298
299 It++;
300 }
301
302 // If the region starts with a PHINode, but is not the initial instruction of
303 // the BasicBlock, we ignore this region for now.
304 if (isa<PHINode>(Val: StartInst) && StartInst != &*StartBB->begin())
305 return;
306
307 // If the region ends with a PHINode, but does not contain all of the phi node
308 // instructions of the region, we ignore it for now.
309 if (isa<PHINode>(Val: BackInst) &&
310 BackInst != &*std::prev(x: EndBB->getFirstInsertionPt()))
311 return;
312
313 // The basic block gets split like so:
314 // block: block:
315 // inst1 inst1
316 // inst2 inst2
317 // region1 br block_to_outline
318 // region2 block_to_outline:
319 // region3 -> region1
320 // region4 region2
321 // inst3 region3
322 // inst4 region4
323 // br block_after_outline
324 // block_after_outline:
325 // inst3
326 // inst4
327
328 std::string OriginalName = PrevBB->getName().str();
329
330 StartBB = PrevBB->splitBasicBlock(I: StartInst, BBName: OriginalName + "_to_outline");
331 PrevBB->replaceSuccessorsPhiUsesWith(Old: PrevBB, New: StartBB);
332 // If there was a PHINode with an incoming block outside the region,
333 // make sure is correctly updated in the newly split block.
334 if (PHIPredBlock)
335 PrevBB->replaceSuccessorsPhiUsesWith(Old: PHIPredBlock, New: PrevBB);
336
337 CandidateSplit = true;
338 if (!BackInst->isTerminator()) {
339 EndBB = EndInst->getParent();
340 FollowBB = EndBB->splitBasicBlock(I: EndInst, BBName: OriginalName + "_after_outline");
341 EndBB->replaceSuccessorsPhiUsesWith(Old: EndBB, New: FollowBB);
342 FollowBB->replaceSuccessorsPhiUsesWith(Old: PrevBB, New: FollowBB);
343 } else {
344 EndBB = BackInst->getParent();
345 EndsInBranch = true;
346 FollowBB = nullptr;
347 }
348
349 // Refind the basic block set.
350 BBSet.clear();
351 Candidate->getBasicBlocks(BBSet);
352 // For the phi nodes in the new starting basic block of the region, we
353 // reassign the targets of the basic blocks branching instructions.
354 replaceTargetsFromPHINode(PHIBlock: StartBB, Find: PrevBB, Replace: StartBB, Included&: BBSet);
355 if (FollowBB)
356 replaceTargetsFromPHINode(PHIBlock: FollowBB, Find: EndBB, Replace: FollowBB, Included&: BBSet);
357}
358
359void OutlinableRegion::reattachCandidate() {
360 assert(CandidateSplit && "Candidate is not split!");
361
362 // The basic block gets reattached like so:
363 // block: block:
364 // inst1 inst1
365 // inst2 inst2
366 // br block_to_outline region1
367 // block_to_outline: -> region2
368 // region1 region3
369 // region2 region4
370 // region3 inst3
371 // region4 inst4
372 // br block_after_outline
373 // block_after_outline:
374 // inst3
375 // inst4
376 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
377
378 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
379 // Make sure PHINode references to the block we are merging into are
380 // updated to be incoming blocks from the predecessor to the current block.
381
382 // NOTE: If this is updated such that the outlined block can have more than
383 // one incoming block to a PHINode, this logic will have to updated
384 // to handle multiple precessors instead.
385
386 // We only need to update this if the outlined section contains a PHINode, if
387 // it does not, then the incoming block was never changed in the first place.
388 // On the other hand, if PrevBB has no predecessors, it means that all
389 // incoming blocks to the first block are contained in the region, and there
390 // will be nothing to update.
391 Instruction *StartInst = (*Candidate->begin()).Inst;
392 if (isa<PHINode>(Val: StartInst) && !PrevBB->hasNPredecessors(N: 0)) {
393 assert(!PrevBB->hasNPredecessorsOrMore(2) &&
394 "PrevBB has more than one predecessor. Should be 0 or 1.");
395 BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
396 PrevBB->replaceSuccessorsPhiUsesWith(Old: PrevBB, New: BeforePrevBB);
397 }
398 PrevBB->getTerminator()->eraseFromParent();
399
400 // If we reattaching after outlining, we iterate over the phi nodes to
401 // the initial block, and reassign the branch instructions of the incoming
402 // blocks to the block we are remerging into.
403 if (!ExtractedFunction) {
404 DenseSet<BasicBlock *> BBSet;
405 Candidate->getBasicBlocks(BBSet);
406
407 replaceTargetsFromPHINode(PHIBlock: StartBB, Find: StartBB, Replace: PrevBB, Included&: BBSet);
408 if (!EndsInBranch)
409 replaceTargetsFromPHINode(PHIBlock: FollowBB, Find: FollowBB, Replace: EndBB, Included&: BBSet);
410 }
411
412 moveBBContents(SourceBB&: *StartBB, TargetBB&: *PrevBB);
413
414 BasicBlock *PlacementBB = PrevBB;
415 if (StartBB != EndBB)
416 PlacementBB = EndBB;
417 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
418 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
419 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
420 PlacementBB->getTerminator()->eraseFromParent();
421 moveBBContents(SourceBB&: *FollowBB, TargetBB&: *PlacementBB);
422 PlacementBB->replaceSuccessorsPhiUsesWith(Old: FollowBB, New: PlacementBB);
423 FollowBB->eraseFromParent();
424 }
425
426 PrevBB->replaceSuccessorsPhiUsesWith(Old: StartBB, New: PrevBB);
427 StartBB->eraseFromParent();
428
429 // Make sure to save changes back to the StartBB.
430 StartBB = PrevBB;
431 EndBB = nullptr;
432 PrevBB = nullptr;
433 FollowBB = nullptr;
434
435 CandidateSplit = false;
436}
437
438/// Find whether \p V matches the Constants previously found for the \p GVN.
439///
440/// \param V - The value to check for consistency.
441/// \param GVN - The global value number assigned to \p V.
442/// \param GVNToConstant - The mapping of global value number to Constants.
443/// \returns true if the Value matches the Constant mapped to by V and false if
444/// it \p V is a Constant but does not match.
445/// \returns std::nullopt if \p V is not a Constant.
446static std::optional<bool>
447constantMatches(Value *V, unsigned GVN,
448 DenseMap<unsigned, Constant *> &GVNToConstant) {
449 // See if we have a constants
450 Constant *CST = dyn_cast<Constant>(Val: V);
451 if (!CST)
452 return std::nullopt;
453
454 // Holds a mapping from a global value number to a Constant.
455 DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
456 bool Inserted;
457
458
459 // If we have a constant, try to make a new entry in the GVNToConstant.
460 std::tie(args&: GVNToConstantIt, args&: Inserted) =
461 GVNToConstant.insert(KV: std::make_pair(x&: GVN, y&: CST));
462 // If it was found and is not equal, it is not the same. We do not
463 // handle this case yet, and exit early.
464 if (Inserted || (GVNToConstantIt->second == CST))
465 return true;
466
467 return false;
468}
469
470InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
471 InstructionCost Benefit = 0;
472
473 // Estimate the benefit of outlining a specific sections of the program. We
474 // delegate mostly this task to the TargetTransformInfo so that if the target
475 // has specific changes, we can have a more accurate estimate.
476
477 // However, getInstructionCost delegates the code size calculation for
478 // arithmetic instructions to getArithmeticInstrCost in
479 // include/Analysis/TargetTransformImpl.h, where it always estimates that the
480 // code size for a division and remainder instruction to be equal to 4, and
481 // everything else to 1. This is not an accurate representation of the
482 // division instruction for targets that have a native division instruction.
483 // To be overly conservative, we only add 1 to the number of instructions for
484 // each division instruction.
485 for (IRInstructionData &ID : *Candidate) {
486 Instruction *I = ID.Inst;
487 switch (I->getOpcode()) {
488 case Instruction::FDiv:
489 case Instruction::FRem:
490 case Instruction::SDiv:
491 case Instruction::SRem:
492 case Instruction::UDiv:
493 case Instruction::URem:
494 Benefit += 1;
495 break;
496 default:
497 Benefit += TTI.getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_CodeSize);
498 break;
499 }
500 }
501
502 return Benefit;
503}
504
505/// Check the \p OutputMappings structure for value \p Input, if it exists
506/// it has been used as an output for outlining, and has been renamed, and we
507/// return the new value, otherwise, we return the same value.
508///
509/// \param OutputMappings [in] - The mapping of values to their renamed value
510/// after being used as an output for an outlined region.
511/// \param Input [in] - The value to find the remapped value of, if it exists.
512/// \return The remapped value if it has been renamed, and the same value if has
513/// not.
514static Value *findOutputMapping(const DenseMap<Value *, Value *> OutputMappings,
515 Value *Input) {
516 auto OutputMapping = OutputMappings.find(Val: Input);
517 if (OutputMapping != OutputMappings.end())
518 return OutputMapping->second;
519 return Input;
520}
521
522/// Find whether \p Region matches the global value numbering to Constant
523/// mapping found so far.
524///
525/// \param Region - The OutlinableRegion we are checking for constants
526/// \param GVNToConstant - The mapping of global value number to Constants.
527/// \param NotSame - The set of global value numbers that do not have the same
528/// constant in each region.
529/// \returns true if all Constants are the same in every use of a Constant in \p
530/// Region and false if not
531static bool
532collectRegionsConstants(OutlinableRegion &Region,
533 DenseMap<unsigned, Constant *> &GVNToConstant,
534 DenseSet<unsigned> &NotSame) {
535 bool ConstantsTheSame = true;
536
537 IRSimilarityCandidate &C = *Region.Candidate;
538 for (IRInstructionData &ID : C) {
539
540 // Iterate over the operands in an instruction. If the global value number,
541 // assigned by the IRSimilarityCandidate, has been seen before, we check if
542 // the number has been found to be not the same value in each instance.
543 for (Value *V : ID.OperVals) {
544 std::optional<unsigned> GVNOpt = C.getGVN(V);
545 assert(GVNOpt && "Expected a GVN for operand?");
546 unsigned GVN = *GVNOpt;
547
548 // Check if this global value has been found to not be the same already.
549 if (NotSame.contains(V: GVN)) {
550 if (isa<Constant>(Val: V))
551 ConstantsTheSame = false;
552 continue;
553 }
554
555 // If it has been the same so far, we check the value for if the
556 // associated Constant value match the previous instances of the same
557 // global value number. If the global value does not map to a Constant,
558 // it is considered to not be the same value.
559 std::optional<bool> ConstantMatches =
560 constantMatches(V, GVN, GVNToConstant);
561 if (ConstantMatches) {
562 if (*ConstantMatches)
563 continue;
564 else
565 ConstantsTheSame = false;
566 }
567
568 // While this value is a register, it might not have been previously,
569 // make sure we don't already have a constant mapped to this global value
570 // number.
571 if (GVNToConstant.contains(Val: GVN))
572 ConstantsTheSame = false;
573
574 NotSame.insert(V: GVN);
575 }
576 }
577
578 return ConstantsTheSame;
579}
580
581void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
582 DenseMap<unsigned, Constant *> GVNToConstant;
583
584 for (OutlinableRegion *Region : Regions)
585 collectRegionsConstants(Region&: *Region, GVNToConstant, NotSame);
586}
587
588void OutlinableGroup::collectGVNStoreSets(Module &M) {
589 for (OutlinableRegion *OS : Regions)
590 OutputGVNCombinations.insert(V: OS->GVNStores);
591
592 // We are adding an extracted argument to decide between which output path
593 // to use in the basic block. It is used in a switch statement and only
594 // needs to be an integer.
595 if (OutputGVNCombinations.size() > 1)
596 ArgumentTypes.push_back(x: Type::getInt32Ty(C&: M.getContext()));
597}
598
599/// Get the subprogram if it exists for one of the outlined regions.
600///
601/// \param [in] Group - The set of regions to find a subprogram for.
602/// \returns the subprogram if it exists, or nullptr.
603static DISubprogram *getSubprogramOrNull(OutlinableGroup &Group) {
604 for (OutlinableRegion *OS : Group.Regions)
605 if (Function *F = OS->Call->getFunction())
606 if (DISubprogram *SP = F->getSubprogram())
607 return SP;
608
609 return nullptr;
610}
611
612Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
613 unsigned FunctionNameSuffix) {
614 assert(!Group.OutlinedFunction && "Function is already defined!");
615
616 Type *RetTy = Type::getVoidTy(C&: M.getContext());
617 // All extracted functions _should_ have the same return type at this point
618 // since the similarity identifier ensures that all branches outside of the
619 // region occur in the same place.
620
621 // NOTE: Should we ever move to the model that uses a switch at every point
622 // needed, meaning that we could branch within the region or out, it is
623 // possible that we will need to switch to using the most general case all of
624 // the time.
625 for (OutlinableRegion *R : Group.Regions) {
626 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
627 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
628 (RetTy->isIntegerTy(BitWidth: 1) && ExtractedFuncType->isIntegerTy(BitWidth: 16)))
629 RetTy = ExtractedFuncType;
630 }
631
632 Group.OutlinedFunctionType = FunctionType::get(
633 Result: RetTy, Params: Group.ArgumentTypes, isVarArg: false);
634
635 // These functions will only be called from within the same module, so
636 // we can set an internal linkage.
637 Group.OutlinedFunction = Function::Create(
638 Ty: Group.OutlinedFunctionType, Linkage: GlobalValue::InternalLinkage,
639 N: "outlined_ir_func_" + std::to_string(val: FunctionNameSuffix), M);
640
641 // Transfer the swifterr attribute to the correct function parameter.
642 if (Group.SwiftErrorArgument)
643 Group.OutlinedFunction->addParamAttr(ArgNo: *Group.SwiftErrorArgument,
644 Kind: Attribute::SwiftError);
645
646 Group.OutlinedFunction->addFnAttr(Kind: Attribute::OptimizeForSize);
647 Group.OutlinedFunction->addFnAttr(Kind: Attribute::MinSize);
648
649 // If there's a DISubprogram associated with this outlined function, then
650 // emit debug info for the outlined function.
651 if (DISubprogram *SP = getSubprogramOrNull(Group)) {
652 Function *F = Group.OutlinedFunction;
653 // We have a DISubprogram. Get its DICompileUnit.
654 DICompileUnit *CU = SP->getUnit();
655 DIBuilder DB(M, true, CU);
656 DIFile *Unit = SP->getFile();
657 Mangler Mg;
658 // Get the mangled name of the function for the linkage name.
659 std::string Dummy;
660 llvm::raw_string_ostream MangledNameStream(Dummy);
661 Mg.getNameWithPrefix(OS&: MangledNameStream, GV: F, CannotUsePrivateLabel: false);
662
663 DISubprogram *OutlinedSP = DB.createFunction(
664 Scope: Unit /* Context */, Name: F->getName(), LinkageName: Dummy, File: Unit /* File */,
665 LineNo: 0 /* Line 0 is reserved for compiler-generated code. */,
666 Ty: DB.createSubroutineType(ParameterTypes: DB.getOrCreateTypeArray(Elements: {})), /* void type */
667 ScopeLine: 0, /* Line 0 is reserved for compiler-generated code. */
668 Flags: DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
669 /* Outlined code is optimized code by definition. */
670 SPFlags: DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
671
672 // Attach subprogram to the function.
673 F->setSubprogram(OutlinedSP);
674 // We're done with the DIBuilder.
675 DB.finalize();
676 }
677
678 return Group.OutlinedFunction;
679}
680
681/// Move each BasicBlock in \p Old to \p New.
682///
683/// \param [in] Old - The function to move the basic blocks from.
684/// \param [in] New - The function to move the basic blocks to.
685/// \param [out] NewEnds - The return blocks of the new overall function.
686static void moveFunctionData(Function &Old, Function &New,
687 DenseMap<Value *, BasicBlock *> &NewEnds) {
688 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Range&: Old)) {
689 CurrBB.removeFromParent();
690 CurrBB.insertInto(Parent: &New);
691 Instruction *I = CurrBB.getTerminator();
692
693 // For each block we find a return instruction is, it is a potential exit
694 // path for the function. We keep track of each block based on the return
695 // value here.
696 if (ReturnInst *RI = dyn_cast<ReturnInst>(Val: I))
697 NewEnds.insert(KV: std::make_pair(x: RI->getReturnValue(), y: &CurrBB));
698
699 for (Instruction &Val : CurrBB) {
700 // Since debug-info originates from many different locations in the
701 // program, it will cause incorrect reporting from a debugger if we keep
702 // the same debug instructions. Drop non-intrinsic DbgVariableRecords
703 // here, collect intrinsics for removal later.
704 Val.dropDbgRecords();
705
706 // We must handle the scoping of called functions differently than
707 // other outlined instructions.
708 if (!isa<CallInst>(Val: &Val)) {
709 // Remove the debug information for outlined functions.
710 Val.setDebugLoc(DebugLoc::getDropped());
711
712 // Loop info metadata may contain line locations. Update them to have no
713 // value in the new subprogram since the outlined code could be from
714 // several locations.
715 auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
716 if (DISubprogram *SP = New.getSubprogram())
717 if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD))
718 return DILocation::get(Context&: New.getContext(), Line: Loc->getLine(),
719 Column: Loc->getColumn(), Scope: SP, InlinedAt: nullptr);
720 return MD;
721 };
722 updateLoopMetadataDebugLocations(I&: Val, Updater: updateLoopInfoLoc);
723 continue;
724 }
725
726 // Edit the scope of called functions inside of outlined functions.
727 if (DISubprogram *SP = New.getSubprogram()) {
728 DILocation *DI = DILocation::get(Context&: New.getContext(), Line: 0, Column: 0, Scope: SP);
729 Val.setDebugLoc(DI);
730 }
731 }
732 }
733}
734
735/// Find the constants that will need to be lifted into arguments
736/// as they are not the same in each instance of the region.
737///
738/// \param [in] C - The IRSimilarityCandidate containing the region we are
739/// analyzing.
740/// \param [in] NotSame - The set of global value numbers that do not have a
741/// single Constant across all OutlinableRegions similar to \p C.
742/// \param [out] Inputs - The list containing the global value numbers of the
743/// arguments needed for the region of code.
744static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
745 std::vector<unsigned> &Inputs) {
746 DenseSet<unsigned> Seen;
747 // Iterate over the instructions, and find what constants will need to be
748 // extracted into arguments.
749 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
750 IDIt != EndIDIt; IDIt++) {
751 for (Value *V : (*IDIt).OperVals) {
752 // Since these are stored before any outlining, they will be in the
753 // global value numbering.
754 unsigned GVN = *C.getGVN(V);
755 if (isa<Constant>(Val: V))
756 if (NotSame.contains(V: GVN) && Seen.insert(V: GVN).second)
757 Inputs.push_back(x: GVN);
758 }
759 }
760}
761
762/// Find the GVN for the inputs that have been found by the CodeExtractor.
763///
764/// \param [in] C - The IRSimilarityCandidate containing the region we are
765/// analyzing.
766/// \param [in] CurrentInputs - The set of inputs found by the
767/// CodeExtractor.
768/// \param [in] OutputMappings - The mapping of values that have been replaced
769/// by a new output value.
770/// \param [out] EndInputNumbers - The global value numbers for the extracted
771/// arguments.
772static void mapInputsToGVNs(IRSimilarityCandidate &C,
773 SetVector<Value *> &CurrentInputs,
774 const DenseMap<Value *, Value *> &OutputMappings,
775 std::vector<unsigned> &EndInputNumbers) {
776 // Get the Global Value Number for each input. We check if the Value has been
777 // replaced by a different value at output, and use the original value before
778 // replacement.
779 for (Value *Input : CurrentInputs) {
780 assert(Input && "Have a nullptr as an input");
781 auto It = OutputMappings.find(Val: Input);
782 if (It != OutputMappings.end())
783 Input = It->second;
784 assert(C.getGVN(Input) && "Could not find a numbering for the given input");
785 EndInputNumbers.push_back(x: *C.getGVN(V: Input));
786 }
787}
788
789/// Find the original value for the \p ArgInput values if any one of them was
790/// replaced during a previous extraction.
791///
792/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
793/// \param [in] OutputMappings - The mapping of values that have been replaced
794/// by a new output value.
795/// \param [out] RemappedArgInputs - The remapped values according to
796/// \p OutputMappings that will be extracted.
797static void
798remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
799 const DenseMap<Value *, Value *> &OutputMappings,
800 SetVector<Value *> &RemappedArgInputs) {
801 // Get the global value number for each input that will be extracted as an
802 // argument by the code extractor, remapping if needed for reloaded values.
803 for (Value *Input : ArgInputs) {
804 auto It = OutputMappings.find(Val: Input);
805 if (It != OutputMappings.end())
806 Input = It->second;
807 RemappedArgInputs.insert(X: Input);
808 }
809}
810
811/// Find the input GVNs and the output values for a region of Instructions.
812/// Using the code extractor, we collect the inputs to the extracted function.
813///
814/// The \p Region can be identified as needing to be ignored in this function.
815/// It should be checked whether it should be ignored after a call to this
816/// function.
817///
818/// \param [in,out] Region - The region of code to be analyzed.
819/// \param [out] InputGVNs - The global value numbers for the extracted
820/// arguments.
821/// \param [in] NotSame - The global value numbers in the region that do not
822/// have the same constant value in the regions structurally similar to
823/// \p Region.
824/// \param [in] OutputMappings - The mapping of values that have been replaced
825/// by a new output value after extraction.
826/// \param [out] ArgInputs - The values of the inputs to the extracted function.
827/// \param [out] Outputs - The set of values extracted by the CodeExtractor
828/// as outputs.
829static void getCodeExtractorArguments(
830 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
831 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
832 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
833 IRSimilarityCandidate &C = *Region.Candidate;
834
835 // OverallInputs are the inputs to the region found by the CodeExtractor,
836 // SinkCands and HoistCands are used by the CodeExtractor to find sunken
837 // allocas of values whose lifetimes are contained completely within the
838 // outlined region. PremappedInputs are the arguments found by the
839 // CodeExtractor, removing conditions such as sunken allocas, but that
840 // may need to be remapped due to the extracted output values replacing
841 // the original values. We use DummyOutputs for this first run of finding
842 // inputs and outputs since the outputs could change during findAllocas,
843 // the correct set of extracted outputs will be in the final Outputs ValueSet.
844 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
845 DummyOutputs;
846
847 // Use the code extractor to get the inputs and outputs, without sunken
848 // allocas or removing llvm.assumes.
849 CodeExtractor *CE = Region.CE;
850 CE->findInputsOutputs(Inputs&: OverallInputs, Outputs&: DummyOutputs, Allocas: SinkCands);
851 assert(Region.StartBB && "Region must have a start BasicBlock!");
852 Function *OrigF = Region.StartBB->getParent();
853 CodeExtractorAnalysisCache CEAC(*OrigF);
854 BasicBlock *Dummy = nullptr;
855
856 // The region may be ineligible due to VarArgs in the parent function. In this
857 // case we ignore the region.
858 if (!CE->isEligible()) {
859 Region.IgnoreRegion = true;
860 return;
861 }
862
863 // Find if any values are going to be sunk into the function when extracted
864 CE->findAllocas(CEAC, SinkCands, HoistCands, ExitBlock&: Dummy);
865 CE->findInputsOutputs(Inputs&: PremappedInputs, Outputs, Allocas: SinkCands);
866
867 // TODO: Support regions with sunken allocas: values whose lifetimes are
868 // contained completely within the outlined region. These are not guaranteed
869 // to be the same in every region, so we must elevate them all to arguments
870 // when they appear. If these values are not equal, it means there is some
871 // Input in OverallInputs that was removed for ArgInputs.
872 if (OverallInputs.size() != PremappedInputs.size()) {
873 Region.IgnoreRegion = true;
874 return;
875 }
876
877 findConstants(C, NotSame, Inputs&: InputGVNs);
878
879 mapInputsToGVNs(C, CurrentInputs&: OverallInputs, OutputMappings, EndInputNumbers&: InputGVNs);
880
881 remapExtractedInputs(ArgInputs: PremappedInputs.getArrayRef(), OutputMappings,
882 RemappedArgInputs&: ArgInputs);
883
884 // Sort the GVNs, since we now have constants included in the \ref InputGVNs
885 // we need to make sure they are in a deterministic order.
886 stable_sort(Range&: InputGVNs);
887}
888
889/// Look over the inputs and map each input argument to an argument in the
890/// overall function for the OutlinableRegions. This creates a way to replace
891/// the arguments of the extracted function with the arguments of the new
892/// overall function.
893///
894/// \param [in,out] Region - The region of code to be analyzed.
895/// \param [in] InputGVNs - The global value numbering of the input values
896/// collected.
897/// \param [in] ArgInputs - The values of the arguments to the extracted
898/// function.
899static void
900findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
901 std::vector<unsigned> &InputGVNs,
902 SetVector<Value *> &ArgInputs) {
903
904 IRSimilarityCandidate &C = *Region.Candidate;
905 OutlinableGroup &Group = *Region.Parent;
906
907 // This counts the argument number in the overall function.
908 unsigned TypeIndex = 0;
909
910 // This counts the argument number in the extracted function.
911 unsigned OriginalIndex = 0;
912
913 // Find the mapping of the extracted arguments to the arguments for the
914 // overall function. Since there may be extra arguments in the overall
915 // function to account for the extracted constants, we have two different
916 // counters as we find extracted arguments, and as we come across overall
917 // arguments.
918
919 // Additionally, in our first pass, for the first extracted function,
920 // we find argument locations for the canonical value numbering. This
921 // numbering overrides any discovered location for the extracted code.
922 for (unsigned InputVal : InputGVNs) {
923 std::optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(N: InputVal);
924 assert(CanonicalNumberOpt && "Canonical number not found?");
925 unsigned CanonicalNumber = *CanonicalNumberOpt;
926
927 std::optional<Value *> InputOpt = C.fromGVN(Num: InputVal);
928 assert(InputOpt && "Global value number not found?");
929 Value *Input = *InputOpt;
930
931 auto AggArgIt = Group.CanonicalNumberToAggArg.find(Val: CanonicalNumber);
932
933 if (!Group.InputTypesSet) {
934 Group.ArgumentTypes.push_back(x: Input->getType());
935 // If the input value has a swifterr attribute, make sure to mark the
936 // argument in the overall function.
937 if (Input->isSwiftError()) {
938 assert(
939 !Group.SwiftErrorArgument &&
940 "Argument already marked with swifterr for this OutlinableGroup!");
941 Group.SwiftErrorArgument = TypeIndex;
942 }
943 }
944
945 // Check if we have a constant. If we do add it to the overall argument
946 // number to Constant map for the region, and continue to the next input.
947 if (Constant *CST = dyn_cast<Constant>(Val: Input)) {
948 if (AggArgIt != Group.CanonicalNumberToAggArg.end())
949 Region.AggArgToConstant.insert(KV: std::make_pair(x&: AggArgIt->second, y&: CST));
950 else {
951 Group.CanonicalNumberToAggArg.insert(
952 KV: std::make_pair(x&: CanonicalNumber, y&: TypeIndex));
953 Region.AggArgToConstant.insert(KV: std::make_pair(x&: TypeIndex, y&: CST));
954 }
955 TypeIndex++;
956 continue;
957 }
958
959 // It is not a constant, we create the mapping from extracted argument list
960 // to the overall argument list, using the canonical location, if it exists.
961 assert(ArgInputs.count(Input) && "Input cannot be found!");
962
963 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
964 if (OriginalIndex != AggArgIt->second)
965 Region.ChangedArgOrder = true;
966 Region.ExtractedArgToAgg.insert(
967 KV: std::make_pair(x&: OriginalIndex, y&: AggArgIt->second));
968 Region.AggArgToExtracted.insert(
969 KV: std::make_pair(x&: AggArgIt->second, y&: OriginalIndex));
970 } else {
971 Group.CanonicalNumberToAggArg.insert(
972 KV: std::make_pair(x&: CanonicalNumber, y&: TypeIndex));
973 Region.ExtractedArgToAgg.insert(KV: std::make_pair(x&: OriginalIndex, y&: TypeIndex));
974 Region.AggArgToExtracted.insert(KV: std::make_pair(x&: TypeIndex, y&: OriginalIndex));
975 }
976 OriginalIndex++;
977 TypeIndex++;
978 }
979
980 // If the function type definitions for the OutlinableGroup holding the region
981 // have not been set, set the length of the inputs here. We should have the
982 // same inputs for all of the different regions contained in the
983 // OutlinableGroup since they are all structurally similar to one another.
984 if (!Group.InputTypesSet) {
985 Group.NumAggregateInputs = TypeIndex;
986 Group.InputTypesSet = true;
987 }
988
989 Region.NumExtractedInputs = OriginalIndex;
990}
991
992/// Check if the \p V has any uses outside of the region other than \p PN.
993///
994/// \param V [in] - The value to check.
995/// \param PHILoc [in] - The location in the PHINode of \p V.
996/// \param PN [in] - The PHINode using \p V.
997/// \param Exits [in] - The potential blocks we exit to from the outlined
998/// region.
999/// \param BlocksInRegion [in] - The basic blocks contained in the region.
1000/// \returns true if \p V has any use soutside its region other than \p PN.
1001static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1002 SmallPtrSet<BasicBlock *, 1> &Exits,
1003 DenseSet<BasicBlock *> &BlocksInRegion) {
1004 // We check to see if the value is used by the PHINode from some other
1005 // predecessor not included in the region. If it is, we make sure
1006 // to keep it as an output.
1007 if (any_of(Range: llvm::seq<unsigned>(Begin: 0, End: PN.getNumIncomingValues()),
1008 P: [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1009 return (Idx != PHILoc && V == PN.getIncomingValue(i: Idx) &&
1010 !BlocksInRegion.contains(V: PN.getIncomingBlock(i: Idx)));
1011 }))
1012 return true;
1013
1014 // Check if the value is used by any other instructions outside the region.
1015 return any_of(Range: V->users(), P: [&Exits, &BlocksInRegion](User *U) {
1016 Instruction *I = dyn_cast<Instruction>(Val: U);
1017 if (!I)
1018 return false;
1019
1020 // If the use of the item is inside the region, we skip it. Uses
1021 // inside the region give us useful information about how the item could be
1022 // used as an output.
1023 BasicBlock *Parent = I->getParent();
1024 if (BlocksInRegion.contains(V: Parent))
1025 return false;
1026
1027 // If it's not a PHINode then we definitely know the use matters. This
1028 // output value will not completely combined with another item in a PHINode
1029 // as it is directly reference by another non-phi instruction
1030 if (!isa<PHINode>(Val: I))
1031 return true;
1032
1033 // If we have a PHINode outside one of the exit locations, then it
1034 // can be considered an outside use as well. If there is a PHINode
1035 // contained in the Exit where this values use matters, it will be
1036 // caught when we analyze that PHINode.
1037 if (!Exits.contains(Ptr: Parent))
1038 return true;
1039
1040 return false;
1041 });
1042}
1043
1044/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1045/// considered outputs. A PHINodes is an output when more than one incoming
1046/// value has been marked by the CodeExtractor as an output.
1047///
1048/// \param CurrentExitFromRegion [in] - The block to analyze.
1049/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1050/// region.
1051/// \param RegionBlocks [in] - The basic blocks in the region.
1052/// \param Outputs [in, out] - The existing outputs for the region, we may add
1053/// PHINodes to this as we find that they replace output values.
1054/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1055/// totally replaced by a PHINode.
1056/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1057/// in PHINodes, but have other uses, and should still be considered outputs.
1058static void analyzeExitPHIsForOutputUses(
1059 BasicBlock *CurrentExitFromRegion,
1060 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1061 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1062 DenseSet<Value *> &OutputsReplacedByPHINode,
1063 DenseSet<Value *> &OutputsWithNonPhiUses) {
1064 for (PHINode &PN : CurrentExitFromRegion->phis()) {
1065 // Find all incoming values from the outlining region.
1066 SmallVector<unsigned, 2> IncomingVals;
1067 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1068 if (RegionBlocks.contains(V: PN.getIncomingBlock(i: I)))
1069 IncomingVals.push_back(Elt: I);
1070
1071 // Do not process PHI if there are no predecessors from region.
1072 unsigned NumIncomingVals = IncomingVals.size();
1073 if (NumIncomingVals == 0)
1074 continue;
1075
1076 // If there is one predecessor, we mark it as a value that needs to be kept
1077 // as an output.
1078 if (NumIncomingVals == 1) {
1079 Value *V = PN.getIncomingValue(i: *IncomingVals.begin());
1080 OutputsWithNonPhiUses.insert(V);
1081 OutputsReplacedByPHINode.erase(V);
1082 continue;
1083 }
1084
1085 // This PHINode will be used as an output value, so we add it to our list.
1086 Outputs.insert(X: &PN);
1087
1088 // Not all of the incoming values should be ignored as other inputs and
1089 // outputs may have uses in outlined region. If they have other uses
1090 // outside of the single PHINode we should not skip over it.
1091 for (unsigned Idx : IncomingVals) {
1092 Value *V = PN.getIncomingValue(i: Idx);
1093 if (!isa<Constant>(Val: V) &&
1094 outputHasNonPHI(V, PHILoc: Idx, PN, Exits&: PotentialExitsFromRegion, BlocksInRegion&: RegionBlocks)) {
1095 OutputsWithNonPhiUses.insert(V);
1096 OutputsReplacedByPHINode.erase(V);
1097 continue;
1098 }
1099 if (!OutputsWithNonPhiUses.contains(V))
1100 OutputsReplacedByPHINode.insert(V);
1101 }
1102 }
1103}
1104
1105// Represents the type for the unsigned number denoting the output number for
1106// phi node, along with the canonical number for the exit block.
1107using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1108// The list of canonical numbers for the incoming values to a PHINode.
1109using CanonList = SmallVector<unsigned, 2>;
1110// The pair type representing the set of canonical values being combined in the
1111// PHINode, along with the location data for the PHINode.
1112using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1113
1114/// Encode \p PND as an integer for easy lookup based on the argument location,
1115/// the parent BasicBlock canonical numbering, and the canonical numbering of
1116/// the values stored in the PHINode.
1117///
1118/// \param PND - The data to hash.
1119/// \returns The hash code of \p PND.
1120static hash_code encodePHINodeData(PHINodeData &PND) {
1121 return llvm::hash_combine(args: llvm::hash_value(value: PND.first.first),
1122 args: llvm::hash_value(value: PND.first.second),
1123 args: llvm::hash_combine_range(R&: PND.second));
1124}
1125
1126/// Create a special GVN for PHINodes that will be used outside of
1127/// the region. We create a hash code based on the Canonical number of the
1128/// parent BasicBlock, the canonical numbering of the values stored in the
1129/// PHINode and the aggregate argument location. This is used to find whether
1130/// this PHINode type has been given a canonical numbering already. If not, we
1131/// assign it a value and store it for later use. The value is returned to
1132/// identify different output schemes for the set of regions.
1133///
1134/// \param Region - The region that \p PN is an output for.
1135/// \param PN - The PHINode we are analyzing.
1136/// \param Blocks - The blocks for the region we are analyzing.
1137/// \param AggArgIdx - The argument \p PN will be stored into.
1138/// \returns An optional holding the assigned canonical number, or std::nullopt
1139/// if there is some attribute of the PHINode blocking it from being used.
1140static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1141 PHINode *PN,
1142 DenseSet<BasicBlock *> &Blocks,
1143 unsigned AggArgIdx) {
1144 OutlinableGroup &Group = *Region.Parent;
1145 IRSimilarityCandidate &Cand = *Region.Candidate;
1146 BasicBlock *PHIBB = PN->getParent();
1147 CanonList PHIGVNs;
1148 Value *Incoming;
1149 BasicBlock *IncomingBlock;
1150 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1151 Incoming = PN->getIncomingValue(i: Idx);
1152 IncomingBlock = PN->getIncomingBlock(i: Idx);
1153 // If the incoming block isn't in the region, we don't have to worry about
1154 // this incoming value.
1155 if (!Blocks.contains(V: IncomingBlock))
1156 continue;
1157
1158 // If we cannot find a GVN, and the incoming block is included in the region
1159 // this means that the input to the PHINode is not included in the region we
1160 // are trying to analyze, meaning, that if it was outlined, we would be
1161 // adding an extra input. We ignore this case for now, and so ignore the
1162 // region.
1163 std::optional<unsigned> OGVN = Cand.getGVN(V: Incoming);
1164 if (!OGVN) {
1165 Region.IgnoreRegion = true;
1166 return std::nullopt;
1167 }
1168
1169 // Collect the canonical numbers of the values in the PHINode.
1170 unsigned GVN = *OGVN;
1171 OGVN = Cand.getCanonicalNum(N: GVN);
1172 assert(OGVN && "No GVN found for incoming value?");
1173 PHIGVNs.push_back(Elt: *OGVN);
1174
1175 // Find the incoming block and use the canonical numbering as well to define
1176 // the hash for the PHINode.
1177 OGVN = Cand.getGVN(V: IncomingBlock);
1178
1179 // If there is no number for the incoming block, it is because we have
1180 // split the candidate basic blocks. So we use the previous block that it
1181 // was split from to find the valid global value numbering for the PHINode.
1182 if (!OGVN) {
1183 assert(Cand.getStartBB() == IncomingBlock &&
1184 "Unknown basic block used in exit path PHINode.");
1185
1186 BasicBlock *PrevBlock = nullptr;
1187 // Iterate over the predecessors to the incoming block of the
1188 // PHINode, when we find a block that is not contained in the region
1189 // we know that this is the first block that we split from, and should
1190 // have a valid global value numbering.
1191 for (BasicBlock *Pred : predecessors(BB: IncomingBlock))
1192 if (!Blocks.contains(V: Pred)) {
1193 PrevBlock = Pred;
1194 break;
1195 }
1196 assert(PrevBlock && "Expected a predecessor not in the reigon!");
1197 OGVN = Cand.getGVN(V: PrevBlock);
1198 }
1199 GVN = *OGVN;
1200 OGVN = Cand.getCanonicalNum(N: GVN);
1201 assert(OGVN && "No GVN found for incoming block?");
1202 PHIGVNs.push_back(Elt: *OGVN);
1203 }
1204
1205 // Now that we have the GVNs for the incoming values, we are going to combine
1206 // them with the GVN of the incoming bock, and the output location of the
1207 // PHINode to generate a hash value representing this instance of the PHINode.
1208 DenseMap<hash_code, unsigned>::iterator GVNToPHIIt;
1209 DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt;
1210 std::optional<unsigned> BBGVN = Cand.getGVN(V: PHIBB);
1211 assert(BBGVN && "Could not find GVN for the incoming block!");
1212
1213 BBGVN = Cand.getCanonicalNum(N: *BBGVN);
1214 assert(BBGVN && "Could not find canonical number for the incoming block!");
1215 // Create a pair of the exit block canonical value, and the aggregate
1216 // argument location, connected to the canonical numbers stored in the
1217 // PHINode.
1218 PHINodeData TemporaryPair =
1219 std::make_pair(x: std::make_pair(x&: *BBGVN, y&: AggArgIdx), y&: PHIGVNs);
1220 hash_code PHINodeDataHash = encodePHINodeData(PND&: TemporaryPair);
1221
1222 // Look for and create a new entry in our connection between canonical
1223 // numbers for PHINodes, and the set of objects we just created.
1224 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(Val: PHINodeDataHash);
1225 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1226 bool Inserted = false;
1227 std::tie(args&: PHIToGVNIt, args&: Inserted) = Group.PHINodeGVNToGVNs.insert(
1228 KV: std::make_pair(x&: Group.PHINodeGVNTracker, y&: TemporaryPair));
1229 std::tie(args&: GVNToPHIIt, args&: Inserted) = Group.GVNsToPHINodeGVN.insert(
1230 KV: std::make_pair(x&: PHINodeDataHash, y: Group.PHINodeGVNTracker--));
1231 }
1232
1233 return GVNToPHIIt->second;
1234}
1235
1236/// Create a mapping of the output arguments for the \p Region to the output
1237/// arguments of the overall outlined function.
1238///
1239/// \param [in,out] Region - The region of code to be analyzed.
1240/// \param [in] Outputs - The values found by the code extractor.
1241static void
1242findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region,
1243 SetVector<Value *> &Outputs) {
1244 OutlinableGroup &Group = *Region.Parent;
1245 IRSimilarityCandidate &C = *Region.Candidate;
1246
1247 SmallVector<BasicBlock *> BE;
1248 DenseSet<BasicBlock *> BlocksInRegion;
1249 C.getBasicBlocks(BBSet&: BlocksInRegion, BBList&: BE);
1250
1251 // Find the exits to the region.
1252 SmallPtrSet<BasicBlock *, 1> Exits;
1253 for (BasicBlock *Block : BE)
1254 for (BasicBlock *Succ : successors(BB: Block))
1255 if (!BlocksInRegion.contains(V: Succ))
1256 Exits.insert(Ptr: Succ);
1257
1258 // After determining which blocks exit to PHINodes, we add these PHINodes to
1259 // the set of outputs to be processed. We also check the incoming values of
1260 // the PHINodes for whether they should no longer be considered outputs.
1261 DenseSet<Value *> OutputsReplacedByPHINode;
1262 DenseSet<Value *> OutputsWithNonPhiUses;
1263 for (BasicBlock *ExitBB : Exits)
1264 analyzeExitPHIsForOutputUses(CurrentExitFromRegion: ExitBB, PotentialExitsFromRegion&: Exits, RegionBlocks&: BlocksInRegion, Outputs,
1265 OutputsReplacedByPHINode,
1266 OutputsWithNonPhiUses);
1267
1268 // This counts the argument number in the extracted function.
1269 unsigned OriginalIndex = Region.NumExtractedInputs;
1270
1271 // This counts the argument number in the overall function.
1272 unsigned TypeIndex = Group.NumAggregateInputs;
1273 bool TypeFound;
1274 DenseSet<unsigned> AggArgsUsed;
1275
1276 // Iterate over the output types and identify if there is an aggregate pointer
1277 // type whose base type matches the current output type. If there is, we mark
1278 // that we will use this output register for this value. If not we add another
1279 // type to the overall argument type list. We also store the GVNs used for
1280 // stores to identify which values will need to be moved into an special
1281 // block that holds the stores to the output registers.
1282 for (Value *Output : Outputs) {
1283 TypeFound = false;
1284 // We can do this since it is a result value, and will have a number
1285 // that is necessarily the same. BUT if in the future, the instructions
1286 // do not have to be in same order, but are functionally the same, we will
1287 // have to use a different scheme, as one-to-one correspondence is not
1288 // guaranteed.
1289 unsigned ArgumentSize = Group.ArgumentTypes.size();
1290
1291 // If the output is combined in a PHINode, we make sure to skip over it.
1292 if (OutputsReplacedByPHINode.contains(V: Output))
1293 continue;
1294
1295 unsigned AggArgIdx = 0;
1296 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1297 if (!isa<PointerType>(Val: Group.ArgumentTypes[Jdx]))
1298 continue;
1299
1300 if (!AggArgsUsed.insert(V: Jdx).second)
1301 continue;
1302
1303 TypeFound = true;
1304 Region.ExtractedArgToAgg.insert(KV: std::make_pair(x&: OriginalIndex, y&: Jdx));
1305 Region.AggArgToExtracted.insert(KV: std::make_pair(x&: Jdx, y&: OriginalIndex));
1306 AggArgIdx = Jdx;
1307 break;
1308 }
1309
1310 // We were unable to find an unused type in the output type set that matches
1311 // the output, so we add a pointer type to the argument types of the overall
1312 // function to handle this output and create a mapping to it.
1313 if (!TypeFound) {
1314 Group.ArgumentTypes.push_back(x: PointerType::get(C&: Output->getContext(),
1315 AddressSpace: M.getDataLayout().getAllocaAddrSpace()));
1316 // Mark the new pointer type as the last value in the aggregate argument
1317 // list.
1318 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1319 AggArgsUsed.insert(V: ArgTypeIdx);
1320 Region.ExtractedArgToAgg.insert(
1321 KV: std::make_pair(x&: OriginalIndex, y&: ArgTypeIdx));
1322 Region.AggArgToExtracted.insert(
1323 KV: std::make_pair(x&: ArgTypeIdx, y&: OriginalIndex));
1324 AggArgIdx = ArgTypeIdx;
1325 }
1326
1327 // TODO: Adapt to the extra input from the PHINode.
1328 PHINode *PN = dyn_cast<PHINode>(Val: Output);
1329
1330 std::optional<unsigned> GVN;
1331 if (PN && !BlocksInRegion.contains(V: PN->getParent())) {
1332 // Values outside the region can be combined into PHINode when we
1333 // have multiple exits. We collect both of these into a list to identify
1334 // which values are being used in the PHINode. Each list identifies a
1335 // different PHINode, and a different output. We store the PHINode as it's
1336 // own canonical value. These canonical values are also dependent on the
1337 // output argument it is saved to.
1338
1339 // If two PHINodes have the same canonical values, but different aggregate
1340 // argument locations, then they will have distinct Canonical Values.
1341 GVN = getGVNForPHINode(Region, PN, Blocks&: BlocksInRegion, AggArgIdx);
1342 if (!GVN)
1343 return;
1344 } else {
1345 // If we do not have a PHINode we use the global value numbering for the
1346 // output value, to find the canonical number to add to the set of stored
1347 // values.
1348 GVN = C.getGVN(V: Output);
1349 GVN = C.getCanonicalNum(N: *GVN);
1350 }
1351
1352 // Each region has a potentially unique set of outputs. We save which
1353 // values are output in a list of canonical values so we can differentiate
1354 // among the different store schemes.
1355 Region.GVNStores.push_back(Elt: *GVN);
1356
1357 OriginalIndex++;
1358 TypeIndex++;
1359 }
1360
1361 // We sort the stored values to make sure that we are not affected by analysis
1362 // order when determining what combination of items were stored.
1363 stable_sort(Range&: Region.GVNStores);
1364}
1365
1366void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1367 DenseSet<unsigned> &NotSame) {
1368 std::vector<unsigned> Inputs;
1369 SetVector<Value *> ArgInputs, Outputs;
1370
1371 getCodeExtractorArguments(Region, InputGVNs&: Inputs, NotSame, OutputMappings, ArgInputs,
1372 Outputs);
1373
1374 if (Region.IgnoreRegion)
1375 return;
1376
1377 // Map the inputs found by the CodeExtractor to the arguments found for
1378 // the overall function.
1379 findExtractedInputToOverallInputMapping(Region, InputGVNs&: Inputs, ArgInputs);
1380
1381 // Map the outputs found by the CodeExtractor to the arguments found for
1382 // the overall function.
1383 findExtractedOutputToOverallOutputMapping(M, Region, Outputs);
1384}
1385
1386/// Replace the extracted function in the Region with a call to the overall
1387/// function constructed from the deduplicated similar regions, replacing and
1388/// remapping the values passed to the extracted function as arguments to the
1389/// new arguments of the overall function.
1390///
1391/// \param [in] M - The module to outline from.
1392/// \param [in] Region - The regions of extracted code to be replaced with a new
1393/// function.
1394/// \returns a call instruction with the replaced function.
1395CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
1396 std::vector<Value *> NewCallArgs;
1397 DenseMap<unsigned, unsigned>::iterator ArgPair;
1398
1399 OutlinableGroup &Group = *Region.Parent;
1400 CallInst *Call = Region.Call;
1401 assert(Call && "Call to replace is nullptr?");
1402 Function *AggFunc = Group.OutlinedFunction;
1403 assert(AggFunc && "Function to replace with is nullptr?");
1404
1405 // If the arguments are the same size, there are not values that need to be
1406 // made into an argument, the argument ordering has not been change, or
1407 // different output registers to handle. We can simply replace the called
1408 // function in this case.
1409 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1410 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1411 << *AggFunc << " with same number of arguments\n");
1412 Call->setCalledFunction(AggFunc);
1413 return Call;
1414 }
1415
1416 // We have a different number of arguments than the new function, so
1417 // we need to use our previously mappings off extracted argument to overall
1418 // function argument, and constants to overall function argument to create the
1419 // new argument list.
1420 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1421
1422 if (AggArgIdx == AggFunc->arg_size() - 1 &&
1423 Group.OutputGVNCombinations.size() > 1) {
1424 // If we are on the last argument, and we need to differentiate between
1425 // output blocks, add an integer to the argument list to determine
1426 // what block to take
1427 LLVM_DEBUG(dbgs() << "Set switch block argument to "
1428 << Region.OutputBlockNum << "\n");
1429 NewCallArgs.push_back(x: ConstantInt::get(Ty: Type::getInt32Ty(C&: M.getContext()),
1430 V: Region.OutputBlockNum));
1431 continue;
1432 }
1433
1434 ArgPair = Region.AggArgToExtracted.find(Val: AggArgIdx);
1435 if (ArgPair != Region.AggArgToExtracted.end()) {
1436 Value *ArgumentValue = Call->getArgOperand(i: ArgPair->second);
1437 // If we found the mapping from the extracted function to the overall
1438 // function, we simply add it to the argument list. We use the same
1439 // value, it just needs to honor the new order of arguments.
1440 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1441 << *ArgumentValue << "\n");
1442 NewCallArgs.push_back(x: ArgumentValue);
1443 continue;
1444 }
1445
1446 // If it is a constant, we simply add it to the argument list as a value.
1447 if (auto It = Region.AggArgToConstant.find(Val: AggArgIdx);
1448 It != Region.AggArgToConstant.end()) {
1449 Constant *CST = It->second;
1450 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1451 << *CST << "\n");
1452 NewCallArgs.push_back(x: CST);
1453 continue;
1454 }
1455
1456 // Add a nullptr value if the argument is not found in the extracted
1457 // function. If we cannot find a value, it means it is not in use
1458 // for the region, so we should not pass anything to it.
1459 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1460 NewCallArgs.push_back(x: ConstantPointerNull::get(
1461 T: static_cast<PointerType *>(AggFunc->getArg(i: AggArgIdx)->getType())));
1462 }
1463
1464 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1465 << *AggFunc << " with new set of arguments\n");
1466 // Create the new call instruction and erase the old one.
1467 Call = CallInst::Create(Ty: AggFunc->getFunctionType(), Func: AggFunc, Args: NewCallArgs, NameStr: "",
1468 InsertBefore: Call->getIterator());
1469
1470 // It is possible that the call to the outlined function is either the first
1471 // instruction is in the new block, the last instruction, or both. If either
1472 // of these is the case, we need to make sure that we replace the instruction
1473 // in the IRInstructionData struct with the new call.
1474 CallInst *OldCall = Region.Call;
1475 if (Region.NewFront->Inst == OldCall)
1476 Region.NewFront->Inst = Call;
1477 if (Region.NewBack->Inst == OldCall)
1478 Region.NewBack->Inst = Call;
1479
1480 // Transfer any debug information.
1481 Call->setDebugLoc(Region.Call->getDebugLoc());
1482 // Since our output may determine which branch we go to, we make sure to
1483 // propagate this new call value through the module.
1484 OldCall->replaceAllUsesWith(V: Call);
1485
1486 // Remove the old instruction.
1487 OldCall->eraseFromParent();
1488 Region.Call = Call;
1489
1490 // Make sure that the argument in the new function has the SwiftError
1491 // argument.
1492 if (Group.SwiftErrorArgument)
1493 Call->addParamAttr(ArgNo: *Group.SwiftErrorArgument, Kind: Attribute::SwiftError);
1494
1495 return Call;
1496}
1497
1498/// Find or create a BasicBlock in the outlined function containing PhiBlocks
1499/// for \p RetVal.
1500///
1501/// \param Group - The OutlinableGroup containing the information about the
1502/// overall outlined function.
1503/// \param RetVal - The return value or exit option that we are currently
1504/// evaluating.
1505/// \returns The found or newly created BasicBlock to contain the needed
1506/// PHINodes to be used as outputs.
1507static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) {
1508 // Find if a PHIBlock exists for this return value already. If it is
1509 // the first time we are analyzing this, we will not, so we record it.
1510 auto [PhiBlockForRetVal, Inserted] = Group.PHIBlocks.try_emplace(Key: RetVal);
1511 if (!Inserted)
1512 return PhiBlockForRetVal->second;
1513
1514 auto ReturnBlockForRetVal = Group.EndBBs.find(Val: RetVal);
1515 assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1516 "Could not find output value!");
1517 BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1518
1519 // If we did not find a block, we create one, and insert it into the
1520 // overall function and record it.
1521 BasicBlock *PHIBlock = BasicBlock::Create(Context&: ReturnBB->getContext(), Name: "phi_block",
1522 Parent: ReturnBB->getParent());
1523 PhiBlockForRetVal->second = PHIBlock;
1524
1525 // We replace all branches to the return block in the newly created outlined
1526 // function to point to the new PHIBlock.
1527 ReturnBB->replaceAllUsesWith(V: PHIBlock);
1528
1529 UncondBrInst::Create(Target: ReturnBB, InsertBefore: PHIBlock);
1530
1531 return PhiBlockForRetVal->second;
1532}
1533
1534/// For the function call now representing the \p Region, find the passed value
1535/// to that call that represents Argument \p A at the call location if the
1536/// call has already been replaced with a call to the overall, aggregate
1537/// function.
1538///
1539/// \param A - The Argument to get the passed value for.
1540/// \param Region - The extracted Region corresponding to the outlined function.
1541/// \returns The Value representing \p A at the call site.
1542static Value *
1543getPassedArgumentInAlreadyOutlinedFunction(const Argument *A,
1544 const OutlinableRegion &Region) {
1545 // If we don't need to adjust the argument number at all (since the call
1546 // has already been replaced by a call to the overall outlined function)
1547 // we can just get the specified argument.
1548 return Region.Call->getArgOperand(i: A->getArgNo());
1549}
1550
1551/// For the function call now representing the \p Region, find the passed value
1552/// to that call that represents Argument \p A at the call location if the
1553/// call has only been replaced by the call to the aggregate function.
1554///
1555/// \param A - The Argument to get the passed value for.
1556/// \param Region - The extracted Region corresponding to the outlined function.
1557/// \returns The Value representing \p A at the call site.
1558static Value *
1559getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
1560 const OutlinableRegion &Region) {
1561 unsigned ArgNum = A->getArgNo();
1562
1563 // If it is a constant, we can look at our mapping from when we created
1564 // the outputs to figure out what the constant value is.
1565 if (auto It = Region.AggArgToConstant.find(Val: ArgNum);
1566 It != Region.AggArgToConstant.end())
1567 return It->second;
1568
1569 // If it is not a constant, and we are not looking at the overall function, we
1570 // need to adjust which argument we are looking at.
1571 ArgNum = Region.AggArgToExtracted.find(Val: ArgNum)->second;
1572 return Region.Call->getArgOperand(i: ArgNum);
1573}
1574
1575/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1576///
1577/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1578/// \param Region [in] - The OutlinableRegion containing \p PN.
1579/// \param OutputMappings [in] - The mapping of output values from outlined
1580/// region to their original values.
1581/// \param CanonNums [out] - The canonical numbering for the incoming values to
1582/// \p PN paired with their incoming block.
1583/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1584/// of \p Region rather than the overall function's call.
1585static void findCanonNumsForPHI(
1586 PHINode *PN, OutlinableRegion &Region,
1587 const DenseMap<Value *, Value *> &OutputMappings,
1588 SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1589 bool ReplacedWithOutlinedCall = true) {
1590 // Iterate over the incoming values.
1591 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1592 Value *IVal = PN->getIncomingValue(i: Idx);
1593 BasicBlock *IBlock = PN->getIncomingBlock(i: Idx);
1594 // If we have an argument as incoming value, we need to grab the passed
1595 // value from the call itself.
1596 if (Argument *A = dyn_cast<Argument>(Val: IVal)) {
1597 if (ReplacedWithOutlinedCall)
1598 IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region);
1599 else
1600 IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region);
1601 }
1602
1603 // Get the original value if it has been replaced by an output value.
1604 IVal = findOutputMapping(OutputMappings, Input: IVal);
1605
1606 // Find and add the canonical number for the incoming value.
1607 std::optional<unsigned> GVN = Region.Candidate->getGVN(V: IVal);
1608 assert(GVN && "No GVN for incoming value");
1609 std::optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(N: *GVN);
1610 assert(CanonNum && "No Canonical Number for GVN");
1611 CanonNums.push_back(Elt: std::make_pair(x&: *CanonNum, y&: IBlock));
1612 }
1613}
1614
1615/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1616/// in order to condense the number of instructions added to the outlined
1617/// function.
1618///
1619/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1620/// \param Region [in] - The OutlinableRegion containing \p PN.
1621/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1622/// \p PN in.
1623/// \param OutputMappings [in] - The mapping of output values from outlined
1624/// region to their original values.
1625/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1626/// matched.
1627/// \return the newly found or created PHINode in \p OverallPhiBlock.
1628static PHINode*
1629findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
1630 BasicBlock *OverallPhiBlock,
1631 const DenseMap<Value *, Value *> &OutputMappings,
1632 DenseSet<PHINode *> &UsedPHIs) {
1633 OutlinableGroup &Group = *Region.Parent;
1634
1635
1636 // A list of the canonical numbering assigned to each incoming value, paired
1637 // with the incoming block for the PHINode passed into this function.
1638 SmallVector<std::pair<unsigned, BasicBlock *>> PNCanonNums;
1639
1640 // We have to use the extracted function since we have merged this region into
1641 // the overall function yet. We make sure to reassign the argument numbering
1642 // since it is possible that the argument ordering is different between the
1643 // functions.
1644 findCanonNumsForPHI(PN: &PN, Region, OutputMappings, CanonNums&: PNCanonNums,
1645 /* ReplacedWithOutlinedCall = */ false);
1646
1647 OutlinableRegion *FirstRegion = Group.Regions[0];
1648
1649 // A list of the canonical numbering assigned to each incoming value, paired
1650 // with the incoming block for the PHINode that we are currently comparing
1651 // the passed PHINode to.
1652 SmallVector<std::pair<unsigned, BasicBlock *>> CurrentCanonNums;
1653
1654 // Find the Canonical Numbering for each PHINode, if it matches, we replace
1655 // the uses of the PHINode we are searching for, with the found PHINode.
1656 for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1657 // If this PHINode has already been matched to another PHINode to be merged,
1658 // we skip it.
1659 if (UsedPHIs.contains(V: &CurrPN))
1660 continue;
1661
1662 CurrentCanonNums.clear();
1663 findCanonNumsForPHI(PN: &CurrPN, Region&: *FirstRegion, OutputMappings, CanonNums&: CurrentCanonNums,
1664 /* ReplacedWithOutlinedCall = */ true);
1665
1666 // If the list of incoming values is not the same length, then they cannot
1667 // match since there is not an analogue for each incoming value.
1668 if (PNCanonNums.size() != CurrentCanonNums.size())
1669 continue;
1670
1671 bool FoundMatch = true;
1672
1673 // We compare the canonical value for each incoming value in the passed
1674 // in PHINode to one already present in the outlined region. If the
1675 // incoming values do not match, then the PHINodes do not match.
1676
1677 // We also check to make sure that the incoming block matches as well by
1678 // finding the corresponding incoming block in the combined outlined region
1679 // for the current outlined region.
1680 for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1681 std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1682 std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1683 if (ToCompareTo.first != ToAdd.first) {
1684 FoundMatch = false;
1685 break;
1686 }
1687
1688 BasicBlock *CorrespondingBlock =
1689 Region.findCorrespondingBlockIn(Other: *FirstRegion, BB: ToAdd.second);
1690 assert(CorrespondingBlock && "Found block is nullptr");
1691 if (CorrespondingBlock != ToCompareTo.second) {
1692 FoundMatch = false;
1693 break;
1694 }
1695 }
1696
1697 // If all incoming values and branches matched, then we can merge
1698 // into the found PHINode.
1699 if (FoundMatch) {
1700 UsedPHIs.insert(V: &CurrPN);
1701 return &CurrPN;
1702 }
1703 }
1704
1705 // If we've made it here, it means we weren't able to replace the PHINode, so
1706 // we must insert it ourselves.
1707 PHINode *NewPN = cast<PHINode>(Val: PN.clone());
1708 NewPN->insertBefore(InsertPos: OverallPhiBlock->begin());
1709 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1710 Idx++) {
1711 Value *IncomingVal = NewPN->getIncomingValue(i: Idx);
1712 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(i: Idx);
1713
1714 // Find corresponding basic block in the overall function for the incoming
1715 // block.
1716 BasicBlock *BlockToUse =
1717 Region.findCorrespondingBlockIn(Other: *FirstRegion, BB: IncomingBlock);
1718 NewPN->setIncomingBlock(i: Idx, BB: BlockToUse);
1719
1720 // If we have an argument we make sure we replace using the argument from
1721 // the correct function.
1722 if (Argument *A = dyn_cast<Argument>(Val: IncomingVal)) {
1723 Value *Val = Group.OutlinedFunction->getArg(i: A->getArgNo());
1724 NewPN->setIncomingValue(i: Idx, V: Val);
1725 continue;
1726 }
1727
1728 // Find the corresponding value in the overall function.
1729 IncomingVal = findOutputMapping(OutputMappings, Input: IncomingVal);
1730 Value *Val = Region.findCorrespondingValueIn(Other: *FirstRegion, V: IncomingVal);
1731 assert(Val && "Value is nullptr?");
1732 auto RemappedIt = FirstRegion->RemappedArguments.find(Val);
1733 if (RemappedIt != FirstRegion->RemappedArguments.end())
1734 Val = RemappedIt->second;
1735 NewPN->setIncomingValue(i: Idx, V: Val);
1736 }
1737 return NewPN;
1738}
1739
1740// Within an extracted function, replace the argument uses of the extracted
1741// region with the arguments of the function for an OutlinableGroup.
1742//
1743/// \param [in] Region - The region of extracted code to be changed.
1744/// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1745/// region.
1746/// \param [in] FirstFunction - A flag to indicate whether we are using this
1747/// function to define the overall outlined function for all the regions, or
1748/// if we are operating on one of the following regions.
1749static void
1750replaceArgumentUses(OutlinableRegion &Region,
1751 DenseMap<Value *, BasicBlock *> &OutputBBs,
1752 const DenseMap<Value *, Value *> &OutputMappings,
1753 bool FirstFunction = false) {
1754 OutlinableGroup &Group = *Region.Parent;
1755 assert(Region.ExtractedFunction && "Region has no extracted function?");
1756
1757 Function *DominatingFunction = Region.ExtractedFunction;
1758 if (FirstFunction)
1759 DominatingFunction = Group.OutlinedFunction;
1760 DominatorTree DT(*DominatingFunction);
1761 DenseSet<PHINode *> UsedPHIs;
1762
1763 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1764 ArgIdx++) {
1765 assert(Region.ExtractedArgToAgg.contains(ArgIdx) &&
1766 "No mapping from extracted to outlined?");
1767 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(Val: ArgIdx)->second;
1768 Argument *AggArg = Group.OutlinedFunction->getArg(i: AggArgIdx);
1769 Argument *Arg = Region.ExtractedFunction->getArg(i: ArgIdx);
1770 // The argument is an input, so we can simply replace it with the overall
1771 // argument value
1772 if (ArgIdx < Region.NumExtractedInputs) {
1773 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1774 << *Region.ExtractedFunction << " with " << *AggArg
1775 << " in function " << *Group.OutlinedFunction << "\n");
1776 Arg->replaceAllUsesWith(V: AggArg);
1777 Value *V = Region.Call->getArgOperand(i: ArgIdx);
1778 Region.RemappedArguments.insert(KV: std::make_pair(x&: V, y&: AggArg));
1779 continue;
1780 }
1781
1782 // If we are replacing an output, we place the store value in its own
1783 // block inside the overall function before replacing the use of the output
1784 // in the function.
1785 assert(Arg->hasOneUse() && "Output argument can only have one use");
1786 User *InstAsUser = Arg->user_back();
1787 assert(InstAsUser && "User is nullptr!");
1788
1789 Instruction *I = cast<Instruction>(Val: InstAsUser);
1790 BasicBlock *BB = I->getParent();
1791 SmallVector<BasicBlock *, 4> Descendants;
1792 DT.getDescendants(R: BB, Result&: Descendants);
1793 bool EdgeAdded = false;
1794 if (Descendants.size() == 0) {
1795 EdgeAdded = true;
1796 DT.insertEdge(From: &DominatingFunction->getEntryBlock(), To: BB);
1797 DT.getDescendants(R: BB, Result&: Descendants);
1798 }
1799
1800 // Iterate over the following blocks, looking for return instructions,
1801 // if we find one, find the corresponding output block for the return value
1802 // and move our store instruction there.
1803 for (BasicBlock *DescendBB : Descendants) {
1804 ReturnInst *RI = dyn_cast<ReturnInst>(Val: DescendBB->getTerminator());
1805 if (!RI)
1806 continue;
1807 Value *RetVal = RI->getReturnValue();
1808 auto VBBIt = OutputBBs.find(Val: RetVal);
1809 assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1810
1811 // If this is storing a PHINode, we must make sure it is included in the
1812 // overall function.
1813 StoreInst *SI = cast<StoreInst>(Val: I);
1814
1815 Value *ValueOperand = SI->getValueOperand();
1816
1817 StoreInst *NewI = cast<StoreInst>(Val: I->clone());
1818 NewI->setDebugLoc(DebugLoc::getDropped());
1819 BasicBlock *OutputBB = VBBIt->second;
1820 NewI->insertInto(ParentBB: OutputBB, It: OutputBB->end());
1821 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1822 << *OutputBB << "\n");
1823
1824 // If this is storing a PHINode, we must make sure it is included in the
1825 // overall function.
1826 if (!isa<PHINode>(Val: ValueOperand) ||
1827 Region.Candidate->getGVN(V: ValueOperand).has_value()) {
1828 if (FirstFunction)
1829 continue;
1830 Value *CorrVal =
1831 Region.findCorrespondingValueIn(Other: *Group.Regions[0], V: ValueOperand);
1832 assert(CorrVal && "Value is nullptr?");
1833 NewI->setOperand(i_nocapture: 0, Val_nocapture: CorrVal);
1834 continue;
1835 }
1836 PHINode *PN = cast<PHINode>(Val: SI->getValueOperand());
1837 // If it has a value, it was not split by the code extractor, which
1838 // is what we are looking for.
1839 if (Region.Candidate->getGVN(V: PN))
1840 continue;
1841
1842 // We record the parent block for the PHINode in the Region so that
1843 // we can exclude it from checks later on.
1844 Region.PHIBlocks.insert(KV: std::make_pair(x&: RetVal, y: PN->getParent()));
1845
1846 // If this is the first function, we do not need to worry about mergiing
1847 // this with any other block in the overall outlined function, so we can
1848 // just continue.
1849 if (FirstFunction) {
1850 BasicBlock *PHIBlock = PN->getParent();
1851 Group.PHIBlocks.insert(KV: std::make_pair(x&: RetVal, y&: PHIBlock));
1852 continue;
1853 }
1854
1855 // We look for the aggregate block that contains the PHINodes leading into
1856 // this exit path. If we can't find one, we create one.
1857 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1858
1859 // For our PHINode, we find the combined canonical numbering, and
1860 // attempt to find a matching PHINode in the overall PHIBlock. If we
1861 // cannot, we copy the PHINode and move it into this new block.
1862 PHINode *NewPN = findOrCreatePHIInBlock(PN&: *PN, Region, OverallPhiBlock,
1863 OutputMappings, UsedPHIs);
1864 NewI->setOperand(i_nocapture: 0, Val_nocapture: NewPN);
1865 }
1866
1867 // If we added an edge for basic blocks without a predecessor, we remove it
1868 // here.
1869 if (EdgeAdded)
1870 DT.deleteEdge(From: &DominatingFunction->getEntryBlock(), To: BB);
1871 I->eraseFromParent();
1872
1873 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1874 << *Region.ExtractedFunction << " with " << *AggArg
1875 << " in function " << *Group.OutlinedFunction << "\n");
1876 Arg->replaceAllUsesWith(V: AggArg);
1877 }
1878}
1879
1880/// Within an extracted function, replace the constants that need to be lifted
1881/// into arguments with the actual argument.
1882///
1883/// \param Region [in] - The region of extracted code to be changed.
1884void replaceConstants(OutlinableRegion &Region) {
1885 OutlinableGroup &Group = *Region.Parent;
1886 Function *OutlinedFunction = Group.OutlinedFunction;
1887 ValueToValueMapTy VMap;
1888
1889 // Iterate over the constants that need to be elevated into arguments
1890 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1891 unsigned AggArgIdx = Const.first;
1892 assert(OutlinedFunction && "Overall Function is not defined?");
1893 Constant *CST = Const.second;
1894 Argument *Arg = Group.OutlinedFunction->getArg(i: AggArgIdx);
1895 // Identify the argument it will be elevated to, and replace instances of
1896 // that constant in the function.
1897 VMap[CST] = Arg;
1898 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1899 << " in function " << *OutlinedFunction << " with "
1900 << *Arg << '\n');
1901 }
1902
1903 RemapFunction(F&: *OutlinedFunction, VM&: VMap,
1904 Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1905}
1906
1907/// It is possible that there is a basic block that already performs the same
1908/// stores. This returns a duplicate block, if it exists
1909///
1910/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1911/// \param OutputStoreBBs [in] The existing output blocks.
1912/// \returns an optional value with the number output block if there is a match.
1913std::optional<unsigned> findDuplicateOutputBlock(
1914 DenseMap<Value *, BasicBlock *> &OutputBBs,
1915 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1916
1917 bool Mismatch = false;
1918 unsigned MatchingNum = 0;
1919 // We compare the new set output blocks to the other sets of output blocks.
1920 // If they are the same number, and have identical instructions, they are
1921 // considered to be the same.
1922 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1923 Mismatch = false;
1924 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1925 auto OutputBBIt = OutputBBs.find(Val: VToB.first);
1926 if (OutputBBIt == OutputBBs.end()) {
1927 Mismatch = true;
1928 break;
1929 }
1930
1931 BasicBlock *CompBB = VToB.second;
1932 BasicBlock *OutputBB = OutputBBIt->second;
1933 if (CompBB->size() - 1 != OutputBB->size()) {
1934 Mismatch = true;
1935 break;
1936 }
1937
1938 BasicBlock::iterator NIt = OutputBB->begin();
1939 for (Instruction &I : *CompBB) {
1940 if (isa<UncondBrInst, CondBrInst>(Val: &I))
1941 continue;
1942
1943 if (!I.isIdenticalTo(I: &(*NIt))) {
1944 Mismatch = true;
1945 break;
1946 }
1947
1948 NIt++;
1949 }
1950 }
1951
1952 if (!Mismatch)
1953 return MatchingNum;
1954
1955 MatchingNum++;
1956 }
1957
1958 return std::nullopt;
1959}
1960
1961/// Remove empty output blocks from the outlined region.
1962///
1963/// \param BlocksToPrune - Mapping of return values output blocks for the \p
1964/// Region.
1965/// \param Region - The OutlinableRegion we are analyzing.
1966static bool
1967analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
1968 OutlinableRegion &Region) {
1969 bool AllRemoved = true;
1970 Value *RetValueForBB;
1971 BasicBlock *NewBB;
1972 SmallVector<Value *, 4> ToRemove;
1973 // Iterate over the output blocks created in the outlined section.
1974 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
1975 RetValueForBB = VtoBB.first;
1976 NewBB = VtoBB.second;
1977
1978 // If there are no instructions, we remove it from the module, and also
1979 // mark the value for removal from the return value to output block mapping.
1980 if (NewBB->size() == 0) {
1981 NewBB->eraseFromParent();
1982 ToRemove.push_back(Elt: RetValueForBB);
1983 continue;
1984 }
1985
1986 // Mark that we could not remove all the blocks since they were not all
1987 // empty.
1988 AllRemoved = false;
1989 }
1990
1991 // Remove the return value from the mapping.
1992 for (Value *V : ToRemove)
1993 BlocksToPrune.erase(Val: V);
1994
1995 // Mark the region as having the no output scheme.
1996 if (AllRemoved)
1997 Region.OutputBlockNum = -1;
1998
1999 return AllRemoved;
2000}
2001
2002/// For the outlined section, move needed the StoreInsts for the output
2003/// registers into their own block. Then, determine if there is a duplicate
2004/// output block already created.
2005///
2006/// \param [in] OG - The OutlinableGroup of regions to be outlined.
2007/// \param [in] Region - The OutlinableRegion that is being analyzed.
2008/// \param [in,out] OutputBBs - the blocks that stores for this region will be
2009/// placed in.
2010/// \param [in] EndBBs - the final blocks of the extracted function.
2011/// \param [in] OutputMappings - OutputMappings the mapping of values that have
2012/// been replaced by a new output value.
2013/// \param [in,out] OutputStoreBBs - The existing output blocks.
2014static void alignOutputBlockWithAggFunc(
2015 OutlinableGroup &OG, OutlinableRegion &Region,
2016 DenseMap<Value *, BasicBlock *> &OutputBBs,
2017 DenseMap<Value *, BasicBlock *> &EndBBs,
2018 const DenseMap<Value *, Value *> &OutputMappings,
2019 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2020 // If none of the output blocks have any instructions, this means that we do
2021 // not have to determine if it matches any of the other output schemes, and we
2022 // don't have to do anything else.
2023 if (analyzeAndPruneOutputBlocks(BlocksToPrune&: OutputBBs, Region))
2024 return;
2025
2026 // Determine is there is a duplicate set of blocks.
2027 std::optional<unsigned> MatchingBB =
2028 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2029
2030 // If there is, we remove the new output blocks. If it does not,
2031 // we add it to our list of sets of output blocks.
2032 if (MatchingBB) {
2033 LLVM_DEBUG(dbgs() << "Set output block for region in function"
2034 << Region.ExtractedFunction << " to " << *MatchingBB);
2035
2036 Region.OutputBlockNum = *MatchingBB;
2037 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2038 VtoBB.second->eraseFromParent();
2039 return;
2040 }
2041
2042 Region.OutputBlockNum = OutputStoreBBs.size();
2043
2044 Value *RetValueForBB;
2045 BasicBlock *NewBB;
2046 OutputStoreBBs.push_back(x: DenseMap<Value *, BasicBlock *>());
2047 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2048 RetValueForBB = VtoBB.first;
2049 NewBB = VtoBB.second;
2050 auto VBBIt = EndBBs.find(Val: RetValueForBB);
2051 LLVM_DEBUG(dbgs() << "Create output block for region in"
2052 << Region.ExtractedFunction << " to "
2053 << *NewBB);
2054 UncondBrInst::Create(Target: VBBIt->second, InsertBefore: NewBB);
2055 OutputStoreBBs.back().insert(KV: std::make_pair(x&: RetValueForBB, y&: NewBB));
2056 }
2057}
2058
2059/// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2060/// before creating a basic block for each \p NewMap, and inserting into the new
2061/// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2062///
2063/// \param OldMap [in] - The mapping to base the new mapping off of.
2064/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2065/// \param ParentFunc [in] - The function to put the new basic block in.
2066/// \param BaseName [in] - The start of the BasicBlock names to be appended to
2067/// by an index value.
2068static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
2069 DenseMap<Value *, BasicBlock *> &NewMap,
2070 Function *ParentFunc, Twine BaseName) {
2071 unsigned Idx = 0;
2072 std::vector<Value *> SortedKeys;
2073
2074 getSortedConstantKeys(SortedKeys, Map&: OldMap);
2075
2076 for (Value *RetVal : SortedKeys) {
2077 BasicBlock *NewBB = BasicBlock::Create(
2078 Context&: ParentFunc->getContext(), Name: Twine(BaseName) + Twine("_") + Twine(Idx++),
2079 Parent: ParentFunc);
2080 NewMap.insert(KV: std::make_pair(x&: RetVal, y&: NewBB));
2081 }
2082}
2083
2084/// Create the switch statement for outlined function to differentiate between
2085/// all the output blocks.
2086///
2087/// For the outlined section, determine if an outlined block already exists that
2088/// matches the needed stores for the extracted section.
2089/// \param [in] M - The module we are outlining from.
2090/// \param [in] OG - The group of regions to be outlined.
2091/// \param [in] EndBBs - The final blocks of the extracted function.
2092/// \param [in,out] OutputStoreBBs - The existing output blocks.
2093void createSwitchStatement(
2094 Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
2095 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2096 // We only need the switch statement if there is more than one store
2097 // combination, or there is more than one set of output blocks. The first
2098 // will occur when we store different sets of values for two different
2099 // regions. The second will occur when we have two outputs that are combined
2100 // in a PHINode outside of the region in one outlined instance, and are used
2101 // seaparately in another. This will create the same set of OutputGVNs, but
2102 // will generate two different output schemes.
2103 if (OG.OutputGVNCombinations.size() > 1) {
2104 Function *AggFunc = OG.OutlinedFunction;
2105 // Create a final block for each different return block.
2106 DenseMap<Value *, BasicBlock *> ReturnBBs;
2107 createAndInsertBasicBlocks(OldMap&: OG.EndBBs, NewMap&: ReturnBBs, ParentFunc: AggFunc, BaseName: "final_block");
2108
2109 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2110 std::pair<Value *, BasicBlock *> &OutputBlock =
2111 *OG.EndBBs.find(Val: RetBlockPair.first);
2112 BasicBlock *ReturnBlock = RetBlockPair.second;
2113 BasicBlock *EndBB = OutputBlock.second;
2114 Instruction *Term = EndBB->getTerminator();
2115 // Move the return value to the final block instead of the original exit
2116 // stub.
2117 Term->moveBefore(BB&: *ReturnBlock, I: ReturnBlock->end());
2118 // Put the switch statement in the old end basic block for the function
2119 // with a fall through to the new return block.
2120 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2121 << OutputStoreBBs.size() << "\n");
2122 SwitchInst *SwitchI =
2123 SwitchInst::Create(Value: AggFunc->getArg(i: AggFunc->arg_size() - 1),
2124 Default: ReturnBlock, NumCases: OutputStoreBBs.size(), InsertBefore: EndBB);
2125
2126 unsigned Idx = 0;
2127 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2128 auto OSBBIt = OutputStoreBB.find(Val: OutputBlock.first);
2129
2130 if (OSBBIt == OutputStoreBB.end())
2131 continue;
2132
2133 BasicBlock *BB = OSBBIt->second;
2134 SwitchI->addCase(
2135 OnVal: ConstantInt::get(Ty: Type::getInt32Ty(C&: M.getContext()), V: Idx), Dest: BB);
2136 Term = BB->getTerminator();
2137 Term->setSuccessor(Idx: 0, BB: ReturnBlock);
2138 Idx++;
2139 }
2140 }
2141 return;
2142 }
2143
2144 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2145
2146 // If there needs to be stores, move them from the output blocks to their
2147 // corresponding ending block. We do not check that the OutputGVNCombinations
2148 // is equal to 1 here since that could just been the case where there are 0
2149 // outputs. Instead, we check whether there is more than one set of output
2150 // blocks since this is the only case where we would have to move the
2151 // stores, and erase the extraneous blocks.
2152 if (OutputStoreBBs.size() == 1) {
2153 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2154 << *OG.OutlinedFunction << "\n");
2155 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2156 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2157 auto EndBBIt = EndBBs.find(Val: VBPair.first);
2158 assert(EndBBIt != EndBBs.end() && "Could not find end block");
2159 BasicBlock *EndBB = EndBBIt->second;
2160 BasicBlock *OutputBB = VBPair.second;
2161 Instruction *Term = OutputBB->getTerminator();
2162 Term->eraseFromParent();
2163 Term = EndBB->getTerminator();
2164 moveBBContents(SourceBB&: *OutputBB, TargetBB&: *EndBB);
2165 Term->moveBefore(BB&: *EndBB, I: EndBB->end());
2166 OutputBB->eraseFromParent();
2167 }
2168 }
2169}
2170
2171void IROutliner::fillOverallFunction(
2172 Module &M, OutlinableGroup &CurrentGroup,
2173 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2174 std::vector<Function *> &FuncsToRemove) {
2175 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2176
2177 TargetTransformInfo &TTI = getTTI(*CurrentOS->StartBB->getParent());
2178
2179 // Move first extracted function's instructions into new function.
2180 LLVM_DEBUG(dbgs() << "Move instructions from "
2181 << *CurrentOS->ExtractedFunction << " to instruction "
2182 << *CurrentGroup.OutlinedFunction << "\n");
2183 moveFunctionData(Old&: *CurrentOS->ExtractedFunction,
2184 New&: *CurrentGroup.OutlinedFunction, NewEnds&: CurrentGroup.EndBBs);
2185
2186 // Transfer the attributes from the function to the new function.
2187 for (Attribute A :
2188 CurrentOS->ExtractedFunction->getAttributes().getFnAttrs()) {
2189 if (!TTI.shouldCopyAttributeWhenOutliningFrom(Caller: CurrentOS->ExtractedFunction,
2190 Attr: A))
2191 continue;
2192
2193 CurrentGroup.OutlinedFunction->addFnAttr(Attr: A);
2194 }
2195
2196 // Create a new set of output blocks for the first extracted function.
2197 DenseMap<Value *, BasicBlock *> NewBBs;
2198 createAndInsertBasicBlocks(OldMap&: CurrentGroup.EndBBs, NewMap&: NewBBs,
2199 ParentFunc: CurrentGroup.OutlinedFunction, BaseName: "output_block_0");
2200 CurrentOS->OutputBlockNum = 0;
2201
2202 replaceArgumentUses(Region&: *CurrentOS, OutputBBs&: NewBBs, OutputMappings, FirstFunction: true);
2203 replaceConstants(Region&: *CurrentOS);
2204
2205 // We first identify if any output blocks are empty, if they are we remove
2206 // them. We then create a branch instruction to the basic block to the return
2207 // block for the function for each non empty output block.
2208 if (!analyzeAndPruneOutputBlocks(BlocksToPrune&: NewBBs, Region&: *CurrentOS)) {
2209 OutputStoreBBs.push_back(x: DenseMap<Value *, BasicBlock *>());
2210 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2211 auto VBBIt = CurrentGroup.EndBBs.find(Val: VToBB.first);
2212 BasicBlock *EndBB = VBBIt->second;
2213 UncondBrInst::Create(Target: EndBB, InsertBefore: VToBB.second);
2214 OutputStoreBBs.back().insert(KV: VToBB);
2215 }
2216 }
2217
2218 // Replace the call to the extracted function with the outlined function.
2219 CurrentOS->Call = replaceCalledFunction(M, Region&: *CurrentOS);
2220
2221 // We only delete the extracted functions at the end since we may need to
2222 // reference instructions contained in them for mapping purposes.
2223 FuncsToRemove.push_back(x: CurrentOS->ExtractedFunction);
2224}
2225
2226void IROutliner::deduplicateExtractedSections(
2227 Module &M, OutlinableGroup &CurrentGroup,
2228 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2229 createFunction(M, Group&: CurrentGroup, FunctionNameSuffix: OutlinedFunctionNum);
2230
2231 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2232
2233 OutlinableRegion *CurrentOS;
2234
2235 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
2236
2237 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2238 CurrentOS = CurrentGroup.Regions[Idx];
2239 AttributeFuncs::mergeAttributesForOutlining(Base&: *CurrentGroup.OutlinedFunction,
2240 ToMerge: *CurrentOS->ExtractedFunction);
2241
2242 // Create a set of BasicBlocks, one for each return block, to hold the
2243 // needed store instructions.
2244 DenseMap<Value *, BasicBlock *> NewBBs;
2245 createAndInsertBasicBlocks(OldMap&: CurrentGroup.EndBBs, NewMap&: NewBBs,
2246 ParentFunc: CurrentGroup.OutlinedFunction,
2247 BaseName: "output_block_" + Twine(Idx));
2248 replaceArgumentUses(Region&: *CurrentOS, OutputBBs&: NewBBs, OutputMappings);
2249 alignOutputBlockWithAggFunc(OG&: CurrentGroup, Region&: *CurrentOS, OutputBBs&: NewBBs,
2250 EndBBs&: CurrentGroup.EndBBs, OutputMappings,
2251 OutputStoreBBs);
2252
2253 CurrentOS->Call = replaceCalledFunction(M, Region&: *CurrentOS);
2254 FuncsToRemove.push_back(x: CurrentOS->ExtractedFunction);
2255 }
2256
2257 // Create a switch statement to handle the different output schemes.
2258 createSwitchStatement(M, OG&: CurrentGroup, EndBBs&: CurrentGroup.EndBBs, OutputStoreBBs);
2259
2260 OutlinedFunctionNum++;
2261}
2262
2263/// Checks that the next instruction in the InstructionDataList matches the
2264/// next instruction in the module. If they do not, there could be the
2265/// possibility that extra code has been inserted, and we must ignore it.
2266///
2267/// \param ID - The IRInstructionData to check the next instruction of.
2268/// \returns true if the InstructionDataList and actual instruction match.
2269static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
2270 // We check if there is a discrepancy between the InstructionDataList
2271 // and the actual next instruction in the module. If there is, it means
2272 // that an extra instruction was added, likely by the CodeExtractor.
2273
2274 // Since we do not have any similarity data about this particular
2275 // instruction, we cannot confidently outline it, and must discard this
2276 // candidate.
2277 IRInstructionDataList::iterator NextIDIt = std::next(x: ID.getIterator());
2278 Instruction *NextIDLInst = NextIDIt->Inst;
2279 Instruction *NextModuleInst = nullptr;
2280 if (!ID.Inst->isTerminator())
2281 NextModuleInst = ID.Inst->getNextNode();
2282 else if (NextIDLInst != nullptr)
2283 NextModuleInst = &*NextIDIt->Inst->getParent()->begin();
2284
2285 if (NextIDLInst && NextIDLInst != NextModuleInst)
2286 return false;
2287
2288 return true;
2289}
2290
2291bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2292 const OutlinableRegion &Region) {
2293 IRSimilarityCandidate *IRSC = Region.Candidate;
2294 unsigned StartIdx = IRSC->getStartIdx();
2295 unsigned EndIdx = IRSC->getEndIdx();
2296
2297 // A check to make sure that we are not about to attempt to outline something
2298 // that has already been outlined.
2299 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2300 if (Outlined.contains(V: Idx))
2301 return false;
2302
2303 // We check if the recorded instruction matches the actual next instruction,
2304 // if it does not, we fix it in the InstructionDataList.
2305 if (!Region.Candidate->backInstruction()->isTerminator()) {
2306 Instruction *NewEndInst =
2307 Region.Candidate->backInstruction()->getNextNode();
2308 assert(NewEndInst && "Next instruction is a nullptr?");
2309 if (Region.Candidate->end()->Inst != NewEndInst) {
2310 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2311 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2312 IRInstructionData(*NewEndInst,
2313 InstructionClassifier.visit(I&: *NewEndInst), *IDL);
2314
2315 // Insert the first IRInstructionData of the new region after the
2316 // last IRInstructionData of the IRSimilarityCandidate.
2317 IDL->insert(I: Region.Candidate->end(), Node&: *NewEndIRID);
2318 }
2319 }
2320
2321 return none_of(Range&: *IRSC, P: [this](IRInstructionData &ID) {
2322 if (!nextIRInstructionDataMatchesNextInst(ID))
2323 return true;
2324
2325 return !this->InstructionClassifier.visit(I: ID.Inst);
2326 });
2327}
2328
2329void IROutliner::pruneIncompatibleRegions(
2330 std::vector<IRSimilarityCandidate> &CandidateVec,
2331 OutlinableGroup &CurrentGroup) {
2332 bool PreviouslyOutlined;
2333
2334 // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2335 stable_sort(Range&: CandidateVec, C: [](const IRSimilarityCandidate &LHS,
2336 const IRSimilarityCandidate &RHS) {
2337 return LHS.getStartIdx() < RHS.getStartIdx();
2338 });
2339
2340 IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2341 // Since outlining a call and a branch instruction will be the same as only
2342 // outlinining a call instruction, we ignore it as a space saving.
2343 if (FirstCandidate.getLength() == 2) {
2344 if (isa<CallInst>(Val: FirstCandidate.front()->Inst) &&
2345 isa<UncondBrInst, CondBrInst>(Val: FirstCandidate.back()->Inst))
2346 return;
2347 }
2348
2349 unsigned CurrentEndIdx = 0;
2350 for (IRSimilarityCandidate &IRSC : CandidateVec) {
2351 PreviouslyOutlined = false;
2352 unsigned StartIdx = IRSC.getStartIdx();
2353 unsigned EndIdx = IRSC.getEndIdx();
2354 const Function &FnForCurrCand = *IRSC.getFunction();
2355
2356 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2357 if (Outlined.contains(V: Idx)) {
2358 PreviouslyOutlined = true;
2359 break;
2360 }
2361
2362 if (PreviouslyOutlined)
2363 continue;
2364
2365 // Check over the instructions, and if the basic block has its address
2366 // taken for use somewhere else, we do not outline that block.
2367 bool BBHasAddressTaken = any_of(Range&: IRSC, P: [](IRInstructionData &ID){
2368 return ID.Inst->getParent()->hasAddressTaken();
2369 });
2370
2371 if (BBHasAddressTaken)
2372 continue;
2373
2374 if (FnForCurrCand.hasOptNone())
2375 continue;
2376
2377 if (FnForCurrCand.hasFnAttribute(Kind: Attribute::NoOutline)) {
2378 LLVM_DEBUG({
2379 dbgs() << "... Skipping function with nooutline attribute: "
2380 << FnForCurrCand.getName() << "\n";
2381 });
2382 continue;
2383 }
2384
2385 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2386 !OutlineFromLinkODRs)
2387 continue;
2388
2389 // Greedily prune out any regions that will overlap with already chosen
2390 // regions.
2391 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2392 continue;
2393
2394 bool BadInst = any_of(Range&: IRSC, P: [this](IRInstructionData &ID) {
2395 if (!nextIRInstructionDataMatchesNextInst(ID))
2396 return true;
2397
2398 return !this->InstructionClassifier.visit(I: ID.Inst);
2399 });
2400
2401 if (BadInst)
2402 continue;
2403
2404 OutlinableRegion *OS = new (RegionAllocator.Allocate())
2405 OutlinableRegion(IRSC, CurrentGroup);
2406 CurrentGroup.Regions.push_back(x: OS);
2407
2408 CurrentEndIdx = EndIdx;
2409 }
2410}
2411
2412InstructionCost
2413IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2414 InstructionCost RegionBenefit = 0;
2415 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2416 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2417 // We add the number of instructions in the region to the benefit as an
2418 // estimate as to how much will be removed.
2419 RegionBenefit += Region->getBenefit(TTI);
2420 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2421 << " saved instructions to overfall benefit.\n");
2422 }
2423
2424 return RegionBenefit;
2425}
2426
2427/// For the \p OutputCanon number passed in find the value represented by this
2428/// canonical number. If it is from a PHINode, we pick the first incoming
2429/// value and return that Value instead.
2430///
2431/// \param Region - The OutlinableRegion to get the Value from.
2432/// \param OutputCanon - The canonical number to find the Value from.
2433/// \returns The Value represented by a canonical number \p OutputCanon in \p
2434/// Region.
2435static Value *findOutputValueInRegion(OutlinableRegion &Region,
2436 unsigned OutputCanon) {
2437 OutlinableGroup &CurrentGroup = *Region.Parent;
2438 // If the value is greater than the value in the tracker, we have a
2439 // PHINode and will instead use one of the incoming values to find the
2440 // type.
2441 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2442 auto It = CurrentGroup.PHINodeGVNToGVNs.find(Val: OutputCanon);
2443 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2444 "Could not find GVN set for PHINode number!");
2445 assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2446 OutputCanon = *It->second.second.begin();
2447 }
2448 std::optional<unsigned> OGVN =
2449 Region.Candidate->fromCanonicalNum(N: OutputCanon);
2450 assert(OGVN && "Could not find GVN for Canonical Number?");
2451 std::optional<Value *> OV = Region.Candidate->fromGVN(Num: *OGVN);
2452 assert(OV && "Could not find value for GVN?");
2453 return *OV;
2454}
2455
2456InstructionCost
2457IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2458 InstructionCost OverallCost = 0;
2459 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2460 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2461
2462 // Each output incurs a load after the call, so we add that to the cost.
2463 for (unsigned OutputCanon : Region->GVNStores) {
2464 Value *V = findOutputValueInRegion(Region&: *Region, OutputCanon);
2465 InstructionCost LoadCost =
2466 TTI.getMemoryOpCost(Opcode: Instruction::Load, Src: V->getType(), Alignment: Align(1), AddressSpace: 0,
2467 CostKind: TargetTransformInfo::TCK_CodeSize);
2468
2469 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2470 << " instructions to cost for output of type "
2471 << *V->getType() << "\n");
2472 OverallCost += LoadCost;
2473 }
2474 }
2475
2476 return OverallCost;
2477}
2478
2479/// Find the extra instructions needed to handle any output values for the
2480/// region.
2481///
2482/// \param [in] M - The Module to outline from.
2483/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2484/// \param [in] TTI - The TargetTransformInfo used to collect information for
2485/// new instruction costs.
2486/// \returns the additional cost to handle the outputs.
2487static InstructionCost findCostForOutputBlocks(Module &M,
2488 OutlinableGroup &CurrentGroup,
2489 TargetTransformInfo &TTI) {
2490 InstructionCost OutputCost = 0;
2491 unsigned NumOutputBranches = 0;
2492
2493 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2494 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2495 DenseSet<BasicBlock *> CandidateBlocks;
2496 Candidate.getBasicBlocks(BBSet&: CandidateBlocks);
2497
2498 // Count the number of different output branches that point to blocks outside
2499 // of the region.
2500 DenseSet<BasicBlock *> FoundBlocks;
2501 for (IRInstructionData &ID : Candidate) {
2502 if (!isa<UncondBrInst, CondBrInst>(Val: ID.Inst))
2503 continue;
2504
2505 for (Value *V : ID.OperVals) {
2506 BasicBlock *BB = static_cast<BasicBlock *>(V);
2507 if (!CandidateBlocks.contains(V: BB) && FoundBlocks.insert(V: BB).second)
2508 NumOutputBranches++;
2509 }
2510 }
2511
2512 CurrentGroup.BranchesToOutside = NumOutputBranches;
2513
2514 for (const ArrayRef<unsigned> &OutputUse :
2515 CurrentGroup.OutputGVNCombinations) {
2516 for (unsigned OutputCanon : OutputUse) {
2517 Value *V = findOutputValueInRegion(Region&: FirstRegion, OutputCanon);
2518 InstructionCost StoreCost =
2519 TTI.getMemoryOpCost(Opcode: Instruction::Load, Src: V->getType(), Alignment: Align(1), AddressSpace: 0,
2520 CostKind: TargetTransformInfo::TCK_CodeSize);
2521
2522 // An instruction cost is added for each store set that needs to occur for
2523 // various output combinations inside the function, plus a branch to
2524 // return to the exit block.
2525 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2526 << " instructions to cost for output of type "
2527 << *V->getType() << "\n");
2528 OutputCost += StoreCost * NumOutputBranches;
2529 }
2530
2531 InstructionCost BranchCost = TTI.getCFInstrCost(
2532 Opcode: Instruction::UncondBr, CostKind: TargetTransformInfo::TCK_CodeSize);
2533 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2534 << " a branch instruction\n");
2535 OutputCost += BranchCost * NumOutputBranches;
2536 }
2537
2538 // If there is more than one output scheme, we must have a comparison and
2539 // branch for each different item in the switch statement.
2540 if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2541 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2542 Opcode: Instruction::ICmp, ValTy: Type::getInt32Ty(C&: M.getContext()),
2543 CondTy: Type::getInt32Ty(C&: M.getContext()), VecPred: CmpInst::BAD_ICMP_PREDICATE,
2544 CostKind: TargetTransformInfo::TCK_CodeSize);
2545 InstructionCost BranchCost = TTI.getCFInstrCost(
2546 Opcode: Instruction::CondBr, CostKind: TargetTransformInfo::TCK_CodeSize);
2547
2548 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2549 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2550
2551 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2552 << " instructions for each switch case for each different"
2553 << " output path in a function\n");
2554 OutputCost += TotalCost * NumOutputBranches;
2555 }
2556
2557 return OutputCost;
2558}
2559
2560void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2561 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2562 CurrentGroup.Benefit += RegionBenefit;
2563 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2564
2565 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2566 CurrentGroup.Cost += OutputReloadCost;
2567 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2568
2569 InstructionCost AverageRegionBenefit =
2570 RegionBenefit / CurrentGroup.Regions.size();
2571 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2572 unsigned NumRegions = CurrentGroup.Regions.size();
2573 TargetTransformInfo &TTI =
2574 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2575
2576 // We add one region to the cost once, to account for the instructions added
2577 // inside of the newly created function.
2578 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2579 << " instructions to cost for body of new function.\n");
2580 CurrentGroup.Cost += AverageRegionBenefit;
2581 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2582
2583 // For each argument, we must add an instruction for loading the argument
2584 // out of the register and into a value inside of the newly outlined function.
2585 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2586 << " instructions to cost for each argument in the new"
2587 << " function.\n");
2588 CurrentGroup.Cost +=
2589 OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2590 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2591
2592 // Each argument needs to either be loaded into a register or onto the stack.
2593 // Some arguments will only be loaded into the stack once the argument
2594 // registers are filled.
2595 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2596 << " instructions to cost for each argument in the new"
2597 << " function " << NumRegions << " times for the "
2598 << "needed argument handling at the call site.\n");
2599 CurrentGroup.Cost +=
2600 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2601 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2602
2603 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2604 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2605}
2606
2607void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2608 ArrayRef<Value *> Outputs,
2609 LoadInst *LI) {
2610 // For and load instructions following the call
2611 Value *Operand = LI->getPointerOperand();
2612 std::optional<unsigned> OutputIdx;
2613 // Find if the operand it is an output register.
2614 for (unsigned ArgIdx = Region.NumExtractedInputs;
2615 ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2616 if (Operand == Region.Call->getArgOperand(i: ArgIdx)) {
2617 OutputIdx = ArgIdx - Region.NumExtractedInputs;
2618 break;
2619 }
2620 }
2621
2622 // If we found an output register, place a mapping of the new value
2623 // to the original in the mapping.
2624 if (!OutputIdx)
2625 return;
2626
2627 auto It = OutputMappings.find(Val: Outputs[*OutputIdx]);
2628 if (It == OutputMappings.end()) {
2629 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2630 << *Outputs[*OutputIdx] << "\n");
2631 OutputMappings.insert(KV: std::make_pair(x&: LI, y: Outputs[*OutputIdx]));
2632 } else {
2633 Value *Orig = It->second;
2634 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2635 << *Outputs[*OutputIdx] << "\n");
2636 OutputMappings.insert(KV: std::make_pair(x&: LI, y&: Orig));
2637 }
2638}
2639
2640bool IROutliner::extractSection(OutlinableRegion &Region) {
2641 SetVector<Value *> ArgInputs, Outputs;
2642 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2643 BasicBlock *InitialStart = Region.StartBB;
2644 Function *OrigF = Region.StartBB->getParent();
2645 CodeExtractorAnalysisCache CEAC(*OrigF);
2646 Region.ExtractedFunction =
2647 Region.CE->extractCodeRegion(CEAC, Inputs&: ArgInputs, Outputs);
2648
2649 // If the extraction was successful, find the BasicBlock, and reassign the
2650 // OutlinableRegion blocks
2651 if (!Region.ExtractedFunction) {
2652 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2653 << "\n");
2654 Region.reattachCandidate();
2655 return false;
2656 }
2657
2658 // Get the block containing the called branch, and reassign the blocks as
2659 // necessary. If the original block still exists, it is because we ended on
2660 // a branch instruction, and so we move the contents into the block before
2661 // and assign the previous block correctly.
2662 User *InstAsUser = Region.ExtractedFunction->user_back();
2663 BasicBlock *RewrittenBB = cast<Instruction>(Val: InstAsUser)->getParent();
2664 Region.PrevBB = RewrittenBB->getSinglePredecessor();
2665 assert(Region.PrevBB && "PrevBB is nullptr?");
2666 if (Region.PrevBB == InitialStart) {
2667 BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2668 Instruction *BI = NewPrev->getTerminator();
2669 BI->eraseFromParent();
2670 moveBBContents(SourceBB&: *InitialStart, TargetBB&: *NewPrev);
2671 Region.PrevBB = NewPrev;
2672 InitialStart->eraseFromParent();
2673 }
2674
2675 Region.StartBB = RewrittenBB;
2676 Region.EndBB = RewrittenBB;
2677
2678 // The sequences of outlinable regions has now changed. We must fix the
2679 // IRInstructionDataList for consistency. Although they may not be illegal
2680 // instructions, they should not be compared with anything else as they
2681 // should not be outlined in this round. So marking these as illegal is
2682 // allowed.
2683 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2684 Instruction *BeginRewritten = &*RewrittenBB->begin();
2685 Instruction *EndRewritten = &*RewrittenBB->begin();
2686 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2687 *BeginRewritten, InstructionClassifier.visit(I&: *BeginRewritten), *IDL);
2688 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2689 *EndRewritten, InstructionClassifier.visit(I&: *EndRewritten), *IDL);
2690
2691 // Insert the first IRInstructionData of the new region in front of the
2692 // first IRInstructionData of the IRSimilarityCandidate.
2693 IDL->insert(I: Region.Candidate->begin(), Node&: *Region.NewFront);
2694 // Insert the first IRInstructionData of the new region after the
2695 // last IRInstructionData of the IRSimilarityCandidate.
2696 IDL->insert(I: Region.Candidate->end(), Node&: *Region.NewBack);
2697 // Remove the IRInstructionData from the IRSimilarityCandidate.
2698 IDL->erase(First: Region.Candidate->begin(), Last: std::prev(x: Region.Candidate->end()));
2699
2700 assert(RewrittenBB != nullptr &&
2701 "Could not find a predecessor after extraction!");
2702
2703 // Iterate over the new set of instructions to find the new call
2704 // instruction.
2705 for (Instruction &I : *RewrittenBB)
2706 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
2707 if (Region.ExtractedFunction == CI->getCalledFunction())
2708 Region.Call = CI;
2709 } else if (LoadInst *LI = dyn_cast<LoadInst>(Val: &I))
2710 updateOutputMapping(Region, Outputs: Outputs.getArrayRef(), LI);
2711 Region.reattachCandidate();
2712 return true;
2713}
2714
2715unsigned IROutliner::doOutline(Module &M) {
2716 // Find the possible similarity sections.
2717 InstructionClassifier.EnableBranches = !DisableBranches;
2718 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2719 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2720
2721 IRSimilarityIdentifier &Identifier = getIRSI(M);
2722 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2723
2724 // Sort them by size of extracted sections
2725 unsigned OutlinedFunctionNum = 0;
2726 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2727 // to sort them by the potential number of instructions to be outlined
2728 if (SimilarityCandidates.size() > 1)
2729 llvm::stable_sort(Range&: SimilarityCandidates,
2730 C: [](const std::vector<IRSimilarityCandidate> &LHS,
2731 const std::vector<IRSimilarityCandidate> &RHS) {
2732 return LHS[0].getLength() * LHS.size() >
2733 RHS[0].getLength() * RHS.size();
2734 });
2735 // Creating OutlinableGroups for each SimilarityCandidate to be used in
2736 // each of the following for loops to avoid making an allocator.
2737 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2738
2739 DenseSet<unsigned> NotSame;
2740 std::vector<OutlinableGroup *> NegativeCostGroups;
2741 std::vector<OutlinableRegion *> OutlinedRegions;
2742 // Iterate over the possible sets of similarity.
2743 unsigned PotentialGroupIdx = 0;
2744 for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2745 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2746
2747 // Remove entries that were previously outlined
2748 pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2749
2750 // We pruned the number of regions to 0 to 1, meaning that it's not worth
2751 // trying to outlined since there is no compatible similar instance of this
2752 // code.
2753 if (CurrentGroup.Regions.size() < 2)
2754 continue;
2755
2756 // Determine if there are any values that are the same constant throughout
2757 // each section in the set.
2758 NotSame.clear();
2759 CurrentGroup.findSameConstants(NotSame);
2760
2761 if (CurrentGroup.IgnoreGroup)
2762 continue;
2763
2764 // Create a CodeExtractor for each outlinable region. Identify inputs and
2765 // outputs for each section using the code extractor and create the argument
2766 // types for the Aggregate Outlining Function.
2767 OutlinedRegions.clear();
2768 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2769 // Break the outlinable region out of its parent BasicBlock into its own
2770 // BasicBlocks (see function implementation).
2771 OS->splitCandidate();
2772
2773 // There's a chance that when the region is split, extra instructions are
2774 // added to the region. This makes the region no longer viable
2775 // to be split, so we ignore it for outlining.
2776 if (!OS->CandidateSplit)
2777 continue;
2778
2779 SmallVector<BasicBlock *> BE;
2780 DenseSet<BasicBlock *> BlocksInRegion;
2781 OS->Candidate->getBasicBlocks(BBSet&: BlocksInRegion, BBList&: BE);
2782 OS->CE = new (ExtractorAllocator.Allocate())
2783 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2784 false, nullptr, {}, "outlined");
2785 findAddInputsOutputs(M, Region&: *OS, NotSame);
2786 if (!OS->IgnoreRegion)
2787 OutlinedRegions.push_back(x: OS);
2788
2789 // We recombine the blocks together now that we have gathered all the
2790 // needed information.
2791 OS->reattachCandidate();
2792 }
2793
2794 CurrentGroup.Regions = std::move(OutlinedRegions);
2795
2796 if (CurrentGroup.Regions.empty())
2797 continue;
2798
2799 CurrentGroup.collectGVNStoreSets(M);
2800
2801 if (CostModel)
2802 findCostBenefit(M, CurrentGroup);
2803
2804 // If we are adhering to the cost model, skip those groups where the cost
2805 // outweighs the benefits.
2806 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2807 OptimizationRemarkEmitter &ORE =
2808 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2809 ORE.emit(RemarkBuilder: [&]() {
2810 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2811 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2812 C->frontInstruction());
2813 R << "did not outline "
2814 << ore::NV(std::to_string(val: CurrentGroup.Regions.size()))
2815 << " regions due to estimated increase of "
2816 << ore::NV("InstructionIncrease",
2817 CurrentGroup.Cost - CurrentGroup.Benefit)
2818 << " instructions at locations ";
2819 interleave(
2820 begin: CurrentGroup.Regions.begin(), end: CurrentGroup.Regions.end(),
2821 each_fn: [&R](OutlinableRegion *Region) {
2822 R << ore::NV(
2823 "DebugLoc",
2824 Region->Candidate->frontInstruction()->getDebugLoc());
2825 },
2826 between_fn: [&R]() { R << " "; });
2827 return R;
2828 });
2829 continue;
2830 }
2831
2832 NegativeCostGroups.push_back(x: &CurrentGroup);
2833 }
2834
2835 ExtractorAllocator.DestroyAll();
2836
2837 if (NegativeCostGroups.size() > 1)
2838 stable_sort(Range&: NegativeCostGroups,
2839 C: [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2840 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2841 });
2842
2843 std::vector<Function *> FuncsToRemove;
2844 for (OutlinableGroup *CG : NegativeCostGroups) {
2845 OutlinableGroup &CurrentGroup = *CG;
2846
2847 OutlinedRegions.clear();
2848 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2849 // We check whether our region is compatible with what has already been
2850 // outlined, and whether we need to ignore this item.
2851 if (!isCompatibleWithAlreadyOutlinedCode(Region: *Region))
2852 continue;
2853 OutlinedRegions.push_back(x: Region);
2854 }
2855
2856 if (OutlinedRegions.size() < 2)
2857 continue;
2858
2859 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2860 // we are still outlining enough regions to make up for the added cost.
2861 CurrentGroup.Regions = std::move(OutlinedRegions);
2862 if (CostModel) {
2863 CurrentGroup.Benefit = 0;
2864 CurrentGroup.Cost = 0;
2865 findCostBenefit(M, CurrentGroup);
2866 if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2867 continue;
2868 }
2869 OutlinedRegions.clear();
2870 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2871 Region->splitCandidate();
2872 if (!Region->CandidateSplit)
2873 continue;
2874 OutlinedRegions.push_back(x: Region);
2875 }
2876
2877 CurrentGroup.Regions = std::move(OutlinedRegions);
2878 if (CurrentGroup.Regions.size() < 2) {
2879 for (OutlinableRegion *R : CurrentGroup.Regions)
2880 R->reattachCandidate();
2881 continue;
2882 }
2883
2884 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2885 << " and benefit " << CurrentGroup.Benefit << "\n");
2886
2887 // Create functions out of all the sections, and mark them as outlined.
2888 OutlinedRegions.clear();
2889 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2890 SmallVector<BasicBlock *> BE;
2891 DenseSet<BasicBlock *> BlocksInRegion;
2892 OS->Candidate->getBasicBlocks(BBSet&: BlocksInRegion, BBList&: BE);
2893 OS->CE = new (ExtractorAllocator.Allocate())
2894 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2895 false, nullptr, {}, "outlined");
2896 bool FunctionOutlined = extractSection(Region&: *OS);
2897 if (FunctionOutlined) {
2898 unsigned StartIdx = OS->Candidate->getStartIdx();
2899 unsigned EndIdx = OS->Candidate->getEndIdx();
2900 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2901 Outlined.insert(V: Idx);
2902
2903 OutlinedRegions.push_back(x: OS);
2904 }
2905 }
2906
2907 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2908 << " with benefit " << CurrentGroup.Benefit
2909 << " and cost " << CurrentGroup.Cost << "\n");
2910
2911 CurrentGroup.Regions = std::move(OutlinedRegions);
2912
2913 if (CurrentGroup.Regions.empty())
2914 continue;
2915
2916 OptimizationRemarkEmitter &ORE =
2917 getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2918 ORE.emit(RemarkBuilder: [&]() {
2919 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2920 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2921 R << "outlined " << ore::NV(std::to_string(val: CurrentGroup.Regions.size()))
2922 << " regions with decrease of "
2923 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2924 << " instructions at locations ";
2925 interleave(
2926 begin: CurrentGroup.Regions.begin(), end: CurrentGroup.Regions.end(),
2927 each_fn: [&R](OutlinableRegion *Region) {
2928 R << ore::NV("DebugLoc",
2929 Region->Candidate->frontInstruction()->getDebugLoc());
2930 },
2931 between_fn: [&R]() { R << " "; });
2932 return R;
2933 });
2934
2935 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2936 OutlinedFunctionNum);
2937 }
2938
2939 for (Function *F : FuncsToRemove)
2940 F->eraseFromParent();
2941
2942 return OutlinedFunctionNum;
2943}
2944
2945bool IROutliner::run(Module &M) {
2946 CostModel = !NoCostModel;
2947 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2948
2949 return doOutline(M) > 0;
2950}
2951
2952PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
2953 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
2954
2955 std::function<TargetTransformInfo &(Function &)> GTTI =
2956 [&FAM](Function &F) -> TargetTransformInfo & {
2957 return FAM.getResult<TargetIRAnalysis>(IR&: F);
2958 };
2959
2960 std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
2961 [&AM](Module &M) -> IRSimilarityIdentifier & {
2962 return AM.getResult<IRSimilarityAnalysis>(IR&: M);
2963 };
2964
2965 std::unique_ptr<OptimizationRemarkEmitter> ORE;
2966 std::function<OptimizationRemarkEmitter &(Function &)> GORE =
2967 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2968 ORE.reset(p: new OptimizationRemarkEmitter(&F));
2969 return *ORE;
2970 };
2971
2972 if (IROutliner(GTTI, GIRSI, GORE).run(M))
2973 return PreservedAnalyses::none();
2974 return PreservedAnalyses::all();
2975}
2976