1//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10// Implementation for the IROutliner which is used by the IROutliner Pass.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/IROutliner.h"
15#include "llvm/Analysis/IRSimilarityIdentifier.h"
16#include "llvm/Analysis/OptimizationRemarkEmitter.h"
17#include "llvm/Analysis/TargetTransformInfo.h"
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/DIBuilder.h"
20#include "llvm/IR/DebugInfo.h"
21#include "llvm/IR/DebugInfoMetadata.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/Mangler.h"
24#include "llvm/IR/PassManager.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Transforms/IPO.h"
27#include "llvm/Transforms/Utils/ValueMapper.h"
28#include <optional>
29#include <vector>
30
31#define DEBUG_TYPE "iroutliner"
32
33using namespace llvm;
34using namespace IRSimilarity;
35
36// A command flag to be used for debugging to exclude branches from similarity
37// matching and outlining.
38namespace llvm {
39extern cl::opt<bool> DisableBranches;
40
41// A command flag to be used for debugging to indirect calls from similarity
42// matching and outlining.
43extern cl::opt<bool> DisableIndirectCalls;
44
45// A command flag to be used for debugging to exclude intrinsics from similarity
46// matching and outlining.
47extern cl::opt<bool> DisableIntrinsics;
48
49} // namespace llvm
50
51// Set to true if the user wants the ir outliner to run on linkonceodr linkage
52// functions. This is false by default because the linker can dedupe linkonceodr
53// functions. Since the outliner is confined to a single module (modulo LTO),
54// this is off by default. It should, however, be the default behavior in
55// LTO.
56static cl::opt<bool> EnableLinkOnceODRIROutlining(
57 "enable-linkonceodr-ir-outlining", cl::Hidden,
58 cl::desc("Enable the IR outliner on linkonceodr functions"),
59 cl::init(Val: false));
60
61// This is a debug option to test small pieces of code to ensure that outlining
62// works correctly.
63static cl::opt<bool> NoCostModel(
64 "ir-outlining-no-cost", cl::init(Val: false), cl::ReallyHidden,
65 cl::desc("Debug option to outline greedily, without restriction that "
66 "calculated benefit outweighs cost"));
67
68/// The OutlinableGroup holds all the overarching information for outlining
69/// a set of regions that are structurally similar to one another, such as the
70/// types of the overall function, the output blocks, the sets of stores needed
71/// and a list of the different regions. This information is used in the
72/// deduplication of extracted regions with the same structure.
73struct OutlinableGroup {
74 /// The sections that could be outlined
75 std::vector<OutlinableRegion *> Regions;
76
77 /// The argument types for the function created as the overall function to
78 /// replace the extracted function for each region.
79 std::vector<Type *> ArgumentTypes;
80 /// The FunctionType for the overall function.
81 FunctionType *OutlinedFunctionType = nullptr;
82 /// The Function for the collective overall function.
83 Function *OutlinedFunction = nullptr;
84
85 /// Flag for whether we should not consider this group of OutlinableRegions
86 /// for extraction.
87 bool IgnoreGroup = false;
88
89 /// The return blocks for the overall function.
90 DenseMap<Value *, BasicBlock *> EndBBs;
91
92 /// The PHIBlocks with their corresponding return block based on the return
93 /// value as the key.
94 DenseMap<Value *, BasicBlock *> PHIBlocks;
95
96 /// A set containing the different GVN store sets needed. Each array contains
97 /// a sorted list of the different values that need to be stored into output
98 /// registers.
99 DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
100
101 /// Flag for whether the \ref ArgumentTypes have been defined after the
102 /// extraction of the first region.
103 bool InputTypesSet = false;
104
105 /// The number of input values in \ref ArgumentTypes. Anything after this
106 /// index in ArgumentTypes is an output argument.
107 unsigned NumAggregateInputs = 0;
108
109 /// The mapping of the canonical numbering of the values in outlined sections
110 /// to specific arguments.
111 DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
112
113 /// The number of branches in the region target a basic block that is outside
114 /// of the region.
115 unsigned BranchesToOutside = 0;
116
117 /// Tracker counting backwards from the highest unsigned value possible to
118 /// avoid conflicting with the GVNs of assigned values. We start at -3 since
119 /// -2 and -1 are assigned by the DenseMap.
120 unsigned PHINodeGVNTracker = -3;
121
122 DenseMap<unsigned,
123 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
124 PHINodeGVNToGVNs;
125 DenseMap<hash_code, unsigned> GVNsToPHINodeGVN;
126
127 /// The number of instructions that will be outlined by extracting \ref
128 /// Regions.
129 InstructionCost Benefit = 0;
130 /// The number of added instructions needed for the outlining of the \ref
131 /// Regions.
132 InstructionCost Cost = 0;
133
134 /// The argument that needs to be marked with the swifterr attribute. If not
135 /// needed, there is no value.
136 std::optional<unsigned> SwiftErrorArgument;
137
138 /// For the \ref Regions, we look at every Value. If it is a constant,
139 /// we check whether it is the same in Region.
140 ///
141 /// \param [in,out] NotSame contains the global value numbers where the
142 /// constant is not always the same, and must be passed in as an argument.
143 void findSameConstants(DenseSet<unsigned> &NotSame);
144
145 /// For the regions, look at each set of GVN stores needed and account for
146 /// each combination. Add an argument to the argument types if there is
147 /// more than one combination.
148 ///
149 /// \param [in] M - The module we are outlining from.
150 void collectGVNStoreSets(Module &M);
151};
152
153/// Move the contents of \p SourceBB to before the last instruction of \p
154/// TargetBB.
155/// \param SourceBB - the BasicBlock to pull Instructions from.
156/// \param TargetBB - the BasicBlock to put Instruction into.
157static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
158 TargetBB.splice(ToIt: TargetBB.end(), FromBB: &SourceBB);
159}
160
161/// A function to sort the keys of \p Map, which must be a mapping of constant
162/// values to basic blocks and return it in \p SortedKeys
163///
164/// \param SortedKeys - The vector the keys will be return in and sorted.
165/// \param Map - The DenseMap containing keys to sort.
166static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
167 DenseMap<Value *, BasicBlock *> &Map) {
168 for (auto &VtoBB : Map)
169 SortedKeys.push_back(x: VtoBB.first);
170
171 // Here we expect to have either 1 value that is void (nullptr) or multiple
172 // values that are all constant integers.
173 if (SortedKeys.size() == 1) {
174 assert(!SortedKeys[0] && "Expected a single void value.");
175 return;
176 }
177
178 stable_sort(Range&: SortedKeys, C: [](const Value *LHS, const Value *RHS) {
179 assert(LHS && RHS && "Expected non void values.");
180 const ConstantInt *LHSC = cast<ConstantInt>(Val: LHS);
181 const ConstantInt *RHSC = cast<ConstantInt>(Val: RHS);
182
183 return LHSC->getLimitedValue() < RHSC->getLimitedValue();
184 });
185}
186
187Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
188 Value *V) {
189 std::optional<unsigned> GVN = Candidate->getGVN(V);
190 assert(GVN && "No GVN for incoming value");
191 std::optional<unsigned> CanonNum = Candidate->getCanonicalNum(N: *GVN);
192 std::optional<unsigned> FirstGVN =
193 Other.Candidate->fromCanonicalNum(N: *CanonNum);
194 std::optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(Num: *FirstGVN);
195 return FoundValueOpt.value_or(u: nullptr);
196}
197
198BasicBlock *
199OutlinableRegion::findCorrespondingBlockIn(const OutlinableRegion &Other,
200 BasicBlock *BB) {
201 Instruction *FirstNonPHI = &*BB->getFirstNonPHIOrDbg();
202 assert(FirstNonPHI && "block is empty?");
203 Value *CorrespondingVal = findCorrespondingValueIn(Other, V: FirstNonPHI);
204 if (!CorrespondingVal)
205 return nullptr;
206 BasicBlock *CorrespondingBlock =
207 cast<Instruction>(Val: CorrespondingVal)->getParent();
208 return CorrespondingBlock;
209}
210
211/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
212/// in \p Included to branch to BasicBlock \p Replace if they currently branch
213/// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
214/// when PHINodes are included in outlined regions.
215///
216/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
217/// checked.
218/// \param Find - The successor block to be replaced.
219/// \param Replace - The new succesor block to branch to.
220/// \param Included - The set of blocks about to be outlined.
221static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
222 BasicBlock *Replace,
223 DenseSet<BasicBlock *> &Included) {
224 for (PHINode &PN : PHIBlock->phis())
225 for (BasicBlock *Incoming : PN.blocks())
226 // Check if the incoming block is included in the set of blocks being
227 // outlined.
228 if (Included.contains(V: Incoming))
229 Incoming->getTerminator()->replaceSuccessorWith(OldBB: Find, NewBB: Replace);
230}
231
232
233void OutlinableRegion::splitCandidate() {
234 assert(!CandidateSplit && "Candidate already split!");
235
236 Instruction *BackInst = Candidate->backInstruction();
237
238 Instruction *EndInst = nullptr;
239 // Check whether the last instruction is a terminator, if it is, we do
240 // not split on the following instruction. We leave the block as it is. We
241 // also check that this is not the last instruction in the Module, otherwise
242 // the check for whether the current following instruction matches the
243 // previously recorded instruction will be incorrect.
244 if (!BackInst->isTerminator() ||
245 BackInst->getParent() != &BackInst->getFunction()->back()) {
246 EndInst = Candidate->end()->Inst;
247 assert(EndInst && "Expected an end instruction?");
248 }
249
250 // We check if the current instruction following the last instruction in the
251 // region is the same as the recorded instruction following the last
252 // instruction. If they do not match, there could be problems in rewriting
253 // the program after outlining, so we ignore it.
254 if (!BackInst->isTerminator() && EndInst != BackInst->getNextNode())
255 return;
256
257 Instruction *StartInst = (*Candidate->begin()).Inst;
258 assert(StartInst && "Expected a start instruction?");
259 StartBB = StartInst->getParent();
260 PrevBB = StartBB;
261
262 DenseSet<BasicBlock *> BBSet;
263 Candidate->getBasicBlocks(BBSet);
264
265 // We iterate over the instructions in the region, if we find a PHINode, we
266 // check if there are predecessors outside of the region, if there are,
267 // we ignore this region since we are unable to handle the severing of the
268 // phi node right now.
269
270 // TODO: Handle extraneous inputs for PHINodes through variable number of
271 // inputs, similar to how outputs are handled.
272 BasicBlock::iterator It = StartInst->getIterator();
273 EndBB = BackInst->getParent();
274 BasicBlock *IBlock;
275 BasicBlock *PHIPredBlock = nullptr;
276 bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
277 while (PHINode *PN = dyn_cast<PHINode>(Val: &*It)) {
278 unsigned NumPredsOutsideRegion = 0;
279 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
280 if (!BBSet.contains(V: PN->getIncomingBlock(i))) {
281 PHIPredBlock = PN->getIncomingBlock(i);
282 ++NumPredsOutsideRegion;
283 continue;
284 }
285
286 // We must consider the case there the incoming block to the PHINode is
287 // the same as the final block of the OutlinableRegion. If this is the
288 // case, the branch from this block must also be outlined to be valid.
289 IBlock = PN->getIncomingBlock(i);
290 if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
291 PHIPredBlock = PN->getIncomingBlock(i);
292 ++NumPredsOutsideRegion;
293 }
294 }
295
296 if (NumPredsOutsideRegion > 1)
297 return;
298
299 It++;
300 }
301
302 // If the region starts with a PHINode, but is not the initial instruction of
303 // the BasicBlock, we ignore this region for now.
304 if (isa<PHINode>(Val: StartInst) && StartInst != &*StartBB->begin())
305 return;
306
307 // If the region ends with a PHINode, but does not contain all of the phi node
308 // instructions of the region, we ignore it for now.
309 if (isa<PHINode>(Val: BackInst) &&
310 BackInst != &*std::prev(x: EndBB->getFirstInsertionPt()))
311 return;
312
313 // The basic block gets split like so:
314 // block: block:
315 // inst1 inst1
316 // inst2 inst2
317 // region1 br block_to_outline
318 // region2 block_to_outline:
319 // region3 -> region1
320 // region4 region2
321 // inst3 region3
322 // inst4 region4
323 // br block_after_outline
324 // block_after_outline:
325 // inst3
326 // inst4
327
328 std::string OriginalName = PrevBB->getName().str();
329
330 StartBB = PrevBB->splitBasicBlock(I: StartInst, BBName: OriginalName + "_to_outline");
331 PrevBB->replaceSuccessorsPhiUsesWith(Old: PrevBB, New: StartBB);
332 // If there was a PHINode with an incoming block outside the region,
333 // make sure is correctly updated in the newly split block.
334 if (PHIPredBlock)
335 PrevBB->replaceSuccessorsPhiUsesWith(Old: PHIPredBlock, New: PrevBB);
336
337 CandidateSplit = true;
338 if (!BackInst->isTerminator()) {
339 EndBB = EndInst->getParent();
340 FollowBB = EndBB->splitBasicBlock(I: EndInst, BBName: OriginalName + "_after_outline");
341 EndBB->replaceSuccessorsPhiUsesWith(Old: EndBB, New: FollowBB);
342 FollowBB->replaceSuccessorsPhiUsesWith(Old: PrevBB, New: FollowBB);
343 } else {
344 EndBB = BackInst->getParent();
345 EndsInBranch = true;
346 FollowBB = nullptr;
347 }
348
349 // Refind the basic block set.
350 BBSet.clear();
351 Candidate->getBasicBlocks(BBSet);
352 // For the phi nodes in the new starting basic block of the region, we
353 // reassign the targets of the basic blocks branching instructions.
354 replaceTargetsFromPHINode(PHIBlock: StartBB, Find: PrevBB, Replace: StartBB, Included&: BBSet);
355 if (FollowBB)
356 replaceTargetsFromPHINode(PHIBlock: FollowBB, Find: EndBB, Replace: FollowBB, Included&: BBSet);
357}
358
359void OutlinableRegion::reattachCandidate() {
360 assert(CandidateSplit && "Candidate is not split!");
361
362 // The basic block gets reattached like so:
363 // block: block:
364 // inst1 inst1
365 // inst2 inst2
366 // br block_to_outline region1
367 // block_to_outline: -> region2
368 // region1 region3
369 // region2 region4
370 // region3 inst3
371 // region4 inst4
372 // br block_after_outline
373 // block_after_outline:
374 // inst3
375 // inst4
376 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
377
378 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
379 // Make sure PHINode references to the block we are merging into are
380 // updated to be incoming blocks from the predecessor to the current block.
381
382 // NOTE: If this is updated such that the outlined block can have more than
383 // one incoming block to a PHINode, this logic will have to updated
384 // to handle multiple precessors instead.
385
386 // We only need to update this if the outlined section contains a PHINode, if
387 // it does not, then the incoming block was never changed in the first place.
388 // On the other hand, if PrevBB has no predecessors, it means that all
389 // incoming blocks to the first block are contained in the region, and there
390 // will be nothing to update.
391 Instruction *StartInst = (*Candidate->begin()).Inst;
392 if (isa<PHINode>(Val: StartInst) && !PrevBB->hasNPredecessors(N: 0)) {
393 assert(!PrevBB->hasNPredecessorsOrMore(2) &&
394 "PrevBB has more than one predecessor. Should be 0 or 1.");
395 BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
396 PrevBB->replaceSuccessorsPhiUsesWith(Old: PrevBB, New: BeforePrevBB);
397 }
398 PrevBB->getTerminator()->eraseFromParent();
399
400 // If we reattaching after outlining, we iterate over the phi nodes to
401 // the initial block, and reassign the branch instructions of the incoming
402 // blocks to the block we are remerging into.
403 if (!ExtractedFunction) {
404 DenseSet<BasicBlock *> BBSet;
405 Candidate->getBasicBlocks(BBSet);
406
407 replaceTargetsFromPHINode(PHIBlock: StartBB, Find: StartBB, Replace: PrevBB, Included&: BBSet);
408 if (!EndsInBranch)
409 replaceTargetsFromPHINode(PHIBlock: FollowBB, Find: FollowBB, Replace: EndBB, Included&: BBSet);
410 }
411
412 moveBBContents(SourceBB&: *StartBB, TargetBB&: *PrevBB);
413
414 BasicBlock *PlacementBB = PrevBB;
415 if (StartBB != EndBB)
416 PlacementBB = EndBB;
417 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
418 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
419 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
420 PlacementBB->getTerminator()->eraseFromParent();
421 moveBBContents(SourceBB&: *FollowBB, TargetBB&: *PlacementBB);
422 PlacementBB->replaceSuccessorsPhiUsesWith(Old: FollowBB, New: PlacementBB);
423 FollowBB->eraseFromParent();
424 }
425
426 PrevBB->replaceSuccessorsPhiUsesWith(Old: StartBB, New: PrevBB);
427 StartBB->eraseFromParent();
428
429 // Make sure to save changes back to the StartBB.
430 StartBB = PrevBB;
431 EndBB = nullptr;
432 PrevBB = nullptr;
433 FollowBB = nullptr;
434
435 CandidateSplit = false;
436}
437
438/// Find whether \p V matches the Constants previously found for the \p GVN.
439///
440/// \param V - The value to check for consistency.
441/// \param GVN - The global value number assigned to \p V.
442/// \param GVNToConstant - The mapping of global value number to Constants.
443/// \returns true if the Value matches the Constant mapped to by V and false if
444/// it \p V is a Constant but does not match.
445/// \returns std::nullopt if \p V is not a Constant.
446static std::optional<bool>
447constantMatches(Value *V, unsigned GVN,
448 DenseMap<unsigned, Constant *> &GVNToConstant) {
449 // See if we have a constants
450 Constant *CST = dyn_cast<Constant>(Val: V);
451 if (!CST)
452 return std::nullopt;
453
454 // Holds a mapping from a global value number to a Constant.
455 DenseMap<unsigned, Constant *>::iterator GVNToConstantIt;
456 bool Inserted;
457
458
459 // If we have a constant, try to make a new entry in the GVNToConstant.
460 std::tie(args&: GVNToConstantIt, args&: Inserted) =
461 GVNToConstant.insert(KV: std::make_pair(x&: GVN, y&: CST));
462 // If it was found and is not equal, it is not the same. We do not
463 // handle this case yet, and exit early.
464 if (Inserted || (GVNToConstantIt->second == CST))
465 return true;
466
467 return false;
468}
469
470InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
471 InstructionCost Benefit = 0;
472
473 // Estimate the benefit of outlining a specific sections of the program. We
474 // delegate mostly this task to the TargetTransformInfo so that if the target
475 // has specific changes, we can have a more accurate estimate.
476
477 // However, getInstructionCost delegates the code size calculation for
478 // arithmetic instructions to getArithmeticInstrCost in
479 // include/Analysis/TargetTransformImpl.h, where it always estimates that the
480 // code size for a division and remainder instruction to be equal to 4, and
481 // everything else to 1. This is not an accurate representation of the
482 // division instruction for targets that have a native division instruction.
483 // To be overly conservative, we only add 1 to the number of instructions for
484 // each division instruction.
485 for (IRInstructionData &ID : *Candidate) {
486 Instruction *I = ID.Inst;
487 switch (I->getOpcode()) {
488 case Instruction::FDiv:
489 case Instruction::FRem:
490 case Instruction::SDiv:
491 case Instruction::SRem:
492 case Instruction::UDiv:
493 case Instruction::URem:
494 Benefit += 1;
495 break;
496 default:
497 Benefit += TTI.getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_CodeSize);
498 break;
499 }
500 }
501
502 return Benefit;
503}
504
505/// Check the \p OutputMappings structure for value \p Input, if it exists
506/// it has been used as an output for outlining, and has been renamed, and we
507/// return the new value, otherwise, we return the same value.
508///
509/// \param OutputMappings [in] - The mapping of values to their renamed value
510/// after being used as an output for an outlined region.
511/// \param Input [in] - The value to find the remapped value of, if it exists.
512/// \return The remapped value if it has been renamed, and the same value if has
513/// not.
514static Value *findOutputMapping(const DenseMap<Value *, Value *> OutputMappings,
515 Value *Input) {
516 DenseMap<Value *, Value *>::const_iterator OutputMapping =
517 OutputMappings.find(Val: Input);
518 if (OutputMapping != OutputMappings.end())
519 return OutputMapping->second;
520 return Input;
521}
522
523/// Find whether \p Region matches the global value numbering to Constant
524/// mapping found so far.
525///
526/// \param Region - The OutlinableRegion we are checking for constants
527/// \param GVNToConstant - The mapping of global value number to Constants.
528/// \param NotSame - The set of global value numbers that do not have the same
529/// constant in each region.
530/// \returns true if all Constants are the same in every use of a Constant in \p
531/// Region and false if not
532static bool
533collectRegionsConstants(OutlinableRegion &Region,
534 DenseMap<unsigned, Constant *> &GVNToConstant,
535 DenseSet<unsigned> &NotSame) {
536 bool ConstantsTheSame = true;
537
538 IRSimilarityCandidate &C = *Region.Candidate;
539 for (IRInstructionData &ID : C) {
540
541 // Iterate over the operands in an instruction. If the global value number,
542 // assigned by the IRSimilarityCandidate, has been seen before, we check if
543 // the number has been found to be not the same value in each instance.
544 for (Value *V : ID.OperVals) {
545 std::optional<unsigned> GVNOpt = C.getGVN(V);
546 assert(GVNOpt && "Expected a GVN for operand?");
547 unsigned GVN = *GVNOpt;
548
549 // Check if this global value has been found to not be the same already.
550 if (NotSame.contains(V: GVN)) {
551 if (isa<Constant>(Val: V))
552 ConstantsTheSame = false;
553 continue;
554 }
555
556 // If it has been the same so far, we check the value for if the
557 // associated Constant value match the previous instances of the same
558 // global value number. If the global value does not map to a Constant,
559 // it is considered to not be the same value.
560 std::optional<bool> ConstantMatches =
561 constantMatches(V, GVN, GVNToConstant);
562 if (ConstantMatches) {
563 if (*ConstantMatches)
564 continue;
565 else
566 ConstantsTheSame = false;
567 }
568
569 // While this value is a register, it might not have been previously,
570 // make sure we don't already have a constant mapped to this global value
571 // number.
572 if (GVNToConstant.contains(Val: GVN))
573 ConstantsTheSame = false;
574
575 NotSame.insert(V: GVN);
576 }
577 }
578
579 return ConstantsTheSame;
580}
581
582void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
583 DenseMap<unsigned, Constant *> GVNToConstant;
584
585 for (OutlinableRegion *Region : Regions)
586 collectRegionsConstants(Region&: *Region, GVNToConstant, NotSame);
587}
588
589void OutlinableGroup::collectGVNStoreSets(Module &M) {
590 for (OutlinableRegion *OS : Regions)
591 OutputGVNCombinations.insert(V: OS->GVNStores);
592
593 // We are adding an extracted argument to decide between which output path
594 // to use in the basic block. It is used in a switch statement and only
595 // needs to be an integer.
596 if (OutputGVNCombinations.size() > 1)
597 ArgumentTypes.push_back(x: Type::getInt32Ty(C&: M.getContext()));
598}
599
600/// Get the subprogram if it exists for one of the outlined regions.
601///
602/// \param [in] Group - The set of regions to find a subprogram for.
603/// \returns the subprogram if it exists, or nullptr.
604static DISubprogram *getSubprogramOrNull(OutlinableGroup &Group) {
605 for (OutlinableRegion *OS : Group.Regions)
606 if (Function *F = OS->Call->getFunction())
607 if (DISubprogram *SP = F->getSubprogram())
608 return SP;
609
610 return nullptr;
611}
612
613Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
614 unsigned FunctionNameSuffix) {
615 assert(!Group.OutlinedFunction && "Function is already defined!");
616
617 Type *RetTy = Type::getVoidTy(C&: M.getContext());
618 // All extracted functions _should_ have the same return type at this point
619 // since the similarity identifier ensures that all branches outside of the
620 // region occur in the same place.
621
622 // NOTE: Should we ever move to the model that uses a switch at every point
623 // needed, meaning that we could branch within the region or out, it is
624 // possible that we will need to switch to using the most general case all of
625 // the time.
626 for (OutlinableRegion *R : Group.Regions) {
627 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
628 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
629 (RetTy->isIntegerTy(Bitwidth: 1) && ExtractedFuncType->isIntegerTy(Bitwidth: 16)))
630 RetTy = ExtractedFuncType;
631 }
632
633 Group.OutlinedFunctionType = FunctionType::get(
634 Result: RetTy, Params: Group.ArgumentTypes, isVarArg: false);
635
636 // These functions will only be called from within the same module, so
637 // we can set an internal linkage.
638 Group.OutlinedFunction = Function::Create(
639 Ty: Group.OutlinedFunctionType, Linkage: GlobalValue::InternalLinkage,
640 N: "outlined_ir_func_" + std::to_string(val: FunctionNameSuffix), M);
641
642 // Transfer the swifterr attribute to the correct function parameter.
643 if (Group.SwiftErrorArgument)
644 Group.OutlinedFunction->addParamAttr(ArgNo: *Group.SwiftErrorArgument,
645 Kind: Attribute::SwiftError);
646
647 Group.OutlinedFunction->addFnAttr(Kind: Attribute::OptimizeForSize);
648 Group.OutlinedFunction->addFnAttr(Kind: Attribute::MinSize);
649
650 // If there's a DISubprogram associated with this outlined function, then
651 // emit debug info for the outlined function.
652 if (DISubprogram *SP = getSubprogramOrNull(Group)) {
653 Function *F = Group.OutlinedFunction;
654 // We have a DISubprogram. Get its DICompileUnit.
655 DICompileUnit *CU = SP->getUnit();
656 DIBuilder DB(M, true, CU);
657 DIFile *Unit = SP->getFile();
658 Mangler Mg;
659 // Get the mangled name of the function for the linkage name.
660 std::string Dummy;
661 llvm::raw_string_ostream MangledNameStream(Dummy);
662 Mg.getNameWithPrefix(OS&: MangledNameStream, GV: F, CannotUsePrivateLabel: false);
663
664 DISubprogram *OutlinedSP = DB.createFunction(
665 Scope: Unit /* Context */, Name: F->getName(), LinkageName: Dummy, File: Unit /* File */,
666 LineNo: 0 /* Line 0 is reserved for compiler-generated code. */,
667 Ty: DB.createSubroutineType(ParameterTypes: DB.getOrCreateTypeArray(Elements: {})), /* void type */
668 ScopeLine: 0, /* Line 0 is reserved for compiler-generated code. */
669 Flags: DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
670 /* Outlined code is optimized code by definition. */
671 SPFlags: DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
672
673 // Attach subprogram to the function.
674 F->setSubprogram(OutlinedSP);
675 // We're done with the DIBuilder.
676 DB.finalize();
677 }
678
679 return Group.OutlinedFunction;
680}
681
682/// Move each BasicBlock in \p Old to \p New.
683///
684/// \param [in] Old - The function to move the basic blocks from.
685/// \param [in] New - The function to move the basic blocks to.
686/// \param [out] NewEnds - The return blocks of the new overall function.
687static void moveFunctionData(Function &Old, Function &New,
688 DenseMap<Value *, BasicBlock *> &NewEnds) {
689 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Range&: Old)) {
690 CurrBB.removeFromParent();
691 CurrBB.insertInto(Parent: &New);
692 Instruction *I = CurrBB.getTerminator();
693
694 // For each block we find a return instruction is, it is a potential exit
695 // path for the function. We keep track of each block based on the return
696 // value here.
697 if (ReturnInst *RI = dyn_cast<ReturnInst>(Val: I))
698 NewEnds.insert(KV: std::make_pair(x: RI->getReturnValue(), y: &CurrBB));
699
700 for (Instruction &Val : CurrBB) {
701 // Since debug-info originates from many different locations in the
702 // program, it will cause incorrect reporting from a debugger if we keep
703 // the same debug instructions. Drop non-intrinsic DbgVariableRecords
704 // here, collect intrinsics for removal later.
705 Val.dropDbgRecords();
706
707 // We must handle the scoping of called functions differently than
708 // other outlined instructions.
709 if (!isa<CallInst>(Val: &Val)) {
710 // Remove the debug information for outlined functions.
711 Val.setDebugLoc(DebugLoc::getDropped());
712
713 // Loop info metadata may contain line locations. Update them to have no
714 // value in the new subprogram since the outlined code could be from
715 // several locations.
716 auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
717 if (DISubprogram *SP = New.getSubprogram())
718 if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD))
719 return DILocation::get(Context&: New.getContext(), Line: Loc->getLine(),
720 Column: Loc->getColumn(), Scope: SP, InlinedAt: nullptr);
721 return MD;
722 };
723 updateLoopMetadataDebugLocations(I&: Val, Updater: updateLoopInfoLoc);
724 continue;
725 }
726
727 // Edit the scope of called functions inside of outlined functions.
728 if (DISubprogram *SP = New.getSubprogram()) {
729 DILocation *DI = DILocation::get(Context&: New.getContext(), Line: 0, Column: 0, Scope: SP);
730 Val.setDebugLoc(DI);
731 }
732 }
733 }
734}
735
736/// Find the constants that will need to be lifted into arguments
737/// as they are not the same in each instance of the region.
738///
739/// \param [in] C - The IRSimilarityCandidate containing the region we are
740/// analyzing.
741/// \param [in] NotSame - The set of global value numbers that do not have a
742/// single Constant across all OutlinableRegions similar to \p C.
743/// \param [out] Inputs - The list containing the global value numbers of the
744/// arguments needed for the region of code.
745static void findConstants(IRSimilarityCandidate &C, DenseSet<unsigned> &NotSame,
746 std::vector<unsigned> &Inputs) {
747 DenseSet<unsigned> Seen;
748 // Iterate over the instructions, and find what constants will need to be
749 // extracted into arguments.
750 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
751 IDIt != EndIDIt; IDIt++) {
752 for (Value *V : (*IDIt).OperVals) {
753 // Since these are stored before any outlining, they will be in the
754 // global value numbering.
755 unsigned GVN = *C.getGVN(V);
756 if (isa<Constant>(Val: V))
757 if (NotSame.contains(V: GVN) && Seen.insert(V: GVN).second)
758 Inputs.push_back(x: GVN);
759 }
760 }
761}
762
763/// Find the GVN for the inputs that have been found by the CodeExtractor.
764///
765/// \param [in] C - The IRSimilarityCandidate containing the region we are
766/// analyzing.
767/// \param [in] CurrentInputs - The set of inputs found by the
768/// CodeExtractor.
769/// \param [in] OutputMappings - The mapping of values that have been replaced
770/// by a new output value.
771/// \param [out] EndInputNumbers - The global value numbers for the extracted
772/// arguments.
773static void mapInputsToGVNs(IRSimilarityCandidate &C,
774 SetVector<Value *> &CurrentInputs,
775 const DenseMap<Value *, Value *> &OutputMappings,
776 std::vector<unsigned> &EndInputNumbers) {
777 // Get the Global Value Number for each input. We check if the Value has been
778 // replaced by a different value at output, and use the original value before
779 // replacement.
780 for (Value *Input : CurrentInputs) {
781 assert(Input && "Have a nullptr as an input");
782 auto It = OutputMappings.find(Val: Input);
783 if (It != OutputMappings.end())
784 Input = It->second;
785 assert(C.getGVN(Input) && "Could not find a numbering for the given input");
786 EndInputNumbers.push_back(x: *C.getGVN(V: Input));
787 }
788}
789
790/// Find the original value for the \p ArgInput values if any one of them was
791/// replaced during a previous extraction.
792///
793/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
794/// \param [in] OutputMappings - The mapping of values that have been replaced
795/// by a new output value.
796/// \param [out] RemappedArgInputs - The remapped values according to
797/// \p OutputMappings that will be extracted.
798static void
799remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
800 const DenseMap<Value *, Value *> &OutputMappings,
801 SetVector<Value *> &RemappedArgInputs) {
802 // Get the global value number for each input that will be extracted as an
803 // argument by the code extractor, remapping if needed for reloaded values.
804 for (Value *Input : ArgInputs) {
805 auto It = OutputMappings.find(Val: Input);
806 if (It != OutputMappings.end())
807 Input = It->second;
808 RemappedArgInputs.insert(X: Input);
809 }
810}
811
812/// Find the input GVNs and the output values for a region of Instructions.
813/// Using the code extractor, we collect the inputs to the extracted function.
814///
815/// The \p Region can be identified as needing to be ignored in this function.
816/// It should be checked whether it should be ignored after a call to this
817/// function.
818///
819/// \param [in,out] Region - The region of code to be analyzed.
820/// \param [out] InputGVNs - The global value numbers for the extracted
821/// arguments.
822/// \param [in] NotSame - The global value numbers in the region that do not
823/// have the same constant value in the regions structurally similar to
824/// \p Region.
825/// \param [in] OutputMappings - The mapping of values that have been replaced
826/// by a new output value after extraction.
827/// \param [out] ArgInputs - The values of the inputs to the extracted function.
828/// \param [out] Outputs - The set of values extracted by the CodeExtractor
829/// as outputs.
830static void getCodeExtractorArguments(
831 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
832 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
833 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
834 IRSimilarityCandidate &C = *Region.Candidate;
835
836 // OverallInputs are the inputs to the region found by the CodeExtractor,
837 // SinkCands and HoistCands are used by the CodeExtractor to find sunken
838 // allocas of values whose lifetimes are contained completely within the
839 // outlined region. PremappedInputs are the arguments found by the
840 // CodeExtractor, removing conditions such as sunken allocas, but that
841 // may need to be remapped due to the extracted output values replacing
842 // the original values. We use DummyOutputs for this first run of finding
843 // inputs and outputs since the outputs could change during findAllocas,
844 // the correct set of extracted outputs will be in the final Outputs ValueSet.
845 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
846 DummyOutputs;
847
848 // Use the code extractor to get the inputs and outputs, without sunken
849 // allocas or removing llvm.assumes.
850 CodeExtractor *CE = Region.CE;
851 CE->findInputsOutputs(Inputs&: OverallInputs, Outputs&: DummyOutputs, Allocas: SinkCands);
852 assert(Region.StartBB && "Region must have a start BasicBlock!");
853 Function *OrigF = Region.StartBB->getParent();
854 CodeExtractorAnalysisCache CEAC(*OrigF);
855 BasicBlock *Dummy = nullptr;
856
857 // The region may be ineligible due to VarArgs in the parent function. In this
858 // case we ignore the region.
859 if (!CE->isEligible()) {
860 Region.IgnoreRegion = true;
861 return;
862 }
863
864 // Find if any values are going to be sunk into the function when extracted
865 CE->findAllocas(CEAC, SinkCands, HoistCands, ExitBlock&: Dummy);
866 CE->findInputsOutputs(Inputs&: PremappedInputs, Outputs, Allocas: SinkCands);
867
868 // TODO: Support regions with sunken allocas: values whose lifetimes are
869 // contained completely within the outlined region. These are not guaranteed
870 // to be the same in every region, so we must elevate them all to arguments
871 // when they appear. If these values are not equal, it means there is some
872 // Input in OverallInputs that was removed for ArgInputs.
873 if (OverallInputs.size() != PremappedInputs.size()) {
874 Region.IgnoreRegion = true;
875 return;
876 }
877
878 findConstants(C, NotSame, Inputs&: InputGVNs);
879
880 mapInputsToGVNs(C, CurrentInputs&: OverallInputs, OutputMappings, EndInputNumbers&: InputGVNs);
881
882 remapExtractedInputs(ArgInputs: PremappedInputs.getArrayRef(), OutputMappings,
883 RemappedArgInputs&: ArgInputs);
884
885 // Sort the GVNs, since we now have constants included in the \ref InputGVNs
886 // we need to make sure they are in a deterministic order.
887 stable_sort(Range&: InputGVNs);
888}
889
890/// Look over the inputs and map each input argument to an argument in the
891/// overall function for the OutlinableRegions. This creates a way to replace
892/// the arguments of the extracted function with the arguments of the new
893/// overall function.
894///
895/// \param [in,out] Region - The region of code to be analyzed.
896/// \param [in] InputGVNs - The global value numbering of the input values
897/// collected.
898/// \param [in] ArgInputs - The values of the arguments to the extracted
899/// function.
900static void
901findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
902 std::vector<unsigned> &InputGVNs,
903 SetVector<Value *> &ArgInputs) {
904
905 IRSimilarityCandidate &C = *Region.Candidate;
906 OutlinableGroup &Group = *Region.Parent;
907
908 // This counts the argument number in the overall function.
909 unsigned TypeIndex = 0;
910
911 // This counts the argument number in the extracted function.
912 unsigned OriginalIndex = 0;
913
914 // Find the mapping of the extracted arguments to the arguments for the
915 // overall function. Since there may be extra arguments in the overall
916 // function to account for the extracted constants, we have two different
917 // counters as we find extracted arguments, and as we come across overall
918 // arguments.
919
920 // Additionally, in our first pass, for the first extracted function,
921 // we find argument locations for the canonical value numbering. This
922 // numbering overrides any discovered location for the extracted code.
923 for (unsigned InputVal : InputGVNs) {
924 std::optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(N: InputVal);
925 assert(CanonicalNumberOpt && "Canonical number not found?");
926 unsigned CanonicalNumber = *CanonicalNumberOpt;
927
928 std::optional<Value *> InputOpt = C.fromGVN(Num: InputVal);
929 assert(InputOpt && "Global value number not found?");
930 Value *Input = *InputOpt;
931
932 DenseMap<unsigned, unsigned>::iterator AggArgIt =
933 Group.CanonicalNumberToAggArg.find(Val: CanonicalNumber);
934
935 if (!Group.InputTypesSet) {
936 Group.ArgumentTypes.push_back(x: Input->getType());
937 // If the input value has a swifterr attribute, make sure to mark the
938 // argument in the overall function.
939 if (Input->isSwiftError()) {
940 assert(
941 !Group.SwiftErrorArgument &&
942 "Argument already marked with swifterr for this OutlinableGroup!");
943 Group.SwiftErrorArgument = TypeIndex;
944 }
945 }
946
947 // Check if we have a constant. If we do add it to the overall argument
948 // number to Constant map for the region, and continue to the next input.
949 if (Constant *CST = dyn_cast<Constant>(Val: Input)) {
950 if (AggArgIt != Group.CanonicalNumberToAggArg.end())
951 Region.AggArgToConstant.insert(KV: std::make_pair(x&: AggArgIt->second, y&: CST));
952 else {
953 Group.CanonicalNumberToAggArg.insert(
954 KV: std::make_pair(x&: CanonicalNumber, y&: TypeIndex));
955 Region.AggArgToConstant.insert(KV: std::make_pair(x&: TypeIndex, y&: CST));
956 }
957 TypeIndex++;
958 continue;
959 }
960
961 // It is not a constant, we create the mapping from extracted argument list
962 // to the overall argument list, using the canonical location, if it exists.
963 assert(ArgInputs.count(Input) && "Input cannot be found!");
964
965 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
966 if (OriginalIndex != AggArgIt->second)
967 Region.ChangedArgOrder = true;
968 Region.ExtractedArgToAgg.insert(
969 KV: std::make_pair(x&: OriginalIndex, y&: AggArgIt->second));
970 Region.AggArgToExtracted.insert(
971 KV: std::make_pair(x&: AggArgIt->second, y&: OriginalIndex));
972 } else {
973 Group.CanonicalNumberToAggArg.insert(
974 KV: std::make_pair(x&: CanonicalNumber, y&: TypeIndex));
975 Region.ExtractedArgToAgg.insert(KV: std::make_pair(x&: OriginalIndex, y&: TypeIndex));
976 Region.AggArgToExtracted.insert(KV: std::make_pair(x&: TypeIndex, y&: OriginalIndex));
977 }
978 OriginalIndex++;
979 TypeIndex++;
980 }
981
982 // If the function type definitions for the OutlinableGroup holding the region
983 // have not been set, set the length of the inputs here. We should have the
984 // same inputs for all of the different regions contained in the
985 // OutlinableGroup since they are all structurally similar to one another.
986 if (!Group.InputTypesSet) {
987 Group.NumAggregateInputs = TypeIndex;
988 Group.InputTypesSet = true;
989 }
990
991 Region.NumExtractedInputs = OriginalIndex;
992}
993
994/// Check if the \p V has any uses outside of the region other than \p PN.
995///
996/// \param V [in] - The value to check.
997/// \param PHILoc [in] - The location in the PHINode of \p V.
998/// \param PN [in] - The PHINode using \p V.
999/// \param Exits [in] - The potential blocks we exit to from the outlined
1000/// region.
1001/// \param BlocksInRegion [in] - The basic blocks contained in the region.
1002/// \returns true if \p V has any use soutside its region other than \p PN.
1003static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1004 SmallPtrSet<BasicBlock *, 1> &Exits,
1005 DenseSet<BasicBlock *> &BlocksInRegion) {
1006 // We check to see if the value is used by the PHINode from some other
1007 // predecessor not included in the region. If it is, we make sure
1008 // to keep it as an output.
1009 if (any_of(Range: llvm::seq<unsigned>(Begin: 0, End: PN.getNumIncomingValues()),
1010 P: [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1011 return (Idx != PHILoc && V == PN.getIncomingValue(i: Idx) &&
1012 !BlocksInRegion.contains(V: PN.getIncomingBlock(i: Idx)));
1013 }))
1014 return true;
1015
1016 // Check if the value is used by any other instructions outside the region.
1017 return any_of(Range: V->users(), P: [&Exits, &BlocksInRegion](User *U) {
1018 Instruction *I = dyn_cast<Instruction>(Val: U);
1019 if (!I)
1020 return false;
1021
1022 // If the use of the item is inside the region, we skip it. Uses
1023 // inside the region give us useful information about how the item could be
1024 // used as an output.
1025 BasicBlock *Parent = I->getParent();
1026 if (BlocksInRegion.contains(V: Parent))
1027 return false;
1028
1029 // If it's not a PHINode then we definitely know the use matters. This
1030 // output value will not completely combined with another item in a PHINode
1031 // as it is directly reference by another non-phi instruction
1032 if (!isa<PHINode>(Val: I))
1033 return true;
1034
1035 // If we have a PHINode outside one of the exit locations, then it
1036 // can be considered an outside use as well. If there is a PHINode
1037 // contained in the Exit where this values use matters, it will be
1038 // caught when we analyze that PHINode.
1039 if (!Exits.contains(Ptr: Parent))
1040 return true;
1041
1042 return false;
1043 });
1044}
1045
1046/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1047/// considered outputs. A PHINodes is an output when more than one incoming
1048/// value has been marked by the CodeExtractor as an output.
1049///
1050/// \param CurrentExitFromRegion [in] - The block to analyze.
1051/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1052/// region.
1053/// \param RegionBlocks [in] - The basic blocks in the region.
1054/// \param Outputs [in, out] - The existing outputs for the region, we may add
1055/// PHINodes to this as we find that they replace output values.
1056/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1057/// totally replaced by a PHINode.
1058/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1059/// in PHINodes, but have other uses, and should still be considered outputs.
1060static void analyzeExitPHIsForOutputUses(
1061 BasicBlock *CurrentExitFromRegion,
1062 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1063 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1064 DenseSet<Value *> &OutputsReplacedByPHINode,
1065 DenseSet<Value *> &OutputsWithNonPhiUses) {
1066 for (PHINode &PN : CurrentExitFromRegion->phis()) {
1067 // Find all incoming values from the outlining region.
1068 SmallVector<unsigned, 2> IncomingVals;
1069 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1070 if (RegionBlocks.contains(V: PN.getIncomingBlock(i: I)))
1071 IncomingVals.push_back(Elt: I);
1072
1073 // Do not process PHI if there are no predecessors from region.
1074 unsigned NumIncomingVals = IncomingVals.size();
1075 if (NumIncomingVals == 0)
1076 continue;
1077
1078 // If there is one predecessor, we mark it as a value that needs to be kept
1079 // as an output.
1080 if (NumIncomingVals == 1) {
1081 Value *V = PN.getIncomingValue(i: *IncomingVals.begin());
1082 OutputsWithNonPhiUses.insert(V);
1083 OutputsReplacedByPHINode.erase(V);
1084 continue;
1085 }
1086
1087 // This PHINode will be used as an output value, so we add it to our list.
1088 Outputs.insert(X: &PN);
1089
1090 // Not all of the incoming values should be ignored as other inputs and
1091 // outputs may have uses in outlined region. If they have other uses
1092 // outside of the single PHINode we should not skip over it.
1093 for (unsigned Idx : IncomingVals) {
1094 Value *V = PN.getIncomingValue(i: Idx);
1095 if (!isa<Constant>(Val: V) &&
1096 outputHasNonPHI(V, PHILoc: Idx, PN, Exits&: PotentialExitsFromRegion, BlocksInRegion&: RegionBlocks)) {
1097 OutputsWithNonPhiUses.insert(V);
1098 OutputsReplacedByPHINode.erase(V);
1099 continue;
1100 }
1101 if (!OutputsWithNonPhiUses.contains(V))
1102 OutputsReplacedByPHINode.insert(V);
1103 }
1104 }
1105}
1106
1107// Represents the type for the unsigned number denoting the output number for
1108// phi node, along with the canonical number for the exit block.
1109using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1110// The list of canonical numbers for the incoming values to a PHINode.
1111using CanonList = SmallVector<unsigned, 2>;
1112// The pair type representing the set of canonical values being combined in the
1113// PHINode, along with the location data for the PHINode.
1114using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1115
1116/// Encode \p PND as an integer for easy lookup based on the argument location,
1117/// the parent BasicBlock canonical numbering, and the canonical numbering of
1118/// the values stored in the PHINode.
1119///
1120/// \param PND - The data to hash.
1121/// \returns The hash code of \p PND.
1122static hash_code encodePHINodeData(PHINodeData &PND) {
1123 return llvm::hash_combine(args: llvm::hash_value(value: PND.first.first),
1124 args: llvm::hash_value(value: PND.first.second),
1125 args: llvm::hash_combine_range(R&: PND.second));
1126}
1127
1128/// Create a special GVN for PHINodes that will be used outside of
1129/// the region. We create a hash code based on the Canonical number of the
1130/// parent BasicBlock, the canonical numbering of the values stored in the
1131/// PHINode and the aggregate argument location. This is used to find whether
1132/// this PHINode type has been given a canonical numbering already. If not, we
1133/// assign it a value and store it for later use. The value is returned to
1134/// identify different output schemes for the set of regions.
1135///
1136/// \param Region - The region that \p PN is an output for.
1137/// \param PN - The PHINode we are analyzing.
1138/// \param Blocks - The blocks for the region we are analyzing.
1139/// \param AggArgIdx - The argument \p PN will be stored into.
1140/// \returns An optional holding the assigned canonical number, or std::nullopt
1141/// if there is some attribute of the PHINode blocking it from being used.
1142static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1143 PHINode *PN,
1144 DenseSet<BasicBlock *> &Blocks,
1145 unsigned AggArgIdx) {
1146 OutlinableGroup &Group = *Region.Parent;
1147 IRSimilarityCandidate &Cand = *Region.Candidate;
1148 BasicBlock *PHIBB = PN->getParent();
1149 CanonList PHIGVNs;
1150 Value *Incoming;
1151 BasicBlock *IncomingBlock;
1152 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1153 Incoming = PN->getIncomingValue(i: Idx);
1154 IncomingBlock = PN->getIncomingBlock(i: Idx);
1155 // If the incoming block isn't in the region, we don't have to worry about
1156 // this incoming value.
1157 if (!Blocks.contains(V: IncomingBlock))
1158 continue;
1159
1160 // If we cannot find a GVN, and the incoming block is included in the region
1161 // this means that the input to the PHINode is not included in the region we
1162 // are trying to analyze, meaning, that if it was outlined, we would be
1163 // adding an extra input. We ignore this case for now, and so ignore the
1164 // region.
1165 std::optional<unsigned> OGVN = Cand.getGVN(V: Incoming);
1166 if (!OGVN) {
1167 Region.IgnoreRegion = true;
1168 return std::nullopt;
1169 }
1170
1171 // Collect the canonical numbers of the values in the PHINode.
1172 unsigned GVN = *OGVN;
1173 OGVN = Cand.getCanonicalNum(N: GVN);
1174 assert(OGVN && "No GVN found for incoming value?");
1175 PHIGVNs.push_back(Elt: *OGVN);
1176
1177 // Find the incoming block and use the canonical numbering as well to define
1178 // the hash for the PHINode.
1179 OGVN = Cand.getGVN(V: IncomingBlock);
1180
1181 // If there is no number for the incoming block, it is because we have
1182 // split the candidate basic blocks. So we use the previous block that it
1183 // was split from to find the valid global value numbering for the PHINode.
1184 if (!OGVN) {
1185 assert(Cand.getStartBB() == IncomingBlock &&
1186 "Unknown basic block used in exit path PHINode.");
1187
1188 BasicBlock *PrevBlock = nullptr;
1189 // Iterate over the predecessors to the incoming block of the
1190 // PHINode, when we find a block that is not contained in the region
1191 // we know that this is the first block that we split from, and should
1192 // have a valid global value numbering.
1193 for (BasicBlock *Pred : predecessors(BB: IncomingBlock))
1194 if (!Blocks.contains(V: Pred)) {
1195 PrevBlock = Pred;
1196 break;
1197 }
1198 assert(PrevBlock && "Expected a predecessor not in the reigon!");
1199 OGVN = Cand.getGVN(V: PrevBlock);
1200 }
1201 GVN = *OGVN;
1202 OGVN = Cand.getCanonicalNum(N: GVN);
1203 assert(OGVN && "No GVN found for incoming block?");
1204 PHIGVNs.push_back(Elt: *OGVN);
1205 }
1206
1207 // Now that we have the GVNs for the incoming values, we are going to combine
1208 // them with the GVN of the incoming bock, and the output location of the
1209 // PHINode to generate a hash value representing this instance of the PHINode.
1210 DenseMap<hash_code, unsigned>::iterator GVNToPHIIt;
1211 DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt;
1212 std::optional<unsigned> BBGVN = Cand.getGVN(V: PHIBB);
1213 assert(BBGVN && "Could not find GVN for the incoming block!");
1214
1215 BBGVN = Cand.getCanonicalNum(N: *BBGVN);
1216 assert(BBGVN && "Could not find canonical number for the incoming block!");
1217 // Create a pair of the exit block canonical value, and the aggregate
1218 // argument location, connected to the canonical numbers stored in the
1219 // PHINode.
1220 PHINodeData TemporaryPair =
1221 std::make_pair(x: std::make_pair(x&: *BBGVN, y&: AggArgIdx), y&: PHIGVNs);
1222 hash_code PHINodeDataHash = encodePHINodeData(PND&: TemporaryPair);
1223
1224 // Look for and create a new entry in our connection between canonical
1225 // numbers for PHINodes, and the set of objects we just created.
1226 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(Val: PHINodeDataHash);
1227 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1228 bool Inserted = false;
1229 std::tie(args&: PHIToGVNIt, args&: Inserted) = Group.PHINodeGVNToGVNs.insert(
1230 KV: std::make_pair(x&: Group.PHINodeGVNTracker, y&: TemporaryPair));
1231 std::tie(args&: GVNToPHIIt, args&: Inserted) = Group.GVNsToPHINodeGVN.insert(
1232 KV: std::make_pair(x&: PHINodeDataHash, y: Group.PHINodeGVNTracker--));
1233 }
1234
1235 return GVNToPHIIt->second;
1236}
1237
1238/// Create a mapping of the output arguments for the \p Region to the output
1239/// arguments of the overall outlined function.
1240///
1241/// \param [in,out] Region - The region of code to be analyzed.
1242/// \param [in] Outputs - The values found by the code extractor.
1243static void
1244findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region,
1245 SetVector<Value *> &Outputs) {
1246 OutlinableGroup &Group = *Region.Parent;
1247 IRSimilarityCandidate &C = *Region.Candidate;
1248
1249 SmallVector<BasicBlock *> BE;
1250 DenseSet<BasicBlock *> BlocksInRegion;
1251 C.getBasicBlocks(BBSet&: BlocksInRegion, BBList&: BE);
1252
1253 // Find the exits to the region.
1254 SmallPtrSet<BasicBlock *, 1> Exits;
1255 for (BasicBlock *Block : BE)
1256 for (BasicBlock *Succ : successors(BB: Block))
1257 if (!BlocksInRegion.contains(V: Succ))
1258 Exits.insert(Ptr: Succ);
1259
1260 // After determining which blocks exit to PHINodes, we add these PHINodes to
1261 // the set of outputs to be processed. We also check the incoming values of
1262 // the PHINodes for whether they should no longer be considered outputs.
1263 DenseSet<Value *> OutputsReplacedByPHINode;
1264 DenseSet<Value *> OutputsWithNonPhiUses;
1265 for (BasicBlock *ExitBB : Exits)
1266 analyzeExitPHIsForOutputUses(CurrentExitFromRegion: ExitBB, PotentialExitsFromRegion&: Exits, RegionBlocks&: BlocksInRegion, Outputs,
1267 OutputsReplacedByPHINode,
1268 OutputsWithNonPhiUses);
1269
1270 // This counts the argument number in the extracted function.
1271 unsigned OriginalIndex = Region.NumExtractedInputs;
1272
1273 // This counts the argument number in the overall function.
1274 unsigned TypeIndex = Group.NumAggregateInputs;
1275 bool TypeFound;
1276 DenseSet<unsigned> AggArgsUsed;
1277
1278 // Iterate over the output types and identify if there is an aggregate pointer
1279 // type whose base type matches the current output type. If there is, we mark
1280 // that we will use this output register for this value. If not we add another
1281 // type to the overall argument type list. We also store the GVNs used for
1282 // stores to identify which values will need to be moved into an special
1283 // block that holds the stores to the output registers.
1284 for (Value *Output : Outputs) {
1285 TypeFound = false;
1286 // We can do this since it is a result value, and will have a number
1287 // that is necessarily the same. BUT if in the future, the instructions
1288 // do not have to be in same order, but are functionally the same, we will
1289 // have to use a different scheme, as one-to-one correspondence is not
1290 // guaranteed.
1291 unsigned ArgumentSize = Group.ArgumentTypes.size();
1292
1293 // If the output is combined in a PHINode, we make sure to skip over it.
1294 if (OutputsReplacedByPHINode.contains(V: Output))
1295 continue;
1296
1297 unsigned AggArgIdx = 0;
1298 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1299 if (!isa<PointerType>(Val: Group.ArgumentTypes[Jdx]))
1300 continue;
1301
1302 if (!AggArgsUsed.insert(V: Jdx).second)
1303 continue;
1304
1305 TypeFound = true;
1306 Region.ExtractedArgToAgg.insert(KV: std::make_pair(x&: OriginalIndex, y&: Jdx));
1307 Region.AggArgToExtracted.insert(KV: std::make_pair(x&: Jdx, y&: OriginalIndex));
1308 AggArgIdx = Jdx;
1309 break;
1310 }
1311
1312 // We were unable to find an unused type in the output type set that matches
1313 // the output, so we add a pointer type to the argument types of the overall
1314 // function to handle this output and create a mapping to it.
1315 if (!TypeFound) {
1316 Group.ArgumentTypes.push_back(x: PointerType::get(C&: Output->getContext(),
1317 AddressSpace: M.getDataLayout().getAllocaAddrSpace()));
1318 // Mark the new pointer type as the last value in the aggregate argument
1319 // list.
1320 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1321 AggArgsUsed.insert(V: ArgTypeIdx);
1322 Region.ExtractedArgToAgg.insert(
1323 KV: std::make_pair(x&: OriginalIndex, y&: ArgTypeIdx));
1324 Region.AggArgToExtracted.insert(
1325 KV: std::make_pair(x&: ArgTypeIdx, y&: OriginalIndex));
1326 AggArgIdx = ArgTypeIdx;
1327 }
1328
1329 // TODO: Adapt to the extra input from the PHINode.
1330 PHINode *PN = dyn_cast<PHINode>(Val: Output);
1331
1332 std::optional<unsigned> GVN;
1333 if (PN && !BlocksInRegion.contains(V: PN->getParent())) {
1334 // Values outside the region can be combined into PHINode when we
1335 // have multiple exits. We collect both of these into a list to identify
1336 // which values are being used in the PHINode. Each list identifies a
1337 // different PHINode, and a different output. We store the PHINode as it's
1338 // own canonical value. These canonical values are also dependent on the
1339 // output argument it is saved to.
1340
1341 // If two PHINodes have the same canonical values, but different aggregate
1342 // argument locations, then they will have distinct Canonical Values.
1343 GVN = getGVNForPHINode(Region, PN, Blocks&: BlocksInRegion, AggArgIdx);
1344 if (!GVN)
1345 return;
1346 } else {
1347 // If we do not have a PHINode we use the global value numbering for the
1348 // output value, to find the canonical number to add to the set of stored
1349 // values.
1350 GVN = C.getGVN(V: Output);
1351 GVN = C.getCanonicalNum(N: *GVN);
1352 }
1353
1354 // Each region has a potentially unique set of outputs. We save which
1355 // values are output in a list of canonical values so we can differentiate
1356 // among the different store schemes.
1357 Region.GVNStores.push_back(Elt: *GVN);
1358
1359 OriginalIndex++;
1360 TypeIndex++;
1361 }
1362
1363 // We sort the stored values to make sure that we are not affected by analysis
1364 // order when determining what combination of items were stored.
1365 stable_sort(Range&: Region.GVNStores);
1366}
1367
1368void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1369 DenseSet<unsigned> &NotSame) {
1370 std::vector<unsigned> Inputs;
1371 SetVector<Value *> ArgInputs, Outputs;
1372
1373 getCodeExtractorArguments(Region, InputGVNs&: Inputs, NotSame, OutputMappings, ArgInputs,
1374 Outputs);
1375
1376 if (Region.IgnoreRegion)
1377 return;
1378
1379 // Map the inputs found by the CodeExtractor to the arguments found for
1380 // the overall function.
1381 findExtractedInputToOverallInputMapping(Region, InputGVNs&: Inputs, ArgInputs);
1382
1383 // Map the outputs found by the CodeExtractor to the arguments found for
1384 // the overall function.
1385 findExtractedOutputToOverallOutputMapping(M, Region, Outputs);
1386}
1387
1388/// Replace the extracted function in the Region with a call to the overall
1389/// function constructed from the deduplicated similar regions, replacing and
1390/// remapping the values passed to the extracted function as arguments to the
1391/// new arguments of the overall function.
1392///
1393/// \param [in] M - The module to outline from.
1394/// \param [in] Region - The regions of extracted code to be replaced with a new
1395/// function.
1396/// \returns a call instruction with the replaced function.
1397CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
1398 std::vector<Value *> NewCallArgs;
1399 DenseMap<unsigned, unsigned>::iterator ArgPair;
1400
1401 OutlinableGroup &Group = *Region.Parent;
1402 CallInst *Call = Region.Call;
1403 assert(Call && "Call to replace is nullptr?");
1404 Function *AggFunc = Group.OutlinedFunction;
1405 assert(AggFunc && "Function to replace with is nullptr?");
1406
1407 // If the arguments are the same size, there are not values that need to be
1408 // made into an argument, the argument ordering has not been change, or
1409 // different output registers to handle. We can simply replace the called
1410 // function in this case.
1411 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1412 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1413 << *AggFunc << " with same number of arguments\n");
1414 Call->setCalledFunction(AggFunc);
1415 return Call;
1416 }
1417
1418 // We have a different number of arguments than the new function, so
1419 // we need to use our previously mappings off extracted argument to overall
1420 // function argument, and constants to overall function argument to create the
1421 // new argument list.
1422 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1423
1424 if (AggArgIdx == AggFunc->arg_size() - 1 &&
1425 Group.OutputGVNCombinations.size() > 1) {
1426 // If we are on the last argument, and we need to differentiate between
1427 // output blocks, add an integer to the argument list to determine
1428 // what block to take
1429 LLVM_DEBUG(dbgs() << "Set switch block argument to "
1430 << Region.OutputBlockNum << "\n");
1431 NewCallArgs.push_back(x: ConstantInt::get(Ty: Type::getInt32Ty(C&: M.getContext()),
1432 V: Region.OutputBlockNum));
1433 continue;
1434 }
1435
1436 ArgPair = Region.AggArgToExtracted.find(Val: AggArgIdx);
1437 if (ArgPair != Region.AggArgToExtracted.end()) {
1438 Value *ArgumentValue = Call->getArgOperand(i: ArgPair->second);
1439 // If we found the mapping from the extracted function to the overall
1440 // function, we simply add it to the argument list. We use the same
1441 // value, it just needs to honor the new order of arguments.
1442 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1443 << *ArgumentValue << "\n");
1444 NewCallArgs.push_back(x: ArgumentValue);
1445 continue;
1446 }
1447
1448 // If it is a constant, we simply add it to the argument list as a value.
1449 if (auto It = Region.AggArgToConstant.find(Val: AggArgIdx);
1450 It != Region.AggArgToConstant.end()) {
1451 Constant *CST = It->second;
1452 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1453 << *CST << "\n");
1454 NewCallArgs.push_back(x: CST);
1455 continue;
1456 }
1457
1458 // Add a nullptr value if the argument is not found in the extracted
1459 // function. If we cannot find a value, it means it is not in use
1460 // for the region, so we should not pass anything to it.
1461 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1462 NewCallArgs.push_back(x: ConstantPointerNull::get(
1463 T: static_cast<PointerType *>(AggFunc->getArg(i: AggArgIdx)->getType())));
1464 }
1465
1466 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1467 << *AggFunc << " with new set of arguments\n");
1468 // Create the new call instruction and erase the old one.
1469 Call = CallInst::Create(Ty: AggFunc->getFunctionType(), Func: AggFunc, Args: NewCallArgs, NameStr: "",
1470 InsertBefore: Call->getIterator());
1471
1472 // It is possible that the call to the outlined function is either the first
1473 // instruction is in the new block, the last instruction, or both. If either
1474 // of these is the case, we need to make sure that we replace the instruction
1475 // in the IRInstructionData struct with the new call.
1476 CallInst *OldCall = Region.Call;
1477 if (Region.NewFront->Inst == OldCall)
1478 Region.NewFront->Inst = Call;
1479 if (Region.NewBack->Inst == OldCall)
1480 Region.NewBack->Inst = Call;
1481
1482 // Transfer any debug information.
1483 Call->setDebugLoc(Region.Call->getDebugLoc());
1484 // Since our output may determine which branch we go to, we make sure to
1485 // propagate this new call value through the module.
1486 OldCall->replaceAllUsesWith(V: Call);
1487
1488 // Remove the old instruction.
1489 OldCall->eraseFromParent();
1490 Region.Call = Call;
1491
1492 // Make sure that the argument in the new function has the SwiftError
1493 // argument.
1494 if (Group.SwiftErrorArgument)
1495 Call->addParamAttr(ArgNo: *Group.SwiftErrorArgument, Kind: Attribute::SwiftError);
1496
1497 return Call;
1498}
1499
1500/// Find or create a BasicBlock in the outlined function containing PhiBlocks
1501/// for \p RetVal.
1502///
1503/// \param Group - The OutlinableGroup containing the information about the
1504/// overall outlined function.
1505/// \param RetVal - The return value or exit option that we are currently
1506/// evaluating.
1507/// \returns The found or newly created BasicBlock to contain the needed
1508/// PHINodes to be used as outputs.
1509static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) {
1510 // Find if a PHIBlock exists for this return value already. If it is
1511 // the first time we are analyzing this, we will not, so we record it.
1512 auto [PhiBlockForRetVal, Inserted] = Group.PHIBlocks.try_emplace(Key: RetVal);
1513 if (!Inserted)
1514 return PhiBlockForRetVal->second;
1515
1516 auto ReturnBlockForRetVal = Group.EndBBs.find(Val: RetVal);
1517 assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1518 "Could not find output value!");
1519 BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1520
1521 // If we did not find a block, we create one, and insert it into the
1522 // overall function and record it.
1523 BasicBlock *PHIBlock = BasicBlock::Create(Context&: ReturnBB->getContext(), Name: "phi_block",
1524 Parent: ReturnBB->getParent());
1525 PhiBlockForRetVal->second = PHIBlock;
1526
1527 // We replace all branches to the return block in the newly created outlined
1528 // function to point to the new PHIBlock.
1529 ReturnBB->replaceAllUsesWith(V: PHIBlock);
1530
1531 UncondBrInst::Create(IfTrue: ReturnBB, InsertBefore: PHIBlock);
1532
1533 return PhiBlockForRetVal->second;
1534}
1535
1536/// For the function call now representing the \p Region, find the passed value
1537/// to that call that represents Argument \p A at the call location if the
1538/// call has already been replaced with a call to the overall, aggregate
1539/// function.
1540///
1541/// \param A - The Argument to get the passed value for.
1542/// \param Region - The extracted Region corresponding to the outlined function.
1543/// \returns The Value representing \p A at the call site.
1544static Value *
1545getPassedArgumentInAlreadyOutlinedFunction(const Argument *A,
1546 const OutlinableRegion &Region) {
1547 // If we don't need to adjust the argument number at all (since the call
1548 // has already been replaced by a call to the overall outlined function)
1549 // we can just get the specified argument.
1550 return Region.Call->getArgOperand(i: A->getArgNo());
1551}
1552
1553/// For the function call now representing the \p Region, find the passed value
1554/// to that call that represents Argument \p A at the call location if the
1555/// call has only been replaced by the call to the aggregate function.
1556///
1557/// \param A - The Argument to get the passed value for.
1558/// \param Region - The extracted Region corresponding to the outlined function.
1559/// \returns The Value representing \p A at the call site.
1560static Value *
1561getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
1562 const OutlinableRegion &Region) {
1563 unsigned ArgNum = A->getArgNo();
1564
1565 // If it is a constant, we can look at our mapping from when we created
1566 // the outputs to figure out what the constant value is.
1567 if (auto It = Region.AggArgToConstant.find(Val: ArgNum);
1568 It != Region.AggArgToConstant.end())
1569 return It->second;
1570
1571 // If it is not a constant, and we are not looking at the overall function, we
1572 // need to adjust which argument we are looking at.
1573 ArgNum = Region.AggArgToExtracted.find(Val: ArgNum)->second;
1574 return Region.Call->getArgOperand(i: ArgNum);
1575}
1576
1577/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1578///
1579/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1580/// \param Region [in] - The OutlinableRegion containing \p PN.
1581/// \param OutputMappings [in] - The mapping of output values from outlined
1582/// region to their original values.
1583/// \param CanonNums [out] - The canonical numbering for the incoming values to
1584/// \p PN paired with their incoming block.
1585/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1586/// of \p Region rather than the overall function's call.
1587static void findCanonNumsForPHI(
1588 PHINode *PN, OutlinableRegion &Region,
1589 const DenseMap<Value *, Value *> &OutputMappings,
1590 SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1591 bool ReplacedWithOutlinedCall = true) {
1592 // Iterate over the incoming values.
1593 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1594 Value *IVal = PN->getIncomingValue(i: Idx);
1595 BasicBlock *IBlock = PN->getIncomingBlock(i: Idx);
1596 // If we have an argument as incoming value, we need to grab the passed
1597 // value from the call itself.
1598 if (Argument *A = dyn_cast<Argument>(Val: IVal)) {
1599 if (ReplacedWithOutlinedCall)
1600 IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region);
1601 else
1602 IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region);
1603 }
1604
1605 // Get the original value if it has been replaced by an output value.
1606 IVal = findOutputMapping(OutputMappings, Input: IVal);
1607
1608 // Find and add the canonical number for the incoming value.
1609 std::optional<unsigned> GVN = Region.Candidate->getGVN(V: IVal);
1610 assert(GVN && "No GVN for incoming value");
1611 std::optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(N: *GVN);
1612 assert(CanonNum && "No Canonical Number for GVN");
1613 CanonNums.push_back(Elt: std::make_pair(x&: *CanonNum, y&: IBlock));
1614 }
1615}
1616
1617/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1618/// in order to condense the number of instructions added to the outlined
1619/// function.
1620///
1621/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1622/// \param Region [in] - The OutlinableRegion containing \p PN.
1623/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1624/// \p PN in.
1625/// \param OutputMappings [in] - The mapping of output values from outlined
1626/// region to their original values.
1627/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1628/// matched.
1629/// \return the newly found or created PHINode in \p OverallPhiBlock.
1630static PHINode*
1631findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
1632 BasicBlock *OverallPhiBlock,
1633 const DenseMap<Value *, Value *> &OutputMappings,
1634 DenseSet<PHINode *> &UsedPHIs) {
1635 OutlinableGroup &Group = *Region.Parent;
1636
1637
1638 // A list of the canonical numbering assigned to each incoming value, paired
1639 // with the incoming block for the PHINode passed into this function.
1640 SmallVector<std::pair<unsigned, BasicBlock *>> PNCanonNums;
1641
1642 // We have to use the extracted function since we have merged this region into
1643 // the overall function yet. We make sure to reassign the argument numbering
1644 // since it is possible that the argument ordering is different between the
1645 // functions.
1646 findCanonNumsForPHI(PN: &PN, Region, OutputMappings, CanonNums&: PNCanonNums,
1647 /* ReplacedWithOutlinedCall = */ false);
1648
1649 OutlinableRegion *FirstRegion = Group.Regions[0];
1650
1651 // A list of the canonical numbering assigned to each incoming value, paired
1652 // with the incoming block for the PHINode that we are currently comparing
1653 // the passed PHINode to.
1654 SmallVector<std::pair<unsigned, BasicBlock *>> CurrentCanonNums;
1655
1656 // Find the Canonical Numbering for each PHINode, if it matches, we replace
1657 // the uses of the PHINode we are searching for, with the found PHINode.
1658 for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1659 // If this PHINode has already been matched to another PHINode to be merged,
1660 // we skip it.
1661 if (UsedPHIs.contains(V: &CurrPN))
1662 continue;
1663
1664 CurrentCanonNums.clear();
1665 findCanonNumsForPHI(PN: &CurrPN, Region&: *FirstRegion, OutputMappings, CanonNums&: CurrentCanonNums,
1666 /* ReplacedWithOutlinedCall = */ true);
1667
1668 // If the list of incoming values is not the same length, then they cannot
1669 // match since there is not an analogue for each incoming value.
1670 if (PNCanonNums.size() != CurrentCanonNums.size())
1671 continue;
1672
1673 bool FoundMatch = true;
1674
1675 // We compare the canonical value for each incoming value in the passed
1676 // in PHINode to one already present in the outlined region. If the
1677 // incoming values do not match, then the PHINodes do not match.
1678
1679 // We also check to make sure that the incoming block matches as well by
1680 // finding the corresponding incoming block in the combined outlined region
1681 // for the current outlined region.
1682 for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1683 std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1684 std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1685 if (ToCompareTo.first != ToAdd.first) {
1686 FoundMatch = false;
1687 break;
1688 }
1689
1690 BasicBlock *CorrespondingBlock =
1691 Region.findCorrespondingBlockIn(Other: *FirstRegion, BB: ToAdd.second);
1692 assert(CorrespondingBlock && "Found block is nullptr");
1693 if (CorrespondingBlock != ToCompareTo.second) {
1694 FoundMatch = false;
1695 break;
1696 }
1697 }
1698
1699 // If all incoming values and branches matched, then we can merge
1700 // into the found PHINode.
1701 if (FoundMatch) {
1702 UsedPHIs.insert(V: &CurrPN);
1703 return &CurrPN;
1704 }
1705 }
1706
1707 // If we've made it here, it means we weren't able to replace the PHINode, so
1708 // we must insert it ourselves.
1709 PHINode *NewPN = cast<PHINode>(Val: PN.clone());
1710 NewPN->insertBefore(InsertPos: OverallPhiBlock->begin());
1711 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1712 Idx++) {
1713 Value *IncomingVal = NewPN->getIncomingValue(i: Idx);
1714 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(i: Idx);
1715
1716 // Find corresponding basic block in the overall function for the incoming
1717 // block.
1718 BasicBlock *BlockToUse =
1719 Region.findCorrespondingBlockIn(Other: *FirstRegion, BB: IncomingBlock);
1720 NewPN->setIncomingBlock(i: Idx, BB: BlockToUse);
1721
1722 // If we have an argument we make sure we replace using the argument from
1723 // the correct function.
1724 if (Argument *A = dyn_cast<Argument>(Val: IncomingVal)) {
1725 Value *Val = Group.OutlinedFunction->getArg(i: A->getArgNo());
1726 NewPN->setIncomingValue(i: Idx, V: Val);
1727 continue;
1728 }
1729
1730 // Find the corresponding value in the overall function.
1731 IncomingVal = findOutputMapping(OutputMappings, Input: IncomingVal);
1732 Value *Val = Region.findCorrespondingValueIn(Other: *FirstRegion, V: IncomingVal);
1733 assert(Val && "Value is nullptr?");
1734 DenseMap<Value *, Value *>::iterator RemappedIt =
1735 FirstRegion->RemappedArguments.find(Val);
1736 if (RemappedIt != FirstRegion->RemappedArguments.end())
1737 Val = RemappedIt->second;
1738 NewPN->setIncomingValue(i: Idx, V: Val);
1739 }
1740 return NewPN;
1741}
1742
1743// Within an extracted function, replace the argument uses of the extracted
1744// region with the arguments of the function for an OutlinableGroup.
1745//
1746/// \param [in] Region - The region of extracted code to be changed.
1747/// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1748/// region.
1749/// \param [in] FirstFunction - A flag to indicate whether we are using this
1750/// function to define the overall outlined function for all the regions, or
1751/// if we are operating on one of the following regions.
1752static void
1753replaceArgumentUses(OutlinableRegion &Region,
1754 DenseMap<Value *, BasicBlock *> &OutputBBs,
1755 const DenseMap<Value *, Value *> &OutputMappings,
1756 bool FirstFunction = false) {
1757 OutlinableGroup &Group = *Region.Parent;
1758 assert(Region.ExtractedFunction && "Region has no extracted function?");
1759
1760 Function *DominatingFunction = Region.ExtractedFunction;
1761 if (FirstFunction)
1762 DominatingFunction = Group.OutlinedFunction;
1763 DominatorTree DT(*DominatingFunction);
1764 DenseSet<PHINode *> UsedPHIs;
1765
1766 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1767 ArgIdx++) {
1768 assert(Region.ExtractedArgToAgg.contains(ArgIdx) &&
1769 "No mapping from extracted to outlined?");
1770 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(Val: ArgIdx)->second;
1771 Argument *AggArg = Group.OutlinedFunction->getArg(i: AggArgIdx);
1772 Argument *Arg = Region.ExtractedFunction->getArg(i: ArgIdx);
1773 // The argument is an input, so we can simply replace it with the overall
1774 // argument value
1775 if (ArgIdx < Region.NumExtractedInputs) {
1776 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1777 << *Region.ExtractedFunction << " with " << *AggArg
1778 << " in function " << *Group.OutlinedFunction << "\n");
1779 Arg->replaceAllUsesWith(V: AggArg);
1780 Value *V = Region.Call->getArgOperand(i: ArgIdx);
1781 Region.RemappedArguments.insert(KV: std::make_pair(x&: V, y&: AggArg));
1782 continue;
1783 }
1784
1785 // If we are replacing an output, we place the store value in its own
1786 // block inside the overall function before replacing the use of the output
1787 // in the function.
1788 assert(Arg->hasOneUse() && "Output argument can only have one use");
1789 User *InstAsUser = Arg->user_back();
1790 assert(InstAsUser && "User is nullptr!");
1791
1792 Instruction *I = cast<Instruction>(Val: InstAsUser);
1793 BasicBlock *BB = I->getParent();
1794 SmallVector<BasicBlock *, 4> Descendants;
1795 DT.getDescendants(R: BB, Result&: Descendants);
1796 bool EdgeAdded = false;
1797 if (Descendants.size() == 0) {
1798 EdgeAdded = true;
1799 DT.insertEdge(From: &DominatingFunction->getEntryBlock(), To: BB);
1800 DT.getDescendants(R: BB, Result&: Descendants);
1801 }
1802
1803 // Iterate over the following blocks, looking for return instructions,
1804 // if we find one, find the corresponding output block for the return value
1805 // and move our store instruction there.
1806 for (BasicBlock *DescendBB : Descendants) {
1807 ReturnInst *RI = dyn_cast<ReturnInst>(Val: DescendBB->getTerminator());
1808 if (!RI)
1809 continue;
1810 Value *RetVal = RI->getReturnValue();
1811 auto VBBIt = OutputBBs.find(Val: RetVal);
1812 assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1813
1814 // If this is storing a PHINode, we must make sure it is included in the
1815 // overall function.
1816 StoreInst *SI = cast<StoreInst>(Val: I);
1817
1818 Value *ValueOperand = SI->getValueOperand();
1819
1820 StoreInst *NewI = cast<StoreInst>(Val: I->clone());
1821 NewI->setDebugLoc(DebugLoc::getDropped());
1822 BasicBlock *OutputBB = VBBIt->second;
1823 NewI->insertInto(ParentBB: OutputBB, It: OutputBB->end());
1824 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1825 << *OutputBB << "\n");
1826
1827 // If this is storing a PHINode, we must make sure it is included in the
1828 // overall function.
1829 if (!isa<PHINode>(Val: ValueOperand) ||
1830 Region.Candidate->getGVN(V: ValueOperand).has_value()) {
1831 if (FirstFunction)
1832 continue;
1833 Value *CorrVal =
1834 Region.findCorrespondingValueIn(Other: *Group.Regions[0], V: ValueOperand);
1835 assert(CorrVal && "Value is nullptr?");
1836 NewI->setOperand(i_nocapture: 0, Val_nocapture: CorrVal);
1837 continue;
1838 }
1839 PHINode *PN = cast<PHINode>(Val: SI->getValueOperand());
1840 // If it has a value, it was not split by the code extractor, which
1841 // is what we are looking for.
1842 if (Region.Candidate->getGVN(V: PN))
1843 continue;
1844
1845 // We record the parent block for the PHINode in the Region so that
1846 // we can exclude it from checks later on.
1847 Region.PHIBlocks.insert(KV: std::make_pair(x&: RetVal, y: PN->getParent()));
1848
1849 // If this is the first function, we do not need to worry about mergiing
1850 // this with any other block in the overall outlined function, so we can
1851 // just continue.
1852 if (FirstFunction) {
1853 BasicBlock *PHIBlock = PN->getParent();
1854 Group.PHIBlocks.insert(KV: std::make_pair(x&: RetVal, y&: PHIBlock));
1855 continue;
1856 }
1857
1858 // We look for the aggregate block that contains the PHINodes leading into
1859 // this exit path. If we can't find one, we create one.
1860 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1861
1862 // For our PHINode, we find the combined canonical numbering, and
1863 // attempt to find a matching PHINode in the overall PHIBlock. If we
1864 // cannot, we copy the PHINode and move it into this new block.
1865 PHINode *NewPN = findOrCreatePHIInBlock(PN&: *PN, Region, OverallPhiBlock,
1866 OutputMappings, UsedPHIs);
1867 NewI->setOperand(i_nocapture: 0, Val_nocapture: NewPN);
1868 }
1869
1870 // If we added an edge for basic blocks without a predecessor, we remove it
1871 // here.
1872 if (EdgeAdded)
1873 DT.deleteEdge(From: &DominatingFunction->getEntryBlock(), To: BB);
1874 I->eraseFromParent();
1875
1876 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1877 << *Region.ExtractedFunction << " with " << *AggArg
1878 << " in function " << *Group.OutlinedFunction << "\n");
1879 Arg->replaceAllUsesWith(V: AggArg);
1880 }
1881}
1882
1883/// Within an extracted function, replace the constants that need to be lifted
1884/// into arguments with the actual argument.
1885///
1886/// \param Region [in] - The region of extracted code to be changed.
1887void replaceConstants(OutlinableRegion &Region) {
1888 OutlinableGroup &Group = *Region.Parent;
1889 Function *OutlinedFunction = Group.OutlinedFunction;
1890 ValueToValueMapTy VMap;
1891
1892 // Iterate over the constants that need to be elevated into arguments
1893 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1894 unsigned AggArgIdx = Const.first;
1895 assert(OutlinedFunction && "Overall Function is not defined?");
1896 Constant *CST = Const.second;
1897 Argument *Arg = Group.OutlinedFunction->getArg(i: AggArgIdx);
1898 // Identify the argument it will be elevated to, and replace instances of
1899 // that constant in the function.
1900 VMap[CST] = Arg;
1901 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1902 << " in function " << *OutlinedFunction << " with "
1903 << *Arg << '\n');
1904 }
1905
1906 RemapFunction(F&: *OutlinedFunction, VM&: VMap,
1907 Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1908}
1909
1910/// It is possible that there is a basic block that already performs the same
1911/// stores. This returns a duplicate block, if it exists
1912///
1913/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1914/// \param OutputStoreBBs [in] The existing output blocks.
1915/// \returns an optional value with the number output block if there is a match.
1916std::optional<unsigned> findDuplicateOutputBlock(
1917 DenseMap<Value *, BasicBlock *> &OutputBBs,
1918 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1919
1920 bool Mismatch = false;
1921 unsigned MatchingNum = 0;
1922 // We compare the new set output blocks to the other sets of output blocks.
1923 // If they are the same number, and have identical instructions, they are
1924 // considered to be the same.
1925 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1926 Mismatch = false;
1927 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1928 DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
1929 OutputBBs.find(Val: VToB.first);
1930 if (OutputBBIt == OutputBBs.end()) {
1931 Mismatch = true;
1932 break;
1933 }
1934
1935 BasicBlock *CompBB = VToB.second;
1936 BasicBlock *OutputBB = OutputBBIt->second;
1937 if (CompBB->size() - 1 != OutputBB->size()) {
1938 Mismatch = true;
1939 break;
1940 }
1941
1942 BasicBlock::iterator NIt = OutputBB->begin();
1943 for (Instruction &I : *CompBB) {
1944 if (isa<UncondBrInst, CondBrInst>(Val: &I))
1945 continue;
1946
1947 if (!I.isIdenticalTo(I: &(*NIt))) {
1948 Mismatch = true;
1949 break;
1950 }
1951
1952 NIt++;
1953 }
1954 }
1955
1956 if (!Mismatch)
1957 return MatchingNum;
1958
1959 MatchingNum++;
1960 }
1961
1962 return std::nullopt;
1963}
1964
1965/// Remove empty output blocks from the outlined region.
1966///
1967/// \param BlocksToPrune - Mapping of return values output blocks for the \p
1968/// Region.
1969/// \param Region - The OutlinableRegion we are analyzing.
1970static bool
1971analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
1972 OutlinableRegion &Region) {
1973 bool AllRemoved = true;
1974 Value *RetValueForBB;
1975 BasicBlock *NewBB;
1976 SmallVector<Value *, 4> ToRemove;
1977 // Iterate over the output blocks created in the outlined section.
1978 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
1979 RetValueForBB = VtoBB.first;
1980 NewBB = VtoBB.second;
1981
1982 // If there are no instructions, we remove it from the module, and also
1983 // mark the value for removal from the return value to output block mapping.
1984 if (NewBB->size() == 0) {
1985 NewBB->eraseFromParent();
1986 ToRemove.push_back(Elt: RetValueForBB);
1987 continue;
1988 }
1989
1990 // Mark that we could not remove all the blocks since they were not all
1991 // empty.
1992 AllRemoved = false;
1993 }
1994
1995 // Remove the return value from the mapping.
1996 for (Value *V : ToRemove)
1997 BlocksToPrune.erase(Val: V);
1998
1999 // Mark the region as having the no output scheme.
2000 if (AllRemoved)
2001 Region.OutputBlockNum = -1;
2002
2003 return AllRemoved;
2004}
2005
2006/// For the outlined section, move needed the StoreInsts for the output
2007/// registers into their own block. Then, determine if there is a duplicate
2008/// output block already created.
2009///
2010/// \param [in] OG - The OutlinableGroup of regions to be outlined.
2011/// \param [in] Region - The OutlinableRegion that is being analyzed.
2012/// \param [in,out] OutputBBs - the blocks that stores for this region will be
2013/// placed in.
2014/// \param [in] EndBBs - the final blocks of the extracted function.
2015/// \param [in] OutputMappings - OutputMappings the mapping of values that have
2016/// been replaced by a new output value.
2017/// \param [in,out] OutputStoreBBs - The existing output blocks.
2018static void alignOutputBlockWithAggFunc(
2019 OutlinableGroup &OG, OutlinableRegion &Region,
2020 DenseMap<Value *, BasicBlock *> &OutputBBs,
2021 DenseMap<Value *, BasicBlock *> &EndBBs,
2022 const DenseMap<Value *, Value *> &OutputMappings,
2023 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2024 // If none of the output blocks have any instructions, this means that we do
2025 // not have to determine if it matches any of the other output schemes, and we
2026 // don't have to do anything else.
2027 if (analyzeAndPruneOutputBlocks(BlocksToPrune&: OutputBBs, Region))
2028 return;
2029
2030 // Determine is there is a duplicate set of blocks.
2031 std::optional<unsigned> MatchingBB =
2032 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2033
2034 // If there is, we remove the new output blocks. If it does not,
2035 // we add it to our list of sets of output blocks.
2036 if (MatchingBB) {
2037 LLVM_DEBUG(dbgs() << "Set output block for region in function"
2038 << Region.ExtractedFunction << " to " << *MatchingBB);
2039
2040 Region.OutputBlockNum = *MatchingBB;
2041 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2042 VtoBB.second->eraseFromParent();
2043 return;
2044 }
2045
2046 Region.OutputBlockNum = OutputStoreBBs.size();
2047
2048 Value *RetValueForBB;
2049 BasicBlock *NewBB;
2050 OutputStoreBBs.push_back(x: DenseMap<Value *, BasicBlock *>());
2051 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2052 RetValueForBB = VtoBB.first;
2053 NewBB = VtoBB.second;
2054 DenseMap<Value *, BasicBlock *>::iterator VBBIt =
2055 EndBBs.find(Val: RetValueForBB);
2056 LLVM_DEBUG(dbgs() << "Create output block for region in"
2057 << Region.ExtractedFunction << " to "
2058 << *NewBB);
2059 UncondBrInst::Create(IfTrue: VBBIt->second, InsertBefore: NewBB);
2060 OutputStoreBBs.back().insert(KV: std::make_pair(x&: RetValueForBB, y&: NewBB));
2061 }
2062}
2063
2064/// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2065/// before creating a basic block for each \p NewMap, and inserting into the new
2066/// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2067///
2068/// \param OldMap [in] - The mapping to base the new mapping off of.
2069/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2070/// \param ParentFunc [in] - The function to put the new basic block in.
2071/// \param BaseName [in] - The start of the BasicBlock names to be appended to
2072/// by an index value.
2073static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
2074 DenseMap<Value *, BasicBlock *> &NewMap,
2075 Function *ParentFunc, Twine BaseName) {
2076 unsigned Idx = 0;
2077 std::vector<Value *> SortedKeys;
2078
2079 getSortedConstantKeys(SortedKeys, Map&: OldMap);
2080
2081 for (Value *RetVal : SortedKeys) {
2082 BasicBlock *NewBB = BasicBlock::Create(
2083 Context&: ParentFunc->getContext(), Name: Twine(BaseName) + Twine("_") + Twine(Idx++),
2084 Parent: ParentFunc);
2085 NewMap.insert(KV: std::make_pair(x&: RetVal, y&: NewBB));
2086 }
2087}
2088
2089/// Create the switch statement for outlined function to differentiate between
2090/// all the output blocks.
2091///
2092/// For the outlined section, determine if an outlined block already exists that
2093/// matches the needed stores for the extracted section.
2094/// \param [in] M - The module we are outlining from.
2095/// \param [in] OG - The group of regions to be outlined.
2096/// \param [in] EndBBs - The final blocks of the extracted function.
2097/// \param [in,out] OutputStoreBBs - The existing output blocks.
2098void createSwitchStatement(
2099 Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
2100 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2101 // We only need the switch statement if there is more than one store
2102 // combination, or there is more than one set of output blocks. The first
2103 // will occur when we store different sets of values for two different
2104 // regions. The second will occur when we have two outputs that are combined
2105 // in a PHINode outside of the region in one outlined instance, and are used
2106 // seaparately in another. This will create the same set of OutputGVNs, but
2107 // will generate two different output schemes.
2108 if (OG.OutputGVNCombinations.size() > 1) {
2109 Function *AggFunc = OG.OutlinedFunction;
2110 // Create a final block for each different return block.
2111 DenseMap<Value *, BasicBlock *> ReturnBBs;
2112 createAndInsertBasicBlocks(OldMap&: OG.EndBBs, NewMap&: ReturnBBs, ParentFunc: AggFunc, BaseName: "final_block");
2113
2114 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2115 std::pair<Value *, BasicBlock *> &OutputBlock =
2116 *OG.EndBBs.find(Val: RetBlockPair.first);
2117 BasicBlock *ReturnBlock = RetBlockPair.second;
2118 BasicBlock *EndBB = OutputBlock.second;
2119 Instruction *Term = EndBB->getTerminator();
2120 // Move the return value to the final block instead of the original exit
2121 // stub.
2122 Term->moveBefore(BB&: *ReturnBlock, I: ReturnBlock->end());
2123 // Put the switch statement in the old end basic block for the function
2124 // with a fall through to the new return block.
2125 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2126 << OutputStoreBBs.size() << "\n");
2127 SwitchInst *SwitchI =
2128 SwitchInst::Create(Value: AggFunc->getArg(i: AggFunc->arg_size() - 1),
2129 Default: ReturnBlock, NumCases: OutputStoreBBs.size(), InsertBefore: EndBB);
2130
2131 unsigned Idx = 0;
2132 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2133 DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
2134 OutputStoreBB.find(Val: OutputBlock.first);
2135
2136 if (OSBBIt == OutputStoreBB.end())
2137 continue;
2138
2139 BasicBlock *BB = OSBBIt->second;
2140 SwitchI->addCase(
2141 OnVal: ConstantInt::get(Ty: Type::getInt32Ty(C&: M.getContext()), V: Idx), Dest: BB);
2142 Term = BB->getTerminator();
2143 Term->setSuccessor(Idx: 0, BB: ReturnBlock);
2144 Idx++;
2145 }
2146 }
2147 return;
2148 }
2149
2150 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2151
2152 // If there needs to be stores, move them from the output blocks to their
2153 // corresponding ending block. We do not check that the OutputGVNCombinations
2154 // is equal to 1 here since that could just been the case where there are 0
2155 // outputs. Instead, we check whether there is more than one set of output
2156 // blocks since this is the only case where we would have to move the
2157 // stores, and erase the extraneous blocks.
2158 if (OutputStoreBBs.size() == 1) {
2159 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2160 << *OG.OutlinedFunction << "\n");
2161 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2162 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2163 DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
2164 EndBBs.find(Val: VBPair.first);
2165 assert(EndBBIt != EndBBs.end() && "Could not find end block");
2166 BasicBlock *EndBB = EndBBIt->second;
2167 BasicBlock *OutputBB = VBPair.second;
2168 Instruction *Term = OutputBB->getTerminator();
2169 Term->eraseFromParent();
2170 Term = EndBB->getTerminator();
2171 moveBBContents(SourceBB&: *OutputBB, TargetBB&: *EndBB);
2172 Term->moveBefore(BB&: *EndBB, I: EndBB->end());
2173 OutputBB->eraseFromParent();
2174 }
2175 }
2176}
2177
2178void IROutliner::fillOverallFunction(
2179 Module &M, OutlinableGroup &CurrentGroup,
2180 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2181 std::vector<Function *> &FuncsToRemove) {
2182 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2183
2184 TargetTransformInfo &TTI = getTTI(*CurrentOS->StartBB->getParent());
2185
2186 // Move first extracted function's instructions into new function.
2187 LLVM_DEBUG(dbgs() << "Move instructions from "
2188 << *CurrentOS->ExtractedFunction << " to instruction "
2189 << *CurrentGroup.OutlinedFunction << "\n");
2190 moveFunctionData(Old&: *CurrentOS->ExtractedFunction,
2191 New&: *CurrentGroup.OutlinedFunction, NewEnds&: CurrentGroup.EndBBs);
2192
2193 // Transfer the attributes from the function to the new function.
2194 for (Attribute A :
2195 CurrentOS->ExtractedFunction->getAttributes().getFnAttrs()) {
2196 if (!TTI.shouldCopyAttributeWhenOutliningFrom(Caller: CurrentOS->ExtractedFunction,
2197 Attr: A))
2198 continue;
2199
2200 CurrentGroup.OutlinedFunction->addFnAttr(Attr: A);
2201 }
2202
2203 // Create a new set of output blocks for the first extracted function.
2204 DenseMap<Value *, BasicBlock *> NewBBs;
2205 createAndInsertBasicBlocks(OldMap&: CurrentGroup.EndBBs, NewMap&: NewBBs,
2206 ParentFunc: CurrentGroup.OutlinedFunction, BaseName: "output_block_0");
2207 CurrentOS->OutputBlockNum = 0;
2208
2209 replaceArgumentUses(Region&: *CurrentOS, OutputBBs&: NewBBs, OutputMappings, FirstFunction: true);
2210 replaceConstants(Region&: *CurrentOS);
2211
2212 // We first identify if any output blocks are empty, if they are we remove
2213 // them. We then create a branch instruction to the basic block to the return
2214 // block for the function for each non empty output block.
2215 if (!analyzeAndPruneOutputBlocks(BlocksToPrune&: NewBBs, Region&: *CurrentOS)) {
2216 OutputStoreBBs.push_back(x: DenseMap<Value *, BasicBlock *>());
2217 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2218 DenseMap<Value *, BasicBlock *>::iterator VBBIt =
2219 CurrentGroup.EndBBs.find(Val: VToBB.first);
2220 BasicBlock *EndBB = VBBIt->second;
2221 UncondBrInst::Create(IfTrue: EndBB, InsertBefore: VToBB.second);
2222 OutputStoreBBs.back().insert(KV: VToBB);
2223 }
2224 }
2225
2226 // Replace the call to the extracted function with the outlined function.
2227 CurrentOS->Call = replaceCalledFunction(M, Region&: *CurrentOS);
2228
2229 // We only delete the extracted functions at the end since we may need to
2230 // reference instructions contained in them for mapping purposes.
2231 FuncsToRemove.push_back(x: CurrentOS->ExtractedFunction);
2232}
2233
2234void IROutliner::deduplicateExtractedSections(
2235 Module &M, OutlinableGroup &CurrentGroup,
2236 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2237 createFunction(M, Group&: CurrentGroup, FunctionNameSuffix: OutlinedFunctionNum);
2238
2239 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2240
2241 OutlinableRegion *CurrentOS;
2242
2243 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
2244
2245 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2246 CurrentOS = CurrentGroup.Regions[Idx];
2247 AttributeFuncs::mergeAttributesForOutlining(Base&: *CurrentGroup.OutlinedFunction,
2248 ToMerge: *CurrentOS->ExtractedFunction);
2249
2250 // Create a set of BasicBlocks, one for each return block, to hold the
2251 // needed store instructions.
2252 DenseMap<Value *, BasicBlock *> NewBBs;
2253 createAndInsertBasicBlocks(OldMap&: CurrentGroup.EndBBs, NewMap&: NewBBs,
2254 ParentFunc: CurrentGroup.OutlinedFunction,
2255 BaseName: "output_block_" + Twine(Idx));
2256 replaceArgumentUses(Region&: *CurrentOS, OutputBBs&: NewBBs, OutputMappings);
2257 alignOutputBlockWithAggFunc(OG&: CurrentGroup, Region&: *CurrentOS, OutputBBs&: NewBBs,
2258 EndBBs&: CurrentGroup.EndBBs, OutputMappings,
2259 OutputStoreBBs);
2260
2261 CurrentOS->Call = replaceCalledFunction(M, Region&: *CurrentOS);
2262 FuncsToRemove.push_back(x: CurrentOS->ExtractedFunction);
2263 }
2264
2265 // Create a switch statement to handle the different output schemes.
2266 createSwitchStatement(M, OG&: CurrentGroup, EndBBs&: CurrentGroup.EndBBs, OutputStoreBBs);
2267
2268 OutlinedFunctionNum++;
2269}
2270
2271/// Checks that the next instruction in the InstructionDataList matches the
2272/// next instruction in the module. If they do not, there could be the
2273/// possibility that extra code has been inserted, and we must ignore it.
2274///
2275/// \param ID - The IRInstructionData to check the next instruction of.
2276/// \returns true if the InstructionDataList and actual instruction match.
2277static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
2278 // We check if there is a discrepancy between the InstructionDataList
2279 // and the actual next instruction in the module. If there is, it means
2280 // that an extra instruction was added, likely by the CodeExtractor.
2281
2282 // Since we do not have any similarity data about this particular
2283 // instruction, we cannot confidently outline it, and must discard this
2284 // candidate.
2285 IRInstructionDataList::iterator NextIDIt = std::next(x: ID.getIterator());
2286 Instruction *NextIDLInst = NextIDIt->Inst;
2287 Instruction *NextModuleInst = nullptr;
2288 if (!ID.Inst->isTerminator())
2289 NextModuleInst = ID.Inst->getNextNode();
2290 else if (NextIDLInst != nullptr)
2291 NextModuleInst =
2292 &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
2293
2294 if (NextIDLInst && NextIDLInst != NextModuleInst)
2295 return false;
2296
2297 return true;
2298}
2299
2300bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2301 const OutlinableRegion &Region) {
2302 IRSimilarityCandidate *IRSC = Region.Candidate;
2303 unsigned StartIdx = IRSC->getStartIdx();
2304 unsigned EndIdx = IRSC->getEndIdx();
2305
2306 // A check to make sure that we are not about to attempt to outline something
2307 // that has already been outlined.
2308 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2309 if (Outlined.contains(V: Idx))
2310 return false;
2311
2312 // We check if the recorded instruction matches the actual next instruction,
2313 // if it does not, we fix it in the InstructionDataList.
2314 if (!Region.Candidate->backInstruction()->isTerminator()) {
2315 Instruction *NewEndInst =
2316 Region.Candidate->backInstruction()->getNextNode();
2317 assert(NewEndInst && "Next instruction is a nullptr?");
2318 if (Region.Candidate->end()->Inst != NewEndInst) {
2319 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2320 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2321 IRInstructionData(*NewEndInst,
2322 InstructionClassifier.visit(I&: *NewEndInst), *IDL);
2323
2324 // Insert the first IRInstructionData of the new region after the
2325 // last IRInstructionData of the IRSimilarityCandidate.
2326 IDL->insert(I: Region.Candidate->end(), Node&: *NewEndIRID);
2327 }
2328 }
2329
2330 return none_of(Range&: *IRSC, P: [this](IRInstructionData &ID) {
2331 if (!nextIRInstructionDataMatchesNextInst(ID))
2332 return true;
2333
2334 return !this->InstructionClassifier.visit(I: ID.Inst);
2335 });
2336}
2337
2338void IROutliner::pruneIncompatibleRegions(
2339 std::vector<IRSimilarityCandidate> &CandidateVec,
2340 OutlinableGroup &CurrentGroup) {
2341 bool PreviouslyOutlined;
2342
2343 // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2344 stable_sort(Range&: CandidateVec, C: [](const IRSimilarityCandidate &LHS,
2345 const IRSimilarityCandidate &RHS) {
2346 return LHS.getStartIdx() < RHS.getStartIdx();
2347 });
2348
2349 IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2350 // Since outlining a call and a branch instruction will be the same as only
2351 // outlinining a call instruction, we ignore it as a space saving.
2352 if (FirstCandidate.getLength() == 2) {
2353 if (isa<CallInst>(Val: FirstCandidate.front()->Inst) &&
2354 isa<UncondBrInst, CondBrInst>(Val: FirstCandidate.back()->Inst))
2355 return;
2356 }
2357
2358 unsigned CurrentEndIdx = 0;
2359 for (IRSimilarityCandidate &IRSC : CandidateVec) {
2360 PreviouslyOutlined = false;
2361 unsigned StartIdx = IRSC.getStartIdx();
2362 unsigned EndIdx = IRSC.getEndIdx();
2363 const Function &FnForCurrCand = *IRSC.getFunction();
2364
2365 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2366 if (Outlined.contains(V: Idx)) {
2367 PreviouslyOutlined = true;
2368 break;
2369 }
2370
2371 if (PreviouslyOutlined)
2372 continue;
2373
2374 // Check over the instructions, and if the basic block has its address
2375 // taken for use somewhere else, we do not outline that block.
2376 bool BBHasAddressTaken = any_of(Range&: IRSC, P: [](IRInstructionData &ID){
2377 return ID.Inst->getParent()->hasAddressTaken();
2378 });
2379
2380 if (BBHasAddressTaken)
2381 continue;
2382
2383 if (FnForCurrCand.hasOptNone())
2384 continue;
2385
2386 if (FnForCurrCand.hasFnAttribute(Kind: Attribute::NoOutline)) {
2387 LLVM_DEBUG({
2388 dbgs() << "... Skipping function with nooutline attribute: "
2389 << FnForCurrCand.getName() << "\n";
2390 });
2391 continue;
2392 }
2393
2394 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2395 !OutlineFromLinkODRs)
2396 continue;
2397
2398 // Greedily prune out any regions that will overlap with already chosen
2399 // regions.
2400 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2401 continue;
2402
2403 bool BadInst = any_of(Range&: IRSC, P: [this](IRInstructionData &ID) {
2404 if (!nextIRInstructionDataMatchesNextInst(ID))
2405 return true;
2406
2407 return !this->InstructionClassifier.visit(I: ID.Inst);
2408 });
2409
2410 if (BadInst)
2411 continue;
2412
2413 OutlinableRegion *OS = new (RegionAllocator.Allocate())
2414 OutlinableRegion(IRSC, CurrentGroup);
2415 CurrentGroup.Regions.push_back(x: OS);
2416
2417 CurrentEndIdx = EndIdx;
2418 }
2419}
2420
2421InstructionCost
2422IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2423 InstructionCost RegionBenefit = 0;
2424 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2425 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2426 // We add the number of instructions in the region to the benefit as an
2427 // estimate as to how much will be removed.
2428 RegionBenefit += Region->getBenefit(TTI);
2429 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2430 << " saved instructions to overfall benefit.\n");
2431 }
2432
2433 return RegionBenefit;
2434}
2435
2436/// For the \p OutputCanon number passed in find the value represented by this
2437/// canonical number. If it is from a PHINode, we pick the first incoming
2438/// value and return that Value instead.
2439///
2440/// \param Region - The OutlinableRegion to get the Value from.
2441/// \param OutputCanon - The canonical number to find the Value from.
2442/// \returns The Value represented by a canonical number \p OutputCanon in \p
2443/// Region.
2444static Value *findOutputValueInRegion(OutlinableRegion &Region,
2445 unsigned OutputCanon) {
2446 OutlinableGroup &CurrentGroup = *Region.Parent;
2447 // If the value is greater than the value in the tracker, we have a
2448 // PHINode and will instead use one of the incoming values to find the
2449 // type.
2450 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2451 auto It = CurrentGroup.PHINodeGVNToGVNs.find(Val: OutputCanon);
2452 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2453 "Could not find GVN set for PHINode number!");
2454 assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2455 OutputCanon = *It->second.second.begin();
2456 }
2457 std::optional<unsigned> OGVN =
2458 Region.Candidate->fromCanonicalNum(N: OutputCanon);
2459 assert(OGVN && "Could not find GVN for Canonical Number?");
2460 std::optional<Value *> OV = Region.Candidate->fromGVN(Num: *OGVN);
2461 assert(OV && "Could not find value for GVN?");
2462 return *OV;
2463}
2464
2465InstructionCost
2466IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2467 InstructionCost OverallCost = 0;
2468 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2469 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2470
2471 // Each output incurs a load after the call, so we add that to the cost.
2472 for (unsigned OutputCanon : Region->GVNStores) {
2473 Value *V = findOutputValueInRegion(Region&: *Region, OutputCanon);
2474 InstructionCost LoadCost =
2475 TTI.getMemoryOpCost(Opcode: Instruction::Load, Src: V->getType(), Alignment: Align(1), AddressSpace: 0,
2476 CostKind: TargetTransformInfo::TCK_CodeSize);
2477
2478 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2479 << " instructions to cost for output of type "
2480 << *V->getType() << "\n");
2481 OverallCost += LoadCost;
2482 }
2483 }
2484
2485 return OverallCost;
2486}
2487
2488/// Find the extra instructions needed to handle any output values for the
2489/// region.
2490///
2491/// \param [in] M - The Module to outline from.
2492/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2493/// \param [in] TTI - The TargetTransformInfo used to collect information for
2494/// new instruction costs.
2495/// \returns the additional cost to handle the outputs.
2496static InstructionCost findCostForOutputBlocks(Module &M,
2497 OutlinableGroup &CurrentGroup,
2498 TargetTransformInfo &TTI) {
2499 InstructionCost OutputCost = 0;
2500 unsigned NumOutputBranches = 0;
2501
2502 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2503 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2504 DenseSet<BasicBlock *> CandidateBlocks;
2505 Candidate.getBasicBlocks(BBSet&: CandidateBlocks);
2506
2507 // Count the number of different output branches that point to blocks outside
2508 // of the region.
2509 DenseSet<BasicBlock *> FoundBlocks;
2510 for (IRInstructionData &ID : Candidate) {
2511 if (!isa<UncondBrInst, CondBrInst>(Val: ID.Inst))
2512 continue;
2513
2514 for (Value *V : ID.OperVals) {
2515 BasicBlock *BB = static_cast<BasicBlock *>(V);
2516 if (!CandidateBlocks.contains(V: BB) && FoundBlocks.insert(V: BB).second)
2517 NumOutputBranches++;
2518 }
2519 }
2520
2521 CurrentGroup.BranchesToOutside = NumOutputBranches;
2522
2523 for (const ArrayRef<unsigned> &OutputUse :
2524 CurrentGroup.OutputGVNCombinations) {
2525 for (unsigned OutputCanon : OutputUse) {
2526 Value *V = findOutputValueInRegion(Region&: FirstRegion, OutputCanon);
2527 InstructionCost StoreCost =
2528 TTI.getMemoryOpCost(Opcode: Instruction::Load, Src: V->getType(), Alignment: Align(1), AddressSpace: 0,
2529 CostKind: TargetTransformInfo::TCK_CodeSize);
2530
2531 // An instruction cost is added for each store set that needs to occur for
2532 // various output combinations inside the function, plus a branch to
2533 // return to the exit block.
2534 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2535 << " instructions to cost for output of type "
2536 << *V->getType() << "\n");
2537 OutputCost += StoreCost * NumOutputBranches;
2538 }
2539
2540 InstructionCost BranchCost = TTI.getCFInstrCost(
2541 Opcode: Instruction::UncondBr, CostKind: TargetTransformInfo::TCK_CodeSize);
2542 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2543 << " a branch instruction\n");
2544 OutputCost += BranchCost * NumOutputBranches;
2545 }
2546
2547 // If there is more than one output scheme, we must have a comparison and
2548 // branch for each different item in the switch statement.
2549 if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2550 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2551 Opcode: Instruction::ICmp, ValTy: Type::getInt32Ty(C&: M.getContext()),
2552 CondTy: Type::getInt32Ty(C&: M.getContext()), VecPred: CmpInst::BAD_ICMP_PREDICATE,
2553 CostKind: TargetTransformInfo::TCK_CodeSize);
2554 InstructionCost BranchCost = TTI.getCFInstrCost(
2555 Opcode: Instruction::CondBr, CostKind: TargetTransformInfo::TCK_CodeSize);
2556
2557 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2558 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2559
2560 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2561 << " instructions for each switch case for each different"
2562 << " output path in a function\n");
2563 OutputCost += TotalCost * NumOutputBranches;
2564 }
2565
2566 return OutputCost;
2567}
2568
2569void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2570 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2571 CurrentGroup.Benefit += RegionBenefit;
2572 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2573
2574 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2575 CurrentGroup.Cost += OutputReloadCost;
2576 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2577
2578 InstructionCost AverageRegionBenefit =
2579 RegionBenefit / CurrentGroup.Regions.size();
2580 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2581 unsigned NumRegions = CurrentGroup.Regions.size();
2582 TargetTransformInfo &TTI =
2583 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2584
2585 // We add one region to the cost once, to account for the instructions added
2586 // inside of the newly created function.
2587 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2588 << " instructions to cost for body of new function.\n");
2589 CurrentGroup.Cost += AverageRegionBenefit;
2590 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2591
2592 // For each argument, we must add an instruction for loading the argument
2593 // out of the register and into a value inside of the newly outlined function.
2594 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2595 << " instructions to cost for each argument in the new"
2596 << " function.\n");
2597 CurrentGroup.Cost +=
2598 OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2599 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2600
2601 // Each argument needs to either be loaded into a register or onto the stack.
2602 // Some arguments will only be loaded into the stack once the argument
2603 // registers are filled.
2604 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2605 << " instructions to cost for each argument in the new"
2606 << " function " << NumRegions << " times for the "
2607 << "needed argument handling at the call site.\n");
2608 CurrentGroup.Cost +=
2609 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2610 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2611
2612 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2613 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2614}
2615
2616void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2617 ArrayRef<Value *> Outputs,
2618 LoadInst *LI) {
2619 // For and load instructions following the call
2620 Value *Operand = LI->getPointerOperand();
2621 std::optional<unsigned> OutputIdx;
2622 // Find if the operand it is an output register.
2623 for (unsigned ArgIdx = Region.NumExtractedInputs;
2624 ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2625 if (Operand == Region.Call->getArgOperand(i: ArgIdx)) {
2626 OutputIdx = ArgIdx - Region.NumExtractedInputs;
2627 break;
2628 }
2629 }
2630
2631 // If we found an output register, place a mapping of the new value
2632 // to the original in the mapping.
2633 if (!OutputIdx)
2634 return;
2635
2636 auto It = OutputMappings.find(Val: Outputs[*OutputIdx]);
2637 if (It == OutputMappings.end()) {
2638 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2639 << *Outputs[*OutputIdx] << "\n");
2640 OutputMappings.insert(KV: std::make_pair(x&: LI, y: Outputs[*OutputIdx]));
2641 } else {
2642 Value *Orig = It->second;
2643 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2644 << *Outputs[*OutputIdx] << "\n");
2645 OutputMappings.insert(KV: std::make_pair(x&: LI, y&: Orig));
2646 }
2647}
2648
2649bool IROutliner::extractSection(OutlinableRegion &Region) {
2650 SetVector<Value *> ArgInputs, Outputs;
2651 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2652 BasicBlock *InitialStart = Region.StartBB;
2653 Function *OrigF = Region.StartBB->getParent();
2654 CodeExtractorAnalysisCache CEAC(*OrigF);
2655 Region.ExtractedFunction =
2656 Region.CE->extractCodeRegion(CEAC, Inputs&: ArgInputs, Outputs);
2657
2658 // If the extraction was successful, find the BasicBlock, and reassign the
2659 // OutlinableRegion blocks
2660 if (!Region.ExtractedFunction) {
2661 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2662 << "\n");
2663 Region.reattachCandidate();
2664 return false;
2665 }
2666
2667 // Get the block containing the called branch, and reassign the blocks as
2668 // necessary. If the original block still exists, it is because we ended on
2669 // a branch instruction, and so we move the contents into the block before
2670 // and assign the previous block correctly.
2671 User *InstAsUser = Region.ExtractedFunction->user_back();
2672 BasicBlock *RewrittenBB = cast<Instruction>(Val: InstAsUser)->getParent();
2673 Region.PrevBB = RewrittenBB->getSinglePredecessor();
2674 assert(Region.PrevBB && "PrevBB is nullptr?");
2675 if (Region.PrevBB == InitialStart) {
2676 BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2677 Instruction *BI = NewPrev->getTerminator();
2678 BI->eraseFromParent();
2679 moveBBContents(SourceBB&: *InitialStart, TargetBB&: *NewPrev);
2680 Region.PrevBB = NewPrev;
2681 InitialStart->eraseFromParent();
2682 }
2683
2684 Region.StartBB = RewrittenBB;
2685 Region.EndBB = RewrittenBB;
2686
2687 // The sequences of outlinable regions has now changed. We must fix the
2688 // IRInstructionDataList for consistency. Although they may not be illegal
2689 // instructions, they should not be compared with anything else as they
2690 // should not be outlined in this round. So marking these as illegal is
2691 // allowed.
2692 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2693 Instruction *BeginRewritten = &*RewrittenBB->begin();
2694 Instruction *EndRewritten = &*RewrittenBB->begin();
2695 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2696 *BeginRewritten, InstructionClassifier.visit(I&: *BeginRewritten), *IDL);
2697 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2698 *EndRewritten, InstructionClassifier.visit(I&: *EndRewritten), *IDL);
2699
2700 // Insert the first IRInstructionData of the new region in front of the
2701 // first IRInstructionData of the IRSimilarityCandidate.
2702 IDL->insert(I: Region.Candidate->begin(), Node&: *Region.NewFront);
2703 // Insert the first IRInstructionData of the new region after the
2704 // last IRInstructionData of the IRSimilarityCandidate.
2705 IDL->insert(I: Region.Candidate->end(), Node&: *Region.NewBack);
2706 // Remove the IRInstructionData from the IRSimilarityCandidate.
2707 IDL->erase(First: Region.Candidate->begin(), Last: std::prev(x: Region.Candidate->end()));
2708
2709 assert(RewrittenBB != nullptr &&
2710 "Could not find a predecessor after extraction!");
2711
2712 // Iterate over the new set of instructions to find the new call
2713 // instruction.
2714 for (Instruction &I : *RewrittenBB)
2715 if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) {
2716 if (Region.ExtractedFunction == CI->getCalledFunction())
2717 Region.Call = CI;
2718 } else if (LoadInst *LI = dyn_cast<LoadInst>(Val: &I))
2719 updateOutputMapping(Region, Outputs: Outputs.getArrayRef(), LI);
2720 Region.reattachCandidate();
2721 return true;
2722}
2723
2724unsigned IROutliner::doOutline(Module &M) {
2725 // Find the possible similarity sections.
2726 InstructionClassifier.EnableBranches = !DisableBranches;
2727 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2728 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2729
2730 IRSimilarityIdentifier &Identifier = getIRSI(M);
2731 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2732
2733 // Sort them by size of extracted sections
2734 unsigned OutlinedFunctionNum = 0;
2735 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2736 // to sort them by the potential number of instructions to be outlined
2737 if (SimilarityCandidates.size() > 1)
2738 llvm::stable_sort(Range&: SimilarityCandidates,
2739 C: [](const std::vector<IRSimilarityCandidate> &LHS,
2740 const std::vector<IRSimilarityCandidate> &RHS) {
2741 return LHS[0].getLength() * LHS.size() >
2742 RHS[0].getLength() * RHS.size();
2743 });
2744 // Creating OutlinableGroups for each SimilarityCandidate to be used in
2745 // each of the following for loops to avoid making an allocator.
2746 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2747
2748 DenseSet<unsigned> NotSame;
2749 std::vector<OutlinableGroup *> NegativeCostGroups;
2750 std::vector<OutlinableRegion *> OutlinedRegions;
2751 // Iterate over the possible sets of similarity.
2752 unsigned PotentialGroupIdx = 0;
2753 for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2754 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2755
2756 // Remove entries that were previously outlined
2757 pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2758
2759 // We pruned the number of regions to 0 to 1, meaning that it's not worth
2760 // trying to outlined since there is no compatible similar instance of this
2761 // code.
2762 if (CurrentGroup.Regions.size() < 2)
2763 continue;
2764
2765 // Determine if there are any values that are the same constant throughout
2766 // each section in the set.
2767 NotSame.clear();
2768 CurrentGroup.findSameConstants(NotSame);
2769
2770 if (CurrentGroup.IgnoreGroup)
2771 continue;
2772
2773 // Create a CodeExtractor for each outlinable region. Identify inputs and
2774 // outputs for each section using the code extractor and create the argument
2775 // types for the Aggregate Outlining Function.
2776 OutlinedRegions.clear();
2777 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2778 // Break the outlinable region out of its parent BasicBlock into its own
2779 // BasicBlocks (see function implementation).
2780 OS->splitCandidate();
2781
2782 // There's a chance that when the region is split, extra instructions are
2783 // added to the region. This makes the region no longer viable
2784 // to be split, so we ignore it for outlining.
2785 if (!OS->CandidateSplit)
2786 continue;
2787
2788 SmallVector<BasicBlock *> BE;
2789 DenseSet<BasicBlock *> BlocksInRegion;
2790 OS->Candidate->getBasicBlocks(BBSet&: BlocksInRegion, BBList&: BE);
2791 OS->CE = new (ExtractorAllocator.Allocate())
2792 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2793 false, nullptr, "outlined");
2794 findAddInputsOutputs(M, Region&: *OS, NotSame);
2795 if (!OS->IgnoreRegion)
2796 OutlinedRegions.push_back(x: OS);
2797
2798 // We recombine the blocks together now that we have gathered all the
2799 // needed information.
2800 OS->reattachCandidate();
2801 }
2802
2803 CurrentGroup.Regions = std::move(OutlinedRegions);
2804
2805 if (CurrentGroup.Regions.empty())
2806 continue;
2807
2808 CurrentGroup.collectGVNStoreSets(M);
2809
2810 if (CostModel)
2811 findCostBenefit(M, CurrentGroup);
2812
2813 // If we are adhering to the cost model, skip those groups where the cost
2814 // outweighs the benefits.
2815 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2816 OptimizationRemarkEmitter &ORE =
2817 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2818 ORE.emit(RemarkBuilder: [&]() {
2819 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2820 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2821 C->frontInstruction());
2822 R << "did not outline "
2823 << ore::NV(std::to_string(val: CurrentGroup.Regions.size()))
2824 << " regions due to estimated increase of "
2825 << ore::NV("InstructionIncrease",
2826 CurrentGroup.Cost - CurrentGroup.Benefit)
2827 << " instructions at locations ";
2828 interleave(
2829 begin: CurrentGroup.Regions.begin(), end: CurrentGroup.Regions.end(),
2830 each_fn: [&R](OutlinableRegion *Region) {
2831 R << ore::NV(
2832 "DebugLoc",
2833 Region->Candidate->frontInstruction()->getDebugLoc());
2834 },
2835 between_fn: [&R]() { R << " "; });
2836 return R;
2837 });
2838 continue;
2839 }
2840
2841 NegativeCostGroups.push_back(x: &CurrentGroup);
2842 }
2843
2844 ExtractorAllocator.DestroyAll();
2845
2846 if (NegativeCostGroups.size() > 1)
2847 stable_sort(Range&: NegativeCostGroups,
2848 C: [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2849 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2850 });
2851
2852 std::vector<Function *> FuncsToRemove;
2853 for (OutlinableGroup *CG : NegativeCostGroups) {
2854 OutlinableGroup &CurrentGroup = *CG;
2855
2856 OutlinedRegions.clear();
2857 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2858 // We check whether our region is compatible with what has already been
2859 // outlined, and whether we need to ignore this item.
2860 if (!isCompatibleWithAlreadyOutlinedCode(Region: *Region))
2861 continue;
2862 OutlinedRegions.push_back(x: Region);
2863 }
2864
2865 if (OutlinedRegions.size() < 2)
2866 continue;
2867
2868 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2869 // we are still outlining enough regions to make up for the added cost.
2870 CurrentGroup.Regions = std::move(OutlinedRegions);
2871 if (CostModel) {
2872 CurrentGroup.Benefit = 0;
2873 CurrentGroup.Cost = 0;
2874 findCostBenefit(M, CurrentGroup);
2875 if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2876 continue;
2877 }
2878 OutlinedRegions.clear();
2879 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2880 Region->splitCandidate();
2881 if (!Region->CandidateSplit)
2882 continue;
2883 OutlinedRegions.push_back(x: Region);
2884 }
2885
2886 CurrentGroup.Regions = std::move(OutlinedRegions);
2887 if (CurrentGroup.Regions.size() < 2) {
2888 for (OutlinableRegion *R : CurrentGroup.Regions)
2889 R->reattachCandidate();
2890 continue;
2891 }
2892
2893 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2894 << " and benefit " << CurrentGroup.Benefit << "\n");
2895
2896 // Create functions out of all the sections, and mark them as outlined.
2897 OutlinedRegions.clear();
2898 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2899 SmallVector<BasicBlock *> BE;
2900 DenseSet<BasicBlock *> BlocksInRegion;
2901 OS->Candidate->getBasicBlocks(BBSet&: BlocksInRegion, BBList&: BE);
2902 OS->CE = new (ExtractorAllocator.Allocate())
2903 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2904 false, nullptr, "outlined");
2905 bool FunctionOutlined = extractSection(Region&: *OS);
2906 if (FunctionOutlined) {
2907 unsigned StartIdx = OS->Candidate->getStartIdx();
2908 unsigned EndIdx = OS->Candidate->getEndIdx();
2909 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2910 Outlined.insert(V: Idx);
2911
2912 OutlinedRegions.push_back(x: OS);
2913 }
2914 }
2915
2916 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2917 << " with benefit " << CurrentGroup.Benefit
2918 << " and cost " << CurrentGroup.Cost << "\n");
2919
2920 CurrentGroup.Regions = std::move(OutlinedRegions);
2921
2922 if (CurrentGroup.Regions.empty())
2923 continue;
2924
2925 OptimizationRemarkEmitter &ORE =
2926 getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2927 ORE.emit(RemarkBuilder: [&]() {
2928 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2929 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2930 R << "outlined " << ore::NV(std::to_string(val: CurrentGroup.Regions.size()))
2931 << " regions with decrease of "
2932 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2933 << " instructions at locations ";
2934 interleave(
2935 begin: CurrentGroup.Regions.begin(), end: CurrentGroup.Regions.end(),
2936 each_fn: [&R](OutlinableRegion *Region) {
2937 R << ore::NV("DebugLoc",
2938 Region->Candidate->frontInstruction()->getDebugLoc());
2939 },
2940 between_fn: [&R]() { R << " "; });
2941 return R;
2942 });
2943
2944 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2945 OutlinedFunctionNum);
2946 }
2947
2948 for (Function *F : FuncsToRemove)
2949 F->eraseFromParent();
2950
2951 return OutlinedFunctionNum;
2952}
2953
2954bool IROutliner::run(Module &M) {
2955 CostModel = !NoCostModel;
2956 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2957
2958 return doOutline(M) > 0;
2959}
2960
2961PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
2962 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(IR&: M).getManager();
2963
2964 std::function<TargetTransformInfo &(Function &)> GTTI =
2965 [&FAM](Function &F) -> TargetTransformInfo & {
2966 return FAM.getResult<TargetIRAnalysis>(IR&: F);
2967 };
2968
2969 std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
2970 [&AM](Module &M) -> IRSimilarityIdentifier & {
2971 return AM.getResult<IRSimilarityAnalysis>(IR&: M);
2972 };
2973
2974 std::unique_ptr<OptimizationRemarkEmitter> ORE;
2975 std::function<OptimizationRemarkEmitter &(Function &)> GORE =
2976 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2977 ORE.reset(p: new OptimizationRemarkEmitter(&F));
2978 return *ORE;
2979 };
2980
2981 if (IROutliner(GTTI, GIRSI, GORE).run(M))
2982 return PreservedAnalyses::none();
2983 return PreservedAnalyses::all();
2984}
2985