1 | //===- InlineFunction.cpp - Code to perform function inlining -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements inlining of a function into a call site, resolving |
10 | // parameters and the return value as appropriate. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/ADT/DenseMap.h" |
15 | #include "llvm/ADT/STLExtras.h" |
16 | #include "llvm/ADT/SetVector.h" |
17 | #include "llvm/ADT/SmallPtrSet.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/ADT/iterator_range.h" |
21 | #include "llvm/Analysis/AliasAnalysis.h" |
22 | #include "llvm/Analysis/AssumptionCache.h" |
23 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
24 | #include "llvm/Analysis/CallGraph.h" |
25 | #include "llvm/Analysis/CaptureTracking.h" |
26 | #include "llvm/Analysis/CtxProfAnalysis.h" |
27 | #include "llvm/Analysis/IndirectCallVisitor.h" |
28 | #include "llvm/Analysis/InstructionSimplify.h" |
29 | #include "llvm/Analysis/MemoryProfileInfo.h" |
30 | #include "llvm/Analysis/ObjCARCAnalysisUtils.h" |
31 | #include "llvm/Analysis/ObjCARCUtil.h" |
32 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
33 | #include "llvm/Analysis/ValueTracking.h" |
34 | #include "llvm/Analysis/VectorUtils.h" |
35 | #include "llvm/IR/Argument.h" |
36 | #include "llvm/IR/AttributeMask.h" |
37 | #include "llvm/IR/Attributes.h" |
38 | #include "llvm/IR/BasicBlock.h" |
39 | #include "llvm/IR/CFG.h" |
40 | #include "llvm/IR/Constant.h" |
41 | #include "llvm/IR/ConstantRange.h" |
42 | #include "llvm/IR/Constants.h" |
43 | #include "llvm/IR/DataLayout.h" |
44 | #include "llvm/IR/DebugInfo.h" |
45 | #include "llvm/IR/DebugInfoMetadata.h" |
46 | #include "llvm/IR/DebugLoc.h" |
47 | #include "llvm/IR/DerivedTypes.h" |
48 | #include "llvm/IR/Dominators.h" |
49 | #include "llvm/IR/EHPersonalities.h" |
50 | #include "llvm/IR/Function.h" |
51 | #include "llvm/IR/GlobalVariable.h" |
52 | #include "llvm/IR/IRBuilder.h" |
53 | #include "llvm/IR/InlineAsm.h" |
54 | #include "llvm/IR/InstrTypes.h" |
55 | #include "llvm/IR/Instruction.h" |
56 | #include "llvm/IR/Instructions.h" |
57 | #include "llvm/IR/IntrinsicInst.h" |
58 | #include "llvm/IR/Intrinsics.h" |
59 | #include "llvm/IR/LLVMContext.h" |
60 | #include "llvm/IR/MDBuilder.h" |
61 | #include "llvm/IR/Metadata.h" |
62 | #include "llvm/IR/Module.h" |
63 | #include "llvm/IR/PatternMatch.h" |
64 | #include "llvm/IR/ProfDataUtils.h" |
65 | #include "llvm/IR/Type.h" |
66 | #include "llvm/IR/User.h" |
67 | #include "llvm/IR/Value.h" |
68 | #include "llvm/Support/Casting.h" |
69 | #include "llvm/Support/CommandLine.h" |
70 | #include "llvm/Support/ErrorHandling.h" |
71 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
72 | #include "llvm/Transforms/Utils/Cloning.h" |
73 | #include "llvm/Transforms/Utils/Local.h" |
74 | #include "llvm/Transforms/Utils/ValueMapper.h" |
75 | #include <algorithm> |
76 | #include <cassert> |
77 | #include <cstdint> |
78 | #include <deque> |
79 | #include <iterator> |
80 | #include <limits> |
81 | #include <optional> |
82 | #include <string> |
83 | #include <utility> |
84 | #include <vector> |
85 | |
86 | #define DEBUG_TYPE "inline-function" |
87 | |
88 | using namespace llvm; |
89 | using namespace llvm::memprof; |
90 | using ProfileCount = Function::ProfileCount; |
91 | |
92 | static cl::opt<bool> |
93 | EnableNoAliasConversion("enable-noalias-to-md-conversion" , cl::init(Val: true), |
94 | cl::Hidden, |
95 | cl::desc("Convert noalias attributes to metadata during inlining." )); |
96 | |
97 | static cl::opt<bool> |
98 | UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining" , cl::Hidden, |
99 | cl::init(Val: true), |
100 | cl::desc("Use the llvm.experimental.noalias.scope.decl " |
101 | "intrinsic during inlining." )); |
102 | |
103 | // Disabled by default, because the added alignment assumptions may increase |
104 | // compile-time and block optimizations. This option is not suitable for use |
105 | // with frontends that emit comprehensive parameter alignment annotations. |
106 | static cl::opt<bool> |
107 | PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining" , |
108 | cl::init(Val: false), cl::Hidden, |
109 | cl::desc("Convert align attributes to assumptions during inlining." )); |
110 | |
111 | static cl::opt<unsigned> InlinerAttributeWindow( |
112 | "max-inst-checked-for-throw-during-inlining" , cl::Hidden, |
113 | cl::desc("the maximum number of instructions analyzed for may throw during " |
114 | "attribute inference in inlined body" ), |
115 | cl::init(Val: 4)); |
116 | |
117 | namespace { |
118 | |
119 | /// A class for recording information about inlining a landing pad. |
120 | class LandingPadInliningInfo { |
121 | /// Destination of the invoke's unwind. |
122 | BasicBlock *OuterResumeDest; |
123 | |
124 | /// Destination for the callee's resume. |
125 | BasicBlock *InnerResumeDest = nullptr; |
126 | |
127 | /// LandingPadInst associated with the invoke. |
128 | LandingPadInst *CallerLPad = nullptr; |
129 | |
130 | /// PHI for EH values from landingpad insts. |
131 | PHINode *InnerEHValuesPHI = nullptr; |
132 | |
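/// Incoming values (one per PHI node in OuterResumeDest) that arrived along
/// the original invoke's edge; used to add matching incoming values for new
/// predecessors of the unwind destination created during inlining.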
133 | SmallVector<Value*, 8> UnwindDestPHIValues; |
134 | |
135 | public: |
136 | LandingPadInliningInfo(InvokeInst *II) |
137 | : OuterResumeDest(II->getUnwindDest()) { |
138 | // If there are PHI nodes in the unwind destination block, we need to keep |
139 | // track of which values came into them from the invoke before removing |
140 | // the edge from this block. |
141 | BasicBlock *InvokeBB = II->getParent(); |
142 | BasicBlock::iterator I = OuterResumeDest->begin(); |
143 | for (; isa<PHINode>(Val: I); ++I) { |
144 | // Save the value to use for this edge. |
145 | PHINode *PHI = cast<PHINode>(Val&: I); |
146 | UnwindDestPHIValues.push_back(Elt: PHI->getIncomingValueForBlock(BB: InvokeBB)); |
147 | } |
148 | |
149 | CallerLPad = cast<LandingPadInst>(Val&: I); |
150 | } |
151 | |
152 | /// The outer unwind destination is the target of |
153 | /// unwind edges introduced for calls within the inlined function. |
154 | BasicBlock *getOuterResumeDest() const { |
155 | return OuterResumeDest; |
156 | } |
157 | |
158 | BasicBlock *getInnerResumeDest(); |
159 | |
160 | LandingPadInst *getLandingPadInst() const { return CallerLPad; } |
161 | |
162 | /// Forward the 'resume' instruction to the caller's landing pad block. |
163 | /// When the landing pad block has only one predecessor, this is |
164 | /// a simple branch. When there is more than one predecessor, we need to |
165 | /// split the landing pad block after the landingpad instruction and jump |
166 | /// to there. |
167 | void forwardResume(ResumeInst *RI, |
168 | SmallPtrSetImpl<LandingPadInst*> &InlinedLPads); |
169 | |
170 | /// Add incoming-PHI values to the unwind destination block for the given |
171 | /// basic block, using the values for the original invoke's source block. |
172 | void addIncomingPHIValuesFor(BasicBlock *BB) const { |
173 | addIncomingPHIValuesForInto(src: BB, dest: OuterResumeDest); |
174 | } |
175 | |
176 | void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { |
177 | BasicBlock::iterator I = dest->begin(); |
178 | for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { |
179 | PHINode *phi = cast<PHINode>(Val&: I); |
180 | phi->addIncoming(V: UnwindDestPHIValues[i], BB: src); |
181 | } |
182 | } |
183 | }; |
184 | } // end anonymous namespace |
185 | |
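/// Return the llvm.experimental.convergence.entry intrinsic call in BB, if
/// the block contains one; otherwise return nullptr.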
186 | static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) { |
187 | BasicBlock::iterator It = BB.getFirstNonPHIIt(); |
188 | while (It != BB.end()) { |
189 | if (auto *IntrinsicCall = dyn_cast<ConvergenceControlInst>(Val&: It)) { |
190 | if (IntrinsicCall->isEntry()) { |
191 | return IntrinsicCall; |
192 | } |
193 | } |
194 | It = std::next(x: It); |
195 | } |
196 | return nullptr; |
197 | } |
198 | |
199 | /// Get or create a target for the branch from ResumeInsts. |
200 | BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { |
201 | if (InnerResumeDest) return InnerResumeDest; |
202 | |
203 | // Split the landing pad. |
204 | BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); |
205 | InnerResumeDest = |
206 | OuterResumeDest->splitBasicBlock(I: SplitPoint, |
207 | BBName: OuterResumeDest->getName() + ".body" ); |
208 | |
209 | // The number of incoming edges we expect to the inner landing pad. |
210 | const unsigned PHICapacity = 2; |
211 | |
212 | // Create corresponding new PHIs for all the PHIs in the outer landing pad. |
213 | BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); |
214 | BasicBlock::iterator I = OuterResumeDest->begin(); |
215 | for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { |
216 | PHINode *OuterPHI = cast<PHINode>(Val&: I); |
217 | PHINode *InnerPHI = PHINode::Create(Ty: OuterPHI->getType(), NumReservedValues: PHICapacity, |
218 | NameStr: OuterPHI->getName() + ".lpad-body" ); |
219 | InnerPHI->insertBefore(InsertPos: InsertPoint); |
220 | OuterPHI->replaceAllUsesWith(V: InnerPHI); |
221 | InnerPHI->addIncoming(V: OuterPHI, BB: OuterResumeDest); |
222 | } |
223 | |
224 | // Create a PHI for the exception values. |
225 | InnerEHValuesPHI = |
226 | PHINode::Create(Ty: CallerLPad->getType(), NumReservedValues: PHICapacity, NameStr: "eh.lpad-body" ); |
227 | InnerEHValuesPHI->insertBefore(InsertPos: InsertPoint); |
228 | CallerLPad->replaceAllUsesWith(V: InnerEHValuesPHI); |
229 | InnerEHValuesPHI->addIncoming(V: CallerLPad, BB: OuterResumeDest); |
230 | |
231 | // All done. |
232 | return InnerResumeDest; |
233 | } |
234 | |
235 | /// Forward the 'resume' instruction to the caller's landing pad block. |
236 | /// When the landing pad block has only one predecessor, this is a simple |
237 | /// branch. When there is more than one predecessor, we need to split the |
238 | /// landing pad block after the landingpad instruction and jump to there. |
239 | void LandingPadInliningInfo::forwardResume( |
240 | ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) { |
241 | BasicBlock *Dest = getInnerResumeDest(); |
242 | BasicBlock *Src = RI->getParent(); |
243 | |
244 | auto *BI = BranchInst::Create(IfTrue: Dest, InsertBefore: Src); |
245 | BI->setDebugLoc(RI->getDebugLoc()); |
246 | |
247 | // Update the PHIs in the destination. They were inserted in an order which |
248 | // makes this work. |
249 | addIncomingPHIValuesForInto(src: Src, dest: Dest); |
250 | |
251 | InnerEHValuesPHI->addIncoming(V: RI->getOperand(i_nocapture: 0), BB: Src); |
252 | RI->eraseFromParent(); |
253 | } |
254 | |
255 | /// Helper for getUnwindDestToken/getUnwindDestTokenHelper. |
256 | static Value *getParentPad(Value *EHPad) { |
257 | if (auto *FPI = dyn_cast<FuncletPadInst>(Val: EHPad)) |
258 | return FPI->getParentPad(); |
259 | return cast<CatchSwitchInst>(Val: EHPad)->getParentPad(); |
260 | } |
261 | |
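/// Memo map used by getUnwindDestToken: maps an EH pad to the token of its
/// resolved unwind destination, or to nullptr if no information was found.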
262 | using UnwindDestMemoTy = DenseMap<Instruction *, Value *>; |
263 | |
264 | /// Helper for getUnwindDestToken that does the descendant-ward part of |
265 | /// the search. |
266 | static Value *getUnwindDestTokenHelper(Instruction *EHPad, |
267 | UnwindDestMemoTy &MemoMap) { |
268 | SmallVector<Instruction *, 8> Worklist(1, EHPad); |
269 | |
270 | while (!Worklist.empty()) { |
271 | Instruction *CurrentPad = Worklist.pop_back_val(); |
272 | // We only put pads on the worklist that aren't in the MemoMap. When |
273 | // we find an unwind dest for a pad we may update its ancestors, but |
274 | // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, |
275 | // so they should never get updated while queued on the worklist. |
276 | assert(!MemoMap.count(CurrentPad)); |
277 | Value *UnwindDestToken = nullptr; |
278 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: CurrentPad)) { |
279 | if (CatchSwitch->hasUnwindDest()) { |
280 | UnwindDestToken = &*CatchSwitch->getUnwindDest()->getFirstNonPHIIt(); |
281 | } else { |
282 | // Catchswitch doesn't have a 'nounwind' variant, and one might be |
283 | // annotated as "unwinds to caller" when really it's nounwind (see |
284 | // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the |
285 | // parent's unwind dest from this. We can check its catchpads' |
286 | // descendants, since they might include a cleanuppad with an |
287 | // "unwinds to caller" cleanupret, which can be trusted. |
288 | for (auto HI = CatchSwitch->handler_begin(), |
289 | HE = CatchSwitch->handler_end(); |
290 | HI != HE && !UnwindDestToken; ++HI) { |
291 | BasicBlock *HandlerBlock = *HI; |
292 | auto *CatchPad = |
293 | cast<CatchPadInst>(Val: &*HandlerBlock->getFirstNonPHIIt()); |
294 | for (User *Child : CatchPad->users()) { |
295 | // Intentionally ignore invokes here -- since the catchswitch is |
296 | // marked "unwind to caller", it would be a verifier error if it |
297 | // contained an invoke which unwinds out of it, so any invoke we'd |
298 | // encounter must unwind to some child of the catch. |
299 | if (!isa<CleanupPadInst>(Val: Child) && !isa<CatchSwitchInst>(Val: Child)) |
300 | continue; |
301 | |
302 | Instruction *ChildPad = cast<Instruction>(Val: Child); |
303 | auto Memo = MemoMap.find(Val: ChildPad); |
304 | if (Memo == MemoMap.end()) { |
305 | // Haven't figured out this child pad yet; queue it. |
306 | Worklist.push_back(Elt: ChildPad); |
307 | continue; |
308 | } |
309 | // We've already checked this child, but might have found that |
310 | // it offers no proof either way. |
311 | Value *ChildUnwindDestToken = Memo->second; |
312 | if (!ChildUnwindDestToken) |
313 | continue; |
314 | // We already know the child's unwind dest, which can either |
315 | // be ConstantTokenNone to indicate unwind to caller, or can |
316 | // be another child of the catchpad. Only the former indicates |
317 | // the unwind dest of the catchswitch. |
318 | if (isa<ConstantTokenNone>(Val: ChildUnwindDestToken)) { |
319 | UnwindDestToken = ChildUnwindDestToken; |
320 | break; |
321 | } |
322 | assert(getParentPad(ChildUnwindDestToken) == CatchPad); |
323 | } |
324 | } |
325 | } |
326 | } else { |
327 | auto *CleanupPad = cast<CleanupPadInst>(Val: CurrentPad); |
328 | for (User *U : CleanupPad->users()) { |
329 | if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: U)) { |
330 | if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) |
331 | UnwindDestToken = &*RetUnwindDest->getFirstNonPHIIt(); |
332 | else |
333 | UnwindDestToken = ConstantTokenNone::get(Context&: CleanupPad->getContext()); |
334 | break; |
335 | } |
336 | Value *ChildUnwindDestToken; |
337 | if (auto *Invoke = dyn_cast<InvokeInst>(Val: U)) { |
338 | ChildUnwindDestToken = &*Invoke->getUnwindDest()->getFirstNonPHIIt(); |
339 | } else if (isa<CleanupPadInst>(Val: U) || isa<CatchSwitchInst>(Val: U)) { |
340 | Instruction *ChildPad = cast<Instruction>(Val: U); |
341 | auto Memo = MemoMap.find(Val: ChildPad); |
342 | if (Memo == MemoMap.end()) { |
343 | // Haven't resolved this child yet; queue it and keep searching. |
344 | Worklist.push_back(Elt: ChildPad); |
345 | continue; |
346 | } |
347 | // We've checked this child, but still need to ignore it if it |
348 | // had no proof either way. |
349 | ChildUnwindDestToken = Memo->second; |
350 | if (!ChildUnwindDestToken) |
351 | continue; |
352 | } else { |
353 | // Not a relevant user of the cleanuppad |
354 | continue; |
355 | } |
356 | // In a well-formed program, the child/invoke must either unwind to |
357 | // an(other) child of the cleanup, or exit the cleanup. In the |
358 | // first case, continue searching. |
359 | if (isa<Instruction>(Val: ChildUnwindDestToken) && |
360 | getParentPad(EHPad: ChildUnwindDestToken) == CleanupPad) |
361 | continue; |
362 | UnwindDestToken = ChildUnwindDestToken; |
363 | break; |
364 | } |
365 | } |
366 | // If we haven't found an unwind dest for CurrentPad, we may have queued its |
367 | // children, so move on to the next in the worklist. |
368 | if (!UnwindDestToken) |
369 | continue; |
370 | |
371 | // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits |
372 | // any ancestors of CurrentPad up to but not including UnwindDestToken's |
373 | // parent pad. Record this in the memo map, and check to see if the |
374 | // original EHPad being queried is one of the ones exited. |
375 | Value *UnwindParent; |
376 | if (auto *UnwindPad = dyn_cast<Instruction>(Val: UnwindDestToken)) |
377 | UnwindParent = getParentPad(EHPad: UnwindPad); |
378 | else |
379 | UnwindParent = nullptr; |
380 | bool ExitedOriginalPad = false; |
381 | for (Instruction *ExitedPad = CurrentPad; |
382 | ExitedPad && ExitedPad != UnwindParent; |
383 | ExitedPad = dyn_cast<Instruction>(Val: getParentPad(EHPad: ExitedPad))) { |
384 | // Skip over catchpads since they just follow their catchswitches. |
385 | if (isa<CatchPadInst>(Val: ExitedPad)) |
386 | continue; |
387 | MemoMap[ExitedPad] = UnwindDestToken; |
388 | ExitedOriginalPad |= (ExitedPad == EHPad); |
389 | } |
390 | |
391 | if (ExitedOriginalPad) |
392 | return UnwindDestToken; |
393 | |
394 | // Continue the search. |
395 | } |
396 | |
397 | // No definitive information is contained within this funclet. |
398 | return nullptr; |
399 | } |
400 | |
401 | /// Given an EH pad, find where it unwinds. If it unwinds to an EH pad, |
402 | /// return that pad instruction. If it unwinds to caller, return |
403 | /// ConstantTokenNone. If it does not have a definitive unwind destination, |
404 | /// return nullptr. |
405 | /// |
406 | /// This routine gets invoked for calls in funclets in inlinees when inlining |
407 | /// an invoke. Since many funclets don't have calls inside them, it's queried |
408 | /// on-demand rather than building a map of pads to unwind dests up front. |
409 | /// Determining a funclet's unwind dest may require recursively searching its |
410 | /// descendants, and also ancestors and cousins if the descendants don't provide |
411 | /// an answer. Since most funclets will have their unwind dest immediately |
412 | /// available as the unwind dest of a catchswitch or cleanupret, this routine |
413 | /// searches top-down from the given pad and then up. To avoid worst-case |
414 | /// quadratic run-time given that approach, it uses a memo map to avoid |
415 | /// re-processing funclet trees. The callers that rewrite the IR as they go |
416 | /// take advantage of this, for correctness, by checking/forcing rewritten |
417 | /// pads' entries to match the original callee view. |
418 | static Value *getUnwindDestToken(Instruction *EHPad, |
419 | UnwindDestMemoTy &MemoMap) { |
420 | // Catchpads unwind to the same place as their catchswitch; |
// redirect any queries on catchpads so the code below can
422 | // deal with just catchswitches and cleanuppads. |
423 | if (auto *CPI = dyn_cast<CatchPadInst>(Val: EHPad)) |
424 | EHPad = CPI->getCatchSwitch(); |
425 | |
426 | // Check if we've already determined the unwind dest for this pad. |
427 | auto Memo = MemoMap.find(Val: EHPad); |
428 | if (Memo != MemoMap.end()) |
429 | return Memo->second; |
430 | |
431 | // Search EHPad and, if necessary, its descendants. |
432 | Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); |
433 | assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); |
434 | if (UnwindDestToken) |
435 | return UnwindDestToken; |
436 | |
437 | // No information is available for this EHPad from itself or any of its |
438 | // descendants. An unwind all the way out to a pad in the caller would |
439 | // need also to agree with the unwind dest of the parent funclet, so |
440 | // search up the chain to try to find a funclet with information. Put |
441 | // null entries in the memo map to avoid re-processing as we go up. |
442 | MemoMap[EHPad] = nullptr; |
443 | #ifndef NDEBUG |
444 | SmallPtrSet<Instruction *, 4> TempMemos; |
445 | TempMemos.insert(EHPad); |
446 | #endif |
447 | Instruction *LastUselessPad = EHPad; |
448 | Value *AncestorToken; |
449 | for (AncestorToken = getParentPad(EHPad); |
450 | auto *AncestorPad = dyn_cast<Instruction>(Val: AncestorToken); |
451 | AncestorToken = getParentPad(EHPad: AncestorToken)) { |
452 | // Skip over catchpads since they just follow their catchswitches. |
453 | if (isa<CatchPadInst>(Val: AncestorPad)) |
454 | continue; |
455 | // If the MemoMap had an entry mapping AncestorPad to nullptr, since we |
456 | // haven't yet called getUnwindDestTokenHelper for AncestorPad in this |
457 | // call to getUnwindDestToken, that would mean that AncestorPad had no |
458 | // information in itself, its descendants, or its ancestors. If that |
459 | // were the case, then we should also have recorded the lack of information |
460 | // for the descendant that we're coming from. So assert that we don't |
461 | // find a null entry in the MemoMap for AncestorPad. |
462 | assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); |
463 | auto AncestorMemo = MemoMap.find(Val: AncestorPad); |
464 | if (AncestorMemo == MemoMap.end()) { |
465 | UnwindDestToken = getUnwindDestTokenHelper(EHPad: AncestorPad, MemoMap); |
466 | } else { |
467 | UnwindDestToken = AncestorMemo->second; |
468 | } |
469 | if (UnwindDestToken) |
470 | break; |
471 | LastUselessPad = AncestorPad; |
472 | MemoMap[LastUselessPad] = nullptr; |
473 | #ifndef NDEBUG |
474 | TempMemos.insert(LastUselessPad); |
475 | #endif |
476 | } |
477 | |
478 | // We know that getUnwindDestTokenHelper was called on LastUselessPad and |
479 | // returned nullptr (and likewise for EHPad and any of its ancestors up to |
480 | // LastUselessPad), so LastUselessPad has no information from below. Since |
481 | // getUnwindDestTokenHelper must investigate all downward paths through |
482 | // no-information nodes to prove that a node has no information like this, |
483 | // and since any time it finds information it records it in the MemoMap for |
484 | // not just the immediately-containing funclet but also any ancestors also |
485 | // exited, it must be the case that, walking downward from LastUselessPad, |
486 | // visiting just those nodes which have not been mapped to an unwind dest |
487 | // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since |
488 | // they are just used to keep getUnwindDestTokenHelper from repeating work), |
489 | // any node visited must have been exhaustively searched with no information |
490 | // for it found. |
491 | SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); |
492 | while (!Worklist.empty()) { |
493 | Instruction *UselessPad = Worklist.pop_back_val(); |
494 | auto Memo = MemoMap.find(Val: UselessPad); |
495 | if (Memo != MemoMap.end() && Memo->second) { |
496 | // Here the name 'UselessPad' is a bit of a misnomer, because we've found |
497 | // that it is a funclet that does have information about unwinding to |
498 | // a particular destination; its parent was a useless pad. |
499 | // Since its parent has no information, the unwind edge must not escape |
500 | // the parent, and must target a sibling of this pad. This local unwind |
501 | // gives us no information about EHPad. Leave it and the subtree rooted |
502 | // at it alone. |
503 | assert(getParentPad(Memo->second) == getParentPad(UselessPad)); |
504 | continue; |
505 | } |
// We know we don't have information for UselessPad. If it has an entry in
507 | // the MemoMap (mapping it to nullptr), it must be one of the TempMemos |
508 | // added on this invocation of getUnwindDestToken; if a previous invocation |
509 | // recorded nullptr, it would have had to prove that the ancestors of |
510 | // UselessPad, which include LastUselessPad, had no information, and that |
511 | // in turn would have required proving that the descendants of |
// LastUselessPad, which include EHPad, have no information about
513 | // LastUselessPad, which would imply that EHPad was mapped to nullptr in |
514 | // the MemoMap on that invocation, which isn't the case if we got here. |
515 | assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad)); |
516 | // Assert as we enumerate users that 'UselessPad' doesn't have any unwind |
517 | // information that we'd be contradicting by making a map entry for it |
518 | // (which is something that getUnwindDestTokenHelper must have proved for |
// us to get here). Just assert on its direct users here; the checks in
520 | // this downward walk at its descendants will verify that they don't have |
521 | // any unwind edges that exit 'UselessPad' either (i.e. they either have no |
522 | // unwind edges or unwind to a sibling). |
523 | MemoMap[UselessPad] = UnwindDestToken; |
524 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val: UselessPad)) { |
525 | assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad" ); |
526 | for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) { |
527 | auto *CatchPad = &*HandlerBlock->getFirstNonPHIIt(); |
528 | for (User *U : CatchPad->users()) { |
529 | assert((!isa<InvokeInst>(U) || |
530 | (getParentPad(&*cast<InvokeInst>(U) |
531 | ->getUnwindDest() |
532 | ->getFirstNonPHIIt()) == CatchPad)) && |
533 | "Expected useless pad" ); |
534 | if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U)) |
535 | Worklist.push_back(Elt: cast<Instruction>(Val: U)); |
536 | } |
537 | } |
538 | } else { |
539 | assert(isa<CleanupPadInst>(UselessPad)); |
540 | for (User *U : UselessPad->users()) { |
541 | assert(!isa<CleanupReturnInst>(U) && "Expected useless pad" ); |
542 | assert( |
543 | (!isa<InvokeInst>(U) || |
544 | (getParentPad( |
545 | &*cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHIIt()) == |
546 | UselessPad)) && |
547 | "Expected useless pad" ); |
548 | if (isa<CatchSwitchInst>(Val: U) || isa<CleanupPadInst>(Val: U)) |
549 | Worklist.push_back(Elt: cast<Instruction>(Val: U)); |
550 | } |
551 | } |
552 | } |
553 | |
554 | return UnwindDestToken; |
555 | } |
556 | |
557 | /// When we inline a basic block into an invoke, |
558 | /// we have to turn all of the calls that can throw into invokes. |
/// This function analyzes BB to see if there are any such calls, and if so,
/// it rewrites them to be invokes that unwind to UnwindEdge. The caller is
/// responsible for updating the PHI nodes in the unwind destination for the
/// newly introduced predecessor block.
562 | static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( |
563 | BasicBlock *BB, BasicBlock *UnwindEdge, |
564 | UnwindDestMemoTy *FuncletUnwindMap = nullptr) { |
565 | for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) { |
566 | // We only need to check for function calls: inlined invoke |
567 | // instructions require no special handling. |
568 | CallInst *CI = dyn_cast<CallInst>(Val: &I); |
569 | |
570 | if (!CI || CI->doesNotThrow()) |
571 | continue; |
572 | |
573 | // We do not need to (and in fact, cannot) convert possibly throwing calls |
// to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
575 | // invokes. The caller's "segment" of the deoptimization continuation |
// attached to the newly inlined @llvm.experimental.deoptimize
577 | // (resp. @llvm.experimental.guard) call should contain the exception |
578 | // handling logic, if any. |
579 | if (auto *F = CI->getCalledFunction()) |
580 | if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || |
581 | F->getIntrinsicID() == Intrinsic::experimental_guard) |
582 | continue; |
583 | |
584 | if (auto FuncletBundle = CI->getOperandBundle(ID: LLVMContext::OB_funclet)) { |
585 | // This call is nested inside a funclet. If that funclet has an unwind |
586 | // destination within the inlinee, then unwinding out of this call would |
587 | // be UB. Rewriting this call to an invoke which targets the inlined |
588 | // invoke's unwind dest would give the call's parent funclet multiple |
589 | // unwind destinations, which is something that subsequent EH table |
// generation can't handle and that the verifier rejects. So when we
591 | // see such a call, leave it as a call. |
592 | auto *FuncletPad = cast<Instruction>(Val: FuncletBundle->Inputs[0]); |
593 | Value *UnwindDestToken = |
594 | getUnwindDestToken(EHPad: FuncletPad, MemoMap&: *FuncletUnwindMap); |
595 | if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken)) |
596 | continue; |
597 | #ifndef NDEBUG |
598 | Instruction *MemoKey; |
599 | if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) |
600 | MemoKey = CatchPad->getCatchSwitch(); |
601 | else |
602 | MemoKey = FuncletPad; |
603 | assert(FuncletUnwindMap->count(MemoKey) && |
604 | (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && |
605 | "must get memoized to avoid confusing later searches" ); |
606 | #endif // NDEBUG |
607 | } |
608 | |
609 | changeToInvokeAndSplitBasicBlock(CI, UnwindEdge); |
610 | return BB; |
611 | } |
612 | return nullptr; |
613 | } |
614 | |
615 | /// If we inlined an invoke site, we need to convert calls |
616 | /// in the body of the inlined function into invokes. |
617 | /// |
618 | /// II is the invoke instruction being inlined. FirstNewBlock is the first |
619 | /// block of the inlined code (the last block is the end of the function), |
620 | /// and InlineCodeInfo is information about the code that got inlined. |
621 | static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, |
622 | ClonedCodeInfo &InlinedCodeInfo) { |
623 | BasicBlock *InvokeDest = II->getUnwindDest(); |
624 | |
625 | Function *Caller = FirstNewBlock->getParent(); |
626 | |
627 | // The inlined code is currently at the end of the function, scan from the |
628 | // start of the inlined code to its end, checking for stuff we need to |
629 | // rewrite. |
630 | LandingPadInliningInfo Invoke(II); |
631 | |
632 | // Get all of the inlined landing pad instructions. |
633 | SmallPtrSet<LandingPadInst*, 16> InlinedLPads; |
634 | for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); |
635 | I != E; ++I) |
636 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: I->getTerminator())) |
637 | InlinedLPads.insert(Ptr: II->getLandingPadInst()); |
638 | |
639 | // Append the clauses from the outer landing pad instruction into the inlined |
640 | // landing pad instructions. |
641 | LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); |
642 | for (LandingPadInst *InlinedLPad : InlinedLPads) { |
643 | unsigned OuterNum = OuterLPad->getNumClauses(); |
644 | InlinedLPad->reserveClauses(Size: OuterNum); |
645 | for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) |
646 | InlinedLPad->addClause(ClauseVal: OuterLPad->getClause(Idx: OuterIdx)); |
647 | if (OuterLPad->isCleanup()) |
648 | InlinedLPad->setCleanup(true); |
649 | } |
650 | |
651 | for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); |
652 | BB != E; ++BB) { |
653 | if (InlinedCodeInfo.ContainsCalls) |
654 | if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( |
655 | BB: &*BB, UnwindEdge: Invoke.getOuterResumeDest())) |
656 | // Update any PHI nodes in the exceptional block to indicate that there |
657 | // is now a new entry in them. |
658 | Invoke.addIncomingPHIValuesFor(BB: NewBB); |
659 | |
660 | // Forward any resumes that are remaining here. |
661 | if (ResumeInst *RI = dyn_cast<ResumeInst>(Val: BB->getTerminator())) |
662 | Invoke.forwardResume(RI, InlinedLPads); |
663 | } |
664 | |
665 | // Now that everything is happy, we have one final detail. The PHI nodes in |
666 | // the exception destination block still have entries due to the original |
667 | // invoke instruction. Eliminate these entries (which might even delete the |
668 | // PHI node) now. |
669 | InvokeDest->removePredecessor(Pred: II->getParent()); |
670 | } |
671 | |
672 | /// If we inlined an invoke site, we need to convert calls |
673 | /// in the body of the inlined function into invokes. |
674 | /// |
675 | /// II is the invoke instruction being inlined. FirstNewBlock is the first |
676 | /// block of the inlined code (the last block is the end of the function), |
677 | /// and InlineCodeInfo is information about the code that got inlined. |
678 | static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, |
679 | ClonedCodeInfo &InlinedCodeInfo) { |
680 | BasicBlock *UnwindDest = II->getUnwindDest(); |
681 | Function *Caller = FirstNewBlock->getParent(); |
682 | |
683 | assert(UnwindDest->getFirstNonPHIIt()->isEHPad() && "unexpected BasicBlock!" ); |
684 | |
685 | // If there are PHI nodes in the unwind destination block, we need to keep |
686 | // track of which values came into them from the invoke before removing the |
687 | // edge from this block. |
688 | SmallVector<Value *, 8> UnwindDestPHIValues; |
689 | BasicBlock *InvokeBB = II->getParent(); |
690 | for (PHINode &PHI : UnwindDest->phis()) { |
691 | // Save the value to use for this edge. |
692 | UnwindDestPHIValues.push_back(Elt: PHI.getIncomingValueForBlock(BB: InvokeBB)); |
693 | } |
694 | |
695 | // Add incoming-PHI values to the unwind destination block for the given basic |
696 | // block, using the values for the original invoke's source block. |
697 | auto UpdatePHINodes = [&](BasicBlock *Src) { |
698 | BasicBlock::iterator I = UnwindDest->begin(); |
699 | for (Value *V : UnwindDestPHIValues) { |
700 | PHINode *PHI = cast<PHINode>(Val&: I); |
701 | PHI->addIncoming(V, BB: Src); |
702 | ++I; |
703 | } |
704 | }; |
705 | |
706 | // This connects all the instructions which 'unwind to caller' to the invoke |
707 | // destination. |
708 | UnwindDestMemoTy FuncletUnwindMap; |
709 | for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); |
710 | BB != E; ++BB) { |
711 | if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) { |
712 | if (CRI->unwindsToCaller()) { |
713 | auto *CleanupPad = CRI->getCleanupPad(); |
714 | CleanupReturnInst::Create(CleanupPad, UnwindBB: UnwindDest, InsertBefore: CRI->getIterator()); |
715 | CRI->eraseFromParent(); |
716 | UpdatePHINodes(&*BB); |
717 | // Finding a cleanupret with an unwind destination would confuse |
718 | // subsequent calls to getUnwindDestToken, so map the cleanuppad |
719 | // to short-circuit any such calls and recognize this as an "unwind |
720 | // to caller" cleanup. |
721 | assert(!FuncletUnwindMap.count(CleanupPad) || |
722 | isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); |
723 | FuncletUnwindMap[CleanupPad] = |
724 | ConstantTokenNone::get(Context&: Caller->getContext()); |
725 | } |
726 | } |
727 | |
728 | BasicBlock::iterator I = BB->getFirstNonPHIIt(); |
729 | if (!I->isEHPad()) |
730 | continue; |
731 | |
732 | Instruction *Replacement = nullptr; |
733 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) { |
734 | if (CatchSwitch->unwindsToCaller()) { |
735 | Value *UnwindDestToken; |
736 | if (auto *ParentPad = |
737 | dyn_cast<Instruction>(Val: CatchSwitch->getParentPad())) { |
738 | // This catchswitch is nested inside another funclet. If that |
739 | // funclet has an unwind destination within the inlinee, then |
740 | // unwinding out of this catchswitch would be UB. Rewriting this |
741 | // catchswitch to unwind to the inlined invoke's unwind dest would |
742 | // give the parent funclet multiple unwind destinations, which is |
743 | // something that subsequent EH table generation can't handle and |
// that the verifier rejects. So when we see such a catchswitch, leave it
// as "unwind to caller".
746 | UnwindDestToken = getUnwindDestToken(EHPad: ParentPad, MemoMap&: FuncletUnwindMap); |
747 | if (UnwindDestToken && !isa<ConstantTokenNone>(Val: UnwindDestToken)) |
748 | continue; |
749 | } else { |
750 | // This catchswitch has no parent to inherit constraints from, and |
751 | // none of its descendants can have an unwind edge that exits it and |
752 | // targets another funclet in the inlinee. It may or may not have a |
753 | // descendant that definitively has an unwind to caller. In either |
754 | // case, we'll have to assume that any unwinds out of it may need to |
755 | // be routed to the caller, so treat it as though it has a definitive |
756 | // unwind to caller. |
757 | UnwindDestToken = ConstantTokenNone::get(Context&: Caller->getContext()); |
758 | } |
759 | auto *NewCatchSwitch = CatchSwitchInst::Create( |
760 | ParentPad: CatchSwitch->getParentPad(), UnwindDest, |
761 | NumHandlers: CatchSwitch->getNumHandlers(), NameStr: CatchSwitch->getName(), |
762 | InsertBefore: CatchSwitch->getIterator()); |
763 | for (BasicBlock *PadBB : CatchSwitch->handlers()) |
764 | NewCatchSwitch->addHandler(Dest: PadBB); |
765 | // Propagate info for the old catchswitch over to the new one in |
766 | // the unwind map. This also serves to short-circuit any subsequent |
767 | // checks for the unwind dest of this catchswitch, which would get |
768 | // confused if they found the outer handler in the callee. |
769 | FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; |
770 | Replacement = NewCatchSwitch; |
771 | } |
772 | } else if (!isa<FuncletPadInst>(Val: I)) { |
773 | llvm_unreachable("unexpected EHPad!" ); |
774 | } |
775 | |
776 | if (Replacement) { |
777 | Replacement->takeName(V: &*I); |
778 | I->replaceAllUsesWith(V: Replacement); |
779 | I->eraseFromParent(); |
780 | UpdatePHINodes(&*BB); |
781 | } |
782 | } |
783 | |
784 | if (InlinedCodeInfo.ContainsCalls) |
785 | for (Function::iterator BB = FirstNewBlock->getIterator(), |
786 | E = Caller->end(); |
787 | BB != E; ++BB) |
788 | if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( |
789 | BB: &*BB, UnwindEdge: UnwindDest, FuncletUnwindMap: &FuncletUnwindMap)) |
790 | // Update any PHI nodes in the exceptional block to indicate that there |
791 | // is now a new entry in them. |
792 | UpdatePHINodes(NewBB); |
793 | |
794 | // Now that everything is happy, we have one final detail. The PHI nodes in |
795 | // the exception destination block still have entries due to the original |
796 | // invoke instruction. Eliminate these entries (which might even delete the |
797 | // PHI node) now. |
798 | UnwindDest->removePredecessor(Pred: InvokeBB); |
799 | } |
800 | |
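// Return true if the MIB stack context and the callsite stack context agree
// on every stack id, compared up to the length of the shorter of the two.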
801 | static bool haveCommonPrefix(MDNode *MIBStackContext, |
802 | MDNode *CallsiteStackContext) { |
803 | assert(MIBStackContext->getNumOperands() > 0 && |
804 | CallsiteStackContext->getNumOperands() > 0); |
805 | // Because of the context trimming performed during matching, the callsite |
806 | // context could have more stack ids than the MIB. We match up to the end of |
807 | // the shortest stack context. |
808 | for (auto MIBStackIter = MIBStackContext->op_begin(), |
809 | CallsiteStackIter = CallsiteStackContext->op_begin(); |
810 | MIBStackIter != MIBStackContext->op_end() && |
811 | CallsiteStackIter != CallsiteStackContext->op_end(); |
812 | MIBStackIter++, CallsiteStackIter++) { |
813 | auto *Val1 = mdconst::dyn_extract<ConstantInt>(MD: *MIBStackIter); |
814 | auto *Val2 = mdconst::dyn_extract<ConstantInt>(MD: *CallsiteStackIter); |
815 | assert(Val1 && Val2); |
816 | if (Val1->getZExtValue() != Val2->getZExtValue()) |
817 | return false; |
818 | } |
819 | return true; |
820 | } |
821 | |
822 | static void removeMemProfMetadata(CallBase *Call) { |
823 | Call->setMetadata(KindID: LLVMContext::MD_memprof, Node: nullptr); |
824 | } |
825 | |
826 | static void removeCallsiteMetadata(CallBase *Call) { |
827 | Call->setMetadata(KindID: LLVMContext::MD_callsite, Node: nullptr); |
828 | } |
829 | |
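// Rebuild the !memprof metadata on CI from the given list of MIB nodes. If
// the rebuilt contexts collapse to a single allocation type, a function
// attribute is used instead and the !callsite metadata is removed as well.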
static void updateMemprofMetadata(CallBase *CI,
                                  const std::vector<Metadata *> &MIBList,
                                  OptimizationRemarkEmitter *ORE) {
833 | assert(!MIBList.empty()); |
834 | // Remove existing memprof, which will either be replaced or may not be needed |
835 | // if we are able to use a single allocation type function attribute. |
836 | removeMemProfMetadata(Call: CI); |
837 | CallStackTrie CallStack(ORE); |
838 | for (Metadata *MIB : MIBList) |
839 | CallStack.addCallStack(MIB: cast<MDNode>(Val: MIB)); |
840 | bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI); |
841 | assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof)); |
842 | if (!MemprofMDAttached) |
843 | // If we used a function attribute remove the callsite metadata as well. |
844 | removeCallsiteMetadata(Call: CI); |
845 | } |
846 | |
847 | // Update the metadata on the inlined copy ClonedCall of a call OrigCall in the |
848 | // inlined callee body, based on the callsite metadata InlinedCallsiteMD from |
849 | // the call that was inlined. |
static void propagateMemProfHelper(const CallBase *OrigCall,
                                   CallBase *ClonedCall,
                                   MDNode *InlinedCallsiteMD,
                                   OptimizationRemarkEmitter *ORE) {
854 | MDNode *OrigCallsiteMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_callsite); |
855 | MDNode *ClonedCallsiteMD = nullptr; |
856 | // Check if the call originally had callsite metadata, and update it for the |
857 | // new call in the inlined body. |
858 | if (OrigCallsiteMD) { |
859 | // The cloned call's context is now the concatenation of the original call's |
860 | // callsite metadata and the callsite metadata on the call where it was |
861 | // inlined. |
862 | ClonedCallsiteMD = MDNode::concatenate(A: OrigCallsiteMD, B: InlinedCallsiteMD); |
863 | ClonedCall->setMetadata(KindID: LLVMContext::MD_callsite, Node: ClonedCallsiteMD); |
864 | } |
865 | |
866 | // Update any memprof metadata on the cloned call. |
867 | MDNode *OrigMemProfMD = ClonedCall->getMetadata(KindID: LLVMContext::MD_memprof); |
868 | if (!OrigMemProfMD) |
869 | return; |
870 | // We currently expect that allocations with memprof metadata also have |
871 | // callsite metadata for the allocation's part of the context. |
872 | assert(OrigCallsiteMD); |
873 | |
874 | // New call's MIB list. |
875 | std::vector<Metadata *> NewMIBList; |
876 | |
877 | // For each MIB metadata, check if its call stack context starts with the |
878 | // new clone's callsite metadata. If so, that MIB goes onto the cloned call in |
879 | // the inlined body. If not, it stays on the out-of-line original call. |
880 | for (auto &MIBOp : OrigMemProfMD->operands()) { |
881 | MDNode *MIB = dyn_cast<MDNode>(Val: MIBOp); |
882 | // Stack is first operand of MIB. |
883 | MDNode *StackMD = getMIBStackNode(MIB); |
884 | assert(StackMD); |
885 | // See if the new cloned callsite context matches this profiled context. |
886 | if (haveCommonPrefix(MIBStackContext: StackMD, CallsiteStackContext: ClonedCallsiteMD)) |
887 | // Add it to the cloned call's MIB list. |
888 | NewMIBList.push_back(x: MIB); |
889 | } |
890 | if (NewMIBList.empty()) { |
891 | removeMemProfMetadata(Call: ClonedCall); |
892 | removeCallsiteMetadata(Call: ClonedCall); |
893 | return; |
894 | } |
895 | if (NewMIBList.size() < OrigMemProfMD->getNumOperands()) |
896 | updateMemprofMetadata(CI: ClonedCall, MIBList: NewMIBList, ORE); |
897 | } |
898 | |
899 | // Update memprof related metadata (!memprof and !callsite) based on the |
900 | // inlining of Callee into the callsite at CB. The updates include merging the |
901 | // inlined callee's callsite metadata with that of the inlined call, |
902 | // and moving the subset of any memprof contexts to the inlined callee |
903 | // allocations if they match the new inlined call stack. |
904 | static void |
905 | propagateMemProfMetadata(Function *Callee, CallBase &CB, |
906 | bool ContainsMemProfMetadata, |
907 | const ValueMap<const Value *, WeakTrackingVH> &VMap, |
908 | OptimizationRemarkEmitter *ORE) { |
909 | MDNode *CallsiteMD = CB.getMetadata(KindID: LLVMContext::MD_callsite); |
910 | // Only need to update if the inlined callsite had callsite metadata, or if |
911 | // there was any memprof metadata inlined. |
912 | if (!CallsiteMD && !ContainsMemProfMetadata) |
913 | return; |
914 | |
915 | // Propagate metadata onto the cloned calls in the inlined callee. |
916 | for (const auto &Entry : VMap) { |
917 | // See if this is a call that has been inlined and remapped, and not |
918 | // simplified away in the process. |
919 | auto *OrigCall = dyn_cast_or_null<CallBase>(Val: Entry.first); |
920 | auto *ClonedCall = dyn_cast_or_null<CallBase>(Val: Entry.second); |
921 | if (!OrigCall || !ClonedCall) |
922 | continue; |
923 | // If the inlined callsite did not have any callsite metadata, then it isn't |
924 | // involved in any profiled call contexts, and we can remove any memprof |
925 | // metadata on the cloned call. |
926 | if (!CallsiteMD) { |
927 | removeMemProfMetadata(Call: ClonedCall); |
928 | removeCallsiteMetadata(Call: ClonedCall); |
929 | continue; |
930 | } |
931 | propagateMemProfHelper(OrigCall, ClonedCall, InlinedCallsiteMD: CallsiteMD, ORE); |
932 | } |
933 | } |
934 | |
935 | /// When inlining a call site that has !llvm.mem.parallel_loop_access, |
936 | /// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should |
937 | /// be propagated to all memory-accessing cloned instructions. |
938 | static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, |
939 | Function::iterator FEnd) { |
940 | MDNode *MemParallelLoopAccess = |
941 | CB.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access); |
942 | MDNode *AccessGroup = CB.getMetadata(KindID: LLVMContext::MD_access_group); |
943 | MDNode *AliasScope = CB.getMetadata(KindID: LLVMContext::MD_alias_scope); |
944 | MDNode *NoAlias = CB.getMetadata(KindID: LLVMContext::MD_noalias); |
945 | if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) |
946 | return; |
947 | |
948 | for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) { |
949 | for (Instruction &I : BB) { |
950 | // This metadata is only relevant for instructions that access memory. |
951 | if (!I.mayReadOrWriteMemory()) |
952 | continue; |
953 | |
954 | if (MemParallelLoopAccess) { |
// TODO: This probably should not overwrite MemParallelLoopAccess.
956 | MemParallelLoopAccess = MDNode::concatenate( |
957 | A: I.getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access), |
958 | B: MemParallelLoopAccess); |
959 | I.setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, |
960 | Node: MemParallelLoopAccess); |
961 | } |
962 | |
963 | if (AccessGroup) |
964 | I.setMetadata(KindID: LLVMContext::MD_access_group, Node: uniteAccessGroups( |
965 | AccGroups1: I.getMetadata(KindID: LLVMContext::MD_access_group), AccGroups2: AccessGroup)); |
966 | |
967 | if (AliasScope) |
968 | I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MDNode::concatenate( |
969 | A: I.getMetadata(KindID: LLVMContext::MD_alias_scope), B: AliasScope)); |
970 | |
971 | if (NoAlias) |
972 | I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::concatenate( |
973 | A: I.getMetadata(KindID: LLVMContext::MD_noalias), B: NoAlias)); |
974 | } |
975 | } |
976 | } |
977 | |
978 | /// Bundle operands of the inlined function must be added to inlined call sites. |
979 | static void PropagateOperandBundles(Function::iterator InlinedBB, |
980 | Instruction *CallSiteEHPad) { |
981 | for (Instruction &II : llvm::make_early_inc_range(Range&: *InlinedBB)) { |
982 | CallBase *I = dyn_cast<CallBase>(Val: &II); |
983 | if (!I) |
984 | continue; |
985 | // Skip call sites which already have a "funclet" bundle. |
986 | if (I->getOperandBundle(ID: LLVMContext::OB_funclet)) |
987 | continue; |
988 | // Skip call sites which are nounwind intrinsics (as long as they don't |
989 | // lower into regular function calls in the course of IR transformations). |
990 | auto *CalledFn = |
991 | dyn_cast<Function>(Val: I->getCalledOperand()->stripPointerCasts()); |
992 | if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() && |
993 | !IntrinsicInst::mayLowerToFunctionCall(IID: CalledFn->getIntrinsicID())) |
994 | continue; |
995 | |
996 | SmallVector<OperandBundleDef, 1> OpBundles; |
997 | I->getOperandBundlesAsDefs(Defs&: OpBundles); |
998 | OpBundles.emplace_back(Args: "funclet" , Args&: CallSiteEHPad); |
999 | |
1000 | Instruction *NewInst = CallBase::Create(CB: I, Bundles: OpBundles, InsertPt: I->getIterator()); |
1001 | NewInst->takeName(V: I); |
1002 | I->replaceAllUsesWith(V: NewInst); |
1003 | I->eraseFromParent(); |
1004 | } |
1005 | } |
1006 | |
1007 | namespace { |
1008 | /// Utility for cloning !noalias and !alias.scope metadata. When a code region |
1009 | /// using scoped alias metadata is inlined, the aliasing relationships may not |
/// hold between the two versions. It is necessary to create a deep clone of the
1011 | /// metadata, putting the two versions in separate scope domains. |
1012 | class ScopedAliasMetadataDeepCloner { |
1013 | using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>; |
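/// All !alias.scope / !noalias metadata nodes used in the function, together
/// with any nodes transitively reachable from them.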
1014 | SetVector<const MDNode *> MD; |
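/// Mapping from each original node to its clone (temporary placeholder nodes
/// until clone() replaces them with the final nodes).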
1015 | MetadataMap MDMap; |
1016 | void addRecursiveMetadataUses(); |
1017 | |
1018 | public: |
1019 | ScopedAliasMetadataDeepCloner(const Function *F); |
1020 | |
1021 | /// Create a new clone of the scoped alias metadata, which will be used by |
1022 | /// subsequent remap() calls. |
1023 | void clone(); |
1024 | |
1025 | /// Remap instructions in the given range from the original to the cloned |
1026 | /// metadata. |
1027 | void remap(Function::iterator FStart, Function::iterator FEnd); |
1028 | }; |
1029 | } // namespace |
1030 | |
1031 | ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( |
1032 | const Function *F) { |
1033 | for (const BasicBlock &BB : *F) { |
1034 | for (const Instruction &I : BB) { |
1035 | if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope)) |
1036 | MD.insert(X: M); |
1037 | if (const MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias)) |
1038 | MD.insert(X: M); |
1039 | |
1040 | // We also need to clone the metadata in noalias intrinsics. |
1041 | if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I)) |
1042 | MD.insert(X: Decl->getScopeList()); |
1043 | } |
1044 | } |
1045 | addRecursiveMetadataUses(); |
1046 | } |
1047 | |
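// Add to MD all metadata nodes transitively reachable from the nodes already
// collected, so that operands of scope lists get cloned as well.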
1048 | void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() { |
1049 | SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end()); |
1050 | while (!Queue.empty()) { |
1051 | const MDNode *M = cast<MDNode>(Val: Queue.pop_back_val()); |
1052 | for (const Metadata *Op : M->operands()) |
1053 | if (const MDNode *OpMD = dyn_cast<MDNode>(Val: Op)) |
1054 | if (MD.insert(X: OpMD)) |
1055 | Queue.push_back(Elt: OpMD); |
1056 | } |
1057 | } |
1058 | |
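// Clone the collected scope metadata. Temporary placeholder nodes are created
// first so that references between the collected nodes can be remapped
// regardless of visitation order, and are then replaced with the final nodes.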
1059 | void ScopedAliasMetadataDeepCloner::clone() { |
1060 | assert(MDMap.empty() && "clone() already called ?" ); |
1061 | |
1062 | SmallVector<TempMDTuple, 16> DummyNodes; |
1063 | for (const MDNode *I : MD) { |
1064 | DummyNodes.push_back(Elt: MDTuple::getTemporary(Context&: I->getContext(), MDs: {})); |
1065 | MDMap[I].reset(MD: DummyNodes.back().get()); |
1066 | } |
1067 | |
1068 | // Create new metadata nodes to replace the dummy nodes, replacing old |
1069 | // metadata references with either a dummy node or an already-created new |
1070 | // node. |
1071 | SmallVector<Metadata *, 4> NewOps; |
1072 | for (const MDNode *I : MD) { |
1073 | for (const Metadata *Op : I->operands()) { |
1074 | if (const MDNode *M = dyn_cast<MDNode>(Val: Op)) |
1075 | NewOps.push_back(Elt: MDMap[M]); |
1076 | else |
1077 | NewOps.push_back(Elt: const_cast<Metadata *>(Op)); |
1078 | } |
1079 | |
1080 | MDNode *NewM = MDNode::get(Context&: I->getContext(), MDs: NewOps); |
1081 | MDTuple *TempM = cast<MDTuple>(Val&: MDMap[I]); |
1082 | assert(TempM->isTemporary() && "Expected temporary node" ); |
1083 | |
1084 | TempM->replaceAllUsesWith(MD: NewM); |
1085 | NewOps.clear(); |
1086 | } |
1087 | } |
1088 | |
1089 | void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, |
1090 | Function::iterator FEnd) { |
1091 | if (MDMap.empty()) |
1092 | return; // Nothing to do. |
1093 | |
1094 | for (BasicBlock &BB : make_range(x: FStart, y: FEnd)) { |
1095 | for (Instruction &I : BB) { |
1096 | // TODO: The null checks for the MDMap.lookup() results should no longer |
1097 | // be necessary. |
1098 | if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_alias_scope)) |
1099 | if (MDNode *MNew = MDMap.lookup(Val: M)) |
1100 | I.setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MNew); |
1101 | |
1102 | if (MDNode *M = I.getMetadata(KindID: LLVMContext::MD_noalias)) |
1103 | if (MDNode *MNew = MDMap.lookup(Val: M)) |
1104 | I.setMetadata(KindID: LLVMContext::MD_noalias, Node: MNew); |
1105 | |
1106 | if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I)) |
1107 | if (MDNode *MNew = MDMap.lookup(Val: Decl->getScopeList())) |
1108 | Decl->setScopeList(MNew); |
1109 | } |
1110 | } |
1111 | } |
1112 | |
1113 | /// If the inlined function has noalias arguments, |
1114 | /// then add new alias scopes for each noalias argument, tag the mapped noalias |
1115 | /// parameters with noalias metadata specifying the new scope, and tag all |
1116 | /// non-derived loads, stores and memory intrinsics with the new alias scopes. |
1117 | static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, |
1118 | const DataLayout &DL, AAResults *CalleeAAR, |
1119 | ClonedCodeInfo &InlinedFunctionInfo) { |
1120 | if (!EnableNoAliasConversion) |
1121 | return; |
1122 | |
1123 | const Function *CalledFunc = CB.getCalledFunction(); |
1124 | SmallVector<const Argument *, 4> NoAliasArgs; |
1125 | |
1126 | for (const Argument &Arg : CalledFunc->args()) |
1127 | if (CB.paramHasAttr(ArgNo: Arg.getArgNo(), Kind: Attribute::NoAlias) && !Arg.use_empty()) |
1128 | NoAliasArgs.push_back(Elt: &Arg); |
1129 | |
1130 | if (NoAliasArgs.empty()) |
1131 | return; |
1132 | |
1133 | // To do a good job, if a noalias variable is captured, we need to know if |
1134 | // the capture point dominates the particular use we're considering. |
1135 | DominatorTree DT; |
1136 | DT.recalculate(Func&: const_cast<Function&>(*CalledFunc)); |
1137 | |
1138 | // noalias indicates that pointer values based on the argument do not alias |
1139 | // pointer values which are not based on it. So we add a new "scope" for each |
1140 | // noalias function argument. Accesses using pointers based on that argument |
1141 | // become part of that alias scope, accesses using pointers not based on that |
1142 | // argument are tagged as noalias with that scope. |
1143 | |
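// Map each noalias argument to the alias scope created for it.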
1144 | DenseMap<const Argument *, MDNode *> NewScopes; |
1145 | MDBuilder MDB(CalledFunc->getContext()); |
1146 | |
1147 | // Create a new scope domain for this function. |
1148 | MDNode *NewDomain = |
1149 | MDB.createAnonymousAliasScopeDomain(Name: CalledFunc->getName()); |
1150 | for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { |
1151 | const Argument *A = NoAliasArgs[i]; |
1152 | |
1153 | std::string Name = std::string(CalledFunc->getName()); |
1154 | if (A->hasName()) { |
1155 | Name += ": %" ; |
1156 | Name += A->getName(); |
1157 | } else { |
1158 | Name += ": argument " ; |
1159 | Name += utostr(X: i); |
1160 | } |
1161 | |
1162 | // Note: We always create a new anonymous root here. This is true regardless |
1163 | // of the linkage of the callee because the aliasing "scope" is not just a |
1164 | // property of the callee, but also all control dependencies in the caller. |
1165 | MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name); |
1166 | NewScopes.insert(KV: std::make_pair(x&: A, y&: NewScope)); |
1167 | |
1168 | if (UseNoAliasIntrinsic) { |
1169 | // Introduce a llvm.experimental.noalias.scope.decl for the noalias |
1170 | // argument. |
1171 | MDNode *AScopeList = MDNode::get(Context&: CalledFunc->getContext(), MDs: NewScope); |
1172 | auto *NoAliasDecl = |
1173 | IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(ScopeTag: AScopeList); |
1174 | // Ignore the result for now. The result will be used when the |
1175 | // llvm.noalias intrinsic is introduced. |
1176 | (void)NoAliasDecl; |
1177 | } |
1178 | } |
1179 | |
1180 | // Iterate over all new instructions in the map; for all memory-access |
1181 | // instructions, add the alias scope metadata. |
1182 | for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); |
1183 | VMI != VMIE; ++VMI) { |
1184 | if (const Instruction *I = dyn_cast<Instruction>(Val: VMI->first)) { |
1185 | if (!VMI->second) |
1186 | continue; |
1187 | |
1188 | Instruction *NI = dyn_cast<Instruction>(Val&: VMI->second); |
1189 | if (!NI || InlinedFunctionInfo.isSimplified(From: I, To: NI)) |
1190 | continue; |
1191 | |
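// Collect the pointer operands accessed by this instruction. For calls, also
// note whether the callee is known to only access its argument pointees.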
1192 | bool IsArgMemOnlyCall = false, IsFuncCall = false; |
1193 | SmallVector<const Value *, 2> PtrArgs; |
1194 | |
1195 | if (const LoadInst *LI = dyn_cast<LoadInst>(Val: I)) |
1196 | PtrArgs.push_back(Elt: LI->getPointerOperand()); |
1197 | else if (const StoreInst *SI = dyn_cast<StoreInst>(Val: I)) |
1198 | PtrArgs.push_back(Elt: SI->getPointerOperand()); |
1199 | else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(Val: I)) |
1200 | PtrArgs.push_back(Elt: VAAI->getPointerOperand()); |
1201 | else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(Val: I)) |
1202 | PtrArgs.push_back(Elt: CXI->getPointerOperand()); |
1203 | else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(Val: I)) |
1204 | PtrArgs.push_back(Elt: RMWI->getPointerOperand()); |
1205 | else if (const auto *Call = dyn_cast<CallBase>(Val: I)) { |
1206 | // If we know that the call does not access memory, then we'll still |
1207 | // know that about the inlined clone of this call site, and we don't |
1208 | // need to add metadata. |
1209 | if (Call->doesNotAccessMemory()) |
1210 | continue; |
1211 | |
1212 | IsFuncCall = true; |
1213 | if (CalleeAAR) { |
1214 | MemoryEffects ME = CalleeAAR->getMemoryEffects(Call); |
1215 | |
1216 | // We'll retain this knowledge without additional metadata. |
1217 | if (ME.onlyAccessesInaccessibleMem()) |
1218 | continue; |
1219 | |
1220 | if (ME.onlyAccessesArgPointees()) |
1221 | IsArgMemOnlyCall = true; |
1222 | } |
1223 | |
1224 | for (Value *Arg : Call->args()) { |
1225 | // Only care about pointer arguments. If a noalias argument is |
1226 | // accessed through a non-pointer argument, it must be captured |
1227 | // first (e.g. via ptrtoint), and we protect against captures below. |
1228 | if (!Arg->getType()->isPointerTy()) |
1229 | continue; |
1230 | |
1231 | PtrArgs.push_back(Elt: Arg); |
1232 | } |
1233 | } |
1234 | |
1235 | // If we found no pointers, then this instruction is not suitable for |
1236 | // pairing with an instruction to receive aliasing metadata. |
      // However, if this is a call, we might just alias with none of the
      // noalias arguments.
1239 | if (PtrArgs.empty() && !IsFuncCall) |
1240 | continue; |
1241 | |
1242 | // It is possible that there is only one underlying object, but you |
1243 | // need to go through several PHIs to see it, and thus could be |
1244 | // repeated in the Objects list. |
1245 | SmallPtrSet<const Value *, 4> ObjSet; |
1246 | SmallVector<Metadata *, 4> Scopes, NoAliases; |
1247 | |
1248 | for (const Value *V : PtrArgs) { |
1249 | SmallVector<const Value *, 4> Objects; |
1250 | getUnderlyingObjects(V, Objects, /* LI = */ nullptr); |
1251 | |
1252 | ObjSet.insert_range(R&: Objects); |
1253 | } |
1254 | |
1255 | // Figure out if we're derived from anything that is not a noalias |
1256 | // argument. |
1257 | bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false, |
1258 | UsesUnknownObject = false; |
1259 | for (const Value *V : ObjSet) { |
1260 | // Is this value a constant that cannot be derived from any pointer |
1261 | // value (we need to exclude constant expressions, for example, that |
1262 | // are formed from arithmetic on global symbols). |
1263 | bool IsNonPtrConst = isa<ConstantInt>(Val: V) || isa<ConstantFP>(Val: V) || |
1264 | isa<ConstantPointerNull>(Val: V) || |
1265 | isa<ConstantDataVector>(Val: V) || isa<UndefValue>(Val: V); |
1266 | if (IsNonPtrConst) |
1267 | continue; |
1268 | |
1269 | // If this is anything other than a noalias argument, then we cannot |
1270 | // completely describe the aliasing properties using alias.scope |
1271 | // metadata (and, thus, won't add any). |
1272 | if (const Argument *A = dyn_cast<Argument>(Val: V)) { |
1273 | if (!CB.paramHasAttr(ArgNo: A->getArgNo(), Kind: Attribute::NoAlias)) |
1274 | UsesAliasingPtr = true; |
1275 | } else { |
1276 | UsesAliasingPtr = true; |
1277 | } |
1278 | |
1279 | if (isEscapeSource(V)) { |
1280 | // An escape source can only alias with a noalias argument if it has |
1281 | // been captured beforehand. |
1282 | RequiresNoCaptureBefore = true; |
1283 | } else if (!isa<Argument>(Val: V) && !isIdentifiedObject(V)) { |
1284 | // If this is neither an escape source, nor some identified object |
1285 | // (which cannot directly alias a noalias argument), nor some other |
1286 | // argument (which, by definition, also cannot alias a noalias |
1287 | // argument), conservatively do not make any assumptions. |
1288 | UsesUnknownObject = true; |
1289 | } |
1290 | } |
1291 | |
1292 | // Nothing we can do if the used underlying object cannot be reliably |
1293 | // determined. |
1294 | if (UsesUnknownObject) |
1295 | continue; |
1296 | |
1297 | // A function call can always get captured noalias pointers (via other |
1298 | // parameters, globals, etc.). |
1299 | if (IsFuncCall && !IsArgMemOnlyCall) |
1300 | RequiresNoCaptureBefore = true; |
1301 | |
      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias sets, and add those for which:
1304 | // 1. The noalias argument is not in the set of objects from which we |
1305 | // definitely derive. |
1306 | // 2. The noalias argument has not yet been captured. |
1307 | // An arbitrary function that might load pointers could see captured |
1308 | // noalias arguments via other noalias arguments or globals, and so we |
1309 | // must always check for prior capture. |
1310 | for (const Argument *A : NoAliasArgs) { |
1311 | if (ObjSet.contains(Ptr: A)) |
1312 | continue; // May be based on a noalias argument. |
1313 | |
1314 | // It might be tempting to skip the PointerMayBeCapturedBefore check if |
1315 | // A->hasNoCaptureAttr() is true, but this is incorrect because |
1316 | // nocapture only guarantees that no copies outlive the function, not |
1317 | // that the value cannot be locally captured. |
1318 | if (!RequiresNoCaptureBefore || |
1319 | !capturesAnything(CC: PointerMayBeCapturedBefore( |
1320 | V: A, /*ReturnCaptures=*/false, I, DT: &DT, /*IncludeI=*/false, |
1321 | Mask: CaptureComponents::Provenance))) |
1322 | NoAliases.push_back(Elt: NewScopes[A]); |
1323 | } |
1324 | |
1325 | if (!NoAliases.empty()) |
1326 | NI->setMetadata(KindID: LLVMContext::MD_noalias, |
1327 | Node: MDNode::concatenate( |
1328 | A: NI->getMetadata(KindID: LLVMContext::MD_noalias), |
1329 | B: MDNode::get(Context&: CalledFunc->getContext(), MDs: NoAliases))); |
1330 | |
1331 | // Next, we want to figure out all of the sets to which we might belong. |
1332 | // We might belong to a set if the noalias argument is in the set of |
1333 | // underlying objects. If there is some non-noalias argument in our list |
1334 | // of underlying objects, then we cannot add a scope because the fact |
1335 | // that some access does not alias with any set of our noalias arguments |
1336 | // cannot itself guarantee that it does not alias with this access |
1337 | // (because there is some pointer of unknown origin involved and the |
1338 | // other access might also depend on this pointer). We also cannot add |
1339 | // scopes to arbitrary functions unless we know they don't access any |
1340 | // non-parameter pointer-values. |
1341 | bool CanAddScopes = !UsesAliasingPtr; |
1342 | if (CanAddScopes && IsFuncCall) |
1343 | CanAddScopes = IsArgMemOnlyCall; |
1344 | |
1345 | if (CanAddScopes) |
1346 | for (const Argument *A : NoAliasArgs) { |
1347 | if (ObjSet.count(Ptr: A)) |
1348 | Scopes.push_back(Elt: NewScopes[A]); |
1349 | } |
1350 | |
1351 | if (!Scopes.empty()) |
1352 | NI->setMetadata( |
1353 | KindID: LLVMContext::MD_alias_scope, |
1354 | Node: MDNode::concatenate(A: NI->getMetadata(KindID: LLVMContext::MD_alias_scope), |
1355 | B: MDNode::get(Context&: CalledFunc->getContext(), MDs: Scopes))); |
1356 | } |
1357 | } |
1358 | } |
1359 | |
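// Return true if some instruction after the call Begin and up to the return
// End (both must be in the same basic block) may throw or otherwise fail to
// transfer execution to its successor. Only a bounded window of instructions
// (InlinerAttributeWindow + 1) is scanned, so the answer is conservative:
// true is also returned when the window is exceeded.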
1360 | static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin, |
1361 | ReturnInst *End) { |
1362 | |
  assert(Begin->getParent() == End->getParent() &&
         "Expected to be in same basic block!");
1365 | auto BeginIt = Begin->getIterator(); |
  assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
1367 | return !llvm::isGuaranteedToTransferExecutionToSuccessor( |
1368 | Begin: ++BeginIt, End: End->getIterator(), ScanLimit: InlinerAttributeWindow + 1); |
1369 | } |
1370 | |
1371 | // Add attributes from CB params and Fn attributes that can always be propagated |
1372 | // to the corresponding argument / inner callbases. |
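// Illustrative example (hypothetical IR): if the call site is roughly
//   call void @callee(ptr nonnull readonly %p)
// and @callee forwards %p unchanged to an inner call, the cloned inner call
// can be given nonnull (exact forwarding) and readonly (same underlying
// object) on the corresponding parameter.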
1373 | static void AddParamAndFnBasicAttributes(const CallBase &CB, |
1374 | ValueToValueMapTy &VMap, |
1375 | ClonedCodeInfo &InlinedFunctionInfo) { |
1376 | auto *CalledFunction = CB.getCalledFunction(); |
1377 | auto &Context = CalledFunction->getContext(); |
1378 | |
1379 | // Collect valid attributes for all params. |
1380 | SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs; |
1381 | bool HasAttrToPropagate = false; |
1382 | |
1383 | // Attributes we can only propagate if the exact parameter is forwarded. |
1384 | // We can propagate both poison generating and UB generating attributes |
1385 | // without any extra checks. The only attribute that is tricky to propagate |
1386 | // is `noundef` (skipped for now) as that can create new UB where previous |
1387 | // behavior was just using a poison value. |
1388 | static const Attribute::AttrKind ExactAttrsToPropagate[] = { |
1389 | Attribute::Dereferenceable, Attribute::DereferenceableOrNull, |
1390 | Attribute::NonNull, Attribute::NoFPClass, |
1391 | Attribute::Alignment, Attribute::Range}; |
1392 | |
1393 | for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) { |
1394 | ValidObjParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()}); |
1395 | ValidExactParamAttrs.emplace_back(Args: AttrBuilder{CB.getContext()}); |
1396 | // Access attributes can be propagated to any param with the same underlying |
1397 | // object as the argument. |
1398 | if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadNone)) |
1399 | ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadNone); |
1400 | if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::ReadOnly)) |
1401 | ValidObjParamAttrs.back().addAttribute(Val: Attribute::ReadOnly); |
1402 | |
1403 | for (Attribute::AttrKind AK : ExactAttrsToPropagate) { |
1404 | Attribute Attr = CB.getParamAttr(ArgNo: I, Kind: AK); |
1405 | if (Attr.isValid()) |
1406 | ValidExactParamAttrs.back().addAttribute(A: Attr); |
1407 | } |
1408 | |
1409 | HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes(); |
1410 | HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes(); |
1411 | } |
1412 | |
1413 | // Won't be able to propagate anything. |
1414 | if (!HasAttrToPropagate) |
1415 | return; |
1416 | |
1417 | for (BasicBlock &BB : *CalledFunction) { |
1418 | for (Instruction &Ins : BB) { |
1419 | const auto *InnerCB = dyn_cast<CallBase>(Val: &Ins); |
1420 | if (!InnerCB) |
1421 | continue; |
1422 | auto *NewInnerCB = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: InnerCB)); |
1423 | if (!NewInnerCB) |
1424 | continue; |
      // The InnerCB might have been simplified during the inlining
      // process which can make propagation incorrect.
1427 | if (InlinedFunctionInfo.isSimplified(From: InnerCB, To: NewInnerCB)) |
1428 | continue; |
1429 | |
1430 | AttributeList AL = NewInnerCB->getAttributes(); |
1431 | for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) { |
1432 | // It's unsound or requires special handling to propagate |
1433 | // attributes to byval arguments. Even if CalledFunction |
1434 | // doesn't e.g. write to the argument (readonly), the call to |
1435 | // NewInnerCB may write to its by-value copy. |
1436 | if (NewInnerCB->paramHasAttr(ArgNo: I, Kind: Attribute::ByVal)) |
1437 | continue; |
1438 | |
1439 | // Don't bother propagating attrs to constants. |
1440 | if (match(V: NewInnerCB->getArgOperand(i: I), |
1441 | P: llvm::PatternMatch::m_ImmConstant())) |
1442 | continue; |
1443 | |
1444 | // Check if the underlying value for the parameter is an argument. |
1445 | const Argument *Arg = dyn_cast<Argument>(Val: InnerCB->getArgOperand(i: I)); |
1446 | unsigned ArgNo; |
1447 | if (Arg) { |
1448 | ArgNo = Arg->getArgNo(); |
1449 | // For dereferenceable, dereferenceable_or_null, align, etc... |
1450 | // we don't want to propagate if the existing param has the same |
1451 | // attribute with "better" constraints. So remove from the |
1452 | // new AL if the region of the existing param is larger than |
1453 | // what we can propagate. |
1454 | AttrBuilder NewAB{ |
1455 | Context, AttributeSet::get(C&: Context, B: ValidExactParamAttrs[ArgNo])}; |
1456 | if (AL.getParamDereferenceableBytes(Index: I) > |
1457 | NewAB.getDereferenceableBytes()) |
1458 | NewAB.removeAttribute(Val: Attribute::Dereferenceable); |
1459 | if (AL.getParamDereferenceableOrNullBytes(ArgNo: I) > |
1460 | NewAB.getDereferenceableOrNullBytes()) |
1461 | NewAB.removeAttribute(Val: Attribute::DereferenceableOrNull); |
1462 | if (AL.getParamAlignment(ArgNo: I).valueOrOne() > |
1463 | NewAB.getAlignment().valueOrOne()) |
1464 | NewAB.removeAttribute(Val: Attribute::Alignment); |
1465 | if (auto ExistingRange = AL.getParamRange(ArgNo: I)) { |
1466 | if (auto NewRange = NewAB.getRange()) { |
1467 | ConstantRange CombinedRange = |
1468 | ExistingRange->intersectWith(CR: *NewRange); |
1469 | NewAB.removeAttribute(Val: Attribute::Range); |
1470 | NewAB.addRangeAttr(CR: CombinedRange); |
1471 | } |
1472 | } |
1473 | |
1474 | if (FPClassTest ExistingNoFP = AL.getParamNoFPClass(ArgNo: I)) |
1475 | NewAB.addNoFPClassAttr(NoFPClassMask: ExistingNoFP | NewAB.getNoFPClass()); |
1476 | |
1477 | AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: NewAB); |
1478 | } else if (NewInnerCB->getArgOperand(i: I)->getType()->isPointerTy()) { |
1479 | // Check if the underlying value for the parameter is an argument. |
1480 | const Value *UnderlyingV = |
1481 | getUnderlyingObject(V: InnerCB->getArgOperand(i: I)); |
1482 | Arg = dyn_cast<Argument>(Val: UnderlyingV); |
1483 | if (!Arg) |
1484 | continue; |
1485 | ArgNo = Arg->getArgNo(); |
1486 | } else { |
1487 | continue; |
1488 | } |
1489 | |
1490 | // If so, propagate its access attributes. |
1491 | AL = AL.addParamAttributes(C&: Context, ArgNo: I, B: ValidObjParamAttrs[ArgNo]); |
1492 | |
1493 | // We can have conflicting attributes from the inner callsite and |
1494 | // to-be-inlined callsite. In that case, choose the most |
1495 | // restrictive. |
1496 | |
1497 | // readonly + writeonly means we can never deref so make readnone. |
1498 | if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) && |
1499 | AL.hasParamAttr(ArgNo: I, Kind: Attribute::WriteOnly)) |
1500 | AL = AL.addParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadNone); |
1501 | |
        // If we have readnone, we need to clear readonly/writeonly.
1503 | if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) { |
1504 | AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::ReadOnly); |
1505 | AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::WriteOnly); |
1506 | } |
1507 | |
1508 | // Writable cannot exist in conjunction w/ readonly/readnone |
1509 | if (AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadOnly) || |
1510 | AL.hasParamAttr(ArgNo: I, Kind: Attribute::ReadNone)) |
1511 | AL = AL.removeParamAttribute(C&: Context, ArgNo: I, Kind: Attribute::Writable); |
1512 | } |
1513 | NewInnerCB->setAttributes(AL); |
1514 | } |
1515 | } |
1516 | } |
1517 | |
// Only allow these whitelisted attributes to be propagated back to the
// callee. This is because other attributes may only be valid on the call
// itself, i.e. attributes such as signext and zeroext.

// Attributes that are always okay to propagate, as violating them results in
// immediate UB.
1524 | static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) { |
1525 | AttrBuilder Valid(CB.getContext()); |
1526 | if (auto DerefBytes = CB.getRetDereferenceableBytes()) |
1527 | Valid.addDereferenceableAttr(Bytes: DerefBytes); |
1528 | if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes()) |
1529 | Valid.addDereferenceableOrNullAttr(Bytes: DerefOrNullBytes); |
1530 | if (CB.hasRetAttr(Kind: Attribute::NoAlias)) |
1531 | Valid.addAttribute(Val: Attribute::NoAlias); |
1532 | if (CB.hasRetAttr(Kind: Attribute::NoUndef)) |
1533 | Valid.addAttribute(Val: Attribute::NoUndef); |
1534 | return Valid; |
1535 | } |
1536 | |
1537 | // Attributes that need additional checks as propagating them may change |
1538 | // behavior or cause new UB. |
1539 | static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) { |
1540 | AttrBuilder Valid(CB.getContext()); |
1541 | if (CB.hasRetAttr(Kind: Attribute::NonNull)) |
1542 | Valid.addAttribute(Val: Attribute::NonNull); |
1543 | if (CB.hasRetAttr(Kind: Attribute::Alignment)) |
1544 | Valid.addAlignmentAttr(Align: CB.getRetAlign()); |
1545 | if (std::optional<ConstantRange> Range = CB.getRange()) |
1546 | Valid.addRangeAttr(CR: *Range); |
1547 | return Valid; |
1548 | } |
1549 | |
1550 | static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap, |
1551 | ClonedCodeInfo &InlinedFunctionInfo) { |
1552 | AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB); |
1553 | AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB); |
1554 | if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes()) |
1555 | return; |
1556 | auto *CalledFunction = CB.getCalledFunction(); |
1557 | auto &Context = CalledFunction->getContext(); |
1558 | |
1559 | for (auto &BB : *CalledFunction) { |
1560 | auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator()); |
1561 | if (!RI || !isa<CallBase>(Val: RI->getOperand(i_nocapture: 0))) |
1562 | continue; |
1563 | auto *RetVal = cast<CallBase>(Val: RI->getOperand(i_nocapture: 0)); |
1564 | // Check that the cloned RetVal exists and is a call, otherwise we cannot |
1565 | // add the attributes on the cloned RetVal. Simplification during inlining |
1566 | // could have transformed the cloned instruction. |
1567 | auto *NewRetVal = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: RetVal)); |
1568 | if (!NewRetVal) |
1569 | continue; |
1570 | |
    // The RetVal might have been simplified during the inlining
    // process which can make propagation incorrect.
1573 | if (InlinedFunctionInfo.isSimplified(From: RetVal, To: NewRetVal)) |
1574 | continue; |
1575 | // Backward propagation of attributes to the returned value may be incorrect |
1576 | // if it is control flow dependent. |
1577 | // Consider: |
1578 | // @callee { |
1579 | // %rv = call @foo() |
1580 | // %rv2 = call @bar() |
1581 | // if (%rv2 != null) |
1582 | // return %rv2 |
1583 | // if (%rv == null) |
1584 | // exit() |
1585 | // return %rv |
1586 | // } |
1587 | // caller() { |
1588 | // %val = call nonnull @callee() |
1589 | // } |
    // Here we cannot add the nonnull attribute on either foo or bar. So, we
    // limit the check to cases where RetVal and RI are in the same basic block
    // and there are no throwing/exiting instructions between them.
1593 | if (RI->getParent() != RetVal->getParent() || |
1594 | MayContainThrowingOrExitingCallAfterCB(Begin: RetVal, End: RI)) |
1595 | continue; |
1596 | // Add to the existing attributes of NewRetVal, i.e. the cloned call |
1597 | // instruction. |
1598 | // NB! When we have the same attribute already existing on NewRetVal, but |
1599 | // with a differing value, the AttributeList's merge API honours the already |
1600 | // existing attribute value (i.e. attributes such as dereferenceable, |
1601 | // dereferenceable_or_null etc). See AttrBuilder::merge for more details. |
1602 | AttributeList AL = NewRetVal->getAttributes(); |
1603 | if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes()) |
1604 | ValidUB.removeAttribute(Val: Attribute::Dereferenceable); |
1605 | if (ValidUB.getDereferenceableOrNullBytes() < |
1606 | AL.getRetDereferenceableOrNullBytes()) |
1607 | ValidUB.removeAttribute(Val: Attribute::DereferenceableOrNull); |
1608 | AttributeList NewAL = AL.addRetAttributes(C&: Context, B: ValidUB); |
    // Attributes that may generate poison returns are a bit tricky. If we
    // propagate them, other uses of the callsite might have their behavior
    // change or cause UB (if they have noundef) because of the new potential
    // poison.
1613 | // Take the following three cases: |
1614 | // |
1615 | // 1) |
1616 | // define nonnull ptr @foo() { |
1617 | // %p = call ptr @bar() |
1618 | // call void @use(ptr %p) willreturn nounwind |
1619 | // ret ptr %p |
1620 | // } |
1621 | // |
1622 | // 2) |
1623 | // define noundef nonnull ptr @foo() { |
1624 | // %p = call ptr @bar() |
1625 | // call void @use(ptr %p) willreturn nounwind |
1626 | // ret ptr %p |
1627 | // } |
1628 | // |
1629 | // 3) |
1630 | // define nonnull ptr @foo() { |
1631 | // %p = call noundef ptr @bar() |
1632 | // ret ptr %p |
1633 | // } |
1634 | // |
1635 | // In case 1, we can't propagate nonnull because poison value in @use may |
1636 | // change behavior or trigger UB. |
1637 | // In case 2, we don't need to be concerned about propagating nonnull, as |
1638 | // any new poison at @use will trigger UB anyways. |
1639 | // In case 3, we can never propagate nonnull because it may create UB due to |
1640 | // the noundef on @bar. |
1641 | if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne()) |
1642 | ValidPG.removeAttribute(Val: Attribute::Alignment); |
1643 | if (ValidPG.hasAttributes()) { |
1644 | Attribute CBRange = ValidPG.getAttribute(Kind: Attribute::Range); |
1645 | if (CBRange.isValid()) { |
1646 | Attribute NewRange = AL.getRetAttr(Kind: Attribute::Range); |
1647 | if (NewRange.isValid()) { |
1648 | ValidPG.addRangeAttr( |
1649 | CR: CBRange.getRange().intersectWith(CR: NewRange.getRange())); |
1650 | } |
1651 | } |
1652 | // Three checks. |
1653 | // If the callsite has `noundef`, then a poison due to violating the |
1654 | // return attribute will create UB anyways so we can always propagate. |
1655 | // Otherwise, if the return value (callee to be inlined) has `noundef`, we |
1656 | // can't propagate as a new poison return will cause UB. |
1657 | // Finally, check if the return value has no uses whose behavior may |
1658 | // change/may cause UB if we potentially return poison. At the moment this |
1659 | // is implemented overly conservatively with a single-use check. |
1660 | // TODO: Update the single-use check to iterate through uses and only bail |
1661 | // if we have a potentially dangerous use. |
1662 | |
1663 | if (CB.hasRetAttr(Kind: Attribute::NoUndef) || |
1664 | (RetVal->hasOneUse() && !RetVal->hasRetAttr(Kind: Attribute::NoUndef))) |
1665 | NewAL = NewAL.addRetAttributes(C&: Context, B: ValidPG); |
1666 | } |
1667 | NewRetVal->setAttributes(NewAL); |
1668 | } |
1669 | } |
1670 | |
1671 | /// If the inlined function has non-byval align arguments, then |
1672 | /// add @llvm.assume-based alignment assumptions to preserve this information. |
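/// For example (illustrative only), for a callee parameter declared
/// "ptr align 16 %p", the assumption inserted at the call site takes the
/// operand-bundle form, e.g.:
///   call void @llvm.assume(i1 true) [ "align"(ptr %arg, i64 16) ]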
1673 | static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { |
1674 | if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) |
1675 | return; |
1676 | |
1677 | AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller()); |
1678 | auto &DL = CB.getDataLayout(); |
1679 | |
1680 | // To avoid inserting redundant assumptions, we should check for assumptions |
1681 | // already in the caller. To do this, we might need a DT of the caller. |
1682 | DominatorTree DT; |
1683 | bool DTCalculated = false; |
1684 | |
1685 | Function *CalledFunc = CB.getCalledFunction(); |
1686 | for (Argument &Arg : CalledFunc->args()) { |
1687 | if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() || |
1688 | Arg.use_empty()) |
1689 | continue; |
1690 | MaybeAlign Alignment = Arg.getParamAlign(); |
1691 | if (!Alignment) |
1692 | continue; |
1693 | |
1694 | if (!DTCalculated) { |
1695 | DT.recalculate(Func&: *CB.getCaller()); |
1696 | DTCalculated = true; |
1697 | } |
1698 | // If we can already prove the asserted alignment in the context of the |
1699 | // caller, then don't bother inserting the assumption. |
1700 | Value *ArgVal = CB.getArgOperand(i: Arg.getArgNo()); |
1701 | if (getKnownAlignment(V: ArgVal, DL, CxtI: &CB, AC, DT: &DT) >= *Alignment) |
1702 | continue; |
1703 | |
1704 | CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption( |
1705 | DL, PtrValue: ArgVal, Alignment: Alignment->value()); |
1706 | AC->registerAssumption(CI: cast<AssumeInst>(Val: NewAsmp)); |
1707 | } |
1708 | } |
1709 | |
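/// Emit the explicit memcpy that initializes the temporary created for a
/// byval argument (see HandleByValArgument below): copy
/// getTypeStoreSize(ByValType) bytes from Src into Dst at the start of
/// InsertBlock, and assign the memcpy a dummy debug location when the
/// verifier requires one.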
1710 | static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src, |
1711 | MaybeAlign SrcAlign, Module *M, |
1712 | BasicBlock *InsertBlock, |
1713 | InlineFunctionInfo &IFI, |
1714 | Function *CalledFunc) { |
1715 | IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); |
1716 | |
1717 | Value *Size = |
1718 | Builder.getInt64(C: M->getDataLayout().getTypeStoreSize(Ty: ByValType)); |
1719 | |
1720 | Align DstAlign = Dst->getPointerAlignment(DL: M->getDataLayout()); |
1721 | |
1722 | // Generate a memcpy with the correct alignments. |
1723 | CallInst *CI = Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size); |
1724 | |
1725 | // The verifier requires that all calls of debug-info-bearing functions |
1726 | // from debug-info-bearing functions have a debug location (for inlining |
1727 | // purposes). Assign a dummy location to satisfy the constraint. |
1728 | if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram()) |
1729 | if (DISubprogram *SP = CalledFunc->getSubprogram()) |
1730 | CI->setDebugLoc(DILocation::get(Context&: SP->getContext(), Line: 0, Column: 0, Scope: SP)); |
1731 | } |
1732 | |
1733 | /// When inlining a call site that has a byval argument, |
1734 | /// we have to make the implicit memcpy explicit by adding it. |
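/// A rough sketch (illustrative only): for a call such as
///   call void @f(ptr byval(%struct.S) align 8 %src)
/// the inlined body is given a fresh alloca in the caller's entry block and
/// an explicit copy at the call site, e.g.:
///   %src.tmp = alloca %struct.S, align 8
///   call void @llvm.memcpy.p0.p0.i64(ptr %src.tmp, ptr %src, i64 <size>, i1 false)
/// unless the callee only reads memory and the alignment already suffices, in
/// which case the copy is elided and the original pointer is reused.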
1735 | static Value *HandleByValArgument(Type *ByValType, Value *Arg, |
1736 | Instruction *TheCall, |
1737 | const Function *CalledFunc, |
1738 | InlineFunctionInfo &IFI, |
1739 | MaybeAlign ByValAlignment) { |
1740 | Function *Caller = TheCall->getFunction(); |
1741 | const DataLayout &DL = Caller->getDataLayout(); |
1742 | |
1743 | // If the called function is readonly, then it could not mutate the caller's |
1744 | // copy of the byval'd memory. In this case, it is safe to elide the copy and |
1745 | // temporary. |
1746 | if (CalledFunc->onlyReadsMemory()) { |
    // If the byval argument has a specified alignment that is greater than
    // the alignment of the passed-in pointer, then we either have to round up
    // the input pointer or give up on this transformation.
1750 | if (ByValAlignment.valueOrOne() == 1) |
1751 | return Arg; |
1752 | |
1753 | AssumptionCache *AC = |
1754 | IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; |
1755 | |
1756 | // If the pointer is already known to be sufficiently aligned, or if we can |
1757 | // round it up to a larger alignment, then we don't need a temporary. |
1758 | if (getOrEnforceKnownAlignment(V: Arg, PrefAlign: *ByValAlignment, DL, CxtI: TheCall, AC) >= |
1759 | *ByValAlignment) |
1760 | return Arg; |
1761 | |
1762 | // Otherwise, we have to make a memcpy to get a safe alignment. This is bad |
1763 | // for code quality, but rarely happens and is required for correctness. |
1764 | } |
1765 | |
1766 | // Create the alloca. If we have DataLayout, use nice alignment. |
1767 | Align Alignment = DL.getPrefTypeAlign(Ty: ByValType); |
1768 | |
1769 | // If the byval had an alignment specified, we *must* use at least that |
1770 | // alignment, as it is required by the byval argument (and uses of the |
1771 | // pointer inside the callee). |
1772 | if (ByValAlignment) |
1773 | Alignment = std::max(a: Alignment, b: *ByValAlignment); |
1774 | |
1775 | AllocaInst *NewAlloca = |
1776 | new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(), |
1777 | nullptr, Alignment, Arg->getName()); |
1778 | NewAlloca->setDebugLoc(DebugLoc::getCompilerGenerated()); |
1779 | NewAlloca->insertBefore(InsertPos: Caller->begin()->begin()); |
1780 | IFI.StaticAllocas.push_back(Elt: NewAlloca); |
1781 | |
1782 | // Uses of the argument in the function should use our new alloca |
1783 | // instead. |
1784 | return NewAlloca; |
1785 | } |
1786 | |
1787 | // Check whether this Value is used by a lifetime intrinsic. |
1788 | static bool isUsedByLifetimeMarker(Value *V) { |
1789 | for (User *U : V->users()) |
1790 | if (isa<LifetimeIntrinsic>(Val: U)) |
1791 | return true; |
1792 | return false; |
1793 | } |
1794 | |
1795 | // Check whether the given alloca already has |
1796 | // lifetime.start or lifetime.end intrinsics. |
1797 | static bool hasLifetimeMarkers(AllocaInst *AI) { |
1798 | Type *Ty = AI->getType(); |
1799 | Type *Int8PtrTy = |
1800 | PointerType::get(C&: Ty->getContext(), AddressSpace: Ty->getPointerAddressSpace()); |
1801 | if (Ty == Int8PtrTy) |
1802 | return isUsedByLifetimeMarker(V: AI); |
1803 | |
1804 | // Do a scan to find all the casts to i8*. |
1805 | for (User *U : AI->users()) { |
1806 | if (U->getType() != Int8PtrTy) continue; |
1807 | if (U->stripPointerCasts() != AI) continue; |
1808 | if (isUsedByLifetimeMarker(V: U)) |
1809 | return true; |
1810 | } |
1811 | return false; |
1812 | } |
1813 | |
1814 | /// Return the result of AI->isStaticAlloca() if AI were moved to the entry |
1815 | /// block. Allocas used in inalloca calls and allocas of dynamic array size |
1816 | /// cannot be static. |
static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1818 | return isa<Constant>(Val: AI->getArraySize()) && !AI->isUsedWithInAlloca(); |
1819 | } |
1820 | |
1821 | /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL |
1822 | /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache. |
1823 | static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, |
1824 | LLVMContext &Ctx, |
1825 | DenseMap<const MDNode *, MDNode *> &IANodes) { |
1826 | auto IA = DebugLoc::appendInlinedAt(DL: OrigDL, InlinedAt, Ctx, Cache&: IANodes); |
1827 | return DILocation::get(Context&: Ctx, Line: OrigDL.getLine(), Column: OrigDL.getCol(), |
1828 | Scope: OrigDL.getScope(), InlinedAt: IA, ImplicitCode: OrigDL.isImplicitCode(), |
1829 | AtomGroup: OrigDL->getAtomGroup(), AtomRank: OrigDL->getAtomRank()); |
1830 | } |
1831 | |
/// Update inlined instructions' line numbers to encode the location where
/// these instructions are inlined.
1834 | static void fixupLineNumbers(Function *Fn, Function::iterator FI, |
1835 | Instruction *TheCall, bool CalleeHasDebugInfo) { |
1836 | if (!TheCall->getDebugLoc()) |
1837 | return; |
1838 | |
1839 | // Don't propagate the source location atom from the call to inlined nodebug |
1840 | // instructions, and avoid putting it in the InlinedAt field of inlined |
1841 | // not-nodebug instructions. FIXME: Possibly worth transferring/generating |
1842 | // an atom for the returned value, otherwise we miss stepping on inlined |
1843 | // nodebug functions (which is different to existing behaviour). |
1844 | DebugLoc TheCallDL = TheCall->getDebugLoc()->getWithoutAtom(); |
1845 | |
1846 | auto &Ctx = Fn->getContext(); |
1847 | DILocation *InlinedAtNode = TheCallDL; |
1848 | |
1849 | // Create a unique call site, not to be confused with any other call from the |
1850 | // same location. |
1851 | InlinedAtNode = DILocation::getDistinct( |
1852 | Context&: Ctx, Line: InlinedAtNode->getLine(), Column: InlinedAtNode->getColumn(), |
1853 | Scope: InlinedAtNode->getScope(), InlinedAt: InlinedAtNode->getInlinedAt()); |
1854 | |
  // Cache the inlined-at nodes as they're built so they are reused; without
  // this, every instruction's inlined-at chain would become distinct from the
  // others.
1858 | DenseMap<const MDNode *, MDNode *> IANodes; |
1859 | |
1860 | // Check if we are not generating inline line tables and want to use |
1861 | // the call site location instead. |
  bool NoInlineLineTables = Fn->hasFnAttribute(Kind: "no-inline-line-tables");
1863 | |
1864 | // Helper-util for updating the metadata attached to an instruction. |
1865 | auto UpdateInst = [&](Instruction &I) { |
1866 | // Loop metadata needs to be updated so that the start and end locs |
1867 | // reference inlined-at locations. |
1868 | auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, |
1869 | &IANodes](Metadata *MD) -> Metadata * { |
1870 | if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD)) |
1871 | return inlineDebugLoc(OrigDL: Loc, InlinedAt: InlinedAtNode, Ctx, IANodes).get(); |
1872 | return MD; |
1873 | }; |
1874 | updateLoopMetadataDebugLocations(I, Updater: updateLoopInfoLoc); |
1875 | |
1876 | if (!NoInlineLineTables) |
1877 | if (DebugLoc DL = I.getDebugLoc()) { |
1878 | DebugLoc IDL = |
1879 | inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, Ctx&: I.getContext(), IANodes); |
1880 | I.setDebugLoc(IDL); |
1881 | return; |
1882 | } |
1883 | |
1884 | if (CalleeHasDebugInfo && !NoInlineLineTables) |
1885 | return; |
1886 | |
1887 | // If the inlined instruction has no line number, or if inline info |
1888 | // is not being generated, make it look as if it originates from the call |
1889 | // location. This is important for ((__always_inline, __nodebug__)) |
1890 | // functions which must use caller location for all instructions in their |
1891 | // function body. |
1892 | |
1893 | // Don't update static allocas, as they may get moved later. |
1894 | if (auto *AI = dyn_cast<AllocaInst>(Val: &I)) |
1895 | if (allocaWouldBeStaticInEntry(AI)) |
1896 | return; |
1897 | |
1898 | // Do not force a debug loc for pseudo probes, since they do not need to |
1899 | // be debuggable, and also they are expected to have a zero/null dwarf |
1900 | // discriminator at this point which could be violated otherwise. |
1901 | if (isa<PseudoProbeInst>(Val: I)) |
1902 | return; |
1903 | |
1904 | I.setDebugLoc(TheCallDL); |
1905 | }; |
1906 | |
1907 | // Helper-util for updating debug-info records attached to instructions. |
1908 | auto UpdateDVR = [&](DbgRecord *DVR) { |
    assert(DVR->getDebugLoc() && "Debug Value must have debug loc");
1910 | if (NoInlineLineTables) { |
1911 | DVR->setDebugLoc(TheCallDL); |
1912 | return; |
1913 | } |
1914 | DebugLoc DL = DVR->getDebugLoc(); |
1915 | DebugLoc IDL = |
1916 | inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, |
1917 | Ctx&: DVR->getMarker()->getParent()->getContext(), IANodes); |
1918 | DVR->setDebugLoc(IDL); |
1919 | }; |
1920 | |
1921 | // Iterate over all instructions, updating metadata and debug-info records. |
1922 | for (; FI != Fn->end(); ++FI) { |
1923 | for (Instruction &I : *FI) { |
1924 | UpdateInst(I); |
1925 | for (DbgRecord &DVR : I.getDbgRecordRange()) { |
1926 | UpdateDVR(&DVR); |
1927 | } |
1928 | } |
1929 | |
1930 | // Remove debug info records if we're not keeping inline info. |
1931 | if (NoInlineLineTables) { |
1932 | BasicBlock::iterator BI = FI->begin(); |
1933 | while (BI != FI->end()) { |
1934 | BI->dropDbgRecords(); |
1935 | ++BI; |
1936 | } |
1937 | } |
1938 | } |
1939 | } |
1940 | |
1941 | #undef DEBUG_TYPE |
1942 | #define DEBUG_TYPE "assignment-tracking" |
1943 | /// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB. |
1944 | static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL, |
1945 | const CallBase &CB) { |
1946 | at::StorageToVarsMap EscapedLocals; |
1947 | SmallPtrSet<const Value *, 4> SeenBases; |
1948 | |
  LLVM_DEBUG(
      errs() << "# Finding caller local variables escaped by callee\n");
  for (const Value *Arg : CB.args()) {
    LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
    if (!Arg->getType()->isPointerTy()) {
      LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
1955 | continue; |
1956 | } |
1957 | |
1958 | const Instruction *I = dyn_cast<Instruction>(Val: Arg); |
1959 | if (!I) { |
      LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
1961 | continue; |
1962 | } |
1963 | |
1964 | // Walk back to the base storage. |
1965 | assert(Arg->getType()->isPtrOrPtrVectorTy()); |
1966 | APInt TmpOffset(DL.getIndexTypeSizeInBits(Ty: Arg->getType()), 0, false); |
1967 | const AllocaInst *Base = dyn_cast<AllocaInst>( |
1968 | Val: Arg->stripAndAccumulateConstantOffsets(DL, Offset&: TmpOffset, AllowNonInbounds: true)); |
1969 | if (!Base) { |
      LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
1971 | continue; |
1972 | } |
1973 | |
1974 | assert(Base); |
    LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
1976 | // We only need to process each base address once - skip any duplicates. |
1977 | if (!SeenBases.insert(Ptr: Base).second) |
1978 | continue; |
1979 | |
1980 | // Find all local variables associated with the backing storage. |
1981 | auto CollectAssignsForStorage = [&](auto *DbgAssign) { |
1982 | // Skip variables from inlined functions - they are not local variables. |
1983 | if (DbgAssign->getDebugLoc().getInlinedAt()) |
1984 | return; |
      LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
1986 | EscapedLocals[Base].insert(X: at::VarRecord(DbgAssign)); |
1987 | }; |
1988 | for_each(Range: at::getAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage); |
1989 | for_each(Range: at::getDVRAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage); |
1990 | } |
1991 | return EscapedLocals; |
1992 | } |
1993 | |
1994 | static void trackInlinedStores(Function::iterator Start, Function::iterator End, |
1995 | const CallBase &CB) { |
1996 | LLVM_DEBUG(errs() << "trackInlinedStores into " |
1997 | << Start->getParent()->getName() << " from " |
1998 | << CB.getCalledFunction()->getName() << "\n" ); |
1999 | const DataLayout &DL = CB.getDataLayout(); |
2000 | at::trackAssignments(Start, End, Vars: collectEscapedLocals(DL, CB), DL); |
2001 | } |
2002 | |
2003 | /// Update inlined instructions' DIAssignID metadata. We need to do this |
2004 | /// otherwise a function inlined more than once into the same function |
2005 | /// will cause DIAssignID to be shared by many instructions. |
2006 | static void fixupAssignments(Function::iterator Start, Function::iterator End) { |
2007 | DenseMap<DIAssignID *, DIAssignID *> Map; |
2008 | // Loop over all the inlined instructions. If we find a DIAssignID |
2009 | // attachment or use, replace it with a new version. |
2010 | for (auto BBI = Start; BBI != End; ++BBI) { |
2011 | for (Instruction &I : *BBI) |
2012 | at::remapAssignID(Map, I); |
2013 | } |
2014 | } |
2015 | #undef DEBUG_TYPE |
2016 | #define DEBUG_TYPE "inline-function" |
2017 | |
2018 | /// Update the block frequencies of the caller after a callee has been inlined. |
2019 | /// |
2020 | /// Each block cloned into the caller has its block frequency scaled by the |
2021 | /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of |
2022 | /// callee's entry block gets the same frequency as the callsite block and the |
2023 | /// relative frequencies of all cloned blocks remain the same after cloning. |
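/// For example (illustrative numbers): with a call site frequency of 200 and
/// a callee entry frequency of 100, a callee block with frequency 50 is
/// cloned into the caller with frequency 50 * 200 / 100 = 100.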
2024 | static void updateCallerBFI(BasicBlock *CallSiteBlock, |
2025 | const ValueToValueMapTy &VMap, |
2026 | BlockFrequencyInfo *CallerBFI, |
2027 | BlockFrequencyInfo *CalleeBFI, |
2028 | const BasicBlock &CalleeEntryBlock) { |
2029 | SmallPtrSet<BasicBlock *, 16> ClonedBBs; |
2030 | for (auto Entry : VMap) { |
2031 | if (!isa<BasicBlock>(Val: Entry.first) || !Entry.second) |
2032 | continue; |
2033 | auto *OrigBB = cast<BasicBlock>(Val: Entry.first); |
2034 | auto *ClonedBB = cast<BasicBlock>(Val: Entry.second); |
2035 | BlockFrequency Freq = CalleeBFI->getBlockFreq(BB: OrigBB); |
2036 | if (!ClonedBBs.insert(Ptr: ClonedBB).second) { |
2037 | // Multiple blocks in the callee might get mapped to one cloned block in |
2038 | // the caller since we prune the callee as we clone it. When that happens, |
2039 | // we want to use the maximum among the original blocks' frequencies. |
2040 | BlockFrequency NewFreq = CallerBFI->getBlockFreq(BB: ClonedBB); |
2041 | if (NewFreq > Freq) |
2042 | Freq = NewFreq; |
2043 | } |
2044 | CallerBFI->setBlockFreq(BB: ClonedBB, Freq); |
2045 | } |
2046 | BasicBlock *EntryClone = cast<BasicBlock>(Val: VMap.lookup(Val: &CalleeEntryBlock)); |
2047 | CallerBFI->setBlockFreqAndScale( |
2048 | ReferenceBB: EntryClone, Freq: CallerBFI->getBlockFreq(BB: CallSiteBlock), BlocksToScale&: ClonedBBs); |
2049 | } |
2050 | |
2051 | /// Update the branch metadata for cloned call instructions. |
2052 | static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, |
2053 | const ProfileCount &CalleeEntryCount, |
2054 | const CallBase &TheCall, ProfileSummaryInfo *PSI, |
2055 | BlockFrequencyInfo *CallerBFI) { |
2056 | if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1) |
2057 | return; |
2058 | auto CallSiteCount = |
2059 | PSI ? PSI->getProfileCount(CallInst: TheCall, BFI: CallerBFI) : std::nullopt; |
2060 | int64_t CallCount = |
2061 | std::min(a: CallSiteCount.value_or(u: 0), b: CalleeEntryCount.getCount()); |
2062 | updateProfileCallee(Callee, EntryDelta: -CallCount, VMap: &VMap); |
2063 | } |
2064 | |
2065 | void llvm::updateProfileCallee( |
2066 | Function *Callee, int64_t EntryDelta, |
2067 | const ValueMap<const Value *, WeakTrackingVH> *VMap) { |
2068 | auto CalleeCount = Callee->getEntryCount(); |
2069 | if (!CalleeCount) |
2070 | return; |
2071 | |
2072 | const uint64_t PriorEntryCount = CalleeCount->getCount(); |
2073 | |
  // Since CallSiteCount is an estimate, it could exceed the original callee
  // count; clamp the new entry count to 0 to guard against underflow.
2076 | const uint64_t NewEntryCount = |
2077 | (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount) |
2078 | ? 0 |
2079 | : PriorEntryCount + EntryDelta; |
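  // For example (illustrative numbers): with PriorEntryCount = 100, an
  // EntryDelta of -30 yields NewEntryCount = 70, while -150 clamps it to 0.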
2080 | |
2081 | auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount, |
2082 | const uint64_t PriorEntryCount) { |
2083 | Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB); |
2084 | if (VPtr) |
2085 | scaleProfData(I&: *VPtr, S: NewEntryCount, T: PriorEntryCount); |
2086 | }; |
2087 | |
  // During inlining?
2089 | if (VMap) { |
2090 | uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; |
2091 | for (auto Entry : *VMap) { |
2092 | if (isa<CallInst>(Val: Entry.first)) |
2093 | if (auto *CI = dyn_cast_or_null<CallInst>(Val: Entry.second)) { |
2094 | CI->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount); |
2095 | updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount); |
2096 | } |
2097 | |
2098 | if (isa<InvokeInst>(Val: Entry.first)) |
2099 | if (auto *II = dyn_cast_or_null<InvokeInst>(Val: Entry.second)) { |
2100 | II->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount); |
2101 | updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount); |
2102 | } |
2103 | } |
2104 | } |
2105 | |
2106 | if (EntryDelta) { |
2107 | Callee->setEntryCount(Count: NewEntryCount); |
2108 | |
2109 | for (BasicBlock &BB : *Callee) |
2110 | // No need to update the callsite if it is pruned during inlining. |
2111 | if (!VMap || VMap->count(Val: &BB)) |
2112 | for (Instruction &I : BB) { |
2113 | if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) { |
2114 | CI->updateProfWeight(S: NewEntryCount, T: PriorEntryCount); |
2115 | updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount); |
2116 | } |
2117 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &I)) { |
2118 | II->updateProfWeight(S: NewEntryCount, T: PriorEntryCount); |
2119 | updateVTableProfWeight(II, NewEntryCount, PriorEntryCount); |
2120 | } |
2121 | } |
2122 | } |
2123 | } |
2124 | |
2125 | /// An operand bundle "clang.arc.attachedcall" on a call indicates the call |
2126 | /// result is implicitly consumed by a call to retainRV or claimRV immediately |
2127 | /// after the call. This function inlines the retainRV/claimRV calls. |
2128 | /// |
2129 | /// There are three cases to consider: |
2130 | /// |
2131 | /// 1. If there is a call to autoreleaseRV that takes a pointer to the returned |
2132 | /// object in the callee return block, the autoreleaseRV call and the |
2133 | /// retainRV/claimRV call in the caller cancel out. If the call in the caller |
2134 | /// is a claimRV call, a call to objc_release is emitted. |
2135 | /// |
2136 | /// 2. If there is a call in the callee return block that doesn't have operand |
2137 | /// bundle "clang.arc.attachedcall", the operand bundle on the original call |
2138 | /// is transferred to the call in the callee. |
2139 | /// |
2140 | /// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is |
2141 | /// a retainRV call. |
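/// A sketch of case 1 (illustrative only): if the callee's return block ends
/// in
///   call ptr @llvm.objc.autoreleaseReturnValue(ptr %v)   ; result unused
///   ret ptr %v
/// and the caller's call carries the retainRV form of the bundle, the
/// autoreleaseRV call is simply erased; for claimRV, a call to objc_release
/// of %v is emitted in its place.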
2142 | static void |
2143 | inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, |
2144 | const SmallVectorImpl<ReturnInst *> &Returns) { |
  assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
2146 | bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV, |
2147 | IsUnsafeClaimRV = !IsRetainRV; |
2148 | |
2149 | for (auto *RI : Returns) { |
2150 | Value *RetOpnd = objcarc::GetRCIdentityRoot(V: RI->getOperand(i_nocapture: 0)); |
2151 | bool InsertRetainCall = IsRetainRV; |
2152 | IRBuilder<> Builder(RI->getContext()); |
2153 | |
2154 | // Walk backwards through the basic block looking for either a matching |
2155 | // autoreleaseRV call or an unannotated call. |
2156 | auto InstRange = llvm::make_range(x: ++(RI->getIterator().getReverse()), |
2157 | y: RI->getParent()->rend()); |
2158 | for (Instruction &I : llvm::make_early_inc_range(Range&: InstRange)) { |
2159 | // Ignore casts. |
2160 | if (isa<CastInst>(Val: I)) |
2161 | continue; |
2162 | |
2163 | if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) { |
2164 | if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue || |
2165 | !II->use_empty() || |
2166 | objcarc::GetRCIdentityRoot(V: II->getOperand(i_nocapture: 0)) != RetOpnd) |
2167 | break; |
2168 | |
        // If we've found a matching autoreleaseRV call:
2170 | // - If claimRV is attached to the call, insert a call to objc_release |
2171 | // and erase the autoreleaseRV call. |
2172 | // - If retainRV is attached to the call, just erase the autoreleaseRV |
2173 | // call. |
2174 | if (IsUnsafeClaimRV) { |
2175 | Builder.SetInsertPoint(II); |
2176 | Builder.CreateIntrinsic(ID: Intrinsic::objc_release, Args: RetOpnd); |
2177 | } |
2178 | II->eraseFromParent(); |
2179 | InsertRetainCall = false; |
2180 | break; |
2181 | } |
2182 | |
2183 | auto *CI = dyn_cast<CallInst>(Val: &I); |
2184 | |
2185 | if (!CI) |
2186 | break; |
2187 | |
2188 | if (objcarc::GetRCIdentityRoot(V: CI) != RetOpnd || |
2189 | objcarc::hasAttachedCallOpBundle(CB: CI)) |
2190 | break; |
2191 | |
2192 | // If we've found an unannotated call that defines RetOpnd, add a |
2193 | // "clang.arc.attachedcall" operand bundle. |
2194 | Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(CB: &CB)}; |
      OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
2196 | auto *NewCall = CallBase::addOperandBundle( |
2197 | CB: CI, ID: LLVMContext::OB_clang_arc_attachedcall, OB, InsertPt: CI->getIterator()); |
2198 | NewCall->copyMetadata(SrcInst: *CI); |
2199 | CI->replaceAllUsesWith(V: NewCall); |
2200 | CI->eraseFromParent(); |
2201 | InsertRetainCall = false; |
2202 | break; |
2203 | } |
2204 | |
2205 | if (InsertRetainCall) { |
2206 | // The retainRV is attached to the call and we've failed to find a |
2207 | // matching autoreleaseRV or an annotated call in the callee. Emit a call |
2208 | // to objc_retain. |
2209 | Builder.SetInsertPoint(RI); |
2210 | Builder.CreateIntrinsic(ID: Intrinsic::objc_retain, Args: RetOpnd); |
2211 | } |
2212 | } |
2213 | } |
2214 | |
2215 | // In contextual profiling, when an inline succeeds, we want to remap the |
2216 | // indices of the callee into the index space of the caller. We can't just leave |
2217 | // them as-is because the same callee may appear in other places in this caller |
2218 | // (other callsites), and its (callee's) counters and sub-contextual profile |
2219 | // tree would be potentially different. |
2220 | // Not all BBs of the callee may survive the opportunistic DCE InlineFunction |
2221 | // does (same goes for callsites in the callee). |
2222 | // We will return a pair of vectors, one for basic block IDs and one for |
2223 | // callsites. For such a vector V, V[Idx] will be -1 if the callee |
2224 | // instrumentation with index Idx did not survive inlining, and a new value |
2225 | // otherwise. |
2226 | // This function will update the caller's instrumentation intrinsics |
2227 | // accordingly, mapping indices as described above. We also replace the "name" |
2228 | // operand because we use it to distinguish between "own" instrumentation and |
2229 | // "from callee" instrumentation when performing the traversal of the CFG of the |
2230 | // caller. We traverse depth-first from the callsite's BB and up to the point we |
2231 | // hit BBs owned by the caller. |
2232 | // The return values will be then used to update the contextual |
2233 | // profile. Note: we only update the "name" and "index" operands in the |
2234 | // instrumentation intrinsics, we leave the hash and total nr of indices as-is, |
2235 | // it's not worth updating those. |
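// For example (illustrative): if the callee had 3 counters and only the ones
// originally at indices 1 and 2 survive inlining, the returned counter vector
// could be {-1, 7, 8}, where 7 and 8 are freshly allocated indices in the
// caller's counter space.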
2236 | static std::pair<std::vector<int64_t>, std::vector<int64_t>> |
2237 | remapIndices(Function &Caller, BasicBlock *StartBB, |
2238 | PGOContextualProfile &CtxProf, uint32_t CalleeCounters, |
2239 | uint32_t CalleeCallsites) { |
2240 | // We'll allocate a new ID to imported callsite counters and callsites. We're |
2241 | // using -1 to indicate a counter we delete. Most likely the entry ID, for |
2242 | // example, will be deleted - we don't want 2 IDs in the same BB, and the |
2243 | // entry would have been cloned in the callsite's old BB. |
2244 | std::vector<int64_t> CalleeCounterMap; |
2245 | std::vector<int64_t> CalleeCallsiteMap; |
2246 | CalleeCounterMap.resize(new_size: CalleeCounters, x: -1); |
2247 | CalleeCallsiteMap.resize(new_size: CalleeCallsites, x: -1); |
2248 | |
2249 | auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool { |
2250 | if (Ins.getNameValue() == &Caller) |
2251 | return false; |
2252 | const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue()); |
2253 | if (CalleeCounterMap[OldID] == -1) |
2254 | CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(F: Caller); |
2255 | const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]); |
2256 | |
2257 | Ins.setNameValue(&Caller); |
2258 | Ins.setIndex(NewID); |
2259 | return true; |
2260 | }; |
2261 | |
2262 | auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins) -> bool { |
2263 | if (Ins.getNameValue() == &Caller) |
2264 | return false; |
2265 | const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue()); |
2266 | if (CalleeCallsiteMap[OldID] == -1) |
2267 | CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(F: Caller); |
2268 | const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]); |
2269 | |
2270 | Ins.setNameValue(&Caller); |
2271 | Ins.setIndex(NewID); |
2272 | return true; |
2273 | }; |
2274 | |
2275 | std::deque<BasicBlock *> Worklist; |
2276 | DenseSet<const BasicBlock *> Seen; |
2277 | // We will traverse the BBs starting from the callsite BB. The callsite BB |
2278 | // will have at least a BB ID - maybe its own, and in any case the one coming |
2279 | // from the cloned function's entry BB. The other BBs we'll start seeing from |
2280 | // there on may or may not have BB IDs. BBs with IDs belonging to our caller |
2281 | // are definitely not coming from the imported function and form a boundary |
2282 | // past which we don't need to traverse anymore. BBs may have no |
2283 | // instrumentation (because we originally inserted instrumentation as per |
2284 | // MST), in which case we'll traverse past them. An invariant we'll keep is |
2285 | // that a BB will have at most 1 BB ID. For example, in the callsite BB, we |
2286 | // will delete the callee BB's instrumentation. This doesn't result in |
2287 | // information loss: the entry BB of the callee will have the same count as |
2288 | // the callsite's BB. At the end of this traversal, all the callee's |
2289 | // instrumentation would be mapped into the caller's instrumentation index |
2290 | // space. Some of the callee's counters may be deleted (as mentioned, this |
2291 | // should result in no loss of information). |
2292 | Worklist.push_back(x: StartBB); |
2293 | while (!Worklist.empty()) { |
2294 | auto *BB = Worklist.front(); |
2295 | Worklist.pop_front(); |
2296 | bool Changed = false; |
2297 | auto *BBID = CtxProfAnalysis::getBBInstrumentation(BB&: *BB); |
2298 | if (BBID) { |
2299 | Changed |= RewriteInstrIfNeeded(*BBID); |
      // This may be the entry block from the inlined callee, coming into a BB
      // that didn't have instrumentation because of MST decisions. Let's make
      // sure it's placed accordingly. This is a noop elsewhere.
      BBID->moveBefore(InsertPos: BB->getFirstInsertionPt());
2304 | } |
2305 | for (auto &I : llvm::make_early_inc_range(Range&: *BB)) { |
2306 | if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Val: &I)) { |
2307 | if (isa<InstrProfIncrementInstStep>(Val: Inc)) { |
          // Step instrumentation is used for select instructions. Inlining
          // may have propagated a constant resulting in the condition of the
          // select being resolved, in which case function cloning resolves
          // the value of the select and elides the select instruction. If
          // that is the case, the step parameter of the instrumentation will
          // reflect that, and we can delete the instrumentation.
2314 | if (isa<Constant>(Val: Inc->getStep())) { |
2315 | assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode())); |
2316 | Inc->eraseFromParent(); |
2317 | } else { |
2318 | assert(isa_and_nonnull<SelectInst>(Inc->getNextNode())); |
2319 | RewriteInstrIfNeeded(*Inc); |
2320 | } |
2321 | } else if (Inc != BBID) { |
2322 | // If we're here it means that the BB had more than 1 IDs, presumably |
2323 | // some coming from the callee. We "made up our mind" to keep the |
2324 | // first one (which may or may not have been originally the caller's). |
2325 | // All the others are superfluous and we delete them. |
2326 | Inc->eraseFromParent(); |
2327 | Changed = true; |
2328 | } |
2329 | } else if (auto *CS = dyn_cast<InstrProfCallsite>(Val: &I)) { |
2330 | Changed |= RewriteCallsiteInsIfNeeded(*CS); |
2331 | } |
2332 | } |
2333 | if (!BBID || Changed) |
2334 | for (auto *Succ : successors(BB)) |
2335 | if (Seen.insert(V: Succ).second) |
2336 | Worklist.push_back(x: Succ); |
2337 | } |
2338 | |
  assert(!llvm::is_contained(CalleeCounterMap, 0) &&
         "Counter index mapping should be either to -1 or to non-zero index, "
         "because the 0 index corresponds to the entry BB of the caller");
  assert(!llvm::is_contained(CalleeCallsiteMap, 0) &&
         "Callsite index mapping should be either to -1 or to non-zero index, "
         "because there should have been at least a callsite - the inlined one "
         "- which would have had a 0 index.");
2347 | |
2348 | return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)}; |
2349 | } |
2350 | |
2351 | // Inline. If successful, update the contextual profile (if a valid one is |
2352 | // given). |
2353 | // The contextual profile data is organized in trees, as follows: |
2354 | // - each node corresponds to a function |
2355 | // - the root of each tree corresponds to an "entrypoint" - e.g. |
2356 | // RPC handler for server side |
2357 | // - the path from the root to a node is a particular call path |
2358 | // - the counters stored in a node are counter values observed in that |
2359 | // particular call path ("context") |
2360 | // - the edges between nodes are annotated with callsite IDs. |
2361 | // |
2362 | // Updating the contextual profile after an inlining means, at a high level, |
2363 | // copying over the data of the callee, **intentionally without any value |
2364 | // scaling**, and copying over the callees of the inlined callee. |
2365 | llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, |
2366 | PGOContextualProfile &CtxProf, |
2367 | bool MergeAttributes, |
2368 | AAResults *CalleeAAR, |
2369 | bool InsertLifetime, |
2370 | Function *ForwardVarArgsTo) { |
2371 | if (!CtxProf.isInSpecializedModule()) |
2372 | return InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, |
2373 | ForwardVarArgsTo); |
2374 | |
2375 | auto &Caller = *CB.getCaller(); |
2376 | auto &Callee = *CB.getCalledFunction(); |
2377 | auto *StartBB = CB.getParent(); |
2378 | |
2379 | // Get some preliminary data about the callsite before it might get inlined. |
2380 | // Inlining shouldn't delete the callee, but it's cleaner (and low-cost) to |
2381 | // get this data upfront and rely less on InlineFunction's behavior. |
2382 | const auto CalleeGUID = AssignGUIDPass::getGUID(F: Callee); |
2383 | auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB); |
2384 | const auto CallsiteID = |
2385 | static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue()); |
2386 | |
2387 | const auto NumCalleeCounters = CtxProf.getNumCounters(F: Callee); |
2388 | const auto NumCalleeCallsites = CtxProf.getNumCallsites(F: Callee); |
2389 | |
2390 | auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, |
2391 | ForwardVarArgsTo); |
2392 | if (!Ret.isSuccess()) |
2393 | return Ret; |
2394 | |
2395 | // Inlining succeeded, we don't need the instrumentation of the inlined |
2396 | // callsite. |
2397 | CallsiteIDIns->eraseFromParent(); |
2398 | |
// Assigning the maps first and then capturing references to them in the
// lambda, because captured structured bindings are a C++20 extension. We do
// still need a capture here, though.
2402 | const auto IndicesMaps = remapIndices(Caller, StartBB, CtxProf, |
2403 | CalleeCounters: NumCalleeCounters, CalleeCallsites: NumCalleeCallsites); |
const uint32_t NewCountersSize = CtxProf.getNumCounters(Caller);
2405 | |
2406 | auto Updater = [&](PGOCtxProfContext &Ctx) { |
2407 | assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller)); |
2408 | const auto &[CalleeCounterMap, CalleeCallsiteMap] = IndicesMaps; |
2409 | assert( |
2410 | (Ctx.counters().size() + |
2411 | llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) == |
2412 | NewCountersSize) && |
2413 | "The caller's counters size should have grown by the number of new " |
2414 | "distinct counters inherited from the inlined callee." ); |
2415 | Ctx.resizeCounters(Size: NewCountersSize); |
// If the callsite wasn't exercised in this context, the counter values
// coming from it are 0 - which is what they are right now, after the
// resize - so we're done.
2419 | auto CSIt = Ctx.callsites().find(x: CallsiteID); |
2420 | if (CSIt == Ctx.callsites().end()) |
2421 | return; |
2422 | auto CalleeCtxIt = CSIt->second.find(x: CalleeGUID); |
2423 | // The callsite was exercised, but not with this callee (so presumably this |
2424 | // is an indirect callsite). Again, we're done here. |
2425 | if (CalleeCtxIt == CSIt->second.end()) |
2426 | return; |
2427 | |
2428 | // Let's pull in the counter values and the subcontexts coming from the |
2429 | // inlined callee. |
2430 | auto &CalleeCtx = CalleeCtxIt->second; |
2431 | assert(CalleeCtx.guid() == CalleeGUID); |
2432 | |
2433 | for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) { |
2434 | const int64_t NewIndex = CalleeCounterMap[I]; |
2435 | if (NewIndex >= 0) { |
2436 | assert(NewIndex != 0 && "counter index mapping shouldn't happen to a 0 " |
2437 | "index, that's the caller's entry BB" ); |
2438 | Ctx.counters()[NewIndex] = CalleeCtx.counters()[I]; |
2439 | } |
2440 | } |
2441 | for (auto &[I, OtherSet] : CalleeCtx.callsites()) { |
2442 | const int64_t NewCSIdx = CalleeCallsiteMap[I]; |
2443 | if (NewCSIdx >= 0) { |
2444 | assert(NewCSIdx != 0 && |
2445 | "callsite index mapping shouldn't happen to a 0 index, the " |
2446 | "caller must've had at least one callsite (with such an index)" ); |
2447 | Ctx.ingestAllContexts(CSId: NewCSIdx, Other: std::move(OtherSet)); |
2448 | } |
2449 | } |
// We know the traversal is preorder, so it hasn't yet visited the
// sub-contexts of the context it's currently visiting. That means the
// erase below invalidates no iterators the traversal still needs.
2453 | auto Deleted = Ctx.callsites().erase(x: CallsiteID); |
2454 | assert(Deleted); |
2455 | (void)Deleted; |
2456 | }; |
2457 | CtxProf.update(Updater, F: Caller); |
2458 | return Ret; |
2459 | } |
2460 | |
2461 | /// This function inlines the called function into the basic block of the |
/// caller. It returns a failure InlineResult if it is not possible to inline
/// this call. The program is still in a well-defined state if this occurs,
/// though.
2464 | /// |
2465 | /// Note that this only does one level of inlining. For example, if the |
2466 | /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now |
2467 | /// exists in the instruction stream. Similarly this will inline a recursive |
2468 | /// function by one level. |
llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
2470 | bool MergeAttributes, |
2471 | AAResults *CalleeAAR, |
2472 | bool InsertLifetime, |
2473 | Function *ForwardVarArgsTo, |
2474 | OptimizationRemarkEmitter *ORE) { |
2475 | assert(CB.getParent() && CB.getFunction() && "Instruction not in function!" ); |
2476 | |
2477 | // FIXME: we don't inline callbr yet. |
2478 | if (isa<CallBrInst>(Val: CB)) |
2479 | return InlineResult::failure(Reason: "We don't inline callbr yet." ); |
2480 | |
2481 | // If IFI has any state in it, zap it before we fill it in. |
2482 | IFI.reset(); |
2483 | |
2484 | Function *CalledFunc = CB.getCalledFunction(); |
2485 | if (!CalledFunc || // Can't inline external function or indirect |
2486 | CalledFunc->isDeclaration()) // call! |
2487 | return InlineResult::failure(Reason: "external or indirect" ); |
2488 | |
2489 | // The inliner does not know how to inline through calls with operand bundles |
2490 | // in general ... |
2491 | Value *ConvergenceControlToken = nullptr; |
2492 | if (CB.hasOperandBundles()) { |
2493 | for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { |
2494 | auto OBUse = CB.getOperandBundleAt(Index: i); |
2495 | uint32_t Tag = OBUse.getTagID(); |
2496 | // ... but it knows how to inline through "deopt" operand bundles ... |
2497 | if (Tag == LLVMContext::OB_deopt) |
2498 | continue; |
2499 | // ... and "funclet" operand bundles. |
2500 | if (Tag == LLVMContext::OB_funclet) |
2501 | continue; |
2502 | if (Tag == LLVMContext::OB_clang_arc_attachedcall) |
2503 | continue; |
2504 | if (Tag == LLVMContext::OB_kcfi) |
2505 | continue; |
2506 | if (Tag == LLVMContext::OB_convergencectrl) { |
2507 | ConvergenceControlToken = OBUse.Inputs[0].get(); |
2508 | continue; |
2509 | } |
2510 | |
2511 | return InlineResult::failure(Reason: "unsupported operand bundle" ); |
2512 | } |
2513 | } |
2514 | |
2515 | // FIXME: The check below is redundant and incomplete. According to spec, if a |
2516 | // convergent call is missing a token, then the caller is using uncontrolled |
2517 | // convergence. If the callee has an entry intrinsic, then the callee is using |
2518 | // controlled convergence, and the call cannot be inlined. A proper |
// implementation of this check requires a whole new analysis that identifies
2520 | // convergence in every function. For now, we skip that and just do this one |
2521 | // cursory check. The underlying assumption is that in a compiler flow that |
2522 | // fully implements convergence control tokens, there is no mixing of |
2523 | // controlled and uncontrolled convergent operations in the whole program. |
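// For illustration (IR sketch, not taken from this file): under controlled
// convergence a convergent call site carries a token defined by an entry
// intrinsic, e.g.
//   %t = call token @llvm.experimental.convergence.entry()
//   call void @g() [ "convergencectrl"(token %t) ]
// and the check below gives up on inlining when a convergent call site has
// no such token but the callee's entry block defines one.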
2524 | if (CB.isConvergent()) { |
2525 | if (!ConvergenceControlToken && |
2526 | getConvergenceEntry(BB&: CalledFunc->getEntryBlock())) { |
2527 | return InlineResult::failure( |
2528 | Reason: "convergent call needs convergencectrl operand" ); |
2529 | } |
2530 | } |
2531 | |
2532 | // If the call to the callee cannot throw, set the 'nounwind' flag on any |
2533 | // calls that we inline. |
2534 | bool MarkNoUnwind = CB.doesNotThrow(); |
2535 | |
2536 | BasicBlock *OrigBB = CB.getParent(); |
2537 | Function *Caller = OrigBB->getParent(); |
2538 | |
2539 | // GC poses two hazards to inlining, which only occur when the callee has GC: |
2540 | // 1. If the caller has no GC, then the callee's GC must be propagated to the |
2541 | // caller. |
2542 | // 2. If the caller has a differing GC, it is invalid to inline. |
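// For instance (strategy names are only examples): a callee compiled with
// gc "statepoint-example" can be inlined into a caller that has no GC
// attribute - the caller simply adopts that strategy - but not into a
// caller already using a different strategy such as "coreclr".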
2543 | if (CalledFunc->hasGC()) { |
2544 | if (!Caller->hasGC()) |
2545 | Caller->setGC(CalledFunc->getGC()); |
2546 | else if (CalledFunc->getGC() != Caller->getGC()) |
2547 | return InlineResult::failure(Reason: "incompatible GC" ); |
2548 | } |
2549 | |
2550 | // Get the personality function from the callee if it contains a landing pad. |
2551 | Constant *CalledPersonality = |
2552 | CalledFunc->hasPersonalityFn() |
2553 | ? CalledFunc->getPersonalityFn()->stripPointerCasts() |
2554 | : nullptr; |
2555 | |
2556 | // Find the personality function used by the landing pads of the caller. If it |
2557 | // exists, then check to see that it matches the personality function used in |
2558 | // the callee. |
2559 | Constant *CallerPersonality = |
2560 | Caller->hasPersonalityFn() |
2561 | ? Caller->getPersonalityFn()->stripPointerCasts() |
2562 | : nullptr; |
2563 | if (CalledPersonality) { |
2564 | if (!CallerPersonality) |
2565 | Caller->setPersonalityFn(CalledPersonality); |
2566 | // If the personality functions match, then we can perform the |
2567 | // inlining. Otherwise, we can't inline. |
2568 | // TODO: This isn't 100% true. Some personality functions are proper |
2569 | // supersets of others and can be used in place of the other. |
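// For example (illustrative): a callee whose landing pads were emitted for
// __gxx_personality_v0 is not inlined into a caller whose personality is
// __CxxFrameHandler3, even though both are EH personality functions.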
2570 | else if (CalledPersonality != CallerPersonality) |
2571 | return InlineResult::failure(Reason: "incompatible personality" ); |
2572 | } |
2573 | |
2574 | // We need to figure out which funclet the callsite was in so that we may |
2575 | // properly nest the callee. |
2576 | Instruction *CallSiteEHPad = nullptr; |
2577 | if (CallerPersonality) { |
2578 | EHPersonality Personality = classifyEHPersonality(Pers: CallerPersonality); |
2579 | if (isScopedEHPersonality(Pers: Personality)) { |
2580 | std::optional<OperandBundleUse> ParentFunclet = |
2581 | CB.getOperandBundle(ID: LLVMContext::OB_funclet); |
2582 | if (ParentFunclet) |
2583 | CallSiteEHPad = cast<FuncletPadInst>(Val: ParentFunclet->Inputs.front()); |
2584 | |
2585 | // OK, the inlining site is legal. What about the target function? |
2586 | |
2587 | if (CallSiteEHPad) { |
2588 | if (Personality == EHPersonality::MSVC_CXX) { |
2589 | // The MSVC personality cannot tolerate catches getting inlined into |
2590 | // cleanup funclets. |
2591 | if (isa<CleanupPadInst>(Val: CallSiteEHPad)) { |
2592 | // Ok, the call site is within a cleanuppad. Let's check the callee |
2593 | // for catchpads. |
2594 | for (const BasicBlock &CalledBB : *CalledFunc) { |
2595 | if (isa<CatchSwitchInst>(Val: CalledBB.getFirstNonPHIIt())) |
2596 | return InlineResult::failure(Reason: "catch in cleanup funclet" ); |
2597 | } |
2598 | } |
2599 | } else if (isAsynchronousEHPersonality(Pers: Personality)) { |
// SEH is even less tolerant; there may not be any sort of exceptional
2601 | // funclet in the callee. |
2602 | for (const BasicBlock &CalledBB : *CalledFunc) { |
2603 | if (CalledBB.isEHPad()) |
2604 | return InlineResult::failure(Reason: "SEH in cleanup funclet" ); |
2605 | } |
2606 | } |
2607 | } |
2608 | } |
2609 | } |
2610 | |
2611 | // Determine if we are dealing with a call in an EHPad which does not unwind |
2612 | // to caller. |
2613 | bool EHPadForCallUnwindsLocally = false; |
2614 | if (CallSiteEHPad && isa<CallInst>(Val: CB)) { |
2615 | UnwindDestMemoTy FuncletUnwindMap; |
2616 | Value *CallSiteUnwindDestToken = |
2617 | getUnwindDestToken(EHPad: CallSiteEHPad, MemoMap&: FuncletUnwindMap); |
2618 | |
2619 | EHPadForCallUnwindsLocally = |
2620 | CallSiteUnwindDestToken && |
2621 | !isa<ConstantTokenNone>(Val: CallSiteUnwindDestToken); |
2622 | } |
2623 | |
2624 | // Get an iterator to the last basic block in the function, which will have |
2625 | // the new function inlined after it. |
2626 | Function::iterator LastBlock = --Caller->end(); |
2627 | |
2628 | // Make sure to capture all of the return instructions from the cloned |
2629 | // function. |
2630 | SmallVector<ReturnInst*, 8> Returns; |
2631 | ClonedCodeInfo InlinedFunctionInfo; |
2632 | Function::iterator FirstNewBlock; |
2633 | |
2634 | { // Scope to destroy VMap after cloning. |
2635 | ValueToValueMapTy VMap; |
2636 | struct ByValInit { |
2637 | Value *Dst; |
2638 | Value *Src; |
2639 | MaybeAlign SrcAlign; |
2640 | Type *Ty; |
2641 | }; |
// Keep a list of (dst, src, src_align, byval type) records to emit byval
// initializations. The source alignment is only available through the
// CallBase, so it has to be saved here.
2645 | SmallVector<ByValInit, 4> ByValInits; |
2646 | |
2647 | // When inlining a function that contains noalias scope metadata, |
2648 | // this metadata needs to be cloned so that the inlined blocks |
2649 | // have different "unique scopes" at every call site. |
2650 | // Track the metadata that must be cloned. Do this before other changes to |
2651 | // the function, so that we do not get in trouble when inlining caller == |
2652 | // callee. |
2653 | ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction()); |
2654 | |
2655 | auto &DL = Caller->getDataLayout(); |
2656 | |
2657 | // Calculate the vector of arguments to pass into the function cloner, which |
2658 | // matches up the formal to the actual argument values. |
2659 | auto AI = CB.arg_begin(); |
2660 | unsigned ArgNo = 0; |
2661 | for (Function::arg_iterator I = CalledFunc->arg_begin(), |
2662 | E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { |
2663 | Value *ActualArg = *AI; |
2664 | |
// When byval arguments are actually inlined, we need to make the copy they
// imply explicit. However, we don't do this if the callee is readonly or
// readnone, because then the copy would be unneeded: the callee doesn't
// modify the struct.
2669 | if (CB.isByValArgument(ArgNo)) { |
2670 | ActualArg = HandleByValArgument(ByValType: CB.getParamByValType(ArgNo), Arg: ActualArg, |
2671 | TheCall: &CB, CalledFunc, IFI, |
2672 | ByValAlignment: CalledFunc->getParamAlign(ArgNo)); |
2673 | if (ActualArg != *AI) |
2674 | ByValInits.push_back(Elt: {.Dst: ActualArg, .Src: (Value *)*AI, |
2675 | .SrcAlign: CB.getParamAlign(ArgNo), |
2676 | .Ty: CB.getParamByValType(ArgNo)}); |
2677 | } |
2678 | |
2679 | VMap[&*I] = ActualArg; |
2680 | } |
2681 | |
2682 | // TODO: Remove this when users have been updated to the assume bundles. |
2683 | // Add alignment assumptions if necessary. We do this before the inlined |
2684 | // instructions are actually cloned into the caller so that we can easily |
2685 | // check what will be known at the start of the inlined code. |
2686 | AddAlignmentAssumptions(CB, IFI); |
2687 | |
2688 | AssumptionCache *AC = |
2689 | IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; |
2690 | |
// Preserve all attributes of the call and its parameters.
2692 | salvageKnowledge(I: &CB, AC); |
2693 | |
2694 | // We want the inliner to prune the code as it copies. We would LOVE to |
2695 | // have no dead or constant instructions leftover after inlining occurs |
2696 | // (which can happen, e.g., because an argument was constant), but we'll be |
2697 | // happy with whatever the cloner can do. |
2698 | CloneAndPruneFunctionInto(NewFunc: Caller, OldFunc: CalledFunc, VMap, |
2699 | /*ModuleLevelChanges=*/false, Returns, NameSuffix: ".i" , |
2700 | CodeInfo: &InlinedFunctionInfo); |
2701 | // Remember the first block that is newly cloned over. |
2702 | FirstNewBlock = LastBlock; ++FirstNewBlock; |
2703 | |
// Insert retainRV/claimRV runtime calls.
2705 | objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(CB: &CB); |
2706 | if (RVCallKind != objcarc::ARCInstKind::None) |
2707 | inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns); |
2708 | |
// Update caller/callee profiles only when requested. For sample loader
2710 | // inlining, the context-sensitive inlinee profile doesn't need to be |
2711 | // subtracted from callee profile, and the inlined clone also doesn't need |
2712 | // to be scaled based on call site count. |
2713 | if (IFI.UpdateProfile) { |
2714 | if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) |
2715 | // Update the BFI of blocks cloned into the caller. |
2716 | updateCallerBFI(CallSiteBlock: OrigBB, VMap, CallerBFI: IFI.CallerBFI, CalleeBFI: IFI.CalleeBFI, |
2717 | CalleeEntryBlock: CalledFunc->front()); |
2718 | |
2719 | if (auto Profile = CalledFunc->getEntryCount()) |
2720 | updateCallProfile(Callee: CalledFunc, VMap, CalleeEntryCount: *Profile, TheCall: CB, PSI: IFI.PSI, |
2721 | CallerBFI: IFI.CallerBFI); |
2722 | } |
2723 | |
2724 | // Inject byval arguments initialization. |
2725 | for (ByValInit &Init : ByValInits) |
2726 | HandleByValArgumentInit(ByValType: Init.Ty, Dst: Init.Dst, Src: Init.Src, SrcAlign: Init.SrcAlign, |
2727 | M: Caller->getParent(), InsertBlock: &*FirstNewBlock, IFI, |
2728 | CalledFunc); |
2729 | |
2730 | std::optional<OperandBundleUse> ParentDeopt = |
2731 | CB.getOperandBundle(ID: LLVMContext::OB_deopt); |
2732 | if (ParentDeopt) { |
2733 | SmallVector<OperandBundleDef, 2> OpDefs; |
2734 | |
2735 | for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { |
2736 | CallBase *ICS = dyn_cast_or_null<CallBase>(Val&: VH); |
2737 | if (!ICS) |
2738 | continue; // instruction was DCE'd or RAUW'ed to undef |
2739 | |
2740 | OpDefs.clear(); |
2741 | |
2742 | OpDefs.reserve(N: ICS->getNumOperandBundles()); |
2743 | |
2744 | for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe; |
2745 | ++COBi) { |
2746 | auto ChildOB = ICS->getOperandBundleAt(Index: COBi); |
2747 | if (ChildOB.getTagID() != LLVMContext::OB_deopt) { |
2748 | // If the inlined call has other operand bundles, let them be |
2749 | OpDefs.emplace_back(Args&: ChildOB); |
2750 | continue; |
2751 | } |
2752 | |
2753 | // It may be useful to separate this logic (of handling operand |
2754 | // bundles) out to a separate "policy" component if this gets crowded. |
2755 | // Prepend the parent's deoptimization continuation to the newly |
2756 | // inlined call's deoptimization continuation. |
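// For example (operands are illustrative): if the parent call site carries
// "deopt"(i32 1, i32 2) and the inlined call carries "deopt"(i32 3), the
// rewritten call will carry "deopt"(i32 1, i32 2, i32 3).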
2757 | std::vector<Value *> MergedDeoptArgs; |
2758 | MergedDeoptArgs.reserve(n: ParentDeopt->Inputs.size() + |
2759 | ChildOB.Inputs.size()); |
2760 | |
2761 | llvm::append_range(C&: MergedDeoptArgs, R&: ParentDeopt->Inputs); |
2762 | llvm::append_range(C&: MergedDeoptArgs, R&: ChildOB.Inputs); |
2763 | |
2764 | OpDefs.emplace_back(Args: "deopt" , Args: std::move(MergedDeoptArgs)); |
2765 | } |
2766 | |
2767 | Instruction *NewI = CallBase::Create(CB: ICS, Bundles: OpDefs, InsertPt: ICS->getIterator()); |
2768 | |
2769 | // Note: the RAUW does the appropriate fixup in VMap, so we need to do |
2770 | // this even if the call returns void. |
2771 | ICS->replaceAllUsesWith(V: NewI); |
2772 | |
2773 | VH = nullptr; |
2774 | ICS->eraseFromParent(); |
2775 | } |
2776 | } |
2777 | |
2778 | // For 'nodebug' functions, the associated DISubprogram is always null. |
2779 | // Conservatively avoid propagating the callsite debug location to |
2780 | // instructions inlined from a function whose DISubprogram is not null. |
2781 | fixupLineNumbers(Fn: Caller, FI: FirstNewBlock, TheCall: &CB, |
2782 | CalleeHasDebugInfo: CalledFunc->getSubprogram() != nullptr); |
2783 | |
2784 | if (isAssignmentTrackingEnabled(M: *Caller->getParent())) { |
2785 | // Interpret inlined stores to caller-local variables as assignments. |
2786 | trackInlinedStores(Start: FirstNewBlock, End: Caller->end(), CB); |
2787 | |
2788 | // Update DIAssignID metadata attachments and uses so that they are |
2789 | // unique to this inlined instance. |
2790 | fixupAssignments(Start: FirstNewBlock, End: Caller->end()); |
2791 | } |
2792 | |
2793 | // Now clone the inlined noalias scope metadata. |
2794 | SAMetadataCloner.clone(); |
2795 | SAMetadataCloner.remap(FStart: FirstNewBlock, FEnd: Caller->end()); |
2796 | |
2797 | // Add noalias metadata if necessary. |
2798 | AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo); |
2799 | |
2800 | // Clone return attributes on the callsite into the calls within the inlined |
2801 | // function which feed into its return value. |
2802 | AddReturnAttributes(CB, VMap, InlinedFunctionInfo); |
2803 | |
2804 | // Clone attributes on the params of the callsite to calls within the |
2805 | // inlined function which use the same param. |
2806 | AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo); |
2807 | |
2808 | propagateMemProfMetadata( |
2809 | Callee: CalledFunc, CB, ContainsMemProfMetadata: InlinedFunctionInfo.ContainsMemProfMetadata, VMap, ORE); |
2810 | |
2811 | // Propagate metadata on the callsite if necessary. |
2812 | PropagateCallSiteMetadata(CB, FStart: FirstNewBlock, FEnd: Caller->end()); |
2813 | |
2814 | // Register any cloned assumptions. |
2815 | if (IFI.GetAssumptionCache) |
2816 | for (BasicBlock &NewBlock : |
2817 | make_range(x: FirstNewBlock->getIterator(), y: Caller->end())) |
2818 | for (Instruction &I : NewBlock) |
2819 | if (auto *II = dyn_cast<AssumeInst>(Val: &I)) |
2820 | IFI.GetAssumptionCache(*Caller).registerAssumption(CI: II); |
2821 | } |
2822 | |
2823 | if (ConvergenceControlToken) { |
2824 | IntrinsicInst *IntrinsicCall = getConvergenceEntry(BB&: *FirstNewBlock); |
2825 | if (IntrinsicCall) { |
2826 | IntrinsicCall->replaceAllUsesWith(V: ConvergenceControlToken); |
2827 | IntrinsicCall->eraseFromParent(); |
2828 | } |
2829 | } |
2830 | |
// If there are any alloca instructions in the block that used to be the entry
// block for the callee, move them to the entry block of the caller. First
// calculate which instruction they should be inserted before: we insert them
// at the beginning of the caller's entry block, ahead of its existing code.
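// Only allocas with a constant size that live in the entry block are treated
// as static allocas (fixed stack slots), which is why the eligible ones are
// hoisted here instead of being left in the inlined blocks.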
2835 | { |
2836 | BasicBlock::iterator InsertPoint = Caller->begin()->begin(); |
2837 | for (BasicBlock::iterator I = FirstNewBlock->begin(), |
2838 | E = FirstNewBlock->end(); I != E; ) { |
2839 | AllocaInst *AI = dyn_cast<AllocaInst>(Val: I++); |
2840 | if (!AI) continue; |
2841 | |
2842 | // If the alloca is now dead, remove it. This often occurs due to code |
2843 | // specialization. |
2844 | if (AI->use_empty()) { |
2845 | AI->eraseFromParent(); |
2846 | continue; |
2847 | } |
2848 | |
2849 | if (!allocaWouldBeStaticInEntry(AI)) |
2850 | continue; |
2851 | |
2852 | // Keep track of the static allocas that we inline into the caller. |
2853 | IFI.StaticAllocas.push_back(Elt: AI); |
2854 | |
2855 | // Scan for the block of allocas that we can move over, and move them |
2856 | // all at once. |
2857 | while (isa<AllocaInst>(Val: I) && |
2858 | !cast<AllocaInst>(Val&: I)->use_empty() && |
2859 | allocaWouldBeStaticInEntry(AI: cast<AllocaInst>(Val&: I))) { |
2860 | IFI.StaticAllocas.push_back(Elt: cast<AllocaInst>(Val&: I)); |
2861 | ++I; |
2862 | } |
2863 | |
2864 | // Transfer all of the allocas over in a block. Using splice means |
2865 | // that the instructions aren't removed from the symbol table, then |
2866 | // reinserted. |
2867 | I.setTailBit(true); |
2868 | Caller->getEntryBlock().splice(ToIt: InsertPoint, FromBB: &*FirstNewBlock, |
2869 | FromBeginIt: AI->getIterator(), FromEndIt: I); |
2870 | } |
2871 | } |
2872 | |
2873 | SmallVector<Value*,4> VarArgsToForward; |
2874 | SmallVector<AttributeSet, 4> VarArgsAttrs; |
2875 | for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); |
2876 | i < CB.arg_size(); i++) { |
2877 | VarArgsToForward.push_back(Elt: CB.getArgOperand(i)); |
2878 | VarArgsAttrs.push_back(Elt: CB.getAttributes().getParamAttrs(ArgNo: i)); |
2879 | } |
2880 | |
2881 | bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; |
2882 | if (InlinedFunctionInfo.ContainsCalls) { |
2883 | CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; |
2884 | if (CallInst *CI = dyn_cast<CallInst>(Val: &CB)) |
2885 | CallSiteTailKind = CI->getTailCallKind(); |
2886 | |
2887 | // For inlining purposes, the "notail" marker is the same as no marker. |
2888 | if (CallSiteTailKind == CallInst::TCK_NoTail) |
2889 | CallSiteTailKind = CallInst::TCK_None; |
2890 | |
2891 | for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; |
2892 | ++BB) { |
2893 | for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) { |
2894 | CallInst *CI = dyn_cast<CallInst>(Val: &I); |
2895 | if (!CI) |
2896 | continue; |
2897 | |
2898 | // Forward varargs from inlined call site to calls to the |
2899 | // ForwardVarArgsTo function, if requested, and to musttail calls. |
2900 | if (!VarArgsToForward.empty() && |
2901 | ((ForwardVarArgsTo && |
2902 | CI->getCalledFunction() == ForwardVarArgsTo) || |
2903 | CI->isMustTailCall())) { |
2904 | // Collect attributes for non-vararg parameters. |
2905 | AttributeList Attrs = CI->getAttributes(); |
2906 | SmallVector<AttributeSet, 8> ArgAttrs; |
2907 | if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { |
2908 | for (unsigned ArgNo = 0; |
2909 | ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) |
2910 | ArgAttrs.push_back(Elt: Attrs.getParamAttrs(ArgNo)); |
2911 | } |
2912 | |
2913 | // Add VarArg attributes. |
2914 | ArgAttrs.append(in_start: VarArgsAttrs.begin(), in_end: VarArgsAttrs.end()); |
2915 | Attrs = AttributeList::get(C&: CI->getContext(), FnAttrs: Attrs.getFnAttrs(), |
2916 | RetAttrs: Attrs.getRetAttrs(), ArgAttrs); |
2917 | // Add VarArgs to existing parameters. |
2918 | SmallVector<Value *, 6> Params(CI->args()); |
2919 | Params.append(in_start: VarArgsToForward.begin(), in_end: VarArgsToForward.end()); |
2920 | CallInst *NewCI = CallInst::Create( |
2921 | Ty: CI->getFunctionType(), Func: CI->getCalledOperand(), Args: Params, NameStr: "" , InsertBefore: CI->getIterator()); |
2922 | NewCI->setDebugLoc(CI->getDebugLoc()); |
2923 | NewCI->setAttributes(Attrs); |
2924 | NewCI->setCallingConv(CI->getCallingConv()); |
2925 | CI->replaceAllUsesWith(V: NewCI); |
2926 | CI->eraseFromParent(); |
2927 | CI = NewCI; |
2928 | } |
2929 | |
2930 | if (Function *F = CI->getCalledFunction()) |
2931 | InlinedDeoptimizeCalls |= |
2932 | F->getIntrinsicID() == Intrinsic::experimental_deoptimize; |
2933 | |
2934 | // We need to reduce the strength of any inlined tail calls. For |
2935 | // musttail, we have to avoid introducing potential unbounded stack |
2936 | // growth. For example, if functions 'f' and 'g' are mutually recursive |
2937 | // with musttail, we can inline 'g' into 'f' so long as we preserve |
2938 | // musttail on the cloned call to 'f'. If either the inlined call site |
2939 | // or the cloned call site is *not* musttail, the program already has |
2940 | // one frame of stack growth, so it's safe to remove musttail. Here is |
2941 | // a table of example transformations: |
2942 | // |
2943 | // f -> musttail g -> musttail f ==> f -> musttail f |
2944 | // f -> musttail g -> tail f ==> f -> tail f |
2945 | // f -> g -> musttail f ==> f -> f |
2946 | // f -> g -> tail f ==> f -> f |
2947 | // |
2948 | // Inlined notail calls should remain notail calls. |
2949 | CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); |
2950 | if (ChildTCK != CallInst::TCK_NoTail) |
2951 | ChildTCK = std::min(a: CallSiteTailKind, b: ChildTCK); |
2952 | CI->setTailCallKind(ChildTCK); |
2953 | InlinedMustTailCalls |= CI->isMustTailCall(); |
2954 | |
2955 | // Call sites inlined through a 'nounwind' call site should be |
2956 | // 'nounwind' as well. However, avoid marking call sites explicitly |
2957 | // where possible. This helps expose more opportunities for CSE after |
2958 | // inlining, commonly when the callee is an intrinsic. |
2959 | if (MarkNoUnwind && !CI->doesNotThrow()) |
2960 | CI->setDoesNotThrow(); |
2961 | } |
2962 | } |
2963 | } |
2964 | |
// Leave lifetime markers for the static allocas, scoping them to the
2966 | // function we just inlined. |
2967 | // We need to insert lifetime intrinsics even at O0 to avoid invalid |
2968 | // access caused by multithreaded coroutines. The check |
2969 | // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only. |
2970 | if ((InsertLifetime || Caller->isPresplitCoroutine()) && |
2971 | !IFI.StaticAllocas.empty()) { |
2972 | IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin()); |
2973 | for (AllocaInst *AI : IFI.StaticAllocas) { |
2974 | // Don't mark swifterror allocas. They can't have bitcast uses. |
2975 | if (AI->isSwiftError()) |
2976 | continue; |
2977 | |
2978 | // If the alloca is already scoped to something smaller than the whole |
2979 | // function then there's no need to add redundant, less accurate markers. |
2980 | if (hasLifetimeMarkers(AI)) |
2981 | continue; |
2982 | |
2983 | // Try to determine the size of the allocation. |
2984 | ConstantInt *AllocaSize = nullptr; |
2985 | if (ConstantInt *AIArraySize = |
2986 | dyn_cast<ConstantInt>(Val: AI->getArraySize())) { |
2987 | auto &DL = Caller->getDataLayout(); |
2988 | Type *AllocaType = AI->getAllocatedType(); |
2989 | TypeSize AllocaTypeSize = DL.getTypeAllocSize(Ty: AllocaType); |
2990 | uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); |
2991 | |
2992 | // Don't add markers for zero-sized allocas. |
2993 | if (AllocaArraySize == 0) |
2994 | continue; |
2995 | |
2996 | // Check that array size doesn't saturate uint64_t and doesn't |
2997 | // overflow when it's multiplied by type size. |
2998 | if (!AllocaTypeSize.isScalable() && |
2999 | AllocaArraySize != std::numeric_limits<uint64_t>::max() && |
3000 | std::numeric_limits<uint64_t>::max() / AllocaArraySize >= |
3001 | AllocaTypeSize.getFixedValue()) { |
3002 | AllocaSize = ConstantInt::get(Ty: Type::getInt64Ty(C&: AI->getContext()), |
3003 | V: AllocaArraySize * AllocaTypeSize); |
3004 | } |
3005 | } |
3006 | |
3007 | builder.CreateLifetimeStart(Ptr: AI, Size: AllocaSize); |
3008 | for (ReturnInst *RI : Returns) { |
3009 | // Don't insert llvm.lifetime.end calls between a musttail or deoptimize |
3010 | // call and a return. The return kills all local allocas. |
3011 | if (InlinedMustTailCalls && |
3012 | RI->getParent()->getTerminatingMustTailCall()) |
3013 | continue; |
3014 | if (InlinedDeoptimizeCalls && |
3015 | RI->getParent()->getTerminatingDeoptimizeCall()) |
3016 | continue; |
3017 | IRBuilder<>(RI).CreateLifetimeEnd(Ptr: AI, Size: AllocaSize); |
3018 | } |
3019 | } |
3020 | } |
3021 | |
3022 | // If the inlined code contained dynamic alloca instructions, wrap the inlined |
3023 | // code with llvm.stacksave/llvm.stackrestore intrinsics. |
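// Sketch of the net effect (IR is illustrative):
//   %sp = call ptr @llvm.stacksave()
//   ... inlined code containing the dynamic allocas ...
//   call void @llvm.stackrestore(ptr %sp)
// so stack space allocated by the inlined dynamic allocas is released on
// every path that returns to the caller.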
3024 | if (InlinedFunctionInfo.ContainsDynamicAllocas) { |
3025 | // Insert the llvm.stacksave. |
3026 | CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) |
3027 | .CreateStackSave(Name: "savedstack" ); |
3028 | |
3029 | // Insert a call to llvm.stackrestore before any return instructions in the |
3030 | // inlined function. |
3031 | for (ReturnInst *RI : Returns) { |
3032 | // Don't insert llvm.stackrestore calls between a musttail or deoptimize |
3033 | // call and a return. The return will restore the stack pointer. |
3034 | if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) |
3035 | continue; |
3036 | if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) |
3037 | continue; |
3038 | IRBuilder<>(RI).CreateStackRestore(Ptr: SavedPtr); |
3039 | } |
3040 | } |
3041 | |
3042 | // If we are inlining for an invoke instruction, we must make sure to rewrite |
3043 | // any call instructions into invoke instructions. This is sensitive to which |
3044 | // funclet pads were top-level in the inlinee, so must be done before |
3045 | // rewriting the "parent pad" links. |
3046 | if (auto *II = dyn_cast<InvokeInst>(Val: &CB)) { |
3047 | BasicBlock *UnwindDest = II->getUnwindDest(); |
3048 | BasicBlock::iterator FirstNonPHI = UnwindDest->getFirstNonPHIIt(); |
3049 | if (isa<LandingPadInst>(Val: FirstNonPHI)) { |
3050 | HandleInlinedLandingPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo); |
3051 | } else { |
3052 | HandleInlinedEHPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo); |
3053 | } |
3054 | } |
3055 | |
3056 | // Update the lexical scopes of the new funclets and callsites. |
3057 | // Anything that had 'none' as its parent is now nested inside the callsite's |
3058 | // EHPad. |
3059 | if (CallSiteEHPad) { |
3060 | for (Function::iterator BB = FirstNewBlock->getIterator(), |
3061 | E = Caller->end(); |
3062 | BB != E; ++BB) { |
3063 | // Add bundle operands to inlined call sites. |
3064 | PropagateOperandBundles(InlinedBB: BB, CallSiteEHPad); |
3065 | |
3066 | // It is problematic if the inlinee has a cleanupret which unwinds to |
3067 | // caller and we inline it into a call site which doesn't unwind but into |
3068 | // an EH pad that does. Such an edge must be dynamically unreachable. |
3069 | // As such, we replace the cleanupret with unreachable. |
3070 | if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(Val: BB->getTerminator())) |
3071 | if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) |
3072 | changeToUnreachable(I: CleanupRet); |
3073 | |
3074 | BasicBlock::iterator I = BB->getFirstNonPHIIt(); |
3075 | if (!I->isEHPad()) |
3076 | continue; |
3077 | |
3078 | if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: I)) { |
3079 | if (isa<ConstantTokenNone>(Val: CatchSwitch->getParentPad())) |
3080 | CatchSwitch->setParentPad(CallSiteEHPad); |
3081 | } else { |
3082 | auto *FPI = cast<FuncletPadInst>(Val&: I); |
3083 | if (isa<ConstantTokenNone>(Val: FPI->getParentPad())) |
3084 | FPI->setParentPad(CallSiteEHPad); |
3085 | } |
3086 | } |
3087 | } |
3088 | |
3089 | if (InlinedDeoptimizeCalls) { |
3090 | // We need to at least remove the deoptimizing returns from the Return set, |
3091 | // so that the control flow from those returns does not get merged into the |
3092 | // caller (but terminate it instead). If the caller's return type does not |
3093 | // match the callee's return type, we also need to change the return type of |
3094 | // the intrinsic. |
3095 | if (Caller->getReturnType() == CB.getType()) { |
3096 | llvm::erase_if(C&: Returns, P: [](ReturnInst *RI) { |
3097 | return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; |
3098 | }); |
3099 | } else { |
3100 | SmallVector<ReturnInst *, 8> NormalReturns; |
3101 | Function *NewDeoptIntrinsic = Intrinsic::getOrInsertDeclaration( |
3102 | M: Caller->getParent(), id: Intrinsic::experimental_deoptimize, |
3103 | Tys: {Caller->getReturnType()}); |
3104 | |
3105 | for (ReturnInst *RI : Returns) { |
3106 | CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); |
3107 | if (!DeoptCall) { |
3108 | NormalReturns.push_back(Elt: RI); |
3109 | continue; |
3110 | } |
3111 | |
3112 | // The calling convention on the deoptimize call itself may be bogus, |
3113 | // since the code we're inlining may have undefined behavior (and may |
3114 | // never actually execute at runtime); but all |
3115 | // @llvm.experimental.deoptimize declarations have to have the same |
3116 | // calling convention in a well-formed module. |
3117 | auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); |
3118 | NewDeoptIntrinsic->setCallingConv(CallingConv); |
3119 | auto *CurBB = RI->getParent(); |
3120 | RI->eraseFromParent(); |
3121 | |
3122 | SmallVector<Value *, 4> CallArgs(DeoptCall->args()); |
3123 | |
3124 | SmallVector<OperandBundleDef, 1> OpBundles; |
3125 | DeoptCall->getOperandBundlesAsDefs(Defs&: OpBundles); |
3126 | auto DeoptAttributes = DeoptCall->getAttributes(); |
3127 | DeoptCall->eraseFromParent(); |
3128 | assert(!OpBundles.empty() && |
3129 | "Expected at least the deopt operand bundle" ); |
3130 | |
3131 | IRBuilder<> Builder(CurBB); |
3132 | CallInst *NewDeoptCall = |
3133 | Builder.CreateCall(Callee: NewDeoptIntrinsic, Args: CallArgs, OpBundles); |
3134 | NewDeoptCall->setCallingConv(CallingConv); |
3135 | NewDeoptCall->setAttributes(DeoptAttributes); |
3136 | if (NewDeoptCall->getType()->isVoidTy()) |
3137 | Builder.CreateRetVoid(); |
3138 | else |
3139 | Builder.CreateRet(V: NewDeoptCall); |
3140 | // Since the ret type is changed, remove the incompatible attributes. |
3141 | NewDeoptCall->removeRetAttrs(AttrsToRemove: AttributeFuncs::typeIncompatible( |
3142 | Ty: NewDeoptCall->getType(), AS: NewDeoptCall->getRetAttributes())); |
3143 | } |
3144 | |
3145 | // Leave behind the normal returns so we can merge control flow. |
3146 | std::swap(LHS&: Returns, RHS&: NormalReturns); |
3147 | } |
3148 | } |
3149 | |
3150 | // Handle any inlined musttail call sites. In order for a new call site to be |
3151 | // musttail, the source of the clone and the inlined call site must have been |
3152 | // musttail. Therefore it's safe to return without merging control into the |
3153 | // phi below. |
3154 | if (InlinedMustTailCalls) { |
3155 | // Check if we need to bitcast the result of any musttail calls. |
3156 | Type *NewRetTy = Caller->getReturnType(); |
3157 | bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy; |
3158 | |
3159 | // Handle the returns preceded by musttail calls separately. |
3160 | SmallVector<ReturnInst *, 8> NormalReturns; |
3161 | for (ReturnInst *RI : Returns) { |
3162 | CallInst *ReturnedMustTail = |
3163 | RI->getParent()->getTerminatingMustTailCall(); |
3164 | if (!ReturnedMustTail) { |
3165 | NormalReturns.push_back(Elt: RI); |
3166 | continue; |
3167 | } |
3168 | if (!NeedBitCast) |
3169 | continue; |
3170 | |
3171 | // Delete the old return and any preceding bitcast. |
3172 | BasicBlock *CurBB = RI->getParent(); |
3173 | auto *OldCast = dyn_cast_or_null<BitCastInst>(Val: RI->getReturnValue()); |
3174 | RI->eraseFromParent(); |
3175 | if (OldCast) |
3176 | OldCast->eraseFromParent(); |
3177 | |
3178 | // Insert a new bitcast and return with the right type. |
3179 | IRBuilder<> Builder(CurBB); |
3180 | Builder.CreateRet(V: Builder.CreateBitCast(V: ReturnedMustTail, DestTy: NewRetTy)); |
3181 | } |
3182 | |
3183 | // Leave behind the normal returns so we can merge control flow. |
3184 | std::swap(LHS&: Returns, RHS&: NormalReturns); |
3185 | } |
3186 | |
3187 | // Now that all of the transforms on the inlined code have taken place but |
3188 | // before we splice the inlined code into the CFG and lose track of which |
3189 | // blocks were actually inlined, collect the call sites. We only do this if |
3190 | // call graph updates weren't requested, as those provide value handle based |
3191 | // tracking of inlined call sites instead. Calls to intrinsics are not |
3192 | // collected because they are not inlineable. |
3193 | if (InlinedFunctionInfo.ContainsCalls) { |
3194 | // Otherwise just collect the raw call sites that were inlined. |
3195 | for (BasicBlock &NewBB : |
3196 | make_range(x: FirstNewBlock->getIterator(), y: Caller->end())) |
3197 | for (Instruction &I : NewBB) |
3198 | if (auto *CB = dyn_cast<CallBase>(Val: &I)) |
3199 | if (!(CB->getCalledFunction() && |
3200 | CB->getCalledFunction()->isIntrinsic())) |
3201 | IFI.InlinedCallSites.push_back(Elt: CB); |
3202 | } |
3203 | |
3204 | // If we cloned in _exactly one_ basic block, and if that block ends in a |
3205 | // return instruction, we splice the body of the inlined callee directly into |
3206 | // the calling basic block. |
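// Roughly (illustrative): a calling block [ ...; %r = call i32 @callee(); ... ]
// becomes [ ...; <inlined body>; ... ], with uses of %r rewritten below to
// the value returned by the callee's single return instruction.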
3207 | if (Returns.size() == 1 && std::distance(first: FirstNewBlock, last: Caller->end()) == 1) { |
3208 | // Move all of the instructions right before the call. |
3209 | OrigBB->splice(ToIt: CB.getIterator(), FromBB: &*FirstNewBlock, FromBeginIt: FirstNewBlock->begin(), |
3210 | FromEndIt: FirstNewBlock->end()); |
3211 | // Remove the cloned basic block. |
3212 | Caller->back().eraseFromParent(); |
3213 | |
3214 | // If the call site was an invoke instruction, add a branch to the normal |
3215 | // destination. |
3216 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) { |
3217 | BranchInst *NewBr = BranchInst::Create(IfTrue: II->getNormalDest(), InsertBefore: CB.getIterator()); |
3218 | NewBr->setDebugLoc(Returns[0]->getDebugLoc()); |
3219 | } |
3220 | |
3221 | // If the return instruction returned a value, replace uses of the call with |
3222 | // uses of the returned value. |
3223 | if (!CB.use_empty()) { |
3224 | ReturnInst *R = Returns[0]; |
3225 | if (&CB == R->getReturnValue()) |
3226 | CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType())); |
3227 | else |
3228 | CB.replaceAllUsesWith(V: R->getReturnValue()); |
3229 | } |
3230 | // Since we are now done with the Call/Invoke, we can delete it. |
3231 | CB.eraseFromParent(); |
3232 | |
3233 | // Since we are now done with the return instruction, delete it also. |
3234 | Returns[0]->eraseFromParent(); |
3235 | |
3236 | if (MergeAttributes) |
3237 | AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc); |
3238 | |
3239 | // We are now done with the inlining. |
3240 | return InlineResult::success(); |
3241 | } |
3242 | |
3243 | // Otherwise, we have the normal case, of more than one block to inline or |
3244 | // multiple return sites. |
3245 | |
3246 | // We want to clone the entire callee function into the hole between the |
3247 | // "starter" and "ender" blocks. How we accomplish this depends on whether |
3248 | // this is an invoke instruction or a call instruction. |
3249 | BasicBlock *AfterCallBB; |
3250 | BranchInst *CreatedBranchToNormalDest = nullptr; |
3251 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &CB)) { |
3252 | |
3253 | // Add an unconditional branch to make this look like the CallInst case... |
3254 | CreatedBranchToNormalDest = BranchInst::Create(IfTrue: II->getNormalDest(), InsertBefore: CB.getIterator()); |
3255 | // We intend to replace this DebugLoc with another later. |
3256 | CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getTemporary()); |
3257 | |
3258 | // Split the basic block. This guarantees that no PHI nodes will have to be |
// updated due to new incoming edges, and makes the invoke case more
3260 | // symmetric to the call case. |
3261 | AfterCallBB = |
3262 | OrigBB->splitBasicBlock(I: CreatedBranchToNormalDest->getIterator(), |
3263 | BBName: CalledFunc->getName() + ".exit" ); |
3264 | |
3265 | } else { // It's a call |
3266 | // If this is a call instruction, we need to split the basic block that |
3267 | // the call lives in. |
3268 | // |
3269 | AfterCallBB = OrigBB->splitBasicBlock(I: CB.getIterator(), |
3270 | BBName: CalledFunc->getName() + ".exit" ); |
3271 | } |
3272 | |
3273 | if (IFI.CallerBFI) { |
3274 | // Copy original BB's block frequency to AfterCallBB |
3275 | IFI.CallerBFI->setBlockFreq(BB: AfterCallBB, |
3276 | Freq: IFI.CallerBFI->getBlockFreq(BB: OrigBB)); |
3277 | } |
3278 | |
3279 | // Change the branch that used to go to AfterCallBB to branch to the first |
3280 | // basic block of the inlined function. |
3281 | // |
3282 | Instruction *Br = OrigBB->getTerminator(); |
3283 | assert(Br && Br->getOpcode() == Instruction::Br && |
3284 | "splitBasicBlock broken!" ); |
3285 | Br->setOperand(i: 0, Val: &*FirstNewBlock); |
3286 | |
3287 | // Now that the function is correct, make it a little bit nicer. In |
3288 | // particular, move the basic blocks inserted from the end of the function |
3289 | // into the space made by splitting the source basic block. |
3290 | Caller->splice(ToIt: AfterCallBB->getIterator(), FromF: Caller, FromBeginIt: FirstNewBlock, |
3291 | FromEndIt: Caller->end()); |
3292 | |
3293 | // Handle all of the return instructions that we just cloned in, and eliminate |
3294 | // any users of the original call/invoke instruction. |
3295 | Type *RTy = CalledFunc->getReturnType(); |
3296 | |
3297 | PHINode *PHI = nullptr; |
3298 | if (Returns.size() > 1) { |
3299 | // The PHI node should go at the front of the new basic block to merge all |
3300 | // possible incoming values. |
3301 | if (!CB.use_empty()) { |
3302 | PHI = PHINode::Create(Ty: RTy, NumReservedValues: Returns.size(), NameStr: CB.getName()); |
3303 | PHI->insertBefore(InsertPos: AfterCallBB->begin()); |
3304 | // Anything that used the result of the function call should now use the |
3305 | // PHI node as their operand. |
3306 | CB.replaceAllUsesWith(V: PHI); |
3307 | } |
3308 | |
3309 | // Loop over all of the return instructions adding entries to the PHI node |
3310 | // as appropriate. |
3311 | if (PHI) { |
3312 | for (ReturnInst *RI : Returns) { |
3313 | assert(RI->getReturnValue()->getType() == PHI->getType() && |
3314 | "Ret value not consistent in function!" ); |
3315 | PHI->addIncoming(V: RI->getReturnValue(), BB: RI->getParent()); |
3316 | } |
3317 | } |
3318 | |
3319 | // Add a branch to the merge points and remove return instructions. |
3320 | DebugLoc Loc; |
3321 | for (ReturnInst *RI : Returns) { |
3322 | BranchInst *BI = BranchInst::Create(IfTrue: AfterCallBB, InsertBefore: RI->getIterator()); |
3323 | Loc = RI->getDebugLoc(); |
3324 | BI->setDebugLoc(Loc); |
3325 | RI->eraseFromParent(); |
3326 | } |
3327 | // We need to set the debug location to *somewhere* inside the |
3328 | // inlined function. The line number may be nonsensical, but the |
3329 | // instruction will at least be associated with the right |
3330 | // function. |
3331 | if (CreatedBranchToNormalDest) |
3332 | CreatedBranchToNormalDest->setDebugLoc(Loc); |
3333 | } else if (!Returns.empty()) { |
3334 | // Otherwise, if there is exactly one return value, just replace anything |
3335 | // using the return value of the call with the computed value. |
3336 | if (!CB.use_empty()) { |
3337 | if (&CB == Returns[0]->getReturnValue()) |
3338 | CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType())); |
3339 | else |
3340 | CB.replaceAllUsesWith(V: Returns[0]->getReturnValue()); |
3341 | } |
3342 | |
3343 | // Update PHI nodes that use the ReturnBB to use the AfterCallBB. |
3344 | BasicBlock *ReturnBB = Returns[0]->getParent(); |
3345 | ReturnBB->replaceAllUsesWith(V: AfterCallBB); |
3346 | |
3347 | // Splice the code from the return block into the block that it will return |
3348 | // to, which contains the code that was after the call. |
3349 | AfterCallBB->splice(ToIt: AfterCallBB->begin(), FromBB: ReturnBB); |
3350 | |
3351 | if (CreatedBranchToNormalDest) |
3352 | CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); |
3353 | |
// Delete the return instruction and then the now-empty ReturnBB.
3355 | Returns[0]->eraseFromParent(); |
3356 | ReturnBB->eraseFromParent(); |
3357 | } else if (!CB.use_empty()) { |
3358 | // In this case there are no returns to use, so there is no clear source |
3359 | // location for the "return". |
3360 | // FIXME: It may be correct to use the scope end line of the function here, |
3361 | // since this likely means we are falling out of the function. |
3362 | if (CreatedBranchToNormalDest) |
3363 | CreatedBranchToNormalDest->setDebugLoc(DebugLoc::getUnknown()); |
3364 | // No returns, but something is using the return value of the call. Just |
3365 | // nuke the result. |
3366 | CB.replaceAllUsesWith(V: PoisonValue::get(T: CB.getType())); |
3367 | } |
3368 | |
3369 | // Since we are now done with the Call/Invoke, we can delete it. |
3370 | CB.eraseFromParent(); |
3371 | |
3372 | // If we inlined any musttail calls and the original return is now |
3373 | // unreachable, delete it. It can only contain a bitcast and ret. |
3374 | if (InlinedMustTailCalls && pred_empty(BB: AfterCallBB)) |
3375 | AfterCallBB->eraseFromParent(); |
3376 | |
3377 | // We should always be able to fold the entry block of the function into the |
3378 | // single predecessor of the block... |
3379 | assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!" ); |
3380 | BasicBlock *CalleeEntry = cast<BranchInst>(Val: Br)->getSuccessor(i: 0); |
3381 | |
3382 | // Splice the code entry block into calling block, right before the |
3383 | // unconditional branch. |
3384 | CalleeEntry->replaceAllUsesWith(V: OrigBB); // Update PHI nodes |
3385 | OrigBB->splice(ToIt: Br->getIterator(), FromBB: CalleeEntry); |
3386 | |
3387 | // Remove the unconditional branch. |
3388 | Br->eraseFromParent(); |
3389 | |
3390 | // Now we can remove the CalleeEntry block, which is now empty. |
3391 | CalleeEntry->eraseFromParent(); |
3392 | |
3393 | // If we inserted a phi node, check to see if it has a single value (e.g. all |
3394 | // the entries are the same or undef). If so, remove the PHI so it doesn't |
3395 | // block other optimizations. |
3396 | if (PHI) { |
3397 | AssumptionCache *AC = |
3398 | IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; |
3399 | auto &DL = Caller->getDataLayout(); |
3400 | if (Value *V = simplifyInstruction(I: PHI, Q: {DL, nullptr, nullptr, AC})) { |
3401 | PHI->replaceAllUsesWith(V); |
3402 | PHI->eraseFromParent(); |
3403 | } |
3404 | } |
3405 | |
3406 | if (MergeAttributes) |
3407 | AttributeFuncs::mergeAttributesForInlining(Caller&: *Caller, Callee: *CalledFunc); |
3408 | |
3409 | return InlineResult::success(); |
3410 | } |
3411 | |