1 | //===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements utilities useful for promoting indirect call sites to |
10 | // direct call sites. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Transforms/Utils/CallPromotionUtils.h" |
15 | #include "llvm/Analysis/CtxProfAnalysis.h" |
16 | #include "llvm/Analysis/Loads.h" |
17 | #include "llvm/Analysis/TypeMetadataUtils.h" |
18 | #include "llvm/IR/AttributeMask.h" |
19 | #include "llvm/IR/Constant.h" |
20 | #include "llvm/IR/IRBuilder.h" |
21 | #include "llvm/IR/Instructions.h" |
22 | #include "llvm/IR/IntrinsicInst.h" |
23 | #include "llvm/IR/Module.h" |
24 | #include "llvm/ProfileData/PGOCtxProfReader.h" |
25 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
26 | |
27 | using namespace llvm; |
28 | |
29 | #define DEBUG_TYPE "call-promotion-utils" |
30 | |
31 | /// Fix-up phi nodes in an invoke instruction's normal destination. |
32 | /// |
33 | /// After versioning an invoke instruction, values coming from the original |
34 | /// block will now be coming from the "merge" block. For example, in the code |
35 | /// below: |
36 | /// |
37 | /// then_bb: |
38 | /// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst |
39 | /// |
40 | /// else_bb: |
41 | /// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst |
42 | /// |
43 | /// merge_bb: |
44 | /// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ] |
45 | /// br %normal_dst |
46 | /// |
47 | /// normal_dst: |
48 | /// %t3 = phi i32 [ %x, %orig_bb ], ... |
49 | /// |
50 | /// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in |
51 | /// "normal_dst" must be fixed to refer to "merge_bb": |
52 | /// |
53 | /// normal_dst: |
54 | /// %t3 = phi i32 [ %x, %merge_bb ], ... |
55 | /// |
56 | static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock, |
57 | BasicBlock *MergeBlock) { |
58 | for (PHINode &Phi : Invoke->getNormalDest()->phis()) { |
59 | int Idx = Phi.getBasicBlockIndex(BB: OrigBlock); |
60 | if (Idx == -1) |
61 | continue; |
62 | Phi.setIncomingBlock(i: Idx, BB: MergeBlock); |
63 | } |
64 | } |
65 | |
66 | /// Fix-up phi nodes in an invoke instruction's unwind destination. |
67 | /// |
68 | /// After versioning an invoke instruction, values coming from the original |
69 | /// block will now be coming from either the "then" block or the "else" block. |
70 | /// For example, in the code below: |
71 | /// |
72 | /// then_bb: |
73 | /// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst |
74 | /// |
75 | /// else_bb: |
76 | /// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst |
77 | /// |
78 | /// unwind_dst: |
79 | /// %t3 = phi i32 [ %x, %orig_bb ], ... |
80 | /// |
81 | /// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in |
82 | /// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb": |
83 | /// |
84 | /// unwind_dst: |
85 | /// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ... |
86 | /// |
87 | static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock, |
88 | BasicBlock *ThenBlock, |
89 | BasicBlock *ElseBlock) { |
90 | for (PHINode &Phi : Invoke->getUnwindDest()->phis()) { |
91 | int Idx = Phi.getBasicBlockIndex(BB: OrigBlock); |
92 | if (Idx == -1) |
93 | continue; |
94 | auto *V = Phi.getIncomingValue(i: Idx); |
95 | Phi.setIncomingBlock(i: Idx, BB: ThenBlock); |
96 | Phi.addIncoming(V, BB: ElseBlock); |
97 | } |
98 | } |
99 | |
100 | /// Create a phi node for the returned value of a call or invoke instruction. |
101 | /// |
102 | /// After versioning a call or invoke instruction that returns a value, we have |
103 | /// to merge the value of the original and new instructions. We do this by |
104 | /// creating a phi node and replacing uses of the original instruction with this |
105 | /// phi node. |
106 | /// |
107 | /// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is |
108 | /// defined in "then_bb", we create the following phi node: |
109 | /// |
110 | /// ; Uses of the original instruction are replaced by uses of the phi node. |
111 | /// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ], |
112 | /// |
113 | static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst, |
114 | BasicBlock *MergeBlock, IRBuilder<> &Builder) { |
115 | |
116 | if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty()) |
117 | return; |
118 | |
119 | Builder.SetInsertPoint(TheBB: MergeBlock, IP: MergeBlock->begin()); |
120 | PHINode *Phi = Builder.CreatePHI(Ty: OrigInst->getType(), NumReservedValues: 0); |
121 | SmallVector<User *, 16> UsersToUpdate(OrigInst->users()); |
122 | for (User *U : UsersToUpdate) |
123 | U->replaceUsesOfWith(From: OrigInst, To: Phi); |
124 | Phi->addIncoming(V: OrigInst, BB: OrigInst->getParent()); |
125 | Phi->addIncoming(V: NewInst, BB: NewInst->getParent()); |
126 | } |
127 | |
128 | /// Cast a call or invoke instruction to the given type. |
129 | /// |
130 | /// When promoting a call site, the return type of the call site might not match |
131 | /// that of the callee. If this is the case, we have to cast the returned value |
132 | /// to the correct type. The location of the cast depends on if we have a call |
133 | /// or invoke instruction. |
134 | /// |
135 | /// For example, if the call instruction below requires a bitcast after |
136 | /// promotion: |
137 | /// |
138 | /// orig_bb: |
139 | /// %t0 = call i32 @func() |
140 | /// ... |
141 | /// |
142 | /// The bitcast is placed after the call instruction: |
143 | /// |
144 | /// orig_bb: |
145 | /// ; Uses of the original return value are replaced by uses of the bitcast. |
146 | /// %t0 = call i32 @func() |
147 | /// %t1 = bitcast i32 %t0 to ... |
148 | /// ... |
149 | /// |
150 | /// A similar transformation is performed for invoke instructions. However, |
151 | /// since invokes are terminating, a new block is created for the bitcast. For |
152 | /// example, if the invoke instruction below requires a bitcast after promotion: |
153 | /// |
154 | /// orig_bb: |
155 | /// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst |
156 | /// |
157 | /// The edge between the original block and the invoke's normal destination is |
158 | /// split, and the bitcast is placed there: |
159 | /// |
160 | /// orig_bb: |
161 | /// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst |
162 | /// |
163 | /// split_bb: |
164 | /// ; Uses of the original return value are replaced by uses of the bitcast. |
165 | /// %t1 = bitcast i32 %t0 to ... |
166 | /// br label %normal_dst |
167 | /// |
168 | static void createRetBitCast(CallBase &CB, Type *RetTy, CastInst **RetBitCast) { |
169 | |
170 | // Save the users of the calling instruction. These uses will be changed to |
171 | // use the bitcast after we create it. |
172 | SmallVector<User *, 16> UsersToUpdate(CB.users()); |
173 | |
174 | // Determine an appropriate location to create the bitcast for the return |
175 | // value. The location depends on if we have a call or invoke instruction. |
176 | BasicBlock::iterator InsertBefore; |
177 | if (auto *Invoke = dyn_cast<InvokeInst>(Val: &CB)) |
178 | InsertBefore = |
179 | SplitEdge(From: Invoke->getParent(), To: Invoke->getNormalDest())->begin(); |
180 | else |
181 | InsertBefore = std::next(x: CB.getIterator()); |
182 | |
183 | // Bitcast the return value to the correct type. |
184 | auto *Cast = CastInst::CreateBitOrPointerCast(S: &CB, Ty: RetTy, Name: "" , InsertBefore); |
185 | if (RetBitCast) |
186 | *RetBitCast = Cast; |
187 | |
188 | // Replace all the original uses of the calling instruction with the bitcast. |
189 | for (User *U : UsersToUpdate) |
190 | U->replaceUsesOfWith(From: &CB, To: Cast); |
191 | } |
192 | |
193 | /// Predicate and clone the given call site. |
194 | /// |
195 | /// This function creates an if-then-else structure at the location of the call |
196 | /// site. The "if" condition is specified by `Cond`. |
197 | /// The original call site is moved into the "else" block, and a clone of the |
198 | /// call site is placed in the "then" block. The cloned instruction is returned. |
199 | /// |
200 | /// For example, the call instruction below: |
201 | /// |
202 | /// orig_bb: |
203 | /// %t0 = call i32 %ptr() |
204 | /// ... |
205 | /// |
206 | /// Is replace by the following: |
207 | /// |
208 | /// orig_bb: |
209 | /// %cond = Cond |
210 | /// br i1 %cond, %then_bb, %else_bb |
211 | /// |
212 | /// then_bb: |
213 | /// ; The clone of the original call instruction is placed in the "then" |
214 | /// ; block. It is not yet promoted. |
215 | /// %t1 = call i32 %ptr() |
216 | /// br merge_bb |
217 | /// |
218 | /// else_bb: |
219 | /// ; The original call instruction is moved to the "else" block. |
220 | /// %t0 = call i32 %ptr() |
221 | /// br merge_bb |
222 | /// |
223 | /// merge_bb: |
224 | /// ; Uses of the original call instruction are replaced by uses of the phi |
225 | /// ; node. |
226 | /// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] |
227 | /// ... |
228 | /// |
229 | /// A similar transformation is performed for invoke instructions. However, |
230 | /// since invokes are terminating, more work is required. For example, the |
231 | /// invoke instruction below: |
232 | /// |
233 | /// orig_bb: |
234 | /// %t0 = invoke %ptr() to label %normal_dst unwind label %unwind_dst |
235 | /// |
236 | /// Is replace by the following: |
237 | /// |
238 | /// orig_bb: |
239 | /// %cond = Cond |
240 | /// br i1 %cond, %then_bb, %else_bb |
241 | /// |
242 | /// then_bb: |
243 | /// ; The clone of the original invoke instruction is placed in the "then" |
244 | /// ; block, and its normal destination is set to the "merge" block. It is |
245 | /// ; not yet promoted. |
246 | /// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst |
247 | /// |
248 | /// else_bb: |
249 | /// ; The original invoke instruction is moved into the "else" block, and |
250 | /// ; its normal destination is set to the "merge" block. |
251 | /// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst |
252 | /// |
253 | /// merge_bb: |
254 | /// ; Uses of the original invoke instruction are replaced by uses of the |
255 | /// ; phi node, and the merge block branches to the normal destination. |
256 | /// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] |
257 | /// br %normal_dst |
258 | /// |
259 | /// An indirect musttail call is processed slightly differently in that: |
260 | /// 1. No merge block needed for the orginal and the cloned callsite, since |
261 | /// either one ends the flow. No phi node is needed either. |
262 | /// 2. The return statement following the original call site is duplicated too |
263 | /// and placed immediately after the cloned call site per the IR convention. |
264 | /// |
265 | /// For example, the musttail call instruction below: |
266 | /// |
267 | /// orig_bb: |
268 | /// %t0 = musttail call i32 %ptr() |
269 | /// ... |
270 | /// |
271 | /// Is replaced by the following: |
272 | /// |
273 | /// cond_bb: |
274 | /// %cond = Cond |
275 | /// br i1 %cond, %then_bb, %orig_bb |
276 | /// |
277 | /// then_bb: |
278 | /// ; The clone of the original call instruction is placed in the "then" |
279 | /// ; block. It is not yet promoted. |
280 | /// %t1 = musttail call i32 %ptr() |
281 | /// ret %t1 |
282 | /// |
283 | /// orig_bb: |
284 | /// ; The original call instruction stays in its original block. |
285 | /// %t0 = musttail call i32 %ptr() |
286 | /// ret %t0 |
287 | static CallBase &versionCallSiteWithCond(CallBase &CB, Value *Cond, |
288 | MDNode *BranchWeights) { |
289 | |
290 | IRBuilder<> Builder(&CB); |
291 | CallBase *OrigInst = &CB; |
292 | BasicBlock *OrigBlock = OrigInst->getParent(); |
293 | |
294 | if (OrigInst->isMustTailCall()) { |
295 | // Create an if-then structure. The original instruction stays in its block, |
296 | // and a clone of the original instruction is placed in the "then" block. |
297 | Instruction *ThenTerm = |
298 | SplitBlockAndInsertIfThen(Cond, SplitBefore: &CB, Unreachable: false, BranchWeights); |
299 | BasicBlock *ThenBlock = ThenTerm->getParent(); |
300 | ThenBlock->setName("if.true.direct_targ" ); |
301 | CallBase *NewInst = cast<CallBase>(Val: OrigInst->clone()); |
302 | NewInst->insertBefore(InsertPos: ThenTerm->getIterator()); |
303 | |
304 | // Place a clone of the optional bitcast after the new call site. |
305 | Value *NewRetVal = NewInst; |
306 | auto Next = OrigInst->getNextNode(); |
307 | if (auto *BitCast = dyn_cast_or_null<BitCastInst>(Val: Next)) { |
308 | assert(BitCast->getOperand(0) == OrigInst && |
309 | "bitcast following musttail call must use the call" ); |
310 | auto NewBitCast = BitCast->clone(); |
311 | NewBitCast->replaceUsesOfWith(From: OrigInst, To: NewInst); |
312 | NewBitCast->insertBefore(InsertPos: ThenTerm->getIterator()); |
313 | NewRetVal = NewBitCast; |
314 | Next = BitCast->getNextNode(); |
315 | } |
316 | |
317 | // Place a clone of the return instruction after the new call site. |
318 | ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Val: Next); |
319 | assert(Ret && "musttail call must precede a ret with an optional bitcast" ); |
320 | auto NewRet = Ret->clone(); |
321 | if (Ret->getReturnValue()) |
322 | NewRet->replaceUsesOfWith(From: Ret->getReturnValue(), To: NewRetVal); |
323 | NewRet->insertBefore(InsertPos: ThenTerm->getIterator()); |
324 | |
325 | // A return instructions is terminating, so we don't need the terminator |
326 | // instruction just created. |
327 | ThenTerm->eraseFromParent(); |
328 | |
329 | return *NewInst; |
330 | } |
331 | |
332 | // Create an if-then-else structure. The original instruction is moved into |
333 | // the "else" block, and a clone of the original instruction is placed in the |
334 | // "then" block. |
335 | Instruction *ThenTerm = nullptr; |
336 | Instruction *ElseTerm = nullptr; |
337 | SplitBlockAndInsertIfThenElse(Cond, SplitBefore: &CB, ThenTerm: &ThenTerm, ElseTerm: &ElseTerm, BranchWeights); |
338 | BasicBlock *ThenBlock = ThenTerm->getParent(); |
339 | BasicBlock *ElseBlock = ElseTerm->getParent(); |
340 | BasicBlock *MergeBlock = OrigInst->getParent(); |
341 | |
342 | ThenBlock->setName("if.true.direct_targ" ); |
343 | ElseBlock->setName("if.false.orig_indirect" ); |
344 | MergeBlock->setName("if.end.icp" ); |
345 | |
346 | CallBase *NewInst = cast<CallBase>(Val: OrigInst->clone()); |
347 | OrigInst->moveBefore(InsertPos: ElseTerm->getIterator()); |
348 | NewInst->insertBefore(InsertPos: ThenTerm->getIterator()); |
349 | |
350 | // If the original call site is an invoke instruction, we have extra work to |
351 | // do since invoke instructions are terminating. We have to fix-up phi nodes |
352 | // in the invoke's normal and unwind destinations. |
353 | if (auto *OrigInvoke = dyn_cast<InvokeInst>(Val: OrigInst)) { |
354 | auto *NewInvoke = cast<InvokeInst>(Val: NewInst); |
355 | |
356 | // Invoke instructions are terminating, so we don't need the terminator |
357 | // instructions that were just created. |
358 | ThenTerm->eraseFromParent(); |
359 | ElseTerm->eraseFromParent(); |
360 | |
361 | // Branch from the "merge" block to the original normal destination. |
362 | Builder.SetInsertPoint(MergeBlock); |
363 | Builder.CreateBr(Dest: OrigInvoke->getNormalDest()); |
364 | |
365 | // Fix-up phi nodes in the original invoke's normal and unwind destinations. |
366 | fixupPHINodeForNormalDest(Invoke: OrigInvoke, OrigBlock, MergeBlock); |
367 | fixupPHINodeForUnwindDest(Invoke: OrigInvoke, OrigBlock: MergeBlock, ThenBlock, ElseBlock); |
368 | |
369 | // Now set the normal destinations of the invoke instructions to be the |
370 | // "merge" block. |
371 | OrigInvoke->setNormalDest(MergeBlock); |
372 | NewInvoke->setNormalDest(MergeBlock); |
373 | } |
374 | |
375 | // Create a phi node for the returned value of the call site. |
376 | createRetPHINode(OrigInst, NewInst, MergeBlock, Builder); |
377 | |
378 | return *NewInst; |
379 | } |
380 | |
381 | // Predicate and clone the given call site using condition `CB.callee == |
382 | // Callee`. See the comment `versionCallSiteWithCond` for the transformation. |
383 | CallBase &llvm::versionCallSite(CallBase &CB, Value *Callee, |
384 | MDNode *BranchWeights) { |
385 | |
386 | IRBuilder<> Builder(&CB); |
387 | |
388 | // Create the compare. The called value and callee must have the same type to |
389 | // be compared. |
390 | if (CB.getCalledOperand()->getType() != Callee->getType()) |
391 | Callee = Builder.CreateBitCast(V: Callee, DestTy: CB.getCalledOperand()->getType()); |
392 | auto *Cond = Builder.CreateICmpEQ(LHS: CB.getCalledOperand(), RHS: Callee); |
393 | |
394 | return versionCallSiteWithCond(CB, Cond, BranchWeights); |
395 | } |
396 | |
397 | bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee, |
398 | const char **FailureReason) { |
399 | assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted" ); |
400 | |
401 | auto &DL = Callee->getDataLayout(); |
402 | |
403 | // Check the return type. The callee's return value type must be bitcast |
404 | // compatible with the call site's type. |
405 | Type *CallRetTy = CB.getType(); |
406 | Type *FuncRetTy = Callee->getReturnType(); |
407 | if (CallRetTy != FuncRetTy) |
408 | if (!CastInst::isBitOrNoopPointerCastable(SrcTy: FuncRetTy, DestTy: CallRetTy, DL)) { |
409 | if (FailureReason) |
410 | *FailureReason = "Return type mismatch" ; |
411 | return false; |
412 | } |
413 | |
414 | // The number of formal arguments of the callee. |
415 | unsigned NumParams = Callee->getFunctionType()->getNumParams(); |
416 | |
417 | // The number of actual arguments in the call. |
418 | unsigned NumArgs = CB.arg_size(); |
419 | |
420 | // Check the number of arguments. The callee and call site must agree on the |
421 | // number of arguments. |
422 | if (NumArgs != NumParams && !Callee->isVarArg()) { |
423 | if (FailureReason) |
424 | *FailureReason = "The number of arguments mismatch" ; |
425 | return false; |
426 | } |
427 | |
428 | // Check the argument types. The callee's formal argument types must be |
429 | // bitcast compatible with the corresponding actual argument types of the call |
430 | // site. |
431 | unsigned I = 0; |
432 | for (; I < NumParams; ++I) { |
433 | // Make sure that the callee and call agree on byval/inalloca. The types do |
434 | // not have to match. |
435 | if (Callee->hasParamAttribute(ArgNo: I, Kind: Attribute::ByVal) != |
436 | CB.getAttributes().hasParamAttr(ArgNo: I, Kind: Attribute::ByVal)) { |
437 | if (FailureReason) |
438 | *FailureReason = "byval mismatch" ; |
439 | return false; |
440 | } |
441 | if (Callee->hasParamAttribute(ArgNo: I, Kind: Attribute::InAlloca) != |
442 | CB.getAttributes().hasParamAttr(ArgNo: I, Kind: Attribute::InAlloca)) { |
443 | if (FailureReason) |
444 | *FailureReason = "inalloca mismatch" ; |
445 | return false; |
446 | } |
447 | |
448 | Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(i: I); |
449 | Type *ActualTy = CB.getArgOperand(i: I)->getType(); |
450 | if (FormalTy == ActualTy) |
451 | continue; |
452 | if (!CastInst::isBitOrNoopPointerCastable(SrcTy: ActualTy, DestTy: FormalTy, DL)) { |
453 | if (FailureReason) |
454 | *FailureReason = "Argument type mismatch" ; |
455 | return false; |
456 | } |
457 | |
458 | // MustTail call needs stricter type match. See |
459 | // Verifier::verifyMustTailCall(). |
460 | if (CB.isMustTailCall()) { |
461 | PointerType *PF = dyn_cast<PointerType>(Val: FormalTy); |
462 | PointerType *PA = dyn_cast<PointerType>(Val: ActualTy); |
463 | if (!PF || !PA || PF->getAddressSpace() != PA->getAddressSpace()) { |
464 | if (FailureReason) |
465 | *FailureReason = "Musttail call Argument type mismatch" ; |
466 | return false; |
467 | } |
468 | } |
469 | } |
470 | for (; I < NumArgs; I++) { |
471 | // Vararg functions can have more arguments than parameters. |
472 | assert(Callee->isVarArg()); |
473 | if (CB.paramHasAttr(ArgNo: I, Kind: Attribute::StructRet)) { |
474 | if (FailureReason) |
475 | *FailureReason = "SRet arg to vararg function" ; |
476 | return false; |
477 | } |
478 | } |
479 | |
480 | return true; |
481 | } |
482 | |
483 | CallBase &llvm::promoteCall(CallBase &CB, Function *Callee, |
484 | CastInst **RetBitCast) { |
485 | assert(!CB.getCalledFunction() && "Only indirect call sites can be promoted" ); |
486 | |
487 | // Set the called function of the call site to be the given callee (but don't |
488 | // change the type). |
489 | CB.setCalledOperand(Callee); |
490 | |
491 | // Since the call site will no longer be direct, we must clear metadata that |
492 | // is only appropriate for indirect calls. This includes !prof and !callees |
493 | // metadata. |
494 | CB.setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr); |
495 | CB.setMetadata(KindID: LLVMContext::MD_callees, Node: nullptr); |
496 | |
497 | // If the function type of the call site matches that of the callee, no |
498 | // additional work is required. |
499 | if (CB.getFunctionType() == Callee->getFunctionType()) |
500 | return CB; |
501 | |
502 | // Save the return types of the call site and callee. |
503 | Type *CallSiteRetTy = CB.getType(); |
504 | Type *CalleeRetTy = Callee->getReturnType(); |
505 | |
506 | // Change the function type of the call site the match that of the callee. |
507 | CB.mutateFunctionType(FTy: Callee->getFunctionType()); |
508 | |
509 | // Inspect the arguments of the call site. If an argument's type doesn't |
510 | // match the corresponding formal argument's type in the callee, bitcast it |
511 | // to the correct type. |
512 | auto CalleeType = Callee->getFunctionType(); |
513 | auto CalleeParamNum = CalleeType->getNumParams(); |
514 | |
515 | LLVMContext &Ctx = Callee->getContext(); |
516 | const AttributeList &CallerPAL = CB.getAttributes(); |
517 | // The new list of argument attributes. |
518 | SmallVector<AttributeSet, 4> NewArgAttrs; |
519 | bool AttributeChanged = false; |
520 | |
521 | for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) { |
522 | auto *Arg = CB.getArgOperand(i: ArgNo); |
523 | Type *FormalTy = CalleeType->getParamType(i: ArgNo); |
524 | Type *ActualTy = Arg->getType(); |
525 | if (FormalTy != ActualTy) { |
526 | auto *Cast = |
527 | CastInst::CreateBitOrPointerCast(S: Arg, Ty: FormalTy, Name: "" , InsertBefore: CB.getIterator()); |
528 | CB.setArgOperand(i: ArgNo, v: Cast); |
529 | |
530 | // Remove any incompatible attributes for the argument. |
531 | AttrBuilder ArgAttrs(Ctx, CallerPAL.getParamAttrs(ArgNo)); |
532 | ArgAttrs.remove(AM: AttributeFuncs::typeIncompatible( |
533 | Ty: FormalTy, AS: CallerPAL.getParamAttrs(ArgNo))); |
534 | |
535 | // We may have a different byval/inalloca type. |
536 | if (ArgAttrs.getByValType()) |
537 | ArgAttrs.addByValAttr(Ty: Callee->getParamByValType(ArgNo)); |
538 | if (ArgAttrs.getInAllocaType()) |
539 | ArgAttrs.addInAllocaAttr(Ty: Callee->getParamInAllocaType(ArgNo)); |
540 | |
541 | NewArgAttrs.push_back(Elt: AttributeSet::get(C&: Ctx, B: ArgAttrs)); |
542 | AttributeChanged = true; |
543 | } else |
544 | NewArgAttrs.push_back(Elt: CallerPAL.getParamAttrs(ArgNo)); |
545 | } |
546 | |
547 | // If the return type of the call site doesn't match that of the callee, cast |
548 | // the returned value to the appropriate type. |
549 | // Remove any incompatible return value attribute. |
550 | AttrBuilder RAttrs(Ctx, CallerPAL.getRetAttrs()); |
551 | if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) { |
552 | createRetBitCast(CB, RetTy: CallSiteRetTy, RetBitCast); |
553 | RAttrs.remove( |
554 | AM: AttributeFuncs::typeIncompatible(Ty: CalleeRetTy, AS: CallerPAL.getRetAttrs())); |
555 | AttributeChanged = true; |
556 | } |
557 | |
558 | // Set the new callsite attribute. |
559 | if (AttributeChanged) |
560 | CB.setAttributes(AttributeList::get(C&: Ctx, FnAttrs: CallerPAL.getFnAttrs(), |
561 | RetAttrs: AttributeSet::get(C&: Ctx, B: RAttrs), |
562 | ArgAttrs: NewArgAttrs)); |
563 | |
564 | return CB; |
565 | } |
566 | |
567 | CallBase &llvm::promoteCallWithIfThenElse(CallBase &CB, Function *Callee, |
568 | MDNode *BranchWeights) { |
569 | |
570 | // Version the indirect call site. If the called value is equal to the given |
571 | // callee, 'NewInst' will be executed, otherwise the original call site will |
572 | // be executed. |
573 | CallBase &NewInst = versionCallSite(CB, Callee, BranchWeights); |
574 | |
575 | // Promote 'NewInst' so that it directly calls the desired function. |
576 | return promoteCall(CB&: NewInst, Callee); |
577 | } |
578 | |
579 | CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee, |
580 | PGOContextualProfile &CtxProf) { |
581 | assert(CB.isIndirectCall()); |
582 | if (!CtxProf.isFunctionKnown(F: Callee)) |
583 | return nullptr; |
584 | auto &Caller = *CB.getFunction(); |
585 | auto *CSInstr = CtxProfAnalysis::getCallsiteInstrumentation(CB); |
586 | if (!CSInstr) |
587 | return nullptr; |
588 | const uint64_t CSIndex = CSInstr->getIndex()->getZExtValue(); |
589 | |
590 | CallBase &DirectCall = promoteCall( |
591 | CB&: versionCallSite(CB, Callee: &Callee, /*BranchWeights=*/nullptr), Callee: &Callee); |
592 | CSInstr->moveBefore(InsertPos: CB.getIterator()); |
593 | const auto NewCSID = CtxProf.allocateNextCallsiteIndex(F: Caller); |
594 | auto *NewCSInstr = cast<InstrProfCallsite>(Val: CSInstr->clone()); |
595 | NewCSInstr->setIndex(NewCSID); |
596 | NewCSInstr->setCallee(&Callee); |
597 | NewCSInstr->insertBefore(InsertPos: DirectCall.getIterator()); |
598 | auto &DirectBB = *DirectCall.getParent(); |
599 | auto &IndirectBB = *CB.getParent(); |
600 | |
601 | assert((CtxProfAnalysis::getBBInstrumentation(IndirectBB) == nullptr) && |
602 | "The ICP direct BB is new, it shouldn't have instrumentation" ); |
603 | assert((CtxProfAnalysis::getBBInstrumentation(DirectBB) == nullptr) && |
604 | "The ICP indirect BB is new, it shouldn't have instrumentation" ); |
605 | |
606 | // Allocate counters for the new basic blocks. |
607 | const uint32_t DirectID = CtxProf.allocateNextCounterIndex(F: Caller); |
608 | const uint32_t IndirectID = CtxProf.allocateNextCounterIndex(F: Caller); |
609 | auto *EntryBBIns = |
610 | CtxProfAnalysis::getBBInstrumentation(BB&: Caller.getEntryBlock()); |
611 | auto *DirectBBIns = cast<InstrProfCntrInstBase>(Val: EntryBBIns->clone()); |
612 | DirectBBIns->setIndex(DirectID); |
613 | DirectBBIns->insertInto(ParentBB: &DirectBB, It: DirectBB.getFirstInsertionPt()); |
614 | |
615 | auto *IndirectBBIns = cast<InstrProfCntrInstBase>(Val: EntryBBIns->clone()); |
616 | IndirectBBIns->setIndex(IndirectID); |
617 | IndirectBBIns->insertInto(ParentBB: &IndirectBB, It: IndirectBB.getFirstInsertionPt()); |
618 | |
619 | const GlobalValue::GUID CalleeGUID = AssignGUIDPass::getGUID(F: Callee); |
620 | const uint32_t = IndirectID + 1; |
621 | |
622 | auto ProfileUpdater = [&](PGOCtxProfContext &Ctx) { |
623 | assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller)); |
624 | assert(NewCountersSize - 2 == Ctx.counters().size()); |
625 | // All the ctx-es belonging to a function must have the same size counters. |
626 | Ctx.resizeCounters(Size: NewCountersSize); |
627 | |
628 | // Maybe in this context, the indirect callsite wasn't observed at all. That |
629 | // would make both direct and indirect BBs cold - which is what we already |
630 | // have from resising the counters. |
631 | if (!Ctx.hasCallsite(I: CSIndex)) |
632 | return; |
633 | auto &CSData = Ctx.callsite(I: CSIndex); |
634 | |
635 | uint64_t TotalCount = 0; |
636 | for (const auto &[_, V] : CSData) |
637 | TotalCount += V.getEntrycount(); |
638 | uint64_t DirectCount = 0; |
639 | // If we called the direct target, update the DirectCount. If we didn't, we |
640 | // still want to update the indirect BB (to which the TotalCount goes, in |
641 | // that case). |
642 | if (auto It = CSData.find(x: CalleeGUID); It != CSData.end()) { |
643 | assert(CalleeGUID == It->second.guid()); |
644 | DirectCount = It->second.getEntrycount(); |
645 | // This direct target needs to be moved to this caller under the |
646 | // newly-allocated callsite index. |
647 | assert(Ctx.callsites().count(NewCSID) == 0); |
648 | Ctx.ingestContext(CSId: NewCSID, Other: std::move(It->second)); |
649 | CSData.erase(x: CalleeGUID); |
650 | } |
651 | |
652 | assert(TotalCount >= DirectCount); |
653 | uint64_t IndirectCount = TotalCount - DirectCount; |
654 | // The ICP's effect is as-if the direct BB would have been taken DirectCount |
655 | // times, and the indirect BB, IndirectCount times |
656 | Ctx.counters()[DirectID] = DirectCount; |
657 | Ctx.counters()[IndirectID] = IndirectCount; |
658 | |
659 | }; |
660 | CtxProf.update(ProfileUpdater, F: Caller); |
661 | return &DirectCall; |
662 | } |
663 | |
664 | CallBase &llvm::promoteCallWithVTableCmp(CallBase &CB, Instruction *VPtr, |
665 | Function *Callee, |
666 | ArrayRef<Constant *> AddressPoints, |
667 | MDNode *BranchWeights) { |
668 | assert(!AddressPoints.empty() && "Caller should guarantee" ); |
669 | IRBuilder<> Builder(&CB); |
670 | SmallVector<Value *, 2> ICmps; |
671 | for (auto &AddressPoint : AddressPoints) |
672 | ICmps.push_back(Elt: Builder.CreateICmpEQ(LHS: VPtr, RHS: AddressPoint)); |
673 | |
674 | // TODO: Perform tree height reduction if the number of ICmps is high. |
675 | Value *Cond = Builder.CreateOr(Ops: ICmps); |
676 | |
677 | // Version the indirect call site. If Cond is true, 'NewInst' will be |
678 | // executed, otherwise the original call site will be executed. |
679 | CallBase &NewInst = versionCallSiteWithCond(CB, Cond, BranchWeights); |
680 | |
681 | // Promote 'NewInst' so that it directly calls the desired function. |
682 | return promoteCall(CB&: NewInst, Callee); |
683 | } |
684 | |
685 | bool llvm::tryPromoteCall(CallBase &CB) { |
686 | assert(!CB.getCalledFunction()); |
687 | Module *M = CB.getCaller()->getParent(); |
688 | const DataLayout &DL = M->getDataLayout(); |
689 | Value *Callee = CB.getCalledOperand(); |
690 | |
691 | LoadInst *VTableEntryLoad = dyn_cast<LoadInst>(Val: Callee); |
692 | if (!VTableEntryLoad) |
693 | return false; // Not a vtable entry load. |
694 | Value *VTableEntryPtr = VTableEntryLoad->getPointerOperand(); |
695 | APInt VTableOffset(DL.getIndexTypeSizeInBits(Ty: VTableEntryPtr->getType()), 0); |
696 | Value *VTableBasePtr = VTableEntryPtr->stripAndAccumulateConstantOffsets( |
697 | DL, Offset&: VTableOffset, /* AllowNonInbounds */ true); |
698 | LoadInst *VTablePtrLoad = dyn_cast<LoadInst>(Val: VTableBasePtr); |
699 | if (!VTablePtrLoad) |
700 | return false; // Not a vtable load. |
701 | Value *Object = VTablePtrLoad->getPointerOperand(); |
702 | APInt ObjectOffset(DL.getIndexTypeSizeInBits(Ty: Object->getType()), 0); |
703 | Value *ObjectBase = Object->stripAndAccumulateConstantOffsets( |
704 | DL, Offset&: ObjectOffset, /* AllowNonInbounds */ true); |
705 | if (!(isa<AllocaInst>(Val: ObjectBase) && ObjectOffset == 0)) |
706 | // Not an Alloca or the offset isn't zero. |
707 | return false; |
708 | |
709 | // Look for the vtable pointer store into the object by the ctor. |
710 | BasicBlock::iterator BBI(VTablePtrLoad); |
711 | Value *VTablePtr = FindAvailableLoadedValue( |
712 | Load: VTablePtrLoad, ScanBB: VTablePtrLoad->getParent(), ScanFrom&: BBI, MaxInstsToScan: 0, AA: nullptr, IsLoadCSE: nullptr); |
713 | if (!VTablePtr || !VTablePtr->getType()->isPointerTy()) |
714 | return false; // No vtable found. |
715 | APInt VTableOffsetGVBase(DL.getIndexTypeSizeInBits(Ty: VTablePtr->getType()), 0); |
716 | Value *VTableGVBase = VTablePtr->stripAndAccumulateConstantOffsets( |
717 | DL, Offset&: VTableOffsetGVBase, /* AllowNonInbounds */ true); |
718 | GlobalVariable *GV = dyn_cast<GlobalVariable>(Val: VTableGVBase); |
719 | if (!(GV && GV->isConstant() && GV->hasDefinitiveInitializer())) |
720 | // Not in the form of a global constant variable with an initializer. |
721 | return false; |
722 | |
723 | APInt VTableGVOffset = VTableOffsetGVBase + VTableOffset; |
724 | if (!(VTableGVOffset.getActiveBits() <= 64)) |
725 | return false; // Out of range. |
726 | |
727 | Function *DirectCallee = nullptr; |
728 | std::tie(args&: DirectCallee, args: std::ignore) = |
729 | getFunctionAtVTableOffset(GV, Offset: VTableGVOffset.getZExtValue(), M&: *M); |
730 | if (!DirectCallee) |
731 | return false; // No function pointer found. |
732 | |
733 | if (!isLegalToPromote(CB, Callee: DirectCallee)) |
734 | return false; |
735 | |
736 | // Success. |
737 | promoteCall(CB, Callee: DirectCallee); |
738 | return true; |
739 | } |
740 | |
741 | #undef DEBUG_TYPE |
742 | |