1 | //==- AArch64PromoteConstant.cpp - Promote constant to global for AArch64 --==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the AArch64PromoteConstant pass which promotes constants |
10 | // to global variables when this is likely to be more efficient. Currently only |
11 | // types related to constant vector (i.e., constant vector, array of constant |
12 | // vectors, constant structure with a constant vector field, etc.) are promoted |
13 | // to global variables. Constant vectors are likely to be lowered in target |
14 | // constant pool during instruction selection already; therefore, the access |
15 | // will remain the same (memory load), but the structure types are not split |
16 | // into different constant pool accesses for each field. A bonus side effect is |
17 | // that created globals may be merged by the global merge pass. |
18 | // |
19 | // FIXME: This pass may be useful for other targets too. |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #include "AArch64.h" |
23 | #include "llvm/ADT/DenseMap.h" |
24 | #include "llvm/ADT/SmallVector.h" |
25 | #include "llvm/ADT/Statistic.h" |
26 | #include "llvm/IR/BasicBlock.h" |
27 | #include "llvm/IR/Constant.h" |
28 | #include "llvm/IR/Constants.h" |
29 | #include "llvm/IR/Dominators.h" |
30 | #include "llvm/IR/Function.h" |
31 | #include "llvm/IR/GlobalValue.h" |
32 | #include "llvm/IR/GlobalVariable.h" |
33 | #include "llvm/IR/IRBuilder.h" |
34 | #include "llvm/IR/InlineAsm.h" |
35 | #include "llvm/IR/InstIterator.h" |
36 | #include "llvm/IR/Instruction.h" |
37 | #include "llvm/IR/Instructions.h" |
38 | #include "llvm/IR/IntrinsicInst.h" |
39 | #include "llvm/IR/Module.h" |
40 | #include "llvm/IR/Type.h" |
41 | #include "llvm/InitializePasses.h" |
42 | #include "llvm/Pass.h" |
43 | #include "llvm/Support/Casting.h" |
44 | #include "llvm/Support/CommandLine.h" |
45 | #include "llvm/Support/Debug.h" |
46 | #include "llvm/Support/raw_ostream.h" |
47 | #include <algorithm> |
48 | #include <cassert> |
49 | #include <utility> |
50 | |
51 | using namespace llvm; |
52 | |
53 | #define DEBUG_TYPE "aarch64-promote-const" |
54 | |
55 | // Stress testing mode - disable heuristics. |
56 | static cl::opt<bool> Stress("aarch64-stress-promote-const" , cl::Hidden, |
57 | cl::desc("Promote all vector constants" )); |
58 | |
59 | STATISTIC(NumPromoted, "Number of promoted constants" ); |
60 | STATISTIC(NumPromotedUses, "Number of promoted constants uses" ); |
61 | |
62 | //===----------------------------------------------------------------------===// |
63 | // AArch64PromoteConstant |
64 | //===----------------------------------------------------------------------===// |
65 | |
66 | namespace { |
67 | |
68 | /// Promotes interesting constant into global variables. |
69 | /// The motivating example is: |
70 | /// static const uint16_t TableA[32] = { |
71 | /// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768, |
72 | /// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215, |
73 | /// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846, |
74 | /// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725, |
75 | /// }; |
76 | /// |
77 | /// uint8x16x4_t LoadStatic(void) { |
78 | /// uint8x16x4_t ret; |
79 | /// ret.val[0] = vld1q_u16(TableA + 0); |
80 | /// ret.val[1] = vld1q_u16(TableA + 8); |
81 | /// ret.val[2] = vld1q_u16(TableA + 16); |
82 | /// ret.val[3] = vld1q_u16(TableA + 24); |
83 | /// return ret; |
84 | /// } |
85 | /// |
86 | /// The constants in this example are folded into the uses. Thus, 4 different |
87 | /// constants are created. |
88 | /// |
89 | /// As their type is vector the cheapest way to create them is to load them |
90 | /// for the memory. |
91 | /// |
92 | /// Therefore the final assembly final has 4 different loads. With this pass |
93 | /// enabled, only one load is issued for the constants. |
94 | class AArch64PromoteConstant : public ModulePass { |
95 | public: |
96 | struct PromotedConstant { |
97 | bool ShouldConvert = false; |
98 | GlobalVariable *GV = nullptr; |
99 | }; |
100 | using PromotionCacheTy = SmallDenseMap<Constant *, PromotedConstant, 16>; |
101 | |
102 | struct UpdateRecord { |
103 | Constant *C; |
104 | Instruction *User; |
105 | unsigned Op; |
106 | |
107 | UpdateRecord(Constant *C, Instruction *User, unsigned Op) |
108 | : C(C), User(User), Op(Op) {} |
109 | }; |
110 | |
111 | static char ID; |
112 | |
113 | AArch64PromoteConstant() : ModulePass(ID) { |
114 | initializeAArch64PromoteConstantPass(*PassRegistry::getPassRegistry()); |
115 | } |
116 | |
117 | StringRef getPassName() const override { return "AArch64 Promote Constant" ; } |
118 | |
119 | /// Iterate over the functions and promote the interesting constants into |
120 | /// global variables with module scope. |
121 | bool runOnModule(Module &M) override { |
122 | LLVM_DEBUG(dbgs() << getPassName() << '\n'); |
123 | if (skipModule(M)) |
124 | return false; |
125 | bool Changed = false; |
126 | PromotionCacheTy PromotionCache; |
127 | for (auto &MF : M) { |
128 | Changed |= runOnFunction(F&: MF, PromotionCache); |
129 | } |
130 | return Changed; |
131 | } |
132 | |
133 | private: |
134 | /// Look for interesting constants used within the given function. |
135 | /// Promote them into global variables, load these global variables within |
136 | /// the related function, so that the number of inserted load is minimal. |
137 | bool runOnFunction(Function &F, PromotionCacheTy &PromotionCache); |
138 | |
139 | // This transformation requires dominator info |
140 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
141 | AU.setPreservesCFG(); |
142 | AU.addRequired<DominatorTreeWrapperPass>(); |
143 | AU.addPreserved<DominatorTreeWrapperPass>(); |
144 | } |
145 | |
146 | /// Type to store a list of Uses. |
147 | using Uses = SmallVector<std::pair<Instruction *, unsigned>, 4>; |
148 | /// Map an insertion point to all the uses it dominates. |
149 | using InsertionPoints = DenseMap<Instruction *, Uses>; |
150 | |
151 | /// Find the closest point that dominates the given Use. |
152 | Instruction *findInsertionPoint(Instruction &User, unsigned OpNo); |
153 | |
154 | /// Check if the given insertion point is dominated by an existing |
155 | /// insertion point. |
156 | /// If true, the given use is added to the list of dominated uses for |
157 | /// the related existing point. |
158 | /// \param NewPt the insertion point to be checked |
159 | /// \param User the user of the constant |
160 | /// \param OpNo the operand number of the use |
161 | /// \param InsertPts existing insertion points |
162 | /// \pre NewPt and all instruction in InsertPts belong to the same function |
163 | /// \return true if one of the insertion point in InsertPts dominates NewPt, |
164 | /// false otherwise |
165 | bool isDominated(Instruction *NewPt, Instruction *User, unsigned OpNo, |
166 | InsertionPoints &InsertPts); |
167 | |
168 | /// Check if the given insertion point can be merged with an existing |
169 | /// insertion point in a common dominator. |
170 | /// If true, the given use is added to the list of the created insertion |
171 | /// point. |
172 | /// \param NewPt the insertion point to be checked |
173 | /// \param User the user of the constant |
174 | /// \param OpNo the operand number of the use |
175 | /// \param InsertPts existing insertion points |
176 | /// \pre NewPt and all instruction in InsertPts belong to the same function |
177 | /// \pre isDominated returns false for the exact same parameters. |
178 | /// \return true if it exists an insertion point in InsertPts that could |
179 | /// have been merged with NewPt in a common dominator, |
180 | /// false otherwise |
181 | bool tryAndMerge(Instruction *NewPt, Instruction *User, unsigned OpNo, |
182 | InsertionPoints &InsertPts); |
183 | |
184 | /// Compute the minimal insertion points to dominates all the interesting |
185 | /// uses of value. |
186 | /// Insertion points are group per function and each insertion point |
187 | /// contains a list of all the uses it dominates within the related function |
188 | /// \param User the user of the constant |
189 | /// \param OpNo the operand number of the constant |
190 | /// \param[out] InsertPts output storage of the analysis |
191 | void computeInsertionPoint(Instruction *User, unsigned OpNo, |
192 | InsertionPoints &InsertPts); |
193 | |
194 | /// Insert a definition of a new global variable at each point contained in |
195 | /// InsPtsPerFunc and update the related uses (also contained in |
196 | /// InsPtsPerFunc). |
197 | void insertDefinitions(Function &F, GlobalVariable &GV, |
198 | InsertionPoints &InsertPts); |
199 | |
200 | /// Do the constant promotion indicated by the Updates records, keeping track |
201 | /// of globals in PromotionCache. |
202 | void promoteConstants(Function &F, SmallVectorImpl<UpdateRecord> &Updates, |
203 | PromotionCacheTy &PromotionCache); |
204 | |
205 | /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. |
206 | /// Append Use to this list and delete the entry of IPI in InsertPts. |
207 | static void appendAndTransferDominatedUses(Instruction *NewPt, |
208 | Instruction *User, unsigned OpNo, |
209 | InsertionPoints::iterator &IPI, |
210 | InsertionPoints &InsertPts) { |
211 | // Record the dominated use. |
212 | IPI->second.emplace_back(Args&: User, Args&: OpNo); |
213 | // Transfer the dominated uses of IPI to NewPt |
214 | // Inserting into the DenseMap may invalidate existing iterator. |
215 | // Keep a copy of the key to find the iterator to erase. Keep a copy of the |
216 | // value so that we don't have to dereference IPI->second. |
217 | Instruction *OldInstr = IPI->first; |
218 | Uses OldUses = std::move(IPI->second); |
219 | InsertPts[NewPt] = std::move(OldUses); |
220 | // Erase IPI. |
221 | InsertPts.erase(Val: OldInstr); |
222 | } |
223 | }; |
224 | |
225 | } // end anonymous namespace |
226 | |
227 | char AArch64PromoteConstant::ID = 0; |
228 | |
229 | INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const" , |
230 | "AArch64 Promote Constant Pass" , false, false) |
231 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
232 | INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const" , |
233 | "AArch64 Promote Constant Pass" , false, false) |
234 | |
235 | ModulePass *llvm::createAArch64PromoteConstantPass() { |
236 | return new AArch64PromoteConstant(); |
237 | } |
238 | |
239 | /// Check if the given type uses a vector type. |
240 | static bool isConstantUsingVectorTy(const Type *CstTy) { |
241 | if (CstTy->isVectorTy()) |
242 | return true; |
243 | if (CstTy->isStructTy()) { |
244 | for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements(); |
245 | EltIdx < EndEltIdx; ++EltIdx) |
246 | if (isConstantUsingVectorTy(CstTy: CstTy->getStructElementType(N: EltIdx))) |
247 | return true; |
248 | } else if (CstTy->isArrayTy()) |
249 | return isConstantUsingVectorTy(CstTy: CstTy->getArrayElementType()); |
250 | return false; |
251 | } |
252 | |
253 | // Returns true if \p C contains only ConstantData leafs and no global values, |
254 | // block addresses or constant expressions. Traverses ConstantAggregates. |
255 | static bool containsOnlyConstantData(const Constant *C) { |
256 | if (isa<ConstantData>(Val: C)) |
257 | return true; |
258 | |
259 | if (isa<GlobalValue>(Val: C) || isa<BlockAddress>(Val: C) || isa<ConstantExpr>(Val: C)) |
260 | return false; |
261 | |
262 | return all_of(Range: C->operands(), P: [](const Use &U) { |
263 | return containsOnlyConstantData(C: cast<Constant>(Val: &U)); |
264 | }); |
265 | } |
266 | |
267 | /// Check if the given use (Instruction + OpIdx) of Cst should be converted into |
268 | /// a load of a global variable initialized with Cst. |
269 | /// A use should be converted if it is legal to do so. |
270 | /// For instance, it is not legal to turn the mask operand of a shuffle vector |
271 | /// into a load of a global variable. |
272 | static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr, |
273 | unsigned OpIdx) { |
274 | // shufflevector instruction expects a const for the mask argument, i.e., the |
275 | // third argument. Do not promote this use in that case. |
276 | if (isa<const ShuffleVectorInst>(Val: Instr) && OpIdx == 2) |
277 | return false; |
278 | |
279 | // extractvalue instruction expects a const idx. |
280 | if (isa<const ExtractValueInst>(Val: Instr) && OpIdx > 0) |
281 | return false; |
282 | |
283 | // extractvalue instruction expects a const idx. |
284 | if (isa<const InsertValueInst>(Val: Instr) && OpIdx > 1) |
285 | return false; |
286 | |
287 | if (isa<const AllocaInst>(Val: Instr) && OpIdx > 0) |
288 | return false; |
289 | |
290 | // Alignment argument must be constant. |
291 | if (isa<const LoadInst>(Val: Instr) && OpIdx > 0) |
292 | return false; |
293 | |
294 | // Alignment argument must be constant. |
295 | if (isa<const StoreInst>(Val: Instr) && OpIdx > 1) |
296 | return false; |
297 | |
298 | // Index must be constant. |
299 | if (isa<const GetElementPtrInst>(Val: Instr) && OpIdx > 0) |
300 | return false; |
301 | |
302 | // Personality function and filters must be constant. |
303 | // Give up on that instruction. |
304 | if (isa<const LandingPadInst>(Val: Instr)) |
305 | return false; |
306 | |
307 | // Switch instruction expects constants to compare to. |
308 | if (isa<const SwitchInst>(Val: Instr)) |
309 | return false; |
310 | |
311 | // Expected address must be a constant. |
312 | if (isa<const IndirectBrInst>(Val: Instr)) |
313 | return false; |
314 | |
315 | // Do not mess with intrinsics. |
316 | if (isa<const IntrinsicInst>(Val: Instr)) |
317 | return false; |
318 | |
319 | // Do not mess with inline asm. |
320 | const CallInst *CI = dyn_cast<const CallInst>(Val: Instr); |
321 | return !(CI && CI->isInlineAsm()); |
322 | } |
323 | |
324 | /// Check if the given Cst should be converted into |
325 | /// a load of a global variable initialized with Cst. |
326 | /// A constant should be converted if it is likely that the materialization of |
327 | /// the constant will be tricky. Thus, we give up on zero or undef values. |
328 | /// |
329 | /// \todo Currently, accept only vector related types. |
330 | /// Also we give up on all simple vector type to keep the existing |
331 | /// behavior. Otherwise, we should push here all the check of the lowering of |
332 | /// BUILD_VECTOR. By giving up, we lose the potential benefit of merging |
333 | /// constant via global merge and the fact that the same constant is stored |
334 | /// only once with this method (versus, as many function that uses the constant |
335 | /// for the regular approach, even for float). |
336 | /// Again, the simplest solution would be to promote every |
337 | /// constant and rematerialize them when they are actually cheap to create. |
338 | static bool shouldConvertImpl(const Constant *Cst) { |
339 | if (isa<const UndefValue>(Val: Cst)) |
340 | return false; |
341 | |
342 | // FIXME: In some cases, it may be interesting to promote in memory |
343 | // a zero initialized constant. |
344 | // E.g., when the type of Cst require more instructions than the |
345 | // adrp/add/load sequence or when this sequence can be shared by several |
346 | // instances of Cst. |
347 | // Ideally, we could promote this into a global and rematerialize the constant |
348 | // when it was a bad idea. |
349 | if (Cst->isZeroValue()) |
350 | return false; |
351 | |
352 | if (Stress) |
353 | return true; |
354 | |
355 | // FIXME: see function \todo |
356 | if (Cst->getType()->isVectorTy()) |
357 | return false; |
358 | return isConstantUsingVectorTy(CstTy: Cst->getType()); |
359 | } |
360 | |
361 | static bool |
362 | shouldConvert(Constant &C, |
363 | AArch64PromoteConstant::PromotionCacheTy &PromotionCache) { |
364 | auto Converted = PromotionCache.insert( |
365 | KV: std::make_pair(x: &C, y: AArch64PromoteConstant::PromotedConstant())); |
366 | if (Converted.second) |
367 | Converted.first->second.ShouldConvert = shouldConvertImpl(Cst: &C); |
368 | return Converted.first->second.ShouldConvert; |
369 | } |
370 | |
371 | Instruction *AArch64PromoteConstant::findInsertionPoint(Instruction &User, |
372 | unsigned OpNo) { |
373 | // If this user is a phi, the insertion point is in the related |
374 | // incoming basic block. |
375 | if (PHINode *PhiInst = dyn_cast<PHINode>(Val: &User)) |
376 | return PhiInst->getIncomingBlock(i: OpNo)->getTerminator(); |
377 | |
378 | return &User; |
379 | } |
380 | |
381 | bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Instruction *User, |
382 | unsigned OpNo, |
383 | InsertionPoints &InsertPts) { |
384 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( |
385 | F&: *NewPt->getParent()->getParent()).getDomTree(); |
386 | |
387 | // Traverse all the existing insertion points and check if one is dominating |
388 | // NewPt. If it is, remember that. |
389 | for (auto &IPI : InsertPts) { |
390 | if (NewPt == IPI.first || DT.dominates(Def: IPI.first, User: NewPt) || |
391 | // When IPI.first is a terminator instruction, DT may think that |
392 | // the result is defined on the edge. |
393 | // Here we are testing the insertion point, not the definition. |
394 | (IPI.first->getParent() != NewPt->getParent() && |
395 | DT.dominates(A: IPI.first->getParent(), B: NewPt->getParent()))) { |
396 | // No need to insert this point. Just record the dominated use. |
397 | LLVM_DEBUG(dbgs() << "Insertion point dominated by:\n" ); |
398 | LLVM_DEBUG(IPI.first->print(dbgs())); |
399 | LLVM_DEBUG(dbgs() << '\n'); |
400 | IPI.second.emplace_back(Args&: User, Args&: OpNo); |
401 | return true; |
402 | } |
403 | } |
404 | return false; |
405 | } |
406 | |
407 | bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Instruction *User, |
408 | unsigned OpNo, |
409 | InsertionPoints &InsertPts) { |
410 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( |
411 | F&: *NewPt->getParent()->getParent()).getDomTree(); |
412 | BasicBlock *NewBB = NewPt->getParent(); |
413 | |
414 | // Traverse all the existing insertion point and check if one is dominated by |
415 | // NewPt and thus useless or can be combined with NewPt into a common |
416 | // dominator. |
417 | for (InsertionPoints::iterator IPI = InsertPts.begin(), |
418 | EndIPI = InsertPts.end(); |
419 | IPI != EndIPI; ++IPI) { |
420 | BasicBlock *CurBB = IPI->first->getParent(); |
421 | if (NewBB == CurBB) { |
422 | // Instructions are in the same block. |
423 | // By construction, NewPt is dominating the other. |
424 | // Indeed, isDominated returned false with the exact same arguments. |
425 | LLVM_DEBUG(dbgs() << "Merge insertion point with:\n" ); |
426 | LLVM_DEBUG(IPI->first->print(dbgs())); |
427 | LLVM_DEBUG(dbgs() << "\nat considered insertion point.\n" ); |
428 | appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); |
429 | return true; |
430 | } |
431 | |
432 | // Look for a common dominator |
433 | BasicBlock *CommonDominator = DT.findNearestCommonDominator(A: NewBB, B: CurBB); |
434 | // If none exists, we cannot merge these two points. |
435 | if (!CommonDominator) |
436 | continue; |
437 | |
438 | if (CommonDominator != NewBB) { |
439 | // By construction, the CommonDominator cannot be CurBB. |
440 | assert(CommonDominator != CurBB && |
441 | "Instruction has not been rejected during isDominated check!" ); |
442 | // Take the last instruction of the CommonDominator as insertion point |
443 | NewPt = CommonDominator->getTerminator(); |
444 | } |
445 | // else, CommonDominator is the block of NewBB, hence NewBB is the last |
446 | // possible insertion point in that block. |
447 | LLVM_DEBUG(dbgs() << "Merge insertion point with:\n" ); |
448 | LLVM_DEBUG(IPI->first->print(dbgs())); |
449 | LLVM_DEBUG(dbgs() << '\n'); |
450 | LLVM_DEBUG(NewPt->print(dbgs())); |
451 | LLVM_DEBUG(dbgs() << '\n'); |
452 | appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); |
453 | return true; |
454 | } |
455 | return false; |
456 | } |
457 | |
458 | void AArch64PromoteConstant::computeInsertionPoint( |
459 | Instruction *User, unsigned OpNo, InsertionPoints &InsertPts) { |
460 | LLVM_DEBUG(dbgs() << "Considered use, opidx " << OpNo << ":\n" ); |
461 | LLVM_DEBUG(User->print(dbgs())); |
462 | LLVM_DEBUG(dbgs() << '\n'); |
463 | |
464 | Instruction *InsertionPoint = findInsertionPoint(User&: *User, OpNo); |
465 | |
466 | LLVM_DEBUG(dbgs() << "Considered insertion point:\n" ); |
467 | LLVM_DEBUG(InsertionPoint->print(dbgs())); |
468 | LLVM_DEBUG(dbgs() << '\n'); |
469 | |
470 | if (isDominated(NewPt: InsertionPoint, User, OpNo, InsertPts)) |
471 | return; |
472 | // This insertion point is useful, check if we can merge some insertion |
473 | // point in a common dominator or if NewPt dominates an existing one. |
474 | if (tryAndMerge(NewPt: InsertionPoint, User, OpNo, InsertPts)) |
475 | return; |
476 | |
477 | LLVM_DEBUG(dbgs() << "Keep considered insertion point\n" ); |
478 | |
479 | // It is definitely useful by its own |
480 | InsertPts[InsertionPoint].emplace_back(Args&: User, Args&: OpNo); |
481 | } |
482 | |
483 | static void ensurePromotedGV(Function &F, Constant &C, |
484 | AArch64PromoteConstant::PromotedConstant &PC) { |
485 | assert(PC.ShouldConvert && |
486 | "Expected that we should convert this to a global" ); |
487 | if (PC.GV) |
488 | return; |
489 | PC.GV = new GlobalVariable( |
490 | *F.getParent(), C.getType(), true, GlobalValue::InternalLinkage, nullptr, |
491 | "_PromotedConst" , nullptr, GlobalVariable::NotThreadLocal); |
492 | PC.GV->setInitializer(&C); |
493 | LLVM_DEBUG(dbgs() << "Global replacement: " ); |
494 | LLVM_DEBUG(PC.GV->print(dbgs())); |
495 | LLVM_DEBUG(dbgs() << '\n'); |
496 | ++NumPromoted; |
497 | } |
498 | |
499 | void AArch64PromoteConstant::insertDefinitions(Function &F, |
500 | GlobalVariable &PromotedGV, |
501 | InsertionPoints &InsertPts) { |
502 | #ifndef NDEBUG |
503 | // Do more checking for debug purposes. |
504 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); |
505 | #endif |
506 | assert(!InsertPts.empty() && "Empty uses does not need a definition" ); |
507 | |
508 | for (const auto &IPI : InsertPts) { |
509 | // Create the load of the global variable. |
510 | IRBuilder<> Builder(IPI.first); |
511 | LoadInst *LoadedCst = |
512 | Builder.CreateLoad(Ty: PromotedGV.getValueType(), Ptr: &PromotedGV); |
513 | LLVM_DEBUG(dbgs() << "**********\n" ); |
514 | LLVM_DEBUG(dbgs() << "New def: " ); |
515 | LLVM_DEBUG(LoadedCst->print(dbgs())); |
516 | LLVM_DEBUG(dbgs() << '\n'); |
517 | |
518 | // Update the dominated uses. |
519 | for (auto Use : IPI.second) { |
520 | #ifndef NDEBUG |
521 | assert(DT.dominates(LoadedCst, |
522 | findInsertionPoint(*Use.first, Use.second)) && |
523 | "Inserted definition does not dominate all its uses!" ); |
524 | #endif |
525 | LLVM_DEBUG({ |
526 | dbgs() << "Use to update " << Use.second << ":" ; |
527 | Use.first->print(dbgs()); |
528 | dbgs() << '\n'; |
529 | }); |
530 | Use.first->setOperand(i: Use.second, Val: LoadedCst); |
531 | ++NumPromotedUses; |
532 | } |
533 | } |
534 | } |
535 | |
536 | void AArch64PromoteConstant::promoteConstants( |
537 | Function &F, SmallVectorImpl<UpdateRecord> &Updates, |
538 | PromotionCacheTy &PromotionCache) { |
539 | // Promote the constants. |
540 | for (auto U = Updates.begin(), E = Updates.end(); U != E;) { |
541 | LLVM_DEBUG(dbgs() << "** Compute insertion points **\n" ); |
542 | auto First = U; |
543 | Constant *C = First->C; |
544 | InsertionPoints InsertPts; |
545 | do { |
546 | computeInsertionPoint(User: U->User, OpNo: U->Op, InsertPts); |
547 | } while (++U != E && U->C == C); |
548 | |
549 | auto &Promotion = PromotionCache[C]; |
550 | ensurePromotedGV(F, C&: *C, PC&: Promotion); |
551 | insertDefinitions(F, PromotedGV&: *Promotion.GV, InsertPts); |
552 | } |
553 | } |
554 | |
555 | bool AArch64PromoteConstant::runOnFunction(Function &F, |
556 | PromotionCacheTy &PromotionCache) { |
557 | // Look for instructions using constant vector. Promote that constant to a |
558 | // global variable. Create as few loads of this variable as possible and |
559 | // update the uses accordingly. |
560 | SmallVector<UpdateRecord, 64> Updates; |
561 | for (Instruction &I : instructions(F: &F)) { |
562 | // Traverse the operand, looking for constant vectors. Replace them by a |
563 | // load of a global variable of constant vector type. |
564 | for (Use &U : I.operands()) { |
565 | Constant *Cst = dyn_cast<Constant>(Val&: U); |
566 | // There is no point in promoting global values as they are already |
567 | // global. Do not promote constants containing constant expression, global |
568 | // values or blockaddresses either, as they may require some code |
569 | // expansion. |
570 | if (!Cst || isa<GlobalValue>(Val: Cst) || !containsOnlyConstantData(C: Cst)) |
571 | continue; |
572 | |
573 | // Check if this constant is worth promoting. |
574 | if (!shouldConvert(C&: *Cst, PromotionCache)) |
575 | continue; |
576 | |
577 | // Check if this use should be promoted. |
578 | unsigned OpNo = &U - I.op_begin(); |
579 | if (!shouldConvertUse(Cst, Instr: &I, OpIdx: OpNo)) |
580 | continue; |
581 | |
582 | Updates.emplace_back(Args&: Cst, Args: &I, Args&: OpNo); |
583 | } |
584 | } |
585 | |
586 | if (Updates.empty()) |
587 | return false; |
588 | |
589 | promoteConstants(F, Updates, PromotionCache); |
590 | return true; |
591 | } |
592 | |