1 | //==- AArch64PromoteConstant.cpp - Promote constant to global for AArch64 --==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the AArch64PromoteConstant pass which promotes constants |
10 | // to global variables when this is likely to be more efficient. Currently only |
11 | // types related to constant vector (i.e., constant vector, array of constant |
12 | // vectors, constant structure with a constant vector field, etc.) are promoted |
13 | // to global variables. Constant vectors are likely to be lowered in target |
14 | // constant pool during instruction selection already; therefore, the access |
15 | // will remain the same (memory load), but the structure types are not split |
16 | // into different constant pool accesses for each field. A bonus side effect is |
17 | // that created globals may be merged by the global merge pass. |
18 | // |
19 | // FIXME: This pass may be useful for other targets too. |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #include "AArch64.h" |
23 | #include "llvm/ADT/DenseMap.h" |
24 | #include "llvm/ADT/SmallVector.h" |
25 | #include "llvm/ADT/Statistic.h" |
26 | #include "llvm/IR/BasicBlock.h" |
27 | #include "llvm/IR/Constant.h" |
28 | #include "llvm/IR/Constants.h" |
29 | #include "llvm/IR/Dominators.h" |
30 | #include "llvm/IR/Function.h" |
31 | #include "llvm/IR/GlobalValue.h" |
32 | #include "llvm/IR/GlobalVariable.h" |
33 | #include "llvm/IR/IRBuilder.h" |
34 | #include "llvm/IR/InstIterator.h" |
35 | #include "llvm/IR/Instruction.h" |
36 | #include "llvm/IR/Instructions.h" |
37 | #include "llvm/IR/IntrinsicInst.h" |
38 | #include "llvm/IR/Module.h" |
39 | #include "llvm/IR/Type.h" |
40 | #include "llvm/InitializePasses.h" |
41 | #include "llvm/Pass.h" |
42 | #include "llvm/Support/Casting.h" |
43 | #include "llvm/Support/CommandLine.h" |
44 | #include "llvm/Support/Debug.h" |
45 | #include "llvm/Support/raw_ostream.h" |
46 | #include <cassert> |
47 | #include <utility> |
48 | |
49 | using namespace llvm; |
50 | |
51 | #define DEBUG_TYPE "aarch64-promote-const" |
52 | |
53 | // Stress testing mode - disable heuristics. |
54 | static cl::opt<bool> Stress("aarch64-stress-promote-const" , cl::Hidden, |
55 | cl::desc("Promote all vector constants" )); |
56 | |
57 | STATISTIC(NumPromoted, "Number of promoted constants" ); |
58 | STATISTIC(NumPromotedUses, "Number of promoted constants uses" ); |
59 | |
60 | //===----------------------------------------------------------------------===// |
61 | // AArch64PromoteConstant |
62 | //===----------------------------------------------------------------------===// |
63 | |
64 | namespace { |
65 | |
66 | /// Promotes interesting constant into global variables. |
67 | /// The motivating example is: |
68 | /// static const uint16_t TableA[32] = { |
69 | /// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768, |
70 | /// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215, |
71 | /// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846, |
72 | /// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725, |
73 | /// }; |
74 | /// |
75 | /// uint8x16x4_t LoadStatic(void) { |
76 | /// uint8x16x4_t ret; |
77 | /// ret.val[0] = vld1q_u16(TableA + 0); |
78 | /// ret.val[1] = vld1q_u16(TableA + 8); |
79 | /// ret.val[2] = vld1q_u16(TableA + 16); |
80 | /// ret.val[3] = vld1q_u16(TableA + 24); |
81 | /// return ret; |
82 | /// } |
83 | /// |
84 | /// The constants in this example are folded into the uses. Thus, 4 different |
85 | /// constants are created. |
86 | /// |
87 | /// As their type is vector the cheapest way to create them is to load them |
88 | /// for the memory. |
89 | /// |
90 | /// Therefore the final assembly final has 4 different loads. With this pass |
91 | /// enabled, only one load is issued for the constants. |
92 | class AArch64PromoteConstant : public ModulePass { |
93 | public: |
94 | struct PromotedConstant { |
95 | bool ShouldConvert = false; |
96 | GlobalVariable *GV = nullptr; |
97 | }; |
98 | using PromotionCacheTy = SmallDenseMap<Constant *, PromotedConstant, 16>; |
99 | |
100 | struct UpdateRecord { |
101 | Constant *C; |
102 | Instruction *User; |
103 | unsigned Op; |
104 | |
105 | UpdateRecord(Constant *C, Instruction *User, unsigned Op) |
106 | : C(C), User(User), Op(Op) {} |
107 | }; |
108 | |
109 | static char ID; |
110 | |
111 | AArch64PromoteConstant() : ModulePass(ID) {} |
112 | |
113 | StringRef getPassName() const override { return "AArch64 Promote Constant" ; } |
114 | |
115 | /// Iterate over the functions and promote the interesting constants into |
116 | /// global variables with module scope. |
117 | bool runOnModule(Module &M) override { |
118 | LLVM_DEBUG(dbgs() << getPassName() << '\n'); |
119 | if (skipModule(M)) |
120 | return false; |
121 | bool Changed = false; |
122 | PromotionCacheTy PromotionCache; |
123 | for (auto &MF : M) { |
124 | Changed |= runOnFunction(F&: MF, PromotionCache); |
125 | } |
126 | return Changed; |
127 | } |
128 | |
129 | private: |
130 | /// Look for interesting constants used within the given function. |
131 | /// Promote them into global variables, load these global variables within |
132 | /// the related function, so that the number of inserted load is minimal. |
133 | bool runOnFunction(Function &F, PromotionCacheTy &PromotionCache); |
134 | |
135 | // This transformation requires dominator info |
136 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
137 | AU.setPreservesCFG(); |
138 | AU.addRequired<DominatorTreeWrapperPass>(); |
139 | AU.addPreserved<DominatorTreeWrapperPass>(); |
140 | } |
141 | |
142 | /// Type to store a list of Uses. |
143 | using Uses = SmallVector<std::pair<Instruction *, unsigned>, 4>; |
144 | /// Map an insertion point to all the uses it dominates. |
145 | using InsertionPoints = DenseMap<Instruction *, Uses>; |
146 | |
147 | /// Find the closest point that dominates the given Use. |
148 | Instruction *findInsertionPoint(Instruction &User, unsigned OpNo); |
149 | |
150 | /// Check if the given insertion point is dominated by an existing |
151 | /// insertion point. |
152 | /// If true, the given use is added to the list of dominated uses for |
153 | /// the related existing point. |
154 | /// \param NewPt the insertion point to be checked |
155 | /// \param User the user of the constant |
156 | /// \param OpNo the operand number of the use |
157 | /// \param InsertPts existing insertion points |
158 | /// \pre NewPt and all instruction in InsertPts belong to the same function |
159 | /// \return true if one of the insertion point in InsertPts dominates NewPt, |
160 | /// false otherwise |
161 | bool isDominated(Instruction *NewPt, Instruction *User, unsigned OpNo, |
162 | InsertionPoints &InsertPts); |
163 | |
164 | /// Check if the given insertion point can be merged with an existing |
165 | /// insertion point in a common dominator. |
166 | /// If true, the given use is added to the list of the created insertion |
167 | /// point. |
168 | /// \param NewPt the insertion point to be checked |
169 | /// \param User the user of the constant |
170 | /// \param OpNo the operand number of the use |
171 | /// \param InsertPts existing insertion points |
172 | /// \pre NewPt and all instruction in InsertPts belong to the same function |
173 | /// \pre isDominated returns false for the exact same parameters. |
174 | /// \return true if it exists an insertion point in InsertPts that could |
175 | /// have been merged with NewPt in a common dominator, |
176 | /// false otherwise |
177 | bool tryAndMerge(Instruction *NewPt, Instruction *User, unsigned OpNo, |
178 | InsertionPoints &InsertPts); |
179 | |
180 | /// Compute the minimal insertion points to dominates all the interesting |
181 | /// uses of value. |
182 | /// Insertion points are group per function and each insertion point |
183 | /// contains a list of all the uses it dominates within the related function |
184 | /// \param User the user of the constant |
185 | /// \param OpNo the operand number of the constant |
186 | /// \param[out] InsertPts output storage of the analysis |
187 | void computeInsertionPoint(Instruction *User, unsigned OpNo, |
188 | InsertionPoints &InsertPts); |
189 | |
190 | /// Insert a definition of a new global variable at each point contained in |
191 | /// InsPtsPerFunc and update the related uses (also contained in |
192 | /// InsPtsPerFunc). |
193 | void insertDefinitions(Function &F, GlobalVariable &GV, |
194 | InsertionPoints &InsertPts); |
195 | |
196 | /// Do the constant promotion indicated by the Updates records, keeping track |
197 | /// of globals in PromotionCache. |
198 | void promoteConstants(Function &F, SmallVectorImpl<UpdateRecord> &Updates, |
199 | PromotionCacheTy &PromotionCache); |
200 | |
201 | /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. |
202 | /// Append Use to this list and delete the entry of IPI in InsertPts. |
203 | static void appendAndTransferDominatedUses(Instruction *NewPt, |
204 | Instruction *User, unsigned OpNo, |
205 | InsertionPoints::iterator &IPI, |
206 | InsertionPoints &InsertPts) { |
207 | // Record the dominated use. |
208 | IPI->second.emplace_back(Args&: User, Args&: OpNo); |
209 | // Transfer the dominated uses of IPI to NewPt |
210 | // Inserting into the DenseMap may invalidate existing iterator. |
211 | // Keep a copy of the key to find the iterator to erase. Keep a copy of the |
212 | // value so that we don't have to dereference IPI->second. |
213 | Instruction *OldInstr = IPI->first; |
214 | Uses OldUses = std::move(IPI->second); |
215 | InsertPts[NewPt] = std::move(OldUses); |
216 | // Erase IPI. |
217 | InsertPts.erase(Val: OldInstr); |
218 | } |
219 | }; |
220 | |
221 | } // end anonymous namespace |
222 | |
223 | char AArch64PromoteConstant::ID = 0; |
224 | |
225 | INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const" , |
226 | "AArch64 Promote Constant Pass" , false, false) |
227 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
228 | INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const" , |
229 | "AArch64 Promote Constant Pass" , false, false) |
230 | |
231 | ModulePass *llvm::createAArch64PromoteConstantPass() { |
232 | return new AArch64PromoteConstant(); |
233 | } |
234 | |
235 | /// Check if the given type uses a vector type. |
236 | static bool isConstantUsingVectorTy(const Type *CstTy) { |
237 | if (CstTy->isVectorTy()) |
238 | return true; |
239 | if (CstTy->isStructTy()) { |
240 | for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements(); |
241 | EltIdx < EndEltIdx; ++EltIdx) |
242 | if (isConstantUsingVectorTy(CstTy: CstTy->getStructElementType(N: EltIdx))) |
243 | return true; |
244 | } else if (CstTy->isArrayTy()) |
245 | return isConstantUsingVectorTy(CstTy: CstTy->getArrayElementType()); |
246 | return false; |
247 | } |
248 | |
249 | // Returns true if \p C contains only ConstantData leafs and no global values, |
250 | // block addresses or constant expressions. Traverses ConstantAggregates. |
251 | static bool containsOnlyConstantData(const Constant *C) { |
252 | if (isa<ConstantData>(Val: C)) |
253 | return true; |
254 | |
255 | if (isa<GlobalValue>(Val: C) || isa<BlockAddress>(Val: C) || isa<ConstantExpr>(Val: C)) |
256 | return false; |
257 | |
258 | return all_of(Range: C->operands(), P: [](const Use &U) { |
259 | return containsOnlyConstantData(C: cast<Constant>(Val: &U)); |
260 | }); |
261 | } |
262 | |
263 | /// Check if the given use (Instruction + OpIdx) of Cst should be converted into |
264 | /// a load of a global variable initialized with Cst. |
265 | /// A use should be converted if it is legal to do so. |
266 | /// For instance, it is not legal to turn the mask operand of a shuffle vector |
267 | /// into a load of a global variable. |
268 | static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr, |
269 | unsigned OpIdx) { |
270 | // shufflevector instruction expects a const for the mask argument, i.e., the |
271 | // third argument. Do not promote this use in that case. |
272 | if (isa<const ShuffleVectorInst>(Val: Instr) && OpIdx == 2) |
273 | return false; |
274 | |
275 | // extractvalue instruction expects a const idx. |
276 | if (isa<const ExtractValueInst>(Val: Instr) && OpIdx > 0) |
277 | return false; |
278 | |
279 | // extractvalue instruction expects a const idx. |
280 | if (isa<const InsertValueInst>(Val: Instr) && OpIdx > 1) |
281 | return false; |
282 | |
283 | if (isa<const AllocaInst>(Val: Instr) && OpIdx > 0) |
284 | return false; |
285 | |
286 | // Alignment argument must be constant. |
287 | if (isa<const LoadInst>(Val: Instr) && OpIdx > 0) |
288 | return false; |
289 | |
290 | // Alignment argument must be constant. |
291 | if (isa<const StoreInst>(Val: Instr) && OpIdx > 1) |
292 | return false; |
293 | |
294 | // Index must be constant. |
295 | if (isa<const GetElementPtrInst>(Val: Instr) && OpIdx > 0) |
296 | return false; |
297 | |
298 | // Personality function and filters must be constant. |
299 | // Give up on that instruction. |
300 | if (isa<const LandingPadInst>(Val: Instr)) |
301 | return false; |
302 | |
303 | // Switch instruction expects constants to compare to. |
304 | if (isa<const SwitchInst>(Val: Instr)) |
305 | return false; |
306 | |
307 | // Expected address must be a constant. |
308 | if (isa<const IndirectBrInst>(Val: Instr)) |
309 | return false; |
310 | |
311 | // Do not mess with intrinsics. |
312 | if (isa<const IntrinsicInst>(Val: Instr)) |
313 | return false; |
314 | |
315 | // Do not mess with inline asm. |
316 | const CallInst *CI = dyn_cast<const CallInst>(Val: Instr); |
317 | return !(CI && CI->isInlineAsm()); |
318 | } |
319 | |
320 | /// Check if the given Cst should be converted into |
321 | /// a load of a global variable initialized with Cst. |
322 | /// A constant should be converted if it is likely that the materialization of |
323 | /// the constant will be tricky. Thus, we give up on zero or undef values. |
324 | /// |
325 | /// \todo Currently, accept only vector related types. |
326 | /// Also we give up on all simple vector type to keep the existing |
327 | /// behavior. Otherwise, we should push here all the check of the lowering of |
328 | /// BUILD_VECTOR. By giving up, we lose the potential benefit of merging |
329 | /// constant via global merge and the fact that the same constant is stored |
330 | /// only once with this method (versus, as many function that uses the constant |
331 | /// for the regular approach, even for float). |
332 | /// Again, the simplest solution would be to promote every |
333 | /// constant and rematerialize them when they are actually cheap to create. |
334 | static bool shouldConvertImpl(const Constant *Cst) { |
335 | if (isa<const UndefValue>(Val: Cst)) |
336 | return false; |
337 | |
338 | // FIXME: In some cases, it may be interesting to promote in memory |
339 | // a zero initialized constant. |
340 | // E.g., when the type of Cst require more instructions than the |
341 | // adrp/add/load sequence or when this sequence can be shared by several |
342 | // instances of Cst. |
343 | // Ideally, we could promote this into a global and rematerialize the constant |
344 | // when it was a bad idea. |
345 | if (Cst->isZeroValue()) |
346 | return false; |
347 | |
348 | if (Stress) |
349 | return true; |
350 | |
351 | // FIXME: see function \todo |
352 | if (Cst->getType()->isVectorTy()) |
353 | return false; |
354 | return isConstantUsingVectorTy(CstTy: Cst->getType()); |
355 | } |
356 | |
357 | static bool |
358 | shouldConvert(Constant &C, |
359 | AArch64PromoteConstant::PromotionCacheTy &PromotionCache) { |
360 | auto Converted = PromotionCache.insert( |
361 | KV: std::make_pair(x: &C, y: AArch64PromoteConstant::PromotedConstant())); |
362 | if (Converted.second) |
363 | Converted.first->second.ShouldConvert = shouldConvertImpl(Cst: &C); |
364 | return Converted.first->second.ShouldConvert; |
365 | } |
366 | |
367 | Instruction *AArch64PromoteConstant::findInsertionPoint(Instruction &User, |
368 | unsigned OpNo) { |
369 | // If this user is a phi, the insertion point is in the related |
370 | // incoming basic block. |
371 | if (PHINode *PhiInst = dyn_cast<PHINode>(Val: &User)) |
372 | return PhiInst->getIncomingBlock(i: OpNo)->getTerminator(); |
373 | |
374 | return &User; |
375 | } |
376 | |
377 | bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Instruction *User, |
378 | unsigned OpNo, |
379 | InsertionPoints &InsertPts) { |
380 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( |
381 | F&: *NewPt->getParent()->getParent()).getDomTree(); |
382 | |
383 | // Traverse all the existing insertion points and check if one is dominating |
384 | // NewPt. If it is, remember that. |
385 | for (auto &IPI : InsertPts) { |
386 | if (NewPt == IPI.first || DT.dominates(Def: IPI.first, User: NewPt) || |
387 | // When IPI.first is a terminator instruction, DT may think that |
388 | // the result is defined on the edge. |
389 | // Here we are testing the insertion point, not the definition. |
390 | (IPI.first->getParent() != NewPt->getParent() && |
391 | DT.dominates(A: IPI.first->getParent(), B: NewPt->getParent()))) { |
392 | // No need to insert this point. Just record the dominated use. |
393 | LLVM_DEBUG(dbgs() << "Insertion point dominated by:\n" ); |
394 | LLVM_DEBUG(IPI.first->print(dbgs())); |
395 | LLVM_DEBUG(dbgs() << '\n'); |
396 | IPI.second.emplace_back(Args&: User, Args&: OpNo); |
397 | return true; |
398 | } |
399 | } |
400 | return false; |
401 | } |
402 | |
403 | bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Instruction *User, |
404 | unsigned OpNo, |
405 | InsertionPoints &InsertPts) { |
406 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( |
407 | F&: *NewPt->getParent()->getParent()).getDomTree(); |
408 | BasicBlock *NewBB = NewPt->getParent(); |
409 | |
410 | // Traverse all the existing insertion point and check if one is dominated by |
411 | // NewPt and thus useless or can be combined with NewPt into a common |
412 | // dominator. |
413 | for (InsertionPoints::iterator IPI = InsertPts.begin(), |
414 | EndIPI = InsertPts.end(); |
415 | IPI != EndIPI; ++IPI) { |
416 | BasicBlock *CurBB = IPI->first->getParent(); |
417 | if (NewBB == CurBB) { |
418 | // Instructions are in the same block. |
419 | // By construction, NewPt is dominating the other. |
420 | // Indeed, isDominated returned false with the exact same arguments. |
421 | LLVM_DEBUG(dbgs() << "Merge insertion point with:\n" ); |
422 | LLVM_DEBUG(IPI->first->print(dbgs())); |
423 | LLVM_DEBUG(dbgs() << "\nat considered insertion point.\n" ); |
424 | appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); |
425 | return true; |
426 | } |
427 | |
428 | // Look for a common dominator |
429 | BasicBlock *CommonDominator = DT.findNearestCommonDominator(A: NewBB, B: CurBB); |
430 | // If none exists, we cannot merge these two points. |
431 | if (!CommonDominator) |
432 | continue; |
433 | |
434 | if (CommonDominator != NewBB) { |
435 | // By construction, the CommonDominator cannot be CurBB. |
436 | assert(CommonDominator != CurBB && |
437 | "Instruction has not been rejected during isDominated check!" ); |
438 | // Take the last instruction of the CommonDominator as insertion point |
439 | NewPt = CommonDominator->getTerminator(); |
440 | } |
441 | // else, CommonDominator is the block of NewBB, hence NewBB is the last |
442 | // possible insertion point in that block. |
443 | LLVM_DEBUG(dbgs() << "Merge insertion point with:\n" ); |
444 | LLVM_DEBUG(IPI->first->print(dbgs())); |
445 | LLVM_DEBUG(dbgs() << '\n'); |
446 | LLVM_DEBUG(NewPt->print(dbgs())); |
447 | LLVM_DEBUG(dbgs() << '\n'); |
448 | appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); |
449 | return true; |
450 | } |
451 | return false; |
452 | } |
453 | |
454 | void AArch64PromoteConstant::computeInsertionPoint( |
455 | Instruction *User, unsigned OpNo, InsertionPoints &InsertPts) { |
456 | LLVM_DEBUG(dbgs() << "Considered use, opidx " << OpNo << ":\n" ); |
457 | LLVM_DEBUG(User->print(dbgs())); |
458 | LLVM_DEBUG(dbgs() << '\n'); |
459 | |
460 | Instruction *InsertionPoint = findInsertionPoint(User&: *User, OpNo); |
461 | |
462 | LLVM_DEBUG(dbgs() << "Considered insertion point:\n" ); |
463 | LLVM_DEBUG(InsertionPoint->print(dbgs())); |
464 | LLVM_DEBUG(dbgs() << '\n'); |
465 | |
466 | if (isDominated(NewPt: InsertionPoint, User, OpNo, InsertPts)) |
467 | return; |
468 | // This insertion point is useful, check if we can merge some insertion |
469 | // point in a common dominator or if NewPt dominates an existing one. |
470 | if (tryAndMerge(NewPt: InsertionPoint, User, OpNo, InsertPts)) |
471 | return; |
472 | |
473 | LLVM_DEBUG(dbgs() << "Keep considered insertion point\n" ); |
474 | |
475 | // It is definitely useful by its own |
476 | InsertPts[InsertionPoint].emplace_back(Args&: User, Args&: OpNo); |
477 | } |
478 | |
479 | static void ensurePromotedGV(Function &F, Constant &C, |
480 | AArch64PromoteConstant::PromotedConstant &PC) { |
481 | assert(PC.ShouldConvert && |
482 | "Expected that we should convert this to a global" ); |
483 | if (PC.GV) |
484 | return; |
485 | PC.GV = new GlobalVariable( |
486 | *F.getParent(), C.getType(), true, GlobalValue::InternalLinkage, nullptr, |
487 | "_PromotedConst" , nullptr, GlobalVariable::NotThreadLocal); |
488 | PC.GV->setInitializer(&C); |
489 | LLVM_DEBUG(dbgs() << "Global replacement: " ); |
490 | LLVM_DEBUG(PC.GV->print(dbgs())); |
491 | LLVM_DEBUG(dbgs() << '\n'); |
492 | ++NumPromoted; |
493 | } |
494 | |
495 | void AArch64PromoteConstant::insertDefinitions(Function &F, |
496 | GlobalVariable &PromotedGV, |
497 | InsertionPoints &InsertPts) { |
498 | #ifndef NDEBUG |
499 | // Do more checking for debug purposes. |
500 | DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); |
501 | #endif |
502 | assert(!InsertPts.empty() && "Empty uses does not need a definition" ); |
503 | |
504 | for (const auto &IPI : InsertPts) { |
505 | // Create the load of the global variable. |
506 | IRBuilder<> Builder(IPI.first); |
507 | LoadInst *LoadedCst = |
508 | Builder.CreateLoad(Ty: PromotedGV.getValueType(), Ptr: &PromotedGV); |
509 | LLVM_DEBUG(dbgs() << "**********\n" ); |
510 | LLVM_DEBUG(dbgs() << "New def: " ); |
511 | LLVM_DEBUG(LoadedCst->print(dbgs())); |
512 | LLVM_DEBUG(dbgs() << '\n'); |
513 | |
514 | // Update the dominated uses. |
515 | for (auto Use : IPI.second) { |
516 | #ifndef NDEBUG |
517 | assert(DT.dominates(LoadedCst, |
518 | findInsertionPoint(*Use.first, Use.second)) && |
519 | "Inserted definition does not dominate all its uses!" ); |
520 | #endif |
521 | LLVM_DEBUG({ |
522 | dbgs() << "Use to update " << Use.second << ":" ; |
523 | Use.first->print(dbgs()); |
524 | dbgs() << '\n'; |
525 | }); |
526 | Use.first->setOperand(i: Use.second, Val: LoadedCst); |
527 | ++NumPromotedUses; |
528 | } |
529 | } |
530 | } |
531 | |
532 | void AArch64PromoteConstant::promoteConstants( |
533 | Function &F, SmallVectorImpl<UpdateRecord> &Updates, |
534 | PromotionCacheTy &PromotionCache) { |
535 | // Promote the constants. |
536 | for (auto U = Updates.begin(), E = Updates.end(); U != E;) { |
537 | LLVM_DEBUG(dbgs() << "** Compute insertion points **\n" ); |
538 | auto First = U; |
539 | Constant *C = First->C; |
540 | InsertionPoints InsertPts; |
541 | do { |
542 | computeInsertionPoint(User: U->User, OpNo: U->Op, InsertPts); |
543 | } while (++U != E && U->C == C); |
544 | |
545 | auto &Promotion = PromotionCache[C]; |
546 | ensurePromotedGV(F, C&: *C, PC&: Promotion); |
547 | insertDefinitions(F, PromotedGV&: *Promotion.GV, InsertPts); |
548 | } |
549 | } |
550 | |
551 | bool AArch64PromoteConstant::runOnFunction(Function &F, |
552 | PromotionCacheTy &PromotionCache) { |
553 | // Look for instructions using constant vector. Promote that constant to a |
554 | // global variable. Create as few loads of this variable as possible and |
555 | // update the uses accordingly. |
556 | SmallVector<UpdateRecord, 64> Updates; |
557 | for (Instruction &I : instructions(F: &F)) { |
558 | // Traverse the operand, looking for constant vectors. Replace them by a |
559 | // load of a global variable of constant vector type. |
560 | for (Use &U : I.operands()) { |
561 | Constant *Cst = dyn_cast<Constant>(Val&: U); |
562 | // There is no point in promoting global values as they are already |
563 | // global. Do not promote constants containing constant expression, global |
564 | // values or blockaddresses either, as they may require some code |
565 | // expansion. |
566 | if (!Cst || isa<GlobalValue>(Val: Cst) || !containsOnlyConstantData(C: Cst)) |
567 | continue; |
568 | |
569 | // Check if this constant is worth promoting. |
570 | if (!shouldConvert(C&: *Cst, PromotionCache)) |
571 | continue; |
572 | |
573 | // Check if this use should be promoted. |
574 | unsigned OpNo = &U - I.op_begin(); |
575 | if (!shouldConvertUse(Cst, Instr: &I, OpIdx: OpNo)) |
576 | continue; |
577 | |
578 | Updates.emplace_back(Args&: Cst, Args: &I, Args&: OpNo); |
579 | } |
580 | } |
581 | |
582 | if (Updates.empty()) |
583 | return false; |
584 | |
585 | promoteConstants(F, Updates, PromotionCache); |
586 | return true; |
587 | } |
588 | |