1 | //===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements a pass to prepare loops for ppc preferred addressing |
10 | // modes, leveraging different instruction form. (eg: DS/DQ form, D/DS form with |
11 | // update) |
12 | // Additional PHIs are created for loop induction variables used by load/store |
13 | // instructions so that preferred addressing modes can be used. |
14 | // |
15 | // 1: DS/DQ form preparation, prepare the load/store instructions so that they |
16 | // can satisfy the DS/DQ form displacement requirements. |
17 | // Generically, this means transforming loops like this: |
18 | // for (int i = 0; i < n; ++i) { |
19 | // unsigned long x1 = *(unsigned long *)(p + i + 5); |
20 | // unsigned long x2 = *(unsigned long *)(p + i + 9); |
21 | // } |
22 | // |
23 | // to look like this: |
24 | // |
25 | // unsigned NewP = p + 5; |
26 | // for (int i = 0; i < n; ++i) { |
27 | // unsigned long x1 = *(unsigned long *)(i + NewP); |
28 | // unsigned long x2 = *(unsigned long *)(i + NewP + 4); |
29 | // } |
30 | // |
31 | // 2: D/DS form with update preparation, prepare the load/store instructions so |
32 | // that we can use update form to do pre-increment. |
33 | // Generically, this means transforming loops like this: |
34 | // for (int i = 0; i < n; ++i) |
35 | // array[i] = c; |
36 | // |
37 | // to look like this: |
38 | // |
39 | // T *p = array[-1]; |
40 | // for (int i = 0; i < n; ++i) |
41 | // *++p = c; |
42 | // |
43 | // 3: common multiple chains for the load/stores with same offsets in the loop, |
44 | // so that we can reuse the offsets and reduce the register pressure in the |
45 | // loop. This transformation can also increase the loop ILP as now each chain |
46 | // uses its own loop induction add/addi. But this will increase the number of |
47 | // add/addi in the loop. |
48 | // |
49 | // Generically, this means transforming loops like this: |
50 | // |
51 | // char *p; |
52 | // A1 = p + base1 |
53 | // A2 = p + base1 + offset |
54 | // B1 = p + base2 |
55 | // B2 = p + base2 + offset |
56 | // |
57 | // for (int i = 0; i < n; i++) |
58 | // unsigned long x1 = *(unsigned long *)(A1 + i); |
59 | // unsigned long x2 = *(unsigned long *)(A2 + i) |
60 | // unsigned long x3 = *(unsigned long *)(B1 + i); |
61 | // unsigned long x4 = *(unsigned long *)(B2 + i); |
62 | // } |
63 | // |
64 | // to look like this: |
65 | // |
66 | // A1_new = p + base1 // chain 1 |
67 | // B1_new = p + base2 // chain 2, now inside the loop, common offset is |
68 | // // reused. |
69 | // |
70 | // for (long long i = 0; i < n; i+=count) { |
71 | // unsigned long x1 = *(unsigned long *)(A1_new + i); |
72 | // unsigned long x2 = *(unsigned long *)((A1_new + i) + offset); |
73 | // unsigned long x3 = *(unsigned long *)(B1_new + i); |
74 | // unsigned long x4 = *(unsigned long *)((B1_new + i) + offset); |
75 | // } |
76 | //===----------------------------------------------------------------------===// |
77 | |
78 | #include "PPC.h" |
79 | #include "PPCSubtarget.h" |
80 | #include "PPCTargetMachine.h" |
81 | #include "llvm/ADT/DepthFirstIterator.h" |
82 | #include "llvm/ADT/SmallPtrSet.h" |
83 | #include "llvm/ADT/SmallSet.h" |
84 | #include "llvm/ADT/SmallVector.h" |
85 | #include "llvm/ADT/Statistic.h" |
86 | #include "llvm/Analysis/LoopInfo.h" |
87 | #include "llvm/Analysis/ScalarEvolution.h" |
88 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
89 | #include "llvm/IR/BasicBlock.h" |
90 | #include "llvm/IR/CFG.h" |
91 | #include "llvm/IR/Dominators.h" |
92 | #include "llvm/IR/Instruction.h" |
93 | #include "llvm/IR/Instructions.h" |
94 | #include "llvm/IR/IntrinsicInst.h" |
95 | #include "llvm/IR/IntrinsicsPowerPC.h" |
96 | #include "llvm/IR/Module.h" |
97 | #include "llvm/IR/Type.h" |
98 | #include "llvm/IR/Value.h" |
99 | #include "llvm/InitializePasses.h" |
100 | #include "llvm/Pass.h" |
101 | #include "llvm/Support/Casting.h" |
102 | #include "llvm/Support/CommandLine.h" |
103 | #include "llvm/Support/Debug.h" |
104 | #include "llvm/Transforms/Scalar.h" |
105 | #include "llvm/Transforms/Utils.h" |
106 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
107 | #include "llvm/Transforms/Utils/Local.h" |
108 | #include "llvm/Transforms/Utils/LoopUtils.h" |
109 | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" |
110 | #include <cassert> |
111 | #include <cmath> |
112 | #include <iterator> |
113 | #include <utility> |
114 | |
115 | #define DEBUG_TYPE "ppc-loop-instr-form-prep" |
116 | |
117 | using namespace llvm; |
118 | |
119 | static cl::opt<unsigned> |
120 | MaxVarsPrep("ppc-formprep-max-vars" , cl::Hidden, cl::init(Val: 24), |
121 | cl::desc("Potential common base number threshold per function " |
122 | "for PPC loop prep" )); |
123 | |
124 | static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update" , |
125 | cl::init(Val: true), cl::Hidden, |
126 | cl::desc("prefer update form when ds form is also a update form" )); |
127 | |
128 | static cl::opt<bool> EnableUpdateFormForNonConstInc( |
129 | "ppc-formprep-update-nonconst-inc" , cl::init(Val: false), cl::Hidden, |
130 | cl::desc("prepare update form when the load/store increment is a loop " |
131 | "invariant non-const value." )); |
132 | |
133 | static cl::opt<bool> EnableChainCommoning( |
134 | "ppc-formprep-chain-commoning" , cl::init(Val: false), cl::Hidden, |
135 | cl::desc("Enable chain commoning in PPC loop prepare pass." )); |
136 | |
137 | // Sum of following 3 per loop thresholds for all loops can not be larger |
138 | // than MaxVarsPrep. |
139 | // now the thresholds for each kind prep are exterimental values on Power9. |
140 | static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars" , |
141 | cl::Hidden, cl::init(Val: 3), |
142 | cl::desc("Potential PHI threshold per loop for PPC loop prep of update " |
143 | "form" )); |
144 | |
145 | static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars" , |
146 | cl::Hidden, cl::init(Val: 3), |
147 | cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form" )); |
148 | |
149 | static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars" , |
150 | cl::Hidden, cl::init(Val: 8), |
151 | cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form" )); |
152 | |
153 | // Commoning chain will reduce the register pressure, so we don't consider about |
154 | // the PHI nodes number. |
155 | // But commoning chain will increase the addi/add number in the loop and also |
156 | // increase loop ILP. Maximum chain number should be same with hardware |
157 | // IssueWidth, because we won't benefit from ILP if the parallel chains number |
158 | // is bigger than IssueWidth. We assume there are 2 chains in one bucket, so |
159 | // there would be 4 buckets at most on P9(IssueWidth is 8). |
160 | static cl::opt<unsigned> MaxVarsChainCommon( |
161 | "ppc-chaincommon-max-vars" , cl::Hidden, cl::init(Val: 4), |
162 | cl::desc("Bucket number per loop for PPC loop chain common" )); |
163 | |
164 | // If would not be profitable if the common base has only one load/store, ISEL |
165 | // should already be able to choose best load/store form based on offset for |
166 | // single load/store. Set minimal profitable value default to 2 and make it as |
167 | // an option. |
168 | static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold" , |
169 | cl::Hidden, cl::init(Val: 2), |
170 | cl::desc("Minimal common base load/store instructions triggering DS/DQ form " |
171 | "preparation" )); |
172 | |
173 | static cl::opt<unsigned> ChainCommonPrepMinThreshold( |
174 | "ppc-chaincommon-min-threshold" , cl::Hidden, cl::init(Val: 4), |
175 | cl::desc("Minimal common base load/store instructions triggering chain " |
176 | "commoning preparation. Must be not smaller than 4" )); |
177 | |
178 | STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form" ); |
179 | STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form" ); |
180 | STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form" ); |
181 | STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten" ); |
182 | STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten" ); |
183 | STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten" ); |
184 | STATISTIC(ChainCommoningRewritten, "Num of commoning chains" ); |
185 | |
186 | namespace { |
187 | struct BucketElement { |
188 | BucketElement(const SCEV *O, Instruction *I) : Offset(O), Instr(I) {} |
189 | BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} |
190 | |
191 | const SCEV *Offset; |
192 | Instruction *Instr; |
193 | }; |
194 | |
195 | struct Bucket { |
196 | Bucket(const SCEV *B, Instruction *I) |
197 | : BaseSCEV(B), Elements(1, BucketElement(I)) { |
198 | ChainSize = 0; |
199 | } |
200 | |
201 | // The base of the whole bucket. |
202 | const SCEV *BaseSCEV; |
203 | |
204 | // All elements in the bucket. In the bucket, the element with the BaseSCEV |
205 | // has no offset and all other elements are stored as offsets to the |
206 | // BaseSCEV. |
207 | SmallVector<BucketElement, 16> Elements; |
208 | |
209 | // The potential chains size. This is used for chain commoning only. |
210 | unsigned ChainSize; |
211 | |
212 | // The base for each potential chain. This is used for chain commoning only. |
213 | SmallVector<BucketElement, 16> ChainBases; |
214 | }; |
215 | |
216 | // "UpdateForm" is not a real PPC instruction form, it stands for dform |
217 | // load/store with update like ldu/stdu, or Prefetch intrinsic. |
218 | // For DS form instructions, their displacements must be multiple of 4. |
219 | // For DQ form instructions, their displacements must be multiple of 16. |
220 | enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning }; |
221 | |
222 | class PPCLoopInstrFormPrep : public FunctionPass { |
223 | public: |
224 | static char ID; // Pass ID, replacement for typeid |
225 | |
226 | PPCLoopInstrFormPrep() : FunctionPass(ID) { |
227 | initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry()); |
228 | } |
229 | |
230 | PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { |
231 | initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry()); |
232 | } |
233 | |
234 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
235 | AU.addPreserved<DominatorTreeWrapperPass>(); |
236 | AU.addRequired<LoopInfoWrapperPass>(); |
237 | AU.addPreserved<LoopInfoWrapperPass>(); |
238 | AU.addRequired<ScalarEvolutionWrapperPass>(); |
239 | } |
240 | |
241 | bool runOnFunction(Function &F) override; |
242 | |
243 | private: |
244 | PPCTargetMachine *TM = nullptr; |
245 | const PPCSubtarget *ST; |
246 | DominatorTree *DT; |
247 | LoopInfo *LI; |
248 | ScalarEvolution *SE; |
249 | bool PreserveLCSSA; |
250 | bool HasCandidateForPrepare; |
251 | |
252 | /// Successful preparation number for Update/DS/DQ form in all inner most |
253 | /// loops. One successful preparation will put one common base out of loop, |
254 | /// this may leads to register presure like LICM does. |
255 | /// Make sure total preparation number can be controlled by option. |
256 | unsigned SuccPrepCount; |
257 | |
258 | bool runOnLoop(Loop *L); |
259 | |
260 | /// Check if required PHI node is already exist in Loop \p L. |
261 | bool alreadyPrepared(Loop *L, Instruction *MemI, |
262 | const SCEV *BasePtrStartSCEV, |
263 | const SCEV *BasePtrIncSCEV, PrepForm Form); |
264 | |
265 | /// Get the value which defines the increment SCEV \p BasePtrIncSCEV. |
266 | Value *getNodeForInc(Loop *L, Instruction *MemI, |
267 | const SCEV *BasePtrIncSCEV); |
268 | |
269 | /// Common chains to reuse offsets for a loop to reduce register pressure. |
270 | bool chainCommoning(Loop *L, SmallVector<Bucket, 16> &Buckets); |
271 | |
272 | /// Find out the potential commoning chains and their bases. |
273 | bool prepareBasesForCommoningChains(Bucket &BucketChain); |
274 | |
275 | /// Rewrite load/store according to the common chains. |
276 | bool |
277 | rewriteLoadStoresForCommoningChains(Loop *L, Bucket &Bucket, |
278 | SmallSet<BasicBlock *, 16> &BBChanged); |
279 | |
280 | /// Collect condition matched(\p isValidCandidate() returns true) |
281 | /// candidates in Loop \p L. |
282 | SmallVector<Bucket, 16> collectCandidates( |
283 | Loop *L, |
284 | std::function<bool(const Instruction *, Value *, const Type *)> |
285 | isValidCandidate, |
286 | std::function<bool(const SCEV *)> isValidDiff, |
287 | unsigned MaxCandidateNum); |
288 | |
289 | /// Add a candidate to candidates \p Buckets if diff between candidate and |
290 | /// one base in \p Buckets matches \p isValidDiff. |
291 | void addOneCandidate(Instruction *MemI, const SCEV *LSCEV, |
292 | SmallVector<Bucket, 16> &Buckets, |
293 | std::function<bool(const SCEV *)> isValidDiff, |
294 | unsigned MaxCandidateNum); |
295 | |
296 | /// Prepare all candidates in \p Buckets for update form. |
297 | bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets); |
298 | |
299 | /// Prepare all candidates in \p Buckets for displacement form, now for |
300 | /// ds/dq. |
301 | bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, PrepForm Form); |
302 | |
303 | /// Prepare for one chain \p BucketChain, find the best base element and |
304 | /// update all other elements in \p BucketChain accordingly. |
305 | /// \p Form is used to find the best base element. |
306 | /// If success, best base element must be stored as the first element of |
307 | /// \p BucketChain. |
308 | /// Return false if no base element found, otherwise return true. |
309 | bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm Form); |
310 | |
311 | /// Prepare for one chain \p BucketChain, find the best base element and |
312 | /// update all other elements in \p BucketChain accordingly. |
313 | /// If success, best base element must be stored as the first element of |
314 | /// \p BucketChain. |
315 | /// Return false if no base element found, otherwise return true. |
316 | bool prepareBaseForUpdateFormChain(Bucket &BucketChain); |
317 | |
318 | /// Rewrite load/store instructions in \p BucketChain according to |
319 | /// preparation. |
320 | bool rewriteLoadStores(Loop *L, Bucket &BucketChain, |
321 | SmallSet<BasicBlock *, 16> &BBChanged, |
322 | PrepForm Form); |
323 | |
324 | /// Rewrite for the base load/store of a chain. |
325 | std::pair<Instruction *, Instruction *> |
326 | rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV, |
327 | Instruction *BaseMemI, bool CanPreInc, PrepForm Form, |
328 | SCEVExpander &SCEVE, SmallPtrSet<Value *, 16> &DeletedPtrs); |
329 | |
330 | /// Rewrite for the other load/stores of a chain according to the new \p |
331 | /// Base. |
332 | Instruction * |
333 | rewriteForBucketElement(std::pair<Instruction *, Instruction *> Base, |
334 | const BucketElement &Element, Value *OffToBase, |
335 | SmallPtrSet<Value *, 16> &DeletedPtrs); |
336 | }; |
337 | |
338 | } // end anonymous namespace |
339 | |
340 | char PPCLoopInstrFormPrep::ID = 0; |
341 | static const char *name = "Prepare loop for ppc preferred instruction forms" ; |
342 | INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false) |
343 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
344 | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) |
345 | INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false) |
346 | |
347 | static constexpr StringRef PHINodeNameSuffix = ".phi" ; |
348 | static constexpr StringRef CastNodeNameSuffix = ".cast" ; |
349 | static constexpr StringRef GEPNodeIncNameSuffix = ".inc" ; |
350 | static constexpr StringRef GEPNodeOffNameSuffix = ".off" ; |
351 | |
352 | FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) { |
353 | return new PPCLoopInstrFormPrep(TM); |
354 | } |
355 | |
356 | static bool IsPtrInBounds(Value *BasePtr) { |
357 | Value *StrippedBasePtr = BasePtr; |
358 | while (BitCastInst *BC = dyn_cast<BitCastInst>(Val: StrippedBasePtr)) |
359 | StrippedBasePtr = BC->getOperand(i_nocapture: 0); |
360 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: StrippedBasePtr)) |
361 | return GEP->isInBounds(); |
362 | |
363 | return false; |
364 | } |
365 | |
366 | static std::string getInstrName(const Value *I, StringRef Suffix) { |
367 | assert(I && "Invalid paramater!" ); |
368 | if (I->hasName()) |
369 | return (I->getName() + Suffix).str(); |
370 | else |
371 | return "" ; |
372 | } |
373 | |
374 | static Value *getPointerOperandAndType(Value *MemI, |
375 | Type **PtrElementType = nullptr) { |
376 | |
377 | Value *PtrValue = nullptr; |
378 | Type *PointerElementType = nullptr; |
379 | |
380 | if (LoadInst *LMemI = dyn_cast<LoadInst>(Val: MemI)) { |
381 | PtrValue = LMemI->getPointerOperand(); |
382 | PointerElementType = LMemI->getType(); |
383 | } else if (StoreInst *SMemI = dyn_cast<StoreInst>(Val: MemI)) { |
384 | PtrValue = SMemI->getPointerOperand(); |
385 | PointerElementType = SMemI->getValueOperand()->getType(); |
386 | } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(Val: MemI)) { |
387 | PointerElementType = Type::getInt8Ty(C&: MemI->getContext()); |
388 | if (IMemI->getIntrinsicID() == Intrinsic::prefetch || |
389 | IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) { |
390 | PtrValue = IMemI->getArgOperand(i: 0); |
391 | } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) { |
392 | PtrValue = IMemI->getArgOperand(i: 1); |
393 | } |
394 | } |
395 | /*Get ElementType if PtrElementType is not null.*/ |
396 | if (PtrElementType) |
397 | *PtrElementType = PointerElementType; |
398 | |
399 | return PtrValue; |
400 | } |
401 | |
402 | bool PPCLoopInstrFormPrep::runOnFunction(Function &F) { |
403 | if (skipFunction(F)) |
404 | return false; |
405 | |
406 | LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
407 | SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); |
408 | auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); |
409 | DT = DTWP ? &DTWP->getDomTree() : nullptr; |
410 | PreserveLCSSA = mustPreserveAnalysisID(AID&: LCSSAID); |
411 | ST = TM ? TM->getSubtargetImpl(F) : nullptr; |
412 | SuccPrepCount = 0; |
413 | |
414 | bool MadeChange = false; |
415 | |
416 | for (Loop *I : *LI) |
417 | for (Loop *L : depth_first(G: I)) |
418 | MadeChange |= runOnLoop(L); |
419 | |
420 | return MadeChange; |
421 | } |
422 | |
423 | // Finding the minimal(chain_number + reusable_offset_number) is a complicated |
424 | // algorithmic problem. |
425 | // For now, the algorithm used here is simply adjusted to handle the case for |
426 | // manually unrolling cases. |
427 | // FIXME: use a more powerful algorithm to find minimal sum of chain_number and |
428 | // reusable_offset_number for one base with multiple offsets. |
429 | bool PPCLoopInstrFormPrep::prepareBasesForCommoningChains(Bucket &CBucket) { |
430 | // The minimal size for profitable chain commoning: |
431 | // A1 = base + offset1 |
432 | // A2 = base + offset2 (offset2 - offset1 = X) |
433 | // A3 = base + offset3 |
434 | // A4 = base + offset4 (offset4 - offset3 = X) |
435 | // ======> |
436 | // base1 = base + offset1 |
437 | // base2 = base + offset3 |
438 | // A1 = base1 |
439 | // A2 = base1 + X |
440 | // A3 = base2 |
441 | // A4 = base2 + X |
442 | // |
443 | // There is benefit because of reuse of offest 'X'. |
444 | |
445 | assert(ChainCommonPrepMinThreshold >= 4 && |
446 | "Thredhold can not be smaller than 4!\n" ); |
447 | if (CBucket.Elements.size() < ChainCommonPrepMinThreshold) |
448 | return false; |
449 | |
450 | // We simply select the FirstOffset as the first reusable offset between each |
451 | // chain element 1 and element 0. |
452 | const SCEV *FirstOffset = CBucket.Elements[1].Offset; |
453 | |
454 | // Figure out how many times above FirstOffset is used in the chain. |
455 | // For a success commoning chain candidate, offset difference between each |
456 | // chain element 1 and element 0 must be also FirstOffset. |
457 | unsigned FirstOffsetReusedCount = 1; |
458 | |
459 | // Figure out how many times above FirstOffset is used in the first chain. |
460 | // Chain number is FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain |
461 | unsigned FirstOffsetReusedCountInFirstChain = 1; |
462 | |
463 | unsigned EleNum = CBucket.Elements.size(); |
464 | bool SawChainSeparater = false; |
465 | for (unsigned j = 2; j != EleNum; ++j) { |
466 | if (SE->getMinusSCEV(LHS: CBucket.Elements[j].Offset, |
467 | RHS: CBucket.Elements[j - 1].Offset) == FirstOffset) { |
468 | if (!SawChainSeparater) |
469 | FirstOffsetReusedCountInFirstChain++; |
470 | FirstOffsetReusedCount++; |
471 | } else |
472 | // For now, if we meet any offset which is not FirstOffset, we assume we |
473 | // find a new Chain. |
474 | // This makes us miss some opportunities. |
475 | // For example, we can common: |
476 | // |
477 | // {OffsetA, Offset A, OffsetB, OffsetA, OffsetA, OffsetB} |
478 | // |
479 | // as two chains: |
480 | // {{OffsetA, Offset A, OffsetB}, {OffsetA, OffsetA, OffsetB}} |
481 | // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 2 |
482 | // |
483 | // But we fail to common: |
484 | // |
485 | // {OffsetA, OffsetB, OffsetA, OffsetA, OffsetB, OffsetA} |
486 | // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 1 |
487 | |
488 | SawChainSeparater = true; |
489 | } |
490 | |
491 | // FirstOffset is not reused, skip this bucket. |
492 | if (FirstOffsetReusedCount == 1) |
493 | return false; |
494 | |
495 | unsigned ChainNum = |
496 | FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain; |
497 | |
498 | // All elements are increased by FirstOffset. |
499 | // The number of chains should be sqrt(EleNum). |
500 | if (!SawChainSeparater) |
501 | ChainNum = (unsigned)sqrt(x: (double)EleNum); |
502 | |
503 | CBucket.ChainSize = (unsigned)(EleNum / ChainNum); |
504 | |
505 | // If this is not a perfect chain(eg: not all elements can be put inside |
506 | // commoning chains.), skip now. |
507 | if (CBucket.ChainSize * ChainNum != EleNum) |
508 | return false; |
509 | |
510 | if (SawChainSeparater) { |
511 | // Check that the offset seqs are the same for all chains. |
512 | for (unsigned i = 1; i < CBucket.ChainSize; i++) |
513 | for (unsigned j = 1; j < ChainNum; j++) |
514 | if (CBucket.Elements[i].Offset != |
515 | SE->getMinusSCEV(LHS: CBucket.Elements[i + j * CBucket.ChainSize].Offset, |
516 | RHS: CBucket.Elements[j * CBucket.ChainSize].Offset)) |
517 | return false; |
518 | } |
519 | |
520 | for (unsigned i = 0; i < ChainNum; i++) |
521 | CBucket.ChainBases.push_back(Elt: CBucket.Elements[i * CBucket.ChainSize]); |
522 | |
523 | LLVM_DEBUG(dbgs() << "Bucket has " << ChainNum << " chains.\n" ); |
524 | |
525 | return true; |
526 | } |
527 | |
528 | bool PPCLoopInstrFormPrep::chainCommoning(Loop *L, |
529 | SmallVector<Bucket, 16> &Buckets) { |
530 | bool MadeChange = false; |
531 | |
532 | if (Buckets.empty()) |
533 | return MadeChange; |
534 | |
535 | SmallSet<BasicBlock *, 16> BBChanged; |
536 | |
537 | for (auto &Bucket : Buckets) { |
538 | if (prepareBasesForCommoningChains(CBucket&: Bucket)) |
539 | MadeChange |= rewriteLoadStoresForCommoningChains(L, Bucket, BBChanged); |
540 | } |
541 | |
542 | if (MadeChange) |
543 | for (auto *BB : BBChanged) |
544 | DeleteDeadPHIs(BB); |
545 | return MadeChange; |
546 | } |
547 | |
548 | bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains( |
549 | Loop *L, Bucket &Bucket, SmallSet<BasicBlock *, 16> &BBChanged) { |
550 | bool MadeChange = false; |
551 | |
552 | assert(Bucket.Elements.size() == |
553 | Bucket.ChainBases.size() * Bucket.ChainSize && |
554 | "invalid bucket for chain commoning!\n" ); |
555 | SmallPtrSet<Value *, 16> DeletedPtrs; |
556 | |
557 | BasicBlock * = L->getHeader(); |
558 | BasicBlock *LoopPredecessor = L->getLoopPredecessor(); |
559 | |
560 | SCEVExpander SCEVE(*SE, Header->getDataLayout(), |
561 | "loopprepare-chaincommon" ); |
562 | |
563 | for (unsigned ChainIdx = 0; ChainIdx < Bucket.ChainBases.size(); ++ChainIdx) { |
564 | unsigned BaseElemIdx = Bucket.ChainSize * ChainIdx; |
565 | const SCEV *BaseSCEV = |
566 | ChainIdx ? SE->getAddExpr(LHS: Bucket.BaseSCEV, |
567 | RHS: Bucket.Elements[BaseElemIdx].Offset) |
568 | : Bucket.BaseSCEV; |
569 | const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(Val: BaseSCEV); |
570 | |
571 | // Make sure the base is able to expand. |
572 | if (!SCEVE.isSafeToExpand(S: BasePtrSCEV->getStart())) |
573 | return MadeChange; |
574 | |
575 | assert(BasePtrSCEV->isAffine() && |
576 | "Invalid SCEV type for the base ptr for a candidate chain!\n" ); |
577 | |
578 | std::pair<Instruction *, Instruction *> Base = rewriteForBase( |
579 | L, BasePtrSCEV, BaseMemI: Bucket.Elements[BaseElemIdx].Instr, |
580 | CanPreInc: false /* CanPreInc */, Form: ChainCommoning, SCEVE, DeletedPtrs); |
581 | |
582 | if (!Base.first || !Base.second) |
583 | return MadeChange; |
584 | |
585 | // Keep track of the replacement pointer values we've inserted so that we |
586 | // don't generate more pointer values than necessary. |
587 | SmallPtrSet<Value *, 16> NewPtrs; |
588 | NewPtrs.insert(Ptr: Base.first); |
589 | |
590 | for (unsigned Idx = BaseElemIdx + 1; Idx < BaseElemIdx + Bucket.ChainSize; |
591 | ++Idx) { |
592 | BucketElement &I = Bucket.Elements[Idx]; |
593 | Value *Ptr = getPointerOperandAndType(MemI: I.Instr); |
594 | assert(Ptr && "No pointer operand" ); |
595 | if (NewPtrs.count(Ptr)) |
596 | continue; |
597 | |
598 | const SCEV *OffsetSCEV = |
599 | BaseElemIdx ? SE->getMinusSCEV(LHS: Bucket.Elements[Idx].Offset, |
600 | RHS: Bucket.Elements[BaseElemIdx].Offset) |
601 | : Bucket.Elements[Idx].Offset; |
602 | |
603 | // Make sure offset is able to expand. Only need to check one time as the |
604 | // offsets are reused between different chains. |
605 | if (!BaseElemIdx) |
606 | if (!SCEVE.isSafeToExpand(S: OffsetSCEV)) |
607 | return false; |
608 | |
609 | Value *OffsetValue = SCEVE.expandCodeFor( |
610 | SH: OffsetSCEV, Ty: OffsetSCEV->getType(), I: LoopPredecessor->getTerminator()); |
611 | |
612 | Instruction *NewPtr = rewriteForBucketElement(Base, Element: Bucket.Elements[Idx], |
613 | OffToBase: OffsetValue, DeletedPtrs); |
614 | |
615 | assert(NewPtr && "Wrong rewrite!\n" ); |
616 | NewPtrs.insert(Ptr: NewPtr); |
617 | } |
618 | |
619 | ++ChainCommoningRewritten; |
620 | } |
621 | |
622 | // Clear the rewriter cache, because values that are in the rewriter's cache |
623 | // can be deleted below, causing the AssertingVH in the cache to trigger. |
624 | SCEVE.clear(); |
625 | |
626 | for (auto *Ptr : DeletedPtrs) { |
627 | if (Instruction *IDel = dyn_cast<Instruction>(Val: Ptr)) |
628 | BBChanged.insert(Ptr: IDel->getParent()); |
629 | RecursivelyDeleteTriviallyDeadInstructions(V: Ptr); |
630 | } |
631 | |
632 | MadeChange = true; |
633 | return MadeChange; |
634 | } |
635 | |
636 | // Rewrite the new base according to BasePtrSCEV. |
637 | // bb.loop.preheader: |
638 | // %newstart = ... |
639 | // bb.loop.body: |
640 | // %phinode = phi [ %newstart, %bb.loop.preheader ], [ %add, %bb.loop.body ] |
641 | // ... |
642 | // %add = getelementptr %phinode, %inc |
643 | // |
644 | // First returned instruciton is %phinode (or a type cast to %phinode), caller |
645 | // needs this value to rewrite other load/stores in the same chain. |
646 | // Second returned instruction is %add, caller needs this value to rewrite other |
647 | // load/stores in the same chain. |
648 | std::pair<Instruction *, Instruction *> |
649 | PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV, |
650 | Instruction *BaseMemI, bool CanPreInc, |
651 | PrepForm Form, SCEVExpander &SCEVE, |
652 | SmallPtrSet<Value *, 16> &DeletedPtrs) { |
653 | |
654 | LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n" ); |
655 | |
656 | assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?" ); |
657 | |
658 | Value *BasePtr = getPointerOperandAndType(MemI: BaseMemI); |
659 | assert(BasePtr && "No pointer operand" ); |
660 | |
661 | Type *I8Ty = Type::getInt8Ty(C&: BaseMemI->getParent()->getContext()); |
662 | Type *I8PtrTy = |
663 | PointerType::get(C&: BaseMemI->getParent()->getContext(), |
664 | AddressSpace: BasePtr->getType()->getPointerAddressSpace()); |
665 | |
666 | bool IsConstantInc = false; |
667 | const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(SE&: *SE); |
668 | Value *IncNode = getNodeForInc(L, MemI: BaseMemI, BasePtrIncSCEV); |
669 | |
670 | const SCEVConstant *BasePtrIncConstantSCEV = |
671 | dyn_cast<SCEVConstant>(Val: BasePtrIncSCEV); |
672 | if (BasePtrIncConstantSCEV) |
673 | IsConstantInc = true; |
674 | |
675 | // No valid representation for the increment. |
676 | if (!IncNode) { |
677 | LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n" ); |
678 | return std::make_pair(x: nullptr, y: nullptr); |
679 | } |
680 | |
681 | if (Form == UpdateForm && !IsConstantInc && !EnableUpdateFormForNonConstInc) { |
682 | LLVM_DEBUG( |
683 | dbgs() |
684 | << "Update form prepare for non-const increment is not enabled!\n" ); |
685 | return std::make_pair(x: nullptr, y: nullptr); |
686 | } |
687 | |
688 | const SCEV *BasePtrStartSCEV = nullptr; |
689 | if (CanPreInc) { |
690 | assert(SE->isLoopInvariant(BasePtrIncSCEV, L) && |
691 | "Increment is not loop invariant!\n" ); |
692 | BasePtrStartSCEV = SE->getMinusSCEV(LHS: BasePtrSCEV->getStart(), |
693 | RHS: IsConstantInc ? BasePtrIncConstantSCEV |
694 | : BasePtrIncSCEV); |
695 | } else |
696 | BasePtrStartSCEV = BasePtrSCEV->getStart(); |
697 | |
698 | if (alreadyPrepared(L, MemI: BaseMemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) { |
699 | LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n" ); |
700 | return std::make_pair(x: nullptr, y: nullptr); |
701 | } |
702 | |
703 | LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n" ); |
704 | |
705 | BasicBlock * = L->getHeader(); |
706 | unsigned = pred_size(BB: Header); |
707 | BasicBlock *LoopPredecessor = L->getLoopPredecessor(); |
708 | |
709 | PHINode *NewPHI = PHINode::Create(Ty: I8PtrTy, NumReservedValues: HeaderLoopPredCount, |
710 | NameStr: getInstrName(I: BaseMemI, Suffix: PHINodeNameSuffix)); |
711 | NewPHI->insertBefore(InsertPos: Header->getFirstNonPHIIt()); |
712 | |
713 | Value *BasePtrStart = SCEVE.expandCodeFor(SH: BasePtrStartSCEV, Ty: I8PtrTy, |
714 | I: LoopPredecessor->getTerminator()); |
715 | |
716 | // Note that LoopPredecessor might occur in the predecessor list multiple |
717 | // times, and we need to add it the right number of times. |
718 | for (auto *PI : predecessors(BB: Header)) { |
719 | if (PI != LoopPredecessor) |
720 | continue; |
721 | |
722 | NewPHI->addIncoming(V: BasePtrStart, BB: LoopPredecessor); |
723 | } |
724 | |
725 | Instruction *PtrInc = nullptr; |
726 | Instruction *NewBasePtr = nullptr; |
727 | if (CanPreInc) { |
728 | BasicBlock::iterator InsPoint = Header->getFirstInsertionPt(); |
729 | PtrInc = GetElementPtrInst::Create( |
730 | PointeeType: I8Ty, Ptr: NewPHI, IdxList: IncNode, NameStr: getInstrName(I: BaseMemI, Suffix: GEPNodeIncNameSuffix), |
731 | InsertBefore: InsPoint); |
732 | cast<GetElementPtrInst>(Val: PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr)); |
733 | for (auto *PI : predecessors(BB: Header)) { |
734 | if (PI == LoopPredecessor) |
735 | continue; |
736 | |
737 | NewPHI->addIncoming(V: PtrInc, BB: PI); |
738 | } |
739 | if (PtrInc->getType() != BasePtr->getType()) |
740 | NewBasePtr = |
741 | new BitCastInst(PtrInc, BasePtr->getType(), |
742 | getInstrName(I: PtrInc, Suffix: CastNodeNameSuffix), InsPoint); |
743 | else |
744 | NewBasePtr = PtrInc; |
745 | } else { |
746 | // Note that LoopPredecessor might occur in the predecessor list multiple |
747 | // times, and we need to make sure no more incoming value for them in PHI. |
748 | for (auto *PI : predecessors(BB: Header)) { |
749 | if (PI == LoopPredecessor) |
750 | continue; |
751 | |
752 | // For the latch predecessor, we need to insert a GEP just before the |
753 | // terminator to increase the address. |
754 | BasicBlock *BB = PI; |
755 | BasicBlock::iterator InsPoint = BB->getTerminator()->getIterator(); |
756 | PtrInc = GetElementPtrInst::Create( |
757 | PointeeType: I8Ty, Ptr: NewPHI, IdxList: IncNode, NameStr: getInstrName(I: BaseMemI, Suffix: GEPNodeIncNameSuffix), |
758 | InsertBefore: InsPoint); |
759 | cast<GetElementPtrInst>(Val: PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr)); |
760 | |
761 | NewPHI->addIncoming(V: PtrInc, BB: PI); |
762 | } |
763 | PtrInc = NewPHI; |
764 | if (NewPHI->getType() != BasePtr->getType()) |
765 | NewBasePtr = new BitCastInst(NewPHI, BasePtr->getType(), |
766 | getInstrName(I: NewPHI, Suffix: CastNodeNameSuffix), |
767 | Header->getFirstInsertionPt()); |
768 | else |
769 | NewBasePtr = NewPHI; |
770 | } |
771 | |
772 | BasePtr->replaceAllUsesWith(V: NewBasePtr); |
773 | |
774 | DeletedPtrs.insert(Ptr: BasePtr); |
775 | |
776 | return std::make_pair(x&: NewBasePtr, y&: PtrInc); |
777 | } |
778 | |
779 | Instruction *PPCLoopInstrFormPrep::rewriteForBucketElement( |
780 | std::pair<Instruction *, Instruction *> Base, const BucketElement &Element, |
781 | Value *OffToBase, SmallPtrSet<Value *, 16> &DeletedPtrs) { |
782 | Instruction *NewBasePtr = Base.first; |
783 | Instruction *PtrInc = Base.second; |
784 | assert((NewBasePtr && PtrInc) && "base does not exist!\n" ); |
785 | |
786 | Type *I8Ty = Type::getInt8Ty(C&: PtrInc->getParent()->getContext()); |
787 | |
788 | Value *Ptr = getPointerOperandAndType(MemI: Element.Instr); |
789 | assert(Ptr && "No pointer operand" ); |
790 | |
791 | Instruction *RealNewPtr; |
792 | if (!Element.Offset || |
793 | (isa<SCEVConstant>(Val: Element.Offset) && |
794 | cast<SCEVConstant>(Val: Element.Offset)->getValue()->isZero())) { |
795 | RealNewPtr = NewBasePtr; |
796 | } else { |
797 | std::optional<BasicBlock::iterator> PtrIP = std::nullopt; |
798 | if (Instruction *I = dyn_cast<Instruction>(Val: Ptr)) |
799 | PtrIP = I->getIterator(); |
800 | |
801 | if (PtrIP && isa<Instruction>(Val: NewBasePtr) && |
802 | cast<Instruction>(Val: NewBasePtr)->getParent() == (*PtrIP)->getParent()) |
803 | PtrIP = std::nullopt; |
804 | else if (PtrIP && isa<PHINode>(Val: *PtrIP)) |
805 | PtrIP = (*PtrIP)->getParent()->getFirstInsertionPt(); |
806 | else if (!PtrIP) |
807 | PtrIP = Element.Instr->getIterator(); |
808 | |
809 | assert(OffToBase && "There should be an offset for non base element!\n" ); |
810 | GetElementPtrInst *NewPtr = GetElementPtrInst::Create( |
811 | PointeeType: I8Ty, Ptr: PtrInc, IdxList: OffToBase, |
812 | NameStr: getInstrName(I: Element.Instr, Suffix: GEPNodeOffNameSuffix)); |
813 | if (PtrIP) |
814 | NewPtr->insertBefore(BB&: *(*PtrIP)->getParent(), InsertPos: *PtrIP); |
815 | else |
816 | NewPtr->insertAfter(InsertPos: cast<Instruction>(Val: PtrInc)); |
817 | NewPtr->setIsInBounds(IsPtrInBounds(BasePtr: Ptr)); |
818 | RealNewPtr = NewPtr; |
819 | } |
820 | |
821 | Instruction *ReplNewPtr; |
822 | if (Ptr->getType() != RealNewPtr->getType()) { |
823 | ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), |
824 | getInstrName(I: Ptr, Suffix: CastNodeNameSuffix)); |
825 | ReplNewPtr->insertAfter(InsertPos: RealNewPtr); |
826 | } else |
827 | ReplNewPtr = RealNewPtr; |
828 | |
829 | Ptr->replaceAllUsesWith(V: ReplNewPtr); |
830 | DeletedPtrs.insert(Ptr); |
831 | |
832 | return ReplNewPtr; |
833 | } |
834 | |
835 | void PPCLoopInstrFormPrep::addOneCandidate( |
836 | Instruction *MemI, const SCEV *LSCEV, SmallVector<Bucket, 16> &Buckets, |
837 | std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) { |
838 | assert((MemI && getPointerOperandAndType(MemI)) && |
839 | "Candidate should be a memory instruction." ); |
840 | assert(LSCEV && "Invalid SCEV for Ptr value." ); |
841 | |
842 | bool FoundBucket = false; |
843 | for (auto &B : Buckets) { |
844 | if (cast<SCEVAddRecExpr>(Val: B.BaseSCEV)->getStepRecurrence(SE&: *SE) != |
845 | cast<SCEVAddRecExpr>(Val: LSCEV)->getStepRecurrence(SE&: *SE)) |
846 | continue; |
847 | const SCEV *Diff = SE->getMinusSCEV(LHS: LSCEV, RHS: B.BaseSCEV); |
848 | if (isValidDiff(Diff)) { |
849 | B.Elements.push_back(Elt: BucketElement(Diff, MemI)); |
850 | FoundBucket = true; |
851 | break; |
852 | } |
853 | } |
854 | |
855 | if (!FoundBucket) { |
856 | if (Buckets.size() == MaxCandidateNum) { |
857 | LLVM_DEBUG(dbgs() << "Can not prepare more chains, reach maximum limit " |
858 | << MaxCandidateNum << "\n" ); |
859 | return; |
860 | } |
861 | Buckets.push_back(Elt: Bucket(LSCEV, MemI)); |
862 | } |
863 | } |
864 | |
865 | SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates( |
866 | Loop *L, |
867 | std::function<bool(const Instruction *, Value *, const Type *)> |
868 | isValidCandidate, |
869 | std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) { |
870 | SmallVector<Bucket, 16> Buckets; |
871 | |
872 | for (const auto &BB : L->blocks()) |
873 | for (auto &J : *BB) { |
874 | Value *PtrValue = nullptr; |
875 | Type *PointerElementType = nullptr; |
876 | PtrValue = getPointerOperandAndType(MemI: &J, PtrElementType: &PointerElementType); |
877 | |
878 | if (!PtrValue) |
879 | continue; |
880 | |
881 | if (PtrValue->getType()->getPointerAddressSpace()) |
882 | continue; |
883 | |
884 | if (L->isLoopInvariant(V: PtrValue)) |
885 | continue; |
886 | |
887 | const SCEV *LSCEV = SE->getSCEVAtScope(V: PtrValue, L); |
888 | const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(Val: LSCEV); |
889 | if (!LARSCEV || LARSCEV->getLoop() != L) |
890 | continue; |
891 | |
892 | // Mark that we have candidates for preparing. |
893 | HasCandidateForPrepare = true; |
894 | |
895 | if (isValidCandidate(&J, PtrValue, PointerElementType)) |
896 | addOneCandidate(MemI: &J, LSCEV, Buckets, isValidDiff, MaxCandidateNum); |
897 | } |
898 | return Buckets; |
899 | } |
900 | |
901 | bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain, |
902 | PrepForm Form) { |
903 | // RemainderOffsetInfo details: |
904 | // key: value of (Offset urem DispConstraint). For DSForm, it can |
905 | // be [0, 4). |
906 | // first of pair: the index of first BucketElement whose remainder is equal |
907 | // to key. For key 0, this value must be 0. |
908 | // second of pair: number of load/stores with the same remainder. |
909 | DenseMap<unsigned, std::pair<unsigned, unsigned>> RemainderOffsetInfo; |
910 | |
911 | for (unsigned j = 0, je = BucketChain.Elements.size(); j != je; ++j) { |
912 | if (!BucketChain.Elements[j].Offset) |
913 | RemainderOffsetInfo[0] = std::make_pair(x: 0, y: 1); |
914 | else { |
915 | unsigned Remainder = cast<SCEVConstant>(Val: BucketChain.Elements[j].Offset) |
916 | ->getAPInt() |
917 | .urem(RHS: Form); |
918 | if (!RemainderOffsetInfo.contains(Val: Remainder)) |
919 | RemainderOffsetInfo[Remainder] = std::make_pair(x&: j, y: 1); |
920 | else |
921 | RemainderOffsetInfo[Remainder].second++; |
922 | } |
923 | } |
924 | // Currently we choose the most profitable base as the one which has the max |
925 | // number of load/store with same remainder. |
926 | // FIXME: adjust the base selection strategy according to load/store offset |
927 | // distribution. |
928 | // For example, if we have one candidate chain for DS form preparation, which |
929 | // contains following load/stores with different remainders: |
930 | // 1: 10 load/store whose remainder is 1; |
931 | // 2: 9 load/store whose remainder is 2; |
932 | // 3: 1 for remainder 3 and 0 for remainder 0; |
933 | // Now we will choose the first load/store whose remainder is 1 as base and |
934 | // adjust all other load/stores according to new base, so we will get 10 DS |
935 | // form and 10 X form. |
936 | // But we should be more clever, for this case we could use two bases, one for |
937 | // remainder 1 and the other for remainder 2, thus we could get 19 DS form and |
938 | // 1 X form. |
939 | unsigned MaxCountRemainder = 0; |
940 | for (unsigned j = 0; j < (unsigned)Form; j++) |
941 | if ((RemainderOffsetInfo.contains(Val: j)) && |
942 | RemainderOffsetInfo[j].second > |
943 | RemainderOffsetInfo[MaxCountRemainder].second) |
944 | MaxCountRemainder = j; |
945 | |
946 | // Abort when there are too few insts with common base. |
947 | if (RemainderOffsetInfo[MaxCountRemainder].second < DispFormPrepMinThreshold) |
948 | return false; |
949 | |
950 | // If the first value is most profitable, no needed to adjust BucketChain |
951 | // elements as they are substracted the first value when collecting. |
952 | if (MaxCountRemainder == 0) |
953 | return true; |
954 | |
955 | // Adjust load/store to the new chosen base. |
956 | const SCEV *Offset = |
957 | BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset; |
958 | BucketChain.BaseSCEV = SE->getAddExpr(LHS: BucketChain.BaseSCEV, RHS: Offset); |
959 | for (auto &E : BucketChain.Elements) { |
960 | if (E.Offset) |
961 | E.Offset = cast<SCEVConstant>(Val: SE->getMinusSCEV(LHS: E.Offset, RHS: Offset)); |
962 | else |
963 | E.Offset = cast<SCEVConstant>(Val: SE->getNegativeSCEV(V: Offset)); |
964 | } |
965 | |
966 | std::swap(a&: BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first], |
967 | b&: BucketChain.Elements[0]); |
968 | return true; |
969 | } |
970 | |
971 | // FIXME: implement a more clever base choosing policy. |
972 | // Currently we always choose an exist load/store offset. This maybe lead to |
973 | // suboptimal code sequences. For example, for one DS chain with offsets |
974 | // {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp |
975 | // for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a |
976 | // multipler of 4, it cannot be represented by sint16. |
977 | bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { |
978 | // We have a choice now of which instruction's memory operand we use as the |
979 | // base for the generated PHI. Always picking the first instruction in each |
980 | // bucket does not work well, specifically because that instruction might |
981 | // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, |
982 | // the choice is somewhat arbitrary, because the backend will happily |
983 | // generate direct offsets from both the pre-incremented and |
984 | // post-incremented pointer values. Thus, we'll pick the first non-prefetch |
985 | // instruction in each bucket, and adjust the recurrence and other offsets |
986 | // accordingly. |
987 | for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) { |
988 | if (auto *II = dyn_cast<IntrinsicInst>(Val: BucketChain.Elements[j].Instr)) |
989 | if (II->getIntrinsicID() == Intrinsic::prefetch) |
990 | continue; |
991 | |
992 | // If we'd otherwise pick the first element anyway, there's nothing to do. |
993 | if (j == 0) |
994 | break; |
995 | |
996 | // If our chosen element has no offset from the base pointer, there's |
997 | // nothing to do. |
998 | if (!BucketChain.Elements[j].Offset || |
999 | cast<SCEVConstant>(Val: BucketChain.Elements[j].Offset)->isZero()) |
1000 | break; |
1001 | |
1002 | const SCEV *Offset = BucketChain.Elements[j].Offset; |
1003 | BucketChain.BaseSCEV = SE->getAddExpr(LHS: BucketChain.BaseSCEV, RHS: Offset); |
1004 | for (auto &E : BucketChain.Elements) { |
1005 | if (E.Offset) |
1006 | E.Offset = cast<SCEVConstant>(Val: SE->getMinusSCEV(LHS: E.Offset, RHS: Offset)); |
1007 | else |
1008 | E.Offset = cast<SCEVConstant>(Val: SE->getNegativeSCEV(V: Offset)); |
1009 | } |
1010 | |
1011 | std::swap(a&: BucketChain.Elements[j], b&: BucketChain.Elements[0]); |
1012 | break; |
1013 | } |
1014 | return true; |
1015 | } |
1016 | |
1017 | bool PPCLoopInstrFormPrep::rewriteLoadStores( |
1018 | Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged, |
1019 | PrepForm Form) { |
1020 | bool MadeChange = false; |
1021 | |
1022 | const SCEVAddRecExpr *BasePtrSCEV = |
1023 | cast<SCEVAddRecExpr>(Val: BucketChain.BaseSCEV); |
1024 | if (!BasePtrSCEV->isAffine()) |
1025 | return MadeChange; |
1026 | |
1027 | BasicBlock * = L->getHeader(); |
1028 | SCEVExpander SCEVE(*SE, Header->getDataLayout(), |
1029 | "loopprepare-formrewrite" ); |
1030 | if (!SCEVE.isSafeToExpand(S: BasePtrSCEV->getStart())) |
1031 | return MadeChange; |
1032 | |
1033 | SmallPtrSet<Value *, 16> DeletedPtrs; |
1034 | |
1035 | // For some DS form load/store instructions, it can also be an update form, |
1036 | // if the stride is constant and is a multipler of 4. Use update form if |
1037 | // prefer it. |
1038 | bool CanPreInc = (Form == UpdateForm || |
1039 | ((Form == DSForm) && |
1040 | isa<SCEVConstant>(Val: BasePtrSCEV->getStepRecurrence(SE&: *SE)) && |
1041 | !cast<SCEVConstant>(Val: BasePtrSCEV->getStepRecurrence(SE&: *SE)) |
1042 | ->getAPInt() |
1043 | .urem(RHS: 4) && |
1044 | PreferUpdateForm)); |
1045 | |
1046 | std::pair<Instruction *, Instruction *> Base = |
1047 | rewriteForBase(L, BasePtrSCEV, BaseMemI: BucketChain.Elements.begin()->Instr, |
1048 | CanPreInc, Form, SCEVE, DeletedPtrs); |
1049 | |
1050 | if (!Base.first || !Base.second) |
1051 | return MadeChange; |
1052 | |
1053 | // Keep track of the replacement pointer values we've inserted so that we |
1054 | // don't generate more pointer values than necessary. |
1055 | SmallPtrSet<Value *, 16> NewPtrs; |
1056 | NewPtrs.insert(Ptr: Base.first); |
1057 | |
1058 | for (const BucketElement &BE : llvm::drop_begin(RangeOrContainer&: BucketChain.Elements)) { |
1059 | Value *Ptr = getPointerOperandAndType(MemI: BE.Instr); |
1060 | assert(Ptr && "No pointer operand" ); |
1061 | if (NewPtrs.count(Ptr)) |
1062 | continue; |
1063 | |
1064 | Instruction *NewPtr = rewriteForBucketElement( |
1065 | Base, Element: BE, |
1066 | OffToBase: BE.Offset ? cast<SCEVConstant>(Val: BE.Offset)->getValue() : nullptr, |
1067 | DeletedPtrs); |
1068 | assert(NewPtr && "wrong rewrite!\n" ); |
1069 | NewPtrs.insert(Ptr: NewPtr); |
1070 | } |
1071 | |
1072 | // Clear the rewriter cache, because values that are in the rewriter's cache |
1073 | // can be deleted below, causing the AssertingVH in the cache to trigger. |
1074 | SCEVE.clear(); |
1075 | |
1076 | for (auto *Ptr : DeletedPtrs) { |
1077 | if (Instruction *IDel = dyn_cast<Instruction>(Val: Ptr)) |
1078 | BBChanged.insert(Ptr: IDel->getParent()); |
1079 | RecursivelyDeleteTriviallyDeadInstructions(V: Ptr); |
1080 | } |
1081 | |
1082 | MadeChange = true; |
1083 | |
1084 | SuccPrepCount++; |
1085 | |
1086 | if (Form == DSForm && !CanPreInc) |
1087 | DSFormChainRewritten++; |
1088 | else if (Form == DQForm) |
1089 | DQFormChainRewritten++; |
1090 | else if (Form == UpdateForm || (Form == DSForm && CanPreInc)) |
1091 | UpdFormChainRewritten++; |
1092 | |
1093 | return MadeChange; |
1094 | } |
1095 | |
1096 | bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L, |
1097 | SmallVector<Bucket, 16> &Buckets) { |
1098 | bool MadeChange = false; |
1099 | if (Buckets.empty()) |
1100 | return MadeChange; |
1101 | SmallSet<BasicBlock *, 16> BBChanged; |
1102 | for (auto &Bucket : Buckets) |
1103 | // The base address of each bucket is transformed into a phi and the others |
1104 | // are rewritten based on new base. |
1105 | if (prepareBaseForUpdateFormChain(BucketChain&: Bucket)) |
1106 | MadeChange |= rewriteLoadStores(L, BucketChain&: Bucket, BBChanged, Form: UpdateForm); |
1107 | |
1108 | if (MadeChange) |
1109 | for (auto *BB : BBChanged) |
1110 | DeleteDeadPHIs(BB); |
1111 | return MadeChange; |
1112 | } |
1113 | |
1114 | bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, |
1115 | SmallVector<Bucket, 16> &Buckets, |
1116 | PrepForm Form) { |
1117 | bool MadeChange = false; |
1118 | |
1119 | if (Buckets.empty()) |
1120 | return MadeChange; |
1121 | |
1122 | SmallSet<BasicBlock *, 16> BBChanged; |
1123 | for (auto &Bucket : Buckets) { |
1124 | if (Bucket.Elements.size() < DispFormPrepMinThreshold) |
1125 | continue; |
1126 | if (prepareBaseForDispFormChain(BucketChain&: Bucket, Form)) |
1127 | MadeChange |= rewriteLoadStores(L, BucketChain&: Bucket, BBChanged, Form); |
1128 | } |
1129 | |
1130 | if (MadeChange) |
1131 | for (auto *BB : BBChanged) |
1132 | DeleteDeadPHIs(BB); |
1133 | return MadeChange; |
1134 | } |
1135 | |
1136 | // Find the loop invariant increment node for SCEV BasePtrIncSCEV. |
1137 | // bb.loop.preheader: |
1138 | // %start = ... |
1139 | // bb.loop.body: |
1140 | // %phinode = phi [ %start, %bb.loop.preheader ], [ %add, %bb.loop.body ] |
1141 | // ... |
1142 | // %add = add %phinode, %inc ; %inc is what we want to get. |
1143 | // |
1144 | Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI, |
1145 | const SCEV *BasePtrIncSCEV) { |
1146 | // If the increment is a constant, no definition is needed. |
1147 | // Return the value directly. |
1148 | if (isa<SCEVConstant>(Val: BasePtrIncSCEV)) |
1149 | return cast<SCEVConstant>(Val: BasePtrIncSCEV)->getValue(); |
1150 | |
1151 | if (!SE->isLoopInvariant(S: BasePtrIncSCEV, L)) |
1152 | return nullptr; |
1153 | |
1154 | BasicBlock *BB = MemI->getParent(); |
1155 | if (!BB) |
1156 | return nullptr; |
1157 | |
1158 | BasicBlock *LatchBB = L->getLoopLatch(); |
1159 | |
1160 | if (!LatchBB) |
1161 | return nullptr; |
1162 | |
1163 | // Run through the PHIs and check their operands to find valid representation |
1164 | // for the increment SCEV. |
1165 | iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis(); |
1166 | for (auto &CurrentPHI : PHIIter) { |
1167 | PHINode *CurrentPHINode = dyn_cast<PHINode>(Val: &CurrentPHI); |
1168 | if (!CurrentPHINode) |
1169 | continue; |
1170 | |
1171 | if (!SE->isSCEVable(Ty: CurrentPHINode->getType())) |
1172 | continue; |
1173 | |
1174 | const SCEV *PHISCEV = SE->getSCEVAtScope(V: CurrentPHINode, L); |
1175 | |
1176 | const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(Val: PHISCEV); |
1177 | if (!PHIBasePtrSCEV) |
1178 | continue; |
1179 | |
1180 | const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(SE&: *SE); |
1181 | |
1182 | if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV)) |
1183 | continue; |
1184 | |
1185 | // Get the incoming value from the loop latch and check if the value has |
1186 | // the add form with the required increment. |
1187 | if (CurrentPHINode->getBasicBlockIndex(BB: LatchBB) < 0) |
1188 | continue; |
1189 | if (Instruction *I = dyn_cast<Instruction>( |
1190 | Val: CurrentPHINode->getIncomingValueForBlock(BB: LatchBB))) { |
1191 | Value *StrippedBaseI = I; |
1192 | while (BitCastInst *BC = dyn_cast<BitCastInst>(Val: StrippedBaseI)) |
1193 | StrippedBaseI = BC->getOperand(i_nocapture: 0); |
1194 | |
1195 | Instruction *StrippedI = dyn_cast<Instruction>(Val: StrippedBaseI); |
1196 | if (!StrippedI) |
1197 | continue; |
1198 | |
1199 | // LSR pass may add a getelementptr instruction to do the loop increment, |
1200 | // also search in that getelementptr instruction. |
1201 | if (StrippedI->getOpcode() == Instruction::Add || |
1202 | (StrippedI->getOpcode() == Instruction::GetElementPtr && |
1203 | StrippedI->getNumOperands() == 2)) { |
1204 | if (SE->getSCEVAtScope(V: StrippedI->getOperand(i: 0), L) == BasePtrIncSCEV) |
1205 | return StrippedI->getOperand(i: 0); |
1206 | if (SE->getSCEVAtScope(V: StrippedI->getOperand(i: 1), L) == BasePtrIncSCEV) |
1207 | return StrippedI->getOperand(i: 1); |
1208 | } |
1209 | } |
1210 | } |
1211 | return nullptr; |
1212 | } |
1213 | |
1214 | // In order to prepare for the preferred instruction form, a PHI is added. |
1215 | // This function will check to see if that PHI already exists and will return |
1216 | // true if it found an existing PHI with the matched start and increment as the |
1217 | // one we wanted to create. |
1218 | bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI, |
1219 | const SCEV *BasePtrStartSCEV, |
1220 | const SCEV *BasePtrIncSCEV, |
1221 | PrepForm Form) { |
1222 | BasicBlock *BB = MemI->getParent(); |
1223 | if (!BB) |
1224 | return false; |
1225 | |
1226 | BasicBlock *PredBB = L->getLoopPredecessor(); |
1227 | BasicBlock *LatchBB = L->getLoopLatch(); |
1228 | |
1229 | if (!PredBB || !LatchBB) |
1230 | return false; |
1231 | |
1232 | // Run through the PHIs and see if we have some that looks like a preparation |
1233 | iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis(); |
1234 | for (auto & CurrentPHI : PHIIter) { |
1235 | PHINode *CurrentPHINode = dyn_cast<PHINode>(Val: &CurrentPHI); |
1236 | if (!CurrentPHINode) |
1237 | continue; |
1238 | |
1239 | if (!SE->isSCEVable(Ty: CurrentPHINode->getType())) |
1240 | continue; |
1241 | |
1242 | const SCEV *PHISCEV = SE->getSCEVAtScope(V: CurrentPHINode, L); |
1243 | |
1244 | const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(Val: PHISCEV); |
1245 | if (!PHIBasePtrSCEV) |
1246 | continue; |
1247 | |
1248 | const SCEVConstant *PHIBasePtrIncSCEV = |
1249 | dyn_cast<SCEVConstant>(Val: PHIBasePtrSCEV->getStepRecurrence(SE&: *SE)); |
1250 | if (!PHIBasePtrIncSCEV) |
1251 | continue; |
1252 | |
1253 | if (CurrentPHINode->getNumIncomingValues() == 2) { |
1254 | if ((CurrentPHINode->getIncomingBlock(i: 0) == LatchBB && |
1255 | CurrentPHINode->getIncomingBlock(i: 1) == PredBB) || |
1256 | (CurrentPHINode->getIncomingBlock(i: 1) == LatchBB && |
1257 | CurrentPHINode->getIncomingBlock(i: 0) == PredBB)) { |
1258 | if (PHIBasePtrIncSCEV == BasePtrIncSCEV) { |
1259 | // The existing PHI (CurrentPHINode) has the same start and increment |
1260 | // as the PHI that we wanted to create. |
1261 | if ((Form == UpdateForm || Form == ChainCommoning ) && |
1262 | PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) { |
1263 | ++PHINodeAlreadyExistsUpdate; |
1264 | return true; |
1265 | } |
1266 | if (Form == DSForm || Form == DQForm) { |
1267 | const SCEVConstant *Diff = dyn_cast<SCEVConstant>( |
1268 | Val: SE->getMinusSCEV(LHS: PHIBasePtrSCEV->getStart(), RHS: BasePtrStartSCEV)); |
1269 | if (Diff && !Diff->getAPInt().urem(RHS: Form)) { |
1270 | if (Form == DSForm) |
1271 | ++PHINodeAlreadyExistsDS; |
1272 | else |
1273 | ++PHINodeAlreadyExistsDQ; |
1274 | return true; |
1275 | } |
1276 | } |
1277 | } |
1278 | } |
1279 | } |
1280 | } |
1281 | return false; |
1282 | } |
1283 | |
1284 | bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) { |
1285 | bool MadeChange = false; |
1286 | |
1287 | // Only prep. the inner-most loop |
1288 | if (!L->isInnermost()) |
1289 | return MadeChange; |
1290 | |
1291 | // Return if already done enough preparation. |
1292 | if (SuccPrepCount >= MaxVarsPrep) |
1293 | return MadeChange; |
1294 | |
1295 | LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n" ); |
1296 | |
1297 | BasicBlock *LoopPredecessor = L->getLoopPredecessor(); |
1298 | // If there is no loop predecessor, or the loop predecessor's terminator |
1299 | // returns a value (which might contribute to determining the loop's |
1300 | // iteration space), insert a new preheader for the loop. |
1301 | if (!LoopPredecessor || |
1302 | !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { |
1303 | LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, MSSAU: nullptr, PreserveLCSSA); |
1304 | if (LoopPredecessor) |
1305 | MadeChange = true; |
1306 | } |
1307 | if (!LoopPredecessor) { |
1308 | LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n" ); |
1309 | return MadeChange; |
1310 | } |
1311 | // Check if a load/store has update form. This lambda is used by function |
1312 | // collectCandidates which can collect candidates for types defined by lambda. |
1313 | auto isUpdateFormCandidate = [&](const Instruction *I, Value *PtrValue, |
1314 | const Type *PointerElementType) { |
1315 | assert((PtrValue && I) && "Invalid parameter!" ); |
1316 | // There are no update forms for Altivec vector load/stores. |
1317 | if (ST && ST->hasAltivec() && PointerElementType->isVectorTy()) |
1318 | return false; |
1319 | // There are no update forms for P10 lxvp/stxvp intrinsic. |
1320 | auto *II = dyn_cast<IntrinsicInst>(Val: I); |
1321 | if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) || |
1322 | II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp)) |
1323 | return false; |
1324 | // See getPreIndexedAddressParts, the displacement for LDU/STDU has to |
1325 | // be 4's multiple (DS-form). For i64 loads/stores when the displacement |
1326 | // fits in a 16-bit signed field but isn't a multiple of 4, it will be |
1327 | // useless and possible to break some original well-form addressing mode |
1328 | // to make this pre-inc prep for it. |
1329 | if (PointerElementType->isIntegerTy(Bitwidth: 64)) { |
1330 | const SCEV *LSCEV = SE->getSCEVAtScope(V: const_cast<Value *>(PtrValue), L); |
1331 | const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(Val: LSCEV); |
1332 | if (!LARSCEV || LARSCEV->getLoop() != L) |
1333 | return false; |
1334 | if (const SCEVConstant *StepConst = |
1335 | dyn_cast<SCEVConstant>(Val: LARSCEV->getStepRecurrence(SE&: *SE))) { |
1336 | const APInt &ConstInt = StepConst->getValue()->getValue(); |
1337 | if (ConstInt.isSignedIntN(N: 16) && ConstInt.srem(RHS: 4) != 0) |
1338 | return false; |
1339 | } |
1340 | } |
1341 | return true; |
1342 | }; |
1343 | |
1344 | // Check if a load/store has DS form. |
1345 | auto isDSFormCandidate = [](const Instruction *I, Value *PtrValue, |
1346 | const Type *PointerElementType) { |
1347 | assert((PtrValue && I) && "Invalid parameter!" ); |
1348 | if (isa<IntrinsicInst>(Val: I)) |
1349 | return false; |
1350 | return (PointerElementType->isIntegerTy(Bitwidth: 64)) || |
1351 | (PointerElementType->isFloatTy()) || |
1352 | (PointerElementType->isDoubleTy()) || |
1353 | (PointerElementType->isIntegerTy(Bitwidth: 32) && |
1354 | llvm::any_of(Range: I->users(), |
1355 | P: [](const User *U) { return isa<SExtInst>(Val: U); })); |
1356 | }; |
1357 | |
1358 | // Check if a load/store has DQ form. |
1359 | auto isDQFormCandidate = [&](const Instruction *I, Value *PtrValue, |
1360 | const Type *PointerElementType) { |
1361 | assert((PtrValue && I) && "Invalid parameter!" ); |
1362 | // Check if it is a P10 lxvp/stxvp intrinsic. |
1363 | auto *II = dyn_cast<IntrinsicInst>(Val: I); |
1364 | if (II) |
1365 | return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp || |
1366 | II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp; |
1367 | // Check if it is a P9 vector load/store. |
1368 | return ST && ST->hasP9Vector() && (PointerElementType->isVectorTy()); |
1369 | }; |
1370 | |
1371 | // Check if a load/store is candidate for chain commoning. |
1372 | // If the SCEV is only with one ptr operand in its start, we can use that |
1373 | // start as a chain separator. Mark this load/store as a candidate. |
1374 | auto isChainCommoningCandidate = [&](const Instruction *I, Value *PtrValue, |
1375 | const Type *PointerElementType) { |
1376 | const SCEVAddRecExpr *ARSCEV = |
1377 | cast<SCEVAddRecExpr>(Val: SE->getSCEVAtScope(V: PtrValue, L)); |
1378 | if (!ARSCEV) |
1379 | return false; |
1380 | |
1381 | if (!ARSCEV->isAffine()) |
1382 | return false; |
1383 | |
1384 | const SCEV *Start = ARSCEV->getStart(); |
1385 | |
1386 | // A single pointer. We can treat it as offset 0. |
1387 | if (isa<SCEVUnknown>(Val: Start) && Start->getType()->isPointerTy()) |
1388 | return true; |
1389 | |
1390 | const SCEVAddExpr *ASCEV = dyn_cast<SCEVAddExpr>(Val: Start); |
1391 | |
1392 | // We need a SCEVAddExpr to include both base and offset. |
1393 | if (!ASCEV) |
1394 | return false; |
1395 | |
1396 | // Make sure there is only one pointer operand(base) and all other operands |
1397 | // are integer type. |
1398 | bool SawPointer = false; |
1399 | for (const SCEV *Op : ASCEV->operands()) { |
1400 | if (Op->getType()->isPointerTy()) { |
1401 | if (SawPointer) |
1402 | return false; |
1403 | SawPointer = true; |
1404 | } else if (!Op->getType()->isIntegerTy()) |
1405 | return false; |
1406 | } |
1407 | |
1408 | return SawPointer; |
1409 | }; |
1410 | |
1411 | // Check if the diff is a constant type. This is used for update/DS/DQ form |
1412 | // preparation. |
1413 | auto isValidConstantDiff = [](const SCEV *Diff) { |
1414 | return dyn_cast<SCEVConstant>(Val: Diff) != nullptr; |
1415 | }; |
1416 | |
1417 | // Make sure the diff between the base and new candidate is required type. |
1418 | // This is used for chain commoning preparation. |
1419 | auto isValidChainCommoningDiff = [](const SCEV *Diff) { |
1420 | assert(Diff && "Invalid Diff!\n" ); |
1421 | |
1422 | // Don't mess up previous dform prepare. |
1423 | if (isa<SCEVConstant>(Val: Diff)) |
1424 | return false; |
1425 | |
1426 | // A single integer type offset. |
1427 | if (isa<SCEVUnknown>(Val: Diff) && Diff->getType()->isIntegerTy()) |
1428 | return true; |
1429 | |
1430 | const SCEVNAryExpr *ADiff = dyn_cast<SCEVNAryExpr>(Val: Diff); |
1431 | if (!ADiff) |
1432 | return false; |
1433 | |
1434 | for (const SCEV *Op : ADiff->operands()) |
1435 | if (!Op->getType()->isIntegerTy()) |
1436 | return false; |
1437 | |
1438 | return true; |
1439 | }; |
1440 | |
1441 | HasCandidateForPrepare = false; |
1442 | |
1443 | LLVM_DEBUG(dbgs() << "Start to prepare for update form.\n" ); |
1444 | // Collect buckets of comparable addresses used by loads and stores for update |
1445 | // form. |
1446 | SmallVector<Bucket, 16> UpdateFormBuckets = collectCandidates( |
1447 | L, isValidCandidate: isUpdateFormCandidate, isValidDiff: isValidConstantDiff, MaxCandidateNum: MaxVarsUpdateForm); |
1448 | |
1449 | // Prepare for update form. |
1450 | if (!UpdateFormBuckets.empty()) |
1451 | MadeChange |= updateFormPrep(L, Buckets&: UpdateFormBuckets); |
1452 | else if (!HasCandidateForPrepare) { |
1453 | LLVM_DEBUG( |
1454 | dbgs() |
1455 | << "No prepare candidates found, stop praparation for current loop!\n" ); |
1456 | // If no candidate for preparing, return early. |
1457 | return MadeChange; |
1458 | } |
1459 | |
1460 | LLVM_DEBUG(dbgs() << "Start to prepare for DS form.\n" ); |
1461 | // Collect buckets of comparable addresses used by loads and stores for DS |
1462 | // form. |
1463 | SmallVector<Bucket, 16> DSFormBuckets = collectCandidates( |
1464 | L, isValidCandidate: isDSFormCandidate, isValidDiff: isValidConstantDiff, MaxCandidateNum: MaxVarsDSForm); |
1465 | |
1466 | // Prepare for DS form. |
1467 | if (!DSFormBuckets.empty()) |
1468 | MadeChange |= dispFormPrep(L, Buckets&: DSFormBuckets, Form: DSForm); |
1469 | |
1470 | LLVM_DEBUG(dbgs() << "Start to prepare for DQ form.\n" ); |
1471 | // Collect buckets of comparable addresses used by loads and stores for DQ |
1472 | // form. |
1473 | SmallVector<Bucket, 16> DQFormBuckets = collectCandidates( |
1474 | L, isValidCandidate: isDQFormCandidate, isValidDiff: isValidConstantDiff, MaxCandidateNum: MaxVarsDQForm); |
1475 | |
1476 | // Prepare for DQ form. |
1477 | if (!DQFormBuckets.empty()) |
1478 | MadeChange |= dispFormPrep(L, Buckets&: DQFormBuckets, Form: DQForm); |
1479 | |
1480 | // Collect buckets of comparable addresses used by loads and stores for chain |
1481 | // commoning. With chain commoning, we reuse offsets between the chains, so |
1482 | // the register pressure will be reduced. |
1483 | if (!EnableChainCommoning) { |
1484 | LLVM_DEBUG(dbgs() << "Chain commoning is not enabled.\n" ); |
1485 | return MadeChange; |
1486 | } |
1487 | |
1488 | LLVM_DEBUG(dbgs() << "Start to prepare for chain commoning.\n" ); |
1489 | SmallVector<Bucket, 16> Buckets = |
1490 | collectCandidates(L, isValidCandidate: isChainCommoningCandidate, isValidDiff: isValidChainCommoningDiff, |
1491 | MaxCandidateNum: MaxVarsChainCommon); |
1492 | |
1493 | // Prepare for chain commoning. |
1494 | if (!Buckets.empty()) |
1495 | MadeChange |= chainCommoning(L, Buckets); |
1496 | |
1497 | return MadeChange; |
1498 | } |
1499 | |