1//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass removes the computation of provably redundant expressions that have
10// been computed earlier in a previous iteration. It relies on the use of PHIs
11// to identify loop carried dependences. This is scalar replacement for vector
12// types.
13//
14//===----------------------------------------------------------------------===//
15
16#include "HexagonVectorLoopCarriedReuse.h"
17#include "Hexagon.h"
18#include "llvm/ADT/SetVector.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/ADT/Statistic.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopPass.h"
23#include "llvm/Analysis/OptimizationRemarkEmitter.h"
24#include "llvm/IR/BasicBlock.h"
25#include "llvm/IR/DerivedTypes.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Instruction.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/IntrinsicInst.h"
30#include "llvm/IR/Intrinsics.h"
31#include "llvm/IR/IntrinsicsHexagon.h"
32#include "llvm/IR/Use.h"
33#include "llvm/IR/Value.h"
34#include "llvm/InitializePasses.h"
35#include "llvm/Pass.h"
36#include "llvm/Support/Casting.h"
37#include "llvm/Support/CommandLine.h"
38#include "llvm/Support/Compiler.h"
39#include "llvm/Support/Debug.h"
40#include "llvm/Support/raw_ostream.h"
41#include "llvm/Transforms/Scalar.h"
42#include "llvm/Transforms/Utils.h"
43#include <cassert>
44#include <map>
45#include <memory>
46#include <set>
47
48using namespace llvm;
49
50#define DEBUG_TYPE "hexagon-vlcr"
51
52STATISTIC(HexagonNumVectorLoopCarriedReuse,
53 "Number of values that were reused from a previous iteration.");
54
55static cl::opt<int> HexagonVLCRIterationLim(
56 "hexagon-vlcr-iteration-lim", cl::Hidden,
57 cl::desc("Maximum distance of loop carried dependences that are handled"),
58 cl::init(Val: 2));
59
60namespace {
61
62 // See info about DepChain in the comments at the top of this file.
63 using ChainOfDependences = SmallVector<Instruction *, 4>;
64
65 class DepChain {
66 ChainOfDependences Chain;
67
68 public:
69 bool isIdentical(DepChain &Other) const {
70 if (Other.size() != size())
71 return false;
72 ChainOfDependences &OtherChain = Other.getChain();
73 for (int i = 0; i < size(); ++i) {
74 if (Chain[i] != OtherChain[i])
75 return false;
76 }
77 return true;
78 }
79
80 ChainOfDependences &getChain() {
81 return Chain;
82 }
83
84 int size() const {
85 return Chain.size();
86 }
87
88 void clear() {
89 Chain.clear();
90 }
91
92 void push_back(Instruction *I) {
93 Chain.push_back(Elt: I);
94 }
95
96 int iterations() const {
97 return size() - 1;
98 }
99
100 Instruction *front() const {
101 return Chain.front();
102 }
103
104 Instruction *back() const {
105 return Chain.back();
106 }
107
108 Instruction *&operator[](const int index) {
109 return Chain[index];
110 }
111
112 friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
113 };
114
115 [[maybe_unused]]
116 raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
117 const ChainOfDependences &CD = D.Chain;
118 int ChainSize = CD.size();
119 OS << "**DepChain Start::**\n";
120 for (int i = 0; i < ChainSize -1; ++i) {
121 OS << *(CD[i]) << " -->\n";
122 }
123 OS << *CD[ChainSize-1] << "\n";
124 return OS;
125 }
126
127 struct ReuseValue {
128 Instruction *Inst2Replace = nullptr;
129
130 // In the new PHI node that we'll construct this is the value that'll be
131 // used over the backedge. This is the value that gets reused from a
132 // previous iteration.
133 Instruction *BackedgeInst = nullptr;
134 std::map<Instruction *, DepChain *> DepChains;
135 int Iterations = -1;
136
137 ReuseValue() = default;
138
139 void reset() {
140 Inst2Replace = nullptr;
141 BackedgeInst = nullptr;
142 DepChains.clear();
143 Iterations = -1;
144 }
145 bool isDefined() { return Inst2Replace != nullptr; }
146 };
147
148 [[maybe_unused]]
149 raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
150 OS << "** ReuseValue ***\n";
151 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
152 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
153 return OS;
154 }
155
156 class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass {
157 public:
158 static char ID;
159
160 explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {}
161
162 StringRef getPassName() const override {
163 return "Hexagon-specific loop carried reuse for HVX vectors";
164 }
165
166 void getAnalysisUsage(AnalysisUsage &AU) const override {
167 AU.addRequiredID(ID&: LoopSimplifyID);
168 AU.addRequiredID(ID&: LCSSAID);
169 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
170 AU.addPreservedID(ID&: LCSSAID);
171 AU.setPreservesCFG();
172 }
173
174 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
175 };
176
177 class HexagonVectorLoopCarriedReuse {
178 public:
179 HexagonVectorLoopCarriedReuse(Loop *L, OptimizationRemarkEmitter &ORE)
180 : CurLoop(L), ORE(ORE) {};
181
182 bool run();
183
184 private:
185 SetVector<DepChain *> Dependences;
186 std::set<Instruction *> ReplacedInsts;
187 Loop *CurLoop;
188 OptimizationRemarkEmitter &ORE;
189 ReuseValue ReuseCandidate;
190
191 bool doVLCR();
192 void findLoopCarriedDeps();
193 void findValueToReuse();
194 void findDepChainFromPHI(Instruction *I, DepChain &D);
195 void reuseValue();
196 Value *findValueInBlock(Value *Op, BasicBlock *BB);
197 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
198 bool isEquivalentOperation(Instruction *I1, Instruction *I2);
199 bool canReplace(Instruction *I);
200 bool isCallInstCommutative(CallInst *C);
201 };
202
203} // end anonymous namespace
204
205char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
206
207INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
208 "Hexagon-specific predictive commoning for HVX vectors",
209 false, false)
210INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
211INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
212INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
213INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
214 "Hexagon-specific predictive commoning for HVX vectors",
215 false, false)
216
217PreservedAnalyses
218HexagonVectorLoopCarriedReusePass::run(Loop &L, LoopAnalysisManager &LAM,
219 LoopStandardAnalysisResults &AR,
220 LPMUpdater &U) {
221 OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
222 HexagonVectorLoopCarriedReuse Vlcr(&L, ORE);
223 if (!Vlcr.run())
224 return PreservedAnalyses::all();
225 PreservedAnalyses PA;
226 PA.preserveSet<CFGAnalyses>();
227 return PA;
228}
229
230bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L,
231 LPPassManager &LPM) {
232 if (skipLoop(L))
233 return false;
234 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
235 HexagonVectorLoopCarriedReuse Vlcr(L, ORE);
236 return Vlcr.run();
237}
238
239bool HexagonVectorLoopCarriedReuse::run() {
240 if (!CurLoop->getLoopPreheader()) {
241 ORE.emit(RemarkBuilder: [&]() {
242 return OptimizationRemarkMissed(DEBUG_TYPE, "NoPreheader",
243 CurLoop->getStartLoc(),
244 CurLoop->getHeader())
245 << "loop has no preheader";
246 });
247 return false;
248 }
249
250 // Work only on innermost loops.
251 if (!CurLoop->getSubLoops().empty()) {
252 ORE.emit(RemarkBuilder: [&]() {
253 return OptimizationRemarkMissed(DEBUG_TYPE, "NotInnermostLoop",
254 CurLoop->getStartLoc(),
255 CurLoop->getHeader())
256 << "pass only works on innermost loops";
257 });
258 return false;
259 }
260
261 // Work only on single basic blocks loops.
262 if (CurLoop->getNumBlocks() != 1) {
263 ORE.emit(RemarkBuilder: [&]() {
264 return OptimizationRemarkMissed(DEBUG_TYPE, "MultipleBlocks",
265 CurLoop->getStartLoc(),
266 CurLoop->getHeader())
267 << "loop has multiple basic blocks";
268 });
269 return false;
270 }
271
272 return doVLCR();
273}
274
275bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
276 switch (C->getCalledFunction()->getIntrinsicID()) {
277 case Intrinsic::hexagon_V6_vaddb:
278 case Intrinsic::hexagon_V6_vaddb_128B:
279 case Intrinsic::hexagon_V6_vaddh:
280 case Intrinsic::hexagon_V6_vaddh_128B:
281 case Intrinsic::hexagon_V6_vaddw:
282 case Intrinsic::hexagon_V6_vaddw_128B:
283 case Intrinsic::hexagon_V6_vaddubh:
284 case Intrinsic::hexagon_V6_vaddubh_128B:
285 case Intrinsic::hexagon_V6_vadduhw:
286 case Intrinsic::hexagon_V6_vadduhw_128B:
287 case Intrinsic::hexagon_V6_vaddhw:
288 case Intrinsic::hexagon_V6_vaddhw_128B:
289 case Intrinsic::hexagon_V6_vmaxb:
290 case Intrinsic::hexagon_V6_vmaxb_128B:
291 case Intrinsic::hexagon_V6_vmaxh:
292 case Intrinsic::hexagon_V6_vmaxh_128B:
293 case Intrinsic::hexagon_V6_vmaxw:
294 case Intrinsic::hexagon_V6_vmaxw_128B:
295 case Intrinsic::hexagon_V6_vmaxub:
296 case Intrinsic::hexagon_V6_vmaxub_128B:
297 case Intrinsic::hexagon_V6_vmaxuh:
298 case Intrinsic::hexagon_V6_vmaxuh_128B:
299 case Intrinsic::hexagon_V6_vminub:
300 case Intrinsic::hexagon_V6_vminub_128B:
301 case Intrinsic::hexagon_V6_vminuh:
302 case Intrinsic::hexagon_V6_vminuh_128B:
303 case Intrinsic::hexagon_V6_vminb:
304 case Intrinsic::hexagon_V6_vminb_128B:
305 case Intrinsic::hexagon_V6_vminh:
306 case Intrinsic::hexagon_V6_vminh_128B:
307 case Intrinsic::hexagon_V6_vminw:
308 case Intrinsic::hexagon_V6_vminw_128B:
309 case Intrinsic::hexagon_V6_vmpyub:
310 case Intrinsic::hexagon_V6_vmpyub_128B:
311 case Intrinsic::hexagon_V6_vmpyuh:
312 case Intrinsic::hexagon_V6_vmpyuh_128B:
313 case Intrinsic::hexagon_V6_vavgub:
314 case Intrinsic::hexagon_V6_vavgub_128B:
315 case Intrinsic::hexagon_V6_vavgh:
316 case Intrinsic::hexagon_V6_vavgh_128B:
317 case Intrinsic::hexagon_V6_vavguh:
318 case Intrinsic::hexagon_V6_vavguh_128B:
319 case Intrinsic::hexagon_V6_vavgw:
320 case Intrinsic::hexagon_V6_vavgw_128B:
321 case Intrinsic::hexagon_V6_vavgb:
322 case Intrinsic::hexagon_V6_vavgb_128B:
323 case Intrinsic::hexagon_V6_vavguw:
324 case Intrinsic::hexagon_V6_vavguw_128B:
325 case Intrinsic::hexagon_V6_vabsdiffh:
326 case Intrinsic::hexagon_V6_vabsdiffh_128B:
327 case Intrinsic::hexagon_V6_vabsdiffub:
328 case Intrinsic::hexagon_V6_vabsdiffub_128B:
329 case Intrinsic::hexagon_V6_vabsdiffuh:
330 case Intrinsic::hexagon_V6_vabsdiffuh_128B:
331 case Intrinsic::hexagon_V6_vabsdiffw:
332 case Intrinsic::hexagon_V6_vabsdiffw_128B:
333 return true;
334 default:
335 return false;
336 }
337}
338
339bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
340 Instruction *I2) {
341 if (!I1->isSameOperationAs(I: I2))
342 return false;
343 // This check is in place specifically for intrinsics. isSameOperationAs will
344 // return two for any two hexagon intrinsics because they are essentially the
345 // same instruction (CallInst). We need to scratch the surface to see if they
346 // are calls to the same function.
347 if (CallInst *C1 = dyn_cast<CallInst>(Val: I1)) {
348 if (CallInst *C2 = dyn_cast<CallInst>(Val: I2)) {
349 if (C1->getCalledFunction() != C2->getCalledFunction())
350 return false;
351 }
352 }
353
354 // If both the Instructions are of Vector Type and any of the element
355 // is integer constant, check their values too for equivalence.
356 if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
357 unsigned NumOperands = I1->getNumOperands();
358 for (unsigned i = 0; i < NumOperands; ++i) {
359 ConstantInt *C1 = dyn_cast<ConstantInt>(Val: I1->getOperand(i));
360 ConstantInt *C2 = dyn_cast<ConstantInt>(Val: I2->getOperand(i));
361 if(!C1) continue;
362 assert(C2);
363 if (C1->getSExtValue() != C2->getSExtValue())
364 return false;
365 }
366 }
367
368 return true;
369}
370
371bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
372 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I);
373 if (!II)
374 return true;
375
376 switch (II->getIntrinsicID()) {
377 case Intrinsic::hexagon_V6_hi:
378 case Intrinsic::hexagon_V6_lo:
379 case Intrinsic::hexagon_V6_hi_128B:
380 case Intrinsic::hexagon_V6_lo_128B:
381 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
382 return false;
383 default:
384 return true;
385 }
386}
387void HexagonVectorLoopCarriedReuse::findValueToReuse() {
388 for (auto *D : Dependences) {
389 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
390 if (D->iterations() > HexagonVLCRIterationLim) {
391 LLVM_DEBUG(
392 dbgs()
393 << ".. Skipping because number of iterations > than the limit\n");
394 continue;
395 }
396
397 PHINode *PN = cast<PHINode>(Val: D->front());
398 Instruction *BEInst = D->back();
399 int Iters = D->iterations();
400 BasicBlock *BB = PN->getParent();
401 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
402 << " can be reused\n");
403
404 SmallVector<Instruction *, 4> PNUsers;
405 for (Use &U : PN->uses()) {
406 Instruction *User = cast<Instruction>(Val: U.getUser());
407
408 if (User->getParent() != BB)
409 continue;
410 if (ReplacedInsts.count(x: User)) {
411 LLVM_DEBUG(dbgs() << *User
412 << " has already been replaced. Skipping...\n");
413 continue;
414 }
415 if (isa<PHINode>(Val: User))
416 continue;
417 if (User->mayHaveSideEffects())
418 continue;
419 if (!canReplace(I: User))
420 continue;
421
422 PNUsers.push_back(Elt: User);
423 }
424 LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
425
426 // For each interesting use I of PN, find an Instruction BEUser that
427 // performs the same operation as I on BEInst and whose other operands,
428 // if any, can also be rematerialized in OtherBB. We stop when we find the
429 // first such Instruction BEUser. This is because once BEUser is
430 // rematerialized in OtherBB, we may find more such "fixup" opportunities
431 // in this block. So, we'll start over again.
432 for (Instruction *I : PNUsers) {
433 for (Use &U : BEInst->uses()) {
434 Instruction *BEUser = cast<Instruction>(Val: U.getUser());
435
436 if (BEUser->getParent() != BB)
437 continue;
438 if (!isEquivalentOperation(I1: I, I2: BEUser))
439 continue;
440
441 int NumOperands = I->getNumOperands();
442
443 // Take operands of each PNUser one by one and try to find DepChain
444 // with every operand of the BEUser. If any of the operands of BEUser
445 // has DepChain with current operand of the PNUser, break the matcher
446 // loop. Keep doing this for Every PNUser operand. If PNUser operand
447 // does not have DepChain with any of the BEUser operand, break the
448 // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
449 // This ensures that DepChain exist for all the PNUser operand with
450 // BEUser operand. This also ensures that DepChains are independent of
451 // the positions in PNUser and BEUser.
452 std::map<Instruction *, DepChain *> DepChains;
453 CallInst *C1 = dyn_cast<CallInst>(Val: I);
454 if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C: C1))) {
455 bool Found = false;
456 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
457 Value *Op = I->getOperand(i: OpNo);
458 Instruction *OpInst = dyn_cast<Instruction>(Val: Op);
459 Found = false;
460 for (int T = 0; T < NumOperands; ++T) {
461 Value *BEOp = BEUser->getOperand(i: T);
462 Instruction *BEOpInst = dyn_cast<Instruction>(Val: BEOp);
463 if (!OpInst && !BEOpInst) {
464 if (Op == BEOp) {
465 Found = true;
466 break;
467 }
468 }
469
470 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
471 continue;
472
473 DepChain *D = getDepChainBtwn(I1: OpInst, I2: BEOpInst, Iters);
474
475 if (D) {
476 Found = true;
477 DepChains[OpInst] = D;
478 break;
479 }
480 }
481 if (!Found) {
482 BEUser = nullptr;
483 break;
484 }
485 }
486 } else {
487
488 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
489 Value *Op = I->getOperand(i: OpNo);
490 Value *BEOp = BEUser->getOperand(i: OpNo);
491
492 Instruction *OpInst = dyn_cast<Instruction>(Val: Op);
493 if (!OpInst) {
494 if (Op == BEOp)
495 continue;
496 // Do not allow reuse to occur when the operands may be different
497 // values.
498 BEUser = nullptr;
499 break;
500 }
501
502 Instruction *BEOpInst = dyn_cast<Instruction>(Val: BEOp);
503 DepChain *D = getDepChainBtwn(I1: OpInst, I2: BEOpInst, Iters);
504
505 if (D) {
506 DepChains[OpInst] = D;
507 } else {
508 BEUser = nullptr;
509 break;
510 }
511 }
512 }
513 if (BEUser) {
514 LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
515 ReuseCandidate.Inst2Replace = I;
516 ReuseCandidate.BackedgeInst = BEUser;
517 ReuseCandidate.DepChains = std::move(DepChains);
518 ReuseCandidate.Iterations = Iters;
519 return;
520 }
521 ReuseCandidate.reset();
522 }
523 }
524 }
525 ReuseCandidate.reset();
526}
527
528Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
529 BasicBlock *BB) {
530 PHINode *PN = dyn_cast<PHINode>(Val: Op);
531 assert(PN);
532 Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
533 return ValueInBlock;
534}
535
536void HexagonVectorLoopCarriedReuse::reuseValue() {
537 LLVM_DEBUG(dbgs() << ReuseCandidate);
538 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
539 Instruction *BEInst = ReuseCandidate.BackedgeInst;
540 int NumOperands = Inst2Replace->getNumOperands();
541 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
542 int Iterations = ReuseCandidate.Iterations;
543 BasicBlock *LoopPH = CurLoop->getLoopPreheader();
544 assert(!DepChains.empty() && "No DepChains");
545 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
546
547 SmallVector<Instruction *, 4> InstsInPreheader;
548 for (int i = 0; i < Iterations; ++i) {
549 Instruction *InstInPreheader = Inst2Replace->clone();
550 for (int j = 0; j < NumOperands; ++j) {
551 Instruction *I = dyn_cast<Instruction>(Val: Inst2Replace->getOperand(i: j));
552 if (!I)
553 continue;
554 // Get the DepChain corresponding to this operand.
555 DepChain &D = *DepChains[I];
556 // Get the PHI for the iteration number and find
557 // the incoming value from the Loop Preheader for
558 // that PHI.
559 Value *ValInPreheader = findValueInBlock(Op: D[i], BB: LoopPH);
560 InstInPreheader->setOperand(i: j, Val: ValInPreheader);
561 }
562 InstsInPreheader.push_back(Elt: InstInPreheader);
563 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
564 InstInPreheader->insertBefore(InsertPos: LoopPH->getTerminator()->getIterator());
565 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
566 << LoopPH->getName() << "\n");
567 }
568 BasicBlock *BB = BEInst->getParent();
569 IRBuilder<> IRB(BB);
570 IRB.SetInsertPoint(TheBB: BB, IP: BB->getFirstNonPHIIt());
571 Value *BEVal = BEInst;
572 PHINode *NewPhi;
573 for (int i = Iterations-1; i >=0 ; --i) {
574 Instruction *InstInPreheader = InstsInPreheader[i];
575 NewPhi = IRB.CreatePHI(Ty: InstInPreheader->getType(), NumReservedValues: 2);
576 NewPhi->addIncoming(V: InstInPreheader, BB: LoopPH);
577 NewPhi->addIncoming(V: BEVal, BB);
578 LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
579 << "\n");
580 BEVal = NewPhi;
581 }
582 // We are in LCSSA form. So, a value defined inside the Loop is used only
583 // inside the loop. So, the following is safe.
584 Inst2Replace->replaceAllUsesWith(V: NewPhi);
585 ReplacedInsts.insert(x: Inst2Replace);
586 ++HexagonNumVectorLoopCarriedReuse;
587 ORE.emit(RemarkBuilder: [&]() {
588 return OptimizationRemark(DEBUG_TYPE, "VectorReuse",
589 Inst2Replace->getDebugLoc(),
590 Inst2Replace->getParent())
591 << "reused loop-carried vector value";
592 });
593}
594
595bool HexagonVectorLoopCarriedReuse::doVLCR() {
596 assert(CurLoop->getSubLoops().empty() &&
597 "Can do VLCR on the innermost loop only");
598 assert((CurLoop->getNumBlocks() == 1) &&
599 "Can do VLCR only on single block loops");
600
601 bool Changed = false;
602 bool Continue;
603
604 LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
605 do {
606 // Reset datastructures.
607 Dependences.clear();
608 Continue = false;
609
610 findLoopCarriedDeps();
611 findValueToReuse();
612 if (ReuseCandidate.isDefined()) {
613 reuseValue();
614 Changed = true;
615 Continue = true;
616 } else if (!Changed) {
617 ORE.emit(RemarkBuilder: [&]() {
618 return OptimizationRemarkMissed(DEBUG_TYPE, "NoCandidateFound",
619 CurLoop->getStartLoc(),
620 CurLoop->getHeader())
621 << "no loop-carried reuse candidate found";
622 });
623 }
624 llvm::for_each(Range&: Dependences, F: std::default_delete<DepChain>());
625 } while (Continue);
626 return Changed;
627}
628
629void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
630 DepChain &D) {
631 PHINode *PN = dyn_cast<PHINode>(Val: I);
632 if (!PN) {
633 D.push_back(I);
634 return;
635 } else {
636 auto NumIncomingValues = PN->getNumIncomingValues();
637 if (NumIncomingValues != 2) {
638 D.clear();
639 return;
640 }
641
642 BasicBlock *BB = PN->getParent();
643 if (BB != CurLoop->getHeader()) {
644 D.clear();
645 return;
646 }
647
648 Value *BEVal = PN->getIncomingValueForBlock(BB);
649 Instruction *BEInst = dyn_cast<Instruction>(Val: BEVal);
650 // This is a single block loop with a preheader, so at least
651 // one value should come over the backedge.
652 assert(BEInst && "There should be a value over the backedge");
653
654 Value *PreHdrVal =
655 PN->getIncomingValueForBlock(BB: CurLoop->getLoopPreheader());
656 if(!PreHdrVal || !isa<Instruction>(Val: PreHdrVal)) {
657 D.clear();
658 return;
659 }
660 D.push_back(I: PN);
661 findDepChainFromPHI(I: BEInst, D);
662 }
663}
664
665DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
666 Instruction *I2,
667 int Iters) {
668 for (auto *D : Dependences) {
669 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
670 return D;
671 }
672 return nullptr;
673}
674
675void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
676 BasicBlock *BB = CurLoop->getHeader();
677 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(Val: I); ++I) {
678 auto *PN = cast<PHINode>(Val&: I);
679 if (!isa<VectorType>(Val: PN->getType()))
680 continue;
681
682 DepChain *D = new DepChain();
683 findDepChainFromPHI(I: PN, D&: *D);
684 if (D->size() != 0)
685 Dependences.insert(X: D);
686 else
687 delete D;
688 }
689 LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
690 LLVM_DEBUG(for (const DepChain *D : Dependences) dbgs() << *D << "\n";);
691}
692
693Pass *llvm::createHexagonVectorLoopCarriedReuseLegacyPass() {
694 return new HexagonVectorLoopCarriedReuseLegacyPass();
695}
696