1//===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass identifies loops where we can generate the Hexagon hardware
10// loop instruction. The hardware loop can perform loop branches with a
11// zero-cycle overhead.
12//
13// The pattern that defines the induction variable can changed depending on
14// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15// normalizes induction variables, and the Loop Strength Reduction pass
16// run by 'llc' may also make changes to the induction variable.
17// The pattern detected by this phase is due to running Strength Reduction.
18//
19// Criteria for hardware loops:
20// - Countable loops (w/ ind. var for a trip count)
21// - Assumes loops are normalized by IndVarSimplify
22// - Try inner-most loops first
23// - No function calls in loops.
24//
25//===----------------------------------------------------------------------===//
26
27#include "Hexagon.h"
28#include "HexagonInstrInfo.h"
29#include "HexagonSubtarget.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
36#include "llvm/CodeGen/MachineBasicBlock.h"
37#include "llvm/CodeGen/MachineDominators.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineFunctionPass.h"
40#include "llvm/CodeGen/MachineInstr.h"
41#include "llvm/CodeGen/MachineInstrBuilder.h"
42#include "llvm/CodeGen/MachineLoopInfo.h"
43#include "llvm/CodeGen/MachineOperand.h"
44#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
45#include "llvm/CodeGen/MachineRegisterInfo.h"
46#include "llvm/CodeGen/TargetRegisterInfo.h"
47#include "llvm/IR/DebugLoc.h"
48#include "llvm/InitializePasses.h"
49#include "llvm/Pass.h"
50#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/Debug.h"
52#include "llvm/Support/ErrorHandling.h"
53#include "llvm/Support/MathExtras.h"
54#include "llvm/Support/raw_ostream.h"
55#include <cassert>
56#include <cstdint>
57#include <cstdlib>
58#include <iterator>
59#include <map>
60#include <set>
61#include <string>
62#include <utility>
63#include <vector>
64
65using namespace llvm;
66
67#define DEBUG_TYPE "hwloops"
68
69#ifndef NDEBUG
70static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
71
72// Option to create preheader only for a specific function.
73static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
74 cl::init(""));
75#endif
76
77// Option to create a preheader if one doesn't exist.
78static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
79 cl::Hidden, cl::init(Val: true),
80 cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
81
82// Turn it off by default. If a preheader block is not created here, the
83// software pipeliner may be unable to find a block suitable to serve as
84// a preheader. In that case SWP will not run.
85static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::Hidden,
86 cl::desc("Allow speculation of preheader "
87 "instructions"));
88
89STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
90
91namespace {
92
93 class CountValue;
94
95 struct HexagonHardwareLoops : public MachineFunctionPass {
96 MachineLoopInfo *MLI;
97 MachineRegisterInfo *MRI;
98 MachineDominatorTree *MDT;
99 const HexagonInstrInfo *TII;
100 const HexagonRegisterInfo *TRI;
101 MachineOptimizationRemarkEmitter *MORE;
102#ifndef NDEBUG
103 static int Counter;
104#endif
105
106 public:
107 static char ID;
108
109 HexagonHardwareLoops() : MachineFunctionPass(ID) {}
110
111 bool runOnMachineFunction(MachineFunction &MF) override;
112
113 StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
114
115 void getAnalysisUsage(AnalysisUsage &AU) const override {
116 AU.addRequired<MachineDominatorTreeWrapperPass>();
117 AU.addRequired<MachineLoopInfoWrapperPass>();
118 AU.addRequired<MachineOptimizationRemarkEmitterPass>();
119 MachineFunctionPass::getAnalysisUsage(AU);
120 }
121
122 private:
123 using LoopFeederMap = std::map<Register, MachineInstr *>;
124
125 /// Kinds of comparisons in the compare instructions.
126 struct Comparison {
127 enum Kind {
128 EQ = 0x01,
129 NE = 0x02,
130 L = 0x04,
131 G = 0x08,
132 U = 0x40,
133 LTs = L,
134 LEs = L | EQ,
135 GTs = G,
136 GEs = G | EQ,
137 LTu = L | U,
138 LEu = L | EQ | U,
139 GTu = G | U,
140 GEu = G | EQ | U
141 };
142
143 static Kind getSwappedComparison(Kind Cmp) {
144 assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
145 if ((Cmp & L) || (Cmp & G))
146 return (Kind)(Cmp ^ (L|G));
147 return Cmp;
148 }
149
150 static Kind getNegatedComparison(Kind Cmp) {
151 if ((Cmp & L) || (Cmp & G))
152 return (Kind)((Cmp ^ (L | G)) ^ EQ);
153 if ((Cmp & NE) || (Cmp & EQ))
154 return (Kind)(Cmp ^ (EQ | NE));
155 return (Kind)0;
156 }
157
158 static bool isSigned(Kind Cmp) {
159 return (Cmp & (L | G) && !(Cmp & U));
160 }
161
162 static bool isUnsigned(Kind Cmp) {
163 return (Cmp & U);
164 }
165 };
166
167 /// Find the register that contains the loop controlling
168 /// induction variable.
169 /// If successful, it will return true and set the \p Reg, \p IVBump
170 /// and \p IVOp arguments. Otherwise it will return false.
171 /// The returned induction register is the register R that follows the
172 /// following induction pattern:
173 /// loop:
174 /// R = phi ..., [ R.next, LatchBlock ]
175 /// R.next = R + #bump
176 /// if (R.next < #N) goto loop
177 /// IVBump is the immediate value added to R, and IVOp is the instruction
178 /// "R.next = R + #bump".
179 bool findInductionRegister(MachineLoop *L, Register &Reg,
180 int64_t &IVBump, MachineInstr *&IVOp) const;
181
182 /// Return the comparison kind for the specified opcode.
183 Comparison::Kind getComparisonKind(unsigned CondOpc,
184 MachineOperand *InitialValue,
185 const MachineOperand *Endvalue,
186 int64_t IVBump) const;
187
188 /// Analyze the statements in a loop to determine if the loop
189 /// has a computable trip count and, if so, return a value that represents
190 /// the trip count expression.
191 CountValue *getLoopTripCount(MachineLoop *L,
192 SmallVectorImpl<MachineInstr *> &OldInsts);
193
194 /// Return the expression that represents the number of times
195 /// a loop iterates. The function takes the operands that represent the
196 /// loop start value, loop end value, and induction value. Based upon
197 /// these operands, the function attempts to compute the trip count.
198 /// If the trip count is not directly available (as an immediate value,
199 /// or a register), the function will attempt to insert computation of it
200 /// to the loop's preheader.
201 CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
202 const MachineOperand *End, Register IVReg,
203 int64_t IVBump, Comparison::Kind Cmp) const;
204
205 /// Return true if the instruction is not valid within a hardware
206 /// loop.
207 bool isInvalidLoopOperation(const MachineInstr *MI,
208 bool IsInnerHWLoop) const;
209
210 /// Return true if the loop contains an instruction that inhibits
211 /// using the hardware loop.
212 bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
213
214 /// Given a loop, check if we can convert it to a hardware loop.
215 /// If so, then perform the conversion and return true.
216 bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
217
218 /// Return true if the instruction is now dead.
219 bool isDead(const MachineInstr *MI,
220 SmallVectorImpl<MachineInstr *> &DeadPhis) const;
221
222 /// Remove the instruction if it is now dead.
223 void removeIfDead(MachineInstr *MI);
224
225 /// Make sure that the "bump" instruction executes before the
226 /// compare. We need that for the IV fixup, so that the compare
227 /// instruction would not use a bumped value that has not yet been
228 /// defined. If the instructions are out of order, try to reorder them.
229 bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
230
231 /// Return true if MO and MI pair is visited only once. If visited
232 /// more than once, this indicates there is recursion. In such a case,
233 /// return false.
234 bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
235 const MachineOperand *MO,
236 LoopFeederMap &LoopFeederPhi) const;
237
238 /// Return true if the Phi may generate a value that may underflow,
239 /// or may wrap.
240 bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
241 MachineBasicBlock *MBB, MachineLoop *L,
242 LoopFeederMap &LoopFeederPhi) const;
243
244 /// Return true if the induction variable may underflow an unsigned
245 /// value in the first iteration.
246 bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
247 const MachineOperand *EndVal,
248 MachineBasicBlock *MBB, MachineLoop *L,
249 LoopFeederMap &LoopFeederPhi) const;
250
251 /// Check if the given operand has a compile-time known constant
252 /// value. Return true if yes, and false otherwise. When returning true, set
253 /// Val to the corresponding constant value.
254 bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
255
256 /// Check if the operand has a compile-time known constant value.
257 bool isImmediate(const MachineOperand &MO) const {
258 int64_t V;
259 return checkForImmediate(MO, Val&: V);
260 }
261
262 /// Return the immediate for the specified operand.
263 int64_t getImmediate(const MachineOperand &MO) const {
264 int64_t V;
265 if (!checkForImmediate(MO, Val&: V))
266 llvm_unreachable("Invalid operand");
267 return V;
268 }
269
270 /// Reset the given machine operand to now refer to a new immediate
271 /// value. Assumes that the operand was already referencing an immediate
272 /// value, either directly, or via a register.
273 void setImmediate(MachineOperand &MO, int64_t Val);
274
275 /// Fix the data flow of the induction variable.
276 /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
277 /// |
278 /// +-> back to phi
279 /// where "bump" is the increment of the induction variable:
280 /// iv = iv + #const.
281 /// Due to some prior code transformations, the actual flow may look
282 /// like this:
283 /// phi -+-> bump ---> back to phi
284 /// |
285 /// +-> comparison-in-latch (against upper_bound-bump),
286 /// i.e. the comparison that controls the loop execution may be using
287 /// the value of the induction variable from before the increment.
288 ///
289 /// Return true if the loop's flow is the desired one (i.e. it's
290 /// either been fixed, or no fixing was necessary).
291 /// Otherwise, return false. This can happen if the induction variable
292 /// couldn't be identified, or if the value in the latch's comparison
293 /// cannot be adjusted to reflect the post-bump value.
294 bool fixupInductionVariable(MachineLoop *L);
295
296 /// Given a loop, if it does not have a preheader, create one.
297 /// Return the block that is the preheader.
298 MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
299 };
300
301 char HexagonHardwareLoops::ID = 0;
302#ifndef NDEBUG
303 int HexagonHardwareLoops::Counter = 0;
304#endif
305
306 /// Abstraction for a trip count of a loop. A smaller version
307 /// of the MachineOperand class without the concerns of changing the
308 /// operand representation.
309 class CountValue {
310 public:
311 enum CountValueType {
312 CV_Register,
313 CV_Immediate
314 };
315
316 private:
317 CountValueType Kind;
318 union Values {
319 Values() : R{.Reg: Register(), .Sub: 0} {}
320 Values(const Values&) = default;
321 struct {
322 Register Reg;
323 unsigned Sub;
324 } R;
325 unsigned ImmVal;
326 } Contents;
327
328 public:
329 explicit CountValue(CountValueType t, Register v, unsigned u = 0) {
330 Kind = t;
331 if (Kind == CV_Register) {
332 Contents.R.Reg = v;
333 Contents.R.Sub = u;
334 } else {
335 Contents.ImmVal = v;
336 }
337 }
338
339 bool isReg() const { return Kind == CV_Register; }
340 bool isImm() const { return Kind == CV_Immediate; }
341
342 Register getReg() const {
343 assert(isReg() && "Wrong CountValue accessor");
344 return Contents.R.Reg;
345 }
346
347 unsigned getSubReg() const {
348 assert(isReg() && "Wrong CountValue accessor");
349 return Contents.R.Sub;
350 }
351
352 unsigned getImm() const {
353 assert(isImm() && "Wrong CountValue accessor");
354 return Contents.ImmVal;
355 }
356
357 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
358 if (isReg()) { OS << printReg(Reg: Contents.R.Reg, TRI, SubIdx: Contents.R.Sub); }
359 if (isImm()) { OS << Contents.ImmVal; }
360 }
361 };
362
363} // end anonymous namespace
364
365INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
366 "Hexagon Hardware Loops", false, false)
367INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
368INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
369INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
370 "Hexagon Hardware Loops", false, false)
371
372FunctionPass *llvm::createHexagonHardwareLoops() {
373 return new HexagonHardwareLoops();
374}
375
376bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
377 LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
378 if (skipFunction(F: MF.getFunction()))
379 return false;
380
381 bool Changed = false;
382
383 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
384 MRI = &MF.getRegInfo();
385 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
386 const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
387 TII = HST.getInstrInfo();
388 TRI = HST.getRegisterInfo();
389
390 MORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
391
392 for (auto &L : *MLI)
393 if (L->isOutermost()) {
394 bool L0Used = false;
395 bool L1Used = false;
396 Changed |= convertToHardwareLoop(L, L0used&: L0Used, L1used&: L1Used);
397 }
398
399 return Changed;
400}
401
402bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
403 Register &Reg,
404 int64_t &IVBump,
405 MachineInstr *&IVOp
406 ) const {
407 MachineBasicBlock *Header = L->getHeader();
408 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpeculativePreheader: SpecPreheader);
409 MachineBasicBlock *Latch = L->getLoopLatch();
410 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
411 if (!Header || !Preheader || !Latch || !ExitingBlock)
412 return false;
413
414 // This pair represents an induction register together with an immediate
415 // value that will be added to it in each loop iteration.
416 using RegisterBump = std::pair<Register, int64_t>;
417
418 // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
419 // from an induction operation
420 // R.next = R + bump
421 // where bump is an immediate value.
422 using InductionMap = std::map<Register, RegisterBump>;
423
424 InductionMap IndMap;
425
426 using instr_iterator = MachineBasicBlock::instr_iterator;
427
428 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
429 I != E && I->isPHI(); ++I) {
430 MachineInstr *Phi = &*I;
431
432 // Have a PHI instruction. Get the operand that corresponds to the
433 // latch block, and see if is a result of an addition of form "reg+imm",
434 // where the "reg" is defined by the PHI node we are looking at.
435 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
436 if (Phi->getOperand(i: i+1).getMBB() != Latch)
437 continue;
438
439 Register PhiOpReg = Phi->getOperand(i).getReg();
440 MachineInstr *DI = MRI->getVRegDef(Reg: PhiOpReg);
441
442 if (DI->getDesc().isAdd()) {
443 // If the register operand to the add is the PHI we're looking at, this
444 // meets the induction pattern.
445 Register IndReg = DI->getOperand(i: 1).getReg();
446 MachineOperand &Opnd2 = DI->getOperand(i: 2);
447 int64_t V;
448 if (MRI->getVRegDef(Reg: IndReg) == Phi && checkForImmediate(MO: Opnd2, Val&: V)) {
449 Register UpdReg = DI->getOperand(i: 0).getReg();
450 IndMap.insert(x: std::make_pair(x&: UpdReg, y: std::make_pair(x&: IndReg, y&: V)));
451 }
452 }
453 } // for (i)
454 } // for (instr)
455
456 SmallVector<MachineOperand,2> Cond;
457 MachineBasicBlock *TB = nullptr, *FB = nullptr;
458 bool NotAnalyzed = TII->analyzeBranch(MBB&: *ExitingBlock, TBB&: TB, FBB&: FB, Cond, AllowModify: false);
459 if (NotAnalyzed)
460 return false;
461
462 Register PredR;
463 unsigned PredPos;
464 RegState PredRegFlags;
465 if (!TII->getPredReg(Cond, PredReg&: PredR, PredRegPos&: PredPos, PredRegFlags))
466 return false;
467
468 MachineInstr *PredI = MRI->getVRegDef(Reg: PredR);
469 if (!PredI->isCompare())
470 return false;
471
472 Register CmpReg1, CmpReg2;
473 int64_t CmpImm = 0, CmpMask = 0;
474 bool CmpAnalyzed =
475 TII->analyzeCompare(MI: *PredI, SrcReg&: CmpReg1, SrcReg2&: CmpReg2, Mask&: CmpMask, Value&: CmpImm);
476 // Fail if the compare was not analyzed, or it's not comparing a register
477 // with an immediate value. Not checking the mask here, since we handle
478 // the individual compare opcodes (including A4_cmpb*) later on.
479 if (!CmpAnalyzed)
480 return false;
481
482 // Exactly one of the input registers to the comparison should be among
483 // the induction registers.
484 InductionMap::iterator IndMapEnd = IndMap.end();
485 InductionMap::iterator F = IndMapEnd;
486 if (CmpReg1 != 0) {
487 InductionMap::iterator F1 = IndMap.find(x: CmpReg1);
488 if (F1 != IndMapEnd)
489 F = F1;
490 }
491 if (CmpReg2 != 0) {
492 InductionMap::iterator F2 = IndMap.find(x: CmpReg2);
493 if (F2 != IndMapEnd) {
494 if (F != IndMapEnd)
495 return false;
496 F = F2;
497 }
498 }
499 if (F == IndMapEnd)
500 return false;
501
502 Reg = F->second.first;
503 IVBump = F->second.second;
504 IVOp = MRI->getVRegDef(Reg: F->first);
505 return true;
506}
507
508// Return the comparison kind for the specified opcode.
509HexagonHardwareLoops::Comparison::Kind
510HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
511 MachineOperand *InitialValue,
512 const MachineOperand *EndValue,
513 int64_t IVBump) const {
514 Comparison::Kind Cmp = (Comparison::Kind)0;
515 switch (CondOpc) {
516 case Hexagon::C2_cmpeq:
517 case Hexagon::C2_cmpeqi:
518 case Hexagon::C2_cmpeqp:
519 Cmp = Comparison::EQ;
520 break;
521 case Hexagon::C4_cmpneq:
522 case Hexagon::C4_cmpneqi:
523 Cmp = Comparison::NE;
524 break;
525 case Hexagon::C2_cmplt:
526 Cmp = Comparison::LTs;
527 break;
528 case Hexagon::C2_cmpltu:
529 Cmp = Comparison::LTu;
530 break;
531 case Hexagon::C4_cmplte:
532 case Hexagon::C4_cmpltei:
533 Cmp = Comparison::LEs;
534 break;
535 case Hexagon::C4_cmplteu:
536 case Hexagon::C4_cmplteui:
537 Cmp = Comparison::LEu;
538 break;
539 case Hexagon::C2_cmpgt:
540 case Hexagon::C2_cmpgti:
541 case Hexagon::C2_cmpgtp:
542 Cmp = Comparison::GTs;
543 break;
544 case Hexagon::C2_cmpgtu:
545 case Hexagon::C2_cmpgtui:
546 case Hexagon::C2_cmpgtup:
547 Cmp = Comparison::GTu;
548 break;
549 case Hexagon::C2_cmpgei:
550 Cmp = Comparison::GEs;
551 break;
552 case Hexagon::C2_cmpgeui:
553 Cmp = Comparison::GEs;
554 break;
555 default:
556 return (Comparison::Kind)0;
557 }
558 return Cmp;
559}
560
561/// Analyze the statements in a loop to determine if the loop has
562/// a computable trip count and, if so, return a value that represents
563/// the trip count expression.
564///
565/// This function iterates over the phi nodes in the loop to check for
566/// induction variable patterns that are used in the calculation for
567/// the number of time the loop is executed.
568CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
569 SmallVectorImpl<MachineInstr *> &OldInsts) {
570 MachineBasicBlock *TopMBB = L->getTopBlock();
571 MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
572 assert(PI != TopMBB->pred_end() &&
573 "Loop must have more than one incoming edge!");
574 MachineBasicBlock *Backedge = *PI++;
575 if (PI == TopMBB->pred_end()) // dead loop?
576 return nullptr;
577 MachineBasicBlock *Incoming = *PI++;
578 if (PI != TopMBB->pred_end()) // multiple backedges?
579 return nullptr;
580
581 // Make sure there is one incoming and one backedge and determine which
582 // is which.
583 if (L->contains(BB: Incoming)) {
584 if (L->contains(BB: Backedge))
585 return nullptr;
586 std::swap(a&: Incoming, b&: Backedge);
587 } else if (!L->contains(BB: Backedge))
588 return nullptr;
589
590 // Look for the cmp instruction to determine if we can get a useful trip
591 // count. The trip count can be either a register or an immediate. The
592 // location of the value depends upon the type (reg or imm).
593 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
594 if (!ExitingBlock)
595 return nullptr;
596
597 Register IVReg = 0;
598 int64_t IVBump = 0;
599 MachineInstr *IVOp;
600 bool FoundIV = findInductionRegister(L, Reg&: IVReg, IVBump, IVOp);
601 if (!FoundIV)
602 return nullptr;
603
604 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpeculativePreheader: SpecPreheader);
605
606 MachineOperand *InitialValue = nullptr;
607 MachineInstr *IV_Phi = MRI->getVRegDef(Reg: IVReg);
608 MachineBasicBlock *Latch = L->getLoopLatch();
609 for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
610 MachineBasicBlock *MBB = IV_Phi->getOperand(i: i+1).getMBB();
611 if (MBB == Preheader)
612 InitialValue = &IV_Phi->getOperand(i);
613 else if (MBB == Latch)
614 IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
615 }
616 if (!InitialValue)
617 return nullptr;
618
619 SmallVector<MachineOperand,2> Cond;
620 MachineBasicBlock *TB = nullptr, *FB = nullptr;
621 bool NotAnalyzed = TII->analyzeBranch(MBB&: *ExitingBlock, TBB&: TB, FBB&: FB, Cond, AllowModify: false);
622 if (NotAnalyzed)
623 return nullptr;
624
625 MachineBasicBlock *Header = L->getHeader();
626 // TB must be non-null. If FB is also non-null, one of them must be
627 // the header. Otherwise, branch to TB could be exiting the loop, and
628 // the fall through can go to the header.
629 assert (TB && "Exit block without a branch?");
630 if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
631 MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
632 SmallVector<MachineOperand,2> LCond;
633 bool NotAnalyzed = TII->analyzeBranch(MBB&: *Latch, TBB&: LTB, FBB&: LFB, Cond&: LCond, AllowModify: false);
634 if (NotAnalyzed)
635 return nullptr;
636 if (TB == Latch)
637 TB = (LTB == Header) ? LTB : LFB;
638 else
639 FB = (LTB == Header) ? LTB: LFB;
640 }
641 assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
642 if (!TB || (FB && TB != Header && FB != Header))
643 return nullptr;
644
645 // Branches of form "if (!P) ..." cause HexagonInstrInfo::analyzeBranch
646 // to put imm(0), followed by P in the vector Cond.
647 // If TB is not the header, it means that the "not-taken" path must lead
648 // to the header.
649 bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
650 Register PredReg;
651 unsigned PredPos;
652 RegState PredRegFlags;
653 if (!TII->getPredReg(Cond, PredReg, PredRegPos&: PredPos, PredRegFlags))
654 return nullptr;
655 MachineInstr *CondI = MRI->getVRegDef(Reg: PredReg);
656 unsigned CondOpc = CondI->getOpcode();
657
658 Register CmpReg1, CmpReg2;
659 int64_t Mask = 0, ImmValue = 0;
660 bool AnalyzedCmp =
661 TII->analyzeCompare(MI: *CondI, SrcReg&: CmpReg1, SrcReg2&: CmpReg2, Mask, Value&: ImmValue);
662 if (!AnalyzedCmp)
663 return nullptr;
664
665 // The comparison operator type determines how we compute the loop
666 // trip count.
667 OldInsts.push_back(Elt: CondI);
668 OldInsts.push_back(Elt: IVOp);
669
670 // Sadly, the following code gets information based on the position
671 // of the operands in the compare instruction. This has to be done
672 // this way, because the comparisons check for a specific relationship
673 // between the operands (e.g. is-less-than), rather than to find out
674 // what relationship the operands are in (as on PPC).
675 Comparison::Kind Cmp;
676 bool isSwapped = false;
677 const MachineOperand &Op1 = CondI->getOperand(i: 1);
678 const MachineOperand &Op2 = CondI->getOperand(i: 2);
679 const MachineOperand *EndValue = nullptr;
680
681 if (Op1.isReg()) {
682 if (Op2.isImm() || Op1.getReg() == IVReg)
683 EndValue = &Op2;
684 else {
685 EndValue = &Op1;
686 isSwapped = true;
687 }
688 }
689
690 if (!EndValue)
691 return nullptr;
692
693 Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
694 if (!Cmp)
695 return nullptr;
696 if (Negated)
697 Cmp = Comparison::getNegatedComparison(Cmp);
698 if (isSwapped)
699 Cmp = Comparison::getSwappedComparison(Cmp);
700
701 if (InitialValue->isReg()) {
702 Register R = InitialValue->getReg();
703 MachineBasicBlock *DefBB = MRI->getVRegDef(Reg: R)->getParent();
704 if (!MDT->properlyDominates(A: DefBB, B: Header)) {
705 int64_t V;
706 if (!checkForImmediate(MO: *InitialValue, Val&: V))
707 return nullptr;
708 }
709 OldInsts.push_back(Elt: MRI->getVRegDef(Reg: R));
710 }
711 if (EndValue->isReg()) {
712 Register R = EndValue->getReg();
713 MachineBasicBlock *DefBB = MRI->getVRegDef(Reg: R)->getParent();
714 if (!MDT->properlyDominates(A: DefBB, B: Header)) {
715 int64_t V;
716 if (!checkForImmediate(MO: *EndValue, Val&: V))
717 return nullptr;
718 }
719 OldInsts.push_back(Elt: MRI->getVRegDef(Reg: R));
720 }
721
722 return computeCount(Loop: L, Start: InitialValue, End: EndValue, IVReg, IVBump, Cmp);
723}
724
725/// Helper function that returns the expression that represents the
726/// number of times a loop iterates. The function takes the operands that
727/// represent the loop start value, loop end value, and induction value.
728/// Based upon these operands, the function attempts to compute the trip count.
729CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
730 const MachineOperand *Start,
731 const MachineOperand *End,
732 Register IVReg,
733 int64_t IVBump,
734 Comparison::Kind Cmp) const {
735 LLVM_DEBUG(llvm::dbgs() << "Loop: " << *Loop << "\n");
736 LLVM_DEBUG(llvm::dbgs() << "Initial Value: " << *Start << "\n");
737 LLVM_DEBUG(llvm::dbgs() << "End Value: " << *End << "\n");
738 LLVM_DEBUG(llvm::dbgs() << "Inc/Dec Value: " << IVBump << "\n");
739 LLVM_DEBUG(llvm::dbgs() << "Comparison: " << Cmp << "\n");
740 // Cannot handle comparison EQ, i.e. while (A == B).
741 if (Cmp == Comparison::EQ)
742 return nullptr;
743
744 // Check if either the start or end values are an assignment of an immediate.
745 // If so, use the immediate value rather than the register.
746 if (Start->isReg()) {
747 const MachineInstr *StartValInstr = MRI->getVRegDef(Reg: Start->getReg());
748 if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
749 StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
750 Start = &StartValInstr->getOperand(i: 1);
751 }
752 if (End->isReg()) {
753 const MachineInstr *EndValInstr = MRI->getVRegDef(Reg: End->getReg());
754 if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
755 EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
756 End = &EndValInstr->getOperand(i: 1);
757 }
758
759 if (!Start->isReg() && !Start->isImm())
760 return nullptr;
761 if (!End->isReg() && !End->isImm())
762 return nullptr;
763
764 bool CmpLess = Cmp & Comparison::L;
765 bool CmpGreater = Cmp & Comparison::G;
766 bool CmpHasEqual = Cmp & Comparison::EQ;
767
768 // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
769 if (CmpLess && IVBump < 0)
770 // Loop going while iv is "less" with the iv value going down. Must wrap.
771 return nullptr;
772
773 if (CmpGreater && IVBump > 0)
774 // Loop going while iv is "greater" with the iv value going up. Must wrap.
775 return nullptr;
776
777 // Phis that may feed into the loop.
778 LoopFeederMap LoopFeederPhi;
779
780 // Check if the initial value may be zero and can be decremented in the first
781 // iteration. If the value is zero, the endloop instruction will not decrement
782 // the loop counter, so we shouldn't generate a hardware loop in this case.
783 if (loopCountMayWrapOrUnderFlow(InitVal: Start, EndVal: End, MBB: Loop->getLoopPreheader(), L: Loop,
784 LoopFeederPhi))
785 return nullptr;
786
787 if (Start->isImm() && End->isImm()) {
788 // Both, start and end are immediates.
789 int64_t StartV = Start->getImm();
790 int64_t EndV = End->getImm();
791 int64_t Dist = EndV - StartV;
792 if (Dist == 0)
793 return nullptr;
794
795 bool Exact = (Dist % IVBump) == 0;
796
797 if (Cmp == Comparison::NE) {
798 if (!Exact)
799 return nullptr;
800 if ((Dist < 0) ^ (IVBump < 0))
801 return nullptr;
802 }
803
804 // For comparisons that include the final value (i.e. include equality
805 // with the final value), we need to increase the distance by 1.
806 if (CmpHasEqual)
807 Dist = Dist > 0 ? Dist+1 : Dist-1;
808
809 // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
810 // CmpGreater should imply Dist < 0. These conditions could actually
811 // fail, for example, in unreachable code (which may still appear to be
812 // reachable in the CFG).
813 if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
814 return nullptr;
815
816 // "Normalized" distance, i.e. with the bump set to +-1.
817 int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
818 : (-Dist + (-IVBump - 1)) / (-IVBump);
819 assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
820
821 uint64_t Count = Dist1;
822
823 if (Count > 0xFFFFFFFFULL)
824 return nullptr;
825
826 return new CountValue(CountValue::CV_Immediate, Count);
827 }
828
829 // A general case: Start and End are some values, but the actual
830 // iteration count may not be available. If it is not, insert
831 // a computation of it into the preheader.
832
833 // If the induction variable bump is not a power of 2, quit.
834 // Otherwise we'd need a general integer division.
835 if (!isPowerOf2_64(Value: std::abs(i: IVBump)))
836 return nullptr;
837
838 MachineBasicBlock *PH = MLI->findLoopPreheader(L: Loop, SpeculativePreheader: SpecPreheader);
839 assert (PH && "Should have a preheader by now");
840 MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
841 DebugLoc DL;
842 if (InsertPos != PH->end())
843 DL = InsertPos->getDebugLoc();
844
845 // If Start is an immediate and End is a register, the trip count
846 // will be "reg - imm". Hexagon's "subtract immediate" instruction
847 // is actually "reg + -imm".
848
849 // If the loop IV is going downwards, i.e. if the bump is negative,
850 // then the iteration count (computed as End-Start) will need to be
851 // negated. To avoid the negation, just swap Start and End.
852 if (IVBump < 0) {
853 std::swap(a&: Start, b&: End);
854 IVBump = -IVBump;
855 std::swap(a&: CmpLess, b&: CmpGreater);
856 }
857 // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
858 // Signedness, and "including equality" are preserved.
859
860 bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
861 bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
862
863 int64_t StartV = 0, EndV = 0;
864 if (Start->isImm())
865 StartV = Start->getImm();
866 if (End->isImm())
867 EndV = End->getImm();
868
869 int64_t AdjV = 0;
870 // To compute the iteration count, we would need this computation:
871 // Count = (End - Start + (IVBump-1)) / IVBump
872 // or, when CmpHasEqual:
873 // Count = (End - Start + (IVBump-1)+1) / IVBump
874 // The "IVBump-1" part is the adjustment (AdjV). We can avoid
875 // generating an instruction specifically to add it if we can adjust
876 // the immediate values for Start or End.
877
878 if (CmpHasEqual) {
879 // Need to add 1 to the total iteration count.
880 if (Start->isImm())
881 StartV--;
882 else if (End->isImm())
883 EndV++;
884 else
885 AdjV += 1;
886 }
887
888 if (Cmp != Comparison::NE) {
889 if (Start->isImm())
890 StartV -= (IVBump-1);
891 else if (End->isImm())
892 EndV += (IVBump-1);
893 else
894 AdjV += (IVBump-1);
895 }
896
897 Register R = 0;
898 unsigned SR = 0;
899 if (Start->isReg()) {
900 R = Start->getReg();
901 SR = Start->getSubReg();
902 } else {
903 R = End->getReg();
904 SR = End->getSubReg();
905 }
906 const TargetRegisterClass *RC = MRI->getRegClass(Reg: R);
907 // Hardware loops cannot handle 64-bit registers. If it's a double
908 // register, it has to have a subregister.
909 if (!SR && RC == &Hexagon::DoubleRegsRegClass)
910 return nullptr;
911 const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
912
913 // Compute DistR (register with the distance between Start and End).
914 Register DistR;
915 unsigned DistSR;
916
917 // Avoid special case, where the start value is an imm(0).
918 if (Start->isImm() && StartV == 0) {
919 DistR = End->getReg();
920 DistSR = End->getSubReg();
921 } else {
922 const MCInstrDesc &SubD = RegToReg ? TII->get(Opcode: Hexagon::A2_sub) :
923 (RegToImm ? TII->get(Opcode: Hexagon::A2_subri) :
924 TII->get(Opcode: Hexagon::A2_addi));
925 if (RegToReg || RegToImm) {
926 Register SubR = MRI->createVirtualRegister(RegClass: IntRC);
927 MachineInstrBuilder SubIB =
928 BuildMI(BB&: *PH, I: InsertPos, MIMD: DL, MCID: SubD, DestReg: SubR);
929
930 if (RegToReg)
931 SubIB.addReg(RegNo: End->getReg(), Flags: {}, SubReg: End->getSubReg())
932 .addReg(RegNo: Start->getReg(), Flags: {}, SubReg: Start->getSubReg());
933 else
934 SubIB.addImm(Val: EndV).addReg(RegNo: Start->getReg(), Flags: {}, SubReg: Start->getSubReg());
935 DistR = SubR;
936 } else {
937 // If the loop has been unrolled, we should use the original loop count
938 // instead of recalculating the value. This will avoid additional
939 // 'Add' instruction.
940 const MachineInstr *EndValInstr = MRI->getVRegDef(Reg: End->getReg());
941 if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
942 EndValInstr->getOperand(i: 1).getSubReg() == 0 &&
943 EndValInstr->getOperand(i: 2).getImm() == StartV) {
944 DistR = EndValInstr->getOperand(i: 1).getReg();
945 } else {
946 Register SubR = MRI->createVirtualRegister(RegClass: IntRC);
947 MachineInstrBuilder SubIB =
948 BuildMI(BB&: *PH, I: InsertPos, MIMD: DL, MCID: SubD, DestReg: SubR);
949 SubIB.addReg(RegNo: End->getReg(), Flags: {}, SubReg: End->getSubReg()).addImm(Val: -StartV);
950 DistR = SubR;
951 }
952 }
953 DistSR = 0;
954 }
955
956 // From DistR, compute AdjR (register with the adjusted distance).
957 Register AdjR;
958 unsigned AdjSR;
959
960 if (AdjV == 0) {
961 AdjR = DistR;
962 AdjSR = DistSR;
963 } else {
964 // Generate CountR = ADD DistR, AdjVal
965 Register AddR = MRI->createVirtualRegister(RegClass: IntRC);
966 MCInstrDesc const &AddD = TII->get(Opcode: Hexagon::A2_addi);
967 BuildMI(BB&: *PH, I: InsertPos, MIMD: DL, MCID: AddD, DestReg: AddR)
968 .addReg(RegNo: DistR, Flags: {}, SubReg: DistSR)
969 .addImm(Val: AdjV);
970
971 AdjR = AddR;
972 AdjSR = 0;
973 }
974
975 // From AdjR, compute CountR (register with the final count).
976 Register CountR;
977 unsigned CountSR;
978
979 if (IVBump == 1) {
980 CountR = AdjR;
981 CountSR = AdjSR;
982 } else {
983 // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
984 unsigned Shift = Log2_32(Value: IVBump);
985
986 // Generate NormR = LSR DistR, Shift.
987 Register LsrR = MRI->createVirtualRegister(RegClass: IntRC);
988 const MCInstrDesc &LsrD = TII->get(Opcode: Hexagon::S2_lsr_i_r);
989 BuildMI(BB&: *PH, I: InsertPos, MIMD: DL, MCID: LsrD, DestReg: LsrR)
990 .addReg(RegNo: AdjR, Flags: {}, SubReg: AdjSR)
991 .addImm(Val: Shift);
992
993 CountR = LsrR;
994 CountSR = 0;
995 }
996
997 const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
998 Register MuxR = CountR;
999 unsigned MuxSR = CountSR;
1000 // For the loop count to be valid unsigned number, CmpLess should imply
1001 // Dist >= 0. Similarly, CmpGreater should imply Dist < 0. We can skip the
1002 // check if the initial distance is zero and the comparison is LTu || LTEu.
1003 if (!(Start->isImm() && StartV == 0 && Comparison::isUnsigned(Cmp) &&
1004 CmpLess) &&
1005 (CmpLess || CmpGreater)) {
1006 // Generate:
1007 // DistCheck = CMP_GT DistR, 0 --> CmpLess
1008 // DistCheck = CMP_GT DistR, -1 --> CmpGreater
1009 Register DistCheckR = MRI->createVirtualRegister(RegClass: PredRC);
1010 const MCInstrDesc &DistCheckD = TII->get(Opcode: Hexagon::C2_cmpgti);
1011 BuildMI(BB&: *PH, I: InsertPos, MIMD: DL, MCID: DistCheckD, DestReg: DistCheckR)
1012 .addReg(RegNo: DistR, Flags: {}, SubReg: DistSR)
1013 .addImm(Val: (CmpLess) ? 0 : -1);
1014
1015 // Generate:
1016 // MUXR = MUX DistCheck, CountR, 1 --> CmpLess
1017 // MUXR = MUX DistCheck, 1, CountR --> CmpGreater
1018 MuxR = MRI->createVirtualRegister(RegClass: IntRC);
1019 if (CmpLess) {
1020 const MCInstrDesc &MuxD = TII->get(Opcode: Hexagon::C2_muxir);
1021 BuildMI(BB&: *PH, I: InsertPos, MIMD: DL, MCID: MuxD, DestReg: MuxR)
1022 .addReg(RegNo: DistCheckR)
1023 .addReg(RegNo: CountR, Flags: {}, SubReg: CountSR)
1024 .addImm(Val: 1);
1025 } else {
1026 const MCInstrDesc &MuxD = TII->get(Opcode: Hexagon::C2_muxri);
1027 BuildMI(BB&: *PH, I: InsertPos, MIMD: DL, MCID: MuxD, DestReg: MuxR)
1028 .addReg(RegNo: DistCheckR)
1029 .addImm(Val: 1)
1030 .addReg(RegNo: CountR, Flags: {}, SubReg: CountSR);
1031 }
1032 MuxSR = 0;
1033 }
1034
1035 return new CountValue(CountValue::CV_Register, MuxR, MuxSR);
1036}
1037
1038/// Return true if the operation is invalid within hardware loop.
1039bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
1040 bool IsInnerHWLoop) const {
1041 // Call is not allowed because the callee may use a hardware loop except for
1042 // the case when the call never returns.
1043 if (MI->getDesc().isCall())
1044 return !TII->doesNotReturn(CallMI: *MI);
1045
1046 // Check if the instruction defines a hardware loop register.
1047 using namespace Hexagon;
1048
1049 static const Register Regs01[] = { LC0, SA0, LC1, SA1 };
1050 static const Register Regs1[] = { LC1, SA1 };
1051 auto CheckRegs = IsInnerHWLoop ? ArrayRef(Regs01) : ArrayRef(Regs1);
1052 for (Register R : CheckRegs)
1053 if (MI->modifiesRegister(Reg: R, TRI))
1054 return true;
1055
1056 return false;
1057}
1058
1059/// Return true if the loop contains an instruction that inhibits
1060/// the use of the hardware loop instruction.
1061bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
1062 bool IsInnerHWLoop) const {
1063 LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1064 << printMBBReference(**L->block_begin()));
1065 for (MachineBasicBlock *MBB : L->getBlocks()) {
1066 for (const MachineInstr &MI : *MBB) {
1067 if (isInvalidLoopOperation(MI: &MI, IsInnerHWLoop)) {
1068 LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
1069 MI.dump(););
1070 return true;
1071 }
1072 }
1073 }
1074 return false;
1075}
1076
1077/// Returns true if the instruction is dead. This was essentially
1078/// copied from DeadMachineInstructionElim::isDead, but with special cases
1079/// for inline asm, physical registers and instructions with side effects
1080/// removed.
1081bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1082 SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1083 // Examine each operand.
1084 for (const MachineOperand &MO : MI->operands()) {
1085 if (!MO.isReg() || !MO.isDef())
1086 continue;
1087
1088 Register Reg = MO.getReg();
1089 if (MRI->use_nodbg_empty(RegNo: Reg))
1090 continue;
1091
1092 using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1093
1094 // This instruction has users, but if the only user is the phi node for the
1095 // parent block, and the only use of that phi node is this instruction, then
1096 // this instruction is dead: both it (and the phi node) can be removed.
1097 use_nodbg_iterator I = MRI->use_nodbg_begin(RegNo: Reg);
1098 use_nodbg_iterator End = MRI->use_nodbg_end();
1099 if (std::next(x: I) != End || !I->getParent()->isPHI())
1100 return false;
1101
1102 MachineInstr *OnePhi = I->getParent();
1103 for (const MachineOperand &OPO : OnePhi->operands()) {
1104 if (!OPO.isReg() || !OPO.isDef())
1105 continue;
1106
1107 Register OPReg = OPO.getReg();
1108 use_nodbg_iterator nextJ;
1109 for (use_nodbg_iterator J = MRI->use_nodbg_begin(RegNo: OPReg);
1110 J != End; J = nextJ) {
1111 nextJ = std::next(x: J);
1112 MachineOperand &Use = *J;
1113 MachineInstr *UseMI = Use.getParent();
1114
1115 // If the phi node has a user that is not MI, bail.
1116 if (MI != UseMI)
1117 return false;
1118 }
1119 }
1120 DeadPhis.push_back(Elt: OnePhi);
1121 }
1122
1123 // If there are no defs with uses, the instruction is dead.
1124 return true;
1125}
1126
1127void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1128 // This procedure was essentially copied from DeadMachineInstructionElim.
1129
1130 SmallVector<MachineInstr*, 1> DeadPhis;
1131 if (isDead(MI, DeadPhis)) {
1132 LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);
1133
1134 // It is possible that some DBG_VALUE instructions refer to this
1135 // instruction. Examine each def operand for such references;
1136 // if found, mark the DBG_VALUE as undef (but don't delete it).
1137 for (const MachineOperand &MO : MI->operands()) {
1138 if (!MO.isReg() || !MO.isDef())
1139 continue;
1140 Register Reg = MO.getReg();
1141 // We use make_early_inc_range here because setReg below invalidates the
1142 // iterator.
1143 for (MachineOperand &MO :
1144 llvm::make_early_inc_range(Range: MRI->use_operands(Reg))) {
1145 MachineInstr *UseMI = MO.getParent();
1146 if (UseMI == MI)
1147 continue;
1148 if (MO.isDebug())
1149 MO.setReg(0U);
1150 }
1151 }
1152
1153 MI->eraseFromParent();
1154 for (unsigned i = 0; i < DeadPhis.size(); ++i)
1155 DeadPhis[i]->eraseFromParent();
1156 }
1157}
1158
1159/// Check if the loop is a candidate for converting to a hardware
1160/// loop. If so, then perform the transformation.
1161///
1162/// This function works on innermost loops first. A loop can be converted
1163/// if it is a counting loop; either a register value or an immediate.
1164///
1165/// The code makes several assumptions about the representation of the loop
1166/// in llvm.
1167bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1168 bool &RecL0used,
1169 bool &RecL1used) {
1170 // This is just to confirm basic correctness.
1171 assert(L->getHeader() && "Loop without a header?");
1172
1173 bool Changed = false;
1174 bool L0Used = false;
1175 bool L1Used = false;
1176
1177 // Process nested loops first.
1178 for (MachineLoop *I : *L) {
1179 Changed |= convertToHardwareLoop(L: I, RecL0used, RecL1used);
1180 L0Used |= RecL0used;
1181 L1Used |= RecL1used;
1182 }
1183
1184 // If a nested loop has been converted, then we can't convert this loop.
1185 if (Changed && L0Used && L1Used)
1186 return Changed;
1187
1188 unsigned LOOP_i;
1189 unsigned LOOP_r;
1190 unsigned ENDLOOP;
1191
1192 // Flag used to track loopN instruction:
1193 // 1 - Hardware loop is being generated for the inner most loop.
1194 // 0 - Hardware loop is being generated for the outer loop.
1195 unsigned IsInnerHWLoop = 1;
1196
1197 if (L0Used) {
1198 LOOP_i = Hexagon::J2_loop1i;
1199 LOOP_r = Hexagon::J2_loop1r;
1200 ENDLOOP = Hexagon::ENDLOOP1;
1201 IsInnerHWLoop = 0;
1202 } else {
1203 LOOP_i = Hexagon::J2_loop0i;
1204 LOOP_r = Hexagon::J2_loop0r;
1205 ENDLOOP = Hexagon::ENDLOOP0;
1206 }
1207
1208#ifndef NDEBUG
1209 // Stop trying after reaching the limit (if any).
1210 int Limit = HWLoopLimit;
1211 if (Limit >= 0) {
1212 if (Counter >= HWLoopLimit)
1213 return false;
1214 Counter++;
1215 }
1216#endif
1217
1218 // Does the loop contain any invalid instructions?
1219 if (containsInvalidInstruction(L, IsInnerHWLoop)) {
1220 MORE->emit(RemarkBuilder: [&]() {
1221 return MachineOptimizationRemarkMissed(DEBUG_TYPE, "InvalidInstruction",
1222 L->getStartLoc(), L->getHeader())
1223 << "loop contains an instruction that prevents hardware loop "
1224 "generation (e.g. a call or hardware loop register definition)";
1225 });
1226 return false;
1227 }
1228
1229 MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1230 // Don't generate hw loop if the loop has more than one exit.
1231 if (!LastMBB) {
1232 MORE->emit(RemarkBuilder: [&]() {
1233 return MachineOptimizationRemarkMissed(DEBUG_TYPE, "MultipleExits",
1234 L->getStartLoc(), L->getHeader())
1235 << "loop has multiple exits and cannot be converted to a "
1236 "hardware loop";
1237 });
1238 return false;
1239 }
1240
1241 MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
1242 if (LastI == LastMBB->end())
1243 return false;
1244
1245 // Is the induction variable bump feeding the latch condition?
1246 if (!fixupInductionVariable(L)) {
1247 MORE->emit(RemarkBuilder: [&]() {
1248 return MachineOptimizationRemarkMissed(DEBUG_TYPE, "InductionVariable",
1249 L->getStartLoc(), L->getHeader())
1250 << "could not identify or fix up the induction variable";
1251 });
1252 return false;
1253 }
1254
1255 // Ensure the loop has a preheader: the loop instruction will be
1256 // placed there.
1257 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpeculativePreheader: SpecPreheader);
1258 if (!Preheader) {
1259 Preheader = createPreheaderForLoop(L);
1260 if (!Preheader)
1261 return false;
1262 }
1263
1264 MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1265
1266 SmallVector<MachineInstr*, 2> OldInsts;
1267 // Are we able to determine the trip count for the loop?
1268 CountValue *TripCount = getLoopTripCount(L, OldInsts);
1269 if (!TripCount) {
1270 MORE->emit(RemarkBuilder: [&]() {
1271 return MachineOptimizationRemarkMissed(DEBUG_TYPE, "TripCount",
1272 L->getStartLoc(), L->getHeader())
1273 << "trip count of the loop could not be computed";
1274 });
1275 return false;
1276 }
1277
1278 // Is the trip count available in the preheader?
1279 if (TripCount->isReg()) {
1280 // There will be a use of the register inserted into the preheader,
1281 // so make sure that the register is actually defined at that point.
1282 MachineInstr *TCDef = MRI->getVRegDef(Reg: TripCount->getReg());
1283 MachineBasicBlock *BBDef = TCDef->getParent();
1284 if (!MDT->dominates(A: BBDef, B: Preheader)) {
1285 MORE->emit(RemarkBuilder: [&]() {
1286 return MachineOptimizationRemarkMissed(DEBUG_TYPE,
1287 "TripCountNotDominating",
1288 L->getStartLoc(), L->getHeader())
1289 << "trip count register is not available in the loop preheader";
1290 });
1291 return false;
1292 }
1293 }
1294
1295 // Determine the loop start.
1296 MachineBasicBlock *TopBlock = L->getTopBlock();
1297 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1298 MachineBasicBlock *LoopStart = nullptr;
1299 if (ExitingBlock != L->getLoopLatch()) {
1300 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1301 SmallVector<MachineOperand, 2> Cond;
1302
1303 if (TII->analyzeBranch(MBB&: *ExitingBlock, TBB&: TB, FBB&: FB, Cond, AllowModify: false))
1304 return false;
1305
1306 if (L->contains(BB: TB))
1307 LoopStart = TB;
1308 else if (L->contains(BB: FB))
1309 LoopStart = FB;
1310 else
1311 return false;
1312 }
1313 else
1314 LoopStart = TopBlock;
1315
1316 // Convert the loop to a hardware loop.
1317 LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1318 DebugLoc DL;
1319 if (InsertPos != Preheader->end())
1320 DL = InsertPos->getDebugLoc();
1321
1322 if (TripCount->isReg()) {
1323 // Create a copy of the loop count register.
1324 Register CountReg = MRI->createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
1325 BuildMI(BB&: *Preheader, I: InsertPos, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: CountReg)
1326 .addReg(RegNo: TripCount->getReg(), Flags: {}, SubReg: TripCount->getSubReg());
1327 // Add the Loop instruction to the beginning of the loop.
1328 BuildMI(BB&: *Preheader, I: InsertPos, MIMD: DL, MCID: TII->get(Opcode: LOOP_r)).addMBB(MBB: LoopStart)
1329 .addReg(RegNo: CountReg);
1330 } else {
1331 assert(TripCount->isImm() && "Expecting immediate value for trip count");
1332 // Add the Loop immediate instruction to the beginning of the loop,
1333 // if the immediate fits in the instructions. Otherwise, we need to
1334 // create a new virtual register.
1335 int64_t CountImm = TripCount->getImm();
1336 if (!TII->isValidOffset(Opcode: LOOP_i, Offset: CountImm, TRI)) {
1337 Register CountReg = MRI->createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
1338 BuildMI(BB&: *Preheader, I: InsertPos, MIMD: DL, MCID: TII->get(Opcode: Hexagon::A2_tfrsi), DestReg: CountReg)
1339 .addImm(Val: CountImm);
1340 BuildMI(BB&: *Preheader, I: InsertPos, MIMD: DL, MCID: TII->get(Opcode: LOOP_r))
1341 .addMBB(MBB: LoopStart).addReg(RegNo: CountReg);
1342 } else
1343 BuildMI(BB&: *Preheader, I: InsertPos, MIMD: DL, MCID: TII->get(Opcode: LOOP_i))
1344 .addMBB(MBB: LoopStart).addImm(Val: CountImm);
1345 }
1346
1347 // Make sure the loop start always has a reference in the CFG.
1348 LoopStart->setMachineBlockAddressTaken();
1349
1350 // Replace the loop branch with an endloop instruction.
1351 DebugLoc LastIDL = LastI->getDebugLoc();
1352 BuildMI(BB&: *LastMBB, I: LastI, MIMD: LastIDL, MCID: TII->get(Opcode: ENDLOOP)).addMBB(MBB: LoopStart);
1353
1354 // The loop ends with either:
1355 // - a conditional branch followed by an unconditional branch, or
1356 // - a conditional branch to the loop start.
1357 if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1358 LastI->getOpcode() == Hexagon::J2_jumpf) {
1359 // Delete one and change/add an uncond. branch to out of the loop.
1360 MachineBasicBlock *BranchTarget = LastI->getOperand(i: 1).getMBB();
1361 LastI = LastMBB->erase(I: LastI);
1362 if (!L->contains(BB: BranchTarget)) {
1363 if (LastI != LastMBB->end())
1364 LastI = LastMBB->erase(I: LastI);
1365 SmallVector<MachineOperand, 0> Cond;
1366 TII->insertBranch(MBB&: *LastMBB, TBB: BranchTarget, FBB: nullptr, Cond, DL: LastIDL);
1367 }
1368 } else {
1369 // Conditional branch to loop start; just delete it.
1370 LastMBB->erase(I: LastI);
1371 }
1372 delete TripCount;
1373
1374 // The induction operation and the comparison may now be
1375 // unneeded. If these are unneeded, then remove them.
1376 for (unsigned i = 0; i < OldInsts.size(); ++i)
1377 removeIfDead(MI: OldInsts[i]);
1378
1379 ++NumHWLoops;
1380
1381 MORE->emit(RemarkBuilder: [&]() {
1382 return MachineOptimizationRemark(DEBUG_TYPE, "HardwareLoop",
1383 L->getStartLoc(), L->getHeader())
1384 << "converted loop to hardware loop";
1385 });
1386
1387 // Set RecL1used and RecL0used only after hardware loop has been
1388 // successfully generated. Doing it earlier can cause wrong loop instruction
1389 // to be used.
1390 if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1391 RecL1used = true;
1392 else
1393 RecL0used = true;
1394
1395 return true;
1396}
1397
1398bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1399 MachineInstr *CmpI) {
1400 assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1401
1402 MachineBasicBlock *BB = BumpI->getParent();
1403 if (CmpI->getParent() != BB)
1404 return false;
1405
1406 using instr_iterator = MachineBasicBlock::instr_iterator;
1407
1408 // Check if things are in order to begin with.
1409 for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1410 if (&*I == CmpI)
1411 return true;
1412
1413 // Out of order.
1414 Register PredR = CmpI->getOperand(i: 0).getReg();
1415 bool FoundBump = false;
1416 instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(x: CmpIt);
1417 for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1418 MachineInstr *In = &*I;
1419 for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1420 MachineOperand &MO = In->getOperand(i);
1421 if (MO.isReg() && MO.isUse()) {
1422 if (MO.getReg() == PredR) // Found an intervening use of PredR.
1423 return false;
1424 }
1425 }
1426
1427 if (In == BumpI) {
1428 BB->splice(Where: ++BumpI->getIterator(), Other: BB, From: CmpI->getIterator());
1429 FoundBump = true;
1430 break;
1431 }
1432 }
1433 assert (FoundBump && "Cannot determine instruction order");
1434 return FoundBump;
1435}
1436
1437/// This function is required to break recursion. Visiting phis in a loop may
1438/// result in recursion during compilation. We break the recursion by making
1439/// sure that we visit a MachineOperand and its definition in a
1440/// MachineInstruction only once. If we attempt to visit more than once, then
1441/// there is recursion, and will return false.
1442bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1443 MachineInstr *MI,
1444 const MachineOperand *MO,
1445 LoopFeederMap &LoopFeederPhi) const {
1446 if (LoopFeederPhi.find(x: MO->getReg()) == LoopFeederPhi.end()) {
1447 LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1448 << printMBBReference(**L->block_begin()));
1449 // Ignore all BBs that form Loop.
1450 if (llvm::is_contained(Range: L->getBlocks(), Element: A))
1451 return false;
1452 MachineInstr *Def = MRI->getVRegDef(Reg: MO->getReg());
1453 LoopFeederPhi.insert(x: std::make_pair(x: MO->getReg(), y&: Def));
1454 return true;
1455 } else
1456 // Already visited node.
1457 return false;
1458}
1459
1460/// Return true if a Phi may generate a value that can underflow.
1461/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1462bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1463 MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1464 MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1465 assert(Phi->isPHI() && "Expecting a Phi.");
1466 // Walk through each Phi, and its used operands. Make sure that
1467 // if there is recursion in Phi, we won't generate hardware loops.
1468 for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1469 if (isLoopFeeder(L, A: MBB, MI: Phi, MO: &(Phi->getOperand(i)), LoopFeederPhi))
1470 if (loopCountMayWrapOrUnderFlow(InitVal: &(Phi->getOperand(i)), EndVal,
1471 MBB: Phi->getParent(), L, LoopFeederPhi))
1472 return true;
1473 return false;
1474}
1475
1476/// Return true if the induction variable can underflow in the first iteration.
1477/// An example, is an initial unsigned value that is 0 and is decrement in the
1478/// first itertion of a do-while loop. In this case, we cannot generate a
1479/// hardware loop because the endloop instruction does not decrement the loop
1480/// counter if it is <= 1. We only need to perform this analysis if the
1481/// initial value is a register.
1482///
1483/// This function assumes the initial value may underflow unless proven
1484/// otherwise. If the type is signed, then we don't care because signed
1485/// underflow is undefined. We attempt to prove the initial value is not
1486/// zero by performing a crude analysis of the loop counter. This function
1487/// checks if the initial value is used in any comparison prior to the loop
1488/// and, if so, assumes the comparison is a range check. This is inexact,
1489/// but will catch the simple cases.
1490bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1491 const MachineOperand *InitVal, const MachineOperand *EndVal,
1492 MachineBasicBlock *MBB, MachineLoop *L,
1493 LoopFeederMap &LoopFeederPhi) const {
1494 // Only check register values since they are unknown.
1495 if (!InitVal->isReg())
1496 return false;
1497
1498 if (!EndVal->isImm())
1499 return false;
1500
1501 // A register value that is assigned an immediate is a known value, and it
1502 // won't underflow in the first iteration.
1503 int64_t Imm;
1504 if (checkForImmediate(MO: *InitVal, Val&: Imm))
1505 return (EndVal->getImm() == Imm);
1506
1507 Register Reg = InitVal->getReg();
1508
1509 // We don't know the value of a physical register.
1510 if (!Reg.isVirtual())
1511 return true;
1512
1513 MachineInstr *Def = MRI->getVRegDef(Reg);
1514 if (!Def)
1515 return true;
1516
1517 // If the initial value is a Phi or copy and the operands may not underflow,
1518 // then the definition cannot be underflow either.
1519 if (Def->isPHI() && !phiMayWrapOrUnderflow(Phi: Def, EndVal, MBB: Def->getParent(),
1520 L, LoopFeederPhi))
1521 return false;
1522 if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(InitVal: &(Def->getOperand(i: 1)),
1523 EndVal, MBB: Def->getParent(),
1524 L, LoopFeederPhi))
1525 return false;
1526
1527 // Iterate over the uses of the initial value. If the initial value is used
1528 // in a compare, then we assume this is a range check that ensures the loop
1529 // doesn't underflow. This is not an exact test and should be improved.
1530 for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(RegNo: Reg),
1531 E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1532 MachineInstr *MI = &*I;
1533 Register CmpReg1, CmpReg2;
1534 int64_t CmpMask = 0, CmpValue = 0;
1535
1536 if (!TII->analyzeCompare(MI: *MI, SrcReg&: CmpReg1, SrcReg2&: CmpReg2, Mask&: CmpMask, Value&: CmpValue))
1537 continue;
1538
1539 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1540 SmallVector<MachineOperand, 2> Cond;
1541 if (TII->analyzeBranch(MBB&: *MI->getParent(), TBB, FBB, Cond, AllowModify: false))
1542 continue;
1543
1544 Comparison::Kind Cmp =
1545 getComparisonKind(CondOpc: MI->getOpcode(), InitialValue: nullptr, EndValue: nullptr, IVBump: 0);
1546 if (Cmp == 0)
1547 continue;
1548 if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1549 Cmp = Comparison::getNegatedComparison(Cmp);
1550 if (CmpReg2 != 0 && CmpReg2 == Reg)
1551 Cmp = Comparison::getSwappedComparison(Cmp);
1552
1553 // Signed underflow is undefined.
1554 if (Comparison::isSigned(Cmp))
1555 return false;
1556
1557 // Check if there is a comparison of the initial value. If the initial value
1558 // is greater than or not equal to another value, then assume this is a
1559 // range check.
1560 if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1561 return false;
1562 }
1563
1564 // OK - this is a hack that needs to be improved. We really need to analyze
1565 // the instructions performed on the initial value. This works on the simplest
1566 // cases only.
1567 if (!Def->isCopy() && !Def->isPHI())
1568 return false;
1569
1570 return true;
1571}
1572
1573bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1574 int64_t &Val) const {
1575 if (MO.isImm()) {
1576 Val = MO.getImm();
1577 return true;
1578 }
1579 if (!MO.isReg())
1580 return false;
1581
1582 // MO is a register. Check whether it is defined as an immediate value,
1583 // and if so, get the value of it in TV. That value will then need to be
1584 // processed to handle potential subregisters in MO.
1585 int64_t TV;
1586
1587 Register R = MO.getReg();
1588 if (!R.isVirtual())
1589 return false;
1590 MachineInstr *DI = MRI->getVRegDef(Reg: R);
1591 unsigned DOpc = DI->getOpcode();
1592 switch (DOpc) {
1593 case TargetOpcode::COPY:
1594 case Hexagon::A2_tfrsi:
1595 case Hexagon::A2_tfrpi:
1596 case Hexagon::CONST32:
1597 case Hexagon::CONST64:
1598 // Call recursively to avoid an extra check whether operand(1) is
1599 // indeed an immediate (it could be a global address, for example),
1600 // plus we can handle COPY at the same time.
1601 if (!checkForImmediate(MO: DI->getOperand(i: 1), Val&: TV))
1602 return false;
1603 break;
1604 case Hexagon::A2_combineii:
1605 case Hexagon::A4_combineir:
1606 case Hexagon::A4_combineii:
1607 case Hexagon::A4_combineri:
1608 case Hexagon::A2_combinew: {
1609 const MachineOperand &S1 = DI->getOperand(i: 1);
1610 const MachineOperand &S2 = DI->getOperand(i: 2);
1611 int64_t V1, V2;
1612 if (!checkForImmediate(MO: S1, Val&: V1) || !checkForImmediate(MO: S2, Val&: V2))
1613 return false;
1614 TV = V2 | (static_cast<uint64_t>(V1) << 32);
1615 break;
1616 }
1617 case TargetOpcode::REG_SEQUENCE: {
1618 const MachineOperand &S1 = DI->getOperand(i: 1);
1619 const MachineOperand &S3 = DI->getOperand(i: 3);
1620 int64_t V1, V3;
1621 if (!checkForImmediate(MO: S1, Val&: V1) || !checkForImmediate(MO: S3, Val&: V3))
1622 return false;
1623 unsigned Sub2 = DI->getOperand(i: 2).getImm();
1624 unsigned Sub4 = DI->getOperand(i: 4).getImm();
1625 if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1626 TV = V1 | (V3 << 32);
1627 else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1628 TV = V3 | (V1 << 32);
1629 else
1630 llvm_unreachable("Unexpected form of REG_SEQUENCE");
1631 break;
1632 }
1633
1634 default:
1635 return false;
1636 }
1637
1638 // By now, we should have successfully obtained the immediate value defining
1639 // the register referenced in MO. Handle a potential use of a subregister.
1640 switch (MO.getSubReg()) {
1641 case Hexagon::isub_lo:
1642 Val = TV & 0xFFFFFFFFULL;
1643 break;
1644 case Hexagon::isub_hi:
1645 Val = (TV >> 32) & 0xFFFFFFFFULL;
1646 break;
1647 default:
1648 Val = TV;
1649 break;
1650 }
1651 return true;
1652}
1653
1654void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1655 if (MO.isImm()) {
1656 MO.setImm(Val);
1657 return;
1658 }
1659
1660 assert(MO.isReg());
1661 Register R = MO.getReg();
1662 MachineInstr *DI = MRI->getVRegDef(Reg: R);
1663
1664 const TargetRegisterClass *RC = MRI->getRegClass(Reg: R);
1665 Register NewR = MRI->createVirtualRegister(RegClass: RC);
1666 MachineBasicBlock &B = *DI->getParent();
1667 DebugLoc DL = DI->getDebugLoc();
1668 BuildMI(BB&: B, I: DI, MIMD: DL, MCID: TII->get(Opcode: DI->getOpcode()), DestReg: NewR).addImm(Val);
1669 MO.setReg(NewR);
1670}
1671
1672bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1673 MachineBasicBlock *Header = L->getHeader();
1674 MachineBasicBlock *Latch = L->getLoopLatch();
1675 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1676
1677 if (!(Header && Latch && ExitingBlock))
1678 return false;
1679
1680 // These data structures follow the same concept as the corresponding
1681 // ones in findInductionRegister (where some comments are).
1682 using RegisterBump = std::pair<Register, int64_t>;
1683 using RegisterInduction = std::pair<Register, RegisterBump>;
1684 using RegisterInductionSet = std::set<RegisterInduction>;
1685
1686 // Register candidates for induction variables, with their associated bumps.
1687 RegisterInductionSet IndRegs;
1688
1689 // Look for induction patterns:
1690 // %1 = PHI ..., [ latch, %2 ]
1691 // %2 = ADD %1, imm
1692 using instr_iterator = MachineBasicBlock::instr_iterator;
1693
1694 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1695 I != E && I->isPHI(); ++I) {
1696 MachineInstr *Phi = &*I;
1697
1698 // Have a PHI instruction.
1699 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1700 if (Phi->getOperand(i: i+1).getMBB() != Latch)
1701 continue;
1702
1703 Register PhiReg = Phi->getOperand(i).getReg();
1704 MachineInstr *DI = MRI->getVRegDef(Reg: PhiReg);
1705
1706 if (DI->getDesc().isAdd()) {
1707 // If the register operand to the add/sub is the PHI we are looking
1708 // at, this meets the induction pattern.
1709 Register IndReg = DI->getOperand(i: 1).getReg();
1710 MachineOperand &Opnd2 = DI->getOperand(i: 2);
1711 int64_t V;
1712 if (MRI->getVRegDef(Reg: IndReg) == Phi && checkForImmediate(MO: Opnd2, Val&: V)) {
1713 Register UpdReg = DI->getOperand(i: 0).getReg();
1714 IndRegs.insert(x: std::make_pair(x&: UpdReg, y: std::make_pair(x&: IndReg, y&: V)));
1715 }
1716 }
1717 } // for (i)
1718 } // for (instr)
1719
1720 if (IndRegs.empty())
1721 return false;
1722
1723 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1724 SmallVector<MachineOperand,2> Cond;
1725 // analyzeBranch returns true if it fails to analyze branch.
1726 bool NotAnalyzed = TII->analyzeBranch(MBB&: *ExitingBlock, TBB&: TB, FBB&: FB, Cond, AllowModify: false);
1727 if (NotAnalyzed || Cond.empty())
1728 return false;
1729
1730 if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1731 MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1732 SmallVector<MachineOperand,2> LCond;
1733 bool NotAnalyzed = TII->analyzeBranch(MBB&: *Latch, TBB&: LTB, FBB&: LFB, Cond&: LCond, AllowModify: false);
1734 if (NotAnalyzed)
1735 return false;
1736
1737 // Since latch is not the exiting block, the latch branch should be an
1738 // unconditional branch to the loop header.
1739 if (TB == Latch)
1740 TB = (LTB == Header) ? LTB : LFB;
1741 else
1742 FB = (LTB == Header) ? LTB : LFB;
1743 }
1744 if (TB != Header) {
1745 if (FB != Header) {
1746 // The latch/exit block does not go back to the header.
1747 return false;
1748 }
1749 // FB is the header (i.e., uncond. jump to branch header)
1750 // In this case, the LoopBody -> TB should not be a back edge otherwise
1751 // it could result in an infinite loop after conversion to hw_loop.
1752 // This case can happen when the Latch has two jumps like this:
1753 // Jmp_c OuterLoopHeader <-- TB
1754 // Jmp InnerLoopHeader <-- FB
1755 if (MDT->dominates(A: TB, B: FB))
1756 return false;
1757 }
1758
1759 // Expecting a predicate register as a condition. It won't be a hardware
1760 // predicate register at this point yet, just a vreg.
1761 // HexagonInstrInfo::analyzeBranch for negated branches inserts imm(0)
1762 // into Cond, followed by the predicate register. For non-negated branches
1763 // it's just the register.
1764 unsigned CSz = Cond.size();
1765 if (CSz != 1 && CSz != 2)
1766 return false;
1767
1768 if (!Cond[CSz-1].isReg())
1769 return false;
1770
1771 Register P = Cond[CSz - 1].getReg();
1772 MachineInstr *PredDef = MRI->getVRegDef(Reg: P);
1773
1774 if (!PredDef->isCompare())
1775 return false;
1776
1777 SmallSet<Register,2> CmpRegs;
1778 MachineOperand *CmpImmOp = nullptr;
1779
1780 // Go over all operands to the compare and look for immediate and register
1781 // operands. Assume that if the compare has a single register use and a
1782 // single immediate operand, then the register is being compared with the
1783 // immediate value.
1784 for (MachineOperand &MO : PredDef->operands()) {
1785 if (MO.isReg()) {
1786 // Skip all implicit references. In one case there was:
1787 // %140 = FCMPUGT32_rr %138, %139, implicit %usr
1788 if (MO.isImplicit())
1789 continue;
1790 if (MO.isUse()) {
1791 if (!isImmediate(MO)) {
1792 CmpRegs.insert(V: MO.getReg());
1793 continue;
1794 }
1795 // Consider the register to be the "immediate" operand.
1796 if (CmpImmOp)
1797 return false;
1798 CmpImmOp = &MO;
1799 }
1800 } else if (MO.isImm()) {
1801 if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1802 return false;
1803 CmpImmOp = &MO;
1804 }
1805 }
1806
1807 if (CmpRegs.empty())
1808 return false;
1809
1810 // Check if the compared register follows the order we want. Fix if needed.
1811 for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1812 I != E; ++I) {
1813 // This is a success. If the register used in the comparison is one that
1814 // we have identified as a bumped (updated) induction register, there is
1815 // nothing to do.
1816 if (CmpRegs.count(V: I->first))
1817 return true;
1818
1819 // Otherwise, if the register being compared comes out of a PHI node,
1820 // and has been recognized as following the induction pattern, and is
1821 // compared against an immediate, we can fix it.
1822 const RegisterBump &RB = I->second;
1823 if (CmpRegs.count(V: RB.first)) {
1824 if (!CmpImmOp) {
1825 // If both operands to the compare instruction are registers, see if
1826 // it can be changed to use induction register as one of the operands.
1827 MachineInstr *IndI = nullptr;
1828 MachineInstr *nonIndI = nullptr;
1829 MachineOperand *IndMO = nullptr;
1830 MachineOperand *nonIndMO = nullptr;
1831
1832 for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1833 MachineOperand &MO = PredDef->getOperand(i);
1834 if (MO.isReg() && MO.getReg() == RB.first) {
1835 LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1836 << ") = " << *(MRI->getVRegDef(I->first)));
1837 if (IndI)
1838 return false;
1839
1840 IndI = MRI->getVRegDef(Reg: I->first);
1841 IndMO = &MO;
1842 } else if (MO.isReg()) {
1843 LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1844 << ") = " << *(MRI->getVRegDef(MO.getReg())));
1845 if (nonIndI)
1846 return false;
1847
1848 nonIndI = MRI->getVRegDef(Reg: MO.getReg());
1849 nonIndMO = &MO;
1850 }
1851 }
1852 if (IndI && nonIndI &&
1853 nonIndI->getOpcode() == Hexagon::A2_addi &&
1854 nonIndI->getOperand(i: 2).isImm() &&
1855 nonIndI->getOperand(i: 2).getImm() == - RB.second) {
1856 bool Order = orderBumpCompare(BumpI: IndI, CmpI: PredDef);
1857 if (Order) {
1858 IndMO->setReg(I->first);
1859 nonIndMO->setReg(nonIndI->getOperand(i: 1).getReg());
1860 return true;
1861 }
1862 }
1863 return false;
1864 }
1865
1866 // It is not valid to do this transformation on an unsigned comparison
1867 // because it may underflow.
1868 Comparison::Kind Cmp =
1869 getComparisonKind(CondOpc: PredDef->getOpcode(), InitialValue: nullptr, EndValue: nullptr, IVBump: 0);
1870 if (!Cmp || Comparison::isUnsigned(Cmp))
1871 return false;
1872
1873 // If the register is being compared against an immediate, try changing
1874 // the compare instruction to use induction register and adjust the
1875 // immediate operand.
1876 int64_t CmpImm = getImmediate(MO: *CmpImmOp);
1877 int64_t V = RB.second;
1878 // Handle Overflow (64-bit).
1879 if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1880 ((V < 0) && (CmpImm < INT64_MIN - V)))
1881 return false;
1882 CmpImm += V;
1883 // Most comparisons of register against an immediate value allow
1884 // the immediate to be constant-extended. There are some exceptions
1885 // though. Make sure the new combination will work.
1886 if (CmpImmOp->isImm() && !TII->isExtendable(MI: *PredDef) &&
1887 !TII->isValidOffset(Opcode: PredDef->getOpcode(), Offset: CmpImm, TRI, Extend: false))
1888 return false;
1889
1890 // Make sure that the compare happens after the bump. Otherwise,
1891 // after the fixup, the compare would use a yet-undefined register.
1892 MachineInstr *BumpI = MRI->getVRegDef(Reg: I->first);
1893 bool Order = orderBumpCompare(BumpI, CmpI: PredDef);
1894 if (!Order)
1895 return false;
1896
1897 // Finally, fix the compare instruction.
1898 setImmediate(MO&: *CmpImmOp, Val: CmpImm);
1899 for (MachineOperand &MO : PredDef->operands()) {
1900 if (MO.isReg() && MO.getReg() == RB.first) {
1901 MO.setReg(I->first);
1902 return true;
1903 }
1904 }
1905 }
1906 }
1907
1908 return false;
1909}
1910
1911/// createPreheaderForLoop - Create a preheader for a given loop.
1912MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1913 MachineLoop *L) {
1914 if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpeculativePreheader: SpecPreheader))
1915 return TmpPH;
1916 if (!HWCreatePreheader)
1917 return nullptr;
1918
1919 MachineBasicBlock *Header = L->getHeader();
1920 MachineBasicBlock *Latch = L->getLoopLatch();
1921 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1922 MachineFunction *MF = Header->getParent();
1923 DebugLoc DL;
1924
1925#ifndef NDEBUG
1926 if ((!PHFn.empty()) && (PHFn != MF->getName()))
1927 return nullptr;
1928#endif
1929
1930 if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1931 return nullptr;
1932
1933 using instr_iterator = MachineBasicBlock::instr_iterator;
1934
1935 // Verify that all existing predecessors have analyzable branches
1936 // (or no branches at all).
1937 using MBBVector = std::vector<MachineBasicBlock *>;
1938
1939 MBBVector Preds(Header->pred_begin(), Header->pred_end());
1940 SmallVector<MachineOperand,2> Tmp1;
1941 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1942
1943 if (TII->analyzeBranch(MBB&: *ExitingBlock, TBB&: TB, FBB&: FB, Cond&: Tmp1, AllowModify: false))
1944 return nullptr;
1945
1946 for (MachineBasicBlock *PB : Preds) {
1947 bool NotAnalyzed = TII->analyzeBranch(MBB&: *PB, TBB&: TB, FBB&: FB, Cond&: Tmp1, AllowModify: false);
1948 if (NotAnalyzed)
1949 return nullptr;
1950 }
1951
1952 MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
1953 MF->insert(MBBI: Header->getIterator(), MBB: NewPH);
1954
1955 if (Header->pred_size() > 2) {
1956 // Ensure that the header has only two predecessors: the preheader and
1957 // the loop latch. Any additional predecessors of the header should
1958 // join at the newly created preheader. Inspect all PHI nodes from the
1959 // header and create appropriate corresponding PHI nodes in the preheader.
1960
1961 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1962 I != E && I->isPHI(); ++I) {
1963 MachineInstr *PN = &*I;
1964
1965 const MCInstrDesc &PD = TII->get(Opcode: TargetOpcode::PHI);
1966 MachineInstr *NewPN = MF->CreateMachineInstr(MCID: PD, DL);
1967 NewPH->insert(I: NewPH->end(), MI: NewPN);
1968
1969 Register PR = PN->getOperand(i: 0).getReg();
1970 const TargetRegisterClass *RC = MRI->getRegClass(Reg: PR);
1971 Register NewPR = MRI->createVirtualRegister(RegClass: RC);
1972 NewPN->addOperand(Op: MachineOperand::CreateReg(Reg: NewPR, isDef: true));
1973
1974 // Copy all non-latch operands of a header's PHI node to the newly
1975 // created PHI node in the preheader.
1976 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1977 Register PredR = PN->getOperand(i).getReg();
1978 unsigned PredRSub = PN->getOperand(i).getSubReg();
1979 MachineBasicBlock *PredB = PN->getOperand(i: i+1).getMBB();
1980 if (PredB == Latch)
1981 continue;
1982
1983 MachineOperand MO = MachineOperand::CreateReg(Reg: PredR, isDef: false);
1984 MO.setSubReg(PredRSub);
1985 NewPN->addOperand(Op: MO);
1986 NewPN->addOperand(Op: MachineOperand::CreateMBB(MBB: PredB));
1987 }
1988
1989 // Remove copied operands from the old PHI node and add the value
1990 // coming from the preheader's PHI.
1991 for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1992 MachineBasicBlock *PredB = PN->getOperand(i: i+1).getMBB();
1993 if (PredB != Latch) {
1994 PN->removeOperand(OpNo: i+1);
1995 PN->removeOperand(OpNo: i);
1996 }
1997 }
1998 PN->addOperand(Op: MachineOperand::CreateReg(Reg: NewPR, isDef: false));
1999 PN->addOperand(Op: MachineOperand::CreateMBB(MBB: NewPH));
2000 }
2001 } else {
2002 assert(Header->pred_size() == 2);
2003
2004 // The header has only two predecessors, but the non-latch predecessor
2005 // is not a preheader (e.g. it has other successors, etc.)
2006 // In such a case we don't need any extra PHI nodes in the new preheader,
2007 // all we need is to adjust existing PHIs in the header to now refer to
2008 // the new preheader.
2009 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
2010 I != E && I->isPHI(); ++I) {
2011 MachineInstr *PN = &*I;
2012 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
2013 MachineOperand &MO = PN->getOperand(i: i+1);
2014 if (MO.getMBB() != Latch)
2015 MO.setMBB(NewPH);
2016 }
2017 }
2018 }
2019
2020 // "Reroute" the CFG edges to link in the new preheader.
2021 // If any of the predecessors falls through to the header, insert a branch
2022 // to the new preheader in that place.
2023 SmallVector<MachineOperand,1> Tmp2;
2024 SmallVector<MachineOperand,1> EmptyCond;
2025
2026 TB = FB = nullptr;
2027
2028 for (MachineBasicBlock *PB : Preds) {
2029 if (PB != Latch) {
2030 Tmp2.clear();
2031 bool NotAnalyzed = TII->analyzeBranch(MBB&: *PB, TBB&: TB, FBB&: FB, Cond&: Tmp2, AllowModify: false);
2032 (void)NotAnalyzed; // suppress compiler warning
2033 assert (!NotAnalyzed && "Should be analyzable!");
2034 if (TB != Header && (Tmp2.empty() || FB != Header))
2035 TII->insertBranch(MBB&: *PB, TBB: NewPH, FBB: nullptr, Cond: EmptyCond, DL);
2036 PB->ReplaceUsesOfBlockWith(Old: Header, New: NewPH);
2037 }
2038 }
2039
2040 // It can happen that the latch block will fall through into the header.
2041 // Insert an unconditional branch to the header.
2042 TB = FB = nullptr;
2043 bool LatchNotAnalyzed = TII->analyzeBranch(MBB&: *Latch, TBB&: TB, FBB&: FB, Cond&: Tmp2, AllowModify: false);
2044 (void)LatchNotAnalyzed; // suppress compiler warning
2045 assert (!LatchNotAnalyzed && "Should be analyzable!");
2046 if (!TB && !FB)
2047 TII->insertBranch(MBB&: *Latch, TBB: Header, FBB: nullptr, Cond: EmptyCond, DL);
2048
2049 // Finally, the branch from the preheader to the header.
2050 TII->insertBranch(MBB&: *NewPH, TBB: Header, FBB: nullptr, Cond: EmptyCond, DL);
2051 NewPH->addSuccessor(Succ: Header);
2052
2053 MachineLoop *ParentLoop = L->getParentLoop();
2054 if (ParentLoop)
2055 ParentLoop->addBasicBlockToLoop(NewBB: NewPH, LI&: *MLI);
2056
2057 // Update the dominator information with the new preheader.
2058 if (MDT) {
2059 if (MachineDomTreeNode *HN = MDT->getNode(BB: Header)) {
2060 if (MachineDomTreeNode *DHN = HN->getIDom()) {
2061 MDT->addNewBlock(BB: NewPH, DomBB: DHN->getBlock());
2062 MDT->changeImmediateDominator(BB: Header, NewBB: NewPH);
2063 }
2064 }
2065 }
2066
2067 return NewPH;
2068}
2069