1//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Insert hardware loop intrinsics into loops which are deemed profitable by
10/// the target, by querying TargetTransformInfo. A hardware loop comprises of
11/// two intrinsics: one, outside the loop, to set the loop iteration count and
12/// another, in the exit block, to decrement the counter. The decremented value
13/// can either be carried through the loop via a phi or handled in some opaque
14/// way by the target.
15///
16//===----------------------------------------------------------------------===//
17
18#include "llvm/CodeGen/HardwareLoops.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/Analysis/AssumptionCache.h"
21#include "llvm/Analysis/BranchProbabilityInfo.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/OptimizationRemarkEmitter.h"
24#include "llvm/Analysis/ScalarEvolution.h"
25#include "llvm/Analysis/TargetLibraryInfo.h"
26#include "llvm/Analysis/TargetTransformInfo.h"
27#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/Instructions.h"
33#include "llvm/IR/Value.h"
34#include "llvm/InitializePasses.h"
35#include "llvm/Pass.h"
36#include "llvm/PassRegistry.h"
37#include "llvm/Support/CommandLine.h"
38#include "llvm/Support/Debug.h"
39#include "llvm/Transforms/Utils.h"
40#include "llvm/Transforms/Utils/BasicBlockUtils.h"
41#include "llvm/Transforms/Utils/Local.h"
42#include "llvm/Transforms/Utils/LoopUtils.h"
43#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
44
45#define DEBUG_TYPE "hardware-loops"
46
47#define HW_LOOPS_NAME "Hardware Loop Insertion"
48
49using namespace llvm;
50
51static cl::opt<bool>
52ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(Val: false),
53 cl::desc("Force hardware loops intrinsics to be inserted"));
54
55static cl::opt<bool>
56ForceHardwareLoopPHI(
57 "force-hardware-loop-phi", cl::Hidden, cl::init(Val: false),
58 cl::desc("Force hardware loop counter to be updated through a phi"));
59
60static cl::opt<bool>
61ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(Val: false),
62 cl::desc("Force allowance of nested hardware loops"));
63
64static cl::opt<unsigned>
65LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(Val: 1),
66 cl::desc("Set the loop decrement value"));
67
68static cl::opt<unsigned>
69CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(Val: 32),
70 cl::desc("Set the loop counter bitwidth"));
71
72static cl::opt<bool>
73ForceGuardLoopEntry(
74 "force-hardware-loop-guard", cl::Hidden, cl::init(Val: false),
75 cl::desc("Force generation of loop guard intrinsic"));
76
77STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
78
79#ifndef NDEBUG
80static void debugHWLoopFailure(const StringRef DebugMsg,
81 Instruction *I) {
82 dbgs() << "HWLoops: " << DebugMsg;
83 if (I)
84 dbgs() << ' ' << *I;
85 else
86 dbgs() << '.';
87 dbgs() << '\n';
88}
89#endif
90
91static OptimizationRemarkAnalysis
92createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
93 BasicBlock *CodeRegion = L->getHeader();
94 DebugLoc DL = L->getStartLoc();
95
96 if (I) {
97 CodeRegion = I->getParent();
98 // If there is no debug location attached to the instruction, revert back to
99 // using the loop's.
100 if (I->getDebugLoc())
101 DL = I->getDebugLoc();
102 }
103
104 OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
105 R << "hardware-loop not created: ";
106 return R;
107}
108
109namespace {
110
111 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
112 OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
113 LLVM_DEBUG(debugHWLoopFailure(Msg, I));
114 ORE->emit(OptDiag: createHWLoopAnalysis(RemarkName: ORETag, L: TheLoop, I) << Msg);
115 }
116
117 using TTI = TargetTransformInfo;
118
119 class HardwareLoopsLegacy : public FunctionPass {
120 public:
121 static char ID;
122
123 HardwareLoopsLegacy() : FunctionPass(ID) {
124 initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
125 }
126
127 bool runOnFunction(Function &F) override;
128
129 void getAnalysisUsage(AnalysisUsage &AU) const override {
130 AU.addRequired<LoopInfoWrapperPass>();
131 AU.addPreserved<LoopInfoWrapperPass>();
132 AU.addRequired<DominatorTreeWrapperPass>();
133 AU.addPreserved<DominatorTreeWrapperPass>();
134 AU.addRequired<ScalarEvolutionWrapperPass>();
135 AU.addPreserved<ScalarEvolutionWrapperPass>();
136 AU.addRequired<AssumptionCacheTracker>();
137 AU.addRequired<TargetTransformInfoWrapperPass>();
138 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
139 AU.addPreserved<BranchProbabilityInfoWrapperPass>();
140 }
141 };
142
143 class HardwareLoopsImpl {
144 public:
145 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
146 DominatorTree &DT, const DataLayout &DL,
147 const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
148 AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
149 HardwareLoopOptions &Opts)
150 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
151 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
152
153 bool run(Function &F);
154
155 private:
156 // Try to convert the given Loop into a hardware loop.
157 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
158
159 // Given that the target believes the loop to be profitable, try to
160 // convert it.
161 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
162
163 ScalarEvolution &SE;
164 LoopInfo &LI;
165 bool PreserveLCSSA;
166 DominatorTree &DT;
167 const DataLayout &DL;
168 const TargetTransformInfo &TTI;
169 TargetLibraryInfo *TLI = nullptr;
170 AssumptionCache &AC;
171 OptimizationRemarkEmitter *ORE;
172 HardwareLoopOptions &Opts;
173 bool MadeChange = false;
174 };
175
176 class HardwareLoop {
177 // Expand the trip count scev into a value that we can use.
178 Value *InitLoopCount();
179
180 // Insert the set_loop_iteration intrinsic.
181 Value *InsertIterationSetup(Value *LoopCountInit);
182
183 // Insert the loop_decrement intrinsic.
184 void InsertLoopDec();
185
186 // Insert the loop_decrement_reg intrinsic.
187 Instruction *InsertLoopRegDec(Value *EltsRem);
188
189 // If the target requires the counter value to be updated in the loop,
190 // insert a phi to hold the value. The intended purpose is for use by
191 // loop_decrement_reg.
192 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
193
194 // Create a new cmp, that checks the returned value of loop_decrement*,
195 // and update the exit branch to use it.
196 void UpdateBranch(Value *EltsRem);
197
198 public:
199 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
200 const DataLayout &DL,
201 OptimizationRemarkEmitter *ORE,
202 HardwareLoopOptions &Opts) :
203 SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
204 ExitCount(Info.ExitCount),
205 CountType(Info.CountType),
206 ExitBranch(Info.ExitBranch),
207 LoopDecrement(Info.LoopDecrement),
208 UsePHICounter(Info.CounterInReg),
209 UseLoopGuard(Info.PerformEntryTest) { }
210
211 void Create();
212
213 private:
214 ScalarEvolution &SE;
215 const DataLayout &DL;
216 OptimizationRemarkEmitter *ORE = nullptr;
217 HardwareLoopOptions &Opts;
218 Loop *L = nullptr;
219 Module *M = nullptr;
220 const SCEV *ExitCount = nullptr;
221 Type *CountType = nullptr;
222 BranchInst *ExitBranch = nullptr;
223 Value *LoopDecrement = nullptr;
224 bool UsePHICounter = false;
225 bool UseLoopGuard = false;
226 BasicBlock *BeginBB = nullptr;
227 };
228}
229
230char HardwareLoopsLegacy::ID = 0;
231
232bool HardwareLoopsLegacy::runOnFunction(Function &F) {
233 if (skipFunction(F))
234 return false;
235
236 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
237
238 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
239 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
240 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
241 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
242 auto &DL = F.getDataLayout();
243 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
244 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
245 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
246 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
247 bool PreserveLCSSA = mustPreserveAnalysisID(AID&: LCSSAID);
248
249 HardwareLoopOptions Opts;
250 if (ForceHardwareLoops.getNumOccurrences())
251 Opts.setForce(ForceHardwareLoops);
252 if (ForceHardwareLoopPHI.getNumOccurrences())
253 Opts.setForcePhi(ForceHardwareLoopPHI);
254 if (ForceNestedLoop.getNumOccurrences())
255 Opts.setForceNested(ForceNestedLoop);
256 if (ForceGuardLoopEntry.getNumOccurrences())
257 Opts.setForceGuard(ForceGuardLoopEntry);
258 if (LoopDecrement.getNumOccurrences())
259 Opts.setDecrement(LoopDecrement);
260 if (CounterBitWidth.getNumOccurrences())
261 Opts.setCounterBitwidth(CounterBitWidth);
262
263 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
264 Opts);
265 return Impl.run(F);
266}
267
268PreservedAnalyses HardwareLoopsPass::run(Function &F,
269 FunctionAnalysisManager &AM) {
270 auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
271 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(IR&: F);
272 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
273 auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
274 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(IR&: F);
275 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
276 auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
277 auto &DL = F.getDataLayout();
278
279 HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
280 bool Changed = Impl.run(F);
281 if (!Changed)
282 return PreservedAnalyses::all();
283
284 PreservedAnalyses PA;
285 PA.preserve<LoopAnalysis>();
286 PA.preserve<ScalarEvolutionAnalysis>();
287 PA.preserve<DominatorTreeAnalysis>();
288 PA.preserve<BranchProbabilityAnalysis>();
289 return PA;
290}
291
292bool HardwareLoopsImpl::run(Function &F) {
293 LLVMContext &Ctx = F.getContext();
294 for (Loop *L : LI)
295 if (L->isOutermost())
296 TryConvertLoop(L, Ctx);
297 return MadeChange;
298}
299
300// Return true if the search should stop, which will be when an inner loop is
301// converted and the parent loop doesn't support containing a hardware loop.
302bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
303 // Process nested loops first.
304 bool AnyChanged = false;
305 for (Loop *SL : *L)
306 AnyChanged |= TryConvertLoop(L: SL, Ctx);
307 if (AnyChanged) {
308 reportHWLoopFailure(Msg: "nested hardware-loops not supported", ORETag: "HWLoopNested",
309 ORE, TheLoop: L);
310 return true; // Stop search.
311 }
312
313 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
314
315 HardwareLoopInfo HWLoopInfo(L);
316 if (!HWLoopInfo.canAnalyze(LI)) {
317 reportHWLoopFailure(Msg: "cannot analyze loop, irreducible control flow",
318 ORETag: "HWLoopCannotAnalyze", ORE, TheLoop: L);
319 return false;
320 }
321
322 if (!Opts.Force &&
323 !TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo: TLI, HWLoopInfo)) {
324 reportHWLoopFailure(Msg: "it's not profitable to create a hardware-loop",
325 ORETag: "HWLoopNotProfitable", ORE, TheLoop: L);
326 return false;
327 }
328
329 // Allow overriding of the counter width and loop decrement value.
330 if (Opts.Bitwidth.has_value()) {
331 HWLoopInfo.CountType = IntegerType::get(C&: Ctx, NumBits: Opts.Bitwidth.value());
332 }
333
334 if (Opts.Decrement.has_value())
335 HWLoopInfo.LoopDecrement =
336 ConstantInt::get(Ty: HWLoopInfo.CountType, V: Opts.Decrement.value());
337
338 MadeChange |= TryConvertLoop(HWLoopInfo);
339 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
340}
341
342bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
343
344 Loop *L = HWLoopInfo.L;
345 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
346
347 if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, ForceNestedLoop: Opts.getForceNested(),
348 ForceHardwareLoopPHI: Opts.getForcePhi())) {
349 // TODO: there can be many reasons a loop is not considered a
350 // candidate, so we should let isHardwareLoopCandidate fill in the
351 // reason and then report a better message here.
352 reportHWLoopFailure(Msg: "loop is not a candidate", ORETag: "HWLoopNoCandidate", ORE, TheLoop: L);
353 return false;
354 }
355
356 assert(
357 (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
358 "Hardware Loop must have set exit info.");
359
360 BasicBlock *Preheader = L->getLoopPreheader();
361
362 // If we don't have a preheader, then insert one.
363 if (!Preheader)
364 Preheader = InsertPreheaderForLoop(L, DT: &DT, LI: &LI, MSSAU: nullptr, PreserveLCSSA);
365 if (!Preheader)
366 return false;
367
368 HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
369 HWLoop.Create();
370 ++NumHWLoops;
371 return true;
372}
373
374void HardwareLoop::Create() {
375 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
376
377 Value *LoopCountInit = InitLoopCount();
378 if (!LoopCountInit) {
379 reportHWLoopFailure(Msg: "could not safely create a loop count expression",
380 ORETag: "HWLoopNotSafe", ORE, TheLoop: L);
381 return;
382 }
383
384 Value *Setup = InsertIterationSetup(LoopCountInit);
385
386 if (UsePHICounter || Opts.ForcePhi) {
387 Instruction *LoopDec = InsertLoopRegDec(EltsRem: LoopCountInit);
388 Value *EltsRem = InsertPHICounter(NumElts: Setup, EltsRem: LoopDec);
389 LoopDec->setOperand(i: 0, Val: EltsRem);
390 UpdateBranch(EltsRem: LoopDec);
391 } else
392 InsertLoopDec();
393
394 // Run through the basic blocks of the loop and see if any of them have dead
395 // PHIs that can be removed.
396 for (auto *I : L->blocks())
397 DeleteDeadPHIs(BB: I);
398}
399
400static bool CanGenerateTest(Loop *L, Value *Count) {
401 BasicBlock *Preheader = L->getLoopPreheader();
402 if (!Preheader->getSinglePredecessor())
403 return false;
404
405 BasicBlock *Pred = Preheader->getSinglePredecessor();
406 if (!isa<BranchInst>(Val: Pred->getTerminator()))
407 return false;
408
409 auto *BI = cast<BranchInst>(Val: Pred->getTerminator());
410 if (BI->isUnconditional() || !isa<ICmpInst>(Val: BI->getCondition()))
411 return false;
412
413 // Check that the icmp is checking for equality of Count and zero and that
414 // a non-zero value results in entering the loop.
415 auto ICmp = cast<ICmpInst>(Val: BI->getCondition());
416 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
417 if (!ICmp->isEquality())
418 return false;
419
420 auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
421 if (auto *Const = dyn_cast<ConstantInt>(Val: ICmp->getOperand(i_nocapture: OpIdx)))
422 return Const->isZero() && ICmp->getOperand(i_nocapture: OpIdx ^ 1) == Count;
423 return false;
424 };
425
426 // Check if Count is a zext.
427 Value *CountBefZext =
428 isa<ZExtInst>(Val: Count) ? cast<ZExtInst>(Val: Count)->getOperand(i_nocapture: 0) : nullptr;
429
430 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
431 !IsCompareZero(ICmp, CountBefZext, 0) &&
432 !IsCompareZero(ICmp, CountBefZext, 1))
433 return false;
434
435 unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
436 if (BI->getSuccessor(i: SuccIdx) != Preheader)
437 return false;
438
439 return true;
440}
441
442Value *HardwareLoop::InitLoopCount() {
443 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
444 // Can we replace a conditional branch with an intrinsic that sets the
445 // loop counter and tests that is not zero?
446
447 SCEVExpander SCEVE(SE, DL, "loopcnt");
448 if (!ExitCount->getType()->isPointerTy() &&
449 ExitCount->getType() != CountType)
450 ExitCount = SE.getZeroExtendExpr(Op: ExitCount, Ty: CountType);
451
452 ExitCount = SE.getAddExpr(LHS: ExitCount, RHS: SE.getOne(Ty: CountType));
453
454 // If we're trying to use the 'test and set' form of the intrinsic, we need
455 // to replace a conditional branch that is controlling entry to the loop. It
456 // is likely (guaranteed?) that the preheader has an unconditional branch to
457 // the loop header, so also check if it has a single predecessor.
458 if (SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_NE, LHS: ExitCount,
459 RHS: SE.getZero(Ty: ExitCount->getType()))) {
460 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
461 if (Opts.ForceGuard)
462 UseLoopGuard = true;
463 } else
464 UseLoopGuard = false;
465
466 BasicBlock *BB = L->getLoopPreheader();
467 if (UseLoopGuard && BB->getSinglePredecessor() &&
468 cast<BranchInst>(Val: BB->getTerminator())->isUnconditional()) {
469 BasicBlock *Predecessor = BB->getSinglePredecessor();
470 // If it's not safe to create a while loop then don't force it and create a
471 // do-while loop instead
472 if (!SCEVE.isSafeToExpandAt(S: ExitCount, InsertionPoint: Predecessor->getTerminator()))
473 UseLoopGuard = false;
474 else
475 BB = Predecessor;
476 }
477
478 if (!SCEVE.isSafeToExpandAt(S: ExitCount, InsertionPoint: BB->getTerminator())) {
479 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
480 << *ExitCount << "\n");
481 return nullptr;
482 }
483
484 Value *Count = SCEVE.expandCodeFor(SH: ExitCount, Ty: CountType,
485 I: BB->getTerminator());
486
487 // FIXME: We've expanded Count where we hope to insert the counter setting
488 // intrinsic. But, in the case of the 'test and set' form, we may fallback to
489 // the just 'set' form and in which case the insertion block is most likely
490 // different. It means there will be instruction(s) in a block that possibly
491 // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
492 // but it's doesn't appear to work in all cases.
493
494 UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
495 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
496 LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
497 << " - Expanded Count in " << BB->getName() << "\n"
498 << " - Will insert set counter intrinsic into: "
499 << BeginBB->getName() << "\n");
500 return Count;
501}
502
503Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
504 IRBuilder<> Builder(BeginBB->getTerminator());
505 if (BeginBB->getParent()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP))
506 Builder.setIsFPConstrained(true);
507 Type *Ty = LoopCountInit->getType();
508 bool UsePhi = UsePHICounter || Opts.ForcePhi;
509 Intrinsic::ID ID = UseLoopGuard
510 ? (UsePhi ? Intrinsic::test_start_loop_iterations
511 : Intrinsic::test_set_loop_iterations)
512 : (UsePhi ? Intrinsic::start_loop_iterations
513 : Intrinsic::set_loop_iterations);
514 Value *LoopSetup = Builder.CreateIntrinsic(ID, Types: Ty, Args: LoopCountInit);
515
516 // Use the return value of the intrinsic to control the entry of the loop.
517 if (UseLoopGuard) {
518 assert((isa<BranchInst>(BeginBB->getTerminator()) &&
519 cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
520 "Expected conditional branch");
521
522 Value *SetCount =
523 UsePhi ? Builder.CreateExtractValue(Agg: LoopSetup, Idxs: 1) : LoopSetup;
524 auto *LoopGuard = cast<BranchInst>(Val: BeginBB->getTerminator());
525 LoopGuard->setCondition(SetCount);
526 if (LoopGuard->getSuccessor(i: 0) != L->getLoopPreheader())
527 LoopGuard->swapSuccessors();
528 }
529 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
530 << "\n");
531 if (UsePhi && UseLoopGuard)
532 LoopSetup = Builder.CreateExtractValue(Agg: LoopSetup, Idxs: 0);
533 return !UsePhi ? LoopCountInit : LoopSetup;
534}
535
536void HardwareLoop::InsertLoopDec() {
537 IRBuilder<> CondBuilder(ExitBranch);
538 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
539 Kind: Attribute::StrictFP))
540 CondBuilder.setIsFPConstrained(true);
541
542 Value *Ops[] = { LoopDecrement };
543 Value *NewCond = CondBuilder.CreateIntrinsic(ID: Intrinsic::loop_decrement,
544 Types: LoopDecrement->getType(), Args: Ops);
545 Value *OldCond = ExitBranch->getCondition();
546 ExitBranch->setCondition(NewCond);
547
548 // The false branch must exit the loop.
549 if (!L->contains(BB: ExitBranch->getSuccessor(i: 0)))
550 ExitBranch->swapSuccessors();
551
552 // The old condition may be dead now, and may have even created a dead PHI
553 // (the original induction variable).
554 RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
555
556 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
557}
558
559Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
560 IRBuilder<> CondBuilder(ExitBranch);
561 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
562 Kind: Attribute::StrictFP))
563 CondBuilder.setIsFPConstrained(true);
564
565 Value *Ops[] = { EltsRem, LoopDecrement };
566 Value *Call = CondBuilder.CreateIntrinsic(ID: Intrinsic::loop_decrement_reg,
567 Types: {EltsRem->getType()}, Args: Ops);
568
569 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
570 return cast<Instruction>(Val: Call);
571}
572
573PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
574 BasicBlock *Preheader = L->getLoopPreheader();
575 BasicBlock *Header = L->getHeader();
576 BasicBlock *Latch = ExitBranch->getParent();
577 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
578 PHINode *Index = Builder.CreatePHI(Ty: NumElts->getType(), NumReservedValues: 2);
579 Index->addIncoming(V: NumElts, BB: Preheader);
580 Index->addIncoming(V: EltsRem, BB: Latch);
581 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
582 return Index;
583}
584
585void HardwareLoop::UpdateBranch(Value *EltsRem) {
586 IRBuilder<> CondBuilder(ExitBranch);
587 Value *NewCond =
588 CondBuilder.CreateICmpNE(LHS: EltsRem, RHS: ConstantInt::get(Ty: EltsRem->getType(), V: 0));
589 Value *OldCond = ExitBranch->getCondition();
590 ExitBranch->setCondition(NewCond);
591
592 // The false branch must exit the loop.
593 if (!L->contains(BB: ExitBranch->getSuccessor(i: 0)))
594 ExitBranch->swapSuccessors();
595
596 // The old condition may be dead now, and may have even created a dead PHI
597 // (the original induction variable).
598 RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
599}
600
601INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
602INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
603INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
604INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
605INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
606INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
607
608FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
609