1//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Insert hardware loop intrinsics into loops which are deemed profitable by
10/// the target, by querying TargetTransformInfo. A hardware loop comprises of
11/// two intrinsics: one, outside the loop, to set the loop iteration count and
12/// another, in the exit block, to decrement the counter. The decremented value
13/// can either be carried through the loop via a phi or handled in some opaque
14/// way by the target.
15///
16//===----------------------------------------------------------------------===//
17
18#include "llvm/CodeGen/HardwareLoops.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/Analysis/AssumptionCache.h"
21#include "llvm/Analysis/BranchProbabilityInfo.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/OptimizationRemarkEmitter.h"
24#include "llvm/Analysis/ScalarEvolution.h"
25#include "llvm/Analysis/TargetLibraryInfo.h"
26#include "llvm/Analysis/TargetTransformInfo.h"
27#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/Instructions.h"
33#include "llvm/IR/IntrinsicInst.h"
34#include "llvm/IR/Value.h"
35#include "llvm/InitializePasses.h"
36#include "llvm/Pass.h"
37#include "llvm/PassRegistry.h"
38#include "llvm/Support/CommandLine.h"
39#include "llvm/Support/Debug.h"
40#include "llvm/Transforms/Utils.h"
41#include "llvm/Transforms/Utils/BasicBlockUtils.h"
42#include "llvm/Transforms/Utils/Local.h"
43#include "llvm/Transforms/Utils/LoopUtils.h"
44#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
45
46#define DEBUG_TYPE "hardware-loops"
47
48#define HW_LOOPS_NAME "Hardware Loop Insertion"
49
50using namespace llvm;
51
52static cl::opt<bool>
53ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(Val: false),
54 cl::desc("Force hardware loops intrinsics to be inserted"));
55
56static cl::opt<bool>
57ForceHardwareLoopPHI(
58 "force-hardware-loop-phi", cl::Hidden, cl::init(Val: false),
59 cl::desc("Force hardware loop counter to be updated through a phi"));
60
61static cl::opt<bool>
62ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(Val: false),
63 cl::desc("Force allowance of nested hardware loops"));
64
65static cl::opt<unsigned>
66LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(Val: 1),
67 cl::desc("Set the loop decrement value"));
68
69static cl::opt<unsigned>
70CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(Val: 32),
71 cl::desc("Set the loop counter bitwidth"));
72
73static cl::opt<bool>
74ForceGuardLoopEntry(
75 "force-hardware-loop-guard", cl::Hidden, cl::init(Val: false),
76 cl::desc("Force generation of loop guard intrinsic"));
77
78STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
79
80#ifndef NDEBUG
81static void debugHWLoopFailure(const StringRef DebugMsg,
82 Instruction *I) {
83 dbgs() << "HWLoops: " << DebugMsg;
84 if (I)
85 dbgs() << ' ' << *I;
86 else
87 dbgs() << '.';
88 dbgs() << '\n';
89}
90#endif
91
92static OptimizationRemarkAnalysis
93createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
94 Value *CodeRegion = L->getHeader();
95 DebugLoc DL = L->getStartLoc();
96
97 if (I) {
98 CodeRegion = I->getParent();
99 // If there is no debug location attached to the instruction, revert back to
100 // using the loop's.
101 if (I->getDebugLoc())
102 DL = I->getDebugLoc();
103 }
104
105 OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
106 R << "hardware-loop not created: ";
107 return R;
108}
109
110namespace {
111
112 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
113 OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
114 LLVM_DEBUG(debugHWLoopFailure(Msg, I));
115 ORE->emit(OptDiag&: createHWLoopAnalysis(RemarkName: ORETag, L: TheLoop, I) << Msg);
116 }
117
118 using TTI = TargetTransformInfo;
119
120 class HardwareLoopsLegacy : public FunctionPass {
121 public:
122 static char ID;
123
124 HardwareLoopsLegacy() : FunctionPass(ID) {
125 initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
126 }
127
128 bool runOnFunction(Function &F) override;
129
130 void getAnalysisUsage(AnalysisUsage &AU) const override {
131 AU.addRequired<LoopInfoWrapperPass>();
132 AU.addPreserved<LoopInfoWrapperPass>();
133 AU.addRequired<DominatorTreeWrapperPass>();
134 AU.addPreserved<DominatorTreeWrapperPass>();
135 AU.addRequired<ScalarEvolutionWrapperPass>();
136 AU.addPreserved<ScalarEvolutionWrapperPass>();
137 AU.addRequired<AssumptionCacheTracker>();
138 AU.addRequired<TargetTransformInfoWrapperPass>();
139 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
140 AU.addPreserved<BranchProbabilityInfoWrapperPass>();
141 }
142 };
143
144 class HardwareLoopsImpl {
145 public:
146 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
147 DominatorTree &DT, const DataLayout &DL,
148 const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
149 AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
150 HardwareLoopOptions &Opts)
151 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
152 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
153
154 bool run(Function &F);
155
156 private:
157 // Try to convert the given Loop into a hardware loop.
158 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
159
160 // Given that the target believes the loop to be profitable, try to
161 // convert it.
162 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
163
164 ScalarEvolution &SE;
165 LoopInfo &LI;
166 bool PreserveLCSSA;
167 DominatorTree &DT;
168 const DataLayout &DL;
169 const TargetTransformInfo &TTI;
170 TargetLibraryInfo *TLI = nullptr;
171 AssumptionCache &AC;
172 OptimizationRemarkEmitter *ORE;
173 HardwareLoopOptions &Opts;
174 bool MadeChange = false;
175 };
176
177 class HardwareLoop {
178 // Expand the trip count scev into a value that we can use.
179 Value *InitLoopCount();
180
181 // Insert the set_loop_iteration intrinsic.
182 Value *InsertIterationSetup(Value *LoopCountInit);
183
184 // Insert the loop_decrement intrinsic.
185 void InsertLoopDec();
186
187 // Insert the loop_decrement_reg intrinsic.
188 Instruction *InsertLoopRegDec(Value *EltsRem);
189
190 // If the target requires the counter value to be updated in the loop,
191 // insert a phi to hold the value. The intended purpose is for use by
192 // loop_decrement_reg.
193 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
194
195 // Create a new cmp, that checks the returned value of loop_decrement*,
196 // and update the exit branch to use it.
197 void UpdateBranch(Value *EltsRem);
198
199 public:
200 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
201 const DataLayout &DL,
202 OptimizationRemarkEmitter *ORE,
203 HardwareLoopOptions &Opts) :
204 SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
205 ExitCount(Info.ExitCount),
206 CountType(Info.CountType),
207 ExitBranch(Info.ExitBranch),
208 LoopDecrement(Info.LoopDecrement),
209 UsePHICounter(Info.CounterInReg),
210 UseLoopGuard(Info.PerformEntryTest) { }
211
212 void Create();
213
214 private:
215 ScalarEvolution &SE;
216 const DataLayout &DL;
217 OptimizationRemarkEmitter *ORE = nullptr;
218 HardwareLoopOptions &Opts;
219 Loop *L = nullptr;
220 Module *M = nullptr;
221 const SCEV *ExitCount = nullptr;
222 Type *CountType = nullptr;
223 BranchInst *ExitBranch = nullptr;
224 Value *LoopDecrement = nullptr;
225 bool UsePHICounter = false;
226 bool UseLoopGuard = false;
227 BasicBlock *BeginBB = nullptr;
228 };
229}
230
231char HardwareLoopsLegacy::ID = 0;
232
233bool HardwareLoopsLegacy::runOnFunction(Function &F) {
234 if (skipFunction(F))
235 return false;
236
237 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
238
239 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
240 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
241 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
242 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
243 auto &DL = F.getDataLayout();
244 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
245 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
246 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
247 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
248 bool PreserveLCSSA = mustPreserveAnalysisID(AID&: LCSSAID);
249
250 HardwareLoopOptions Opts;
251 if (ForceHardwareLoops.getNumOccurrences())
252 Opts.setForce(ForceHardwareLoops);
253 if (ForceHardwareLoopPHI.getNumOccurrences())
254 Opts.setForcePhi(ForceHardwareLoopPHI);
255 if (ForceNestedLoop.getNumOccurrences())
256 Opts.setForceNested(ForceNestedLoop);
257 if (ForceGuardLoopEntry.getNumOccurrences())
258 Opts.setForceGuard(ForceGuardLoopEntry);
259 if (LoopDecrement.getNumOccurrences())
260 Opts.setDecrement(LoopDecrement);
261 if (CounterBitWidth.getNumOccurrences())
262 Opts.setCounterBitwidth(CounterBitWidth);
263
264 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
265 Opts);
266 return Impl.run(F);
267}
268
269PreservedAnalyses HardwareLoopsPass::run(Function &F,
270 FunctionAnalysisManager &AM) {
271 auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
272 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(IR&: F);
273 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
274 auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
275 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(IR&: F);
276 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
277 auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
278 auto &DL = F.getDataLayout();
279
280 HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
281 bool Changed = Impl.run(F);
282 if (!Changed)
283 return PreservedAnalyses::all();
284
285 PreservedAnalyses PA;
286 PA.preserve<LoopAnalysis>();
287 PA.preserve<ScalarEvolutionAnalysis>();
288 PA.preserve<DominatorTreeAnalysis>();
289 PA.preserve<BranchProbabilityAnalysis>();
290 return PA;
291}
292
293bool HardwareLoopsImpl::run(Function &F) {
294 LLVMContext &Ctx = F.getContext();
295 for (Loop *L : LI)
296 if (L->isOutermost())
297 TryConvertLoop(L, Ctx);
298 return MadeChange;
299}
300
301// Return true if the search should stop, which will be when an inner loop is
302// converted and the parent loop doesn't support containing a hardware loop.
303bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
304 // Process nested loops first.
305 bool AnyChanged = false;
306 for (Loop *SL : *L)
307 AnyChanged |= TryConvertLoop(L: SL, Ctx);
308 if (AnyChanged) {
309 reportHWLoopFailure(Msg: "nested hardware-loops not supported", ORETag: "HWLoopNested",
310 ORE, TheLoop: L);
311 return true; // Stop search.
312 }
313
314 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
315
316 HardwareLoopInfo HWLoopInfo(L);
317 if (!HWLoopInfo.canAnalyze(LI)) {
318 reportHWLoopFailure(Msg: "cannot analyze loop, irreducible control flow",
319 ORETag: "HWLoopCannotAnalyze", ORE, TheLoop: L);
320 return false;
321 }
322
323 if (!Opts.Force &&
324 !TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo: TLI, HWLoopInfo)) {
325 reportHWLoopFailure(Msg: "it's not profitable to create a hardware-loop",
326 ORETag: "HWLoopNotProfitable", ORE, TheLoop: L);
327 return false;
328 }
329
330 // Allow overriding of the counter width and loop decrement value.
331 if (Opts.Bitwidth.has_value()) {
332 HWLoopInfo.CountType = IntegerType::get(C&: Ctx, NumBits: Opts.Bitwidth.value());
333 }
334
335 if (Opts.Decrement.has_value())
336 HWLoopInfo.LoopDecrement =
337 ConstantInt::get(Ty: HWLoopInfo.CountType, V: Opts.Decrement.value());
338
339 MadeChange |= TryConvertLoop(HWLoopInfo);
340 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
341}
342
343bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
344
345 Loop *L = HWLoopInfo.L;
346 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
347
348 if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, ForceNestedLoop: Opts.getForceNested(),
349 ForceHardwareLoopPHI: Opts.getForcePhi())) {
350 // TODO: there can be many reasons a loop is not considered a
351 // candidate, so we should let isHardwareLoopCandidate fill in the
352 // reason and then report a better message here.
353 reportHWLoopFailure(Msg: "loop is not a candidate", ORETag: "HWLoopNoCandidate", ORE, TheLoop: L);
354 return false;
355 }
356
357 assert(
358 (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
359 "Hardware Loop must have set exit info.");
360
361 BasicBlock *Preheader = L->getLoopPreheader();
362
363 // If we don't have a preheader, then insert one.
364 if (!Preheader)
365 Preheader = InsertPreheaderForLoop(L, DT: &DT, LI: &LI, MSSAU: nullptr, PreserveLCSSA);
366 if (!Preheader)
367 return false;
368
369 HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
370 HWLoop.Create();
371 ++NumHWLoops;
372 return true;
373}
374
375void HardwareLoop::Create() {
376 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
377
378 Value *LoopCountInit = InitLoopCount();
379 if (!LoopCountInit) {
380 reportHWLoopFailure(Msg: "could not safely create a loop count expression",
381 ORETag: "HWLoopNotSafe", ORE, TheLoop: L);
382 return;
383 }
384
385 Value *Setup = InsertIterationSetup(LoopCountInit);
386
387 if (UsePHICounter || Opts.ForcePhi) {
388 Instruction *LoopDec = InsertLoopRegDec(EltsRem: LoopCountInit);
389 Value *EltsRem = InsertPHICounter(NumElts: Setup, EltsRem: LoopDec);
390 LoopDec->setOperand(i: 0, Val: EltsRem);
391 UpdateBranch(EltsRem: LoopDec);
392 } else
393 InsertLoopDec();
394
395 // Run through the basic blocks of the loop and see if any of them have dead
396 // PHIs that can be removed.
397 for (auto *I : L->blocks())
398 DeleteDeadPHIs(BB: I);
399}
400
401static bool CanGenerateTest(Loop *L, Value *Count) {
402 BasicBlock *Preheader = L->getLoopPreheader();
403 if (!Preheader->getSinglePredecessor())
404 return false;
405
406 BasicBlock *Pred = Preheader->getSinglePredecessor();
407 if (!isa<BranchInst>(Val: Pred->getTerminator()))
408 return false;
409
410 auto *BI = cast<BranchInst>(Val: Pred->getTerminator());
411 if (BI->isUnconditional() || !isa<ICmpInst>(Val: BI->getCondition()))
412 return false;
413
414 // Check that the icmp is checking for equality of Count and zero and that
415 // a non-zero value results in entering the loop.
416 auto ICmp = cast<ICmpInst>(Val: BI->getCondition());
417 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
418 if (!ICmp->isEquality())
419 return false;
420
421 auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
422 if (auto *Const = dyn_cast<ConstantInt>(Val: ICmp->getOperand(i_nocapture: OpIdx)))
423 return Const->isZero() && ICmp->getOperand(i_nocapture: OpIdx ^ 1) == Count;
424 return false;
425 };
426
427 // Check if Count is a zext.
428 Value *CountBefZext =
429 isa<ZExtInst>(Val: Count) ? cast<ZExtInst>(Val: Count)->getOperand(i_nocapture: 0) : nullptr;
430
431 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
432 !IsCompareZero(ICmp, CountBefZext, 0) &&
433 !IsCompareZero(ICmp, CountBefZext, 1))
434 return false;
435
436 unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
437 if (BI->getSuccessor(i: SuccIdx) != Preheader)
438 return false;
439
440 return true;
441}
442
443Value *HardwareLoop::InitLoopCount() {
444 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
445 // Can we replace a conditional branch with an intrinsic that sets the
446 // loop counter and tests that is not zero?
447
448 SCEVExpander SCEVE(SE, DL, "loopcnt");
449 if (!ExitCount->getType()->isPointerTy() &&
450 ExitCount->getType() != CountType)
451 ExitCount = SE.getZeroExtendExpr(Op: ExitCount, Ty: CountType);
452
453 ExitCount = SE.getAddExpr(LHS: ExitCount, RHS: SE.getOne(Ty: CountType));
454
455 // If we're trying to use the 'test and set' form of the intrinsic, we need
456 // to replace a conditional branch that is controlling entry to the loop. It
457 // is likely (guaranteed?) that the preheader has an unconditional branch to
458 // the loop header, so also check if it has a single predecessor.
459 if (SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_NE, LHS: ExitCount,
460 RHS: SE.getZero(Ty: ExitCount->getType()))) {
461 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
462 if (Opts.ForceGuard)
463 UseLoopGuard = true;
464 } else
465 UseLoopGuard = false;
466
467 BasicBlock *BB = L->getLoopPreheader();
468 if (UseLoopGuard && BB->getSinglePredecessor() &&
469 cast<BranchInst>(Val: BB->getTerminator())->isUnconditional()) {
470 BasicBlock *Predecessor = BB->getSinglePredecessor();
471 // If it's not safe to create a while loop then don't force it and create a
472 // do-while loop instead
473 if (!SCEVE.isSafeToExpandAt(S: ExitCount, InsertionPoint: Predecessor->getTerminator()))
474 UseLoopGuard = false;
475 else
476 BB = Predecessor;
477 }
478
479 if (!SCEVE.isSafeToExpandAt(S: ExitCount, InsertionPoint: BB->getTerminator())) {
480 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
481 << *ExitCount << "\n");
482 return nullptr;
483 }
484
485 Value *Count = SCEVE.expandCodeFor(SH: ExitCount, Ty: CountType,
486 I: BB->getTerminator());
487
488 // FIXME: We've expanded Count where we hope to insert the counter setting
489 // intrinsic. But, in the case of the 'test and set' form, we may fallback to
490 // the just 'set' form and in which case the insertion block is most likely
491 // different. It means there will be instruction(s) in a block that possibly
492 // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
493 // but it's doesn't appear to work in all cases.
494
495 UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
496 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
497 LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
498 << " - Expanded Count in " << BB->getName() << "\n"
499 << " - Will insert set counter intrinsic into: "
500 << BeginBB->getName() << "\n");
501 return Count;
502}
503
504Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
505 IRBuilder<> Builder(BeginBB->getTerminator());
506 if (BeginBB->getParent()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP))
507 Builder.setIsFPConstrained(true);
508 Type *Ty = LoopCountInit->getType();
509 bool UsePhi = UsePHICounter || Opts.ForcePhi;
510 Intrinsic::ID ID = UseLoopGuard
511 ? (UsePhi ? Intrinsic::test_start_loop_iterations
512 : Intrinsic::test_set_loop_iterations)
513 : (UsePhi ? Intrinsic::start_loop_iterations
514 : Intrinsic::set_loop_iterations);
515 Function *LoopIter = Intrinsic::getDeclaration(M, id: ID, Tys: Ty);
516 Value *LoopSetup = Builder.CreateCall(Callee: LoopIter, Args: LoopCountInit);
517
518 // Use the return value of the intrinsic to control the entry of the loop.
519 if (UseLoopGuard) {
520 assert((isa<BranchInst>(BeginBB->getTerminator()) &&
521 cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
522 "Expected conditional branch");
523
524 Value *SetCount =
525 UsePhi ? Builder.CreateExtractValue(Agg: LoopSetup, Idxs: 1) : LoopSetup;
526 auto *LoopGuard = cast<BranchInst>(Val: BeginBB->getTerminator());
527 LoopGuard->setCondition(SetCount);
528 if (LoopGuard->getSuccessor(i: 0) != L->getLoopPreheader())
529 LoopGuard->swapSuccessors();
530 }
531 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
532 << "\n");
533 if (UsePhi && UseLoopGuard)
534 LoopSetup = Builder.CreateExtractValue(Agg: LoopSetup, Idxs: 0);
535 return !UsePhi ? LoopCountInit : LoopSetup;
536}
537
538void HardwareLoop::InsertLoopDec() {
539 IRBuilder<> CondBuilder(ExitBranch);
540 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
541 Kind: Attribute::StrictFP))
542 CondBuilder.setIsFPConstrained(true);
543
544 Function *DecFunc =
545 Intrinsic::getDeclaration(M, id: Intrinsic::loop_decrement,
546 Tys: LoopDecrement->getType());
547 Value *Ops[] = { LoopDecrement };
548 Value *NewCond = CondBuilder.CreateCall(Callee: DecFunc, Args: Ops);
549 Value *OldCond = ExitBranch->getCondition();
550 ExitBranch->setCondition(NewCond);
551
552 // The false branch must exit the loop.
553 if (!L->contains(BB: ExitBranch->getSuccessor(i: 0)))
554 ExitBranch->swapSuccessors();
555
556 // The old condition may be dead now, and may have even created a dead PHI
557 // (the original induction variable).
558 RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
559
560 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
561}
562
563Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
564 IRBuilder<> CondBuilder(ExitBranch);
565 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
566 Kind: Attribute::StrictFP))
567 CondBuilder.setIsFPConstrained(true);
568
569 Function *DecFunc =
570 Intrinsic::getDeclaration(M, id: Intrinsic::loop_decrement_reg,
571 Tys: { EltsRem->getType() });
572 Value *Ops[] = { EltsRem, LoopDecrement };
573 Value *Call = CondBuilder.CreateCall(Callee: DecFunc, Args: Ops);
574
575 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
576 return cast<Instruction>(Val: Call);
577}
578
579PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
580 BasicBlock *Preheader = L->getLoopPreheader();
581 BasicBlock *Header = L->getHeader();
582 BasicBlock *Latch = ExitBranch->getParent();
583 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
584 PHINode *Index = Builder.CreatePHI(Ty: NumElts->getType(), NumReservedValues: 2);
585 Index->addIncoming(V: NumElts, BB: Preheader);
586 Index->addIncoming(V: EltsRem, BB: Latch);
587 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
588 return Index;
589}
590
591void HardwareLoop::UpdateBranch(Value *EltsRem) {
592 IRBuilder<> CondBuilder(ExitBranch);
593 Value *NewCond =
594 CondBuilder.CreateICmpNE(LHS: EltsRem, RHS: ConstantInt::get(Ty: EltsRem->getType(), V: 0));
595 Value *OldCond = ExitBranch->getCondition();
596 ExitBranch->setCondition(NewCond);
597
598 // The false branch must exit the loop.
599 if (!L->contains(BB: ExitBranch->getSuccessor(i: 0)))
600 ExitBranch->swapSuccessors();
601
602 // The old condition may be dead now, and may have even created a dead PHI
603 // (the original induction variable).
604 RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
605}
606
607INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
608INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
609INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
610INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
611INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
612INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
613
614FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
615