1//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
/// Insert hardware loop intrinsics into loops which are deemed profitable by
/// the target, by querying TargetTransformInfo. A hardware loop comprises
/// two intrinsics: one, outside the loop, to set the loop iteration count and
/// another, in the loop's exiting block, to decrement the counter. The
/// decremented value can either be carried through the loop via a phi or
/// handled in some opaque way by the target.
15///
16//===----------------------------------------------------------------------===//
17
18#include "llvm/CodeGen/HardwareLoops.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/Analysis/AssumptionCache.h"
21#include "llvm/Analysis/BranchProbabilityInfo.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/OptimizationRemarkEmitter.h"
24#include "llvm/Analysis/ScalarEvolution.h"
25#include "llvm/Analysis/TargetLibraryInfo.h"
26#include "llvm/Analysis/TargetTransformInfo.h"
27#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constants.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/Instructions.h"
33#include "llvm/IR/Value.h"
34#include "llvm/InitializePasses.h"
35#include "llvm/Pass.h"
36#include "llvm/PassRegistry.h"
37#include "llvm/Support/CommandLine.h"
38#include "llvm/Support/Debug.h"
39#include "llvm/Transforms/Utils.h"
40#include "llvm/Transforms/Utils/BasicBlockUtils.h"
41#include "llvm/Transforms/Utils/Local.h"
42#include "llvm/Transforms/Utils/LoopUtils.h"
43#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
44
45#define DEBUG_TYPE "hardware-loops"
46
47#define HW_LOOPS_NAME "Hardware Loop Insertion"
48
49using namespace llvm;
50
51static cl::opt<bool>
52ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(Val: false),
53 cl::desc("Force hardware loops intrinsics to be inserted"));
54
55static cl::opt<bool>
56ForceHardwareLoopPHI(
57 "force-hardware-loop-phi", cl::Hidden, cl::init(Val: false),
58 cl::desc("Force hardware loop counter to be updated through a phi"));
59
60static cl::opt<bool>
61ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(Val: false),
62 cl::desc("Force allowance of nested hardware loops"));
63
64static cl::opt<unsigned>
65LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(Val: 1),
66 cl::desc("Set the loop decrement value"));
67
68static cl::opt<unsigned>
69CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(Val: 32),
70 cl::desc("Set the loop counter bitwidth"));
71
72static cl::opt<bool>
73ForceGuardLoopEntry(
74 "force-hardware-loop-guard", cl::Hidden, cl::init(Val: false),
75 cl::desc("Force generation of loop guard intrinsic"));
76
77STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
78
79#ifndef NDEBUG
80static void debugHWLoopFailure(const StringRef DebugMsg,
81 Instruction *I) {
82 dbgs() << "HWLoops: " << DebugMsg;
83 if (I)
84 dbgs() << ' ' << *I;
85 else
86 dbgs() << '.';
87 dbgs() << '\n';
88}
89#endif
90
91static OptimizationRemarkAnalysis
92createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
93 BasicBlock *CodeRegion = L->getHeader();
94 DebugLoc DL = L->getStartLoc();
95
96 if (I) {
97 CodeRegion = I->getParent();
98 // If there is no debug location attached to the instruction, revert back to
99 // using the loop's.
100 if (I->getDebugLoc())
101 DL = I->getDebugLoc();
102 }
103
104 OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
105 R << "hardware-loop not created: ";
106 return R;
107}
108
109namespace {
110
111 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
112 OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
113 LLVM_DEBUG(debugHWLoopFailure(Msg, I));
114 ORE->emit(OptDiag: createHWLoopAnalysis(RemarkName: ORETag, L: TheLoop, I) << Msg);
115 }
116
117 using TTI = TargetTransformInfo;
118
119 class HardwareLoopsLegacy : public FunctionPass {
120 public:
121 static char ID;
122
123 HardwareLoopsLegacy() : FunctionPass(ID) {}
124
125 bool runOnFunction(Function &F) override;
126
127 void getAnalysisUsage(AnalysisUsage &AU) const override {
128 AU.addRequired<LoopInfoWrapperPass>();
129 AU.addPreserved<LoopInfoWrapperPass>();
130 AU.addRequired<DominatorTreeWrapperPass>();
131 AU.addPreserved<DominatorTreeWrapperPass>();
132 AU.addRequired<ScalarEvolutionWrapperPass>();
133 AU.addPreserved<ScalarEvolutionWrapperPass>();
134 AU.addRequired<AssumptionCacheTracker>();
135 AU.addRequired<TargetTransformInfoWrapperPass>();
136 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
137 AU.addPreserved<BranchProbabilityInfoWrapperPass>();
138 }
139 };
140
141 class HardwareLoopsImpl {
142 public:
143 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
144 DominatorTree &DT, const TargetTransformInfo &TTI,
145 TargetLibraryInfo *TLI, AssumptionCache &AC,
146 OptimizationRemarkEmitter *ORE, HardwareLoopOptions &Opts)
147 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), TTI(TTI),
148 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) {}
149
150 bool run(Function &F);
151
152 private:
153 // Try to convert the given Loop into a hardware loop.
154 bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
155
156 // Given that the target believes the loop to be profitable, try to
157 // convert it.
158 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
159
160 ScalarEvolution &SE;
161 LoopInfo &LI;
162 bool PreserveLCSSA;
163 DominatorTree &DT;
164 const TargetTransformInfo &TTI;
165 TargetLibraryInfo *TLI = nullptr;
166 AssumptionCache &AC;
167 OptimizationRemarkEmitter *ORE;
168 HardwareLoopOptions &Opts;
169 bool MadeChange = false;
170 };
171
172 class HardwareLoop {
173 // Expand the trip count scev into a value that we can use.
174 Value *InitLoopCount();
175
176 // Insert the set_loop_iteration intrinsic.
177 Value *InsertIterationSetup(Value *LoopCountInit);
178
179 // Insert the loop_decrement intrinsic.
180 void InsertLoopDec();
181
182 // Insert the loop_decrement_reg intrinsic.
183 Instruction *InsertLoopRegDec(Value *EltsRem);
184
185 // If the target requires the counter value to be updated in the loop,
186 // insert a phi to hold the value. The intended purpose is for use by
187 // loop_decrement_reg.
188 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
189
190 // Create a new cmp, that checks the returned value of loop_decrement*,
191 // and update the exit branch to use it.
192 void UpdateBranch(Value *EltsRem);
193
194 public:
195 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
196 OptimizationRemarkEmitter *ORE, HardwareLoopOptions &Opts)
197 : SE(SE), ORE(ORE), Opts(Opts), L(Info.L),
198 M(L->getHeader()->getModule()), ExitCount(Info.ExitCount),
199 CountType(Info.CountType), ExitBranch(Info.ExitBranch),
200 LoopDecrement(Info.LoopDecrement), UsePHICounter(Info.CounterInReg),
201 UseLoopGuard(Info.PerformEntryTest) {}
202
203 void Create();
204
205 private:
206 ScalarEvolution &SE;
207 OptimizationRemarkEmitter *ORE = nullptr;
208 HardwareLoopOptions &Opts;
209 Loop *L = nullptr;
210 Module *M = nullptr;
211 const SCEV *ExitCount = nullptr;
212 Type *CountType = nullptr;
213 BranchInst *ExitBranch = nullptr;
214 Value *LoopDecrement = nullptr;
215 bool UsePHICounter = false;
216 bool UseLoopGuard = false;
217 BasicBlock *BeginBB = nullptr;
218 };
219}
220
221char HardwareLoopsLegacy::ID = 0;
222
223bool HardwareLoopsLegacy::runOnFunction(Function &F) {
224 if (skipFunction(F))
225 return false;
226
227 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
228
229 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
230 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
231 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
232 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
233 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
234 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
235 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
236 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
237 bool PreserveLCSSA = mustPreserveAnalysisID(AID&: LCSSAID);
238
239 HardwareLoopOptions Opts;
240 if (ForceHardwareLoops.getNumOccurrences())
241 Opts.setForce(ForceHardwareLoops);
242 if (ForceHardwareLoopPHI.getNumOccurrences())
243 Opts.setForcePhi(ForceHardwareLoopPHI);
244 if (ForceNestedLoop.getNumOccurrences())
245 Opts.setForceNested(ForceNestedLoop);
246 if (ForceGuardLoopEntry.getNumOccurrences())
247 Opts.setForceGuard(ForceGuardLoopEntry);
248 if (LoopDecrement.getNumOccurrences())
249 Opts.setDecrement(LoopDecrement);
250 if (CounterBitWidth.getNumOccurrences())
251 Opts.setCounterBitwidth(CounterBitWidth);
252
253 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, TTI, TLI, AC, ORE, Opts);
254 return Impl.run(F);
255}
256
257PreservedAnalyses HardwareLoopsPass::run(Function &F,
258 FunctionAnalysisManager &AM) {
259 auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
260 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(IR&: F);
261 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
262 auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
263 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(IR&: F);
264 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
265 auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
266
267 HardwareLoopsImpl Impl(SE, LI, true, DT, TTI, TLI, AC, ORE, Opts);
268 bool Changed = Impl.run(F);
269 if (!Changed)
270 return PreservedAnalyses::all();
271
272 PreservedAnalyses PA;
273 PA.preserve<LoopAnalysis>();
274 PA.preserve<ScalarEvolutionAnalysis>();
275 PA.preserve<DominatorTreeAnalysis>();
276 PA.preserve<BranchProbabilityAnalysis>();
277 return PA;
278}
279
280bool HardwareLoopsImpl::run(Function &F) {
281 LLVMContext &Ctx = F.getContext();
282 for (Loop *L : LI)
283 if (L->isOutermost())
284 TryConvertLoop(L, Ctx);
285 return MadeChange;
286}
287
288// Return true if the search should stop, which will be when an inner loop is
289// converted and the parent loop doesn't support containing a hardware loop.
290bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
291 // Process nested loops first.
292 bool AnyChanged = false;
293 for (Loop *SL : *L)
294 AnyChanged |= TryConvertLoop(L: SL, Ctx);
295 if (AnyChanged) {
296 reportHWLoopFailure(Msg: "nested hardware-loops not supported", ORETag: "HWLoopNested",
297 ORE, TheLoop: L);
298 return true; // Stop search.
299 }
300
301 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
302
303 HardwareLoopInfo HWLoopInfo(L);
304 if (!HWLoopInfo.canAnalyze(LI)) {
305 reportHWLoopFailure(Msg: "cannot analyze loop, irreducible control flow",
306 ORETag: "HWLoopCannotAnalyze", ORE, TheLoop: L);
307 return false;
308 }
309
310 if (!Opts.Force &&
311 !TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo: TLI, HWLoopInfo)) {
312 reportHWLoopFailure(Msg: "it's not profitable to create a hardware-loop",
313 ORETag: "HWLoopNotProfitable", ORE, TheLoop: L);
314 return false;
315 }
316
317 // Allow overriding of the counter width and loop decrement value.
318 if (Opts.Bitwidth.has_value()) {
319 HWLoopInfo.CountType = IntegerType::get(C&: Ctx, NumBits: Opts.Bitwidth.value());
320 }
321
322 if (Opts.Decrement.has_value())
323 HWLoopInfo.LoopDecrement =
324 ConstantInt::get(Ty: HWLoopInfo.CountType, V: Opts.Decrement.value());
325
326 MadeChange |= TryConvertLoop(HWLoopInfo);
327 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
328}
329
330bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
331
332 Loop *L = HWLoopInfo.L;
333 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
334
335 if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, ForceNestedLoop: Opts.getForceNested(),
336 ForceHardwareLoopPHI: Opts.getForcePhi())) {
337 // TODO: there can be many reasons a loop is not considered a
338 // candidate, so we should let isHardwareLoopCandidate fill in the
339 // reason and then report a better message here.
340 reportHWLoopFailure(Msg: "loop is not a candidate", ORETag: "HWLoopNoCandidate", ORE, TheLoop: L);
341 return false;
342 }
343
344 assert(
345 (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
346 "Hardware Loop must have set exit info.");
347
348 BasicBlock *Preheader = L->getLoopPreheader();
349
350 // If we don't have a preheader, then insert one.
351 if (!Preheader)
352 Preheader = InsertPreheaderForLoop(L, DT: &DT, LI: &LI, MSSAU: nullptr, PreserveLCSSA);
353 if (!Preheader)
354 return false;
355
356 HardwareLoop HWLoop(HWLoopInfo, SE, ORE, Opts);
357 HWLoop.Create();
358 ++NumHWLoops;
359 return true;
360}
361
362void HardwareLoop::Create() {
363 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
364
365 Value *LoopCountInit = InitLoopCount();
366 if (!LoopCountInit) {
367 reportHWLoopFailure(Msg: "could not safely create a loop count expression",
368 ORETag: "HWLoopNotSafe", ORE, TheLoop: L);
369 return;
370 }
371
372 Value *Setup = InsertIterationSetup(LoopCountInit);
373
374 if (UsePHICounter || Opts.ForcePhi) {
375 Instruction *LoopDec = InsertLoopRegDec(EltsRem: LoopCountInit);
376 Value *EltsRem = InsertPHICounter(NumElts: Setup, EltsRem: LoopDec);
377 LoopDec->setOperand(i: 0, Val: EltsRem);
378 UpdateBranch(EltsRem: LoopDec);
379 } else
380 InsertLoopDec();
381
382 // Run through the basic blocks of the loop and see if any of them have dead
383 // PHIs that can be removed.
384 for (auto *I : L->blocks())
385 DeleteDeadPHIs(BB: I);
386}
387
388static bool CanGenerateTest(Loop *L, Value *Count) {
389 BasicBlock *Preheader = L->getLoopPreheader();
390 if (!Preheader->getSinglePredecessor())
391 return false;
392
393 BasicBlock *Pred = Preheader->getSinglePredecessor();
394 if (!isa<BranchInst>(Val: Pred->getTerminator()))
395 return false;
396
397 auto *BI = cast<BranchInst>(Val: Pred->getTerminator());
398 if (BI->isUnconditional() || !isa<ICmpInst>(Val: BI->getCondition()))
399 return false;
400
401 // Check that the icmp is checking for equality of Count and zero and that
402 // a non-zero value results in entering the loop.
403 auto ICmp = cast<ICmpInst>(Val: BI->getCondition());
404 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
405 if (!ICmp->isEquality())
406 return false;
407
408 auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
409 if (auto *Const = dyn_cast<ConstantInt>(Val: ICmp->getOperand(i_nocapture: OpIdx)))
410 return Const->isZero() && ICmp->getOperand(i_nocapture: OpIdx ^ 1) == Count;
411 return false;
412 };
413
414 // Check if Count is a zext.
415 Value *CountBefZext =
416 isa<ZExtInst>(Val: Count) ? cast<ZExtInst>(Val: Count)->getOperand(i_nocapture: 0) : nullptr;
417
418 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
419 !IsCompareZero(ICmp, CountBefZext, 0) &&
420 !IsCompareZero(ICmp, CountBefZext, 1))
421 return false;
422
423 unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
424 if (BI->getSuccessor(i: SuccIdx) != Preheader)
425 return false;
426
427 return true;
428}
429
430Value *HardwareLoop::InitLoopCount() {
431 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
432 // Can we replace a conditional branch with an intrinsic that sets the
433 // loop counter and tests that is not zero?
434
435 SCEVExpander SCEVE(SE, "loopcnt");
436 if (!ExitCount->getType()->isPointerTy() &&
437 ExitCount->getType() != CountType)
438 ExitCount = SE.getZeroExtendExpr(Op: ExitCount, Ty: CountType);
439
440 ExitCount = SE.getAddExpr(LHS: ExitCount, RHS: SE.getOne(Ty: CountType));
441
442 // If we're trying to use the 'test and set' form of the intrinsic, we need
443 // to replace a conditional branch that is controlling entry to the loop. It
444 // is likely (guaranteed?) that the preheader has an unconditional branch to
445 // the loop header, so also check if it has a single predecessor.
446 if (SE.isLoopEntryGuardedByCond(L, Pred: ICmpInst::ICMP_NE, LHS: ExitCount,
447 RHS: SE.getZero(Ty: ExitCount->getType()))) {
448 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
449 if (Opts.ForceGuard)
450 UseLoopGuard = true;
451 } else
452 UseLoopGuard = false;
453
454 BasicBlock *BB = L->getLoopPreheader();
455 if (UseLoopGuard && BB->getSinglePredecessor() &&
456 cast<BranchInst>(Val: BB->getTerminator())->isUnconditional()) {
457 BasicBlock *Predecessor = BB->getSinglePredecessor();
458 // If it's not safe to create a while loop then don't force it and create a
459 // do-while loop instead
460 if (!SCEVE.isSafeToExpandAt(S: ExitCount, InsertionPoint: Predecessor->getTerminator()))
461 UseLoopGuard = false;
462 else
463 BB = Predecessor;
464 }
465
466 if (!SCEVE.isSafeToExpandAt(S: ExitCount, InsertionPoint: BB->getTerminator())) {
467 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
468 << *ExitCount << "\n");
469 return nullptr;
470 }
471
472 Value *Count = SCEVE.expandCodeFor(SH: ExitCount, Ty: CountType,
473 I: BB->getTerminator());
474
475 // FIXME: We've expanded Count where we hope to insert the counter setting
476 // intrinsic. But, in the case of the 'test and set' form, we may fallback to
477 // the just 'set' form and in which case the insertion block is most likely
478 // different. It means there will be instruction(s) in a block that possibly
479 // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
480 // but it's doesn't appear to work in all cases.
481
482 UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
483 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
484 LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
485 << " - Expanded Count in " << BB->getName() << "\n"
486 << " - Will insert set counter intrinsic into: "
487 << BeginBB->getName() << "\n");
488 return Count;
489}
490
491Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
492 IRBuilder<> Builder(BeginBB->getTerminator());
493 if (BeginBB->getParent()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP))
494 Builder.setIsFPConstrained(true);
495 Type *Ty = LoopCountInit->getType();
496 bool UsePhi = UsePHICounter || Opts.ForcePhi;
497 Intrinsic::ID ID = UseLoopGuard
498 ? (UsePhi ? Intrinsic::test_start_loop_iterations
499 : Intrinsic::test_set_loop_iterations)
500 : (UsePhi ? Intrinsic::start_loop_iterations
501 : Intrinsic::set_loop_iterations);
502 Value *LoopSetup = Builder.CreateIntrinsic(ID, Types: Ty, Args: LoopCountInit);
503
504 // Use the return value of the intrinsic to control the entry of the loop.
505 if (UseLoopGuard) {
506 assert((isa<BranchInst>(BeginBB->getTerminator()) &&
507 cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
508 "Expected conditional branch");
509
510 Value *SetCount =
511 UsePhi ? Builder.CreateExtractValue(Agg: LoopSetup, Idxs: 1) : LoopSetup;
512 auto *LoopGuard = cast<BranchInst>(Val: BeginBB->getTerminator());
513 LoopGuard->setCondition(SetCount);
514 if (LoopGuard->getSuccessor(i: 0) != L->getLoopPreheader())
515 LoopGuard->swapSuccessors();
516 }
517 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup
518 << "\n");
519 if (UsePhi && UseLoopGuard)
520 LoopSetup = Builder.CreateExtractValue(Agg: LoopSetup, Idxs: 0);
521 return !UsePhi ? LoopCountInit : LoopSetup;
522}
523
524void HardwareLoop::InsertLoopDec() {
525 IRBuilder<> CondBuilder(ExitBranch);
526 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
527 Kind: Attribute::StrictFP))
528 CondBuilder.setIsFPConstrained(true);
529
530 Value *Ops[] = { LoopDecrement };
531 Value *NewCond = CondBuilder.CreateIntrinsic(ID: Intrinsic::loop_decrement,
532 Types: LoopDecrement->getType(), Args: Ops);
533 Value *OldCond = ExitBranch->getCondition();
534 ExitBranch->setCondition(NewCond);
535
536 // The false branch must exit the loop.
537 if (!L->contains(BB: ExitBranch->getSuccessor(i: 0)))
538 ExitBranch->swapSuccessors();
539
540 // The old condition may be dead now, and may have even created a dead PHI
541 // (the original induction variable).
542 RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
543
544 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
545}
546
547Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
548 IRBuilder<> CondBuilder(ExitBranch);
549 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr(
550 Kind: Attribute::StrictFP))
551 CondBuilder.setIsFPConstrained(true);
552
553 Value *Ops[] = { EltsRem, LoopDecrement };
554 Value *Call = CondBuilder.CreateIntrinsic(ID: Intrinsic::loop_decrement_reg,
555 Types: {EltsRem->getType()}, Args: Ops);
556
557 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
558 return cast<Instruction>(Val: Call);
559}
560
561PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
562 BasicBlock *Preheader = L->getLoopPreheader();
563 BasicBlock *Header = L->getHeader();
564 BasicBlock *Latch = ExitBranch->getParent();
565 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt());
566 PHINode *Index = Builder.CreatePHI(Ty: NumElts->getType(), NumReservedValues: 2);
567 Index->addIncoming(V: NumElts, BB: Preheader);
568 Index->addIncoming(V: EltsRem, BB: Latch);
569 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
570 return Index;
571}
572
573void HardwareLoop::UpdateBranch(Value *EltsRem) {
574 IRBuilder<> CondBuilder(ExitBranch);
575 Value *NewCond =
576 CondBuilder.CreateICmpNE(LHS: EltsRem, RHS: ConstantInt::get(Ty: EltsRem->getType(), V: 0));
577 Value *OldCond = ExitBranch->getCondition();
578 ExitBranch->setCondition(NewCond);
579
580 // The false branch must exit the loop.
581 if (!L->contains(BB: ExitBranch->getSuccessor(i: 0)))
582 ExitBranch->swapSuccessors();
583
584 // The old condition may be dead now, and may have even created a dead PHI
585 // (the original induction variable).
586 RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
587}
588
589INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
590INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
591INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
592INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
593INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
594INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
595
596FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
597