1//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs loop invariant code motion on machine instructions. We
10// attempt to remove as much code from the body of a loop as possible.
11//
12// This pass is not intended to be a replacement or a complete alternative
13// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
14// constructs that are not exposed before lowering and instruction selection.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/CodeGen/MachineLICM.h"
19#include "llvm/ADT/BitVector.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/AliasAnalysis.h"
25#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
27#include "llvm/CodeGen/MachineDomTreeUpdater.h"
28#include "llvm/CodeGen/MachineDominators.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineFunction.h"
31#include "llvm/CodeGen/MachineFunctionPass.h"
32#include "llvm/CodeGen/MachineInstr.h"
33#include "llvm/CodeGen/MachineLoopInfo.h"
34#include "llvm/CodeGen/MachineMemOperand.h"
35#include "llvm/CodeGen/MachineOperand.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/CodeGen/PseudoSourceValue.h"
38#include "llvm/CodeGen/TargetInstrInfo.h"
39#include "llvm/CodeGen/TargetLowering.h"
40#include "llvm/CodeGen/TargetRegisterInfo.h"
41#include "llvm/CodeGen/TargetSchedule.h"
42#include "llvm/CodeGen/TargetSubtargetInfo.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/InitializePasses.h"
45#include "llvm/MC/MCInstrDesc.h"
46#include "llvm/MC/MCRegister.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/Debug.h"
51#include "llvm/Support/raw_ostream.h"
52#include <cassert>
53#include <limits>
54#include <vector>
55
56using namespace llvm;
57
58#define DEBUG_TYPE "machinelicm"
59
60static cl::opt<bool>
61AvoidSpeculation("avoid-speculation",
62 cl::desc("MachineLICM should avoid speculation"),
63 cl::init(Val: true), cl::Hidden);
64
65static cl::opt<bool>
66HoistCheapInsts("hoist-cheap-insts",
67 cl::desc("MachineLICM should hoist even cheap instructions"),
68 cl::init(Val: false), cl::Hidden);
69
70static cl::opt<bool>
71HoistConstStores("hoist-const-stores",
72 cl::desc("Hoist invariant stores"),
73 cl::init(Val: true), cl::Hidden);
74
75static cl::opt<bool> HoistConstLoads("hoist-const-loads",
76 cl::desc("Hoist invariant loads"),
77 cl::init(Val: true), cl::Hidden);
78
79// The default threshold of 100 (i.e. if target block is 100 times hotter)
80// is based on empirical data on a single target and is subject to tuning.
81static cl::opt<unsigned>
82BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
83 cl::desc("Do not hoist instructions if target"
84 "block is N times hotter than the source."),
85 cl::init(Val: 100), cl::Hidden);
86
87enum class UseBFI { None, PGO, All };
88
89static cl::opt<UseBFI>
90DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
91 cl::desc("Disable hoisting instructions to"
92 " hotter blocks"),
93 cl::init(Val: UseBFI::PGO), cl::Hidden,
94 cl::values(clEnumValN(UseBFI::None, "none",
95 "disable the feature"),
96 clEnumValN(UseBFI::PGO, "pgo",
97 "enable the feature when using profile data"),
98 clEnumValN(UseBFI::All, "all",
99 "enable the feature with/wo profile data")));
100
101STATISTIC(NumHoisted,
102 "Number of machine instructions hoisted out of loops");
103STATISTIC(NumLowRP,
104 "Number of instructions hoisted in low reg pressure situation");
105STATISTIC(NumHighLatency,
106 "Number of high latency instructions hoisted");
107STATISTIC(NumCSEed,
108 "Number of hoisted machine instructions CSEed");
109STATISTIC(NumPostRAHoisted,
110 "Number of machine instructions hoisted out of loops post regalloc");
111STATISTIC(NumStoreConst,
112 "Number of stores of const phys reg hoisted out of loops");
113STATISTIC(NumNotHoistedDueToHotness,
114 "Number of instructions not hoisted due to block frequency");
115
namespace {
  /// Bitmask describing the outcome of a Hoist() attempt. ErasedMI is set
  /// when the instruction was deleted (e.g. CSE'd with an already-hoisted
  /// copy) and must not be touched afterwards.
  enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };

  /// Shared implementation of machine LICM, usable from both the legacy
  /// pass manager (via MachineLICMBase) and the new pass manager (via
  /// a MachineFunctionAnalysisManager). Exactly one of LegacyPass/MFAM is
  /// non-null.
  class MachineLICMImpl {
    const TargetInstrInfo *TII = nullptr;
    const TargetLoweringBase *TLI = nullptr;
    const TargetRegisterInfo *TRI = nullptr;
    const MachineFrameInfo *MFI = nullptr;
    MachineRegisterInfo *MRI = nullptr;
    TargetSchedModel SchedModel;
    bool PreRegAlloc = false;   // True when running before register allocation.
    bool HasProfileData = false; // Function carries PGO profile data.
    Pass *LegacyPass;            // Non-null in legacy-PM mode.
    MachineFunctionAnalysisManager *MFAM; // Non-null in new-PM mode.

    // Various analyses that we use...
    AliasAnalysis *AA = nullptr;               // Alias analysis info.
    MachineBlockFrequencyInfo *MBFI = nullptr; // Machine block frequency info
    MachineLoopInfo *MLI = nullptr;            // Current MachineLoopInfo
    MachineDomTreeUpdater *MDTU = nullptr;     // Wraps current dominator tree

    // State that is updated as we process loops
    bool Changed = false; // True if a loop is changed.
    bool FirstInLoop = false; // True if it's the first LICM in the loop.

    // Holds information about whether it is allowed to move load instructions
    // out of the loop
    SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;

    // Exit blocks of each Loop. Computed lazily and cached per loop.
    DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;

    /// Return true if MBB is an exit block of CurLoop. The loop's exit
    /// blocks are computed on the first query and cached in ExitBlockMap.
    bool isExitBlock(MachineLoop *CurLoop, const MachineBasicBlock *MBB) {
      auto [It, Inserted] = ExitBlockMap.try_emplace(Key: CurLoop);
      if (Inserted) {
        SmallVector<MachineBasicBlock *, 8> ExitBlocks;
        CurLoop->getExitBlocks(ExitBlocks);
        It->second = std::move(ExitBlocks);
      }
      return is_contained(Range&: It->second, Element: MBB);
    }

    // Track 'estimated' register pressure.
    SmallDenseSet<Register> RegSeen;
    SmallVector<unsigned, 8> RegPressure;

    // Register pressure "limit" per register pressure set. If the pressure
    // is higher than the limit, then it's considered high.
    SmallVector<unsigned, 8> RegLimit;

    // Register pressure on path leading from loop preheader to current BB.
    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;

    // For each opcode per preheader, keep a list of potential CSE instructions.
    DenseMap<MachineBasicBlock *,
             DenseMap<unsigned, std::vector<MachineInstr *>>>
        CSEMap;

    // Values for SpeculationState below.
    enum {
      SpeculateFalse = 0,
      SpeculateTrue = 1,
      SpeculateUnknown = 2
    };

    // If a MBB does not dominate loop exiting blocks then it may not safe
    // to hoist loads from this block.
    // Tri-state: 0 - false, 1 - true, 2 - unknown
    unsigned SpeculationState = SpeculateUnknown;

  public:
    MachineLICMImpl(bool PreRegAlloc, Pass *LegacyPass,
                    MachineFunctionAnalysisManager *MFAM)
        : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {
      assert((LegacyPass || MFAM) && "LegacyPass or MFAM must be provided");
      assert(!(LegacyPass && MFAM) &&
             "LegacyPass and MFAM cannot be provided at the same time");
    }

    /// Run LICM over all loops in MF. Returns true if anything changed.
    bool run(MachineFunction &MF);

    /// Drop all per-function cached state.
    void releaseMemory() {
      RegSeen.clear();
      RegPressure.clear();
      RegLimit.clear();
      BackTrace.clear();
      CSEMap.clear();
      ExitBlockMap.clear();
    }

  private:
    /// Keep track of information about hoisting candidates.
    struct CandidateInfo {
      MachineInstr *MI;
      Register Def; // The single (non-dead) register MI defines.
      int FI;       // Spill-slot frame index, or INT_MIN if not a reload.

      CandidateInfo(MachineInstr *mi, Register def, int fi)
          : MI(mi), Def(def), FI(fi) {}
    };

    /// Post-RA hoisting driver for one loop.
    void HoistRegionPostRA(MachineLoop *CurLoop);

    /// Move MI (defining Def) into the loop preheader (post-RA).
    void HoistPostRA(MachineInstr *MI, Register Def, MachineLoop *CurLoop);

    /// Scan one instruction during the post-RA walk, updating def/clobber
    /// regunit masks and collecting hoisting candidates.
    void ProcessMI(MachineInstr *MI, BitVector &RUDefs, BitVector &RUClobbers,
                   SmallDenseSet<int> &StoredFIs,
                   SmallVectorImpl<CandidateInfo> &Candidates,
                   MachineLoop *CurLoop);

    /// Mark Reg live-in in every block of CurLoop and clear kill flags.
    void AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop);

    bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop);

    bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop);

    bool HasLoopPHIUse(const MachineInstr *MI, MachineLoop *CurLoop);

    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, Register Reg,
                               MachineLoop *CurLoop) const;

    bool IsCheapInstruction(MachineInstr &MI) const;

    bool CanCauseHighRegPressure(const SmallDenseMap<unsigned, int> &Cost,
                                 bool Cheap);

    void UpdateBackTraceRegPressure(const MachineInstr *MI);

    bool IsProfitableToHoist(MachineInstr &MI, MachineLoop *CurLoop);

    bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);

    // Scope management for the dominator-tree walk (pre-RA hoisting).
    void EnterScope(MachineBasicBlock *MBB);

    void ExitScope(MachineBasicBlock *MBB);

    void ExitScopeIfDone(
        MachineDomTreeNode *Node,
        DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
        const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);

    /// Pre-RA hoisting driver: walks the dominator subtree rooted at the
    /// loop header and hoists invariant instructions.
    void HoistOutOfLoop(MachineDomTreeNode *HeaderN, MachineLoop *CurLoop);

    void InitRegPressure(MachineBasicBlock *BB);

    SmallDenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
                                                  bool ConsiderSeen,
                                                  bool ConsiderUnseenAsDef);

    void UpdateRegPressure(const MachineInstr *MI,
                           bool ConsiderUnseenAsDef = false);

    MachineInstr *ExtractHoistableLoad(MachineInstr *MI, MachineLoop *CurLoop);

    MachineInstr *LookForDuplicate(const MachineInstr *MI,
                                   std::vector<MachineInstr *> &PrevMIs);

    bool
    EliminateCSE(MachineInstr *MI,
                 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);

    bool MayCSE(MachineInstr *MI);

    /// Try to hoist MI into Preheader; returns a HoistResult bitmask.
    unsigned Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
                   MachineLoop *CurLoop);

    void InitCSEMap(MachineBasicBlock *BB);

    void InitializeLoadsHoistableLoops();

    bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
                            MachineBasicBlock *TgtBlock);
    MachineBasicBlock *getOrCreatePreheader(MachineLoop *CurLoop);
  };

  /// Common legacy-PM wrapper; the bool selects pre- vs post-RA operation.
  class MachineLICMBase : public MachineFunctionPass {
    bool PreRegAlloc;

  public:
    MachineLICMBase(char &ID, bool PreRegAlloc)
        : MachineFunctionPass(ID), PreRegAlloc(PreRegAlloc) {}

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineLoopInfoWrapperPass>();
      // Block frequency is only needed when the hot-block throttle is on.
      if (DisableHoistingToHotterBlocks != UseBFI::None)
        AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
      AU.addRequired<MachineDominatorTreeWrapperPass>();
      AU.addRequired<AAResultsWrapperPass>();
      AU.addPreserved<MachineLoopInfoWrapperPass>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };

  /// Post-regalloc flavor (registered as -machinelicm).
  class MachineLICM : public MachineLICMBase {
  public:
    static char ID;
    MachineLICM() : MachineLICMBase(ID, false) {}
  };

  /// Pre-regalloc flavor (registered as -early-machinelicm).
  class EarlyMachineLICM : public MachineLICMBase {
  public:
    static char ID;
    EarlyMachineLICM() : MachineLICMBase(ID, true) {}
  };

} // end anonymous namespace
323
// Legacy pass identification.
char MachineLICM::ID;
char EarlyMachineLICM::ID;

// Externally visible pass IDs referenced by target pass pipelines.
char &llvm::MachineLICMID = MachineLICM::ID;
char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;

// Register the post-regalloc flavor (constructed with PreRegAlloc = false).
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
                      "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
                    "Machine Loop Invariant Code Motion", false, false)

// Register the pre-regalloc flavor (constructed with PreRegAlloc = true).
INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
                      "Early Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
                    "Early Machine Loop Invariant Code Motion", false, false)
347
348bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
349 if (skipFunction(F: MF.getFunction()))
350 return false;
351
352 MachineLICMImpl Impl(PreRegAlloc, this, nullptr);
353 return Impl.run(MF);
354}
355
// Fetch an analysis result from whichever pass-manager flavor is active:
// via the legacy wrapper pass when LegacyPass is non-null, otherwise via the
// new-PM MachineFunctionAnalysisManager. INFIX is spliced into the legacy
// wrapper-pass name (e.g. MachineLoop + Info -> MachineLoopInfoWrapperPass).
#define GET_RESULT(RESULT, GETTER, INFIX)                                      \
  ((LegacyPass)                                                                \
       ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER()       \
       : &MFAM->getResult<RESULT##Analysis>(MF))
360
/// Main driver: wire up analyses from whichever pass manager is active,
/// initialize per-function state, then run LICM over every top-level loop.
/// Subloops are handled inside HoistOutOfLoop, so the worklist is never
/// appended to. Returns true if any instruction was moved or erased.
bool MachineLICMImpl::run(MachineFunction &MF) {
  // Alias analysis is a function-level (IR) analysis; in new-PM mode it must
  // be reached through the FunctionAnalysisManager proxy.
  AA = MFAM != nullptr
           ? &MFAM->getResult<FunctionAnalysisManagerMachineFunctionProxy>(IR&: MF)
                  .getManager()
                  .getResult<AAManager>(IR&: MF.getFunction())
           : &LegacyPass->getAnalysis<AAResultsWrapperPass>().getAAResults();
  // The updater lives on the stack; MDTU is only valid for this run.
  MachineDomTreeUpdater DTU(GET_RESULT(MachineDominatorTree, getDomTree, ),
                            MachineDomTreeUpdater::UpdateStrategy::Lazy);
  MDTU = &DTU;
  MLI = GET_RESULT(MachineLoop, getLI, Info);
  // Block frequency info is only fetched when the hot-block throttle is on.
  MBFI = DisableHoistingToHotterBlocks != UseBFI::None
             ? GET_RESULT(MachineBlockFrequency, getMBFI, Info)
             : nullptr;

  Changed = FirstInLoop = false;
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  TII = ST.getInstrInfo();
  TLI = ST.getTargetLowering();
  TRI = ST.getRegisterInfo();
  MFI = &MF.getFrameInfo();
  MRI = &MF.getRegInfo();
  SchedModel.init(TSInfo: &ST);

  HasProfileData = MF.getFunction().hasProfileData();

  if (PreRegAlloc)
    LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
  else
    LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
  LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");

  if (PreRegAlloc) {
    // Estimate register pressure during pre-regalloc pass.
    unsigned NumRPS = TRI->getNumRegPressureSets();
    RegPressure.resize(N: NumRPS);
    llvm::fill(Range&: RegPressure, Value: 0);
    RegLimit.resize(N: NumRPS);
    for (unsigned i = 0, e = NumRPS; i != e; ++i)
      RegLimit[i] = TRI->getRegPressureSetLimit(MF, Idx: i);
  }

  if (HoistConstLoads)
    InitializeLoadsHoistableLoops();

  // Only top-level loops are enqueued; nothing is pushed inside the loop.
  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
  while (!Worklist.empty()) {
    MachineLoop *CurLoop = Worklist.pop_back_val();

    if (!PreRegAlloc) {
      HoistRegionPostRA(CurLoop);
    } else {
      // CSEMap is initialized for loop header when the first instruction is
      // being hoisted.
      MachineDomTreeNode *N = MDTU->getDomTree().getNode(BB: CurLoop->getHeader());
      FirstInLoop = true;
      HoistOutOfLoop(HeaderN: N, CurLoop);
      CSEMap.clear();
    }
  }
  releaseMemory();
  return Changed;
}
423
424/// Return true if instruction stores to the specified frame.
425static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
426 // Check mayStore before memory operands so that e.g. DBG_VALUEs will return
427 // true since they have no memory operands.
428 if (!MI->mayStore())
429 return false;
430 // If we lost memory operands, conservatively assume that the instruction
431 // writes to all slots.
432 if (MI->memoperands_empty())
433 return true;
434 for (const MachineMemOperand *MemOp : MI->memoperands()) {
435 if (!MemOp->isStore() || !MemOp->getPseudoValue())
436 continue;
437 if (const FixedStackPseudoSourceValue *Value =
438 dyn_cast<FixedStackPseudoSourceValue>(Val: MemOp->getPseudoValue())) {
439 if (Value->getFrameIndex() == FI)
440 return true;
441 }
442 }
443 return false;
444}
445
446static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI,
447 BitVector &RUs,
448 const uint32_t *Mask) {
449 // FIXME: This intentionally works in reverse due to some issues with the
450 // Register Units infrastructure.
451 //
452 // This is used to apply callee-saved-register masks to the clobbered regunits
453 // mask.
454 //
455 // The right way to approach this is to start with a BitVector full of ones,
456 // then reset all the bits of the regunits of each register that is set in the
457 // mask (registers preserved), then OR the resulting bits with the Clobbers
458 // mask. This correctly prioritizes the saved registers, so if a RU is shared
459 // between a register that is preserved, and one that is NOT preserved, that
460 // RU will not be set in the output vector (the clobbers).
461 //
462 // What we have to do for now is the opposite: we have to assume that the
463 // regunits of all registers that are NOT preserved are clobbered, even if
464 // those regunits are preserved by another register. So if a RU is shared
465 // like described previously, that RU will be set.
466 //
467 // This is to work around an issue which appears in AArch64, but isn't
468 // exclusive to that target: AArch64's Qn registers (128 bits) have Dn
469 // register (lower 64 bits). A few Dn registers are preserved by some calling
470 // conventions, but Qn and Dn share exactly the same reg units.
471 //
472 // If we do this the right way, Qn will be marked as NOT clobbered even though
473 // its upper 64 bits are NOT preserved. The conservative approach handles this
474 // correctly at the cost of some missed optimizations on other targets.
475 //
476 // This is caused by how RegUnits are handled within TableGen. Ideally, Qn
477 // should have an extra RegUnit to model the "unknown" bits not covered by the
478 // subregs.
479 BitVector RUsFromRegsNotInMask(TRI.getNumRegUnits());
480 const unsigned NumRegs = TRI.getNumRegs();
481 const unsigned MaskWords = (NumRegs + 31) / 32;
482 for (unsigned K = 0; K < MaskWords; ++K) {
483 const uint32_t Word = Mask[K];
484 for (unsigned Bit = 0; Bit < 32; ++Bit) {
485 const unsigned PhysReg = (K * 32) + Bit;
486 if (PhysReg == NumRegs)
487 break;
488
489 if (PhysReg && !((Word >> Bit) & 1)) {
490 for (MCRegUnit Unit : TRI.regunits(Reg: PhysReg))
491 RUsFromRegsNotInMask.set(static_cast<unsigned>(Unit));
492 }
493 }
494 }
495
496 RUs |= RUsFromRegsNotInMask;
497}
498
499/// Examine the instruction for potential LICM candidate. Also
500/// gather register def and frame object update information.
/// Examine the instruction for potential LICM candidate. Also
/// gather register def and frame object update information.
///
/// Side effects: marks the regunits MI defines in RUDefs (and in RUClobbers
/// on a second definition), records spill-slot stores in StoredFIs, and
/// appends MI to Candidates when it looks hoistable.
void MachineLICMImpl::ProcessMI(MachineInstr *MI, BitVector &RUDefs,
                                BitVector &RUClobbers,
                                SmallDenseSet<int> &StoredFIs,
                                SmallVectorImpl<CandidateInfo> &Candidates,
                                MachineLoop *CurLoop) {
  bool RuledOut = false;
  bool HasNonInvariantUse = false;
  Register Def;
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isFI()) {
      // Remember if the instruction stores to the frame index.
      int FI = MO.getIndex();
      if (!StoredFIs.count(V: FI) &&
          MFI->isSpillSlotObjectIndex(ObjectIdx: FI) &&
          InstructionStoresToFI(MI, FI))
        StoredFIs.insert(V: FI);
      // Any frame-index reference is treated as a non-invariant use.
      HasNonInvariantUse = true;
      continue;
    }

    // We can't hoist an instruction defining a physreg that is clobbered in
    // the loop.
    if (MO.isRegMask()) {
      applyBitsNotInRegMaskToRegUnitsMask(TRI: *TRI, RUs&: RUClobbers, Mask: MO.getRegMask());
      continue;
    }

    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    // This runs post-RA, so only physical registers are expected.
    assert(Reg.isPhysical() && "Not expecting virtual register!");

    if (!MO.isDef()) {
      if (!HasNonInvariantUse) {
        for (MCRegUnit Unit : TRI->regunits(Reg)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          if (RUDefs.test(Idx: static_cast<unsigned>(Unit)) ||
              RUClobbers.test(Idx: static_cast<unsigned>(Unit))) {
            HasNonInvariantUse = true;
            break;
          }
        }
      }
      continue;
    }

    // FIXME: For now, avoid instructions with multiple defs, unless it's dead.
    if (!MO.isDead()) {
      if (Def)
        RuledOut = true;
      else
        Def = Reg;
    }

    // If we have already seen another instruction that defines the same
    // register, then this is not safe. Two defs is indicated by setting a
    // PhysRegClobbers bit.
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      if (RUDefs.test(Idx: static_cast<unsigned>(Unit))) {
        RUClobbers.set(static_cast<unsigned>(Unit));
        RuledOut = true;
      } else if (RUClobbers.test(Idx: static_cast<unsigned>(Unit))) {
        // MI defined register is seen defined by another instruction in
        // the loop, it cannot be a LICM candidate.
        RuledOut = true;
      }

      RUDefs.set(static_cast<unsigned>(Unit));
    }
  }

  // Only consider reloads for now and remats which do not have register
  // operands. FIXME: Consider unfold load folding instructions.
  if (Def && !RuledOut) {
    // FI stays at INT_MIN unless this is a load from a spill slot.
    int FI = std::numeric_limits<int>::min();
    if ((!HasNonInvariantUse && IsLICMCandidate(I&: *MI, CurLoop)) ||
        (TII->isLoadFromStackSlot(MI: *MI, FrameIndex&: FI) && MFI->isSpillSlotObjectIndex(ObjectIdx: FI)))
      Candidates.push_back(Elt: CandidateInfo(MI, Def, FI));
  }
}
584
585/// Walk the specified region of the CFG and hoist loop invariants out to the
586/// preheader.
/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
///
/// Post-RA strategy: one pass over the loop body collecting, per regunit,
/// whether it is defined once (RUDefs) or multiple times / via regmask
/// (RUClobbers), plus candidate instructions; then each candidate is vetted
/// against those masks and the preheader's terminator before being hoisted.
void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop) {
  MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
  if (!Preheader)
    return;

  unsigned NumRegUnits = TRI->getNumRegUnits();
  BitVector RUDefs(NumRegUnits);     // RUs defined once in the loop.
  BitVector RUClobbers(NumRegUnits); // RUs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallDenseSet<int> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad()) continue;

    // Conservatively treat live-in's as an external def.
    // FIXME: That means a reload that're reused in successor block(s) will not
    // be LICM'ed.
    for (const auto &LI : BB->liveins()) {
      for (MCRegUnit Unit : TRI->regunits(Reg: LI.PhysReg))
        RUDefs.set(static_cast<unsigned>(Unit));
    }

    // Funclet entry blocks will clobber all registers
    if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
      applyBitsNotInRegMaskToRegUnitsMask(TRI: *TRI, RUs&: RUClobbers, Mask);

    // EH landing pads clobber exception pointer/selector registers.
    if (BB->isEHPad()) {
      const MachineFunction &MF = *BB->getParent();
      const Constant *PersonalityFn = MF.getFunction().getPersonalityFn();
      const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
      if (MCRegister Reg = TLI.getExceptionPointerRegister(PersonalityFn))
        for (MCRegUnit Unit : TRI->regunits(Reg))
          RUClobbers.set(static_cast<unsigned>(Unit));
      if (MCRegister Reg = TLI.getExceptionSelectorRegister(PersonalityFn))
        for (MCRegUnit Unit : TRI->regunits(Reg))
          RUClobbers.set(static_cast<unsigned>(Unit));
    }

    // Reset the per-block speculation cache before scanning this block.
    SpeculationState = SpeculateUnknown;
    for (MachineInstr &MI : *BB)
      ProcessMI(MI: &MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
  }

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRUs(NumRegUnits);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (const MachineOperand &MO : TI->operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegUnit Unit : TRI->regunits(Reg))
        TermRUs.set(static_cast<unsigned>(Unit));
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure candidate def should not clobber
  //    registers read by the terminator. Similarly its def should not be
  //    clobbered by the terminator.
  for (CandidateInfo &Candidate : Candidates) {
    // Reject a spill-slot reload whose slot is stored inside the loop.
    if (Candidate.FI != std::numeric_limits<int>::min() &&
        StoredFIs.count(V: Candidate.FI))
      continue;

    Register Def = Candidate.Def;
    bool Safe = true;
    for (MCRegUnit Unit : TRI->regunits(Reg: Def)) {
      if (RUClobbers.test(Idx: static_cast<unsigned>(Unit)) ||
          TermRUs.test(Idx: static_cast<unsigned>(Unit))) {
        Safe = false;
        break;
      }
    }

    if (!Safe)
      continue;

    MachineInstr *MI = Candidate.MI;
    for (const MachineOperand &MO : MI->all_uses()) {
      if (!MO.getReg())
        continue;
      for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg())) {
        if (RUDefs.test(Idx: static_cast<unsigned>(Unit)) ||
            RUClobbers.test(Idx: static_cast<unsigned>(Unit))) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }

      if (!Safe)
        break;
    }

    if (Safe)
      HoistPostRA(MI, Def: Candidate.Def, CurLoop);
  }
}
700
701/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
702/// sure it is not killed by any instructions in the loop.
703void MachineLICMImpl::AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop) {
704 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
705 if (!BB->isLiveIn(Reg))
706 BB->addLiveIn(PhysReg: Reg);
707 for (MachineInstr &MI : *BB) {
708 for (MachineOperand &MO : MI.all_uses()) {
709 if (!MO.getReg())
710 continue;
711 if (TRI->regsOverlap(RegA: Reg, RegB: MO.getReg()))
712 MO.setIsKill(false);
713 }
714 }
715 }
716}
717
718/// When an instruction is found to only use loop invariant operands that is
719/// safe to hoist, this instruction is called to do the dirty work.
720void MachineLICMImpl::HoistPostRA(MachineInstr *MI, Register Def,
721 MachineLoop *CurLoop) {
722 MachineBasicBlock *Preheader = CurLoop->getLoopPreheader();
723
724 // Now move the instructions to the predecessor, inserting it before any
725 // terminator instructions.
726 LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
727 << " from " << printMBBReference(*MI->getParent()) << ": "
728 << *MI);
729
730 // Splice the instruction to the preheader.
731 MachineBasicBlock *MBB = MI->getParent();
732 Preheader->splice(Where: Preheader->getFirstTerminator(), Other: MBB, From: MI);
733
734 // Since we are moving the instruction out of its basic block, we do not
735 // retain its debug location. Doing so would degrade the debugging
736 // experience and adversely affect the accuracy of profiling information.
737 assert(!MI->isDebugInstr() && "Should not hoist debug inst");
738 MI->setDebugLoc(DebugLoc());
739
740 // Add register to livein list to all the BBs in the current loop since a
741 // loop invariant must be kept live throughout the whole loop. This is
742 // important to ensure later passes do not scavenge the def register.
743 AddToLiveIns(Reg: Def, CurLoop);
744
745 ++NumPostRAHoisted;
746 Changed = true;
747}
748
749/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
750/// may not be safe to hoist.
751bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,
752 MachineLoop *CurLoop) {
753 if (SpeculationState != SpeculateUnknown)
754 return SpeculationState == SpeculateFalse;
755
756 if (BB != CurLoop->getHeader()) {
757 // Check loop exiting blocks.
758 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
759 CurLoop->getExitingBlocks(ExitingBlocks&: CurrentLoopExitingBlocks);
760 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
761 if (!MDTU->getDomTree().dominates(A: BB, B: CurrentLoopExitingBlock)) {
762 SpeculationState = SpeculateTrue;
763 return false;
764 }
765 }
766
767 SpeculationState = SpeculateFalse;
768 return true;
769}
770
771void MachineLICMImpl::EnterScope(MachineBasicBlock *MBB) {
772 LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
773
774 // Remember livein register pressure.
775 BackTrace.push_back(Elt: RegPressure);
776}
777
778void MachineLICMImpl::ExitScope(MachineBasicBlock *MBB) {
779 LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
780 BackTrace.pop_back();
781}
782
783/// Destroy scope for the MBB that corresponds to the given dominator tree node
784/// if its a leaf or all of its children are done. Walk up the dominator tree to
785/// destroy ancestors which are now done.
786void MachineLICMImpl::ExitScopeIfDone(
787 MachineDomTreeNode *Node,
788 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
789 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) {
790 if (OpenChildren[Node])
791 return;
792
793 for(;;) {
794 ExitScope(MBB: Node->getBlock());
795 // Now traverse upwards to pop ancestors whose offsprings are all done.
796 MachineDomTreeNode *Parent = ParentMap.lookup(Val: Node);
797 if (!Parent || --OpenChildren[Parent] != 0)
798 break;
799 Node = Parent;
800 }
801}
802
803/// Walk the specified loop in the CFG (defined by all blocks dominated by the
804/// specified header block, and that are in the current loop) in depth first
805/// order w.r.t the DominatorTree. This allows us to visit definitions before
806/// uses, allowing us to hoist a loop body in one pass without iteration.
807void MachineLICMImpl::HoistOutOfLoop(MachineDomTreeNode *HeaderN,
808 MachineLoop *CurLoop) {
809 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
810 if (!Preheader)
811 return;
812
813 SmallVector<MachineDomTreeNode*, 32> Scopes;
814 SmallVector<MachineDomTreeNode*, 8> WorkList;
815 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
816 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
817
818 // Perform a DFS walk to determine the order of visit.
819 WorkList.push_back(Elt: HeaderN);
820 while (!WorkList.empty()) {
821 MachineDomTreeNode *Node = WorkList.pop_back_val();
822 assert(Node && "Null dominator tree node?");
823 MachineBasicBlock *BB = Node->getBlock();
824
825 // If the header of the loop containing this basic block is a landing pad,
826 // then don't try to hoist instructions out of this loop.
827 const MachineLoop *ML = MLI->getLoopFor(BB);
828 if (ML && ML->getHeader()->isEHPad())
829 continue;
830
831 // If this subregion is not in the top level loop at all, exit.
832 if (!CurLoop->contains(BB))
833 continue;
834
835 Scopes.push_back(Elt: Node);
836
837 // Don't hoist things out of a large switch statement. This often causes
838 // code to be hoisted that wasn't going to be executed, and increases
839 // register pressure in a situation where it's likely to matter.
840 if (BB->succ_size() >= 25) {
841 OpenChildren[Node] = 0;
842 continue;
843 }
844
845 // Add children in reverse order as then the next popped worklist node is
846 // the first child of this node. This means we ultimately traverse the
847 // DOM tree in exactly the same order as if we'd recursed.
848 size_t WorkListStart = WorkList.size();
849 for (MachineDomTreeNode *Child : Node->children()) {
850 ParentMap[Child] = Node;
851 WorkList.push_back(Elt: Child);
852 }
853 std::reverse(first: WorkList.begin() + WorkListStart, last: WorkList.end());
854 OpenChildren[Node] = WorkList.size() - WorkListStart;
855 }
856
857 if (Scopes.size() == 0)
858 return;
859
860 // Compute registers which are livein into the loop headers.
861 RegSeen.clear();
862 BackTrace.clear();
863 InitRegPressure(BB: Preheader);
864
865 // Now perform LICM.
866 for (MachineDomTreeNode *Node : Scopes) {
867 MachineBasicBlock *MBB = Node->getBlock();
868
869 EnterScope(MBB);
870
871 // Process the block
872 SpeculationState = SpeculateUnknown;
873 for (MachineInstr &MI : llvm::make_early_inc_range(Range&: *MBB)) {
874 unsigned HoistRes = HoistResult::NotHoisted;
875 HoistRes = Hoist(MI: &MI, Preheader, CurLoop);
876 if (HoistRes & HoistResult::NotHoisted) {
877 // We have failed to hoist MI to outermost loop's preheader. If MI is in
878 // a subloop, try to hoist it to subloop's preheader.
879 SmallVector<MachineLoop *> InnerLoopWorkList;
880 for (MachineLoop *L = MLI->getLoopFor(BB: MI.getParent()); L != CurLoop;
881 L = L->getParentLoop())
882 InnerLoopWorkList.push_back(Elt: L);
883
884 while (!InnerLoopWorkList.empty()) {
885 MachineLoop *InnerLoop = InnerLoopWorkList.pop_back_val();
886 MachineBasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
887 if (InnerLoopPreheader) {
888 HoistRes = Hoist(MI: &MI, Preheader: InnerLoopPreheader, CurLoop: InnerLoop);
889 if (HoistRes & HoistResult::Hoisted)
890 break;
891 }
892 }
893 }
894
895 if (HoistRes & HoistResult::ErasedMI)
896 continue;
897
898 UpdateRegPressure(MI: &MI);
899 }
900
901 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
902 ExitScopeIfDone(Node, OpenChildren, ParentMap);
903 }
904}
905
906static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
907 return MO.isKill() || MRI->hasOneNonDBGUse(RegNo: MO.getReg());
908}
909
910/// Find all virtual register references that are liveout of the preheader to
911/// initialize the starting "register pressure". Note this does not count live
912/// through (livein but not used) registers.
913void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
914 llvm::fill(Range&: RegPressure, Value: 0);
915
916 // If the preheader has only a single predecessor and it ends with a
917 // fallthrough or an unconditional branch, then scan its predecessor for live
918 // defs as well. This happens whenever the preheader is created by splitting
919 // the critical edge from the loop predecessor to the loop header.
920 if (BB->pred_size() == 1) {
921 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
922 SmallVector<MachineOperand, 4> Cond;
923 if (!TII->analyzeBranch(MBB&: *BB, TBB, FBB, Cond, AllowModify: false) && Cond.empty())
924 InitRegPressure(BB: *BB->pred_begin());
925 }
926
927 for (const MachineInstr &MI : *BB)
928 UpdateRegPressure(MI: &MI, /*ConsiderUnseenAsDef=*/true);
929}
930
931/// Update estimate of register pressure after the specified instruction.
932void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI,
933 bool ConsiderUnseenAsDef) {
934 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
935 for (const auto &[Class, Weight] : Cost) {
936 if (static_cast<int>(RegPressure[Class]) < -Weight)
937 RegPressure[Class] = 0;
938 else
939 RegPressure[Class] += Weight;
940 }
941}
942
943/// Calculate the additional register pressure that the registers used in MI
944/// cause.
945///
946/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
947/// figure out which usages are live-ins.
948/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
949SmallDenseMap<unsigned, int>
950MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
951 bool ConsiderUnseenAsDef) {
952 SmallDenseMap<unsigned, int> Cost;
953 if (MI->isImplicitDef())
954 return Cost;
955 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
956 const MachineOperand &MO = MI->getOperand(i);
957 if (!MO.isReg() || MO.isImplicit())
958 continue;
959 Register Reg = MO.getReg();
960 if (!Reg.isVirtual())
961 continue;
962
963 // FIXME: It seems bad to use RegSeen only for some of these calculations.
964 bool isNew = ConsiderSeen ? RegSeen.insert(V: Reg).second : false;
965 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
966
967 RegClassWeight W = TRI->getRegClassWeight(RC);
968 int RCCost = 0;
969 if (MO.isDef())
970 RCCost = W.RegWeight;
971 else {
972 bool isKill = isOperandKill(MO, MRI);
973 if (isNew && !isKill && ConsiderUnseenAsDef)
974 // Haven't seen this, it must be a livein.
975 RCCost = W.RegWeight;
976 else if (!isNew && isKill)
977 RCCost = -W.RegWeight;
978 }
979 if (RCCost == 0)
980 continue;
981 const int *PS = TRI->getRegClassPressureSets(RC);
982 for (; *PS != -1; ++PS)
983 Cost[*PS] += RCCost;
984 }
985 return Cost;
986}
987
988/// Return true if this machine instruction loads from global offset table or
989/// constant pool.
990static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
991 assert(MI.mayLoad() && "Expected MI that loads!");
992
993 // If we lost memory operands, conservatively assume that the instruction
994 // reads from everything..
995 if (MI.memoperands_empty())
996 return true;
997
998 for (MachineMemOperand *MemOp : MI.memoperands())
999 if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
1000 if (PSV->isGOT() || PSV->isConstantPool())
1001 return true;
1002
1003 return false;
1004}
1005
1006// This function iterates through all the operands of the input store MI and
1007// checks that each register operand statisfies isCallerPreservedPhysReg.
1008// This means, the value being stored and the address where it is being stored
1009// is constant throughout the body of the function (not including prologue and
1010// epilogue). When called with an MI that isn't a store, it returns false.
1011// A future improvement can be to check if the store registers are constant
1012// throughout the loop rather than throughout the funtion.
1013static bool isInvariantStore(const MachineInstr &MI,
1014 const TargetRegisterInfo *TRI,
1015 const MachineRegisterInfo *MRI) {
1016
1017 bool FoundCallerPresReg = false;
1018 if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
1019 (MI.getNumOperands() == 0))
1020 return false;
1021
1022 // Check that all register operands are caller-preserved physical registers.
1023 for (const MachineOperand &MO : MI.operands()) {
1024 if (MO.isReg()) {
1025 Register Reg = MO.getReg();
1026 // If operand is a virtual register, check if it comes from a copy of a
1027 // physical register.
1028 if (Reg.isVirtual())
1029 Reg = TRI->lookThruCopyLike(SrcReg: MO.getReg(), MRI);
1030 if (Reg.isVirtual())
1031 return false;
1032 if (!TRI->isCallerPreservedPhysReg(PhysReg: Reg.asMCReg(), MF: *MI.getMF()))
1033 return false;
1034 else
1035 FoundCallerPresReg = true;
1036 } else if (!MO.isImm()) {
1037 return false;
1038 }
1039 }
1040 return FoundCallerPresReg;
1041}
1042
1043// Return true if the input MI is a copy instruction that feeds an invariant
1044// store instruction. This means that the src of the copy has to satisfy
1045// isCallerPreservedPhysReg and atleast one of it's users should satisfy
1046// isInvariantStore.
1047static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
1048 const MachineRegisterInfo *MRI,
1049 const TargetRegisterInfo *TRI) {
1050
1051 // FIXME: If targets would like to look through instructions that aren't
1052 // pure copies, this can be updated to a query.
1053 if (!MI.isCopy())
1054 return false;
1055
1056 const MachineFunction *MF = MI.getMF();
1057 // Check that we are copying a constant physical register.
1058 Register CopySrcReg = MI.getOperand(i: 1).getReg();
1059 if (CopySrcReg.isVirtual())
1060 return false;
1061
1062 if (!TRI->isCallerPreservedPhysReg(PhysReg: CopySrcReg.asMCReg(), MF: *MF))
1063 return false;
1064
1065 Register CopyDstReg = MI.getOperand(i: 0).getReg();
1066 // Check if any of the uses of the copy are invariant stores.
1067 assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");
1068
1069 for (MachineInstr &UseMI : MRI->use_instructions(Reg: CopyDstReg)) {
1070 if (UseMI.mayStore() && isInvariantStore(MI: UseMI, TRI, MRI))
1071 return true;
1072 }
1073 return false;
1074}
1075
1076/// Returns true if the instruction may be a suitable candidate for LICM.
1077/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
1078bool MachineLICMImpl::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) {
1079 // Check if it's safe to move the instruction.
1080 bool DontMoveAcrossStore = !HoistConstLoads || !AllowedToHoistLoads[CurLoop];
1081 if ((!I.isSafeToMove(SawStore&: DontMoveAcrossStore)) &&
1082 !(HoistConstStores && isInvariantStore(MI: I, TRI, MRI))) {
1083 LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
1084 return false;
1085 }
1086
1087 // If it is a load then check if it is guaranteed to execute by making sure
1088 // that it dominates all exiting blocks. If it doesn't, then there is a path
1089 // out of the loop which does not execute this load, so we can't hoist it.
1090 // Loads from constant memory are safe to speculate, for example indexed load
1091 // from a jump table.
1092 // Stores and side effects are already checked by isSafeToMove.
1093 if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(MI&: I) &&
1094 !IsGuaranteedToExecute(BB: I.getParent(), CurLoop)) {
1095 LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
1096 return false;
1097 }
1098
1099 // Convergent attribute has been used on operations that involve inter-thread
1100 // communication which results are implicitly affected by the enclosing
1101 // control flows. It is not safe to hoist or sink such operations across
1102 // control flow.
1103 if (I.isConvergent())
1104 return false;
1105
1106 if (!TII->shouldHoist(MI: I, FromLoop: CurLoop))
1107 return false;
1108
1109 return true;
1110}
1111
1112/// Returns true if the instruction is loop invariant.
1113bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &I,
1114 MachineLoop *CurLoop) {
1115 if (!IsLICMCandidate(I, CurLoop)) {
1116 LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
1117 return false;
1118 }
1119 return CurLoop->isLoopInvariant(I);
1120}
1121
1122/// Return true if the specified instruction is used by a phi node and hoisting
1123/// it could cause a copy to be inserted.
1124bool MachineLICMImpl::HasLoopPHIUse(const MachineInstr *MI,
1125 MachineLoop *CurLoop) {
1126 SmallVector<const MachineInstr *, 8> Work(1, MI);
1127 do {
1128 MI = Work.pop_back_val();
1129 for (const MachineOperand &MO : MI->all_defs()) {
1130 Register Reg = MO.getReg();
1131 if (!Reg.isVirtual())
1132 continue;
1133 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
1134 // A PHI may cause a copy to be inserted.
1135 if (UseMI.isPHI()) {
1136 // A PHI inside the loop causes a copy because the live range of Reg is
1137 // extended across the PHI.
1138 if (CurLoop->contains(Inst: &UseMI))
1139 return true;
1140 // A PHI in an exit block can cause a copy to be inserted if the PHI
1141 // has multiple predecessors in the loop with different values.
1142 // For now, approximate by rejecting all exit blocks.
1143 if (isExitBlock(CurLoop, MBB: UseMI.getParent()))
1144 return true;
1145 continue;
1146 }
1147 // Look past copies as well.
1148 if (UseMI.isCopy() && CurLoop->contains(Inst: &UseMI))
1149 Work.push_back(Elt: &UseMI);
1150 }
1151 }
1152 } while (!Work.empty());
1153 return false;
1154}
1155
1156/// Compute operand latency between a def of 'Reg' and an use in the current
1157/// loop, return true if the target considered it high.
1158bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
1159 Register Reg,
1160 MachineLoop *CurLoop) const {
1161 if (MRI->use_nodbg_empty(RegNo: Reg))
1162 return false;
1163
1164 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
1165 if (UseMI.isCopyLike())
1166 continue;
1167 if (!CurLoop->contains(BB: UseMI.getParent()))
1168 continue;
1169 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
1170 const MachineOperand &MO = UseMI.getOperand(i);
1171 if (!MO.isReg() || !MO.isUse())
1172 continue;
1173 Register MOReg = MO.getReg();
1174 if (MOReg != Reg)
1175 continue;
1176
1177 if (TII->hasHighOperandLatency(SchedModel, MRI, DefMI: MI, DefIdx, UseMI, UseIdx: i))
1178 return true;
1179 }
1180
1181 // Only look at the first in loop use.
1182 break;
1183 }
1184
1185 return false;
1186}
1187
1188/// Return true if the instruction is marked "cheap" or the operand latency
1189/// between its def and a use is one or less.
1190bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {
1191 if (TII->isAsCheapAsAMove(MI) || MI.isSubregToReg())
1192 return true;
1193
1194 bool isCheap = false;
1195 unsigned NumDefs = MI.getDesc().getNumDefs();
1196 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1197 MachineOperand &DefMO = MI.getOperand(i);
1198 if (!DefMO.isReg() || !DefMO.isDef())
1199 continue;
1200 --NumDefs;
1201 Register Reg = DefMO.getReg();
1202 if (Reg.isPhysical())
1203 continue;
1204
1205 if (!TII->hasLowDefLatency(SchedModel, DefMI: MI, DefIdx: i))
1206 return false;
1207 isCheap = true;
1208 }
1209
1210 return isCheap;
1211}
1212
1213/// Visit BBs from header to current BB, check if hoisting an instruction of the
1214/// given cost matrix can cause high register pressure.
1215bool MachineLICMImpl::CanCauseHighRegPressure(
1216 const SmallDenseMap<unsigned, int> &Cost, bool CheapInstr) {
1217 for (const auto &[Class, Weight] : Cost) {
1218 if (Weight <= 0)
1219 continue;
1220
1221 int Limit = RegLimit[Class];
1222
1223 // Don't hoist cheap instructions if they would increase register pressure,
1224 // even if we're under the limit.
1225 if (CheapInstr && !HoistCheapInsts)
1226 return true;
1227
1228 for (const auto &RP : BackTrace)
1229 if (static_cast<int>(RP[Class]) + Weight >= Limit)
1230 return true;
1231 }
1232
1233 return false;
1234}
1235
1236/// Traverse the back trace from header to the current block and update their
1237/// register pressures to reflect the effect of hoisting MI from the current
1238/// block to the preheader.
1239void MachineLICMImpl::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1240 // First compute the 'cost' of the instruction, i.e. its contribution
1241 // to register pressure.
1242 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1243 /*ConsiderUnseenAsDef=*/false);
1244
1245 // Update register pressure of blocks from loop header to current block.
1246 for (auto &RP : BackTrace)
1247 for (const auto &[Class, Weight] : Cost)
1248 RP[Class] += Weight;
1249}
1250
/// Return true if it is potentially profitable to hoist the given loop
/// invariant. Balances the latency saved by hoisting against the register
/// pressure added by extending the value's live range across the loop.
bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &MI,
                                          MachineLoop *CurLoop) {
  // IMPLICIT_DEFs are free: hoisting them costs nothing.
  if (MI.isImplicitDef())
    return true;

  // Besides removing computation from the loop, hoisting an instruction has
  // these effects:
  //
  // - The value defined by the instruction becomes live across the entire
  //   loop. This increases register pressure in the loop.
  //
  // - If the value is used by a PHI in the loop, a copy will be required for
  //   lowering the PHI after extending the live range.
  //
  // - When hoisting the last use of a value in the loop, that value no longer
  //   needs to be live in the loop. This lowers register pressure in the loop.

  if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI))
    return true;

  bool CheapInstr = IsCheapInstruction(MI);
  bool CreatesCopy = HasLoopPHIUse(MI: &MI, CurLoop);

  // Don't hoist a cheap instruction if it would create a copy in the loop.
  if (CheapInstr && CreatesCopy) {
    LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
    return false;
  }

  // Trivially rematerializable instructions should always be hoisted
  // providing the register allocator can just pull them down again when needed.
  if (TII->isTriviallyReMaterializable(MI))
    return true;

  // FIXME: If there are long latency loop-invariant instructions inside the
  // loop at this point, why didn't the optimizer's LICM hoist them?
  // A def whose in-loop use the target flags as high-latency is always worth
  // hoisting.
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;
    if (MO.isDef() && HasHighOperandLatency(MI, DefIdx: i, Reg, CurLoop)) {
      LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
      ++NumHighLatency;
      return true;
    }
  }

  // Estimate register pressure to determine whether to LICM the instruction.
  // In low register pressure situation, we can be more aggressive about
  // hoisting. Also, favors hoisting long latency instructions even in
  // moderately high pressure situation.
  // Cheap instructions will only be hoisted if they don't increase register
  // pressure at all.
  auto Cost = calcRegisterCost(MI: &MI, /*ConsiderSeen=*/false,
                               /*ConsiderUnseenAsDef=*/false);

  // Visit BBs from header to current BB, if hoisting this doesn't cause
  // high register pressure, then it's safe to proceed.
  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
    LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
    ++NumLowRP;
    return true;
  }

  // Don't risk increasing register pressure if it would create copies.
  if (CreatesCopy) {
    LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
    return false;
  }

  // Do not "speculate" in high register pressure situation. If an
  // instruction is not guaranteed to be executed in the loop, it's best to be
  // conservative.
  if (AvoidSpeculation &&
      (!IsGuaranteedToExecute(BB: MI.getParent(), CurLoop) && !MayCSE(MI: &MI))) {
    LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
    return false;
  }

  // If we have a COPY with other uses in the loop, hoist to allow the users to
  // also be hoisted.
  // TODO: Handle all isCopyLike?
  // NOTE: Cost is moved into the closure below and must not be used after
  // this point.
  if (MI.isCopy() || MI.isRegSequence()) {
    Register DefReg = MI.getOperand(i: 0).getReg();
    if (DefReg.isVirtual() &&
        all_of(Range: MI.uses(),
               P: [this](const MachineOperand &UseOp) {
                 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
                        MRI->isConstantPhysReg(PhysReg: UseOp.getReg());
               }) &&
        IsLoopInvariantInst(I&: MI, CurLoop) &&
        any_of(Range: MRI->use_nodbg_instructions(Reg: DefReg),
               P: [&CurLoop, this, DefReg,
                   Cost = std::move(Cost)](MachineInstr &UseMI) {
                 if (!CurLoop->contains(Inst: &UseMI))
                   return false;

                 // COPY is a cheap instruction, but if moving it won't cause
                 // high RP we're fine to hoist it even if the user can't be
                 // hoisted later Otherwise we want to check the user if it's
                 // hoistable
                 if (CanCauseHighRegPressure(Cost, CheapInstr: false) &&
                     !CurLoop->isLoopInvariant(I&: UseMI, ExcludeReg: DefReg))
                   return false;

                 return true;
               }))
      return true;
  }

  // High register pressure situation, only hoist if the instruction is going
  // to be remat'ed.
  if (!TII->isTriviallyReMaterializable(MI) &&
      !MI.isDereferenceableInvariantLoad()) {
    LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
    return false;
  }

  return true;
}
1376
/// Unfold a load from the given machineinstr if the load itself could be
/// hoisted. Return the unfolded and hoistable load, or null if the load
/// couldn't be unfolded or if it wouldn't be hoistable.
MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *MI,
                                                    MachineLoop *CurLoop) {
  // Don't unfold simple loads.
  if (MI->canFoldAsLoad())
    return nullptr;

  // If not, we may be able to unfold a load and hoist that.
  // First test whether the instruction is loading from an amenable
  // memory location.
  if (!MI->isDereferenceableInvariantLoad())
    return nullptr;

  // Next determine the register class for a temporary register.
  unsigned LoadRegIndex;
  unsigned NewOpc =
    TII->getOpcodeAfterMemoryUnfold(Opc: MI->getOpcode(),
                                    /*UnfoldLoad=*/true,
                                    /*UnfoldStore=*/false,
                                    LoadRegIndex: &LoadRegIndex);
  // NewOpc == 0 means the target cannot unfold this instruction.
  if (NewOpc == 0) return nullptr;
  const MCInstrDesc &MID = TII->get(Opcode: NewOpc);
  MachineFunction &MF = *MI->getMF();
  const TargetRegisterClass *RC = TII->getRegClass(MCID: MID, OpNum: LoadRegIndex);
  // Ok, we're unfolding. Create a temporary register and do the unfold.
  Register Reg = MRI->createVirtualRegister(RegClass: RC);

  SmallVector<MachineInstr *, 2> NewMIs;
  bool Success = TII->unfoldMemoryOperand(MF, MI&: *MI, Reg,
                                          /*UnfoldLoad=*/true,
                                          /*UnfoldStore=*/false, NewMIs);
  (void)Success;
  assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
         "succeeded!");
  assert(NewMIs.size() == 2 &&
         "Unfolded a load into multiple instructions!");
  // Insert the new load (NewMIs[0]) and the remaining operation (NewMIs[1])
  // in place of MI so the hoistability checks below see them in context.
  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::iterator Pos = MI;
  MBB->insert(I: Pos, MI: NewMIs[0]);
  MBB->insert(I: Pos, MI: NewMIs[1]);
  // If unfolding produced a load that wasn't loop-invariant or profitable to
  // hoist, discard the new instructions and bail.
  if (!IsLoopInvariantInst(I&: *NewMIs[0], CurLoop) ||
      !IsProfitableToHoist(MI&: *NewMIs[0], CurLoop)) {
    NewMIs[0]->eraseFromParent();
    NewMIs[1]->eraseFromParent();
    return nullptr;
  }

  // Update register pressure for the unfolded instruction.
  UpdateRegPressure(MI: NewMIs[1]);

  // Otherwise we successfully unfolded a load that we can hoist.

  // Update the call info.
  if (MI->shouldUpdateAdditionalCallInfo())
    MF.eraseAdditionalCallInfo(MI);

  // The original folded instruction is replaced by the two new ones.
  MI->eraseFromParent();
  return NewMIs[0];
}
1441
1442/// Initialize the CSE map with instructions that are in the current loop
1443/// preheader that may become duplicates of instructions that are hoisted
1444/// out of the loop.
1445void MachineLICMImpl::InitCSEMap(MachineBasicBlock *BB) {
1446 for (MachineInstr &MI : *BB)
1447 CSEMap[BB][MI.getOpcode()].push_back(x: &MI);
1448}
1449
1450/// Initialize AllowedToHoistLoads with information about whether invariant
1451/// loads can be moved outside a given loop
1452void MachineLICMImpl::InitializeLoadsHoistableLoops() {
1453 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
1454 SmallVector<MachineLoop *, 8> LoopsInPreOrder;
1455
1456 // Mark all loops as hoistable initially and prepare a list of loops in
1457 // pre-order DFS.
1458 while (!Worklist.empty()) {
1459 auto *L = Worklist.pop_back_val();
1460 AllowedToHoistLoads[L] = true;
1461 LoopsInPreOrder.push_back(Elt: L);
1462 llvm::append_range(C&: Worklist, R: L->getSubLoops());
1463 }
1464
1465 // Going from the innermost to outermost loops, check if a loop has
1466 // instructions preventing invariant load hoisting. If such instruction is
1467 // found, mark this loop and its parent as non-hoistable and continue
1468 // investigating the next loop.
1469 // Visiting in a reversed pre-ordered DFS manner
1470 // allows us to not process all the instructions of the outer loop if the
1471 // inner loop is proved to be non-load-hoistable.
1472 for (auto *Loop : reverse(C&: LoopsInPreOrder)) {
1473 for (auto *MBB : Loop->blocks()) {
1474 // If this loop has already been marked as non-hoistable, skip it.
1475 if (!AllowedToHoistLoads[Loop])
1476 continue;
1477 for (auto &MI : *MBB) {
1478 if (!MI.isLoadFoldBarrier() && !MI.mayStore() && !MI.isCall() &&
1479 !(MI.mayLoad() && MI.hasOrderedMemoryRef()))
1480 continue;
1481 for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop())
1482 AllowedToHoistLoads[L] = false;
1483 break;
1484 }
1485 }
1486 }
1487}
1488
1489/// Find an instruction amount PrevMIs that is a duplicate of MI.
1490/// Return this instruction if it's found.
1491MachineInstr *
1492MachineLICMImpl::LookForDuplicate(const MachineInstr *MI,
1493 std::vector<MachineInstr *> &PrevMIs) {
1494 for (MachineInstr *PrevMI : PrevMIs)
1495 if (TII->produceSameValue(MI0: *MI, MI1: *PrevMI, MRI: (PreRegAlloc ? MRI : nullptr)))
1496 return PrevMI;
1497
1498 return nullptr;
1499}
1500
/// Given a LICM'ed instruction, look for an instruction on the preheader that
/// computes the same value. If it's found, do a RAU on with the definition of
/// the existing instruction rather than hoisting the instruction to the
/// preheader. Returns true if MI was erased in favor of an existing duplicate.
bool MachineLICMImpl::EliminateCSE(
    MachineInstr *MI,
    DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (MI->isImplicitDef())
    return false;

  // Do not CSE normal loads because between them could be store instructions
  // that change the loaded value
  if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
    return false;

  if (MachineInstr *Dup = LookForDuplicate(MI, PrevMIs&: CI->second)) {
    LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);

    // Replace virtual registers defined by MI by their counterparts defined
    // by Dup.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);

      // Physical registers may not differ here.
      assert((!MO.isReg() || MO.getReg() == 0 || !MO.getReg().isPhysical() ||
              MO.getReg() == Dup->getOperand(i).getReg()) &&
             "Instructions with different phys regs are not identical!");

      if (MO.isReg() && MO.isDef() && !MO.getReg().isPhysical())
        Defs.push_back(Elt: i);
    }

    // Constrain each of Dup's def registers to the register class of the
    // corresponding def in MI; otherwise uses of MI's defs cannot legally be
    // rewritten to use Dup's. Remember the original classes so a partial
    // failure can be rolled back.
    SmallVector<const TargetRegisterClass*, 2> OrigRCs;
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      Register Reg = MI->getOperand(i: Idx).getReg();
      Register DupReg = Dup->getOperand(i: Idx).getReg();
      OrigRCs.push_back(Elt: MRI->getRegClass(Reg: DupReg));

      if (!MRI->constrainRegClass(Reg: DupReg, RC: MRI->getRegClass(Reg))) {
        // Restore old RCs if more than one defs.
        for (unsigned j = 0; j != i; ++j)
          MRI->setRegClass(Reg: Dup->getOperand(i: Defs[j]).getReg(), RC: OrigRCs[j]);
        return false;
      }
    }

    // All constraints succeeded: rewrite every use of MI's defs to Dup's.
    for (unsigned Idx : Defs) {
      Register Reg = MI->getOperand(i: Idx).getReg();
      Register DupReg = Dup->getOperand(i: Idx).getReg();
      MRI->replaceRegWith(FromReg: Reg, ToReg: DupReg);
      MRI->clearKillFlags(Reg: DupReg);
      // Clear Dup dead flag if any, we reuse it for Reg.
      if (!MRI->use_nodbg_empty(RegNo: DupReg))
        Dup->getOperand(i: Idx).setIsDead(false);
    }

    MI->eraseFromParent();
    ++NumCSEed;
    return true;
  }
  return false;
}
1567
1568/// Return true if the given instruction will be CSE'd if it's hoisted out of
1569/// the loop.
1570bool MachineLICMImpl::MayCSE(MachineInstr *MI) {
1571 if (MI->mayLoad() && !MI->isDereferenceableInvariantLoad())
1572 return false;
1573
1574 unsigned Opcode = MI->getOpcode();
1575 for (auto &Map : CSEMap) {
1576 // Check this CSEMap's preheader dominates MI's basic block.
1577 if (MDTU->getDomTree().dominates(A: Map.first, B: MI->getParent())) {
1578 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1579 Map.second.find(Val: Opcode);
1580 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1581 // the undef property onto uses.
1582 if (CI == Map.second.end() || MI->isImplicitDef())
1583 continue;
1584 if (LookForDuplicate(MI, PrevMIs&: CI->second) != nullptr)
1585 return true;
1586 }
1587 }
1588
1589 return false;
1590}
1591
/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
/// Returns a bitmask of HoistResult flags: whether the instruction was
/// hoisted, and whether the original MI was erased (by CSE or by load
/// unfolding).
unsigned MachineLICMImpl::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,
                                MachineLoop *CurLoop) {
  MachineBasicBlock *SrcBlock = MI->getParent();

  // Disable the instruction hoisting due to block hotness
  if ((DisableHoistingToHotterBlocks == UseBFI::All ||
       (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
      isTgtHotterThanSrc(SrcBlock, TgtBlock: Preheader)) {
    ++NumNotHoistedDueToHotness;
    return HoistResult::NotHoisted;
  }
  // First check whether we should hoist this instruction.
  bool HasExtractHoistableLoad = false;
  if (!IsLoopInvariantInst(I&: *MI, CurLoop) ||
      !IsProfitableToHoist(MI&: *MI, CurLoop)) {
    // If not, try unfolding a hoistable load. On success MI points at the
    // unfolded load and the original instruction has been erased.
    MI = ExtractHoistableLoad(MI, CurLoop);
    if (!MI)
      return HoistResult::NotHoisted;
    HasExtractHoistableLoad = true;
  }

  // If we have hoisted an instruction that may store, it can only be a constant
  // store.
  if (MI->mayStore())
    NumStoreConst++;

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  LLVM_DEBUG({
    dbgs() << "Hoisting " << *MI;
    if (MI->getParent()->getBasicBlock())
      dbgs() << " from " << printMBBReference(*MI->getParent());
    if (Preheader->getBasicBlock())
      dbgs() << " to " << printMBBReference(*Preheader);
    dbgs() << "\n";
  });

  // If this is the first instruction being hoisted to the preheader,
  // initialize the CSE map with potential common expressions.
  if (FirstInLoop) {
    InitCSEMap(BB: Preheader);
    FirstInLoop = false;
  }

  // Look for opportunity to CSE the hoisted instruction. Any preheader whose
  // block dominates MI may already hold an equivalent instruction.
  unsigned Opcode = MI->getOpcode();
  bool HasCSEDone = false;
  for (auto &Map : CSEMap) {
    // Check this CSEMap's preheader dominates MI's basic block.
    if (MDTU->getDomTree().dominates(A: Map.first, B: MI->getParent())) {
      DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
          Map.second.find(Val: Opcode);
      if (CI != Map.second.end()) {
        if (EliminateCSE(MI, CI)) {
          HasCSEDone = true;
          break;
        }
      }
    }
  }

  if (!HasCSEDone) {
    // Otherwise, splice the instruction to the preheader.
    Preheader->splice(Where: Preheader->getFirstTerminator(),Other: MI->getParent(),From: MI);

    // Since we are moving the instruction out of its basic block, we do not
    // retain its debug location. Doing so would degrade the debugging
    // experience and adversely affect the accuracy of profiling information.
    assert(!MI->isDebugInstr() && "Should not hoist debug inst");
    MI->setDebugLoc(DebugLoc());

    // Update register pressure for BBs from header to this block.
    UpdateBackTraceRegPressure(MI);

    // Clear the kill flags of any register this instruction defines,
    // since they may need to be live throughout the entire loop
    // rather than just live for part of it.
    for (MachineOperand &MO : MI->all_defs())
      if (!MO.isDead())
        MRI->clearKillFlags(Reg: MO.getReg());

    // The hoisted instruction is now itself a CSE candidate for later hoists.
    CSEMap[Preheader][Opcode].push_back(x: MI);
  }

  ++NumHoisted;
  Changed = true;

  // CSE and load unfolding both erase the caller's original MI.
  if (HasCSEDone || HasExtractHoistableLoad)
    return HoistResult::Hoisted | HoistResult::ErasedMI;
  return HoistResult::Hoisted;
}
1687
1688/// Get the preheader for the current loop, splitting a critical edge if needed.
1689MachineBasicBlock *MachineLICMImpl::getOrCreatePreheader(MachineLoop *CurLoop) {
1690 // Determine the block to which to hoist instructions. If we can't find a
1691 // suitable loop predecessor, we can't do any hoisting.
1692 if (MachineBasicBlock *Preheader = CurLoop->getLoopPreheader())
1693 return Preheader;
1694
1695 // Try forming a preheader by splitting the critical edge between the single
1696 // predecessor and the loop header.
1697 if (MachineBasicBlock *Pred = CurLoop->getLoopPredecessor()) {
1698 MachineBasicBlock *NewPreheader = Pred->SplitCriticalEdge(
1699 Succ: CurLoop->getHeader(), P: LegacyPass, MFAM, LiveInSets: nullptr, MDTU);
1700 if (NewPreheader)
1701 Changed = true;
1702 return NewPreheader;
1703 }
1704
1705 return nullptr;
1706}
1707
1708/// Is the target basic block at least "BlockFrequencyRatioThreshold"
1709/// times hotter than the source basic block.
1710bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1711 MachineBasicBlock *TgtBlock) {
1712 // Parse source and target basic block frequency from MBFI
1713 uint64_t SrcBF = MBFI->getBlockFreq(MBB: SrcBlock).getFrequency();
1714 uint64_t DstBF = MBFI->getBlockFreq(MBB: TgtBlock).getFrequency();
1715
1716 // Disable the hoisting if source block frequency is zero
1717 if (!SrcBF)
1718 return true;
1719
1720 double Ratio = (double)DstBF / SrcBF;
1721
1722 // Compare the block frequency ratio with the threshold
1723 return Ratio > BlockFrequencyRatioThreshold;
1724}
1725
1726template <typename DerivedT, bool PreRegAlloc>
1727PreservedAnalyses MachineLICMBasePass<DerivedT, PreRegAlloc>::run(
1728 MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) {
1729 bool Changed = MachineLICMImpl(PreRegAlloc, nullptr, &MFAM).run(MF);
1730 if (!Changed)
1731 return PreservedAnalyses::all();
1732 auto PA = getMachineFunctionPassPreservedAnalyses();
1733 PA.preserve<MachineLoopAnalysis>();
1734 return PA;
1735}
1736
// Explicit instantiations for the two public pass flavors: early machine LICM
// (runs before register allocation) and regular machine LICM (runs after).
template class llvm::MachineLICMBasePass<EarlyMachineLICMPass, true>;
template class llvm::MachineLICMBasePass<MachineLICMPass, false>;
1739