1//===- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Simple pass to fill delay slots with useful instructions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Mips.h"
14#include "MipsInstrInfo.h"
15#include "MipsSubtarget.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/PointerUnion.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Analysis/AliasAnalysis.h"
24#include "llvm/Analysis/ValueTracking.h"
25#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineFunctionPass.h"
29#include "llvm/CodeGen/MachineInstr.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/MachineOperand.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/CodeGen/PseudoSourceValue.h"
34#include "llvm/CodeGen/TargetRegisterInfo.h"
35#include "llvm/CodeGen/TargetSubtargetInfo.h"
36#include "llvm/MC/MCInstrDesc.h"
37#include "llvm/MC/MCRegisterInfo.h"
38#include "llvm/Support/Casting.h"
39#include "llvm/Support/CodeGen.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Support/ErrorHandling.h"
42#include "llvm/Support/raw_ostream.h"
43#include "llvm/Target/TargetMachine.h"
44#include <cassert>
45#include <iterator>
46#include <memory>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "mips-delay-slot-filler"
52
53STATISTIC(FilledSlots, "Number of delay slots filled");
54STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
55 " are not NOP.");
56STATISTIC(R5900ShortLoopNops, "Number of delay slots left as NOP for R5900 "
57 "short loop fix");
58
59static cl::opt<bool> DisableDelaySlotFiller(
60 "disable-mips-delay-filler",
61 cl::init(Val: false),
62 cl::desc("Fill all delay slots with NOPs."),
63 cl::Hidden);
64
65static cl::opt<bool> DisableForwardSearch(
66 "disable-mips-df-forward-search",
67 cl::init(Val: true),
68 cl::desc("Disallow MIPS delay filler to search forward."),
69 cl::Hidden);
70
71static cl::opt<bool> DisableSuccBBSearch(
72 "disable-mips-df-succbb-search",
73 cl::init(Val: true),
74 cl::desc("Disallow MIPS delay filler to search successor basic blocks."),
75 cl::Hidden);
76
77static cl::opt<bool> DisableBackwardSearch(
78 "disable-mips-df-backward-search",
79 cl::init(Val: false),
80 cl::desc("Disallow MIPS delay filler to search backward."),
81 cl::Hidden);
82
83extern cl::opt<CompactBranchPolicy> MipsCompactBranchPolicy;
84
85namespace {
86
87 using Iter = MachineBasicBlock::iterator;
88 using ReverseIter = MachineBasicBlock::reverse_iterator;
89 using BB2BrMap = SmallDenseMap<MachineBasicBlock *, MachineInstr *, 2>;
90
91 // Holds information about one branch instruction
92 // This is used by the MIPS1 target to easily find all paths of a branch to
93 // then check the first instruction for possible load delay hazards
94 class BranchInformation {
95 private:
96 // The pointer to the actual branch instruction
97 const MachineInstr *BranchInstr = nullptr;
98 // The pointer to the instruction after the branch (= the `else` case)
99 const MachineInstr *ElseBranchInstr = nullptr;
100
101 // Check if `Adr` is a pseudo instruction and if so, then treat it as non
102 // existing
103 static const MachineInstr *filterPseudoInstr(const MachineInstr *Adr) {
104 if (Adr && !Adr->isPseudo()) {
105 return Adr;
106 }
107 return nullptr;
108 }
109
110 public:
111 // Creates a new `BranchInformation` from the branch candidate `CurrentSlot`
112 // together with the end (`MBBEnd`) of the current MBB and the first
113 // instruction of the next MBB `NextMBBInstr`
114 BranchInformation(MachineInstrBundleIterator<MachineInstr> CurrentSlot,
115 MachineInstrBundleIterator<MachineInstr> MBBEnd,
116 const MachineInstr *NextMBBInstr)
117 : BranchInstr(
118 CurrentSlot->isBranch()
119 ? BranchInformation::filterPseudoInstr(Adr: &(*CurrentSlot))
120 : nullptr),
121 ElseBranchInstr(
122 (++CurrentSlot) == MBBEnd
123 ? BranchInformation::filterPseudoInstr(Adr: NextMBBInstr)
124 : BranchInformation::filterPseudoInstr(Adr: &(*CurrentSlot))) {}
125
126 // Checks if we have a branch
127 constexpr bool hasBranchInstr() const { return this->BranchInstr; }
128
129 // Checks if we have an else branch
130 constexpr bool hasBranchElseInstr() const { return this->ElseBranchInstr; }
131
132 // Checks if we have an indirect branch
133 constexpr bool isIndirectBranch() const {
134 if (this->BranchInstr) {
135 return this->BranchInstr->isIndirectBranch();
136 }
137 return false;
138 }
139
140 // Checks if we have an unconditional branch
141 constexpr bool isUnconditionalBranch() const {
142 if (this->BranchInstr) {
143 return this->BranchInstr->isUnconditionalBranch();
144 }
145 return false;
146 }
147
148 // Accesses the branch instruction
149 const MachineInstr *getBranchInstr() const { return this->BranchInstr; }
150
151 // Accesses the instruction after the branch
152 const MachineInstr *getBranchElseInstr() const {
153 return this->ElseBranchInstr;
154 }
155
156 // Gets the target of the branch
157 const MachineBasicBlock *getBranchTarget() const {
158 if (this->isIndirectBranch() || !this->hasBranchInstr()) {
159 // Indirect branch has no known target
160 return nullptr;
161 }
162
163 for (const MachineOperand &MO : this->BranchInstr->operands()) {
164 if (MO.isMBB()) {
165 return MO.getMBB();
166 }
167 }
168 return nullptr;
169 }
170 };
171
172 class RegDefsUses {
173 public:
174 RegDefsUses(const TargetRegisterInfo &TRI);
175
176 void init(const MachineInstr &MI);
177
178 /// This function sets all caller-saved registers in Defs.
179 void setCallerSaved(const MachineInstr &MI);
180
181 /// This function sets all unallocatable registers in Defs.
182 void setUnallocatableRegs(const MachineFunction &MF);
183
184 /// Set bits in Uses corresponding to MBB's live-out registers except for
185 /// the registers that are live-in to SuccBB.
186 void addLiveOut(const MachineBasicBlock &MBB,
187 const MachineBasicBlock &SuccBB);
188
189 bool update(const MachineInstr &MI, unsigned Begin, unsigned End);
190
191 private:
192 bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg,
193 bool IsDef) const;
194
195 /// Returns true if Reg or its alias is in RegSet.
196 bool isRegInSet(const BitVector &RegSet, unsigned Reg) const;
197
198 const TargetRegisterInfo &TRI;
199 BitVector Defs, Uses;
200 };
201
202 /// Base class for inspecting loads and stores.
203 class InspectMemInstr {
204 public:
205 InspectMemInstr(bool ForbidMemInstr_) : ForbidMemInstr(ForbidMemInstr_) {}
206 virtual ~InspectMemInstr() = default;
207
208 /// Return true if MI cannot be moved to delay slot.
209 bool hasHazard(const MachineInstr &MI);
210
211 protected:
212 /// Flags indicating whether loads or stores have been seen.
213 bool OrigSeenLoad = false;
214 bool OrigSeenStore = false;
215 bool SeenLoad = false;
216 bool SeenStore = false;
217
218 /// Memory instructions are not allowed to move to delay slot if this flag
219 /// is true.
220 bool ForbidMemInstr;
221
222 private:
223 virtual bool hasHazard_(const MachineInstr &MI) = 0;
224 };
225
226 /// This subclass rejects any memory instructions.
227 class NoMemInstr : public InspectMemInstr {
228 public:
229 NoMemInstr() : InspectMemInstr(true) {}
230
231 private:
232 bool hasHazard_(const MachineInstr &MI) override { return true; }
233 };
234
235 /// This subclass accepts loads from stacks and constant loads.
236 class LoadFromStackOrConst : public InspectMemInstr {
237 public:
238 LoadFromStackOrConst() : InspectMemInstr(false) {}
239
240 private:
241 bool hasHazard_(const MachineInstr &MI) override;
242 };
243
244 /// This subclass uses memory dependence information to determine whether a
245 /// memory instruction can be moved to a delay slot.
246 class MemDefsUses : public InspectMemInstr {
247 public:
248 explicit MemDefsUses(const MachineFrameInfo *MFI);
249
250 private:
251 using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
252
253 bool hasHazard_(const MachineInstr &MI) override;
254
255 /// Update Defs and Uses. Return true if there exist dependences that
256 /// disqualify the delay slot candidate between V and values in Uses and
257 /// Defs.
258 bool updateDefsUses(ValueType V, bool MayStore);
259
260 /// Get the list of underlying objects of MI's memory operand.
261 bool getUnderlyingObjects(const MachineInstr &MI,
262 SmallVectorImpl<ValueType> &Objects) const;
263
264 const MachineFrameInfo *MFI;
265 SmallPtrSet<ValueType, 4> Uses, Defs;
266
267 /// Flags indicating whether loads or stores with no underlying objects have
268 /// been seen.
269 bool SeenNoObjLoad = false;
270 bool SeenNoObjStore = false;
271 };
272
273 class MipsDelaySlotFiller : public MachineFunctionPass {
274 public:
275 MipsDelaySlotFiller() : MachineFunctionPass(ID) {}
276
277 StringRef getPassName() const override { return "Mips Delay Slot Filler"; }
278
279 bool runOnMachineFunction(MachineFunction &F) override {
280 TM = &F.getTarget();
281 bool Changed = false;
282 for (auto MBB = F.begin(); MBB != F.end();) {
283 auto curMBB = MBB;
284 MBB++;
285
286 Changed |= runOnMachineBasicBlock(
287 MBB&: *curMBB, FirstNextMBBInstr: (MBB != F.end() && !(*MBB).empty()) ? &(*MBB).instr_front()
288 : nullptr);
289 }
290
291 // This pass invalidates liveness information when it reorders
292 // instructions to fill delay slot. Without this, -verify-machineinstrs
293 // will fail.
294 if (Changed)
295 F.getRegInfo().invalidateLiveness();
296
297 return Changed;
298 }
299
300 MachineFunctionProperties getRequiredProperties() const override {
301 return MachineFunctionProperties().setNoVRegs();
302 }
303
304 void getAnalysisUsage(AnalysisUsage &AU) const override {
305 AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
306 MachineFunctionPass::getAnalysisUsage(AU);
307 }
308
309 static char ID;
310
311 private:
312 bool runOnMachineBasicBlock(MachineBasicBlock &MBB,
313 MachineInstr *FirstNextMBBInstr);
314
315 Iter replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch,
316 const DebugLoc &DL);
317
318 /// This function checks if it is valid to move Candidate to the delay slot
319 /// and returns true if it isn't. It also updates memory and register
320 /// dependence information.
321 bool delayHasHazard(const MipsSubtarget &STI, const MachineInstr &Candidate,
322 const BranchInformation &BranchInfo, RegDefsUses &RegDU,
323 InspectMemInstr &IM) const;
324
325 /// This function searches range [Begin, End) for an instruction that can be
326 /// moved to the delay slot. Returns true on success.
327 template <typename IterTy>
328 bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
329 const BranchInformation &BranchInfo, RegDefsUses &RegDU,
330 InspectMemInstr &IM, Iter Slot, IterTy &Filler) const;
331
332 /// This function searches in the backward direction for an instruction that
333 /// can be moved to the delay slot. Returns true on success.
334 bool searchBackward(MachineBasicBlock &MBB, MachineInstr &Slot,
335 const BranchInformation &BranchInfo) const;
336
337 /// This function searches MBB in the forward direction for an instruction
338 /// that can be moved to the delay slot. Returns true on success.
339 bool searchForward(MachineBasicBlock &MBB, Iter Slot,
340 const BranchInformation &BranchInfo) const;
341
342 /// This function searches one of MBB's successor blocks for an instruction
343 /// that can be moved to the delay slot and inserts clones of the
344 /// instruction into the successor's predecessor blocks.
345 bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot,
346 const BranchInformation &BranchInfo) const;
347
348 /// Pick a successor block of MBB. Return NULL if MBB doesn't have a
349 /// successor block that is not a landing pad.
350 MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const;
351
352 /// This function analyzes MBB and returns an instruction with an unoccupied
353 /// slot that branches to Dst.
354 std::pair<MipsInstrInfo::BranchType, MachineInstr *>
355 getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const;
356
357 /// Examine Pred and see if it is possible to insert an instruction into
358 /// one of its branches delay slot or its end.
359 bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
360 RegDefsUses &RegDU, bool &HasMultipleSuccs,
361 BB2BrMap &BrMap) const;
362
363 bool terminateSearch(const MachineInstr &Candidate) const;
364
365 const TargetMachine *TM = nullptr;
366 };
367
368} // end anonymous namespace
369
370char MipsDelaySlotFiller::ID = 0;
371
372static bool hasUnoccupiedSlot(const MachineInstr *MI) {
373 return MI->hasDelaySlot() && !MI->isBundledWithSucc();
374}
375
376/// Check if a branch is a short backward loop that triggers the R5900 erratum.
377/// Quote from binutils-gdb/gas/config/tc-mips.c:
378///
379/// On the R5900 short loops need to be fixed by inserting a NOP in the
380/// branch delay slot.
381///
382/// The short loop bug under certain conditions causes loops to execute
383/// only once or twice. We must ensure that the assembler never
384/// generates loops that satisfy all of the following conditions:
385///
386/// - a loop consists of less than or equal to six instructions
387/// (including the branch delay slot);
388/// - a loop contains only one conditional branch instruction at the end
389/// of the loop;
390/// - a loop does not contain any other branch or jump instructions;
391/// - a branch delay slot of the loop is not NOP (EE 2.9 or later).
392///
393/// We need to do this because of a hardware bug in the R5900 chip.
394static bool isR5900ShortLoopBranch(const MachineInstr *MI,
395 const MachineBasicBlock &MBB) {
396 // Must be a conditional branch (not jump or indirect branch)
397 if (!MI->isBranch() || MI->isIndirectBranch())
398 return false;
399
400 // Check if this is a conditional branch by looking for an MBB operand
401 const MachineBasicBlock *TargetMBB = nullptr;
402 for (const MachineOperand &MO : MI->operands()) {
403 if (MO.isMBB()) {
404 TargetMBB = MO.getMBB();
405 break;
406 }
407 }
408
409 // Must have a target and must target the same basic block (backward branch)
410 if (!TargetMBB || TargetMBB != &MBB)
411 return false;
412
413 // Count instructions from the beginning of the block to the branch
414 // A short loop is 6 instructions or fewer (including branch + delay slot)
415 // The delay slot adds 1 more, so we check if instructions before branch <= 5
416 unsigned InstrCount = 0;
417 bool HasOtherBranch = false;
418
419 for (const MachineInstr &Instr : MBB) {
420 if (&Instr == MI)
421 break;
422
423 // Skip debug and pseudo instructions
424 if (Instr.isDebugInstr() || Instr.isTransient())
425 continue;
426
427 ++InstrCount;
428
429 // If there's another branch in the loop, the erratum doesn't apply
430 if (Instr.isBranch() || Instr.isCall()) {
431 HasOtherBranch = true;
432 break;
433 }
434 }
435
436 // If there's another branch/call in the loop, erratum doesn't apply
437 if (HasOtherBranch)
438 return false;
439
440 // Add 1 for the branch itself, +1 for delay slot = InstrCount + 2
441 // Erratum triggers when total <= 6, so InstrCount + 2 <= 6 => InstrCount <= 4
442 // But we're conservative: if InstrCount <= 5 (total <= 7), skip filling
443 // to match the exact condition from r5900check: offset -5 to -1 (2-6 instrs)
444 return InstrCount <= 5;
445}
446
447INITIALIZE_PASS(MipsDelaySlotFiller, DEBUG_TYPE,
448 "Fill delay slot for MIPS", false, false)
449
450/// This function inserts clones of Filler into predecessor blocks.
451static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) {
452 MachineFunction *MF = Filler->getParent()->getParent();
453
454 for (const auto &I : BrMap) {
455 if (I.second) {
456 MIBundleBuilder(I.second).append(MI: MF->CloneMachineInstr(Orig: &*Filler));
457 ++UsefulSlots;
458 } else {
459 I.first->push_back(MI: MF->CloneMachineInstr(Orig: &*Filler));
460 }
461 }
462}
463
464/// This function adds registers Filler defines to MBB's live-in register list.
465static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
466 for (const MachineOperand &MO : Filler->operands()) {
467 unsigned R;
468
469 if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg()))
470 continue;
471
472#ifndef NDEBUG
473 const MachineFunction &MF = *MBB.getParent();
474 assert(MF.getSubtarget().getRegisterInfo()->getAllocatableSet(MF).test(R) &&
475 "Shouldn't move an instruction with unallocatable registers across "
476 "basic block boundaries.");
477#endif
478
479 if (!MBB.isLiveIn(Reg: R))
480 MBB.addLiveIn(PhysReg: R);
481 }
482}
483
484RegDefsUses::RegDefsUses(const TargetRegisterInfo &TRI)
485 : TRI(TRI), Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {}
486
487void RegDefsUses::init(const MachineInstr &MI) {
488 // Add all register operands which are explicit and non-variadic.
489 update(MI, Begin: 0, End: MI.getDesc().getNumOperands());
490
491 // If MI is a call, add RA to Defs to prevent users of RA from going into
492 // delay slot.
493 if (MI.isCall())
494 Defs.set(Mips::RA);
495
496 // Add all implicit register operands of branch instructions except
497 // register AT.
498 if (MI.isBranch()) {
499 update(MI, Begin: MI.getDesc().getNumOperands(), End: MI.getNumOperands());
500 Defs.reset(Idx: Mips::AT);
501 }
502}
503
504void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
505 assert(MI.isCall());
506
507 // Add RA/RA_64 to Defs to prevent users of RA/RA_64 from going into
508 // the delay slot. The reason is that RA/RA_64 must not be changed
509 // in the delay slot so that the callee can return to the caller.
510 if (MI.definesRegister(Reg: Mips::RA, /*TRI=*/nullptr) ||
511 MI.definesRegister(Reg: Mips::RA_64, /*TRI=*/nullptr)) {
512 Defs.set(Mips::RA);
513 Defs.set(Mips::RA_64);
514 }
515
516 // If MI is a call, add all caller-saved registers to Defs.
517 BitVector CallerSavedRegs(TRI.getNumRegs(), true);
518
519 CallerSavedRegs.reset(Idx: Mips::ZERO);
520 CallerSavedRegs.reset(Idx: Mips::ZERO_64);
521
522 for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MF: MI.getParent()->getParent());
523 *R; ++R)
524 for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI)
525 CallerSavedRegs.reset(Idx: *AI);
526
527 Defs |= CallerSavedRegs;
528}
529
530void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
531 BitVector AllocSet = TRI.getAllocatableSet(MF);
532
533 for (unsigned R : AllocSet.set_bits())
534 for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
535 AllocSet.set(*AI);
536
537 AllocSet.set(Mips::ZERO);
538 AllocSet.set(Mips::ZERO_64);
539
540 Defs |= AllocSet.flip();
541}
542
543void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
544 const MachineBasicBlock &SuccBB) {
545 for (const MachineBasicBlock *S : MBB.successors())
546 if (S != &SuccBB)
547 for (const auto &LI : S->liveins())
548 Uses.set(LI.PhysReg.id());
549}
550
551bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
552 BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs());
553 bool HasHazard = false;
554
555 for (unsigned I = Begin; I != End; ++I) {
556 const MachineOperand &MO = MI.getOperand(i: I);
557
558 if (MO.isReg() && MO.getReg()) {
559 if (checkRegDefsUses(NewDefs, NewUses, Reg: MO.getReg(), IsDef: MO.isDef())) {
560 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found register hazard for operand "
561 << I << ": ";
562 MO.dump());
563 HasHazard = true;
564 }
565 }
566 }
567
568 Defs |= NewDefs;
569 Uses |= NewUses;
570
571 return HasHazard;
572}
573
574bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses,
575 unsigned Reg, bool IsDef) const {
576 if (IsDef) {
577 NewDefs.set(Reg);
578 // check whether Reg has already been defined or used.
579 return (isRegInSet(RegSet: Defs, Reg) || isRegInSet(RegSet: Uses, Reg));
580 }
581
582 NewUses.set(Reg);
583 // check whether Reg has already been defined.
584 return isRegInSet(RegSet: Defs, Reg);
585}
586
587bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const {
588 // Check Reg and all aliased Registers.
589 for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
590 if (RegSet.test(Idx: *AI))
591 return true;
592 return false;
593}
594
595bool InspectMemInstr::hasHazard(const MachineInstr &MI) {
596 if (!MI.mayStore() && !MI.mayLoad())
597 return false;
598
599 if (ForbidMemInstr)
600 return true;
601
602 OrigSeenLoad = SeenLoad;
603 OrigSeenStore = SeenStore;
604 SeenLoad |= MI.mayLoad();
605 SeenStore |= MI.mayStore();
606
607 // If MI is an ordered or volatile memory reference, disallow moving
608 // subsequent loads and stores to delay slot.
609 if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) {
610 ForbidMemInstr = true;
611 return true;
612 }
613
614 return hasHazard_(MI);
615}
616
617bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
618 if (MI.mayStore())
619 return true;
620
621 if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getPseudoValue())
622 return true;
623
624 if (const PseudoSourceValue *PSV =
625 (*MI.memoperands_begin())->getPseudoValue()) {
626 if (isa<FixedStackPseudoSourceValue>(Val: PSV))
627 return false;
628 return !PSV->isConstant(nullptr) && !PSV->isStack();
629 }
630
631 return true;
632}
633
634MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
635 : InspectMemInstr(false), MFI(MFI_) {}
636
637bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
638 bool HasHazard = false;
639
640 // Check underlying object list.
641 SmallVector<ValueType, 4> Objs;
642 if (getUnderlyingObjects(MI, Objects&: Objs)) {
643 for (ValueType VT : Objs)
644 HasHazard |= updateDefsUses(V: VT, MayStore: MI.mayStore());
645 return HasHazard;
646 }
647
648 // No underlying objects found.
649 HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore);
650 HasHazard |= MI.mayLoad() || OrigSeenStore;
651
652 SeenNoObjLoad |= MI.mayLoad();
653 SeenNoObjStore |= MI.mayStore();
654
655 return HasHazard;
656}
657
658bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) {
659 if (MayStore)
660 return !Defs.insert(Ptr: V).second || Uses.count(Ptr: V) || SeenNoObjStore ||
661 SeenNoObjLoad;
662
663 Uses.insert(Ptr: V);
664 return Defs.count(Ptr: V) || SeenNoObjStore;
665}
666
667bool MemDefsUses::
668getUnderlyingObjects(const MachineInstr &MI,
669 SmallVectorImpl<ValueType> &Objects) const {
670 if (!MI.hasOneMemOperand())
671 return false;
672
673 auto & MMO = **MI.memoperands_begin();
674
675 if (const PseudoSourceValue *PSV = MMO.getPseudoValue()) {
676 if (!PSV->isAliased(MFI))
677 return false;
678 Objects.push_back(Elt: PSV);
679 return true;
680 }
681
682 if (const Value *V = MMO.getValue()) {
683 SmallVector<const Value *, 4> Objs;
684 ::getUnderlyingObjects(V, Objects&: Objs);
685
686 for (const Value *UValue : Objs) {
687 if (!isIdentifiedObject(V))
688 return false;
689
690 Objects.push_back(Elt: UValue);
691 }
692 return true;
693 }
694
695 return false;
696}
697
698// Replace Branch with the compact branch instruction.
699Iter MipsDelaySlotFiller::replaceWithCompactBranch(MachineBasicBlock &MBB,
700 Iter Branch,
701 const DebugLoc &DL) {
702 const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
703 const MipsInstrInfo *TII = STI.getInstrInfo();
704
705 unsigned NewOpcode = TII->getEquivalentCompactForm(I: Branch);
706 Branch = TII->genInstrWithNewOpc(NewOpc: NewOpcode, I: Branch);
707
708 auto *ToErase = cast<MachineInstr>(Val: &*std::next(x: Branch));
709 // Update call info for the Branch.
710 if (ToErase->shouldUpdateAdditionalCallInfo())
711 ToErase->getMF()->moveAdditionalCallInfo(Old: ToErase,
712 New: cast<MachineInstr>(Val: &*Branch));
713 ToErase->eraseFromParent();
714 return Branch;
715}
716
717// For given opcode returns opcode of corresponding instruction with short
718// delay slot.
719// For the pseudo TAILCALL*_MM instructions return the short delay slot
720// form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range
721// that is too short to make use of for tail calls.
722static int getEquivalentCallShort(int Opcode) {
723 switch (Opcode) {
724 case Mips::BGEZAL:
725 return Mips::BGEZALS_MM;
726 case Mips::BLTZAL:
727 return Mips::BLTZALS_MM;
728 case Mips::JAL:
729 case Mips::JAL_MM:
730 return Mips::JALS_MM;
731 case Mips::JALR:
732 return Mips::JALRS_MM;
733 case Mips::JALR16_MM:
734 return Mips::JALRS16_MM;
735 case Mips::TAILCALL_MM:
736 llvm_unreachable("Attempting to shorten the TAILCALL_MM pseudo!");
737 case Mips::TAILCALLREG:
738 return Mips::JR16_MM;
739 default:
740 llvm_unreachable("Unexpected call instruction for microMIPS.");
741 }
742}
743
744/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
745/// We assume there is only one delay slot per delayed instruction.
746bool MipsDelaySlotFiller::runOnMachineBasicBlock(
747 MachineBasicBlock &MBB, MachineInstr *FirstNextMBBInstr) {
748 bool Changed = false;
749 const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
750 bool InMicroMipsMode = STI.inMicroMipsMode();
751 const MipsInstrInfo *TII = STI.getInstrInfo();
752
753 for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
754 if (!hasUnoccupiedSlot(MI: &*I))
755 continue;
756
757 // R5900 short loop erratum fix: skip delay slot filling for short backward
758 // loops to avoid triggering a hardware bug where short loops may exit
759 // early. The fix can be controlled with -mfix-r5900 / -mno-fix-r5900.
760 bool SkipForFixR5900 = false;
761 if (STI.fixR5900() && isR5900ShortLoopBranch(MI: &*I, MBB)) {
762 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": skipping delay slot fill for R5900 "
763 "short loop branch.\n");
764 ++R5900ShortLoopNops;
765 SkipForFixR5900 = true;
766 }
767
768 // Delay slot filling is disabled at -O0, in microMIPS32R6, or for R5900
769 // short loop branches.
770 if (!DisableDelaySlotFiller &&
771 (TM->getOptLevel() != CodeGenOptLevel::None) &&
772 !(InMicroMipsMode && STI.hasMips32r6()) && !SkipForFixR5900) {
773
774 bool Filled = false;
775 const auto BranchInfo =
776 BranchInformation(I, MBB.end(), FirstNextMBBInstr);
777
778 if (MipsCompactBranchPolicy.getValue() != CB_Always ||
779 !TII->getEquivalentCompactForm(I)) {
780 if (searchBackward(MBB, Slot&: *I, BranchInfo)) {
781 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot"
782 " in backwards search.\n");
783 Filled = true;
784 } else if (I->isTerminator()) {
785 if (searchSuccBBs(MBB, Slot: I, BranchInfo)) {
786 Filled = true;
787 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot"
788 " in successor BB search.\n");
789 }
790 } else if (searchForward(MBB, Slot: I, BranchInfo)) {
791 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot"
792 " in forwards search.\n");
793 Filled = true;
794 }
795 }
796
797 if (Filled) {
798 // Get instruction with delay slot.
799 MachineBasicBlock::instr_iterator DSI = I.getInstrIterator();
800
801 if (InMicroMipsMode && TII->getInstSizeInBytes(MI: *std::next(x: DSI)) == 2 &&
802 DSI->isCall()) {
803 // If instruction in delay slot is 16b change opcode to
804 // corresponding instruction with short delay slot.
805
806 // TODO: Implement an instruction mapping table of 16bit opcodes to
807 // 32bit opcodes so that an instruction can be expanded. This would
808 // save 16 bits as a TAILCALL_MM pseudo requires a fullsized nop.
809 // TODO: Permit b16 when branching backwards to the same function
810 // if it is in range.
811 DSI->setDesc(TII->get(Opcode: getEquivalentCallShort(Opcode: DSI->getOpcode())));
812 }
813 ++FilledSlots;
814 Changed = true;
815 continue;
816 }
817 }
818
819 // For microMIPS if instruction is BEQ or BNE with one ZERO register, then
820 // instead of adding NOP replace this instruction with the corresponding
821 // compact branch instruction, i.e. BEQZC or BNEZC. Additionally
822 // PseudoReturn and PseudoIndirectBranch are expanded to JR_MM, so they can
823 // be replaced with JRC16_MM.
824
825 // For MIPSR6 attempt to produce the corresponding compact (no delay slot)
826 // form of the CTI. For indirect jumps this will not require inserting a
827 // NOP and for branches will hopefully avoid requiring a NOP.
828 if ((InMicroMipsMode ||
829 (STI.hasMips32r6() && MipsCompactBranchPolicy != CB_Never)) &&
830 TII->getEquivalentCompactForm(I)) {
831 I = replaceWithCompactBranch(MBB, Branch: I, DL: I->getDebugLoc());
832 Changed = true;
833 continue;
834 }
835
836 // Bundle the NOP to the instruction with the delay slot.
837 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": could not fill delay slot for ";
838 I->dump());
839 TII->insertNop(MBB, MI: std::next(x: I), DL: I->getDebugLoc());
840 MIBundleBuilder(MBB, I, std::next(x: I, n: 2));
841 ++FilledSlots;
842 Changed = true;
843 }
844 return Changed;
845}
846
847template <typename IterTy>
848bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
849 IterTy End,
850 const BranchInformation &BranchInfo,
851 RegDefsUses &RegDU, InspectMemInstr &IM,
852 Iter Slot, IterTy &Filler) const {
853 for (IterTy I = Begin; I != End;) {
854 IterTy CurrI = I;
855 ++I;
856 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": checking instruction: "; CurrI->dump());
857 // Skip debug value.
858 // Instruction TargetOpcode::JUMP_TABLE_DEBUG_INFO is only used to note
859 // jump table debug info.
860 if (CurrI->isDebugInstr() || CurrI->isJumpTableDebugInfo()) {
861 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring debug instruction: ";
862 CurrI->dump());
863 continue;
864 }
865
866 if (CurrI->isBundle()) {
867 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring BUNDLE instruction: ";
868 CurrI->dump());
869 // However, we still need to update the register def-use information.
870 RegDU.update(MI: *CurrI, Begin: 0, End: CurrI->getNumOperands());
871 continue;
872 }
873
874 if (terminateSearch(Candidate: *CurrI)) {
875 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": should terminate search: ";
876 CurrI->dump());
877 break;
878 }
879
880 assert((!CurrI->isCall() && !CurrI->isReturn() && !CurrI->isBranch()) &&
881 "Cannot put calls, returns or branches in delay slot.");
882
883 if (CurrI->isKill()) {
884 CurrI->eraseFromParent();
885 continue;
886 }
887
888 const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
889 if (delayHasHazard(STI, Candidate: *CurrI, BranchInfo, RegDU, IM))
890 continue;
891
892 bool InMicroMipsMode = STI.inMicroMipsMode();
893 const MipsInstrInfo *TII = STI.getInstrInfo();
894 unsigned Opcode = (*Slot).getOpcode();
895
896 // In mips1-4, should not put mflo into the delay slot for the return.
897 if ((IsMFLOMFHI(CurrI->getOpcode())) &&
898 (!STI.hasMips32() && !STI.hasMips5()))
899 continue;
900
901 // This is complicated by the tail call optimization. For non-PIC code
902 // there is only a 32bit sized unconditional branch which can be assumed
903 // to be able to reach the target. b16 only has a range of +/- 1 KB.
904 // It's entirely possible that the target function is reachable with b16
905 // but we don't have enough information to make that decision.
906 if (InMicroMipsMode && TII->getInstSizeInBytes(MI: *CurrI) == 2 &&
907 (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
908 Opcode == Mips::PseudoIndirectBranch_MM ||
909 Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
910 continue;
911 // Instructions LWP/SWP and MOVEP should not be in a delay slot as that
912 // results in unpredictable behaviour
913 if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM ||
914 Opcode == Mips::MOVEP_MM))
915 continue;
916
917 Filler = CurrI;
918 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot: ";
919 CurrI->dump());
920
921 return true;
922 }
923
924 return false;
925}
926
927bool MipsDelaySlotFiller::searchBackward(
928 MachineBasicBlock &MBB, MachineInstr &Slot,
929 const BranchInformation &BranchInfo) const {
930 if (DisableBackwardSearch)
931 return false;
932
933 auto *Fn = MBB.getParent();
934 RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo());
935 MemDefsUses MemDU(&Fn->getFrameInfo());
936 ReverseIter Filler;
937
938 RegDU.init(MI: Slot);
939
940 MachineBasicBlock::iterator SlotI = Slot;
941 if (!searchRange(MBB, Begin: ++SlotI.getReverse(), End: MBB.rend(), BranchInfo, RegDU,
942 IM&: MemDU, Slot, Filler)) {
943 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay "
944 "slot using backwards search.\n");
945 return false;
946 }
947
948 MBB.splice(Where: std::next(x: SlotI), Other: &MBB, From: Filler.getReverse());
949 MIBundleBuilder(MBB, SlotI, std::next(x: SlotI, n: 2));
950 ++UsefulSlots;
951 return true;
952}
953
954bool MipsDelaySlotFiller::searchForward(
955 MachineBasicBlock &MBB, Iter Slot,
956 const BranchInformation &BranchInfo) const {
957 // Can handle only calls.
958 if (DisableForwardSearch || !Slot->isCall())
959 return false;
960
961 RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
962 NoMemInstr NM;
963 Iter Filler;
964
965 RegDU.setCallerSaved(*Slot);
966
967 if (!searchRange(MBB, Begin: std::next(x: Slot), End: MBB.end(), BranchInfo, RegDU, IM&: NM, Slot,
968 Filler)) {
969 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay "
970 "slot using forwards search.\n");
971 return false;
972 }
973
974 MBB.splice(Where: std::next(x: Slot), Other: &MBB, From: Filler);
975 MIBundleBuilder(MBB, Slot, std::next(x: Slot, n: 2));
976 ++UsefulSlots;
977 return true;
978}
979
980bool MipsDelaySlotFiller::searchSuccBBs(
981 MachineBasicBlock &MBB, Iter Slot,
982 const BranchInformation &BranchInfo) const {
983 if (DisableSuccBBSearch)
984 return false;
985
986 MachineBasicBlock *SuccBB = selectSuccBB(B&: MBB);
987
988 if (!SuccBB)
989 return false;
990
991 RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
992 bool HasMultipleSuccs = false;
993 BB2BrMap BrMap;
994 std::unique_ptr<InspectMemInstr> IM;
995 Iter Filler;
996 auto *Fn = MBB.getParent();
997
998 // Iterate over SuccBB's predecessor list.
999 for (MachineBasicBlock *Pred : SuccBB->predecessors())
1000 if (!examinePred(Pred&: *Pred, Succ: *SuccBB, RegDU, HasMultipleSuccs, BrMap))
1001 return false;
1002
1003 // Do not allow moving instructions which have unallocatable register operands
1004 // across basic block boundaries.
1005 RegDU.setUnallocatableRegs(*Fn);
1006
1007 // Only allow moving loads from stack or constants if any of the SuccBB's
1008 // predecessors have multiple successors.
1009 if (HasMultipleSuccs) {
1010 IM.reset(p: new LoadFromStackOrConst());
1011 } else {
1012 const MachineFrameInfo &MFI = Fn->getFrameInfo();
1013 IM.reset(p: new MemDefsUses(&MFI));
1014 }
1015
1016 if (!searchRange(MBB, Begin: SuccBB->begin(), End: SuccBB->end(), BranchInfo, RegDU, IM&: *IM,
1017 Slot, Filler))
1018 return false;
1019
1020 insertDelayFiller(Filler, BrMap);
1021 addLiveInRegs(Filler, MBB&: *SuccBB);
1022 Filler->eraseFromParent();
1023
1024 return true;
1025}
1026
1027MachineBasicBlock *
1028MipsDelaySlotFiller::selectSuccBB(MachineBasicBlock &B) const {
1029 if (B.succ_empty())
1030 return nullptr;
1031
1032 // Select the successor with the larget edge weight.
1033 auto &Prob = getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
1034 MachineBasicBlock *S =
1035 *llvm::max_element(Range: B.successors(), C: [&](const MachineBasicBlock *Dst0,
1036 const MachineBasicBlock *Dst1) {
1037 return Prob.getEdgeProbability(Src: &B, Dst: Dst0) <
1038 Prob.getEdgeProbability(Src: &B, Dst: Dst1);
1039 });
1040 return S->isEHPad() ? nullptr : S;
1041}
1042
1043std::pair<MipsInstrInfo::BranchType, MachineInstr *>
1044MipsDelaySlotFiller::getBranch(MachineBasicBlock &MBB,
1045 const MachineBasicBlock &Dst) const {
1046 const MipsInstrInfo *TII =
1047 MBB.getParent()->getSubtarget<MipsSubtarget>().getInstrInfo();
1048 MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr;
1049 SmallVector<MachineInstr*, 2> BranchInstrs;
1050 SmallVector<MachineOperand, 2> Cond;
1051
1052 MipsInstrInfo::BranchType R =
1053 TII->analyzeBranch(MBB, TBB&: TrueBB, FBB&: FalseBB, Cond, AllowModify: false, BranchInstrs);
1054
1055 if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch))
1056 return std::make_pair(x&: R, y: nullptr);
1057
1058 if (R != MipsInstrInfo::BT_CondUncond) {
1059 if (!hasUnoccupiedSlot(MI: BranchInstrs[0]))
1060 return std::make_pair(x: MipsInstrInfo::BT_None, y: nullptr);
1061
1062 assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst)));
1063
1064 return std::make_pair(x&: R, y&: BranchInstrs[0]);
1065 }
1066
1067 assert((TrueBB == &Dst) || (FalseBB == &Dst));
1068
1069 // Examine the conditional branch. See if its slot is occupied.
1070 if (hasUnoccupiedSlot(MI: BranchInstrs[0]))
1071 return std::make_pair(x: MipsInstrInfo::BT_Cond, y&: BranchInstrs[0]);
1072
1073 // If that fails, try the unconditional branch.
1074 if (hasUnoccupiedSlot(MI: BranchInstrs[1]) && (FalseBB == &Dst))
1075 return std::make_pair(x: MipsInstrInfo::BT_Uncond, y&: BranchInstrs[1]);
1076
1077 return std::make_pair(x: MipsInstrInfo::BT_None, y: nullptr);
1078}
1079
1080bool MipsDelaySlotFiller::examinePred(MachineBasicBlock &Pred,
1081 const MachineBasicBlock &Succ,
1082 RegDefsUses &RegDU,
1083 bool &HasMultipleSuccs,
1084 BB2BrMap &BrMap) const {
1085 std::pair<MipsInstrInfo::BranchType, MachineInstr *> P =
1086 getBranch(MBB&: Pred, Dst: Succ);
1087
1088 // Return if either getBranch wasn't able to analyze the branches or there
1089 // were no branches with unoccupied slots.
1090 if (P.first == MipsInstrInfo::BT_None)
1091 return false;
1092
1093 if ((P.first != MipsInstrInfo::BT_Uncond) &&
1094 (P.first != MipsInstrInfo::BT_NoBranch)) {
1095 HasMultipleSuccs = true;
1096 RegDU.addLiveOut(MBB: Pred, SuccBB: Succ);
1097 }
1098
1099 BrMap[&Pred] = P.second;
1100 return true;
1101}
1102
1103bool MipsDelaySlotFiller::delayHasHazard(const MipsSubtarget &STI,
1104 const MachineInstr &Candidate,
1105 const BranchInformation &BranchInfo,
1106 RegDefsUses &RegDU,
1107 InspectMemInstr &IM) const {
1108 assert(!Candidate.isKill() &&
1109 "KILL instructions should have been eliminated at this point.");
1110
1111 bool HasHazard = Candidate.isImplicitDef();
1112
1113 HasHazard |= IM.hasHazard(MI: Candidate);
1114 HasHazard |= RegDU.update(MI: Candidate, Begin: 0, End: Candidate.getNumOperands());
1115
1116 // This only matters for MIPS1 and only if we do not have a hazard already
1117 if (STI.hasMips1() && !STI.hasMips2() && !HasHazard) {
1118 const MipsInstrInfo *TII = STI.getInstrInfo();
1119 const bool HasLoadDelaySlot = TII->HasLoadDelaySlot(MI: Candidate);
1120
1121 // We only need to act if the candidate is having a load delay slot
1122 if (HasLoadDelaySlot) {
1123 // We have no branch so we can not determine a hazard
1124 // Assume the worst
1125 if (!BranchInfo.hasBranchInstr()) {
1126 return true;
1127 }
1128
1129 // Being an indirect branch means we can not tell if we are a hazard
1130 // Assume the worst
1131 if (BranchInfo.isIndirectBranch()) {
1132 return true;
1133 }
1134
1135 // If this is a direct branch we should find a MBB operand for the jump
1136 // target
1137 const MachineBasicBlock *TargetMBB = BranchInfo.getBranchTarget();
1138 if (!TargetMBB || TargetMBB->empty()) {
1139 return true;
1140 }
1141
1142 const auto &BranchTargetInstr = TargetMBB->instr_front();
1143 bool HasNewHazard =
1144 !TII->SafeInLoadDelaySlot(MIInSlot: BranchTargetInstr, LoadMI: Candidate);
1145 // If the branch is unconditional then we do not need to bother to check
1146 // the next instruction after the branch
1147 if (!BranchInfo.isUnconditionalBranch()) {
1148 // We are a conditional branch so we should have an `else` branch
1149 if (BranchInfo.hasBranchElseInstr()) {
1150 HasNewHazard |= !TII->SafeInLoadDelaySlot(
1151 MIInSlot: *BranchInfo.getBranchElseInstr(), LoadMI: Candidate);
1152 } else {
1153 // Without the `else` branch we need to assume the worst
1154 HasNewHazard = true;
1155 }
1156 }
1157 return HasNewHazard;
1158 }
1159 }
1160
1161 return HasHazard;
1162}
1163
1164bool MipsDelaySlotFiller::terminateSearch(const MachineInstr &Candidate) const {
1165 return (Candidate.isTerminator() || Candidate.isCall() ||
1166 Candidate.isPosition() || Candidate.isInlineAsm() ||
1167 Candidate.hasUnmodeledSideEffects());
1168}
1169
1170/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
1171/// slots in Mips MachineFunctions
1172FunctionPass *llvm::createMipsDelaySlotFillerPass() {
1173 return new MipsDelaySlotFiller();
1174}
1175