| 1 | //===- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler -------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Simple pass to fill delay slots with useful instructions. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "Mips.h" |
| 14 | #include "MipsInstrInfo.h" |
| 15 | #include "MipsSubtarget.h" |
| 16 | #include "llvm/ADT/BitVector.h" |
| 17 | #include "llvm/ADT/DenseMap.h" |
| 18 | #include "llvm/ADT/PointerUnion.h" |
| 19 | #include "llvm/ADT/SmallPtrSet.h" |
| 20 | #include "llvm/ADT/SmallVector.h" |
| 21 | #include "llvm/ADT/Statistic.h" |
| 22 | #include "llvm/ADT/StringRef.h" |
| 23 | #include "llvm/Analysis/AliasAnalysis.h" |
| 24 | #include "llvm/Analysis/ValueTracking.h" |
| 25 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 26 | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
| 27 | #include "llvm/CodeGen/MachineFunction.h" |
| 28 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 29 | #include "llvm/CodeGen/MachineInstr.h" |
| 30 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 31 | #include "llvm/CodeGen/MachineOperand.h" |
| 32 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 33 | #include "llvm/CodeGen/PseudoSourceValue.h" |
| 34 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 35 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
| 36 | #include "llvm/MC/MCInstrDesc.h" |
| 37 | #include "llvm/MC/MCRegisterInfo.h" |
| 38 | #include "llvm/Support/Casting.h" |
| 39 | #include "llvm/Support/CodeGen.h" |
| 40 | #include "llvm/Support/CommandLine.h" |
| 41 | #include "llvm/Support/ErrorHandling.h" |
| 42 | #include "llvm/Support/raw_ostream.h" |
| 43 | #include "llvm/Target/TargetMachine.h" |
| 44 | #include <cassert> |
| 45 | #include <iterator> |
| 46 | #include <memory> |
| 47 | #include <utility> |
| 48 | |
| 49 | using namespace llvm; |
| 50 | |
| 51 | #define DEBUG_TYPE "mips-delay-slot-filler" |
| 52 | |
| 53 | STATISTIC(FilledSlots, "Number of delay slots filled" ); |
| 54 | STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" |
| 55 | " are not NOP." ); |
| 56 | STATISTIC(R5900ShortLoopNops, "Number of delay slots left as NOP for R5900 " |
| 57 | "short loop fix" ); |
| 58 | |
| 59 | static cl::opt<bool> DisableDelaySlotFiller( |
| 60 | "disable-mips-delay-filler" , |
| 61 | cl::init(Val: false), |
| 62 | cl::desc("Fill all delay slots with NOPs." ), |
| 63 | cl::Hidden); |
| 64 | |
| 65 | static cl::opt<bool> DisableForwardSearch( |
| 66 | "disable-mips-df-forward-search" , |
| 67 | cl::init(Val: true), |
| 68 | cl::desc("Disallow MIPS delay filler to search forward." ), |
| 69 | cl::Hidden); |
| 70 | |
| 71 | static cl::opt<bool> DisableSuccBBSearch( |
| 72 | "disable-mips-df-succbb-search" , |
| 73 | cl::init(Val: true), |
| 74 | cl::desc("Disallow MIPS delay filler to search successor basic blocks." ), |
| 75 | cl::Hidden); |
| 76 | |
| 77 | static cl::opt<bool> DisableBackwardSearch( |
| 78 | "disable-mips-df-backward-search" , |
| 79 | cl::init(Val: false), |
| 80 | cl::desc("Disallow MIPS delay filler to search backward." ), |
| 81 | cl::Hidden); |
| 82 | |
| 83 | extern cl::opt<CompactBranchPolicy> MipsCompactBranchPolicy; |
| 84 | |
| 85 | namespace { |
| 86 | |
| 87 | using Iter = MachineBasicBlock::iterator; |
| 88 | using ReverseIter = MachineBasicBlock::reverse_iterator; |
| 89 | using BB2BrMap = SmallDenseMap<MachineBasicBlock *, MachineInstr *, 2>; |
| 90 | |
| 91 | // Holds information about one branch instruction |
| 92 | // This is used by the MIPS1 target to easily find all paths of a branch to |
| 93 | // then check the first instruction for possible load delay hazards |
| 94 | class BranchInformation { |
| 95 | private: |
| 96 | // The pointer to the actual branch instruction |
| 97 | const MachineInstr *BranchInstr = nullptr; |
| 98 | // The pointer to the instruction after the branch (= the `else` case) |
| 99 | const MachineInstr *ElseBranchInstr = nullptr; |
| 100 | |
| 101 | // Check if `Adr` is a pseudo instruction and if so, then treat it as non |
| 102 | // existing |
| 103 | static const MachineInstr *filterPseudoInstr(const MachineInstr *Adr) { |
| 104 | if (Adr && !Adr->isPseudo()) { |
| 105 | return Adr; |
| 106 | } |
| 107 | return nullptr; |
| 108 | } |
| 109 | |
| 110 | public: |
| 111 | // Creates a new `BranchInformation` from the branch candidate `CurrentSlot` |
| 112 | // together with the end (`MBBEnd`) of the current MBB and the first |
| 113 | // instruction of the next MBB `NextMBBInstr` |
| 114 | BranchInformation(MachineInstrBundleIterator<MachineInstr> CurrentSlot, |
| 115 | MachineInstrBundleIterator<MachineInstr> MBBEnd, |
| 116 | const MachineInstr *NextMBBInstr) |
| 117 | : BranchInstr( |
| 118 | CurrentSlot->isBranch() |
| 119 | ? BranchInformation::filterPseudoInstr(Adr: &(*CurrentSlot)) |
| 120 | : nullptr), |
| 121 | ElseBranchInstr( |
| 122 | (++CurrentSlot) == MBBEnd |
| 123 | ? BranchInformation::filterPseudoInstr(Adr: NextMBBInstr) |
| 124 | : BranchInformation::filterPseudoInstr(Adr: &(*CurrentSlot))) {} |
| 125 | |
| 126 | // Checks if we have a branch |
| 127 | constexpr bool hasBranchInstr() const { return this->BranchInstr; } |
| 128 | |
| 129 | // Checks if we have an else branch |
| 130 | constexpr bool hasBranchElseInstr() const { return this->ElseBranchInstr; } |
| 131 | |
| 132 | // Checks if we have an indirect branch |
| 133 | constexpr bool isIndirectBranch() const { |
| 134 | if (this->BranchInstr) { |
| 135 | return this->BranchInstr->isIndirectBranch(); |
| 136 | } |
| 137 | return false; |
| 138 | } |
| 139 | |
| 140 | // Checks if we have an unconditional branch |
| 141 | constexpr bool isUnconditionalBranch() const { |
| 142 | if (this->BranchInstr) { |
| 143 | return this->BranchInstr->isUnconditionalBranch(); |
| 144 | } |
| 145 | return false; |
| 146 | } |
| 147 | |
| 148 | // Accesses the branch instruction |
| 149 | const MachineInstr *getBranchInstr() const { return this->BranchInstr; } |
| 150 | |
| 151 | // Accesses the instruction after the branch |
| 152 | const MachineInstr *getBranchElseInstr() const { |
| 153 | return this->ElseBranchInstr; |
| 154 | } |
| 155 | |
| 156 | // Gets the target of the branch |
| 157 | const MachineBasicBlock *getBranchTarget() const { |
| 158 | if (this->isIndirectBranch() || !this->hasBranchInstr()) { |
| 159 | // Indirect branch has no known target |
| 160 | return nullptr; |
| 161 | } |
| 162 | |
| 163 | for (const MachineOperand &MO : this->BranchInstr->operands()) { |
| 164 | if (MO.isMBB()) { |
| 165 | return MO.getMBB(); |
| 166 | } |
| 167 | } |
| 168 | return nullptr; |
| 169 | } |
| 170 | }; |
| 171 | |
| 172 | class RegDefsUses { |
| 173 | public: |
| 174 | RegDefsUses(const TargetRegisterInfo &TRI); |
| 175 | |
| 176 | void init(const MachineInstr &MI); |
| 177 | |
| 178 | /// This function sets all caller-saved registers in Defs. |
| 179 | void setCallerSaved(const MachineInstr &MI); |
| 180 | |
| 181 | /// This function sets all unallocatable registers in Defs. |
| 182 | void setUnallocatableRegs(const MachineFunction &MF); |
| 183 | |
| 184 | /// Set bits in Uses corresponding to MBB's live-out registers except for |
| 185 | /// the registers that are live-in to SuccBB. |
| 186 | void addLiveOut(const MachineBasicBlock &MBB, |
| 187 | const MachineBasicBlock &SuccBB); |
| 188 | |
| 189 | bool update(const MachineInstr &MI, unsigned Begin, unsigned End); |
| 190 | |
| 191 | private: |
| 192 | bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg, |
| 193 | bool IsDef) const; |
| 194 | |
| 195 | /// Returns true if Reg or its alias is in RegSet. |
| 196 | bool isRegInSet(const BitVector &RegSet, unsigned Reg) const; |
| 197 | |
| 198 | const TargetRegisterInfo &TRI; |
| 199 | BitVector Defs, Uses; |
| 200 | }; |
| 201 | |
| 202 | /// Base class for inspecting loads and stores. |
| 203 | class InspectMemInstr { |
| 204 | public: |
| 205 | InspectMemInstr(bool ForbidMemInstr_) : ForbidMemInstr(ForbidMemInstr_) {} |
| 206 | virtual ~InspectMemInstr() = default; |
| 207 | |
| 208 | /// Return true if MI cannot be moved to delay slot. |
| 209 | bool hasHazard(const MachineInstr &MI); |
| 210 | |
| 211 | protected: |
| 212 | /// Flags indicating whether loads or stores have been seen. |
| 213 | bool OrigSeenLoad = false; |
| 214 | bool OrigSeenStore = false; |
| 215 | bool SeenLoad = false; |
| 216 | bool SeenStore = false; |
| 217 | |
| 218 | /// Memory instructions are not allowed to move to delay slot if this flag |
| 219 | /// is true. |
| 220 | bool ForbidMemInstr; |
| 221 | |
| 222 | private: |
| 223 | virtual bool hasHazard_(const MachineInstr &MI) = 0; |
| 224 | }; |
| 225 | |
| 226 | /// This subclass rejects any memory instructions. |
| 227 | class NoMemInstr : public InspectMemInstr { |
| 228 | public: |
| 229 | NoMemInstr() : InspectMemInstr(true) {} |
| 230 | |
| 231 | private: |
| 232 | bool hasHazard_(const MachineInstr &MI) override { return true; } |
| 233 | }; |
| 234 | |
| 235 | /// This subclass accepts loads from stacks and constant loads. |
| 236 | class LoadFromStackOrConst : public InspectMemInstr { |
| 237 | public: |
| 238 | LoadFromStackOrConst() : InspectMemInstr(false) {} |
| 239 | |
| 240 | private: |
| 241 | bool hasHazard_(const MachineInstr &MI) override; |
| 242 | }; |
| 243 | |
| 244 | /// This subclass uses memory dependence information to determine whether a |
| 245 | /// memory instruction can be moved to a delay slot. |
| 246 | class MemDefsUses : public InspectMemInstr { |
| 247 | public: |
| 248 | explicit MemDefsUses(const MachineFrameInfo *MFI); |
| 249 | |
| 250 | private: |
| 251 | using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>; |
| 252 | |
| 253 | bool hasHazard_(const MachineInstr &MI) override; |
| 254 | |
| 255 | /// Update Defs and Uses. Return true if there exist dependences that |
| 256 | /// disqualify the delay slot candidate between V and values in Uses and |
| 257 | /// Defs. |
| 258 | bool updateDefsUses(ValueType V, bool MayStore); |
| 259 | |
| 260 | /// Get the list of underlying objects of MI's memory operand. |
| 261 | bool getUnderlyingObjects(const MachineInstr &MI, |
| 262 | SmallVectorImpl<ValueType> &Objects) const; |
| 263 | |
| 264 | const MachineFrameInfo *MFI; |
| 265 | SmallPtrSet<ValueType, 4> Uses, Defs; |
| 266 | |
| 267 | /// Flags indicating whether loads or stores with no underlying objects have |
| 268 | /// been seen. |
| 269 | bool SeenNoObjLoad = false; |
| 270 | bool SeenNoObjStore = false; |
| 271 | }; |
| 272 | |
| 273 | class MipsDelaySlotFiller : public MachineFunctionPass { |
| 274 | public: |
| 275 | MipsDelaySlotFiller() : MachineFunctionPass(ID) {} |
| 276 | |
| 277 | StringRef getPassName() const override { return "Mips Delay Slot Filler" ; } |
| 278 | |
| 279 | bool runOnMachineFunction(MachineFunction &F) override { |
| 280 | TM = &F.getTarget(); |
| 281 | bool Changed = false; |
| 282 | for (auto MBB = F.begin(); MBB != F.end();) { |
| 283 | auto curMBB = MBB; |
| 284 | MBB++; |
| 285 | |
| 286 | Changed |= runOnMachineBasicBlock( |
| 287 | MBB&: *curMBB, FirstNextMBBInstr: (MBB != F.end() && !(*MBB).empty()) ? &(*MBB).instr_front() |
| 288 | : nullptr); |
| 289 | } |
| 290 | |
| 291 | // This pass invalidates liveness information when it reorders |
| 292 | // instructions to fill delay slot. Without this, -verify-machineinstrs |
| 293 | // will fail. |
| 294 | if (Changed) |
| 295 | F.getRegInfo().invalidateLiveness(); |
| 296 | |
| 297 | return Changed; |
| 298 | } |
| 299 | |
| 300 | MachineFunctionProperties getRequiredProperties() const override { |
| 301 | return MachineFunctionProperties().setNoVRegs(); |
| 302 | } |
| 303 | |
| 304 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 305 | AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); |
| 306 | MachineFunctionPass::getAnalysisUsage(AU); |
| 307 | } |
| 308 | |
| 309 | static char ID; |
| 310 | |
| 311 | private: |
| 312 | bool runOnMachineBasicBlock(MachineBasicBlock &MBB, |
| 313 | MachineInstr *FirstNextMBBInstr); |
| 314 | |
| 315 | Iter replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch, |
| 316 | const DebugLoc &DL); |
| 317 | |
| 318 | /// This function checks if it is valid to move Candidate to the delay slot |
| 319 | /// and returns true if it isn't. It also updates memory and register |
| 320 | /// dependence information. |
| 321 | bool delayHasHazard(const MipsSubtarget &STI, const MachineInstr &Candidate, |
| 322 | const BranchInformation &BranchInfo, RegDefsUses &RegDU, |
| 323 | InspectMemInstr &IM) const; |
| 324 | |
| 325 | /// This function searches range [Begin, End) for an instruction that can be |
| 326 | /// moved to the delay slot. Returns true on success. |
| 327 | template <typename IterTy> |
| 328 | bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End, |
| 329 | const BranchInformation &BranchInfo, RegDefsUses &RegDU, |
| 330 | InspectMemInstr &IM, Iter Slot, IterTy &Filler) const; |
| 331 | |
| 332 | /// This function searches in the backward direction for an instruction that |
| 333 | /// can be moved to the delay slot. Returns true on success. |
| 334 | bool searchBackward(MachineBasicBlock &MBB, MachineInstr &Slot, |
| 335 | const BranchInformation &BranchInfo) const; |
| 336 | |
| 337 | /// This function searches MBB in the forward direction for an instruction |
| 338 | /// that can be moved to the delay slot. Returns true on success. |
| 339 | bool searchForward(MachineBasicBlock &MBB, Iter Slot, |
| 340 | const BranchInformation &BranchInfo) const; |
| 341 | |
| 342 | /// This function searches one of MBB's successor blocks for an instruction |
| 343 | /// that can be moved to the delay slot and inserts clones of the |
| 344 | /// instruction into the successor's predecessor blocks. |
| 345 | bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot, |
| 346 | const BranchInformation &BranchInfo) const; |
| 347 | |
| 348 | /// Pick a successor block of MBB. Return NULL if MBB doesn't have a |
| 349 | /// successor block that is not a landing pad. |
| 350 | MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const; |
| 351 | |
| 352 | /// This function analyzes MBB and returns an instruction with an unoccupied |
| 353 | /// slot that branches to Dst. |
| 354 | std::pair<MipsInstrInfo::BranchType, MachineInstr *> |
| 355 | getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const; |
| 356 | |
| 357 | /// Examine Pred and see if it is possible to insert an instruction into |
| 358 | /// one of its branches delay slot or its end. |
| 359 | bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ, |
| 360 | RegDefsUses &RegDU, bool &HasMultipleSuccs, |
| 361 | BB2BrMap &BrMap) const; |
| 362 | |
| 363 | bool terminateSearch(const MachineInstr &Candidate) const; |
| 364 | |
| 365 | const TargetMachine *TM = nullptr; |
| 366 | }; |
| 367 | |
| 368 | } // end anonymous namespace |
| 369 | |
| 370 | char MipsDelaySlotFiller::ID = 0; |
| 371 | |
| 372 | static bool hasUnoccupiedSlot(const MachineInstr *MI) { |
| 373 | return MI->hasDelaySlot() && !MI->isBundledWithSucc(); |
| 374 | } |
| 375 | |
| 376 | /// Check if a branch is a short backward loop that triggers the R5900 erratum. |
| 377 | /// Quote from binutils-gdb/gas/config/tc-mips.c: |
| 378 | /// |
| 379 | /// On the R5900 short loops need to be fixed by inserting a NOP in the |
| 380 | /// branch delay slot. |
| 381 | /// |
| 382 | /// The short loop bug under certain conditions causes loops to execute |
| 383 | /// only once or twice. We must ensure that the assembler never |
| 384 | /// generates loops that satisfy all of the following conditions: |
| 385 | /// |
| 386 | /// - a loop consists of less than or equal to six instructions |
| 387 | /// (including the branch delay slot); |
| 388 | /// - a loop contains only one conditional branch instruction at the end |
| 389 | /// of the loop; |
| 390 | /// - a loop does not contain any other branch or jump instructions; |
| 391 | /// - a branch delay slot of the loop is not NOP (EE 2.9 or later). |
| 392 | /// |
| 393 | /// We need to do this because of a hardware bug in the R5900 chip. |
| 394 | static bool isR5900ShortLoopBranch(const MachineInstr *MI, |
| 395 | const MachineBasicBlock &MBB) { |
| 396 | // Must be a conditional branch (not jump or indirect branch) |
| 397 | if (!MI->isBranch() || MI->isIndirectBranch()) |
| 398 | return false; |
| 399 | |
| 400 | // Check if this is a conditional branch by looking for an MBB operand |
| 401 | const MachineBasicBlock *TargetMBB = nullptr; |
| 402 | for (const MachineOperand &MO : MI->operands()) { |
| 403 | if (MO.isMBB()) { |
| 404 | TargetMBB = MO.getMBB(); |
| 405 | break; |
| 406 | } |
| 407 | } |
| 408 | |
| 409 | // Must have a target and must target the same basic block (backward branch) |
| 410 | if (!TargetMBB || TargetMBB != &MBB) |
| 411 | return false; |
| 412 | |
| 413 | // Count instructions from the beginning of the block to the branch |
| 414 | // A short loop is 6 instructions or fewer (including branch + delay slot) |
| 415 | // The delay slot adds 1 more, so we check if instructions before branch <= 5 |
| 416 | unsigned InstrCount = 0; |
| 417 | bool HasOtherBranch = false; |
| 418 | |
| 419 | for (const MachineInstr &Instr : MBB) { |
| 420 | if (&Instr == MI) |
| 421 | break; |
| 422 | |
| 423 | // Skip debug and pseudo instructions |
| 424 | if (Instr.isDebugInstr() || Instr.isTransient()) |
| 425 | continue; |
| 426 | |
| 427 | ++InstrCount; |
| 428 | |
| 429 | // If there's another branch in the loop, the erratum doesn't apply |
| 430 | if (Instr.isBranch() || Instr.isCall()) { |
| 431 | HasOtherBranch = true; |
| 432 | break; |
| 433 | } |
| 434 | } |
| 435 | |
| 436 | // If there's another branch/call in the loop, erratum doesn't apply |
| 437 | if (HasOtherBranch) |
| 438 | return false; |
| 439 | |
| 440 | // Add 1 for the branch itself, +1 for delay slot = InstrCount + 2 |
| 441 | // Erratum triggers when total <= 6, so InstrCount + 2 <= 6 => InstrCount <= 4 |
| 442 | // But we're conservative: if InstrCount <= 5 (total <= 7), skip filling |
| 443 | // to match the exact condition from r5900check: offset -5 to -1 (2-6 instrs) |
| 444 | return InstrCount <= 5; |
| 445 | } |
| 446 | |
| 447 | INITIALIZE_PASS(MipsDelaySlotFiller, DEBUG_TYPE, |
| 448 | "Fill delay slot for MIPS" , false, false) |
| 449 | |
| 450 | /// This function inserts clones of Filler into predecessor blocks. |
| 451 | static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) { |
| 452 | MachineFunction *MF = Filler->getParent()->getParent(); |
| 453 | |
| 454 | for (const auto &I : BrMap) { |
| 455 | if (I.second) { |
| 456 | MIBundleBuilder(I.second).append(MI: MF->CloneMachineInstr(Orig: &*Filler)); |
| 457 | ++UsefulSlots; |
| 458 | } else { |
| 459 | I.first->push_back(MI: MF->CloneMachineInstr(Orig: &*Filler)); |
| 460 | } |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | /// This function adds registers Filler defines to MBB's live-in register list. |
| 465 | static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) { |
| 466 | for (const MachineOperand &MO : Filler->operands()) { |
| 467 | unsigned R; |
| 468 | |
| 469 | if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg())) |
| 470 | continue; |
| 471 | |
| 472 | #ifndef NDEBUG |
| 473 | const MachineFunction &MF = *MBB.getParent(); |
| 474 | assert(MF.getSubtarget().getRegisterInfo()->getAllocatableSet(MF).test(R) && |
| 475 | "Shouldn't move an instruction with unallocatable registers across " |
| 476 | "basic block boundaries." ); |
| 477 | #endif |
| 478 | |
| 479 | if (!MBB.isLiveIn(Reg: R)) |
| 480 | MBB.addLiveIn(PhysReg: R); |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | RegDefsUses::RegDefsUses(const TargetRegisterInfo &TRI) |
| 485 | : TRI(TRI), Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {} |
| 486 | |
| 487 | void RegDefsUses::init(const MachineInstr &MI) { |
| 488 | // Add all register operands which are explicit and non-variadic. |
| 489 | update(MI, Begin: 0, End: MI.getDesc().getNumOperands()); |
| 490 | |
| 491 | // If MI is a call, add RA to Defs to prevent users of RA from going into |
| 492 | // delay slot. |
| 493 | if (MI.isCall()) |
| 494 | Defs.set(Mips::RA); |
| 495 | |
| 496 | // Add all implicit register operands of branch instructions except |
| 497 | // register AT. |
| 498 | if (MI.isBranch()) { |
| 499 | update(MI, Begin: MI.getDesc().getNumOperands(), End: MI.getNumOperands()); |
| 500 | Defs.reset(Idx: Mips::AT); |
| 501 | } |
| 502 | } |
| 503 | |
| 504 | void RegDefsUses::setCallerSaved(const MachineInstr &MI) { |
| 505 | assert(MI.isCall()); |
| 506 | |
| 507 | // Add RA/RA_64 to Defs to prevent users of RA/RA_64 from going into |
| 508 | // the delay slot. The reason is that RA/RA_64 must not be changed |
| 509 | // in the delay slot so that the callee can return to the caller. |
| 510 | if (MI.definesRegister(Reg: Mips::RA, /*TRI=*/nullptr) || |
| 511 | MI.definesRegister(Reg: Mips::RA_64, /*TRI=*/nullptr)) { |
| 512 | Defs.set(Mips::RA); |
| 513 | Defs.set(Mips::RA_64); |
| 514 | } |
| 515 | |
| 516 | // If MI is a call, add all caller-saved registers to Defs. |
| 517 | BitVector CallerSavedRegs(TRI.getNumRegs(), true); |
| 518 | |
| 519 | CallerSavedRegs.reset(Idx: Mips::ZERO); |
| 520 | CallerSavedRegs.reset(Idx: Mips::ZERO_64); |
| 521 | |
| 522 | for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MF: MI.getParent()->getParent()); |
| 523 | *R; ++R) |
| 524 | for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI) |
| 525 | CallerSavedRegs.reset(Idx: *AI); |
| 526 | |
| 527 | Defs |= CallerSavedRegs; |
| 528 | } |
| 529 | |
| 530 | void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) { |
| 531 | BitVector AllocSet = TRI.getAllocatableSet(MF); |
| 532 | |
| 533 | for (unsigned R : AllocSet.set_bits()) |
| 534 | for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) |
| 535 | AllocSet.set(*AI); |
| 536 | |
| 537 | AllocSet.set(Mips::ZERO); |
| 538 | AllocSet.set(Mips::ZERO_64); |
| 539 | |
| 540 | Defs |= AllocSet.flip(); |
| 541 | } |
| 542 | |
| 543 | void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB, |
| 544 | const MachineBasicBlock &SuccBB) { |
| 545 | for (const MachineBasicBlock *S : MBB.successors()) |
| 546 | if (S != &SuccBB) |
| 547 | for (const auto &LI : S->liveins()) |
| 548 | Uses.set(LI.PhysReg.id()); |
| 549 | } |
| 550 | |
| 551 | bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) { |
| 552 | BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs()); |
| 553 | bool HasHazard = false; |
| 554 | |
| 555 | for (unsigned I = Begin; I != End; ++I) { |
| 556 | const MachineOperand &MO = MI.getOperand(i: I); |
| 557 | |
| 558 | if (MO.isReg() && MO.getReg()) { |
| 559 | if (checkRegDefsUses(NewDefs, NewUses, Reg: MO.getReg(), IsDef: MO.isDef())) { |
| 560 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found register hazard for operand " |
| 561 | << I << ": " ; |
| 562 | MO.dump()); |
| 563 | HasHazard = true; |
| 564 | } |
| 565 | } |
| 566 | } |
| 567 | |
| 568 | Defs |= NewDefs; |
| 569 | Uses |= NewUses; |
| 570 | |
| 571 | return HasHazard; |
| 572 | } |
| 573 | |
| 574 | bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, |
| 575 | unsigned Reg, bool IsDef) const { |
| 576 | if (IsDef) { |
| 577 | NewDefs.set(Reg); |
| 578 | // check whether Reg has already been defined or used. |
| 579 | return (isRegInSet(RegSet: Defs, Reg) || isRegInSet(RegSet: Uses, Reg)); |
| 580 | } |
| 581 | |
| 582 | NewUses.set(Reg); |
| 583 | // check whether Reg has already been defined. |
| 584 | return isRegInSet(RegSet: Defs, Reg); |
| 585 | } |
| 586 | |
| 587 | bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const { |
| 588 | // Check Reg and all aliased Registers. |
| 589 | for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) |
| 590 | if (RegSet.test(Idx: *AI)) |
| 591 | return true; |
| 592 | return false; |
| 593 | } |
| 594 | |
| 595 | bool InspectMemInstr::hasHazard(const MachineInstr &MI) { |
| 596 | if (!MI.mayStore() && !MI.mayLoad()) |
| 597 | return false; |
| 598 | |
| 599 | if (ForbidMemInstr) |
| 600 | return true; |
| 601 | |
| 602 | OrigSeenLoad = SeenLoad; |
| 603 | OrigSeenStore = SeenStore; |
| 604 | SeenLoad |= MI.mayLoad(); |
| 605 | SeenStore |= MI.mayStore(); |
| 606 | |
| 607 | // If MI is an ordered or volatile memory reference, disallow moving |
| 608 | // subsequent loads and stores to delay slot. |
| 609 | if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) { |
| 610 | ForbidMemInstr = true; |
| 611 | return true; |
| 612 | } |
| 613 | |
| 614 | return hasHazard_(MI); |
| 615 | } |
| 616 | |
| 617 | bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) { |
| 618 | if (MI.mayStore()) |
| 619 | return true; |
| 620 | |
| 621 | if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getPseudoValue()) |
| 622 | return true; |
| 623 | |
| 624 | if (const PseudoSourceValue *PSV = |
| 625 | (*MI.memoperands_begin())->getPseudoValue()) { |
| 626 | if (isa<FixedStackPseudoSourceValue>(Val: PSV)) |
| 627 | return false; |
| 628 | return !PSV->isConstant(nullptr) && !PSV->isStack(); |
| 629 | } |
| 630 | |
| 631 | return true; |
| 632 | } |
| 633 | |
| 634 | MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_) |
| 635 | : InspectMemInstr(false), MFI(MFI_) {} |
| 636 | |
| 637 | bool MemDefsUses::hasHazard_(const MachineInstr &MI) { |
| 638 | bool HasHazard = false; |
| 639 | |
| 640 | // Check underlying object list. |
| 641 | SmallVector<ValueType, 4> Objs; |
| 642 | if (getUnderlyingObjects(MI, Objects&: Objs)) { |
| 643 | for (ValueType VT : Objs) |
| 644 | HasHazard |= updateDefsUses(V: VT, MayStore: MI.mayStore()); |
| 645 | return HasHazard; |
| 646 | } |
| 647 | |
| 648 | // No underlying objects found. |
| 649 | HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore); |
| 650 | HasHazard |= MI.mayLoad() || OrigSeenStore; |
| 651 | |
| 652 | SeenNoObjLoad |= MI.mayLoad(); |
| 653 | SeenNoObjStore |= MI.mayStore(); |
| 654 | |
| 655 | return HasHazard; |
| 656 | } |
| 657 | |
| 658 | bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) { |
| 659 | if (MayStore) |
| 660 | return !Defs.insert(Ptr: V).second || Uses.count(Ptr: V) || SeenNoObjStore || |
| 661 | SeenNoObjLoad; |
| 662 | |
| 663 | Uses.insert(Ptr: V); |
| 664 | return Defs.count(Ptr: V) || SeenNoObjStore; |
| 665 | } |
| 666 | |
| 667 | bool MemDefsUses:: |
| 668 | getUnderlyingObjects(const MachineInstr &MI, |
| 669 | SmallVectorImpl<ValueType> &Objects) const { |
| 670 | if (!MI.hasOneMemOperand()) |
| 671 | return false; |
| 672 | |
| 673 | auto & MMO = **MI.memoperands_begin(); |
| 674 | |
| 675 | if (const PseudoSourceValue *PSV = MMO.getPseudoValue()) { |
| 676 | if (!PSV->isAliased(MFI)) |
| 677 | return false; |
| 678 | Objects.push_back(Elt: PSV); |
| 679 | return true; |
| 680 | } |
| 681 | |
| 682 | if (const Value *V = MMO.getValue()) { |
| 683 | SmallVector<const Value *, 4> Objs; |
| 684 | ::getUnderlyingObjects(V, Objects&: Objs); |
| 685 | |
| 686 | for (const Value *UValue : Objs) { |
| 687 | if (!isIdentifiedObject(V)) |
| 688 | return false; |
| 689 | |
| 690 | Objects.push_back(Elt: UValue); |
| 691 | } |
| 692 | return true; |
| 693 | } |
| 694 | |
| 695 | return false; |
| 696 | } |
| 697 | |
| 698 | // Replace Branch with the compact branch instruction. |
| 699 | Iter MipsDelaySlotFiller::replaceWithCompactBranch(MachineBasicBlock &MBB, |
| 700 | Iter Branch, |
| 701 | const DebugLoc &DL) { |
| 702 | const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>(); |
| 703 | const MipsInstrInfo *TII = STI.getInstrInfo(); |
| 704 | |
| 705 | unsigned NewOpcode = TII->getEquivalentCompactForm(I: Branch); |
| 706 | Branch = TII->genInstrWithNewOpc(NewOpc: NewOpcode, I: Branch); |
| 707 | |
| 708 | auto *ToErase = cast<MachineInstr>(Val: &*std::next(x: Branch)); |
| 709 | // Update call info for the Branch. |
| 710 | if (ToErase->shouldUpdateAdditionalCallInfo()) |
| 711 | ToErase->getMF()->moveAdditionalCallInfo(Old: ToErase, |
| 712 | New: cast<MachineInstr>(Val: &*Branch)); |
| 713 | ToErase->eraseFromParent(); |
| 714 | return Branch; |
| 715 | } |
| 716 | |
| 717 | // For given opcode returns opcode of corresponding instruction with short |
| 718 | // delay slot. |
| 719 | // For the pseudo TAILCALL*_MM instructions return the short delay slot |
| 720 | // form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range |
| 721 | // that is too short to make use of for tail calls. |
| 722 | static int getEquivalentCallShort(int Opcode) { |
| 723 | switch (Opcode) { |
| 724 | case Mips::BGEZAL: |
| 725 | return Mips::BGEZALS_MM; |
| 726 | case Mips::BLTZAL: |
| 727 | return Mips::BLTZALS_MM; |
| 728 | case Mips::JAL: |
| 729 | case Mips::JAL_MM: |
| 730 | return Mips::JALS_MM; |
| 731 | case Mips::JALR: |
| 732 | return Mips::JALRS_MM; |
| 733 | case Mips::JALR16_MM: |
| 734 | return Mips::JALRS16_MM; |
| 735 | case Mips::TAILCALL_MM: |
| 736 | llvm_unreachable("Attempting to shorten the TAILCALL_MM pseudo!" ); |
| 737 | case Mips::TAILCALLREG: |
| 738 | return Mips::JR16_MM; |
| 739 | default: |
| 740 | llvm_unreachable("Unexpected call instruction for microMIPS." ); |
| 741 | } |
| 742 | } |
| 743 | |
| 744 | /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. |
| 745 | /// We assume there is only one delay slot per delayed instruction. |
| 746 | bool MipsDelaySlotFiller::runOnMachineBasicBlock( |
| 747 | MachineBasicBlock &MBB, MachineInstr *FirstNextMBBInstr) { |
| 748 | bool Changed = false; |
| 749 | const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>(); |
| 750 | bool InMicroMipsMode = STI.inMicroMipsMode(); |
| 751 | const MipsInstrInfo *TII = STI.getInstrInfo(); |
| 752 | |
| 753 | for (Iter I = MBB.begin(); I != MBB.end(); ++I) { |
| 754 | if (!hasUnoccupiedSlot(MI: &*I)) |
| 755 | continue; |
| 756 | |
| 757 | // R5900 short loop erratum fix: skip delay slot filling for short backward |
| 758 | // loops to avoid triggering a hardware bug where short loops may exit |
| 759 | // early. The fix can be controlled with -mfix-r5900 / -mno-fix-r5900. |
| 760 | bool SkipForFixR5900 = false; |
| 761 | if (STI.fixR5900() && isR5900ShortLoopBranch(MI: &*I, MBB)) { |
| 762 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": skipping delay slot fill for R5900 " |
| 763 | "short loop branch.\n" ); |
| 764 | ++R5900ShortLoopNops; |
| 765 | SkipForFixR5900 = true; |
| 766 | } |
| 767 | |
| 768 | // Delay slot filling is disabled at -O0, in microMIPS32R6, or for R5900 |
| 769 | // short loop branches. |
| 770 | if (!DisableDelaySlotFiller && |
| 771 | (TM->getOptLevel() != CodeGenOptLevel::None) && |
| 772 | !(InMicroMipsMode && STI.hasMips32r6()) && !SkipForFixR5900) { |
| 773 | |
| 774 | bool Filled = false; |
| 775 | const auto BranchInfo = |
| 776 | BranchInformation(I, MBB.end(), FirstNextMBBInstr); |
| 777 | |
| 778 | if (MipsCompactBranchPolicy.getValue() != CB_Always || |
| 779 | !TII->getEquivalentCompactForm(I)) { |
| 780 | if (searchBackward(MBB, Slot&: *I, BranchInfo)) { |
| 781 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" |
| 782 | " in backwards search.\n" ); |
| 783 | Filled = true; |
| 784 | } else if (I->isTerminator()) { |
| 785 | if (searchSuccBBs(MBB, Slot: I, BranchInfo)) { |
| 786 | Filled = true; |
| 787 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" |
| 788 | " in successor BB search.\n" ); |
| 789 | } |
| 790 | } else if (searchForward(MBB, Slot: I, BranchInfo)) { |
| 791 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" |
| 792 | " in forwards search.\n" ); |
| 793 | Filled = true; |
| 794 | } |
| 795 | } |
| 796 | |
| 797 | if (Filled) { |
| 798 | // Get instruction with delay slot. |
| 799 | MachineBasicBlock::instr_iterator DSI = I.getInstrIterator(); |
| 800 | |
| 801 | if (InMicroMipsMode && TII->getInstSizeInBytes(MI: *std::next(x: DSI)) == 2 && |
| 802 | DSI->isCall()) { |
| 803 | // If instruction in delay slot is 16b change opcode to |
| 804 | // corresponding instruction with short delay slot. |
| 805 | |
| 806 | // TODO: Implement an instruction mapping table of 16bit opcodes to |
| 807 | // 32bit opcodes so that an instruction can be expanded. This would |
| 808 | // save 16 bits as a TAILCALL_MM pseudo requires a fullsized nop. |
| 809 | // TODO: Permit b16 when branching backwards to the same function |
| 810 | // if it is in range. |
| 811 | DSI->setDesc(TII->get(Opcode: getEquivalentCallShort(Opcode: DSI->getOpcode()))); |
| 812 | } |
| 813 | ++FilledSlots; |
| 814 | Changed = true; |
| 815 | continue; |
| 816 | } |
| 817 | } |
| 818 | |
| 819 | // For microMIPS if instruction is BEQ or BNE with one ZERO register, then |
| 820 | // instead of adding NOP replace this instruction with the corresponding |
| 821 | // compact branch instruction, i.e. BEQZC or BNEZC. Additionally |
| 822 | // PseudoReturn and PseudoIndirectBranch are expanded to JR_MM, so they can |
| 823 | // be replaced with JRC16_MM. |
| 824 | |
| 825 | // For MIPSR6 attempt to produce the corresponding compact (no delay slot) |
| 826 | // form of the CTI. For indirect jumps this will not require inserting a |
| 827 | // NOP and for branches will hopefully avoid requiring a NOP. |
| 828 | if ((InMicroMipsMode || |
| 829 | (STI.hasMips32r6() && MipsCompactBranchPolicy != CB_Never)) && |
| 830 | TII->getEquivalentCompactForm(I)) { |
| 831 | I = replaceWithCompactBranch(MBB, Branch: I, DL: I->getDebugLoc()); |
| 832 | Changed = true; |
| 833 | continue; |
| 834 | } |
| 835 | |
| 836 | // Bundle the NOP to the instruction with the delay slot. |
| 837 | LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": could not fill delay slot for " ; |
| 838 | I->dump()); |
| 839 | TII->insertNop(MBB, MI: std::next(x: I), DL: I->getDebugLoc()); |
| 840 | MIBundleBuilder(MBB, I, std::next(x: I, n: 2)); |
| 841 | ++FilledSlots; |
| 842 | Changed = true; |
| 843 | } |
| 844 | return Changed; |
| 845 | } |
| 846 | |
| 847 | template <typename IterTy> |
| 848 | bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin, |
| 849 | IterTy End, |
| 850 | const BranchInformation &BranchInfo, |
| 851 | RegDefsUses &RegDU, InspectMemInstr &IM, |
| 852 | Iter Slot, IterTy &Filler) const { |
| 853 | for (IterTy I = Begin; I != End;) { |
| 854 | IterTy CurrI = I; |
| 855 | ++I; |
| 856 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": checking instruction: " ; CurrI->dump()); |
| 857 | // Skip debug value. |
| 858 | // Instruction TargetOpcode::JUMP_TABLE_DEBUG_INFO is only used to note |
| 859 | // jump table debug info. |
| 860 | if (CurrI->isDebugInstr() || CurrI->isJumpTableDebugInfo()) { |
| 861 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring debug instruction: " ; |
| 862 | CurrI->dump()); |
| 863 | continue; |
| 864 | } |
| 865 | |
| 866 | if (CurrI->isBundle()) { |
| 867 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring BUNDLE instruction: " ; |
| 868 | CurrI->dump()); |
| 869 | // However, we still need to update the register def-use information. |
| 870 | RegDU.update(MI: *CurrI, Begin: 0, End: CurrI->getNumOperands()); |
| 871 | continue; |
| 872 | } |
| 873 | |
| 874 | if (terminateSearch(Candidate: *CurrI)) { |
| 875 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": should terminate search: " ; |
| 876 | CurrI->dump()); |
| 877 | break; |
| 878 | } |
| 879 | |
| 880 | assert((!CurrI->isCall() && !CurrI->isReturn() && !CurrI->isBranch()) && |
| 881 | "Cannot put calls, returns or branches in delay slot." ); |
| 882 | |
| 883 | if (CurrI->isKill()) { |
| 884 | CurrI->eraseFromParent(); |
| 885 | continue; |
| 886 | } |
| 887 | |
| 888 | const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>(); |
| 889 | if (delayHasHazard(STI, Candidate: *CurrI, BranchInfo, RegDU, IM)) |
| 890 | continue; |
| 891 | |
| 892 | bool InMicroMipsMode = STI.inMicroMipsMode(); |
| 893 | const MipsInstrInfo *TII = STI.getInstrInfo(); |
| 894 | unsigned Opcode = (*Slot).getOpcode(); |
| 895 | |
| 896 | // In mips1-4, should not put mflo into the delay slot for the return. |
| 897 | if ((IsMFLOMFHI(CurrI->getOpcode())) && |
| 898 | (!STI.hasMips32() && !STI.hasMips5())) |
| 899 | continue; |
| 900 | |
| 901 | // This is complicated by the tail call optimization. For non-PIC code |
| 902 | // there is only a 32bit sized unconditional branch which can be assumed |
| 903 | // to be able to reach the target. b16 only has a range of +/- 1 KB. |
| 904 | // It's entirely possible that the target function is reachable with b16 |
| 905 | // but we don't have enough information to make that decision. |
| 906 | if (InMicroMipsMode && TII->getInstSizeInBytes(MI: *CurrI) == 2 && |
| 907 | (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch || |
| 908 | Opcode == Mips::PseudoIndirectBranch_MM || |
| 909 | Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL)) |
| 910 | continue; |
| 911 | // Instructions LWP/SWP and MOVEP should not be in a delay slot as that |
| 912 | // results in unpredictable behaviour |
| 913 | if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM || |
| 914 | Opcode == Mips::MOVEP_MM)) |
| 915 | continue; |
| 916 | |
| 917 | Filler = CurrI; |
| 918 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot: " ; |
| 919 | CurrI->dump()); |
| 920 | |
| 921 | return true; |
| 922 | } |
| 923 | |
| 924 | return false; |
| 925 | } |
| 926 | |
| 927 | bool MipsDelaySlotFiller::searchBackward( |
| 928 | MachineBasicBlock &MBB, MachineInstr &Slot, |
| 929 | const BranchInformation &BranchInfo) const { |
| 930 | if (DisableBackwardSearch) |
| 931 | return false; |
| 932 | |
| 933 | auto *Fn = MBB.getParent(); |
| 934 | RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo()); |
| 935 | MemDefsUses MemDU(&Fn->getFrameInfo()); |
| 936 | ReverseIter Filler; |
| 937 | |
| 938 | RegDU.init(MI: Slot); |
| 939 | |
| 940 | MachineBasicBlock::iterator SlotI = Slot; |
| 941 | if (!searchRange(MBB, Begin: ++SlotI.getReverse(), End: MBB.rend(), BranchInfo, RegDU, |
| 942 | IM&: MemDU, Slot, Filler)) { |
| 943 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay " |
| 944 | "slot using backwards search.\n" ); |
| 945 | return false; |
| 946 | } |
| 947 | |
| 948 | MBB.splice(Where: std::next(x: SlotI), Other: &MBB, From: Filler.getReverse()); |
| 949 | MIBundleBuilder(MBB, SlotI, std::next(x: SlotI, n: 2)); |
| 950 | ++UsefulSlots; |
| 951 | return true; |
| 952 | } |
| 953 | |
| 954 | bool MipsDelaySlotFiller::searchForward( |
| 955 | MachineBasicBlock &MBB, Iter Slot, |
| 956 | const BranchInformation &BranchInfo) const { |
| 957 | // Can handle only calls. |
| 958 | if (DisableForwardSearch || !Slot->isCall()) |
| 959 | return false; |
| 960 | |
| 961 | RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo()); |
| 962 | NoMemInstr NM; |
| 963 | Iter Filler; |
| 964 | |
| 965 | RegDU.setCallerSaved(*Slot); |
| 966 | |
| 967 | if (!searchRange(MBB, Begin: std::next(x: Slot), End: MBB.end(), BranchInfo, RegDU, IM&: NM, Slot, |
| 968 | Filler)) { |
| 969 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay " |
| 970 | "slot using forwards search.\n" ); |
| 971 | return false; |
| 972 | } |
| 973 | |
| 974 | MBB.splice(Where: std::next(x: Slot), Other: &MBB, From: Filler); |
| 975 | MIBundleBuilder(MBB, Slot, std::next(x: Slot, n: 2)); |
| 976 | ++UsefulSlots; |
| 977 | return true; |
| 978 | } |
| 979 | |
| 980 | bool MipsDelaySlotFiller::searchSuccBBs( |
| 981 | MachineBasicBlock &MBB, Iter Slot, |
| 982 | const BranchInformation &BranchInfo) const { |
| 983 | if (DisableSuccBBSearch) |
| 984 | return false; |
| 985 | |
| 986 | MachineBasicBlock *SuccBB = selectSuccBB(B&: MBB); |
| 987 | |
| 988 | if (!SuccBB) |
| 989 | return false; |
| 990 | |
| 991 | RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo()); |
| 992 | bool HasMultipleSuccs = false; |
| 993 | BB2BrMap BrMap; |
| 994 | std::unique_ptr<InspectMemInstr> IM; |
| 995 | Iter Filler; |
| 996 | auto *Fn = MBB.getParent(); |
| 997 | |
| 998 | // Iterate over SuccBB's predecessor list. |
| 999 | for (MachineBasicBlock *Pred : SuccBB->predecessors()) |
| 1000 | if (!examinePred(Pred&: *Pred, Succ: *SuccBB, RegDU, HasMultipleSuccs, BrMap)) |
| 1001 | return false; |
| 1002 | |
| 1003 | // Do not allow moving instructions which have unallocatable register operands |
| 1004 | // across basic block boundaries. |
| 1005 | RegDU.setUnallocatableRegs(*Fn); |
| 1006 | |
| 1007 | // Only allow moving loads from stack or constants if any of the SuccBB's |
| 1008 | // predecessors have multiple successors. |
| 1009 | if (HasMultipleSuccs) { |
| 1010 | IM.reset(p: new LoadFromStackOrConst()); |
| 1011 | } else { |
| 1012 | const MachineFrameInfo &MFI = Fn->getFrameInfo(); |
| 1013 | IM.reset(p: new MemDefsUses(&MFI)); |
| 1014 | } |
| 1015 | |
| 1016 | if (!searchRange(MBB, Begin: SuccBB->begin(), End: SuccBB->end(), BranchInfo, RegDU, IM&: *IM, |
| 1017 | Slot, Filler)) |
| 1018 | return false; |
| 1019 | |
| 1020 | insertDelayFiller(Filler, BrMap); |
| 1021 | addLiveInRegs(Filler, MBB&: *SuccBB); |
| 1022 | Filler->eraseFromParent(); |
| 1023 | |
| 1024 | return true; |
| 1025 | } |
| 1026 | |
| 1027 | MachineBasicBlock * |
| 1028 | MipsDelaySlotFiller::selectSuccBB(MachineBasicBlock &B) const { |
| 1029 | if (B.succ_empty()) |
| 1030 | return nullptr; |
| 1031 | |
| 1032 | // Select the successor with the larget edge weight. |
| 1033 | auto &Prob = getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); |
| 1034 | MachineBasicBlock *S = |
| 1035 | *llvm::max_element(Range: B.successors(), C: [&](const MachineBasicBlock *Dst0, |
| 1036 | const MachineBasicBlock *Dst1) { |
| 1037 | return Prob.getEdgeProbability(Src: &B, Dst: Dst0) < |
| 1038 | Prob.getEdgeProbability(Src: &B, Dst: Dst1); |
| 1039 | }); |
| 1040 | return S->isEHPad() ? nullptr : S; |
| 1041 | } |
| 1042 | |
| 1043 | std::pair<MipsInstrInfo::BranchType, MachineInstr *> |
| 1044 | MipsDelaySlotFiller::getBranch(MachineBasicBlock &MBB, |
| 1045 | const MachineBasicBlock &Dst) const { |
| 1046 | const MipsInstrInfo *TII = |
| 1047 | MBB.getParent()->getSubtarget<MipsSubtarget>().getInstrInfo(); |
| 1048 | MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr; |
| 1049 | SmallVector<MachineInstr*, 2> BranchInstrs; |
| 1050 | SmallVector<MachineOperand, 2> Cond; |
| 1051 | |
| 1052 | MipsInstrInfo::BranchType R = |
| 1053 | TII->analyzeBranch(MBB, TBB&: TrueBB, FBB&: FalseBB, Cond, AllowModify: false, BranchInstrs); |
| 1054 | |
| 1055 | if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch)) |
| 1056 | return std::make_pair(x&: R, y: nullptr); |
| 1057 | |
| 1058 | if (R != MipsInstrInfo::BT_CondUncond) { |
| 1059 | if (!hasUnoccupiedSlot(MI: BranchInstrs[0])) |
| 1060 | return std::make_pair(x: MipsInstrInfo::BT_None, y: nullptr); |
| 1061 | |
| 1062 | assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst))); |
| 1063 | |
| 1064 | return std::make_pair(x&: R, y&: BranchInstrs[0]); |
| 1065 | } |
| 1066 | |
| 1067 | assert((TrueBB == &Dst) || (FalseBB == &Dst)); |
| 1068 | |
| 1069 | // Examine the conditional branch. See if its slot is occupied. |
| 1070 | if (hasUnoccupiedSlot(MI: BranchInstrs[0])) |
| 1071 | return std::make_pair(x: MipsInstrInfo::BT_Cond, y&: BranchInstrs[0]); |
| 1072 | |
| 1073 | // If that fails, try the unconditional branch. |
| 1074 | if (hasUnoccupiedSlot(MI: BranchInstrs[1]) && (FalseBB == &Dst)) |
| 1075 | return std::make_pair(x: MipsInstrInfo::BT_Uncond, y&: BranchInstrs[1]); |
| 1076 | |
| 1077 | return std::make_pair(x: MipsInstrInfo::BT_None, y: nullptr); |
| 1078 | } |
| 1079 | |
| 1080 | bool MipsDelaySlotFiller::examinePred(MachineBasicBlock &Pred, |
| 1081 | const MachineBasicBlock &Succ, |
| 1082 | RegDefsUses &RegDU, |
| 1083 | bool &HasMultipleSuccs, |
| 1084 | BB2BrMap &BrMap) const { |
| 1085 | std::pair<MipsInstrInfo::BranchType, MachineInstr *> P = |
| 1086 | getBranch(MBB&: Pred, Dst: Succ); |
| 1087 | |
| 1088 | // Return if either getBranch wasn't able to analyze the branches or there |
| 1089 | // were no branches with unoccupied slots. |
| 1090 | if (P.first == MipsInstrInfo::BT_None) |
| 1091 | return false; |
| 1092 | |
| 1093 | if ((P.first != MipsInstrInfo::BT_Uncond) && |
| 1094 | (P.first != MipsInstrInfo::BT_NoBranch)) { |
| 1095 | HasMultipleSuccs = true; |
| 1096 | RegDU.addLiveOut(MBB: Pred, SuccBB: Succ); |
| 1097 | } |
| 1098 | |
| 1099 | BrMap[&Pred] = P.second; |
| 1100 | return true; |
| 1101 | } |
| 1102 | |
| 1103 | bool MipsDelaySlotFiller::delayHasHazard(const MipsSubtarget &STI, |
| 1104 | const MachineInstr &Candidate, |
| 1105 | const BranchInformation &BranchInfo, |
| 1106 | RegDefsUses &RegDU, |
| 1107 | InspectMemInstr &IM) const { |
| 1108 | assert(!Candidate.isKill() && |
| 1109 | "KILL instructions should have been eliminated at this point." ); |
| 1110 | |
| 1111 | bool HasHazard = Candidate.isImplicitDef(); |
| 1112 | |
| 1113 | HasHazard |= IM.hasHazard(MI: Candidate); |
| 1114 | HasHazard |= RegDU.update(MI: Candidate, Begin: 0, End: Candidate.getNumOperands()); |
| 1115 | |
| 1116 | // This only matters for MIPS1 and only if we do not have a hazard already |
| 1117 | if (STI.hasMips1() && !STI.hasMips2() && !HasHazard) { |
| 1118 | const MipsInstrInfo *TII = STI.getInstrInfo(); |
| 1119 | const bool HasLoadDelaySlot = TII->HasLoadDelaySlot(MI: Candidate); |
| 1120 | |
| 1121 | // We only need to act if the candidate is having a load delay slot |
| 1122 | if (HasLoadDelaySlot) { |
| 1123 | // We have no branch so we can not determine a hazard |
| 1124 | // Assume the worst |
| 1125 | if (!BranchInfo.hasBranchInstr()) { |
| 1126 | return true; |
| 1127 | } |
| 1128 | |
| 1129 | // Being an indirect branch means we can not tell if we are a hazard |
| 1130 | // Assume the worst |
| 1131 | if (BranchInfo.isIndirectBranch()) { |
| 1132 | return true; |
| 1133 | } |
| 1134 | |
| 1135 | // If this is a direct branch we should find a MBB operand for the jump |
| 1136 | // target |
| 1137 | const MachineBasicBlock *TargetMBB = BranchInfo.getBranchTarget(); |
| 1138 | if (!TargetMBB || TargetMBB->empty()) { |
| 1139 | return true; |
| 1140 | } |
| 1141 | |
| 1142 | const auto &BranchTargetInstr = TargetMBB->instr_front(); |
| 1143 | bool HasNewHazard = |
| 1144 | !TII->SafeInLoadDelaySlot(MIInSlot: BranchTargetInstr, LoadMI: Candidate); |
| 1145 | // If the branch is unconditional then we do not need to bother to check |
| 1146 | // the next instruction after the branch |
| 1147 | if (!BranchInfo.isUnconditionalBranch()) { |
| 1148 | // We are a conditional branch so we should have an `else` branch |
| 1149 | if (BranchInfo.hasBranchElseInstr()) { |
| 1150 | HasNewHazard |= !TII->SafeInLoadDelaySlot( |
| 1151 | MIInSlot: *BranchInfo.getBranchElseInstr(), LoadMI: Candidate); |
| 1152 | } else { |
| 1153 | // Without the `else` branch we need to assume the worst |
| 1154 | HasNewHazard = true; |
| 1155 | } |
| 1156 | } |
| 1157 | return HasNewHazard; |
| 1158 | } |
| 1159 | } |
| 1160 | |
| 1161 | return HasHazard; |
| 1162 | } |
| 1163 | |
| 1164 | bool MipsDelaySlotFiller::terminateSearch(const MachineInstr &Candidate) const { |
| 1165 | return (Candidate.isTerminator() || Candidate.isCall() || |
| 1166 | Candidate.isPosition() || Candidate.isInlineAsm() || |
| 1167 | Candidate.hasUnmodeledSideEffects()); |
| 1168 | } |
| 1169 | |
| 1170 | /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay |
| 1171 | /// slots in Mips MachineFunctions |
| 1172 | FunctionPass *llvm::createMipsDelaySlotFillerPass() { |
| 1173 | return new MipsDelaySlotFiller(); |
| 1174 | } |
| 1175 | |