1 | //===- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Simple pass to fill delay slots with useful instructions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "MCTargetDesc/MipsMCNaCl.h" |
14 | #include "Mips.h" |
15 | #include "MipsInstrInfo.h" |
16 | #include "MipsRegisterInfo.h" |
17 | #include "MipsSubtarget.h" |
18 | #include "llvm/ADT/BitVector.h" |
19 | #include "llvm/ADT/DenseMap.h" |
20 | #include "llvm/ADT/PointerUnion.h" |
21 | #include "llvm/ADT/SmallPtrSet.h" |
22 | #include "llvm/ADT/SmallVector.h" |
23 | #include "llvm/ADT/Statistic.h" |
24 | #include "llvm/ADT/StringRef.h" |
25 | #include "llvm/Analysis/AliasAnalysis.h" |
26 | #include "llvm/Analysis/ValueTracking.h" |
27 | #include "llvm/CodeGen/MachineBasicBlock.h" |
28 | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
29 | #include "llvm/CodeGen/MachineFunction.h" |
30 | #include "llvm/CodeGen/MachineFunctionPass.h" |
31 | #include "llvm/CodeGen/MachineInstr.h" |
32 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
33 | #include "llvm/CodeGen/MachineOperand.h" |
34 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
35 | #include "llvm/CodeGen/PseudoSourceValue.h" |
36 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
37 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
38 | #include "llvm/MC/MCInstrDesc.h" |
39 | #include "llvm/MC/MCRegisterInfo.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include "llvm/Support/CodeGen.h" |
42 | #include "llvm/Support/CommandLine.h" |
43 | #include "llvm/Support/ErrorHandling.h" |
44 | #include "llvm/Target/TargetMachine.h" |
45 | #include <algorithm> |
46 | #include <cassert> |
47 | #include <iterator> |
48 | #include <memory> |
49 | #include <utility> |
50 | |
51 | using namespace llvm; |
52 | |
53 | #define DEBUG_TYPE "mips-delay-slot-filler" |
54 | |
55 | STATISTIC(FilledSlots, "Number of delay slots filled" ); |
56 | STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" |
57 | " are not NOP." ); |
58 | |
59 | static cl::opt<bool> DisableDelaySlotFiller( |
60 | "disable-mips-delay-filler" , |
61 | cl::init(Val: false), |
62 | cl::desc("Fill all delay slots with NOPs." ), |
63 | cl::Hidden); |
64 | |
65 | static cl::opt<bool> DisableForwardSearch( |
66 | "disable-mips-df-forward-search" , |
67 | cl::init(Val: true), |
68 | cl::desc("Disallow MIPS delay filler to search forward." ), |
69 | cl::Hidden); |
70 | |
71 | static cl::opt<bool> DisableSuccBBSearch( |
72 | "disable-mips-df-succbb-search" , |
73 | cl::init(Val: true), |
74 | cl::desc("Disallow MIPS delay filler to search successor basic blocks." ), |
75 | cl::Hidden); |
76 | |
77 | static cl::opt<bool> DisableBackwardSearch( |
78 | "disable-mips-df-backward-search" , |
79 | cl::init(Val: false), |
80 | cl::desc("Disallow MIPS delay filler to search backward." ), |
81 | cl::Hidden); |
82 | |
83 | enum CompactBranchPolicy { |
84 | CB_Never, ///< The policy 'never' may in some circumstances or for some |
85 | ///< ISAs not be absolutely adhered to. |
86 | CB_Optimal, ///< Optimal is the default and will produce compact branches |
87 | ///< when delay slots cannot be filled. |
88 | CB_Always ///< The policy 'always' may in some circumstances not be |
89 | ///< absolutely adhered to, because there may not be a |
90 | ///< corresponding compact form of a branch. |
91 | }; |
92 | |
93 | static cl::opt<CompactBranchPolicy> MipsCompactBranchPolicy( |
94 | "mips-compact-branches" , cl::Optional, cl::init(Val: CB_Optimal), |
95 | cl::desc("MIPS Specific: Compact branch policy." ), |
96 | cl::values(clEnumValN(CB_Never, "never" , |
97 | "Do not use compact branches if possible." ), |
98 | clEnumValN(CB_Optimal, "optimal" , |
99 | "Use compact branches where appropriate (default)." ), |
100 | clEnumValN(CB_Always, "always" , |
101 | "Always use compact branches if possible." ))); |
102 | |
103 | namespace { |
104 | |
105 | using Iter = MachineBasicBlock::iterator; |
106 | using ReverseIter = MachineBasicBlock::reverse_iterator; |
107 | using BB2BrMap = SmallDenseMap<MachineBasicBlock *, MachineInstr *, 2>; |
108 | |
109 | class RegDefsUses { |
110 | public: |
111 | RegDefsUses(const TargetRegisterInfo &TRI); |
112 | |
113 | void init(const MachineInstr &MI); |
114 | |
115 | /// This function sets all caller-saved registers in Defs. |
116 | void setCallerSaved(const MachineInstr &MI); |
117 | |
118 | /// This function sets all unallocatable registers in Defs. |
119 | void setUnallocatableRegs(const MachineFunction &MF); |
120 | |
121 | /// Set bits in Uses corresponding to MBB's live-out registers except for |
122 | /// the registers that are live-in to SuccBB. |
123 | void addLiveOut(const MachineBasicBlock &MBB, |
124 | const MachineBasicBlock &SuccBB); |
125 | |
126 | bool update(const MachineInstr &MI, unsigned Begin, unsigned End); |
127 | |
128 | private: |
129 | bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg, |
130 | bool IsDef) const; |
131 | |
132 | /// Returns true if Reg or its alias is in RegSet. |
133 | bool isRegInSet(const BitVector &RegSet, unsigned Reg) const; |
134 | |
135 | const TargetRegisterInfo &TRI; |
136 | BitVector Defs, Uses; |
137 | }; |
138 | |
139 | /// Base class for inspecting loads and stores. |
140 | class InspectMemInstr { |
141 | public: |
142 | InspectMemInstr(bool ForbidMemInstr_) : ForbidMemInstr(ForbidMemInstr_) {} |
143 | virtual ~InspectMemInstr() = default; |
144 | |
145 | /// Return true if MI cannot be moved to delay slot. |
146 | bool hasHazard(const MachineInstr &MI); |
147 | |
148 | protected: |
149 | /// Flags indicating whether loads or stores have been seen. |
150 | bool OrigSeenLoad = false; |
151 | bool OrigSeenStore = false; |
152 | bool SeenLoad = false; |
153 | bool SeenStore = false; |
154 | |
155 | /// Memory instructions are not allowed to move to delay slot if this flag |
156 | /// is true. |
157 | bool ForbidMemInstr; |
158 | |
159 | private: |
160 | virtual bool hasHazard_(const MachineInstr &MI) = 0; |
161 | }; |
162 | |
163 | /// This subclass rejects any memory instructions. |
164 | class NoMemInstr : public InspectMemInstr { |
165 | public: |
166 | NoMemInstr() : InspectMemInstr(true) {} |
167 | |
168 | private: |
169 | bool hasHazard_(const MachineInstr &MI) override { return true; } |
170 | }; |
171 | |
172 | /// This subclass accepts loads from stacks and constant loads. |
173 | class LoadFromStackOrConst : public InspectMemInstr { |
174 | public: |
175 | LoadFromStackOrConst() : InspectMemInstr(false) {} |
176 | |
177 | private: |
178 | bool hasHazard_(const MachineInstr &MI) override; |
179 | }; |
180 | |
181 | /// This subclass uses memory dependence information to determine whether a |
182 | /// memory instruction can be moved to a delay slot. |
183 | class MemDefsUses : public InspectMemInstr { |
184 | public: |
185 | explicit MemDefsUses(const MachineFrameInfo *MFI); |
186 | |
187 | private: |
188 | using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>; |
189 | |
190 | bool hasHazard_(const MachineInstr &MI) override; |
191 | |
192 | /// Update Defs and Uses. Return true if there exist dependences that |
193 | /// disqualify the delay slot candidate between V and values in Uses and |
194 | /// Defs. |
195 | bool updateDefsUses(ValueType V, bool MayStore); |
196 | |
197 | /// Get the list of underlying objects of MI's memory operand. |
198 | bool getUnderlyingObjects(const MachineInstr &MI, |
199 | SmallVectorImpl<ValueType> &Objects) const; |
200 | |
201 | const MachineFrameInfo *MFI; |
202 | SmallPtrSet<ValueType, 4> Uses, Defs; |
203 | |
204 | /// Flags indicating whether loads or stores with no underlying objects have |
205 | /// been seen. |
206 | bool SeenNoObjLoad = false; |
207 | bool SeenNoObjStore = false; |
208 | }; |
209 | |
210 | class MipsDelaySlotFiller : public MachineFunctionPass { |
211 | public: |
212 | MipsDelaySlotFiller() : MachineFunctionPass(ID) { |
213 | initializeMipsDelaySlotFillerPass(*PassRegistry::getPassRegistry()); |
214 | } |
215 | |
216 | StringRef getPassName() const override { return "Mips Delay Slot Filler" ; } |
217 | |
218 | bool runOnMachineFunction(MachineFunction &F) override { |
219 | TM = &F.getTarget(); |
220 | bool Changed = false; |
221 | for (MachineBasicBlock &MBB : F) |
222 | Changed |= runOnMachineBasicBlock(MBB); |
223 | |
224 | // This pass invalidates liveness information when it reorders |
225 | // instructions to fill delay slot. Without this, -verify-machineinstrs |
226 | // will fail. |
227 | if (Changed) |
228 | F.getRegInfo().invalidateLiveness(); |
229 | |
230 | return Changed; |
231 | } |
232 | |
233 | MachineFunctionProperties getRequiredProperties() const override { |
234 | return MachineFunctionProperties().set( |
235 | MachineFunctionProperties::Property::NoVRegs); |
236 | } |
237 | |
238 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
239 | AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); |
240 | MachineFunctionPass::getAnalysisUsage(AU); |
241 | } |
242 | |
243 | static char ID; |
244 | |
245 | private: |
246 | bool runOnMachineBasicBlock(MachineBasicBlock &MBB); |
247 | |
248 | Iter replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch, |
249 | const DebugLoc &DL); |
250 | |
251 | /// This function checks if it is valid to move Candidate to the delay slot |
252 | /// and returns true if it isn't. It also updates memory and register |
253 | /// dependence information. |
254 | bool delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU, |
255 | InspectMemInstr &IM) const; |
256 | |
257 | /// This function searches range [Begin, End) for an instruction that can be |
258 | /// moved to the delay slot. Returns true on success. |
259 | template<typename IterTy> |
260 | bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End, |
261 | RegDefsUses &RegDU, InspectMemInstr &IM, Iter Slot, |
262 | IterTy &Filler) const; |
263 | |
264 | /// This function searches in the backward direction for an instruction that |
265 | /// can be moved to the delay slot. Returns true on success. |
266 | bool searchBackward(MachineBasicBlock &MBB, MachineInstr &Slot) const; |
267 | |
268 | /// This function searches MBB in the forward direction for an instruction |
269 | /// that can be moved to the delay slot. Returns true on success. |
270 | bool searchForward(MachineBasicBlock &MBB, Iter Slot) const; |
271 | |
272 | /// This function searches one of MBB's successor blocks for an instruction |
273 | /// that can be moved to the delay slot and inserts clones of the |
274 | /// instruction into the successor's predecessor blocks. |
275 | bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const; |
276 | |
277 | /// Pick a successor block of MBB. Return NULL if MBB doesn't have a |
278 | /// successor block that is not a landing pad. |
279 | MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const; |
280 | |
281 | /// This function analyzes MBB and returns an instruction with an unoccupied |
282 | /// slot that branches to Dst. |
283 | std::pair<MipsInstrInfo::BranchType, MachineInstr *> |
284 | getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const; |
285 | |
286 | /// Examine Pred and see if it is possible to insert an instruction into |
287 | /// one of its branches delay slot or its end. |
288 | bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ, |
289 | RegDefsUses &RegDU, bool &HasMultipleSuccs, |
290 | BB2BrMap &BrMap) const; |
291 | |
292 | bool terminateSearch(const MachineInstr &Candidate) const; |
293 | |
294 | const TargetMachine *TM = nullptr; |
295 | }; |
296 | |
297 | } // end anonymous namespace |
298 | |
299 | char MipsDelaySlotFiller::ID = 0; |
300 | |
301 | static bool hasUnoccupiedSlot(const MachineInstr *MI) { |
302 | return MI->hasDelaySlot() && !MI->isBundledWithSucc(); |
303 | } |
304 | |
305 | INITIALIZE_PASS(MipsDelaySlotFiller, DEBUG_TYPE, |
306 | "Fill delay slot for MIPS" , false, false) |
307 | |
308 | /// This function inserts clones of Filler into predecessor blocks. |
309 | static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) { |
310 | MachineFunction *MF = Filler->getParent()->getParent(); |
311 | |
312 | for (const auto &I : BrMap) { |
313 | if (I.second) { |
314 | MIBundleBuilder(I.second).append(MI: MF->CloneMachineInstr(Orig: &*Filler)); |
315 | ++UsefulSlots; |
316 | } else { |
317 | I.first->push_back(MI: MF->CloneMachineInstr(Orig: &*Filler)); |
318 | } |
319 | } |
320 | } |
321 | |
322 | /// This function adds registers Filler defines to MBB's live-in register list. |
323 | static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) { |
324 | for (const MachineOperand &MO : Filler->operands()) { |
325 | unsigned R; |
326 | |
327 | if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg())) |
328 | continue; |
329 | |
330 | #ifndef NDEBUG |
331 | const MachineFunction &MF = *MBB.getParent(); |
332 | assert(MF.getSubtarget().getRegisterInfo()->getAllocatableSet(MF).test(R) && |
333 | "Shouldn't move an instruction with unallocatable registers across " |
334 | "basic block boundaries." ); |
335 | #endif |
336 | |
337 | if (!MBB.isLiveIn(Reg: R)) |
338 | MBB.addLiveIn(PhysReg: R); |
339 | } |
340 | } |
341 | |
342 | RegDefsUses::RegDefsUses(const TargetRegisterInfo &TRI) |
343 | : TRI(TRI), Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {} |
344 | |
345 | void RegDefsUses::init(const MachineInstr &MI) { |
346 | // Add all register operands which are explicit and non-variadic. |
347 | update(MI, Begin: 0, End: MI.getDesc().getNumOperands()); |
348 | |
349 | // If MI is a call, add RA to Defs to prevent users of RA from going into |
350 | // delay slot. |
351 | if (MI.isCall()) |
352 | Defs.set(Mips::RA); |
353 | |
354 | // Add all implicit register operands of branch instructions except |
355 | // register AT. |
356 | if (MI.isBranch()) { |
357 | update(MI, Begin: MI.getDesc().getNumOperands(), End: MI.getNumOperands()); |
358 | Defs.reset(Idx: Mips::AT); |
359 | } |
360 | } |
361 | |
362 | void RegDefsUses::setCallerSaved(const MachineInstr &MI) { |
363 | assert(MI.isCall()); |
364 | |
365 | // Add RA/RA_64 to Defs to prevent users of RA/RA_64 from going into |
366 | // the delay slot. The reason is that RA/RA_64 must not be changed |
367 | // in the delay slot so that the callee can return to the caller. |
368 | if (MI.definesRegister(Reg: Mips::RA, /*TRI=*/nullptr) || |
369 | MI.definesRegister(Reg: Mips::RA_64, /*TRI=*/nullptr)) { |
370 | Defs.set(Mips::RA); |
371 | Defs.set(Mips::RA_64); |
372 | } |
373 | |
374 | // If MI is a call, add all caller-saved registers to Defs. |
375 | BitVector CallerSavedRegs(TRI.getNumRegs(), true); |
376 | |
377 | CallerSavedRegs.reset(Idx: Mips::ZERO); |
378 | CallerSavedRegs.reset(Idx: Mips::ZERO_64); |
379 | |
380 | for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MF: MI.getParent()->getParent()); |
381 | *R; ++R) |
382 | for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI) |
383 | CallerSavedRegs.reset(Idx: *AI); |
384 | |
385 | Defs |= CallerSavedRegs; |
386 | } |
387 | |
388 | void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) { |
389 | BitVector AllocSet = TRI.getAllocatableSet(MF); |
390 | |
391 | for (unsigned R : AllocSet.set_bits()) |
392 | for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) |
393 | AllocSet.set(*AI); |
394 | |
395 | AllocSet.set(Mips::ZERO); |
396 | AllocSet.set(Mips::ZERO_64); |
397 | |
398 | Defs |= AllocSet.flip(); |
399 | } |
400 | |
401 | void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB, |
402 | const MachineBasicBlock &SuccBB) { |
403 | for (const MachineBasicBlock *S : MBB.successors()) |
404 | if (S != &SuccBB) |
405 | for (const auto &LI : S->liveins()) |
406 | Uses.set(LI.PhysReg); |
407 | } |
408 | |
409 | bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) { |
410 | BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs()); |
411 | bool HasHazard = false; |
412 | |
413 | for (unsigned I = Begin; I != End; ++I) { |
414 | const MachineOperand &MO = MI.getOperand(i: I); |
415 | |
416 | if (MO.isReg() && MO.getReg()) { |
417 | if (checkRegDefsUses(NewDefs, NewUses, Reg: MO.getReg(), IsDef: MO.isDef())) { |
418 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found register hazard for operand " |
419 | << I << ": " ; |
420 | MO.dump()); |
421 | HasHazard = true; |
422 | } |
423 | } |
424 | } |
425 | |
426 | Defs |= NewDefs; |
427 | Uses |= NewUses; |
428 | |
429 | return HasHazard; |
430 | } |
431 | |
432 | bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, |
433 | unsigned Reg, bool IsDef) const { |
434 | if (IsDef) { |
435 | NewDefs.set(Reg); |
436 | // check whether Reg has already been defined or used. |
437 | return (isRegInSet(RegSet: Defs, Reg) || isRegInSet(RegSet: Uses, Reg)); |
438 | } |
439 | |
440 | NewUses.set(Reg); |
441 | // check whether Reg has already been defined. |
442 | return isRegInSet(RegSet: Defs, Reg); |
443 | } |
444 | |
445 | bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const { |
446 | // Check Reg and all aliased Registers. |
447 | for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) |
448 | if (RegSet.test(Idx: *AI)) |
449 | return true; |
450 | return false; |
451 | } |
452 | |
453 | bool InspectMemInstr::hasHazard(const MachineInstr &MI) { |
454 | if (!MI.mayStore() && !MI.mayLoad()) |
455 | return false; |
456 | |
457 | if (ForbidMemInstr) |
458 | return true; |
459 | |
460 | OrigSeenLoad = SeenLoad; |
461 | OrigSeenStore = SeenStore; |
462 | SeenLoad |= MI.mayLoad(); |
463 | SeenStore |= MI.mayStore(); |
464 | |
465 | // If MI is an ordered or volatile memory reference, disallow moving |
466 | // subsequent loads and stores to delay slot. |
467 | if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) { |
468 | ForbidMemInstr = true; |
469 | return true; |
470 | } |
471 | |
472 | return hasHazard_(MI); |
473 | } |
474 | |
475 | bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) { |
476 | if (MI.mayStore()) |
477 | return true; |
478 | |
479 | if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getPseudoValue()) |
480 | return true; |
481 | |
482 | if (const PseudoSourceValue *PSV = |
483 | (*MI.memoperands_begin())->getPseudoValue()) { |
484 | if (isa<FixedStackPseudoSourceValue>(Val: PSV)) |
485 | return false; |
486 | return !PSV->isConstant(nullptr) && !PSV->isStack(); |
487 | } |
488 | |
489 | return true; |
490 | } |
491 | |
492 | MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_) |
493 | : InspectMemInstr(false), MFI(MFI_) {} |
494 | |
495 | bool MemDefsUses::hasHazard_(const MachineInstr &MI) { |
496 | bool HasHazard = false; |
497 | |
498 | // Check underlying object list. |
499 | SmallVector<ValueType, 4> Objs; |
500 | if (getUnderlyingObjects(MI, Objects&: Objs)) { |
501 | for (ValueType VT : Objs) |
502 | HasHazard |= updateDefsUses(V: VT, MayStore: MI.mayStore()); |
503 | return HasHazard; |
504 | } |
505 | |
506 | // No underlying objects found. |
507 | HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore); |
508 | HasHazard |= MI.mayLoad() || OrigSeenStore; |
509 | |
510 | SeenNoObjLoad |= MI.mayLoad(); |
511 | SeenNoObjStore |= MI.mayStore(); |
512 | |
513 | return HasHazard; |
514 | } |
515 | |
516 | bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) { |
517 | if (MayStore) |
518 | return !Defs.insert(Ptr: V).second || Uses.count(Ptr: V) || SeenNoObjStore || |
519 | SeenNoObjLoad; |
520 | |
521 | Uses.insert(Ptr: V); |
522 | return Defs.count(Ptr: V) || SeenNoObjStore; |
523 | } |
524 | |
525 | bool MemDefsUses:: |
526 | getUnderlyingObjects(const MachineInstr &MI, |
527 | SmallVectorImpl<ValueType> &Objects) const { |
528 | if (!MI.hasOneMemOperand()) |
529 | return false; |
530 | |
531 | auto & MMO = **MI.memoperands_begin(); |
532 | |
533 | if (const PseudoSourceValue *PSV = MMO.getPseudoValue()) { |
534 | if (!PSV->isAliased(MFI)) |
535 | return false; |
536 | Objects.push_back(Elt: PSV); |
537 | return true; |
538 | } |
539 | |
540 | if (const Value *V = MMO.getValue()) { |
541 | SmallVector<const Value *, 4> Objs; |
542 | ::getUnderlyingObjects(V, Objects&: Objs); |
543 | |
544 | for (const Value *UValue : Objs) { |
545 | if (!isIdentifiedObject(V)) |
546 | return false; |
547 | |
548 | Objects.push_back(Elt: UValue); |
549 | } |
550 | return true; |
551 | } |
552 | |
553 | return false; |
554 | } |
555 | |
556 | // Replace Branch with the compact branch instruction. |
557 | Iter MipsDelaySlotFiller::replaceWithCompactBranch(MachineBasicBlock &MBB, |
558 | Iter Branch, |
559 | const DebugLoc &DL) { |
560 | const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>(); |
561 | const MipsInstrInfo *TII = STI.getInstrInfo(); |
562 | |
563 | unsigned NewOpcode = TII->getEquivalentCompactForm(I: Branch); |
564 | Branch = TII->genInstrWithNewOpc(NewOpc: NewOpcode, I: Branch); |
565 | |
566 | auto *ToErase = cast<MachineInstr>(Val: &*std::next(x: Branch)); |
567 | // Update call site info for the Branch. |
568 | if (ToErase->shouldUpdateCallSiteInfo()) |
569 | ToErase->getMF()->moveCallSiteInfo(Old: ToErase, New: cast<MachineInstr>(Val: &*Branch)); |
570 | ToErase->eraseFromParent(); |
571 | return Branch; |
572 | } |
573 | |
574 | // For given opcode returns opcode of corresponding instruction with short |
575 | // delay slot. |
576 | // For the pseudo TAILCALL*_MM instructions return the short delay slot |
577 | // form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range |
578 | // that is too short to make use of for tail calls. |
579 | static int getEquivalentCallShort(int Opcode) { |
580 | switch (Opcode) { |
581 | case Mips::BGEZAL: |
582 | return Mips::BGEZALS_MM; |
583 | case Mips::BLTZAL: |
584 | return Mips::BLTZALS_MM; |
585 | case Mips::JAL: |
586 | case Mips::JAL_MM: |
587 | return Mips::JALS_MM; |
588 | case Mips::JALR: |
589 | return Mips::JALRS_MM; |
590 | case Mips::JALR16_MM: |
591 | return Mips::JALRS16_MM; |
592 | case Mips::TAILCALL_MM: |
593 | llvm_unreachable("Attempting to shorten the TAILCALL_MM pseudo!" ); |
594 | case Mips::TAILCALLREG: |
595 | return Mips::JR16_MM; |
596 | default: |
597 | llvm_unreachable("Unexpected call instruction for microMIPS." ); |
598 | } |
599 | } |
600 | |
601 | /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. |
602 | /// We assume there is only one delay slot per delayed instruction. |
603 | bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) { |
604 | bool Changed = false; |
605 | const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>(); |
606 | bool InMicroMipsMode = STI.inMicroMipsMode(); |
607 | const MipsInstrInfo *TII = STI.getInstrInfo(); |
608 | |
609 | for (Iter I = MBB.begin(); I != MBB.end(); ++I) { |
610 | if (!hasUnoccupiedSlot(MI: &*I)) |
611 | continue; |
612 | |
613 | // Delay slot filling is disabled at -O0, or in microMIPS32R6. |
614 | if (!DisableDelaySlotFiller && |
615 | (TM->getOptLevel() != CodeGenOptLevel::None) && |
616 | !(InMicroMipsMode && STI.hasMips32r6())) { |
617 | |
618 | bool Filled = false; |
619 | |
620 | if (MipsCompactBranchPolicy.getValue() != CB_Always || |
621 | !TII->getEquivalentCompactForm(I)) { |
622 | if (searchBackward(MBB, Slot&: *I)) { |
623 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" |
624 | " in backwards search.\n" ); |
625 | Filled = true; |
626 | } else if (I->isTerminator()) { |
627 | if (searchSuccBBs(MBB, Slot: I)) { |
628 | Filled = true; |
629 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" |
630 | " in successor BB search.\n" ); |
631 | } |
632 | } else if (searchForward(MBB, Slot: I)) { |
633 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" |
634 | " in forwards search.\n" ); |
635 | Filled = true; |
636 | } |
637 | } |
638 | |
639 | if (Filled) { |
640 | // Get instruction with delay slot. |
641 | MachineBasicBlock::instr_iterator DSI = I.getInstrIterator(); |
642 | |
643 | if (InMicroMipsMode && TII->getInstSizeInBytes(MI: *std::next(x: DSI)) == 2 && |
644 | DSI->isCall()) { |
645 | // If instruction in delay slot is 16b change opcode to |
646 | // corresponding instruction with short delay slot. |
647 | |
648 | // TODO: Implement an instruction mapping table of 16bit opcodes to |
649 | // 32bit opcodes so that an instruction can be expanded. This would |
650 | // save 16 bits as a TAILCALL_MM pseudo requires a fullsized nop. |
651 | // TODO: Permit b16 when branching backwards to the same function |
652 | // if it is in range. |
653 | DSI->setDesc(TII->get(Opcode: getEquivalentCallShort(Opcode: DSI->getOpcode()))); |
654 | } |
655 | ++FilledSlots; |
656 | Changed = true; |
657 | continue; |
658 | } |
659 | } |
660 | |
661 | // For microMIPS if instruction is BEQ or BNE with one ZERO register, then |
662 | // instead of adding NOP replace this instruction with the corresponding |
663 | // compact branch instruction, i.e. BEQZC or BNEZC. Additionally |
664 | // PseudoReturn and PseudoIndirectBranch are expanded to JR_MM, so they can |
665 | // be replaced with JRC16_MM. |
666 | |
667 | // For MIPSR6 attempt to produce the corresponding compact (no delay slot) |
668 | // form of the CTI. For indirect jumps this will not require inserting a |
669 | // NOP and for branches will hopefully avoid requiring a NOP. |
670 | if ((InMicroMipsMode || |
671 | (STI.hasMips32r6() && MipsCompactBranchPolicy != CB_Never)) && |
672 | TII->getEquivalentCompactForm(I)) { |
673 | I = replaceWithCompactBranch(MBB, Branch: I, DL: I->getDebugLoc()); |
674 | Changed = true; |
675 | continue; |
676 | } |
677 | |
678 | // Bundle the NOP to the instruction with the delay slot. |
679 | LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": could not fill delay slot for " ; |
680 | I->dump()); |
681 | TII->insertNop(MBB, MI: std::next(x: I), DL: I->getDebugLoc()); |
682 | MIBundleBuilder(MBB, I, std::next(x: I, n: 2)); |
683 | ++FilledSlots; |
684 | Changed = true; |
685 | } |
686 | |
687 | return Changed; |
688 | } |
689 | |
690 | template <typename IterTy> |
691 | bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin, |
692 | IterTy End, RegDefsUses &RegDU, |
693 | InspectMemInstr &IM, Iter Slot, |
694 | IterTy &Filler) const { |
695 | for (IterTy I = Begin; I != End;) { |
696 | IterTy CurrI = I; |
697 | ++I; |
698 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": checking instruction: " ; CurrI->dump()); |
699 | // skip debug value |
700 | if (CurrI->isDebugInstr()) { |
701 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring debug instruction: " ; |
702 | CurrI->dump()); |
703 | continue; |
704 | } |
705 | |
706 | if (CurrI->isBundle()) { |
707 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring BUNDLE instruction: " ; |
708 | CurrI->dump()); |
709 | // However, we still need to update the register def-use information. |
710 | RegDU.update(MI: *CurrI, Begin: 0, End: CurrI->getNumOperands()); |
711 | continue; |
712 | } |
713 | |
714 | if (terminateSearch(Candidate: *CurrI)) { |
715 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": should terminate search: " ; |
716 | CurrI->dump()); |
717 | break; |
718 | } |
719 | |
720 | assert((!CurrI->isCall() && !CurrI->isReturn() && !CurrI->isBranch()) && |
721 | "Cannot put calls, returns or branches in delay slot." ); |
722 | |
723 | if (CurrI->isKill()) { |
724 | CurrI->eraseFromParent(); |
725 | continue; |
726 | } |
727 | |
728 | if (delayHasHazard(Candidate: *CurrI, RegDU, IM)) |
729 | continue; |
730 | |
731 | const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>(); |
732 | if (STI.isTargetNaCl()) { |
733 | // In NaCl, instructions that must be masked are forbidden in delay slots. |
734 | // We only check for loads, stores and SP changes. Calls, returns and |
735 | // branches are not checked because non-NaCl targets never put them in |
736 | // delay slots. |
737 | unsigned AddrIdx; |
738 | if ((isBasePlusOffsetMemoryAccess(CurrI->getOpcode(), &AddrIdx) && |
739 | baseRegNeedsLoadStoreMask(CurrI->getOperand(AddrIdx).getReg())) || |
740 | CurrI->modifiesRegister(Mips::SP, STI.getRegisterInfo())) |
741 | continue; |
742 | } |
743 | |
744 | bool InMicroMipsMode = STI.inMicroMipsMode(); |
745 | const MipsInstrInfo *TII = STI.getInstrInfo(); |
746 | unsigned Opcode = (*Slot).getOpcode(); |
747 | // This is complicated by the tail call optimization. For non-PIC code |
748 | // there is only a 32bit sized unconditional branch which can be assumed |
749 | // to be able to reach the target. b16 only has a range of +/- 1 KB. |
750 | // It's entirely possible that the target function is reachable with b16 |
751 | // but we don't have enough information to make that decision. |
752 | if (InMicroMipsMode && TII->getInstSizeInBytes(MI: *CurrI) == 2 && |
753 | (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch || |
754 | Opcode == Mips::PseudoIndirectBranch_MM || |
755 | Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL)) |
756 | continue; |
757 | // Instructions LWP/SWP and MOVEP should not be in a delay slot as that |
758 | // results in unpredictable behaviour |
759 | if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM || |
760 | Opcode == Mips::MOVEP_MM)) |
761 | continue; |
762 | |
763 | Filler = CurrI; |
764 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot: " ; |
765 | CurrI->dump()); |
766 | |
767 | return true; |
768 | } |
769 | |
770 | return false; |
771 | } |
772 | |
773 | bool MipsDelaySlotFiller::searchBackward(MachineBasicBlock &MBB, |
774 | MachineInstr &Slot) const { |
775 | if (DisableBackwardSearch) |
776 | return false; |
777 | |
778 | auto *Fn = MBB.getParent(); |
779 | RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo()); |
780 | MemDefsUses MemDU(&Fn->getFrameInfo()); |
781 | ReverseIter Filler; |
782 | |
783 | RegDU.init(MI: Slot); |
784 | |
785 | MachineBasicBlock::iterator SlotI = Slot; |
786 | if (!searchRange(MBB, Begin: ++SlotI.getReverse(), End: MBB.rend(), RegDU, IM&: MemDU, Slot, |
787 | Filler)) { |
788 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay " |
789 | "slot using backwards search.\n" ); |
790 | return false; |
791 | } |
792 | |
793 | MBB.splice(Where: std::next(x: SlotI), Other: &MBB, From: Filler.getReverse()); |
794 | MIBundleBuilder(MBB, SlotI, std::next(x: SlotI, n: 2)); |
795 | ++UsefulSlots; |
796 | return true; |
797 | } |
798 | |
799 | bool MipsDelaySlotFiller::searchForward(MachineBasicBlock &MBB, |
800 | Iter Slot) const { |
801 | // Can handle only calls. |
802 | if (DisableForwardSearch || !Slot->isCall()) |
803 | return false; |
804 | |
805 | RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo()); |
806 | NoMemInstr NM; |
807 | Iter Filler; |
808 | |
809 | RegDU.setCallerSaved(*Slot); |
810 | |
811 | if (!searchRange(MBB, Begin: std::next(x: Slot), End: MBB.end(), RegDU, IM&: NM, Slot, Filler)) { |
812 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay " |
813 | "slot using forwards search.\n" ); |
814 | return false; |
815 | } |
816 | |
817 | MBB.splice(Where: std::next(x: Slot), Other: &MBB, From: Filler); |
818 | MIBundleBuilder(MBB, Slot, std::next(x: Slot, n: 2)); |
819 | ++UsefulSlots; |
820 | return true; |
821 | } |
822 | |
823 | bool MipsDelaySlotFiller::searchSuccBBs(MachineBasicBlock &MBB, |
824 | Iter Slot) const { |
825 | if (DisableSuccBBSearch) |
826 | return false; |
827 | |
828 | MachineBasicBlock *SuccBB = selectSuccBB(B&: MBB); |
829 | |
830 | if (!SuccBB) |
831 | return false; |
832 | |
833 | RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo()); |
834 | bool HasMultipleSuccs = false; |
835 | BB2BrMap BrMap; |
836 | std::unique_ptr<InspectMemInstr> IM; |
837 | Iter Filler; |
838 | auto *Fn = MBB.getParent(); |
839 | |
840 | // Iterate over SuccBB's predecessor list. |
841 | for (MachineBasicBlock *Pred : SuccBB->predecessors()) |
842 | if (!examinePred(Pred&: *Pred, Succ: *SuccBB, RegDU, HasMultipleSuccs, BrMap)) |
843 | return false; |
844 | |
845 | // Do not allow moving instructions which have unallocatable register operands |
846 | // across basic block boundaries. |
847 | RegDU.setUnallocatableRegs(*Fn); |
848 | |
849 | // Only allow moving loads from stack or constants if any of the SuccBB's |
850 | // predecessors have multiple successors. |
851 | if (HasMultipleSuccs) { |
852 | IM.reset(p: new LoadFromStackOrConst()); |
853 | } else { |
854 | const MachineFrameInfo &MFI = Fn->getFrameInfo(); |
855 | IM.reset(p: new MemDefsUses(&MFI)); |
856 | } |
857 | |
858 | if (!searchRange(MBB, Begin: SuccBB->begin(), End: SuccBB->end(), RegDU, IM&: *IM, Slot, |
859 | Filler)) |
860 | return false; |
861 | |
862 | insertDelayFiller(Filler, BrMap); |
863 | addLiveInRegs(Filler, MBB&: *SuccBB); |
864 | Filler->eraseFromParent(); |
865 | |
866 | return true; |
867 | } |
868 | |
869 | MachineBasicBlock * |
870 | MipsDelaySlotFiller::selectSuccBB(MachineBasicBlock &B) const { |
871 | if (B.succ_empty()) |
872 | return nullptr; |
873 | |
874 | // Select the successor with the larget edge weight. |
875 | auto &Prob = getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); |
876 | MachineBasicBlock *S = *std::max_element( |
877 | first: B.succ_begin(), last: B.succ_end(), |
878 | comp: [&](const MachineBasicBlock *Dst0, const MachineBasicBlock *Dst1) { |
879 | return Prob.getEdgeProbability(Src: &B, Dst: Dst0) < |
880 | Prob.getEdgeProbability(Src: &B, Dst: Dst1); |
881 | }); |
882 | return S->isEHPad() ? nullptr : S; |
883 | } |
884 | |
885 | std::pair<MipsInstrInfo::BranchType, MachineInstr *> |
886 | MipsDelaySlotFiller::getBranch(MachineBasicBlock &MBB, |
887 | const MachineBasicBlock &Dst) const { |
888 | const MipsInstrInfo *TII = |
889 | MBB.getParent()->getSubtarget<MipsSubtarget>().getInstrInfo(); |
890 | MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr; |
891 | SmallVector<MachineInstr*, 2> BranchInstrs; |
892 | SmallVector<MachineOperand, 2> Cond; |
893 | |
894 | MipsInstrInfo::BranchType R = |
895 | TII->analyzeBranch(MBB, TBB&: TrueBB, FBB&: FalseBB, Cond, AllowModify: false, BranchInstrs); |
896 | |
897 | if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch)) |
898 | return std::make_pair(x&: R, y: nullptr); |
899 | |
900 | if (R != MipsInstrInfo::BT_CondUncond) { |
901 | if (!hasUnoccupiedSlot(MI: BranchInstrs[0])) |
902 | return std::make_pair(x: MipsInstrInfo::BT_None, y: nullptr); |
903 | |
904 | assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst))); |
905 | |
906 | return std::make_pair(x&: R, y&: BranchInstrs[0]); |
907 | } |
908 | |
909 | assert((TrueBB == &Dst) || (FalseBB == &Dst)); |
910 | |
911 | // Examine the conditional branch. See if its slot is occupied. |
912 | if (hasUnoccupiedSlot(MI: BranchInstrs[0])) |
913 | return std::make_pair(x: MipsInstrInfo::BT_Cond, y&: BranchInstrs[0]); |
914 | |
915 | // If that fails, try the unconditional branch. |
916 | if (hasUnoccupiedSlot(MI: BranchInstrs[1]) && (FalseBB == &Dst)) |
917 | return std::make_pair(x: MipsInstrInfo::BT_Uncond, y&: BranchInstrs[1]); |
918 | |
919 | return std::make_pair(x: MipsInstrInfo::BT_None, y: nullptr); |
920 | } |
921 | |
922 | bool MipsDelaySlotFiller::examinePred(MachineBasicBlock &Pred, |
923 | const MachineBasicBlock &Succ, |
924 | RegDefsUses &RegDU, |
925 | bool &HasMultipleSuccs, |
926 | BB2BrMap &BrMap) const { |
927 | std::pair<MipsInstrInfo::BranchType, MachineInstr *> P = |
928 | getBranch(MBB&: Pred, Dst: Succ); |
929 | |
930 | // Return if either getBranch wasn't able to analyze the branches or there |
931 | // were no branches with unoccupied slots. |
932 | if (P.first == MipsInstrInfo::BT_None) |
933 | return false; |
934 | |
935 | if ((P.first != MipsInstrInfo::BT_Uncond) && |
936 | (P.first != MipsInstrInfo::BT_NoBranch)) { |
937 | HasMultipleSuccs = true; |
938 | RegDU.addLiveOut(MBB: Pred, SuccBB: Succ); |
939 | } |
940 | |
941 | BrMap[&Pred] = P.second; |
942 | return true; |
943 | } |
944 | |
945 | bool MipsDelaySlotFiller::delayHasHazard(const MachineInstr &Candidate, |
946 | RegDefsUses &RegDU, |
947 | InspectMemInstr &IM) const { |
948 | assert(!Candidate.isKill() && |
949 | "KILL instructions should have been eliminated at this point." ); |
950 | |
951 | bool HasHazard = Candidate.isImplicitDef(); |
952 | |
953 | HasHazard |= IM.hasHazard(MI: Candidate); |
954 | HasHazard |= RegDU.update(MI: Candidate, Begin: 0, End: Candidate.getNumOperands()); |
955 | |
956 | return HasHazard; |
957 | } |
958 | |
959 | bool MipsDelaySlotFiller::terminateSearch(const MachineInstr &Candidate) const { |
960 | return (Candidate.isTerminator() || Candidate.isCall() || |
961 | Candidate.isPosition() || Candidate.isInlineAsm() || |
962 | Candidate.hasUnmodeledSideEffects()); |
963 | } |
964 | |
965 | /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay |
966 | /// slots in Mips MachineFunctions |
967 | FunctionPass *llvm::createMipsDelaySlotFillerPass() { |
968 | return new MipsDelaySlotFiller(); |
969 | } |
970 | |