1 | //===- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler -------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Simple pass to fill delay slots with useful instructions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "MCTargetDesc/MipsMCNaCl.h" |
14 | #include "Mips.h" |
15 | #include "MipsInstrInfo.h" |
16 | #include "MipsSubtarget.h" |
17 | #include "llvm/ADT/BitVector.h" |
18 | #include "llvm/ADT/DenseMap.h" |
19 | #include "llvm/ADT/PointerUnion.h" |
20 | #include "llvm/ADT/SmallPtrSet.h" |
21 | #include "llvm/ADT/SmallVector.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/ADT/StringRef.h" |
24 | #include "llvm/Analysis/AliasAnalysis.h" |
25 | #include "llvm/Analysis/ValueTracking.h" |
26 | #include "llvm/CodeGen/MachineBasicBlock.h" |
27 | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
28 | #include "llvm/CodeGen/MachineFunction.h" |
29 | #include "llvm/CodeGen/MachineFunctionPass.h" |
30 | #include "llvm/CodeGen/MachineInstr.h" |
31 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
32 | #include "llvm/CodeGen/MachineOperand.h" |
33 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
34 | #include "llvm/CodeGen/PseudoSourceValue.h" |
35 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
36 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
37 | #include "llvm/MC/MCInstrDesc.h" |
38 | #include "llvm/MC/MCRegisterInfo.h" |
39 | #include "llvm/Support/Casting.h" |
40 | #include "llvm/Support/CodeGen.h" |
41 | #include "llvm/Support/CommandLine.h" |
42 | #include "llvm/Support/ErrorHandling.h" |
43 | #include "llvm/Target/TargetMachine.h" |
44 | #include <cassert> |
45 | #include <iterator> |
46 | #include <memory> |
47 | #include <utility> |
48 | |
49 | using namespace llvm; |
50 | |
51 | #define DEBUG_TYPE "mips-delay-slot-filler" |
52 | |
53 | STATISTIC(FilledSlots, "Number of delay slots filled" ); |
54 | STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" |
55 | " are not NOP." ); |
56 | |
57 | static cl::opt<bool> DisableDelaySlotFiller( |
58 | "disable-mips-delay-filler" , |
59 | cl::init(Val: false), |
60 | cl::desc("Fill all delay slots with NOPs." ), |
61 | cl::Hidden); |
62 | |
63 | static cl::opt<bool> DisableForwardSearch( |
64 | "disable-mips-df-forward-search" , |
65 | cl::init(Val: true), |
66 | cl::desc("Disallow MIPS delay filler to search forward." ), |
67 | cl::Hidden); |
68 | |
69 | static cl::opt<bool> DisableSuccBBSearch( |
70 | "disable-mips-df-succbb-search" , |
71 | cl::init(Val: true), |
72 | cl::desc("Disallow MIPS delay filler to search successor basic blocks." ), |
73 | cl::Hidden); |
74 | |
75 | static cl::opt<bool> DisableBackwardSearch( |
76 | "disable-mips-df-backward-search" , |
77 | cl::init(Val: false), |
78 | cl::desc("Disallow MIPS delay filler to search backward." ), |
79 | cl::Hidden); |
80 | |
/// Policy governing when compact (delay-slot-free) branches are emitted.
enum CompactBranchPolicy {
  /// 'never': may in some circumstances, or for some ISAs, not be absolutely
  /// adhered to.
  CB_Never,

  /// 'optimal': the default; produces compact branches when delay slots
  /// cannot be filled.
  CB_Optimal,

  /// 'always': may in some circumstances not be absolutely adhered to,
  /// because there may not be a corresponding compact form of a branch.
  CB_Always
};
90 | |
91 | static cl::opt<CompactBranchPolicy> MipsCompactBranchPolicy( |
92 | "mips-compact-branches" , cl::Optional, cl::init(Val: CB_Optimal), |
93 | cl::desc("MIPS Specific: Compact branch policy." ), |
94 | cl::values(clEnumValN(CB_Never, "never" , |
95 | "Do not use compact branches if possible." ), |
96 | clEnumValN(CB_Optimal, "optimal" , |
97 | "Use compact branches where appropriate (default)." ), |
98 | clEnumValN(CB_Always, "always" , |
99 | "Always use compact branches if possible." ))); |
100 | |
101 | namespace { |
102 | |
103 | using Iter = MachineBasicBlock::iterator; |
104 | using ReverseIter = MachineBasicBlock::reverse_iterator; |
105 | using BB2BrMap = SmallDenseMap<MachineBasicBlock *, MachineInstr *, 2>; |
106 | |
107 | class RegDefsUses { |
108 | public: |
109 | RegDefsUses(const TargetRegisterInfo &TRI); |
110 | |
111 | void init(const MachineInstr &MI); |
112 | |
113 | /// This function sets all caller-saved registers in Defs. |
114 | void setCallerSaved(const MachineInstr &MI); |
115 | |
116 | /// This function sets all unallocatable registers in Defs. |
117 | void setUnallocatableRegs(const MachineFunction &MF); |
118 | |
119 | /// Set bits in Uses corresponding to MBB's live-out registers except for |
120 | /// the registers that are live-in to SuccBB. |
121 | void addLiveOut(const MachineBasicBlock &MBB, |
122 | const MachineBasicBlock &SuccBB); |
123 | |
124 | bool update(const MachineInstr &MI, unsigned Begin, unsigned End); |
125 | |
126 | private: |
127 | bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg, |
128 | bool IsDef) const; |
129 | |
130 | /// Returns true if Reg or its alias is in RegSet. |
131 | bool isRegInSet(const BitVector &RegSet, unsigned Reg) const; |
132 | |
133 | const TargetRegisterInfo &TRI; |
134 | BitVector Defs, Uses; |
135 | }; |
136 | |
137 | /// Base class for inspecting loads and stores. |
138 | class InspectMemInstr { |
139 | public: |
140 | InspectMemInstr(bool ForbidMemInstr_) : ForbidMemInstr(ForbidMemInstr_) {} |
141 | virtual ~InspectMemInstr() = default; |
142 | |
143 | /// Return true if MI cannot be moved to delay slot. |
144 | bool hasHazard(const MachineInstr &MI); |
145 | |
146 | protected: |
147 | /// Flags indicating whether loads or stores have been seen. |
148 | bool OrigSeenLoad = false; |
149 | bool OrigSeenStore = false; |
150 | bool SeenLoad = false; |
151 | bool SeenStore = false; |
152 | |
153 | /// Memory instructions are not allowed to move to delay slot if this flag |
154 | /// is true. |
155 | bool ForbidMemInstr; |
156 | |
157 | private: |
158 | virtual bool hasHazard_(const MachineInstr &MI) = 0; |
159 | }; |
160 | |
161 | /// This subclass rejects any memory instructions. |
162 | class NoMemInstr : public InspectMemInstr { |
163 | public: |
164 | NoMemInstr() : InspectMemInstr(true) {} |
165 | |
166 | private: |
167 | bool hasHazard_(const MachineInstr &MI) override { return true; } |
168 | }; |
169 | |
170 | /// This subclass accepts loads from stacks and constant loads. |
171 | class LoadFromStackOrConst : public InspectMemInstr { |
172 | public: |
173 | LoadFromStackOrConst() : InspectMemInstr(false) {} |
174 | |
175 | private: |
176 | bool hasHazard_(const MachineInstr &MI) override; |
177 | }; |
178 | |
179 | /// This subclass uses memory dependence information to determine whether a |
180 | /// memory instruction can be moved to a delay slot. |
181 | class MemDefsUses : public InspectMemInstr { |
182 | public: |
183 | explicit MemDefsUses(const MachineFrameInfo *MFI); |
184 | |
185 | private: |
186 | using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>; |
187 | |
188 | bool hasHazard_(const MachineInstr &MI) override; |
189 | |
190 | /// Update Defs and Uses. Return true if there exist dependences that |
191 | /// disqualify the delay slot candidate between V and values in Uses and |
192 | /// Defs. |
193 | bool updateDefsUses(ValueType V, bool MayStore); |
194 | |
195 | /// Get the list of underlying objects of MI's memory operand. |
196 | bool getUnderlyingObjects(const MachineInstr &MI, |
197 | SmallVectorImpl<ValueType> &Objects) const; |
198 | |
199 | const MachineFrameInfo *MFI; |
200 | SmallPtrSet<ValueType, 4> Uses, Defs; |
201 | |
202 | /// Flags indicating whether loads or stores with no underlying objects have |
203 | /// been seen. |
204 | bool SeenNoObjLoad = false; |
205 | bool SeenNoObjStore = false; |
206 | }; |
207 | |
208 | class MipsDelaySlotFiller : public MachineFunctionPass { |
209 | public: |
210 | MipsDelaySlotFiller() : MachineFunctionPass(ID) {} |
211 | |
212 | StringRef getPassName() const override { return "Mips Delay Slot Filler" ; } |
213 | |
214 | bool runOnMachineFunction(MachineFunction &F) override { |
215 | TM = &F.getTarget(); |
216 | bool Changed = false; |
217 | for (MachineBasicBlock &MBB : F) |
218 | Changed |= runOnMachineBasicBlock(MBB); |
219 | |
220 | // This pass invalidates liveness information when it reorders |
221 | // instructions to fill delay slot. Without this, -verify-machineinstrs |
222 | // will fail. |
223 | if (Changed) |
224 | F.getRegInfo().invalidateLiveness(); |
225 | |
226 | return Changed; |
227 | } |
228 | |
229 | MachineFunctionProperties getRequiredProperties() const override { |
230 | return MachineFunctionProperties().setNoVRegs(); |
231 | } |
232 | |
233 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
234 | AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); |
235 | MachineFunctionPass::getAnalysisUsage(AU); |
236 | } |
237 | |
238 | static char ID; |
239 | |
240 | private: |
241 | bool runOnMachineBasicBlock(MachineBasicBlock &MBB); |
242 | |
243 | Iter replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch, |
244 | const DebugLoc &DL); |
245 | |
246 | /// This function checks if it is valid to move Candidate to the delay slot |
247 | /// and returns true if it isn't. It also updates memory and register |
248 | /// dependence information. |
249 | bool delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU, |
250 | InspectMemInstr &IM) const; |
251 | |
252 | /// This function searches range [Begin, End) for an instruction that can be |
253 | /// moved to the delay slot. Returns true on success. |
254 | template<typename IterTy> |
255 | bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End, |
256 | RegDefsUses &RegDU, InspectMemInstr &IM, Iter Slot, |
257 | IterTy &Filler) const; |
258 | |
259 | /// This function searches in the backward direction for an instruction that |
260 | /// can be moved to the delay slot. Returns true on success. |
261 | bool searchBackward(MachineBasicBlock &MBB, MachineInstr &Slot) const; |
262 | |
263 | /// This function searches MBB in the forward direction for an instruction |
264 | /// that can be moved to the delay slot. Returns true on success. |
265 | bool searchForward(MachineBasicBlock &MBB, Iter Slot) const; |
266 | |
267 | /// This function searches one of MBB's successor blocks for an instruction |
268 | /// that can be moved to the delay slot and inserts clones of the |
269 | /// instruction into the successor's predecessor blocks. |
270 | bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const; |
271 | |
272 | /// Pick a successor block of MBB. Return NULL if MBB doesn't have a |
273 | /// successor block that is not a landing pad. |
274 | MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const; |
275 | |
276 | /// This function analyzes MBB and returns an instruction with an unoccupied |
277 | /// slot that branches to Dst. |
278 | std::pair<MipsInstrInfo::BranchType, MachineInstr *> |
279 | getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const; |
280 | |
281 | /// Examine Pred and see if it is possible to insert an instruction into |
282 | /// one of its branches delay slot or its end. |
283 | bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ, |
284 | RegDefsUses &RegDU, bool &HasMultipleSuccs, |
285 | BB2BrMap &BrMap) const; |
286 | |
287 | bool terminateSearch(const MachineInstr &Candidate) const; |
288 | |
289 | const TargetMachine *TM = nullptr; |
290 | }; |
291 | |
292 | } // end anonymous namespace |
293 | |
294 | char MipsDelaySlotFiller::ID = 0; |
295 | |
296 | static bool hasUnoccupiedSlot(const MachineInstr *MI) { |
297 | return MI->hasDelaySlot() && !MI->isBundledWithSucc(); |
298 | } |
299 | |
300 | INITIALIZE_PASS(MipsDelaySlotFiller, DEBUG_TYPE, |
301 | "Fill delay slot for MIPS" , false, false) |
302 | |
303 | /// This function inserts clones of Filler into predecessor blocks. |
304 | static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) { |
305 | MachineFunction *MF = Filler->getParent()->getParent(); |
306 | |
307 | for (const auto &I : BrMap) { |
308 | if (I.second) { |
309 | MIBundleBuilder(I.second).append(MI: MF->CloneMachineInstr(Orig: &*Filler)); |
310 | ++UsefulSlots; |
311 | } else { |
312 | I.first->push_back(MI: MF->CloneMachineInstr(Orig: &*Filler)); |
313 | } |
314 | } |
315 | } |
316 | |
317 | /// This function adds registers Filler defines to MBB's live-in register list. |
318 | static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) { |
319 | for (const MachineOperand &MO : Filler->operands()) { |
320 | unsigned R; |
321 | |
322 | if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg())) |
323 | continue; |
324 | |
325 | #ifndef NDEBUG |
326 | const MachineFunction &MF = *MBB.getParent(); |
327 | assert(MF.getSubtarget().getRegisterInfo()->getAllocatableSet(MF).test(R) && |
328 | "Shouldn't move an instruction with unallocatable registers across " |
329 | "basic block boundaries." ); |
330 | #endif |
331 | |
332 | if (!MBB.isLiveIn(Reg: R)) |
333 | MBB.addLiveIn(PhysReg: R); |
334 | } |
335 | } |
336 | |
337 | RegDefsUses::RegDefsUses(const TargetRegisterInfo &TRI) |
338 | : TRI(TRI), Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {} |
339 | |
340 | void RegDefsUses::init(const MachineInstr &MI) { |
341 | // Add all register operands which are explicit and non-variadic. |
342 | update(MI, Begin: 0, End: MI.getDesc().getNumOperands()); |
343 | |
344 | // If MI is a call, add RA to Defs to prevent users of RA from going into |
345 | // delay slot. |
346 | if (MI.isCall()) |
347 | Defs.set(Mips::RA); |
348 | |
349 | // Add all implicit register operands of branch instructions except |
350 | // register AT. |
351 | if (MI.isBranch()) { |
352 | update(MI, Begin: MI.getDesc().getNumOperands(), End: MI.getNumOperands()); |
353 | Defs.reset(Idx: Mips::AT); |
354 | } |
355 | } |
356 | |
357 | void RegDefsUses::setCallerSaved(const MachineInstr &MI) { |
358 | assert(MI.isCall()); |
359 | |
360 | // Add RA/RA_64 to Defs to prevent users of RA/RA_64 from going into |
361 | // the delay slot. The reason is that RA/RA_64 must not be changed |
362 | // in the delay slot so that the callee can return to the caller. |
363 | if (MI.definesRegister(Reg: Mips::RA, /*TRI=*/nullptr) || |
364 | MI.definesRegister(Reg: Mips::RA_64, /*TRI=*/nullptr)) { |
365 | Defs.set(Mips::RA); |
366 | Defs.set(Mips::RA_64); |
367 | } |
368 | |
369 | // If MI is a call, add all caller-saved registers to Defs. |
370 | BitVector CallerSavedRegs(TRI.getNumRegs(), true); |
371 | |
372 | CallerSavedRegs.reset(Idx: Mips::ZERO); |
373 | CallerSavedRegs.reset(Idx: Mips::ZERO_64); |
374 | |
375 | for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MF: MI.getParent()->getParent()); |
376 | *R; ++R) |
377 | for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI) |
378 | CallerSavedRegs.reset(Idx: *AI); |
379 | |
380 | Defs |= CallerSavedRegs; |
381 | } |
382 | |
383 | void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) { |
384 | BitVector AllocSet = TRI.getAllocatableSet(MF); |
385 | |
386 | for (unsigned R : AllocSet.set_bits()) |
387 | for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) |
388 | AllocSet.set(*AI); |
389 | |
390 | AllocSet.set(Mips::ZERO); |
391 | AllocSet.set(Mips::ZERO_64); |
392 | |
393 | Defs |= AllocSet.flip(); |
394 | } |
395 | |
396 | void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB, |
397 | const MachineBasicBlock &SuccBB) { |
398 | for (const MachineBasicBlock *S : MBB.successors()) |
399 | if (S != &SuccBB) |
400 | for (const auto &LI : S->liveins()) |
401 | Uses.set(LI.PhysReg.id()); |
402 | } |
403 | |
404 | bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) { |
405 | BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs()); |
406 | bool HasHazard = false; |
407 | |
408 | for (unsigned I = Begin; I != End; ++I) { |
409 | const MachineOperand &MO = MI.getOperand(i: I); |
410 | |
411 | if (MO.isReg() && MO.getReg()) { |
412 | if (checkRegDefsUses(NewDefs, NewUses, Reg: MO.getReg(), IsDef: MO.isDef())) { |
413 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found register hazard for operand " |
414 | << I << ": " ; |
415 | MO.dump()); |
416 | HasHazard = true; |
417 | } |
418 | } |
419 | } |
420 | |
421 | Defs |= NewDefs; |
422 | Uses |= NewUses; |
423 | |
424 | return HasHazard; |
425 | } |
426 | |
427 | bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, |
428 | unsigned Reg, bool IsDef) const { |
429 | if (IsDef) { |
430 | NewDefs.set(Reg); |
431 | // check whether Reg has already been defined or used. |
432 | return (isRegInSet(RegSet: Defs, Reg) || isRegInSet(RegSet: Uses, Reg)); |
433 | } |
434 | |
435 | NewUses.set(Reg); |
436 | // check whether Reg has already been defined. |
437 | return isRegInSet(RegSet: Defs, Reg); |
438 | } |
439 | |
440 | bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const { |
441 | // Check Reg and all aliased Registers. |
442 | for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) |
443 | if (RegSet.test(Idx: *AI)) |
444 | return true; |
445 | return false; |
446 | } |
447 | |
448 | bool InspectMemInstr::hasHazard(const MachineInstr &MI) { |
449 | if (!MI.mayStore() && !MI.mayLoad()) |
450 | return false; |
451 | |
452 | if (ForbidMemInstr) |
453 | return true; |
454 | |
455 | OrigSeenLoad = SeenLoad; |
456 | OrigSeenStore = SeenStore; |
457 | SeenLoad |= MI.mayLoad(); |
458 | SeenStore |= MI.mayStore(); |
459 | |
460 | // If MI is an ordered or volatile memory reference, disallow moving |
461 | // subsequent loads and stores to delay slot. |
462 | if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) { |
463 | ForbidMemInstr = true; |
464 | return true; |
465 | } |
466 | |
467 | return hasHazard_(MI); |
468 | } |
469 | |
470 | bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) { |
471 | if (MI.mayStore()) |
472 | return true; |
473 | |
474 | if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getPseudoValue()) |
475 | return true; |
476 | |
477 | if (const PseudoSourceValue *PSV = |
478 | (*MI.memoperands_begin())->getPseudoValue()) { |
479 | if (isa<FixedStackPseudoSourceValue>(Val: PSV)) |
480 | return false; |
481 | return !PSV->isConstant(nullptr) && !PSV->isStack(); |
482 | } |
483 | |
484 | return true; |
485 | } |
486 | |
487 | MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_) |
488 | : InspectMemInstr(false), MFI(MFI_) {} |
489 | |
490 | bool MemDefsUses::hasHazard_(const MachineInstr &MI) { |
491 | bool HasHazard = false; |
492 | |
493 | // Check underlying object list. |
494 | SmallVector<ValueType, 4> Objs; |
495 | if (getUnderlyingObjects(MI, Objects&: Objs)) { |
496 | for (ValueType VT : Objs) |
497 | HasHazard |= updateDefsUses(V: VT, MayStore: MI.mayStore()); |
498 | return HasHazard; |
499 | } |
500 | |
501 | // No underlying objects found. |
502 | HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore); |
503 | HasHazard |= MI.mayLoad() || OrigSeenStore; |
504 | |
505 | SeenNoObjLoad |= MI.mayLoad(); |
506 | SeenNoObjStore |= MI.mayStore(); |
507 | |
508 | return HasHazard; |
509 | } |
510 | |
511 | bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) { |
512 | if (MayStore) |
513 | return !Defs.insert(Ptr: V).second || Uses.count(Ptr: V) || SeenNoObjStore || |
514 | SeenNoObjLoad; |
515 | |
516 | Uses.insert(Ptr: V); |
517 | return Defs.count(Ptr: V) || SeenNoObjStore; |
518 | } |
519 | |
520 | bool MemDefsUses:: |
521 | getUnderlyingObjects(const MachineInstr &MI, |
522 | SmallVectorImpl<ValueType> &Objects) const { |
523 | if (!MI.hasOneMemOperand()) |
524 | return false; |
525 | |
526 | auto & MMO = **MI.memoperands_begin(); |
527 | |
528 | if (const PseudoSourceValue *PSV = MMO.getPseudoValue()) { |
529 | if (!PSV->isAliased(MFI)) |
530 | return false; |
531 | Objects.push_back(Elt: PSV); |
532 | return true; |
533 | } |
534 | |
535 | if (const Value *V = MMO.getValue()) { |
536 | SmallVector<const Value *, 4> Objs; |
537 | ::getUnderlyingObjects(V, Objects&: Objs); |
538 | |
539 | for (const Value *UValue : Objs) { |
540 | if (!isIdentifiedObject(V)) |
541 | return false; |
542 | |
543 | Objects.push_back(Elt: UValue); |
544 | } |
545 | return true; |
546 | } |
547 | |
548 | return false; |
549 | } |
550 | |
551 | // Replace Branch with the compact branch instruction. |
552 | Iter MipsDelaySlotFiller::replaceWithCompactBranch(MachineBasicBlock &MBB, |
553 | Iter Branch, |
554 | const DebugLoc &DL) { |
555 | const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>(); |
556 | const MipsInstrInfo *TII = STI.getInstrInfo(); |
557 | |
558 | unsigned NewOpcode = TII->getEquivalentCompactForm(I: Branch); |
559 | Branch = TII->genInstrWithNewOpc(NewOpc: NewOpcode, I: Branch); |
560 | |
561 | auto *ToErase = cast<MachineInstr>(Val: &*std::next(x: Branch)); |
562 | // Update call info for the Branch. |
563 | if (ToErase->shouldUpdateAdditionalCallInfo()) |
564 | ToErase->getMF()->moveAdditionalCallInfo(Old: ToErase, |
565 | New: cast<MachineInstr>(Val: &*Branch)); |
566 | ToErase->eraseFromParent(); |
567 | return Branch; |
568 | } |
569 | |
570 | // For given opcode returns opcode of corresponding instruction with short |
571 | // delay slot. |
572 | // For the pseudo TAILCALL*_MM instructions return the short delay slot |
573 | // form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range |
574 | // that is too short to make use of for tail calls. |
575 | static int getEquivalentCallShort(int Opcode) { |
576 | switch (Opcode) { |
577 | case Mips::BGEZAL: |
578 | return Mips::BGEZALS_MM; |
579 | case Mips::BLTZAL: |
580 | return Mips::BLTZALS_MM; |
581 | case Mips::JAL: |
582 | case Mips::JAL_MM: |
583 | return Mips::JALS_MM; |
584 | case Mips::JALR: |
585 | return Mips::JALRS_MM; |
586 | case Mips::JALR16_MM: |
587 | return Mips::JALRS16_MM; |
588 | case Mips::TAILCALL_MM: |
589 | llvm_unreachable("Attempting to shorten the TAILCALL_MM pseudo!" ); |
590 | case Mips::TAILCALLREG: |
591 | return Mips::JR16_MM; |
592 | default: |
593 | llvm_unreachable("Unexpected call instruction for microMIPS." ); |
594 | } |
595 | } |
596 | |
597 | /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. |
598 | /// We assume there is only one delay slot per delayed instruction. |
599 | bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) { |
600 | bool Changed = false; |
601 | const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>(); |
602 | bool InMicroMipsMode = STI.inMicroMipsMode(); |
603 | const MipsInstrInfo *TII = STI.getInstrInfo(); |
604 | |
605 | for (Iter I = MBB.begin(); I != MBB.end(); ++I) { |
606 | if (!hasUnoccupiedSlot(MI: &*I)) |
607 | continue; |
608 | |
609 | // Delay slot filling is disabled at -O0, or in microMIPS32R6. |
610 | if (!DisableDelaySlotFiller && |
611 | (TM->getOptLevel() != CodeGenOptLevel::None) && |
612 | !(InMicroMipsMode && STI.hasMips32r6())) { |
613 | |
614 | bool Filled = false; |
615 | |
616 | if (MipsCompactBranchPolicy.getValue() != CB_Always || |
617 | !TII->getEquivalentCompactForm(I)) { |
618 | if (searchBackward(MBB, Slot&: *I)) { |
619 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" |
620 | " in backwards search.\n" ); |
621 | Filled = true; |
622 | } else if (I->isTerminator()) { |
623 | if (searchSuccBBs(MBB, Slot: I)) { |
624 | Filled = true; |
625 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" |
626 | " in successor BB search.\n" ); |
627 | } |
628 | } else if (searchForward(MBB, Slot: I)) { |
629 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot" |
630 | " in forwards search.\n" ); |
631 | Filled = true; |
632 | } |
633 | } |
634 | |
635 | if (Filled) { |
636 | // Get instruction with delay slot. |
637 | MachineBasicBlock::instr_iterator DSI = I.getInstrIterator(); |
638 | |
639 | if (InMicroMipsMode && TII->getInstSizeInBytes(MI: *std::next(x: DSI)) == 2 && |
640 | DSI->isCall()) { |
641 | // If instruction in delay slot is 16b change opcode to |
642 | // corresponding instruction with short delay slot. |
643 | |
644 | // TODO: Implement an instruction mapping table of 16bit opcodes to |
645 | // 32bit opcodes so that an instruction can be expanded. This would |
646 | // save 16 bits as a TAILCALL_MM pseudo requires a fullsized nop. |
647 | // TODO: Permit b16 when branching backwards to the same function |
648 | // if it is in range. |
649 | DSI->setDesc(TII->get(Opcode: getEquivalentCallShort(Opcode: DSI->getOpcode()))); |
650 | } |
651 | ++FilledSlots; |
652 | Changed = true; |
653 | continue; |
654 | } |
655 | } |
656 | |
657 | // For microMIPS if instruction is BEQ or BNE with one ZERO register, then |
658 | // instead of adding NOP replace this instruction with the corresponding |
659 | // compact branch instruction, i.e. BEQZC or BNEZC. Additionally |
660 | // PseudoReturn and PseudoIndirectBranch are expanded to JR_MM, so they can |
661 | // be replaced with JRC16_MM. |
662 | |
663 | // For MIPSR6 attempt to produce the corresponding compact (no delay slot) |
664 | // form of the CTI. For indirect jumps this will not require inserting a |
665 | // NOP and for branches will hopefully avoid requiring a NOP. |
666 | if ((InMicroMipsMode || |
667 | (STI.hasMips32r6() && MipsCompactBranchPolicy != CB_Never)) && |
668 | TII->getEquivalentCompactForm(I)) { |
669 | I = replaceWithCompactBranch(MBB, Branch: I, DL: I->getDebugLoc()); |
670 | Changed = true; |
671 | continue; |
672 | } |
673 | |
674 | // Bundle the NOP to the instruction with the delay slot. |
675 | LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": could not fill delay slot for " ; |
676 | I->dump()); |
677 | TII->insertNop(MBB, MI: std::next(x: I), DL: I->getDebugLoc()); |
678 | MIBundleBuilder(MBB, I, std::next(x: I, n: 2)); |
679 | ++FilledSlots; |
680 | Changed = true; |
681 | } |
682 | |
683 | return Changed; |
684 | } |
685 | |
686 | template <typename IterTy> |
687 | bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin, |
688 | IterTy End, RegDefsUses &RegDU, |
689 | InspectMemInstr &IM, Iter Slot, |
690 | IterTy &Filler) const { |
691 | for (IterTy I = Begin; I != End;) { |
692 | IterTy CurrI = I; |
693 | ++I; |
694 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": checking instruction: " ; CurrI->dump()); |
695 | // Skip debug value. |
696 | // Instruction TargetOpcode::JUMP_TABLE_DEBUG_INFO is only used to note |
697 | // jump table debug info. |
698 | if (CurrI->isDebugInstr() || CurrI->isJumpTableDebugInfo()) { |
699 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring debug instruction: " ; |
700 | CurrI->dump()); |
701 | continue; |
702 | } |
703 | |
704 | if (CurrI->isBundle()) { |
705 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring BUNDLE instruction: " ; |
706 | CurrI->dump()); |
707 | // However, we still need to update the register def-use information. |
708 | RegDU.update(MI: *CurrI, Begin: 0, End: CurrI->getNumOperands()); |
709 | continue; |
710 | } |
711 | |
712 | if (terminateSearch(Candidate: *CurrI)) { |
713 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": should terminate search: " ; |
714 | CurrI->dump()); |
715 | break; |
716 | } |
717 | |
718 | assert((!CurrI->isCall() && !CurrI->isReturn() && !CurrI->isBranch()) && |
719 | "Cannot put calls, returns or branches in delay slot." ); |
720 | |
721 | if (CurrI->isKill()) { |
722 | CurrI->eraseFromParent(); |
723 | continue; |
724 | } |
725 | |
726 | if (delayHasHazard(Candidate: *CurrI, RegDU, IM)) |
727 | continue; |
728 | |
729 | const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>(); |
730 | if (STI.isTargetNaCl()) { |
731 | // In NaCl, instructions that must be masked are forbidden in delay slots. |
732 | // We only check for loads, stores and SP changes. Calls, returns and |
733 | // branches are not checked because non-NaCl targets never put them in |
734 | // delay slots. |
735 | unsigned AddrIdx; |
736 | if ((isBasePlusOffsetMemoryAccess(CurrI->getOpcode(), &AddrIdx) && |
737 | baseRegNeedsLoadStoreMask(CurrI->getOperand(AddrIdx).getReg())) || |
738 | CurrI->modifiesRegister(Mips::SP, STI.getRegisterInfo())) |
739 | continue; |
740 | } |
741 | |
742 | bool InMicroMipsMode = STI.inMicroMipsMode(); |
743 | const MipsInstrInfo *TII = STI.getInstrInfo(); |
744 | unsigned Opcode = (*Slot).getOpcode(); |
745 | |
746 | // In mips1-4, should not put mflo into the delay slot for the return. |
747 | if ((IsMFLOMFHI(CurrI->getOpcode())) && |
748 | (!STI.hasMips32() && !STI.hasMips5())) |
749 | continue; |
750 | |
751 | // This is complicated by the tail call optimization. For non-PIC code |
752 | // there is only a 32bit sized unconditional branch which can be assumed |
753 | // to be able to reach the target. b16 only has a range of +/- 1 KB. |
754 | // It's entirely possible that the target function is reachable with b16 |
755 | // but we don't have enough information to make that decision. |
756 | if (InMicroMipsMode && TII->getInstSizeInBytes(MI: *CurrI) == 2 && |
757 | (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch || |
758 | Opcode == Mips::PseudoIndirectBranch_MM || |
759 | Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL)) |
760 | continue; |
761 | // Instructions LWP/SWP and MOVEP should not be in a delay slot as that |
762 | // results in unpredictable behaviour |
763 | if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM || |
764 | Opcode == Mips::MOVEP_MM)) |
765 | continue; |
766 | |
767 | Filler = CurrI; |
768 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot: " ; |
769 | CurrI->dump()); |
770 | |
771 | return true; |
772 | } |
773 | |
774 | return false; |
775 | } |
776 | |
777 | bool MipsDelaySlotFiller::searchBackward(MachineBasicBlock &MBB, |
778 | MachineInstr &Slot) const { |
779 | if (DisableBackwardSearch) |
780 | return false; |
781 | |
782 | auto *Fn = MBB.getParent(); |
783 | RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo()); |
784 | MemDefsUses MemDU(&Fn->getFrameInfo()); |
785 | ReverseIter Filler; |
786 | |
787 | RegDU.init(MI: Slot); |
788 | |
789 | MachineBasicBlock::iterator SlotI = Slot; |
790 | if (!searchRange(MBB, Begin: ++SlotI.getReverse(), End: MBB.rend(), RegDU, IM&: MemDU, Slot, |
791 | Filler)) { |
792 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay " |
793 | "slot using backwards search.\n" ); |
794 | return false; |
795 | } |
796 | |
797 | MBB.splice(Where: std::next(x: SlotI), Other: &MBB, From: Filler.getReverse()); |
798 | MIBundleBuilder(MBB, SlotI, std::next(x: SlotI, n: 2)); |
799 | ++UsefulSlots; |
800 | return true; |
801 | } |
802 | |
803 | bool MipsDelaySlotFiller::searchForward(MachineBasicBlock &MBB, |
804 | Iter Slot) const { |
805 | // Can handle only calls. |
806 | if (DisableForwardSearch || !Slot->isCall()) |
807 | return false; |
808 | |
809 | RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo()); |
810 | NoMemInstr NM; |
811 | Iter Filler; |
812 | |
813 | RegDU.setCallerSaved(*Slot); |
814 | |
815 | if (!searchRange(MBB, Begin: std::next(x: Slot), End: MBB.end(), RegDU, IM&: NM, Slot, Filler)) { |
816 | LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay " |
817 | "slot using forwards search.\n" ); |
818 | return false; |
819 | } |
820 | |
821 | MBB.splice(Where: std::next(x: Slot), Other: &MBB, From: Filler); |
822 | MIBundleBuilder(MBB, Slot, std::next(x: Slot, n: 2)); |
823 | ++UsefulSlots; |
824 | return true; |
825 | } |
826 | |
827 | bool MipsDelaySlotFiller::searchSuccBBs(MachineBasicBlock &MBB, |
828 | Iter Slot) const { |
829 | if (DisableSuccBBSearch) |
830 | return false; |
831 | |
832 | MachineBasicBlock *SuccBB = selectSuccBB(B&: MBB); |
833 | |
834 | if (!SuccBB) |
835 | return false; |
836 | |
837 | RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo()); |
838 | bool HasMultipleSuccs = false; |
839 | BB2BrMap BrMap; |
840 | std::unique_ptr<InspectMemInstr> IM; |
841 | Iter Filler; |
842 | auto *Fn = MBB.getParent(); |
843 | |
844 | // Iterate over SuccBB's predecessor list. |
845 | for (MachineBasicBlock *Pred : SuccBB->predecessors()) |
846 | if (!examinePred(Pred&: *Pred, Succ: *SuccBB, RegDU, HasMultipleSuccs, BrMap)) |
847 | return false; |
848 | |
849 | // Do not allow moving instructions which have unallocatable register operands |
850 | // across basic block boundaries. |
851 | RegDU.setUnallocatableRegs(*Fn); |
852 | |
853 | // Only allow moving loads from stack or constants if any of the SuccBB's |
854 | // predecessors have multiple successors. |
855 | if (HasMultipleSuccs) { |
856 | IM.reset(p: new LoadFromStackOrConst()); |
857 | } else { |
858 | const MachineFrameInfo &MFI = Fn->getFrameInfo(); |
859 | IM.reset(p: new MemDefsUses(&MFI)); |
860 | } |
861 | |
862 | if (!searchRange(MBB, Begin: SuccBB->begin(), End: SuccBB->end(), RegDU, IM&: *IM, Slot, |
863 | Filler)) |
864 | return false; |
865 | |
866 | insertDelayFiller(Filler, BrMap); |
867 | addLiveInRegs(Filler, MBB&: *SuccBB); |
868 | Filler->eraseFromParent(); |
869 | |
870 | return true; |
871 | } |
872 | |
873 | MachineBasicBlock * |
874 | MipsDelaySlotFiller::selectSuccBB(MachineBasicBlock &B) const { |
875 | if (B.succ_empty()) |
876 | return nullptr; |
877 | |
878 | // Select the successor with the larget edge weight. |
879 | auto &Prob = getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); |
880 | MachineBasicBlock *S = |
881 | *llvm::max_element(Range: B.successors(), C: [&](const MachineBasicBlock *Dst0, |
882 | const MachineBasicBlock *Dst1) { |
883 | return Prob.getEdgeProbability(Src: &B, Dst: Dst0) < |
884 | Prob.getEdgeProbability(Src: &B, Dst: Dst1); |
885 | }); |
886 | return S->isEHPad() ? nullptr : S; |
887 | } |
888 | |
889 | std::pair<MipsInstrInfo::BranchType, MachineInstr *> |
890 | MipsDelaySlotFiller::getBranch(MachineBasicBlock &MBB, |
891 | const MachineBasicBlock &Dst) const { |
892 | const MipsInstrInfo *TII = |
893 | MBB.getParent()->getSubtarget<MipsSubtarget>().getInstrInfo(); |
894 | MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr; |
895 | SmallVector<MachineInstr*, 2> BranchInstrs; |
896 | SmallVector<MachineOperand, 2> Cond; |
897 | |
898 | MipsInstrInfo::BranchType R = |
899 | TII->analyzeBranch(MBB, TBB&: TrueBB, FBB&: FalseBB, Cond, AllowModify: false, BranchInstrs); |
900 | |
901 | if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch)) |
902 | return std::make_pair(x&: R, y: nullptr); |
903 | |
904 | if (R != MipsInstrInfo::BT_CondUncond) { |
905 | if (!hasUnoccupiedSlot(MI: BranchInstrs[0])) |
906 | return std::make_pair(x: MipsInstrInfo::BT_None, y: nullptr); |
907 | |
908 | assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst))); |
909 | |
910 | return std::make_pair(x&: R, y&: BranchInstrs[0]); |
911 | } |
912 | |
913 | assert((TrueBB == &Dst) || (FalseBB == &Dst)); |
914 | |
915 | // Examine the conditional branch. See if its slot is occupied. |
916 | if (hasUnoccupiedSlot(MI: BranchInstrs[0])) |
917 | return std::make_pair(x: MipsInstrInfo::BT_Cond, y&: BranchInstrs[0]); |
918 | |
919 | // If that fails, try the unconditional branch. |
920 | if (hasUnoccupiedSlot(MI: BranchInstrs[1]) && (FalseBB == &Dst)) |
921 | return std::make_pair(x: MipsInstrInfo::BT_Uncond, y&: BranchInstrs[1]); |
922 | |
923 | return std::make_pair(x: MipsInstrInfo::BT_None, y: nullptr); |
924 | } |
925 | |
926 | bool MipsDelaySlotFiller::examinePred(MachineBasicBlock &Pred, |
927 | const MachineBasicBlock &Succ, |
928 | RegDefsUses &RegDU, |
929 | bool &HasMultipleSuccs, |
930 | BB2BrMap &BrMap) const { |
931 | std::pair<MipsInstrInfo::BranchType, MachineInstr *> P = |
932 | getBranch(MBB&: Pred, Dst: Succ); |
933 | |
934 | // Return if either getBranch wasn't able to analyze the branches or there |
935 | // were no branches with unoccupied slots. |
936 | if (P.first == MipsInstrInfo::BT_None) |
937 | return false; |
938 | |
939 | if ((P.first != MipsInstrInfo::BT_Uncond) && |
940 | (P.first != MipsInstrInfo::BT_NoBranch)) { |
941 | HasMultipleSuccs = true; |
942 | RegDU.addLiveOut(MBB: Pred, SuccBB: Succ); |
943 | } |
944 | |
945 | BrMap[&Pred] = P.second; |
946 | return true; |
947 | } |
948 | |
949 | bool MipsDelaySlotFiller::delayHasHazard(const MachineInstr &Candidate, |
950 | RegDefsUses &RegDU, |
951 | InspectMemInstr &IM) const { |
952 | assert(!Candidate.isKill() && |
953 | "KILL instructions should have been eliminated at this point." ); |
954 | |
955 | bool HasHazard = Candidate.isImplicitDef(); |
956 | |
957 | HasHazard |= IM.hasHazard(MI: Candidate); |
958 | HasHazard |= RegDU.update(MI: Candidate, Begin: 0, End: Candidate.getNumOperands()); |
959 | |
960 | return HasHazard; |
961 | } |
962 | |
963 | bool MipsDelaySlotFiller::terminateSearch(const MachineInstr &Candidate) const { |
964 | return (Candidate.isTerminator() || Candidate.isCall() || |
965 | Candidate.isPosition() || Candidate.isInlineAsm() || |
966 | Candidate.hasUnmodeledSideEffects()); |
967 | } |
968 | |
969 | /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay |
970 | /// slots in Mips MachineFunctions |
971 | FunctionPass *llvm::createMipsDelaySlotFillerPass() { |
972 | return new MipsDelaySlotFiller(); |
973 | } |
974 | |