1 | //===----------------------- MipsBranchExpansion.cpp ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// |
10 | /// This pass do two things: |
11 | /// - it expands a branch or jump instruction into a long branch if its offset |
12 | /// is too large to fit into its immediate field, |
13 | /// - it inserts nops to prevent forbidden slot hazards. |
14 | /// |
15 | /// The reason why this pass combines these two tasks is that one of these two |
16 | /// tasks can break the result of the previous one. |
17 | /// |
18 | /// Example of that is a situation where at first, no branch should be expanded, |
19 | /// but after adding at least one nop somewhere in the code to prevent a |
20 | /// forbidden slot hazard, offset of some branches may go out of range. In that |
21 | /// case it is necessary to check again if there is some branch that needs |
22 | /// expansion. On the other hand, expanding some branch may cause a control |
23 | /// transfer instruction to appear in the forbidden slot, which is a hazard that |
24 | /// should be fixed. This pass alternates between this two tasks untill no |
25 | /// changes are made. Only then we can be sure that all branches are expanded |
26 | /// properly, and no hazard situations exist. |
27 | /// |
28 | /// Regarding branch expanding: |
29 | /// |
30 | /// When branch instruction like beqzc or bnezc has offset that is too large |
31 | /// to fit into its immediate field, it has to be expanded to another |
32 | /// instruction or series of instructions. |
33 | /// |
34 | /// FIXME: Fix pc-region jump instructions which cross 256MB segment boundaries. |
35 | /// TODO: Handle out of range bc, b (pseudo) instructions. |
36 | /// |
37 | /// Regarding compact branch hazard prevention: |
38 | /// |
39 | /// Hazards handled: forbidden slots for MIPSR6, FPU slots for MIPS3 and below, |
40 | /// load delay slots for MIPS1. |
41 | /// |
42 | /// A forbidden slot hazard occurs when a compact branch instruction is executed |
43 | /// and the adjacent instruction in memory is a control transfer instruction |
44 | /// such as a branch or jump, ERET, ERETNC, DERET, WAIT and PAUSE. |
45 | /// |
46 | /// For example: |
47 | /// |
48 | /// 0x8004 bnec a1,v0,<P+0x18> |
49 | /// 0x8008 beqc a1,a2,<P+0x54> |
50 | /// |
51 | /// In such cases, the processor is required to signal a Reserved Instruction |
52 | /// exception. |
53 | /// |
54 | /// Here, if the instruction at 0x8004 is executed, the processor will raise an |
55 | /// exception as there is a control transfer instruction at 0x8008. |
56 | /// |
57 | /// There are two sources of forbidden slot hazards: |
58 | /// |
59 | /// A) A previous pass has created a compact branch directly. |
60 | /// B) Transforming a delay slot branch into compact branch. This case can be |
61 | /// difficult to process as lookahead for hazards is insufficient, as |
62 | /// backwards delay slot fillling can also produce hazards in previously |
63 | /// processed instuctions. |
64 | /// |
65 | /// In future this pass can be extended (or new pass can be created) to handle |
66 | /// other pipeline hazards, such as various MIPS1 hazards, processor errata that |
67 | /// require instruction reorganization, etc. |
68 | /// |
69 | /// This pass has to run after the delay slot filler as that pass can introduce |
70 | /// pipeline hazards such as compact branch hazard, hence the existing hazard |
71 | /// recognizer is not suitable. |
72 | /// |
73 | //===----------------------------------------------------------------------===// |
74 | |
75 | #include "MCTargetDesc/MipsABIInfo.h" |
76 | #include "MCTargetDesc/MipsBaseInfo.h" |
77 | #include "MCTargetDesc/MipsMCNaCl.h" |
78 | #include "MCTargetDesc/MipsMCTargetDesc.h" |
79 | #include "Mips.h" |
80 | #include "MipsInstrInfo.h" |
81 | #include "MipsMachineFunction.h" |
82 | #include "MipsSubtarget.h" |
83 | #include "MipsTargetMachine.h" |
84 | #include "llvm/ADT/SmallVector.h" |
85 | #include "llvm/ADT/Statistic.h" |
86 | #include "llvm/ADT/StringRef.h" |
87 | #include "llvm/CodeGen/MachineBasicBlock.h" |
88 | #include "llvm/CodeGen/MachineFunction.h" |
89 | #include "llvm/CodeGen/MachineFunctionPass.h" |
90 | #include "llvm/CodeGen/MachineInstr.h" |
91 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
92 | #include "llvm/CodeGen/MachineModuleInfo.h" |
93 | #include "llvm/CodeGen/MachineOperand.h" |
94 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
95 | #include "llvm/IR/DebugLoc.h" |
96 | #include "llvm/Support/CommandLine.h" |
97 | #include "llvm/Support/ErrorHandling.h" |
98 | #include "llvm/Support/MathExtras.h" |
99 | #include "llvm/Target/TargetMachine.h" |
100 | #include <algorithm> |
101 | #include <cassert> |
102 | #include <cstdint> |
103 | #include <iterator> |
104 | #include <utility> |
105 | |
106 | using namespace llvm; |
107 | |
108 | #define DEBUG_TYPE "mips-branch-expansion" |
109 | |
110 | STATISTIC(NumInsertedNops, "Number of nops inserted" ); |
111 | STATISTIC(LongBranches, "Number of long branches." ); |
112 | |
113 | static cl::opt<bool> |
114 | SkipLongBranch("skip-mips-long-branch" , cl::init(Val: false), |
115 | cl::desc("MIPS: Skip branch expansion pass." ), cl::Hidden); |
116 | |
117 | static cl::opt<bool> |
118 | ForceLongBranch("force-mips-long-branch" , cl::init(Val: false), |
119 | cl::desc("MIPS: Expand all branches to long format." ), |
120 | cl::Hidden); |
121 | |
122 | namespace { |
123 | |
124 | using Iter = MachineBasicBlock::iterator; |
125 | using ReverseIter = MachineBasicBlock::reverse_iterator; |
126 | |
127 | struct MBBInfo { |
128 | uint64_t Size = 0; |
129 | bool HasLongBranch = false; |
130 | MachineInstr *Br = nullptr; |
131 | uint64_t Offset = 0; |
132 | MBBInfo() = default; |
133 | }; |
134 | |
135 | class MipsBranchExpansion : public MachineFunctionPass { |
136 | public: |
137 | static char ID; |
138 | |
139 | MipsBranchExpansion() : MachineFunctionPass(ID), ABI(MipsABIInfo::Unknown()) { |
140 | initializeMipsBranchExpansionPass(*PassRegistry::getPassRegistry()); |
141 | } |
142 | |
143 | StringRef getPassName() const override { |
144 | return "Mips Branch Expansion Pass" ; |
145 | } |
146 | |
147 | bool runOnMachineFunction(MachineFunction &F) override; |
148 | |
149 | MachineFunctionProperties getRequiredProperties() const override { |
150 | return MachineFunctionProperties().set( |
151 | MachineFunctionProperties::Property::NoVRegs); |
152 | } |
153 | |
154 | private: |
155 | void splitMBB(MachineBasicBlock *MBB); |
156 | void initMBBInfo(); |
157 | int64_t computeOffset(const MachineInstr *Br); |
158 | uint64_t computeOffsetFromTheBeginning(int MBB); |
159 | void replaceBranch(MachineBasicBlock &MBB, Iter Br, const DebugLoc &DL, |
160 | MachineBasicBlock *MBBOpnd); |
161 | bool buildProperJumpMI(MachineBasicBlock *MBB, |
162 | MachineBasicBlock::iterator Pos, DebugLoc DL); |
163 | void expandToLongBranch(MBBInfo &Info); |
164 | template <typename Pred, typename Safe> |
165 | bool handleSlot(Pred Predicate, Safe SafeInSlot); |
166 | bool handleForbiddenSlot(); |
167 | bool handleFPUDelaySlot(); |
168 | bool handleLoadDelaySlot(); |
169 | bool handlePossibleLongBranch(); |
170 | |
171 | const MipsSubtarget *STI; |
172 | const MipsInstrInfo *TII; |
173 | |
174 | MachineFunction *MFp; |
175 | SmallVector<MBBInfo, 16> MBBInfos; |
176 | bool IsPIC; |
177 | MipsABIInfo ABI; |
178 | bool ForceLongBranchFirstPass = false; |
179 | }; |
180 | |
181 | } // end of anonymous namespace |
182 | |
183 | char MipsBranchExpansion::ID = 0; |
184 | |
185 | INITIALIZE_PASS(MipsBranchExpansion, DEBUG_TYPE, |
186 | "Expand out of range branch instructions and fix forbidden" |
187 | " slot hazards" , |
188 | false, false) |
189 | |
190 | /// Returns a pass that clears pipeline hazards. |
191 | FunctionPass *llvm::createMipsBranchExpansion() { |
192 | return new MipsBranchExpansion(); |
193 | } |
194 | |
195 | // Find the next real instruction from the current position in current basic |
196 | // block. |
197 | static Iter getNextMachineInstrInBB(Iter Position) { |
198 | Iter I = Position, E = Position->getParent()->end(); |
199 | I = std::find_if_not(first: I, last: E, |
200 | pred: [](const Iter &Insn) { return Insn->isTransient(); }); |
201 | |
202 | return I; |
203 | } |
204 | |
205 | // Find the next real instruction from the current position, looking through |
206 | // basic block boundaries. |
207 | static std::pair<Iter, bool> getNextMachineInstr(Iter Position, |
208 | MachineBasicBlock *Parent) { |
209 | if (Position == Parent->end()) { |
210 | do { |
211 | MachineBasicBlock *Succ = Parent->getNextNode(); |
212 | if (Succ != nullptr && Parent->isSuccessor(MBB: Succ)) { |
213 | Position = Succ->begin(); |
214 | Parent = Succ; |
215 | } else { |
216 | return std::make_pair(x&: Position, y: true); |
217 | } |
218 | } while (Parent->empty()); |
219 | } |
220 | |
221 | Iter Instr = getNextMachineInstrInBB(Position); |
222 | if (Instr == Parent->end()) { |
223 | return getNextMachineInstr(Position: Instr, Parent); |
224 | } |
225 | return std::make_pair(x&: Instr, y: false); |
226 | } |
227 | |
228 | /// Iterate over list of Br's operands and search for a MachineBasicBlock |
229 | /// operand. |
230 | static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) { |
231 | for (unsigned I = 0, E = Br.getDesc().getNumOperands(); I < E; ++I) { |
232 | const MachineOperand &MO = Br.getOperand(i: I); |
233 | |
234 | if (MO.isMBB()) |
235 | return MO.getMBB(); |
236 | } |
237 | |
238 | llvm_unreachable("This instruction does not have an MBB operand." ); |
239 | } |
240 | |
241 | // Traverse the list of instructions backwards until a non-debug instruction is |
242 | // found or it reaches E. |
243 | static ReverseIter getNonDebugInstr(ReverseIter B, const ReverseIter &E) { |
244 | for (; B != E; ++B) |
245 | if (!B->isDebugInstr()) |
246 | return B; |
247 | |
248 | return E; |
249 | } |
250 | |
251 | // Split MBB if it has two direct jumps/branches. |
252 | void MipsBranchExpansion::splitMBB(MachineBasicBlock *MBB) { |
253 | ReverseIter End = MBB->rend(); |
254 | ReverseIter LastBr = getNonDebugInstr(B: MBB->rbegin(), E: End); |
255 | |
256 | // Return if MBB has no branch instructions. |
257 | if ((LastBr == End) || |
258 | (!LastBr->isConditionalBranch() && !LastBr->isUnconditionalBranch())) |
259 | return; |
260 | |
261 | ReverseIter FirstBr = getNonDebugInstr(B: std::next(x: LastBr), E: End); |
262 | |
263 | // MBB has only one branch instruction if FirstBr is not a branch |
264 | // instruction. |
265 | if ((FirstBr == End) || |
266 | (!FirstBr->isConditionalBranch() && !FirstBr->isUnconditionalBranch())) |
267 | return; |
268 | |
269 | assert(!FirstBr->isIndirectBranch() && "Unexpected indirect branch found." ); |
270 | |
271 | // Create a new MBB. Move instructions in MBB to the newly created MBB. |
272 | MachineBasicBlock *NewMBB = |
273 | MFp->CreateMachineBasicBlock(BB: MBB->getBasicBlock()); |
274 | |
275 | // Insert NewMBB and fix control flow. |
276 | MachineBasicBlock *Tgt = getTargetMBB(Br: *FirstBr); |
277 | NewMBB->transferSuccessors(FromMBB: MBB); |
278 | if (Tgt != getTargetMBB(Br: *LastBr)) |
279 | NewMBB->removeSuccessor(Succ: Tgt, NormalizeSuccProbs: true); |
280 | MBB->addSuccessor(Succ: NewMBB); |
281 | MBB->addSuccessor(Succ: Tgt); |
282 | MFp->insert(MBBI: std::next(x: MachineFunction::iterator(MBB)), MBB: NewMBB); |
283 | |
284 | NewMBB->splice(Where: NewMBB->end(), Other: MBB, From: LastBr.getReverse(), To: MBB->end()); |
285 | } |
286 | |
287 | // Fill MBBInfos. |
288 | void MipsBranchExpansion::initMBBInfo() { |
289 | // Split the MBBs if they have two branches. Each basic block should have at |
290 | // most one branch after this loop is executed. |
291 | for (auto &MBB : *MFp) |
292 | splitMBB(MBB: &MBB); |
293 | |
294 | MFp->RenumberBlocks(); |
295 | MBBInfos.clear(); |
296 | MBBInfos.resize(N: MFp->size()); |
297 | |
298 | for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) { |
299 | MachineBasicBlock *MBB = MFp->getBlockNumbered(N: I); |
300 | |
301 | // Compute size of MBB. |
302 | for (MachineInstr &MI : MBB->instrs()) |
303 | MBBInfos[I].Size += TII->getInstSizeInBytes(MI); |
304 | } |
305 | } |
306 | |
307 | // Compute offset of branch in number of bytes. |
308 | int64_t MipsBranchExpansion::computeOffset(const MachineInstr *Br) { |
309 | int64_t Offset = 0; |
310 | int ThisMBB = Br->getParent()->getNumber(); |
311 | int TargetMBB = getTargetMBB(Br: *Br)->getNumber(); |
312 | |
313 | // Compute offset of a forward branch. |
314 | if (ThisMBB < TargetMBB) { |
315 | for (int N = ThisMBB + 1; N < TargetMBB; ++N) |
316 | Offset += MBBInfos[N].Size; |
317 | |
318 | return Offset + 4; |
319 | } |
320 | |
321 | // Compute offset of a backward branch. |
322 | for (int N = ThisMBB; N >= TargetMBB; --N) |
323 | Offset += MBBInfos[N].Size; |
324 | |
325 | return -Offset + 4; |
326 | } |
327 | |
328 | // Returns the distance in bytes up until MBB |
329 | uint64_t MipsBranchExpansion::computeOffsetFromTheBeginning(int MBB) { |
330 | uint64_t Offset = 0; |
331 | for (int N = 0; N < MBB; ++N) |
332 | Offset += MBBInfos[N].Size; |
333 | return Offset; |
334 | } |
335 | |
336 | // Replace Br with a branch which has the opposite condition code and a |
337 | // MachineBasicBlock operand MBBOpnd. |
338 | void MipsBranchExpansion::replaceBranch(MachineBasicBlock &MBB, Iter Br, |
339 | const DebugLoc &DL, |
340 | MachineBasicBlock *MBBOpnd) { |
341 | unsigned NewOpc = TII->getOppositeBranchOpc(Opc: Br->getOpcode()); |
342 | const MCInstrDesc &NewDesc = TII->get(Opcode: NewOpc); |
343 | |
344 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: Br, MIMD: DL, MCID: NewDesc); |
345 | |
346 | for (unsigned I = 0, E = Br->getDesc().getNumOperands(); I < E; ++I) { |
347 | MachineOperand &MO = Br->getOperand(i: I); |
348 | |
349 | switch (MO.getType()) { |
350 | case MachineOperand::MO_Register: |
351 | MIB.addReg(RegNo: MO.getReg()); |
352 | break; |
353 | case MachineOperand::MO_Immediate: |
354 | // Octeon BBIT family of branch has an immediate operand |
355 | // (e.g. BBIT0 $v0, 3, %bb.1). |
356 | if (!TII->isBranchWithImm(Opc: Br->getOpcode())) |
357 | llvm_unreachable("Unexpected immediate in branch instruction" ); |
358 | MIB.addImm(Val: MO.getImm()); |
359 | break; |
360 | case MachineOperand::MO_MachineBasicBlock: |
361 | MIB.addMBB(MBB: MBBOpnd); |
362 | break; |
363 | default: |
364 | llvm_unreachable("Unexpected operand type in branch instruction" ); |
365 | } |
366 | } |
367 | |
368 | if (Br->hasDelaySlot()) { |
369 | // Bundle the instruction in the delay slot to the newly created branch |
370 | // and erase the original branch. |
371 | assert(Br->isBundledWithSucc()); |
372 | MachineBasicBlock::instr_iterator II = Br.getInstrIterator(); |
373 | MIBundleBuilder(&*MIB).append(MI: (++II)->removeFromBundle()); |
374 | } |
375 | Br->eraseFromParent(); |
376 | } |
377 | |
378 | bool MipsBranchExpansion::buildProperJumpMI(MachineBasicBlock *MBB, |
379 | MachineBasicBlock::iterator Pos, |
380 | DebugLoc DL) { |
381 | bool HasR6 = ABI.IsN64() ? STI->hasMips64r6() : STI->hasMips32r6(); |
382 | bool AddImm = HasR6 && !STI->useIndirectJumpsHazard(); |
383 | |
384 | unsigned JR = ABI.IsN64() ? Mips::JR64 : Mips::JR; |
385 | unsigned JIC = ABI.IsN64() ? Mips::JIC64 : Mips::JIC; |
386 | unsigned JR_HB = ABI.IsN64() ? Mips::JR_HB64 : Mips::JR_HB; |
387 | unsigned JR_HB_R6 = ABI.IsN64() ? Mips::JR_HB64_R6 : Mips::JR_HB_R6; |
388 | |
389 | unsigned JumpOp; |
390 | if (STI->useIndirectJumpsHazard()) |
391 | JumpOp = HasR6 ? JR_HB_R6 : JR_HB; |
392 | else |
393 | JumpOp = HasR6 ? JIC : JR; |
394 | |
395 | if (JumpOp == Mips::JIC && STI->inMicroMipsMode()) |
396 | JumpOp = Mips::JIC_MMR6; |
397 | |
398 | unsigned ATReg = ABI.IsN64() ? Mips::AT_64 : Mips::AT; |
399 | MachineInstrBuilder Instr = |
400 | BuildMI(BB&: *MBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: JumpOp)).addReg(RegNo: ATReg); |
401 | if (AddImm) |
402 | Instr.addImm(Val: 0); |
403 | |
404 | return !AddImm; |
405 | } |
406 | |
407 | // Expand branch instructions to long branches. |
408 | // TODO: This function has to be fixed for beqz16 and bnez16, because it |
409 | // currently assumes that all branches have 16-bit offsets, and will produce |
410 | // wrong code if branches whose allowed offsets are [-128, -126, ..., 126] |
411 | // are present. |
412 | void MipsBranchExpansion::expandToLongBranch(MBBInfo &I) { |
413 | MachineBasicBlock::iterator Pos; |
414 | MachineBasicBlock *MBB = I.Br->getParent(), *TgtMBB = getTargetMBB(Br: *I.Br); |
415 | DebugLoc DL = I.Br->getDebugLoc(); |
416 | const BasicBlock *BB = MBB->getBasicBlock(); |
417 | MachineFunction::iterator FallThroughMBB = ++MachineFunction::iterator(MBB); |
418 | MachineBasicBlock *LongBrMBB = MFp->CreateMachineBasicBlock(BB); |
419 | |
420 | MFp->insert(MBBI: FallThroughMBB, MBB: LongBrMBB); |
421 | MBB->replaceSuccessor(Old: TgtMBB, New: LongBrMBB); |
422 | |
423 | if (IsPIC) { |
424 | MachineBasicBlock *BalTgtMBB = MFp->CreateMachineBasicBlock(BB); |
425 | MFp->insert(MBBI: FallThroughMBB, MBB: BalTgtMBB); |
426 | LongBrMBB->addSuccessor(Succ: BalTgtMBB); |
427 | BalTgtMBB->addSuccessor(Succ: TgtMBB); |
428 | |
429 | // We must select between the MIPS32r6/MIPS64r6 BALC (which is a normal |
430 | // instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is an |
431 | // pseudo-instruction wrapping BGEZAL). |
432 | const unsigned BalOp = |
433 | STI->hasMips32r6() |
434 | ? STI->inMicroMipsMode() ? Mips::BALC_MMR6 : Mips::BALC |
435 | : STI->inMicroMipsMode() ? Mips::BAL_BR_MM : Mips::BAL_BR; |
436 | |
437 | if (!ABI.IsN64()) { |
438 | // Pre R6: |
439 | // $longbr: |
440 | // addiu $sp, $sp, -8 |
441 | // sw $ra, 0($sp) |
442 | // lui $at, %hi($tgt - $baltgt) |
443 | // bal $baltgt |
444 | // addiu $at, $at, %lo($tgt - $baltgt) |
445 | // $baltgt: |
446 | // addu $at, $ra, $at |
447 | // lw $ra, 0($sp) |
448 | // jr $at |
449 | // addiu $sp, $sp, 8 |
450 | // $fallthrough: |
451 | // |
452 | |
453 | // R6: |
454 | // $longbr: |
455 | // addiu $sp, $sp, -8 |
456 | // sw $ra, 0($sp) |
457 | // lui $at, %hi($tgt - $baltgt) |
458 | // addiu $at, $at, %lo($tgt - $baltgt) |
459 | // balc $baltgt |
460 | // $baltgt: |
461 | // addu $at, $ra, $at |
462 | // lw $ra, 0($sp) |
463 | // addiu $sp, $sp, 8 |
464 | // jic $at, 0 |
465 | // $fallthrough: |
466 | |
467 | Pos = LongBrMBB->begin(); |
468 | |
469 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: Mips::SP) |
470 | .addReg(RegNo: Mips::SP) |
471 | .addImm(Val: -8); |
472 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::SW)) |
473 | .addReg(RegNo: Mips::RA) |
474 | .addReg(RegNo: Mips::SP) |
475 | .addImm(Val: 0); |
476 | |
477 | // LUi and ADDiu instructions create 32-bit offset of the target basic |
478 | // block from the target of BAL(C) instruction. We cannot use immediate |
479 | // value for this offset because it cannot be determined accurately when |
480 | // the program has inline assembly statements. We therefore use the |
481 | // relocation expressions %hi($tgt-$baltgt) and %lo($tgt-$baltgt) which |
482 | // are resolved during the fixup, so the values will always be correct. |
483 | // |
484 | // Since we cannot create %hi($tgt-$baltgt) and %lo($tgt-$baltgt) |
485 | // expressions at this point (it is possible only at the MC layer), |
486 | // we replace LUi and ADDiu with pseudo instructions |
487 | // LONG_BRANCH_LUi and LONG_BRANCH_ADDiu, and add both basic |
488 | // blocks as operands to these instructions. When lowering these pseudo |
489 | // instructions to LUi and ADDiu in the MC layer, we will create |
490 | // %hi($tgt-$baltgt) and %lo($tgt-$baltgt) expressions and add them as |
491 | // operands to lowered instructions. |
492 | |
493 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::LONG_BRANCH_LUi), DestReg: Mips::AT) |
494 | .addMBB(MBB: TgtMBB, TargetFlags: MipsII::MO_ABS_HI) |
495 | .addMBB(MBB: BalTgtMBB); |
496 | |
497 | MachineInstrBuilder BalInstr = |
498 | BuildMI(MF&: *MFp, MIMD: DL, MCID: TII->get(Opcode: BalOp)).addMBB(MBB: BalTgtMBB); |
499 | MachineInstrBuilder ADDiuInstr = |
500 | BuildMI(MF&: *MFp, MIMD: DL, MCID: TII->get(Opcode: Mips::LONG_BRANCH_ADDiu), DestReg: Mips::AT) |
501 | .addReg(RegNo: Mips::AT) |
502 | .addMBB(MBB: TgtMBB, TargetFlags: MipsII::MO_ABS_LO) |
503 | .addMBB(MBB: BalTgtMBB); |
504 | if (STI->hasMips32r6()) { |
505 | LongBrMBB->insert(I: Pos, MI: ADDiuInstr); |
506 | LongBrMBB->insert(I: Pos, MI: BalInstr); |
507 | } else { |
508 | LongBrMBB->insert(I: Pos, MI: BalInstr); |
509 | LongBrMBB->insert(I: Pos, MI: ADDiuInstr); |
510 | LongBrMBB->rbegin()->bundleWithPred(); |
511 | } |
512 | |
513 | Pos = BalTgtMBB->begin(); |
514 | |
515 | BuildMI(BB&: *BalTgtMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::ADDu), DestReg: Mips::AT) |
516 | .addReg(RegNo: Mips::RA) |
517 | .addReg(RegNo: Mips::AT); |
518 | BuildMI(BB&: *BalTgtMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::LW), DestReg: Mips::RA) |
519 | .addReg(RegNo: Mips::SP) |
520 | .addImm(Val: 0); |
521 | if (STI->isTargetNaCl()) |
522 | // Bundle-align the target of indirect branch JR. |
523 | TgtMBB->setAlignment(MIPS_NACL_BUNDLE_ALIGN); |
524 | |
525 | // In NaCl, modifying the sp is not allowed in branch delay slot. |
526 | // For MIPS32R6, we can skip using a delay slot branch. |
527 | bool hasDelaySlot = buildProperJumpMI(MBB: BalTgtMBB, Pos, DL); |
528 | |
529 | if (STI->isTargetNaCl() || !hasDelaySlot) { |
530 | BuildMI(BB&: *BalTgtMBB, I: std::prev(x: Pos), MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: Mips::SP) |
531 | .addReg(RegNo: Mips::SP) |
532 | .addImm(Val: 8); |
533 | } |
534 | if (hasDelaySlot) { |
535 | if (STI->isTargetNaCl()) { |
536 | TII->insertNop(MBB&: *BalTgtMBB, MI: Pos, DL); |
537 | } else { |
538 | BuildMI(BB&: *BalTgtMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: Mips::SP) |
539 | .addReg(RegNo: Mips::SP) |
540 | .addImm(Val: 8); |
541 | } |
542 | BalTgtMBB->rbegin()->bundleWithPred(); |
543 | } |
544 | } else { |
545 | // Pre R6: |
546 | // $longbr: |
547 | // daddiu $sp, $sp, -16 |
548 | // sd $ra, 0($sp) |
549 | // daddiu $at, $zero, %hi($tgt - $baltgt) |
550 | // dsll $at, $at, 16 |
551 | // bal $baltgt |
552 | // daddiu $at, $at, %lo($tgt - $baltgt) |
553 | // $baltgt: |
554 | // daddu $at, $ra, $at |
555 | // ld $ra, 0($sp) |
556 | // jr64 $at |
557 | // daddiu $sp, $sp, 16 |
558 | // $fallthrough: |
559 | |
560 | // R6: |
561 | // $longbr: |
562 | // daddiu $sp, $sp, -16 |
563 | // sd $ra, 0($sp) |
564 | // daddiu $at, $zero, %hi($tgt - $baltgt) |
565 | // dsll $at, $at, 16 |
566 | // daddiu $at, $at, %lo($tgt - $baltgt) |
567 | // balc $baltgt |
568 | // $baltgt: |
569 | // daddu $at, $ra, $at |
570 | // ld $ra, 0($sp) |
571 | // daddiu $sp, $sp, 16 |
572 | // jic $at, 0 |
573 | // $fallthrough: |
574 | |
575 | // We assume the branch is within-function, and that offset is within |
576 | // +/- 2GB. High 32 bits will therefore always be zero. |
577 | |
578 | // Note that this will work even if the offset is negative, because |
579 | // of the +1 modification that's added in that case. For example, if the |
580 | // offset is -1MB (0xFFFFFFFFFFF00000), the computation for %higher is |
581 | // |
582 | // 0xFFFFFFFFFFF00000 + 0x80008000 = 0x000000007FF08000 |
583 | // |
584 | // and the bits [47:32] are zero. For %highest |
585 | // |
586 | // 0xFFFFFFFFFFF00000 + 0x800080008000 = 0x000080007FF08000 |
587 | // |
588 | // and the bits [63:48] are zero. |
589 | |
590 | Pos = LongBrMBB->begin(); |
591 | |
592 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::DADDiu), DestReg: Mips::SP_64) |
593 | .addReg(RegNo: Mips::SP_64) |
594 | .addImm(Val: -16); |
595 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::SD)) |
596 | .addReg(RegNo: Mips::RA_64) |
597 | .addReg(RegNo: Mips::SP_64) |
598 | .addImm(Val: 0); |
599 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::LONG_BRANCH_DADDiu), |
600 | DestReg: Mips::AT_64) |
601 | .addReg(RegNo: Mips::ZERO_64) |
602 | .addMBB(MBB: TgtMBB, TargetFlags: MipsII::MO_ABS_HI) |
603 | .addMBB(MBB: BalTgtMBB); |
604 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::DSLL), DestReg: Mips::AT_64) |
605 | .addReg(RegNo: Mips::AT_64) |
606 | .addImm(Val: 16); |
607 | |
608 | MachineInstrBuilder BalInstr = |
609 | BuildMI(MF&: *MFp, MIMD: DL, MCID: TII->get(Opcode: BalOp)).addMBB(MBB: BalTgtMBB); |
610 | MachineInstrBuilder DADDiuInstr = |
611 | BuildMI(MF&: *MFp, MIMD: DL, MCID: TII->get(Opcode: Mips::LONG_BRANCH_DADDiu), DestReg: Mips::AT_64) |
612 | .addReg(RegNo: Mips::AT_64) |
613 | .addMBB(MBB: TgtMBB, TargetFlags: MipsII::MO_ABS_LO) |
614 | .addMBB(MBB: BalTgtMBB); |
615 | if (STI->hasMips32r6()) { |
616 | LongBrMBB->insert(I: Pos, MI: DADDiuInstr); |
617 | LongBrMBB->insert(I: Pos, MI: BalInstr); |
618 | } else { |
619 | LongBrMBB->insert(I: Pos, MI: BalInstr); |
620 | LongBrMBB->insert(I: Pos, MI: DADDiuInstr); |
621 | LongBrMBB->rbegin()->bundleWithPred(); |
622 | } |
623 | |
624 | Pos = BalTgtMBB->begin(); |
625 | |
626 | BuildMI(BB&: *BalTgtMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::DADDu), DestReg: Mips::AT_64) |
627 | .addReg(RegNo: Mips::RA_64) |
628 | .addReg(RegNo: Mips::AT_64); |
629 | BuildMI(BB&: *BalTgtMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::LD), DestReg: Mips::RA_64) |
630 | .addReg(RegNo: Mips::SP_64) |
631 | .addImm(Val: 0); |
632 | |
633 | bool hasDelaySlot = buildProperJumpMI(MBB: BalTgtMBB, Pos, DL); |
634 | // If there is no delay slot, Insert stack adjustment before |
635 | if (!hasDelaySlot) { |
636 | BuildMI(BB&: *BalTgtMBB, I: std::prev(x: Pos), MIMD: DL, MCID: TII->get(Opcode: Mips::DADDiu), |
637 | DestReg: Mips::SP_64) |
638 | .addReg(RegNo: Mips::SP_64) |
639 | .addImm(Val: 16); |
640 | } else { |
641 | BuildMI(BB&: *BalTgtMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::DADDiu), DestReg: Mips::SP_64) |
642 | .addReg(RegNo: Mips::SP_64) |
643 | .addImm(Val: 16); |
644 | BalTgtMBB->rbegin()->bundleWithPred(); |
645 | } |
646 | } |
647 | } else { // Not PIC |
648 | Pos = LongBrMBB->begin(); |
649 | LongBrMBB->addSuccessor(Succ: TgtMBB); |
650 | |
651 | // Compute the position of the potentiall jump instruction (basic blocks |
652 | // before + 4 for the instruction) |
653 | uint64_t JOffset = computeOffsetFromTheBeginning(MBB: MBB->getNumber()) + |
654 | MBBInfos[MBB->getNumber()].Size + 4; |
655 | uint64_t TgtMBBOffset = computeOffsetFromTheBeginning(MBB: TgtMBB->getNumber()); |
656 | // If it's a forward jump, then TgtMBBOffset will be shifted by two |
657 | // instructions |
658 | if (JOffset < TgtMBBOffset) |
659 | TgtMBBOffset += 2 * 4; |
660 | // Compare 4 upper bits to check if it's the same segment |
661 | bool SameSegmentJump = JOffset >> 28 == TgtMBBOffset >> 28; |
662 | |
663 | if (STI->hasMips32r6() && TII->isBranchOffsetInRange(BranchOpc: Mips::BC, BrOffset: I.Offset)) { |
664 | // R6: |
665 | // $longbr: |
666 | // bc $tgt |
667 | // $fallthrough: |
668 | // |
669 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, |
670 | MCID: TII->get(Opcode: STI->inMicroMipsMode() ? Mips::BC_MMR6 : Mips::BC)) |
671 | .addMBB(MBB: TgtMBB); |
672 | } else if (SameSegmentJump) { |
673 | // Pre R6: |
674 | // $longbr: |
675 | // j $tgt |
676 | // nop |
677 | // $fallthrough: |
678 | // |
679 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::J)).addMBB(MBB: TgtMBB); |
680 | TII->insertNop(MBB&: *LongBrMBB, MI: Pos, DL)->bundleWithPred(); |
681 | } else { |
682 | // At this point, offset where we need to branch does not fit into |
683 | // immediate field of the branch instruction and is not in the same |
684 | // segment as jump instruction. Therefore we will break it into couple |
685 | // instructions, where we first load the offset into register, and then we |
686 | // do branch register. |
687 | if (ABI.IsN64()) { |
688 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::LONG_BRANCH_LUi2Op_64), |
689 | DestReg: Mips::AT_64) |
690 | .addMBB(MBB: TgtMBB, TargetFlags: MipsII::MO_HIGHEST); |
691 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::LONG_BRANCH_DADDiu2Op), |
692 | DestReg: Mips::AT_64) |
693 | .addReg(RegNo: Mips::AT_64) |
694 | .addMBB(MBB: TgtMBB, TargetFlags: MipsII::MO_HIGHER); |
695 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::DSLL), DestReg: Mips::AT_64) |
696 | .addReg(RegNo: Mips::AT_64) |
697 | .addImm(Val: 16); |
698 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::LONG_BRANCH_DADDiu2Op), |
699 | DestReg: Mips::AT_64) |
700 | .addReg(RegNo: Mips::AT_64) |
701 | .addMBB(MBB: TgtMBB, TargetFlags: MipsII::MO_ABS_HI); |
702 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::DSLL), DestReg: Mips::AT_64) |
703 | .addReg(RegNo: Mips::AT_64) |
704 | .addImm(Val: 16); |
705 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::LONG_BRANCH_DADDiu2Op), |
706 | DestReg: Mips::AT_64) |
707 | .addReg(RegNo: Mips::AT_64) |
708 | .addMBB(MBB: TgtMBB, TargetFlags: MipsII::MO_ABS_LO); |
709 | } else { |
710 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::LONG_BRANCH_LUi2Op), |
711 | DestReg: Mips::AT) |
712 | .addMBB(MBB: TgtMBB, TargetFlags: MipsII::MO_ABS_HI); |
713 | BuildMI(BB&: *LongBrMBB, I: Pos, MIMD: DL, MCID: TII->get(Opcode: Mips::LONG_BRANCH_ADDiu2Op), |
714 | DestReg: Mips::AT) |
715 | .addReg(RegNo: Mips::AT) |
716 | .addMBB(MBB: TgtMBB, TargetFlags: MipsII::MO_ABS_LO); |
717 | } |
718 | buildProperJumpMI(MBB: LongBrMBB, Pos, DL); |
719 | } |
720 | } |
721 | |
722 | if (I.Br->isUnconditionalBranch()) { |
723 | // Change branch destination. |
724 | assert(I.Br->getDesc().getNumOperands() == 1); |
725 | I.Br->removeOperand(OpNo: 0); |
726 | I.Br->addOperand(Op: MachineOperand::CreateMBB(MBB: LongBrMBB)); |
727 | } else |
728 | // Change branch destination and reverse condition. |
729 | replaceBranch(MBB&: *MBB, Br: I.Br, DL, MBBOpnd: &*FallThroughMBB); |
730 | } |
731 | |
732 | static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) { |
733 | MachineBasicBlock &MBB = F.front(); |
734 | MachineBasicBlock::iterator I = MBB.begin(); |
735 | DebugLoc DL = MBB.findDebugLoc(MBBI: MBB.begin()); |
736 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Mips::LUi), DestReg: Mips::V0) |
737 | .addExternalSymbol(FnName: "_gp_disp" , TargetFlags: MipsII::MO_ABS_HI); |
738 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Mips::ADDiu), DestReg: Mips::V0) |
739 | .addReg(RegNo: Mips::V0) |
740 | .addExternalSymbol(FnName: "_gp_disp" , TargetFlags: MipsII::MO_ABS_LO); |
741 | MBB.removeLiveIn(Reg: Mips::V0); |
742 | } |
743 | |
744 | template <typename Pred, typename Safe> |
745 | bool MipsBranchExpansion::handleSlot(Pred Predicate, Safe SafeInSlot) { |
746 | bool Changed = false; |
747 | |
748 | for (MachineFunction::iterator FI = MFp->begin(); FI != MFp->end(); ++FI) { |
749 | for (Iter I = FI->begin(); I != FI->end(); ++I) { |
750 | |
751 | // Delay slot hazard handling. Use lookahead over state. |
752 | if (!Predicate(*I)) |
753 | continue; |
754 | |
755 | Iter IInSlot; |
756 | bool LastInstInFunction = |
757 | std::next(x: I) == FI->end() && std::next(x: FI) == MFp->end(); |
758 | if (!LastInstInFunction) { |
759 | std::pair<Iter, bool> Res = getNextMachineInstr(Position: std::next(x: I), Parent: &*FI); |
760 | LastInstInFunction |= Res.second; |
761 | IInSlot = Res.first; |
762 | } |
763 | |
764 | if (LastInstInFunction || !SafeInSlot(*IInSlot, *I)) { |
765 | MachineBasicBlock::instr_iterator Iit = I->getIterator(); |
766 | if (std::next(x: Iit) == FI->end() || |
767 | std::next(x: Iit)->getOpcode() != Mips::NOP) { |
768 | Changed = true; |
769 | TII->insertNop(MBB&: *(I->getParent()), MI: std::next(x: I), DL: I->getDebugLoc()) |
770 | ->bundleWithPred(); |
771 | NumInsertedNops++; |
772 | } |
773 | } |
774 | } |
775 | } |
776 | |
777 | return Changed; |
778 | } |
779 | |
780 | bool MipsBranchExpansion::handleForbiddenSlot() { |
781 | // Forbidden slot hazards are only defined for MIPSR6 but not microMIPSR6. |
782 | if (!STI->hasMips32r6() || STI->inMicroMipsMode()) |
783 | return false; |
784 | |
785 | return handleSlot( |
786 | Predicate: [this](auto &I) -> bool { return TII->HasForbiddenSlot(MI: I); }, |
787 | SafeInSlot: [this](auto &IInSlot, auto &I) -> bool { |
788 | return TII->SafeInForbiddenSlot(MI: IInSlot); |
789 | }); |
790 | } |
791 | |
792 | bool MipsBranchExpansion::handleFPUDelaySlot() { |
793 | // FPU delay slots are only defined for MIPS3 and below. |
794 | if (STI->hasMips32() || STI->hasMips4()) |
795 | return false; |
796 | |
797 | return handleSlot(Predicate: [this](auto &I) -> bool { return TII->HasFPUDelaySlot(MI: I); }, |
798 | SafeInSlot: [this](auto &IInSlot, auto &I) -> bool { |
799 | return TII->SafeInFPUDelaySlot(MIInSlot: IInSlot, FPUMI: I); |
800 | }); |
801 | } |
802 | |
803 | bool MipsBranchExpansion::handleLoadDelaySlot() { |
804 | // Load delay slot hazards are only for MIPS1. |
805 | if (STI->hasMips2()) |
806 | return false; |
807 | |
808 | return handleSlot( |
809 | Predicate: [this](auto &I) -> bool { return TII->HasLoadDelaySlot(MI: I); }, |
810 | SafeInSlot: [this](auto &IInSlot, auto &I) -> bool { |
811 | return TII->SafeInLoadDelaySlot(MIInSlot: IInSlot, LoadMI: I); |
812 | }); |
813 | } |
814 | |
815 | bool MipsBranchExpansion::handlePossibleLongBranch() { |
816 | if (STI->inMips16Mode() || !STI->enableLongBranchPass()) |
817 | return false; |
818 | |
819 | if (SkipLongBranch) |
820 | return false; |
821 | |
822 | bool EverMadeChange = false, MadeChange = true; |
823 | |
824 | while (MadeChange) { |
825 | MadeChange = false; |
826 | |
827 | initMBBInfo(); |
828 | |
829 | for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) { |
830 | MachineBasicBlock *MBB = MFp->getBlockNumbered(N: I); |
831 | // Search for MBB's branch instruction. |
832 | ReverseIter End = MBB->rend(); |
833 | ReverseIter Br = getNonDebugInstr(B: MBB->rbegin(), E: End); |
834 | |
835 | if ((Br != End) && Br->isBranch() && !Br->isIndirectBranch() && |
836 | (Br->isConditionalBranch() || |
837 | (Br->isUnconditionalBranch() && IsPIC))) { |
838 | int64_t Offset = computeOffset(Br: &*Br); |
839 | |
840 | if (STI->isTargetNaCl()) { |
841 | // The offset calculation does not include sandboxing instructions |
842 | // that will be added later in the MC layer. Since at this point we |
843 | // don't know the exact amount of code that "sandboxing" will add, we |
844 | // conservatively estimate that code will not grow more than 100%. |
845 | Offset *= 2; |
846 | } |
847 | |
848 | if (ForceLongBranchFirstPass || |
849 | !TII->isBranchOffsetInRange(BranchOpc: Br->getOpcode(), BrOffset: Offset)) { |
850 | MBBInfos[I].Offset = Offset; |
851 | MBBInfos[I].Br = &*Br; |
852 | } |
853 | } |
854 | } // End for |
855 | |
856 | ForceLongBranchFirstPass = false; |
857 | |
858 | SmallVectorImpl<MBBInfo>::iterator I, E = MBBInfos.end(); |
859 | |
860 | for (I = MBBInfos.begin(); I != E; ++I) { |
861 | // Skip if this MBB doesn't have a branch or the branch has already been |
862 | // converted to a long branch. |
863 | if (!I->Br) |
864 | continue; |
865 | |
866 | expandToLongBranch(I&: *I); |
867 | ++LongBranches; |
868 | EverMadeChange = MadeChange = true; |
869 | } |
870 | |
871 | MFp->RenumberBlocks(); |
872 | } |
873 | |
874 | return EverMadeChange; |
875 | } |
876 | |
877 | bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) { |
878 | const TargetMachine &TM = MF.getTarget(); |
879 | IsPIC = TM.isPositionIndependent(); |
880 | ABI = static_cast<const MipsTargetMachine &>(TM).getABI(); |
881 | STI = &MF.getSubtarget<MipsSubtarget>(); |
882 | TII = static_cast<const MipsInstrInfo *>(STI->getInstrInfo()); |
883 | |
884 | if (IsPIC && ABI.IsO32() && |
885 | MF.getInfo<MipsFunctionInfo>()->globalBaseRegSet()) |
886 | emitGPDisp(F&: MF, TII); |
887 | |
888 | MFp = &MF; |
889 | |
890 | ForceLongBranchFirstPass = ForceLongBranch; |
891 | // Run these at least once. |
892 | bool longBranchChanged = handlePossibleLongBranch(); |
893 | bool forbiddenSlotChanged = handleForbiddenSlot(); |
894 | bool fpuDelaySlotChanged = handleFPUDelaySlot(); |
895 | bool loadDelaySlotChanged = handleLoadDelaySlot(); |
896 | |
897 | bool Changed = longBranchChanged || forbiddenSlotChanged || |
898 | fpuDelaySlotChanged || loadDelaySlotChanged; |
899 | |
900 | // Then run them alternatively while there are changes. |
901 | while (forbiddenSlotChanged) { |
902 | longBranchChanged = handlePossibleLongBranch(); |
903 | fpuDelaySlotChanged = handleFPUDelaySlot(); |
904 | loadDelaySlotChanged = handleLoadDelaySlot(); |
905 | if (!longBranchChanged && !fpuDelaySlotChanged && !loadDelaySlotChanged) |
906 | break; |
907 | forbiddenSlotChanged = handleForbiddenSlot(); |
908 | } |
909 | |
910 | return Changed; |
911 | } |
912 | |