1 | //===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This is a simple local pass that attempts to fill delay slots with useful |
10 | // instructions. If no instructions can be moved into the delay slot, then a |
11 | // NOP is placed. |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "Sparc.h" |
15 | #include "SparcSubtarget.h" |
16 | #include "llvm/ADT/SmallSet.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/CodeGen/MachineFunctionPass.h" |
19 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
20 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
21 | #include "llvm/CodeGen/TargetInstrInfo.h" |
22 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
23 | #include "llvm/Support/CommandLine.h" |
24 | |
25 | using namespace llvm; |
26 | |
27 | #define DEBUG_TYPE "delay-slot-filler" |
28 | |
29 | STATISTIC(FilledSlots, "Number of delay slots filled" ); |
30 | |
31 | static cl::opt<bool> DisableDelaySlotFiller( |
32 | "disable-sparc-delay-filler" , |
33 | cl::init(Val: false), |
34 | cl::desc("Disable the Sparc delay slot filler." ), |
35 | cl::Hidden); |
36 | |
37 | namespace { |
38 | struct Filler : public MachineFunctionPass { |
39 | const SparcSubtarget *Subtarget = nullptr; |
40 | |
41 | static char ID; |
42 | Filler() : MachineFunctionPass(ID) {} |
43 | |
44 | StringRef getPassName() const override { return "SPARC Delay Slot Filler" ; } |
45 | |
46 | bool runOnMachineBasicBlock(MachineBasicBlock &MBB); |
47 | bool runOnMachineFunction(MachineFunction &F) override { |
48 | bool Changed = false; |
49 | Subtarget = &F.getSubtarget<SparcSubtarget>(); |
50 | |
51 | // This pass invalidates liveness information when it reorders |
52 | // instructions to fill delay slot. |
53 | F.getRegInfo().invalidateLiveness(); |
54 | |
55 | for (MachineBasicBlock &MBB : F) |
56 | Changed |= runOnMachineBasicBlock(MBB); |
57 | return Changed; |
58 | } |
59 | |
60 | MachineFunctionProperties getRequiredProperties() const override { |
61 | return MachineFunctionProperties().setNoVRegs(); |
62 | } |
63 | |
64 | void insertCallDefsUses(MachineBasicBlock::iterator MI, |
65 | SmallSet<unsigned, 32>& RegDefs, |
66 | SmallSet<unsigned, 32>& RegUses); |
67 | |
68 | void insertDefsUses(MachineBasicBlock::iterator MI, |
69 | SmallSet<unsigned, 32>& RegDefs, |
70 | SmallSet<unsigned, 32>& RegUses); |
71 | |
72 | bool IsRegInSet(SmallSet<unsigned, 32>& RegSet, |
73 | unsigned Reg); |
74 | |
75 | bool delayHasHazard(MachineBasicBlock::iterator candidate, |
76 | bool &sawLoad, bool &sawStore, |
77 | SmallSet<unsigned, 32> &RegDefs, |
78 | SmallSet<unsigned, 32> &RegUses); |
79 | |
80 | MachineBasicBlock::iterator |
81 | findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot); |
82 | |
83 | bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize); |
84 | |
85 | bool tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB, |
86 | MachineBasicBlock::iterator MBBI); |
87 | |
88 | }; |
89 | char Filler::ID = 0; |
90 | } // end of anonymous namespace |
91 | |
92 | /// createSparcDelaySlotFillerPass - Returns a pass that fills in delay |
93 | /// slots in Sparc MachineFunctions |
94 | /// |
95 | FunctionPass *llvm::createSparcDelaySlotFillerPass() { |
96 | return new Filler; |
97 | } |
98 | |
99 | |
100 | /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. |
101 | /// We assume there is only one delay slot per delayed instruction. |
102 | /// |
103 | bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { |
104 | bool Changed = false; |
105 | Subtarget = &MBB.getParent()->getSubtarget<SparcSubtarget>(); |
106 | const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
107 | |
108 | for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { |
109 | MachineBasicBlock::iterator MI = I; |
110 | ++I; |
111 | |
112 | // If MI is restore, try combining it with previous inst. |
113 | if (!DisableDelaySlotFiller && |
114 | (MI->getOpcode() == SP::RESTORErr |
115 | || MI->getOpcode() == SP::RESTOREri)) { |
116 | Changed |= tryCombineRestoreWithPrevInst(MBB, MBBI: MI); |
117 | continue; |
118 | } |
119 | |
120 | // TODO: If we ever want to support v7, this needs to be extended |
121 | // to cover all floating point operations. |
122 | if (!Subtarget->isV9() && |
123 | (MI->getOpcode() == SP::FCMPS || MI->getOpcode() == SP::FCMPD |
124 | || MI->getOpcode() == SP::FCMPQ)) { |
125 | BuildMI(BB&: MBB, I, MIMD: MI->getDebugLoc(), MCID: TII->get(Opcode: SP::NOP)); |
126 | Changed = true; |
127 | continue; |
128 | } |
129 | |
130 | // If MI has no delay slot, skip. |
131 | if (!MI->hasDelaySlot()) |
132 | continue; |
133 | |
134 | MachineBasicBlock::iterator D = MBB.end(); |
135 | |
136 | if (!DisableDelaySlotFiller) |
137 | D = findDelayInstr(MBB, slot: MI); |
138 | |
139 | ++FilledSlots; |
140 | Changed = true; |
141 | |
142 | if (D == MBB.end()) |
143 | BuildMI(BB&: MBB, I, MIMD: MI->getDebugLoc(), MCID: TII->get(Opcode: SP::NOP)); |
144 | else |
145 | MBB.splice(Where: I, Other: &MBB, From: D); |
146 | |
147 | unsigned structSize = 0; |
148 | if (needsUnimp(I: MI, StructSize&: structSize)) { |
149 | MachineBasicBlock::iterator J = MI; |
150 | ++J; // skip the delay filler. |
151 | assert (J != MBB.end() && "MI needs a delay instruction." ); |
152 | BuildMI(BB&: MBB, I: ++J, MIMD: MI->getDebugLoc(), |
153 | MCID: TII->get(Opcode: SP::UNIMP)).addImm(Val: structSize); |
154 | // Bundle the delay filler and unimp with the instruction. |
155 | MIBundleBuilder(MBB, MachineBasicBlock::iterator(MI), J); |
156 | } else { |
157 | MIBundleBuilder(MBB, MachineBasicBlock::iterator(MI), I); |
158 | } |
159 | } |
160 | return Changed; |
161 | } |
162 | |
163 | MachineBasicBlock::iterator |
164 | Filler::findDelayInstr(MachineBasicBlock &MBB, |
165 | MachineBasicBlock::iterator slot) |
166 | { |
167 | SmallSet<unsigned, 32> RegDefs; |
168 | SmallSet<unsigned, 32> RegUses; |
169 | bool sawLoad = false; |
170 | bool sawStore = false; |
171 | |
172 | if (slot == MBB.begin()) |
173 | return MBB.end(); |
174 | |
175 | unsigned Opc = slot->getOpcode(); |
176 | |
177 | if (Opc == SP::RET || Opc == SP::TLS_CALL) |
178 | return MBB.end(); |
179 | |
180 | if (Opc == SP::RETL || Opc == SP::TAIL_CALL || Opc == SP::TAIL_CALLri) { |
181 | MachineBasicBlock::iterator J = slot; |
182 | --J; |
183 | |
184 | if (J->getOpcode() == SP::RESTORErr |
185 | || J->getOpcode() == SP::RESTOREri) { |
186 | // change retl to ret. |
187 | if (Opc == SP::RETL) |
188 | slot->setDesc(Subtarget->getInstrInfo()->get(Opcode: SP::RET)); |
189 | return J; |
190 | } |
191 | } |
192 | |
193 | // Call's delay filler can def some of call's uses. |
194 | if (slot->isCall()) |
195 | insertCallDefsUses(MI: slot, RegDefs, RegUses); |
196 | else |
197 | insertDefsUses(MI: slot, RegDefs, RegUses); |
198 | |
199 | bool done = false; |
200 | |
201 | MachineBasicBlock::iterator I = slot; |
202 | |
203 | while (!done) { |
204 | done = (I == MBB.begin()); |
205 | |
206 | if (!done) |
207 | --I; |
208 | |
209 | // skip debug instruction |
210 | if (I->isDebugInstr()) |
211 | continue; |
212 | |
213 | if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isPosition() || |
214 | I->hasDelaySlot() || I->isBundledWithSucc()) |
215 | break; |
216 | |
217 | if (delayHasHazard(candidate: I, sawLoad, sawStore, RegDefs, RegUses)) { |
218 | insertDefsUses(MI: I, RegDefs, RegUses); |
219 | continue; |
220 | } |
221 | |
222 | return I; |
223 | } |
224 | return MBB.end(); |
225 | } |
226 | |
227 | bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, |
228 | bool &sawLoad, |
229 | bool &sawStore, |
230 | SmallSet<unsigned, 32> &RegDefs, |
231 | SmallSet<unsigned, 32> &RegUses) |
232 | { |
233 | |
234 | if (candidate->isImplicitDef() || candidate->isKill()) |
235 | return true; |
236 | |
237 | if (candidate->mayLoad()) { |
238 | sawLoad = true; |
239 | if (sawStore) |
240 | return true; |
241 | } |
242 | |
243 | if (candidate->mayStore()) { |
244 | if (sawStore) |
245 | return true; |
246 | sawStore = true; |
247 | if (sawLoad) |
248 | return true; |
249 | } |
250 | |
251 | for (const MachineOperand &MO : candidate->operands()) { |
252 | if (!MO.isReg()) |
253 | continue; // skip |
254 | |
255 | Register Reg = MO.getReg(); |
256 | |
257 | if (MO.isDef()) { |
258 | // check whether Reg is defined or used before delay slot. |
259 | if (IsRegInSet(RegSet&: RegDefs, Reg) || IsRegInSet(RegSet&: RegUses, Reg)) |
260 | return true; |
261 | } |
262 | if (MO.isUse()) { |
263 | // check whether Reg is defined before delay slot. |
264 | if (IsRegInSet(RegSet&: RegDefs, Reg)) |
265 | return true; |
266 | } |
267 | } |
268 | |
269 | unsigned Opcode = candidate->getOpcode(); |
270 | // LD and LDD may have NOPs inserted afterwards in the case of some LEON |
271 | // processors, so we can't use the delay slot if this feature is switched-on. |
272 | if (Subtarget->insertNOPLoad() |
273 | && |
274 | Opcode >= SP::LDDArr && Opcode <= SP::LDrr) |
275 | return true; |
276 | |
277 | // Same as above for FDIV and FSQRT on some LEON processors. |
278 | if (Subtarget->fixAllFDIVSQRT() |
279 | && |
280 | Opcode >= SP::FDIVD && Opcode <= SP::FSQRTD) |
281 | return true; |
282 | |
283 | if (Subtarget->fixTN0009() && candidate->mayStore()) |
284 | return true; |
285 | |
286 | if (Subtarget->fixTN0013()) { |
287 | switch (Opcode) { |
288 | case SP::FDIVS: |
289 | case SP::FDIVD: |
290 | case SP::FSQRTS: |
291 | case SP::FSQRTD: |
292 | return true; |
293 | default: |
294 | break; |
295 | } |
296 | } |
297 | |
298 | return false; |
299 | } |
300 | |
301 | |
302 | void Filler::insertCallDefsUses(MachineBasicBlock::iterator MI, |
303 | SmallSet<unsigned, 32>& RegDefs, |
304 | SmallSet<unsigned, 32>& RegUses) |
305 | { |
306 | // Call defines o7, which is visible to the instruction in delay slot. |
307 | RegDefs.insert(V: SP::O7); |
308 | |
309 | switch(MI->getOpcode()) { |
310 | default: llvm_unreachable("Unknown opcode." ); |
311 | case SP::CALL: |
312 | break; |
313 | case SP::CALLrr: |
314 | case SP::CALLri: |
315 | assert(MI->getNumOperands() >= 2); |
316 | const MachineOperand &Reg = MI->getOperand(i: 0); |
317 | assert(Reg.isReg() && "CALL first operand is not a register." ); |
318 | assert(Reg.isUse() && "CALL first operand is not a use." ); |
319 | RegUses.insert(V: Reg.getReg()); |
320 | |
321 | const MachineOperand &Operand1 = MI->getOperand(i: 1); |
322 | if (Operand1.isImm() || Operand1.isGlobal()) |
323 | break; |
324 | assert(Operand1.isReg() && "CALLrr second operand is not a register." ); |
325 | assert(Operand1.isUse() && "CALLrr second operand is not a use." ); |
326 | RegUses.insert(V: Operand1.getReg()); |
327 | break; |
328 | } |
329 | } |
330 | |
331 | // Insert Defs and Uses of MI into the sets RegDefs and RegUses. |
332 | void Filler::insertDefsUses(MachineBasicBlock::iterator MI, |
333 | SmallSet<unsigned, 32>& RegDefs, |
334 | SmallSet<unsigned, 32>& RegUses) |
335 | { |
336 | for (const MachineOperand &MO : MI->operands()) { |
337 | if (!MO.isReg()) |
338 | continue; |
339 | |
340 | Register Reg = MO.getReg(); |
341 | if (Reg == 0) |
342 | continue; |
343 | if (MO.isDef()) |
344 | RegDefs.insert(V: Reg); |
345 | if (MO.isUse()) { |
346 | // Implicit register uses of retl are return values and |
347 | // retl does not use them. |
348 | if (MO.isImplicit() && MI->getOpcode() == SP::RETL) |
349 | continue; |
350 | RegUses.insert(V: Reg); |
351 | } |
352 | } |
353 | } |
354 | |
355 | // returns true if the Reg or its alias is in the RegSet. |
356 | bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) |
357 | { |
358 | // Check Reg and all aliased Registers. |
359 | for (MCRegAliasIterator AI(Reg, Subtarget->getRegisterInfo(), true); |
360 | AI.isValid(); ++AI) |
361 | if (RegSet.count(V: *AI)) |
362 | return true; |
363 | return false; |
364 | } |
365 | |
366 | bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) |
367 | { |
368 | if (!I->isCall()) |
369 | return false; |
370 | |
371 | unsigned structSizeOpNum = 0; |
372 | switch (I->getOpcode()) { |
373 | default: llvm_unreachable("Unknown call opcode." ); |
374 | case SP::CALL: |
375 | structSizeOpNum = 1; |
376 | break; |
377 | case SP::CALLrr: |
378 | case SP::CALLri: |
379 | structSizeOpNum = 2; |
380 | break; |
381 | case SP::TLS_CALL: return false; |
382 | case SP::TAIL_CALLri: |
383 | case SP::TAIL_CALL: return false; |
384 | } |
385 | |
386 | const MachineOperand &MO = I->getOperand(i: structSizeOpNum); |
387 | if (!MO.isImm()) |
388 | return false; |
389 | StructSize = MO.getImm(); |
390 | return true; |
391 | } |
392 | |
393 | static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI, |
394 | MachineBasicBlock::iterator AddMI, |
395 | const TargetInstrInfo *TII) |
396 | { |
397 | // Before: add <op0>, <op1>, %i[0-7] |
398 | // restore %g0, %g0, %i[0-7] |
399 | // |
400 | // After : restore <op0>, <op1>, %o[0-7] |
401 | |
402 | Register reg = AddMI->getOperand(i: 0).getReg(); |
403 | if (reg < SP::I0 || reg > SP::I7) |
404 | return false; |
405 | |
406 | // Erase RESTORE. |
407 | RestoreMI->eraseFromParent(); |
408 | |
409 | // Change ADD to RESTORE. |
410 | AddMI->setDesc(TII->get(Opcode: (AddMI->getOpcode() == SP::ADDrr) |
411 | ? SP::RESTORErr |
412 | : SP::RESTOREri)); |
413 | |
414 | // Map the destination register. |
415 | AddMI->getOperand(i: 0).setReg(reg - SP::I0 + SP::O0); |
416 | |
417 | return true; |
418 | } |
419 | |
420 | static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI, |
421 | MachineBasicBlock::iterator OrMI, |
422 | const TargetInstrInfo *TII) |
423 | { |
424 | // Before: or <op0>, <op1>, %i[0-7] |
425 | // restore %g0, %g0, %i[0-7] |
426 | // and <op0> or <op1> is zero, |
427 | // |
428 | // After : restore <op0>, <op1>, %o[0-7] |
429 | |
430 | Register reg = OrMI->getOperand(i: 0).getReg(); |
431 | if (reg < SP::I0 || reg > SP::I7) |
432 | return false; |
433 | |
434 | // check whether it is a copy. |
435 | if (OrMI->getOpcode() == SP::ORrr |
436 | && OrMI->getOperand(i: 1).getReg() != SP::G0 |
437 | && OrMI->getOperand(i: 2).getReg() != SP::G0) |
438 | return false; |
439 | |
440 | if (OrMI->getOpcode() == SP::ORri |
441 | && OrMI->getOperand(i: 1).getReg() != SP::G0 |
442 | && (!OrMI->getOperand(i: 2).isImm() || OrMI->getOperand(i: 2).getImm() != 0)) |
443 | return false; |
444 | |
445 | // Erase RESTORE. |
446 | RestoreMI->eraseFromParent(); |
447 | |
448 | // Change OR to RESTORE. |
449 | OrMI->setDesc(TII->get(Opcode: (OrMI->getOpcode() == SP::ORrr) |
450 | ? SP::RESTORErr |
451 | : SP::RESTOREri)); |
452 | |
453 | // Map the destination register. |
454 | OrMI->getOperand(i: 0).setReg(reg - SP::I0 + SP::O0); |
455 | |
456 | return true; |
457 | } |
458 | |
459 | static bool combineRestoreSETHIi(MachineBasicBlock::iterator RestoreMI, |
460 | MachineBasicBlock::iterator SetHiMI, |
461 | const TargetInstrInfo *TII) |
462 | { |
463 | // Before: sethi imm3, %i[0-7] |
464 | // restore %g0, %g0, %g0 |
465 | // |
466 | // After : restore %g0, (imm3<<10), %o[0-7] |
467 | |
468 | Register reg = SetHiMI->getOperand(i: 0).getReg(); |
469 | if (reg < SP::I0 || reg > SP::I7) |
470 | return false; |
471 | |
472 | if (!SetHiMI->getOperand(i: 1).isImm()) |
473 | return false; |
474 | |
475 | int64_t imm = SetHiMI->getOperand(i: 1).getImm(); |
476 | |
477 | // Is it a 3 bit immediate? |
478 | if (!isInt<3>(x: imm)) |
479 | return false; |
480 | |
481 | // Make it a 13 bit immediate. |
482 | imm = (imm << 10) & 0x1FFF; |
483 | |
484 | assert(RestoreMI->getOpcode() == SP::RESTORErr); |
485 | |
486 | RestoreMI->setDesc(TII->get(Opcode: SP::RESTOREri)); |
487 | |
488 | RestoreMI->getOperand(i: 0).setReg(reg - SP::I0 + SP::O0); |
489 | RestoreMI->getOperand(i: 1).setReg(SP::G0); |
490 | RestoreMI->getOperand(i: 2).ChangeToImmediate(ImmVal: imm); |
491 | |
492 | |
493 | // Erase the original SETHI. |
494 | SetHiMI->eraseFromParent(); |
495 | |
496 | return true; |
497 | } |
498 | |
499 | bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB, |
500 | MachineBasicBlock::iterator MBBI) |
501 | { |
502 | // No previous instruction. |
503 | if (MBBI == MBB.begin()) |
504 | return false; |
505 | |
506 | // assert that MBBI is a "restore %g0, %g0, %g0". |
507 | assert(MBBI->getOpcode() == SP::RESTORErr |
508 | && MBBI->getOperand(0).getReg() == SP::G0 |
509 | && MBBI->getOperand(1).getReg() == SP::G0 |
510 | && MBBI->getOperand(2).getReg() == SP::G0); |
511 | |
512 | MachineBasicBlock::iterator PrevInst = std::prev(x: MBBI); |
513 | |
514 | // It cannot be combined with a bundled instruction. |
515 | if (PrevInst->isBundledWithSucc()) |
516 | return false; |
517 | |
518 | const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
519 | |
520 | switch (PrevInst->getOpcode()) { |
521 | default: break; |
522 | case SP::ADDrr: |
523 | case SP::ADDri: return combineRestoreADD(RestoreMI: MBBI, AddMI: PrevInst, TII); break; |
524 | case SP::ORrr: |
525 | case SP::ORri: return combineRestoreOR(RestoreMI: MBBI, OrMI: PrevInst, TII); break; |
526 | case SP::SETHIi: return combineRestoreSETHIi(RestoreMI: MBBI, SetHiMI: PrevInst, TII); break; |
527 | } |
528 | // It cannot combine with the previous instruction. |
529 | return false; |
530 | } |
531 | |