1 | //===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This is a simple local pass that attempts to fill delay slots with useful |
10 | // instructions. If no instructions can be moved into the delay slot, then a |
11 | // NOP is placed. |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "Sparc.h" |
15 | #include "SparcSubtarget.h" |
16 | #include "llvm/ADT/SmallSet.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/CodeGen/MachineFunctionPass.h" |
19 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
20 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
21 | #include "llvm/CodeGen/TargetInstrInfo.h" |
22 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
23 | #include "llvm/Support/CommandLine.h" |
24 | #include "llvm/Target/TargetMachine.h" |
25 | |
26 | using namespace llvm; |
27 | |
28 | #define DEBUG_TYPE "delay-slot-filler" |
29 | |
30 | STATISTIC(FilledSlots, "Number of delay slots filled" ); |
31 | |
32 | static cl::opt<bool> DisableDelaySlotFiller( |
33 | "disable-sparc-delay-filler" , |
34 | cl::init(Val: false), |
35 | cl::desc("Disable the Sparc delay slot filler." ), |
36 | cl::Hidden); |
37 | |
38 | namespace { |
39 | struct Filler : public MachineFunctionPass { |
40 | const SparcSubtarget *Subtarget = nullptr; |
41 | |
42 | static char ID; |
43 | Filler() : MachineFunctionPass(ID) {} |
44 | |
45 | StringRef getPassName() const override { return "SPARC Delay Slot Filler" ; } |
46 | |
47 | bool runOnMachineBasicBlock(MachineBasicBlock &MBB); |
48 | bool runOnMachineFunction(MachineFunction &F) override { |
49 | bool Changed = false; |
50 | Subtarget = &F.getSubtarget<SparcSubtarget>(); |
51 | |
52 | // This pass invalidates liveness information when it reorders |
53 | // instructions to fill delay slot. |
54 | F.getRegInfo().invalidateLiveness(); |
55 | |
56 | for (MachineBasicBlock &MBB : F) |
57 | Changed |= runOnMachineBasicBlock(MBB); |
58 | return Changed; |
59 | } |
60 | |
61 | MachineFunctionProperties getRequiredProperties() const override { |
62 | return MachineFunctionProperties().set( |
63 | MachineFunctionProperties::Property::NoVRegs); |
64 | } |
65 | |
66 | void insertCallDefsUses(MachineBasicBlock::iterator MI, |
67 | SmallSet<unsigned, 32>& RegDefs, |
68 | SmallSet<unsigned, 32>& RegUses); |
69 | |
70 | void insertDefsUses(MachineBasicBlock::iterator MI, |
71 | SmallSet<unsigned, 32>& RegDefs, |
72 | SmallSet<unsigned, 32>& RegUses); |
73 | |
74 | bool IsRegInSet(SmallSet<unsigned, 32>& RegSet, |
75 | unsigned Reg); |
76 | |
77 | bool delayHasHazard(MachineBasicBlock::iterator candidate, |
78 | bool &sawLoad, bool &sawStore, |
79 | SmallSet<unsigned, 32> &RegDefs, |
80 | SmallSet<unsigned, 32> &RegUses); |
81 | |
82 | MachineBasicBlock::iterator |
83 | findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot); |
84 | |
85 | bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize); |
86 | |
87 | bool tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB, |
88 | MachineBasicBlock::iterator MBBI); |
89 | |
90 | }; |
91 | char Filler::ID = 0; |
92 | } // end of anonymous namespace |
93 | |
94 | /// createSparcDelaySlotFillerPass - Returns a pass that fills in delay |
95 | /// slots in Sparc MachineFunctions |
96 | /// |
97 | FunctionPass *llvm::createSparcDelaySlotFillerPass() { |
98 | return new Filler; |
99 | } |
100 | |
101 | |
102 | /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. |
103 | /// We assume there is only one delay slot per delayed instruction. |
104 | /// |
105 | bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { |
106 | bool Changed = false; |
107 | Subtarget = &MBB.getParent()->getSubtarget<SparcSubtarget>(); |
108 | const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
109 | |
110 | for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { |
111 | MachineBasicBlock::iterator MI = I; |
112 | ++I; |
113 | |
114 | // If MI is restore, try combining it with previous inst. |
115 | if (!DisableDelaySlotFiller && |
116 | (MI->getOpcode() == SP::RESTORErr |
117 | || MI->getOpcode() == SP::RESTOREri)) { |
118 | Changed |= tryCombineRestoreWithPrevInst(MBB, MBBI: MI); |
119 | continue; |
120 | } |
121 | |
122 | // TODO: If we ever want to support v7, this needs to be extended |
123 | // to cover all floating point operations. |
124 | if (!Subtarget->isV9() && |
125 | (MI->getOpcode() == SP::FCMPS || MI->getOpcode() == SP::FCMPD |
126 | || MI->getOpcode() == SP::FCMPQ)) { |
127 | BuildMI(BB&: MBB, I, MIMD: MI->getDebugLoc(), MCID: TII->get(Opcode: SP::NOP)); |
128 | Changed = true; |
129 | continue; |
130 | } |
131 | |
132 | // If MI has no delay slot, skip. |
133 | if (!MI->hasDelaySlot()) |
134 | continue; |
135 | |
136 | MachineBasicBlock::iterator D = MBB.end(); |
137 | |
138 | if (!DisableDelaySlotFiller) |
139 | D = findDelayInstr(MBB, slot: MI); |
140 | |
141 | ++FilledSlots; |
142 | Changed = true; |
143 | |
144 | if (D == MBB.end()) |
145 | BuildMI(BB&: MBB, I, MIMD: MI->getDebugLoc(), MCID: TII->get(Opcode: SP::NOP)); |
146 | else |
147 | MBB.splice(Where: I, Other: &MBB, From: D); |
148 | |
149 | unsigned structSize = 0; |
150 | if (needsUnimp(I: MI, StructSize&: structSize)) { |
151 | MachineBasicBlock::iterator J = MI; |
152 | ++J; // skip the delay filler. |
153 | assert (J != MBB.end() && "MI needs a delay instruction." ); |
154 | BuildMI(BB&: MBB, I: ++J, MIMD: MI->getDebugLoc(), |
155 | MCID: TII->get(Opcode: SP::UNIMP)).addImm(Val: structSize); |
156 | // Bundle the delay filler and unimp with the instruction. |
157 | MIBundleBuilder(MBB, MachineBasicBlock::iterator(MI), J); |
158 | } else { |
159 | MIBundleBuilder(MBB, MachineBasicBlock::iterator(MI), I); |
160 | } |
161 | } |
162 | return Changed; |
163 | } |
164 | |
165 | MachineBasicBlock::iterator |
166 | Filler::findDelayInstr(MachineBasicBlock &MBB, |
167 | MachineBasicBlock::iterator slot) |
168 | { |
169 | SmallSet<unsigned, 32> RegDefs; |
170 | SmallSet<unsigned, 32> RegUses; |
171 | bool sawLoad = false; |
172 | bool sawStore = false; |
173 | |
174 | if (slot == MBB.begin()) |
175 | return MBB.end(); |
176 | |
177 | unsigned Opc = slot->getOpcode(); |
178 | |
179 | if (Opc == SP::RET || Opc == SP::TLS_CALL) |
180 | return MBB.end(); |
181 | |
182 | if (Opc == SP::RETL || Opc == SP::TAIL_CALL || Opc == SP::TAIL_CALLri) { |
183 | MachineBasicBlock::iterator J = slot; |
184 | --J; |
185 | |
186 | if (J->getOpcode() == SP::RESTORErr |
187 | || J->getOpcode() == SP::RESTOREri) { |
188 | // change retl to ret. |
189 | if (Opc == SP::RETL) |
190 | slot->setDesc(Subtarget->getInstrInfo()->get(Opcode: SP::RET)); |
191 | return J; |
192 | } |
193 | } |
194 | |
195 | // Call's delay filler can def some of call's uses. |
196 | if (slot->isCall()) |
197 | insertCallDefsUses(MI: slot, RegDefs, RegUses); |
198 | else |
199 | insertDefsUses(MI: slot, RegDefs, RegUses); |
200 | |
201 | bool done = false; |
202 | |
203 | MachineBasicBlock::iterator I = slot; |
204 | |
205 | while (!done) { |
206 | done = (I == MBB.begin()); |
207 | |
208 | if (!done) |
209 | --I; |
210 | |
211 | // skip debug instruction |
212 | if (I->isDebugInstr()) |
213 | continue; |
214 | |
215 | if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isPosition() || |
216 | I->hasDelaySlot() || I->isBundledWithSucc()) |
217 | break; |
218 | |
219 | if (delayHasHazard(candidate: I, sawLoad, sawStore, RegDefs, RegUses)) { |
220 | insertDefsUses(MI: I, RegDefs, RegUses); |
221 | continue; |
222 | } |
223 | |
224 | return I; |
225 | } |
226 | return MBB.end(); |
227 | } |
228 | |
229 | bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, |
230 | bool &sawLoad, |
231 | bool &sawStore, |
232 | SmallSet<unsigned, 32> &RegDefs, |
233 | SmallSet<unsigned, 32> &RegUses) |
234 | { |
235 | |
236 | if (candidate->isImplicitDef() || candidate->isKill()) |
237 | return true; |
238 | |
239 | if (candidate->mayLoad()) { |
240 | sawLoad = true; |
241 | if (sawStore) |
242 | return true; |
243 | } |
244 | |
245 | if (candidate->mayStore()) { |
246 | if (sawStore) |
247 | return true; |
248 | sawStore = true; |
249 | if (sawLoad) |
250 | return true; |
251 | } |
252 | |
253 | for (const MachineOperand &MO : candidate->operands()) { |
254 | if (!MO.isReg()) |
255 | continue; // skip |
256 | |
257 | Register Reg = MO.getReg(); |
258 | |
259 | if (MO.isDef()) { |
260 | // check whether Reg is defined or used before delay slot. |
261 | if (IsRegInSet(RegSet&: RegDefs, Reg) || IsRegInSet(RegSet&: RegUses, Reg)) |
262 | return true; |
263 | } |
264 | if (MO.isUse()) { |
265 | // check whether Reg is defined before delay slot. |
266 | if (IsRegInSet(RegSet&: RegDefs, Reg)) |
267 | return true; |
268 | } |
269 | } |
270 | |
271 | unsigned Opcode = candidate->getOpcode(); |
272 | // LD and LDD may have NOPs inserted afterwards in the case of some LEON |
273 | // processors, so we can't use the delay slot if this feature is switched-on. |
274 | if (Subtarget->insertNOPLoad() |
275 | && |
276 | Opcode >= SP::LDDArr && Opcode <= SP::LDrr) |
277 | return true; |
278 | |
279 | // Same as above for FDIV and FSQRT on some LEON processors. |
280 | if (Subtarget->fixAllFDIVSQRT() |
281 | && |
282 | Opcode >= SP::FDIVD && Opcode <= SP::FSQRTD) |
283 | return true; |
284 | |
285 | |
286 | return false; |
287 | } |
288 | |
289 | |
290 | void Filler::insertCallDefsUses(MachineBasicBlock::iterator MI, |
291 | SmallSet<unsigned, 32>& RegDefs, |
292 | SmallSet<unsigned, 32>& RegUses) |
293 | { |
294 | // Call defines o7, which is visible to the instruction in delay slot. |
295 | RegDefs.insert(V: SP::O7); |
296 | |
297 | switch(MI->getOpcode()) { |
298 | default: llvm_unreachable("Unknown opcode." ); |
299 | case SP::CALL: break; |
300 | case SP::CALLrr: |
301 | case SP::CALLri: |
302 | assert(MI->getNumOperands() >= 2); |
303 | const MachineOperand &Reg = MI->getOperand(i: 0); |
304 | assert(Reg.isReg() && "CALL first operand is not a register." ); |
305 | assert(Reg.isUse() && "CALL first operand is not a use." ); |
306 | RegUses.insert(V: Reg.getReg()); |
307 | |
308 | const MachineOperand &Operand1 = MI->getOperand(i: 1); |
309 | if (Operand1.isImm() || Operand1.isGlobal()) |
310 | break; |
311 | assert(Operand1.isReg() && "CALLrr second operand is not a register." ); |
312 | assert(Operand1.isUse() && "CALLrr second operand is not a use." ); |
313 | RegUses.insert(V: Operand1.getReg()); |
314 | break; |
315 | } |
316 | } |
317 | |
318 | // Insert Defs and Uses of MI into the sets RegDefs and RegUses. |
319 | void Filler::insertDefsUses(MachineBasicBlock::iterator MI, |
320 | SmallSet<unsigned, 32>& RegDefs, |
321 | SmallSet<unsigned, 32>& RegUses) |
322 | { |
323 | for (const MachineOperand &MO : MI->operands()) { |
324 | if (!MO.isReg()) |
325 | continue; |
326 | |
327 | Register Reg = MO.getReg(); |
328 | if (Reg == 0) |
329 | continue; |
330 | if (MO.isDef()) |
331 | RegDefs.insert(V: Reg); |
332 | if (MO.isUse()) { |
333 | // Implicit register uses of retl are return values and |
334 | // retl does not use them. |
335 | if (MO.isImplicit() && MI->getOpcode() == SP::RETL) |
336 | continue; |
337 | RegUses.insert(V: Reg); |
338 | } |
339 | } |
340 | } |
341 | |
342 | // returns true if the Reg or its alias is in the RegSet. |
343 | bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) |
344 | { |
345 | // Check Reg and all aliased Registers. |
346 | for (MCRegAliasIterator AI(Reg, Subtarget->getRegisterInfo(), true); |
347 | AI.isValid(); ++AI) |
348 | if (RegSet.count(V: *AI)) |
349 | return true; |
350 | return false; |
351 | } |
352 | |
353 | bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) |
354 | { |
355 | if (!I->isCall()) |
356 | return false; |
357 | |
358 | unsigned structSizeOpNum = 0; |
359 | switch (I->getOpcode()) { |
360 | default: llvm_unreachable("Unknown call opcode." ); |
361 | case SP::CALL: structSizeOpNum = 1; break; |
362 | case SP::CALLrr: |
363 | case SP::CALLri: structSizeOpNum = 2; break; |
364 | case SP::TLS_CALL: return false; |
365 | case SP::TAIL_CALLri: |
366 | case SP::TAIL_CALL: return false; |
367 | } |
368 | |
369 | const MachineOperand &MO = I->getOperand(i: structSizeOpNum); |
370 | if (!MO.isImm()) |
371 | return false; |
372 | StructSize = MO.getImm(); |
373 | return true; |
374 | } |
375 | |
376 | static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI, |
377 | MachineBasicBlock::iterator AddMI, |
378 | const TargetInstrInfo *TII) |
379 | { |
380 | // Before: add <op0>, <op1>, %i[0-7] |
381 | // restore %g0, %g0, %i[0-7] |
382 | // |
383 | // After : restore <op0>, <op1>, %o[0-7] |
384 | |
385 | Register reg = AddMI->getOperand(i: 0).getReg(); |
386 | if (reg < SP::I0 || reg > SP::I7) |
387 | return false; |
388 | |
389 | // Erase RESTORE. |
390 | RestoreMI->eraseFromParent(); |
391 | |
392 | // Change ADD to RESTORE. |
393 | AddMI->setDesc(TII->get(Opcode: (AddMI->getOpcode() == SP::ADDrr) |
394 | ? SP::RESTORErr |
395 | : SP::RESTOREri)); |
396 | |
397 | // Map the destination register. |
398 | AddMI->getOperand(i: 0).setReg(reg - SP::I0 + SP::O0); |
399 | |
400 | return true; |
401 | } |
402 | |
403 | static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI, |
404 | MachineBasicBlock::iterator OrMI, |
405 | const TargetInstrInfo *TII) |
406 | { |
407 | // Before: or <op0>, <op1>, %i[0-7] |
408 | // restore %g0, %g0, %i[0-7] |
409 | // and <op0> or <op1> is zero, |
410 | // |
411 | // After : restore <op0>, <op1>, %o[0-7] |
412 | |
413 | Register reg = OrMI->getOperand(i: 0).getReg(); |
414 | if (reg < SP::I0 || reg > SP::I7) |
415 | return false; |
416 | |
417 | // check whether it is a copy. |
418 | if (OrMI->getOpcode() == SP::ORrr |
419 | && OrMI->getOperand(i: 1).getReg() != SP::G0 |
420 | && OrMI->getOperand(i: 2).getReg() != SP::G0) |
421 | return false; |
422 | |
423 | if (OrMI->getOpcode() == SP::ORri |
424 | && OrMI->getOperand(i: 1).getReg() != SP::G0 |
425 | && (!OrMI->getOperand(i: 2).isImm() || OrMI->getOperand(i: 2).getImm() != 0)) |
426 | return false; |
427 | |
428 | // Erase RESTORE. |
429 | RestoreMI->eraseFromParent(); |
430 | |
431 | // Change OR to RESTORE. |
432 | OrMI->setDesc(TII->get(Opcode: (OrMI->getOpcode() == SP::ORrr) |
433 | ? SP::RESTORErr |
434 | : SP::RESTOREri)); |
435 | |
436 | // Map the destination register. |
437 | OrMI->getOperand(i: 0).setReg(reg - SP::I0 + SP::O0); |
438 | |
439 | return true; |
440 | } |
441 | |
442 | static bool combineRestoreSETHIi(MachineBasicBlock::iterator RestoreMI, |
443 | MachineBasicBlock::iterator SetHiMI, |
444 | const TargetInstrInfo *TII) |
445 | { |
446 | // Before: sethi imm3, %i[0-7] |
447 | // restore %g0, %g0, %g0 |
448 | // |
449 | // After : restore %g0, (imm3<<10), %o[0-7] |
450 | |
451 | Register reg = SetHiMI->getOperand(i: 0).getReg(); |
452 | if (reg < SP::I0 || reg > SP::I7) |
453 | return false; |
454 | |
455 | if (!SetHiMI->getOperand(i: 1).isImm()) |
456 | return false; |
457 | |
458 | int64_t imm = SetHiMI->getOperand(i: 1).getImm(); |
459 | |
460 | // Is it a 3 bit immediate? |
461 | if (!isInt<3>(x: imm)) |
462 | return false; |
463 | |
464 | // Make it a 13 bit immediate. |
465 | imm = (imm << 10) & 0x1FFF; |
466 | |
467 | assert(RestoreMI->getOpcode() == SP::RESTORErr); |
468 | |
469 | RestoreMI->setDesc(TII->get(Opcode: SP::RESTOREri)); |
470 | |
471 | RestoreMI->getOperand(i: 0).setReg(reg - SP::I0 + SP::O0); |
472 | RestoreMI->getOperand(i: 1).setReg(SP::G0); |
473 | RestoreMI->getOperand(i: 2).ChangeToImmediate(ImmVal: imm); |
474 | |
475 | |
476 | // Erase the original SETHI. |
477 | SetHiMI->eraseFromParent(); |
478 | |
479 | return true; |
480 | } |
481 | |
482 | bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB, |
483 | MachineBasicBlock::iterator MBBI) |
484 | { |
485 | // No previous instruction. |
486 | if (MBBI == MBB.begin()) |
487 | return false; |
488 | |
489 | // assert that MBBI is a "restore %g0, %g0, %g0". |
490 | assert(MBBI->getOpcode() == SP::RESTORErr |
491 | && MBBI->getOperand(0).getReg() == SP::G0 |
492 | && MBBI->getOperand(1).getReg() == SP::G0 |
493 | && MBBI->getOperand(2).getReg() == SP::G0); |
494 | |
495 | MachineBasicBlock::iterator PrevInst = std::prev(x: MBBI); |
496 | |
497 | // It cannot be combined with a bundled instruction. |
498 | if (PrevInst->isBundledWithSucc()) |
499 | return false; |
500 | |
501 | const TargetInstrInfo *TII = Subtarget->getInstrInfo(); |
502 | |
503 | switch (PrevInst->getOpcode()) { |
504 | default: break; |
505 | case SP::ADDrr: |
506 | case SP::ADDri: return combineRestoreADD(RestoreMI: MBBI, AddMI: PrevInst, TII); break; |
507 | case SP::ORrr: |
508 | case SP::ORri: return combineRestoreOR(RestoreMI: MBBI, OrMI: PrevInst, TII); break; |
509 | case SP::SETHIi: return combineRestoreSETHIi(RestoreMI: MBBI, SetHiMI: PrevInst, TII); break; |
510 | } |
511 | // It cannot combine with the previous instruction. |
512 | return false; |
513 | } |
514 | |