1 | //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass generates machine instructions for the CTR loops related pseudos: |
10 | // 1: MTCTRloop/DecreaseCTRloop |
11 | // 2: MTCTR8loop/DecreaseCTR8loop |
12 | // |
13 | // If a CTR loop can be generated: |
14 | // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr" |
15 | // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and |
16 | // its user branch instruction can be deleted. |
17 | // |
18 | // If a CTR loop can not be generated due to clobber of CTR: |
19 | // 1: MTCTRloop/MTCTR8loop can be deleted. |
20 | // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and |
21 | // a "cmplwi/cmpldi". |
22 | // |
23 | // This pass runs just before register allocation, because we don't want |
24 | // register allocator to allocate register for DecreaseCTRloop if a CTR can be |
25 | // generated or if a CTR loop can not be generated, we don't have any condition |
26 | // register for the new added "cmplwi/cmpldi". |
27 | // |
28 | //===----------------------------------------------------------------------===// |
29 | |
30 | #include "PPC.h" |
31 | #include "PPCInstrInfo.h" |
32 | #include "PPCSubtarget.h" |
33 | #include "llvm/ADT/Statistic.h" |
34 | #include "llvm/CodeGen/MachineBasicBlock.h" |
35 | #include "llvm/CodeGen/MachineFunction.h" |
36 | #include "llvm/CodeGen/MachineFunctionPass.h" |
37 | #include "llvm/CodeGen/MachineInstr.h" |
38 | #include "llvm/CodeGen/MachineLoopInfo.h" |
39 | #include "llvm/CodeGen/MachineOperand.h" |
40 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
41 | #include "llvm/CodeGen/Register.h" |
42 | #include "llvm/InitializePasses.h" |
43 | #include "llvm/Pass.h" |
44 | #include "llvm/PassRegistry.h" |
45 | #include "llvm/Support/CodeGen.h" |
46 | #include "llvm/Support/Debug.h" |
47 | #include "llvm/Support/ErrorHandling.h" |
48 | #include <cassert> |
49 | |
50 | using namespace llvm; |
51 | |
52 | #define DEBUG_TYPE "ppc-ctrloops" |
53 | |
54 | STATISTIC(NumCTRLoops, "Number of CTR loops generated" ); |
55 | STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated" ); |
56 | |
57 | namespace { |
58 | class PPCCTRLoops : public MachineFunctionPass { |
59 | public: |
60 | static char ID; |
61 | |
62 | PPCCTRLoops() : MachineFunctionPass(ID) { |
63 | initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); |
64 | } |
65 | |
66 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
67 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
68 | MachineFunctionPass::getAnalysisUsage(AU); |
69 | } |
70 | |
71 | bool runOnMachineFunction(MachineFunction &MF) override; |
72 | |
73 | private: |
74 | const PPCInstrInfo *TII = nullptr; |
75 | MachineRegisterInfo *MRI = nullptr; |
76 | |
77 | bool processLoop(MachineLoop *ML); |
78 | bool isCTRClobber(MachineInstr *MI, bool CheckReads) const; |
79 | void expandNormalLoops(MachineLoop *ML, MachineInstr *Start, |
80 | MachineInstr *Dec); |
81 | void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec); |
82 | }; |
83 | } // namespace |
84 | |
85 | char PPCCTRLoops::ID = 0; |
86 | |
87 | INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation" , |
88 | false, false) |
89 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) |
90 | INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation" , |
91 | false, false) |
92 | |
93 | FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); } |
94 | |
95 | bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { |
96 | bool Changed = false; |
97 | |
98 | auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); |
99 | TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); |
100 | MRI = &MF.getRegInfo(); |
101 | |
102 | for (auto *ML : MLI) { |
103 | if (ML->isOutermost()) |
104 | Changed |= processLoop(ML); |
105 | } |
106 | |
107 | #ifndef NDEBUG |
108 | for (const MachineBasicBlock &BB : MF) { |
109 | for (const MachineInstr &I : BB) |
110 | assert((I.getOpcode() != PPC::DecreaseCTRloop && |
111 | I.getOpcode() != PPC::DecreaseCTR8loop) && |
112 | "CTR loop pseudo is not expanded!" ); |
113 | } |
114 | #endif |
115 | |
116 | return Changed; |
117 | } |
118 | |
119 | bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const { |
120 | if (!CheckReads) { |
121 | // If we are only checking for defs, that is we are going to find |
122 | // definitions before MTCTRloop, for this case: |
123 | // CTR defination inside the callee of a call instruction will not impact |
124 | // the defination of MTCTRloop, so we can use definesRegister() for the |
125 | // check, no need to check the regmask. |
126 | return MI->definesRegister(Reg: PPC::CTR, /*TRI=*/nullptr) || |
127 | MI->definesRegister(Reg: PPC::CTR8, /*TRI=*/nullptr); |
128 | } |
129 | |
130 | if (MI->modifiesRegister(Reg: PPC::CTR, /*TRI=*/nullptr) || |
131 | MI->modifiesRegister(Reg: PPC::CTR8, /*TRI=*/nullptr)) |
132 | return true; |
133 | |
134 | if (MI->getDesc().isCall()) |
135 | return true; |
136 | |
137 | // We define the CTR in the loop preheader, so if there is any CTR reader in |
138 | // the loop, we also can not use CTR loop form. |
139 | if (MI->readsRegister(Reg: PPC::CTR, /*TRI=*/nullptr) || |
140 | MI->readsRegister(Reg: PPC::CTR8, /*TRI=*/nullptr)) |
141 | return true; |
142 | |
143 | return false; |
144 | } |
145 | |
146 | bool PPCCTRLoops::processLoop(MachineLoop *ML) { |
147 | bool Changed = false; |
148 | |
149 | // Align with HardwareLoop pass, process inner loops first. |
150 | for (MachineLoop *I : *ML) |
151 | Changed |= processLoop(ML: I); |
152 | |
153 | // If any inner loop is changed, outter loop must be without hardware loop |
154 | // intrinsics. |
155 | if (Changed) |
156 | return true; |
157 | |
158 | auto IsLoopStart = [](MachineInstr &MI) { |
159 | return MI.getOpcode() == PPC::MTCTRloop || |
160 | MI.getOpcode() == PPC::MTCTR8loop; |
161 | }; |
162 | |
163 | auto SearchForStart = |
164 | [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * { |
165 | for (auto &MI : *MBB) { |
166 | if (IsLoopStart(MI)) |
167 | return &MI; |
168 | } |
169 | return nullptr; |
170 | }; |
171 | |
172 | MachineInstr *Start = nullptr; |
173 | MachineInstr *Dec = nullptr; |
174 | bool InvalidCTRLoop = false; |
175 | |
176 | MachineBasicBlock * = ML->getLoopPreheader(); |
177 | // If there is no preheader for this loop, there must be no MTCTRloop |
178 | // either. |
179 | if (!Preheader) |
180 | return false; |
181 | |
182 | Start = SearchForStart(Preheader); |
183 | // This is not a CTR loop candidate. |
184 | if (!Start) |
185 | return false; |
186 | |
187 | // If CTR is live to the preheader, we can not redefine the CTR register. |
188 | if (Preheader->isLiveIn(Reg: PPC::CTR) || Preheader->isLiveIn(Reg: PPC::CTR8)) |
189 | InvalidCTRLoop = true; |
190 | |
191 | // Make sure there is also no CTR clobber in the block preheader between the |
192 | // begin and MTCTR. |
193 | for (MachineBasicBlock::reverse_instr_iterator I = |
194 | std::next(x: Start->getReverseIterator()); |
195 | I != Preheader->instr_rend(); ++I) |
196 | // Only check the definitions of CTR. If there is non-dead definition for |
197 | // the CTR, we conservatively don't generate a CTR loop. |
198 | if (isCTRClobber(MI: &*I, /* CheckReads */ false)) { |
199 | InvalidCTRLoop = true; |
200 | break; |
201 | } |
202 | |
203 | // Make sure there is also no CTR clobber/user in the block preheader between |
204 | // MTCTR and the end. |
205 | for (MachineBasicBlock::instr_iterator I = std::next(x: Start->getIterator()); |
206 | I != Preheader->instr_end(); ++I) |
207 | if (isCTRClobber(MI: &*I, /* CheckReads */ true)) { |
208 | InvalidCTRLoop = true; |
209 | break; |
210 | } |
211 | |
212 | // Find the CTR loop components and decide whether or not to fall back to a |
213 | // normal loop. |
214 | for (auto *MBB : reverse(C: ML->getBlocks())) { |
215 | for (auto &MI : *MBB) { |
216 | if (MI.getOpcode() == PPC::DecreaseCTRloop || |
217 | MI.getOpcode() == PPC::DecreaseCTR8loop) |
218 | Dec = &MI; |
219 | else if (!InvalidCTRLoop) |
220 | // If any instruction clobber CTR, then we can not generate a CTR loop. |
221 | InvalidCTRLoop |= isCTRClobber(MI: &MI, /* CheckReads */ true); |
222 | } |
223 | if (Dec && InvalidCTRLoop) |
224 | break; |
225 | } |
226 | |
227 | assert(Dec && "CTR loop is not complete!" ); |
228 | |
229 | if (InvalidCTRLoop) { |
230 | expandNormalLoops(ML, Start, Dec); |
231 | ++NumNormalLoops; |
232 | } |
233 | else { |
234 | expandCTRLoops(ML, Start, Dec); |
235 | ++NumCTRLoops; |
236 | } |
237 | return true; |
238 | } |
239 | |
240 | void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start, |
241 | MachineInstr *Dec) { |
242 | bool Is64Bit = |
243 | Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); |
244 | |
245 | MachineBasicBlock * = Start->getParent(); |
246 | MachineBasicBlock *Exiting = Dec->getParent(); |
247 | assert((Preheader && Exiting) && |
248 | "Preheader and exiting should exist for CTR loop!" ); |
249 | |
250 | assert(Dec->getOperand(1).getImm() == 1 && |
251 | "Loop decrement stride must be 1" ); |
252 | |
253 | unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI; |
254 | unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI; |
255 | |
256 | Register PHIDef = |
257 | MRI->createVirtualRegister(RegClass: Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass |
258 | : &PPC::GPRC_and_GPRC_NOR0RegClass); |
259 | |
260 | Start->getParent()->getParent()->getProperties().reset( |
261 | P: MachineFunctionProperties::Property::NoPHIs); |
262 | |
263 | // Generate "PHI" in the header block. |
264 | auto PHIMIB = BuildMI(BB&: *ML->getHeader(), I: ML->getHeader()->getFirstNonPHI(), |
265 | MIMD: DebugLoc(), MCID: TII->get(Opcode: TargetOpcode::PHI), DestReg: PHIDef); |
266 | PHIMIB.addReg(RegNo: Start->getOperand(i: 0).getReg()).addMBB(MBB: Preheader); |
267 | |
268 | Register ADDIDef = |
269 | MRI->createVirtualRegister(RegClass: Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass |
270 | : &PPC::GPRC_and_GPRC_NOR0RegClass); |
271 | // Generate "addi -1" in the exiting block. |
272 | BuildMI(BB&: *Exiting, I: Dec, MIMD: Dec->getDebugLoc(), MCID: TII->get(Opcode: ADDIOpcode), DestReg: ADDIDef) |
273 | .addReg(RegNo: PHIDef) |
274 | .addImm(Val: -1); |
275 | |
276 | // Add other inputs for the PHI node. |
277 | if (ML->isLoopLatch(BB: Exiting)) { |
278 | // There must be only two predecessors for the loop header, one is the |
279 | // Preheader and the other one is loop latch Exiting. In hardware loop |
280 | // insertion pass, the block containing DecreaseCTRloop must dominate all |
281 | // loop latches. So there must be only one latch. |
282 | assert(ML->getHeader()->pred_size() == 2 && |
283 | "Loop header predecessor is not right!" ); |
284 | PHIMIB.addReg(RegNo: ADDIDef).addMBB(MBB: Exiting); |
285 | } else { |
286 | // If the block containing DecreaseCTRloop is not a loop latch, we can use |
287 | // ADDIDef as the value for all other blocks for the PHI. In hardware loop |
288 | // insertion pass, the block containing DecreaseCTRloop must dominate all |
289 | // loop latches. |
290 | for (MachineBasicBlock *P : ML->getHeader()->predecessors()) { |
291 | if (ML->contains(BB: P)) { |
292 | assert(ML->isLoopLatch(P) && |
293 | "Loop's header in-loop predecessor is not loop latch!" ); |
294 | PHIMIB.addReg(RegNo: ADDIDef).addMBB(MBB: P); |
295 | } else |
296 | assert(P == Preheader && |
297 | "CTR loop should not be generated for irreducible loop!" ); |
298 | } |
299 | } |
300 | |
301 | // Generate the compare in the exiting block. |
302 | Register CMPDef = MRI->createVirtualRegister(RegClass: &PPC::CRRCRegClass); |
303 | auto CMPMIB = |
304 | BuildMI(BB&: *Exiting, I: Dec, MIMD: Dec->getDebugLoc(), MCID: TII->get(Opcode: CMPOpcode), DestReg: CMPDef) |
305 | .addReg(RegNo: ADDIDef) |
306 | .addImm(Val: 0); |
307 | |
308 | BuildMI(BB&: *Exiting, I: Dec, MIMD: Dec->getDebugLoc(), MCID: TII->get(Opcode: TargetOpcode::COPY), |
309 | DestReg: Dec->getOperand(i: 0).getReg()) |
310 | .addReg(RegNo: CMPMIB->getOperand(i: 0).getReg(), flags: 0, SubReg: PPC::sub_gt); |
311 | |
312 | // Remove the pseudo instructions. |
313 | Start->eraseFromParent(); |
314 | Dec->eraseFromParent(); |
315 | } |
316 | |
317 | void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start, |
318 | MachineInstr *Dec) { |
319 | bool Is64Bit = |
320 | Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); |
321 | |
322 | MachineBasicBlock * = Start->getParent(); |
323 | MachineBasicBlock *Exiting = Dec->getParent(); |
324 | |
325 | (void)Preheader; |
326 | assert((Preheader && Exiting) && |
327 | "Preheader and exiting should exist for CTR loop!" ); |
328 | |
329 | assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!" ); |
330 | |
331 | unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ; |
332 | unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ; |
333 | auto BrInstr = MRI->use_instr_begin(RegNo: Dec->getOperand(i: 0).getReg()); |
334 | assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) && |
335 | "There should be only one user for loop decrement pseudo!" ); |
336 | |
337 | unsigned Opcode = 0; |
338 | switch (BrInstr->getOpcode()) { |
339 | case PPC::BC: |
340 | Opcode = BDNZOpcode; |
341 | (void) ML; |
342 | assert(ML->contains(BrInstr->getOperand(1).getMBB()) && |
343 | "Invalid ctr loop!" ); |
344 | break; |
345 | case PPC::BCn: |
346 | Opcode = BDZOpcode; |
347 | assert(!ML->contains(BrInstr->getOperand(1).getMBB()) && |
348 | "Invalid ctr loop!" ); |
349 | break; |
350 | default: |
351 | llvm_unreachable("Unhandled branch user for DecreaseCTRloop." ); |
352 | } |
353 | |
354 | // Generate "bdnz/bdz" in the exiting block just before the terminator. |
355 | BuildMI(BB&: *Exiting, I: &*BrInstr, MIMD: BrInstr->getDebugLoc(), MCID: TII->get(Opcode)) |
356 | .addMBB(MBB: BrInstr->getOperand(i: 1).getMBB()); |
357 | |
358 | // Remove the pseudo instructions. |
359 | BrInstr->eraseFromParent(); |
360 | Dec->eraseFromParent(); |
361 | } |
362 | |