1//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass generates machine instructions for the CTR loops related pseudos:
10// 1: MTCTRloop/DecreaseCTRloop
11// 2: MTCTR8loop/DecreaseCTR8loop
12//
13// If a CTR loop can be generated:
14// 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16// its user branch instruction can be deleted.
17//
18// If a CTR loop can not be generated due to clobber of CTR:
19// 1: MTCTRloop/MTCTR8loop can be deleted.
20// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21// a "cmplwi/cmpldi".
22//
23// This pass runs just before register allocation, because we don't want
24// register allocator to allocate register for DecreaseCTRloop if a CTR can be
25// generated or if a CTR loop can not be generated, we don't have any condition
26// register for the new added "cmplwi/cmpldi".
27//
28//===----------------------------------------------------------------------===//
29
30#include "PPC.h"
31#include "PPCInstrInfo.h"
32#include "PPCSubtarget.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineFunctionPass.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineLoopInfo.h"
39#include "llvm/CodeGen/MachineOperand.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/Register.h"
42#include "llvm/Pass.h"
43#include "llvm/Support/ErrorHandling.h"
44#include <cassert>
45
46using namespace llvm;
47
48#define DEBUG_TYPE "ppc-ctrloops"
49
50STATISTIC(NumCTRLoops, "Number of CTR loops generated");
51STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
52
53namespace {
54class PPCCTRLoops : public MachineFunctionPass {
55public:
56 static char ID;
57
58 PPCCTRLoops() : MachineFunctionPass(ID) {}
59
60 void getAnalysisUsage(AnalysisUsage &AU) const override {
61 AU.addRequired<MachineLoopInfoWrapperPass>();
62 MachineFunctionPass::getAnalysisUsage(AU);
63 }
64
65 bool runOnMachineFunction(MachineFunction &MF) override;
66
67private:
68 const PPCInstrInfo *TII = nullptr;
69 MachineRegisterInfo *MRI = nullptr;
70
71 bool processLoop(MachineLoop *ML);
72 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
73 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
74 MachineInstr *Dec);
75 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
76};
77} // namespace
78
79char PPCCTRLoops::ID = 0;
80
81INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
82 false, false)
83INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
84INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
85 false, false)
86
87FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
88
89bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
90 bool Changed = false;
91
92 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
93 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
94 MRI = &MF.getRegInfo();
95
96 for (auto *ML : MLI) {
97 if (ML->isOutermost())
98 Changed |= processLoop(ML);
99 }
100
101#ifndef NDEBUG
102 for (const MachineBasicBlock &BB : MF) {
103 for (const MachineInstr &I : BB)
104 assert((I.getOpcode() != PPC::DecreaseCTRloop &&
105 I.getOpcode() != PPC::DecreaseCTR8loop) &&
106 "CTR loop pseudo is not expanded!");
107 }
108#endif
109
110 return Changed;
111}
112
113bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
114 if (!CheckReads) {
115 // If we are only checking for defs, that is we are going to find
116 // definitions before MTCTRloop, for this case:
117 // CTR defination inside the callee of a call instruction will not impact
118 // the defination of MTCTRloop, so we can use definesRegister() for the
119 // check, no need to check the regmask.
120 return MI->definesRegister(Reg: PPC::CTR, /*TRI=*/nullptr) ||
121 MI->definesRegister(Reg: PPC::CTR8, /*TRI=*/nullptr);
122 }
123
124 if (MI->modifiesRegister(Reg: PPC::CTR, /*TRI=*/nullptr) ||
125 MI->modifiesRegister(Reg: PPC::CTR8, /*TRI=*/nullptr))
126 return true;
127
128 if (MI->getDesc().isCall())
129 return true;
130
131 // We define the CTR in the loop preheader, so if there is any CTR reader in
132 // the loop, we also can not use CTR loop form.
133 if (MI->readsRegister(Reg: PPC::CTR, /*TRI=*/nullptr) ||
134 MI->readsRegister(Reg: PPC::CTR8, /*TRI=*/nullptr))
135 return true;
136
137 return false;
138}
139
140bool PPCCTRLoops::processLoop(MachineLoop *ML) {
141 bool Changed = false;
142
143 // Align with HardwareLoop pass, process inner loops first.
144 for (MachineLoop *I : *ML)
145 Changed |= processLoop(ML: I);
146
147 // If any inner loop is changed, outter loop must be without hardware loop
148 // intrinsics.
149 if (Changed)
150 return true;
151
152 auto IsLoopStart = [](MachineInstr &MI) {
153 return MI.getOpcode() == PPC::MTCTRloop ||
154 MI.getOpcode() == PPC::MTCTR8loop;
155 };
156
157 auto SearchForStart =
158 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
159 for (auto &MI : *MBB) {
160 if (IsLoopStart(MI))
161 return &MI;
162 }
163 return nullptr;
164 };
165
166 MachineInstr *Start = nullptr;
167 MachineInstr *Dec = nullptr;
168 bool InvalidCTRLoop = false;
169
170 MachineBasicBlock *Preheader = ML->getLoopPreheader();
171 // If there is no preheader for this loop, there must be no MTCTRloop
172 // either.
173 if (!Preheader)
174 return false;
175
176 Start = SearchForStart(Preheader);
177 // This is not a CTR loop candidate.
178 if (!Start)
179 return false;
180
181 // If CTR is live to the preheader, we can not redefine the CTR register.
182 if (Preheader->isLiveIn(Reg: PPC::CTR) || Preheader->isLiveIn(Reg: PPC::CTR8))
183 InvalidCTRLoop = true;
184
185 // Make sure there is also no CTR clobber in the block preheader between the
186 // begin and MTCTR.
187 for (MachineBasicBlock::reverse_instr_iterator I =
188 std::next(x: Start->getReverseIterator());
189 I != Preheader->instr_rend(); ++I)
190 // Only check the definitions of CTR. If there is non-dead definition for
191 // the CTR, we conservatively don't generate a CTR loop.
192 if (isCTRClobber(MI: &*I, /* CheckReads */ false)) {
193 InvalidCTRLoop = true;
194 break;
195 }
196
197 // Make sure there is also no CTR clobber/user in the block preheader between
198 // MTCTR and the end.
199 for (MachineBasicBlock::instr_iterator I = std::next(x: Start->getIterator());
200 I != Preheader->instr_end(); ++I)
201 if (isCTRClobber(MI: &*I, /* CheckReads */ true)) {
202 InvalidCTRLoop = true;
203 break;
204 }
205
206 // Find the CTR loop components and decide whether or not to fall back to a
207 // normal loop.
208 for (auto *MBB : reverse(C: ML->getBlocks())) {
209 for (auto &MI : *MBB) {
210 if (MI.getOpcode() == PPC::DecreaseCTRloop ||
211 MI.getOpcode() == PPC::DecreaseCTR8loop)
212 Dec = &MI;
213 else if (!InvalidCTRLoop)
214 // If any instruction clobber CTR, then we can not generate a CTR loop.
215 InvalidCTRLoop |= isCTRClobber(MI: &MI, /* CheckReads */ true);
216 }
217 if (Dec && InvalidCTRLoop)
218 break;
219 }
220
221 assert(Dec && "CTR loop is not complete!");
222
223 if (InvalidCTRLoop) {
224 expandNormalLoops(ML, Start, Dec);
225 ++NumNormalLoops;
226 }
227 else {
228 expandCTRLoops(ML, Start, Dec);
229 ++NumCTRLoops;
230 }
231 return true;
232}
233
234void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
235 MachineInstr *Dec) {
236 bool Is64Bit =
237 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
238
239 MachineBasicBlock *Preheader = Start->getParent();
240 MachineBasicBlock *Exiting = Dec->getParent();
241 assert((Preheader && Exiting) &&
242 "Preheader and exiting should exist for CTR loop!");
243
244 assert(Dec->getOperand(1).getImm() == 1 &&
245 "Loop decrement stride must be 1");
246
247 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
248 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
249
250 Register PHIDef =
251 MRI->createVirtualRegister(RegClass: Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
252 : &PPC::GPRC_and_GPRC_NOR0RegClass);
253
254 Start->getParent()->getParent()->getProperties().resetNoPHIs();
255
256 // Generate "PHI" in the header block.
257 auto PHIMIB = BuildMI(BB&: *ML->getHeader(), I: ML->getHeader()->getFirstNonPHI(),
258 MIMD: DebugLoc(), MCID: TII->get(Opcode: TargetOpcode::PHI), DestReg: PHIDef);
259 PHIMIB.addReg(RegNo: Start->getOperand(i: 0).getReg()).addMBB(MBB: Preheader);
260
261 Register ADDIDef =
262 MRI->createVirtualRegister(RegClass: Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
263 : &PPC::GPRC_and_GPRC_NOR0RegClass);
264 // Generate "addi -1" in the exiting block.
265 BuildMI(BB&: *Exiting, I: Dec, MIMD: Dec->getDebugLoc(), MCID: TII->get(Opcode: ADDIOpcode), DestReg: ADDIDef)
266 .addReg(RegNo: PHIDef)
267 .addImm(Val: -1);
268
269 // Add other inputs for the PHI node.
270 if (ML->isLoopLatch(BB: Exiting)) {
271 // There must be only two predecessors for the loop header, one is the
272 // Preheader and the other one is loop latch Exiting. In hardware loop
273 // insertion pass, the block containing DecreaseCTRloop must dominate all
274 // loop latches. So there must be only one latch.
275 assert(ML->getHeader()->pred_size() == 2 &&
276 "Loop header predecessor is not right!");
277 PHIMIB.addReg(RegNo: ADDIDef).addMBB(MBB: Exiting);
278 } else {
279 // If the block containing DecreaseCTRloop is not a loop latch, we can use
280 // ADDIDef as the value for all other blocks for the PHI. In hardware loop
281 // insertion pass, the block containing DecreaseCTRloop must dominate all
282 // loop latches.
283 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
284 if (ML->contains(BB: P)) {
285 assert(ML->isLoopLatch(P) &&
286 "Loop's header in-loop predecessor is not loop latch!");
287 PHIMIB.addReg(RegNo: ADDIDef).addMBB(MBB: P);
288 } else
289 assert(P == Preheader &&
290 "CTR loop should not be generated for irreducible loop!");
291 }
292 }
293
294 // Generate the compare in the exiting block.
295 Register CMPDef = MRI->createVirtualRegister(RegClass: &PPC::CRRCRegClass);
296 auto CMPMIB =
297 BuildMI(BB&: *Exiting, I: Dec, MIMD: Dec->getDebugLoc(), MCID: TII->get(Opcode: CMPOpcode), DestReg: CMPDef)
298 .addReg(RegNo: ADDIDef)
299 .addImm(Val: 0);
300
301 BuildMI(BB&: *Exiting, I: Dec, MIMD: Dec->getDebugLoc(), MCID: TII->get(Opcode: TargetOpcode::COPY),
302 DestReg: Dec->getOperand(i: 0).getReg())
303 .addReg(RegNo: CMPMIB->getOperand(i: 0).getReg(), flags: 0, SubReg: PPC::sub_gt);
304
305 // Remove the pseudo instructions.
306 Start->eraseFromParent();
307 Dec->eraseFromParent();
308}
309
310void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
311 MachineInstr *Dec) {
312 bool Is64Bit =
313 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
314
315 MachineBasicBlock *Preheader = Start->getParent();
316 MachineBasicBlock *Exiting = Dec->getParent();
317
318 (void)Preheader;
319 assert((Preheader && Exiting) &&
320 "Preheader and exiting should exist for CTR loop!");
321
322 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
323
324 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
325 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
326 auto BrInstr = MRI->use_instr_begin(RegNo: Dec->getOperand(i: 0).getReg());
327 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
328 "There should be only one user for loop decrement pseudo!");
329
330 unsigned Opcode = 0;
331 switch (BrInstr->getOpcode()) {
332 case PPC::BC:
333 Opcode = BDNZOpcode;
334 (void) ML;
335 assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
336 "Invalid ctr loop!");
337 break;
338 case PPC::BCn:
339 Opcode = BDZOpcode;
340 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
341 "Invalid ctr loop!");
342 break;
343 default:
344 llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
345 }
346
347 // Generate "bdnz/bdz" in the exiting block just before the terminator.
348 BuildMI(BB&: *Exiting, I: &*BrInstr, MIMD: BrInstr->getDebugLoc(), MCID: TII->get(Opcode))
349 .addMBB(MBB: BrInstr->getOperand(i: 1).getMBB());
350
351 // Remove the pseudo instructions.
352 BrInstr->eraseFromParent();
353 Dec->eraseFromParent();
354}
355