1//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass generates machine instructions for the CTR loops related pseudos:
10// 1: MTCTRloop/DecreaseCTRloop
11// 2: MTCTR8loop/DecreaseCTR8loop
12//
13// If a CTR loop can be generated:
14// 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16// its user branch instruction can be deleted.
17//
18// If a CTR loop can not be generated due to clobber of CTR:
19// 1: MTCTRloop/MTCTR8loop can be deleted.
20// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21// a "cmplwi/cmpldi".
22//
23// This pass runs just before register allocation, because we don't want
24// register allocator to allocate register for DecreaseCTRloop if a CTR can be
25// generated or if a CTR loop can not be generated, we don't have any condition
26// register for the new added "cmplwi/cmpldi".
27//
28//===----------------------------------------------------------------------===//
29
30#include "PPC.h"
31#include "PPCInstrInfo.h"
32#include "PPCSubtarget.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineFunctionPass.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineLoopInfo.h"
39#include "llvm/CodeGen/MachineOperand.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/Register.h"
42#include "llvm/InitializePasses.h"
43#include "llvm/Pass.h"
44#include "llvm/PassRegistry.h"
45#include "llvm/Support/CodeGen.h"
46#include "llvm/Support/Debug.h"
47#include "llvm/Support/ErrorHandling.h"
48#include <cassert>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "ppc-ctrloops"
53
54STATISTIC(NumCTRLoops, "Number of CTR loops generated");
55STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
56
57namespace {
58class PPCCTRLoops : public MachineFunctionPass {
59public:
60 static char ID;
61
62 PPCCTRLoops() : MachineFunctionPass(ID) {
63 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
64 }
65
66 void getAnalysisUsage(AnalysisUsage &AU) const override {
67 AU.addRequired<MachineLoopInfoWrapperPass>();
68 MachineFunctionPass::getAnalysisUsage(AU);
69 }
70
71 bool runOnMachineFunction(MachineFunction &MF) override;
72
73private:
74 const PPCInstrInfo *TII = nullptr;
75 MachineRegisterInfo *MRI = nullptr;
76
77 bool processLoop(MachineLoop *ML);
78 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80 MachineInstr *Dec);
81 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82};
83} // namespace
84
85char PPCCTRLoops::ID = 0;
86
87INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88 false, false)
89INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
90INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
91 false, false)
92
93FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94
95bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96 bool Changed = false;
97
98 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
99 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100 MRI = &MF.getRegInfo();
101
102 for (auto *ML : MLI) {
103 if (ML->isOutermost())
104 Changed |= processLoop(ML);
105 }
106
107#ifndef NDEBUG
108 for (const MachineBasicBlock &BB : MF) {
109 for (const MachineInstr &I : BB)
110 assert((I.getOpcode() != PPC::DecreaseCTRloop &&
111 I.getOpcode() != PPC::DecreaseCTR8loop) &&
112 "CTR loop pseudo is not expanded!");
113 }
114#endif
115
116 return Changed;
117}
118
119bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
120 if (!CheckReads) {
121 // If we are only checking for defs, that is we are going to find
122 // definitions before MTCTRloop, for this case:
123 // CTR defination inside the callee of a call instruction will not impact
124 // the defination of MTCTRloop, so we can use definesRegister() for the
125 // check, no need to check the regmask.
126 return MI->definesRegister(Reg: PPC::CTR, /*TRI=*/nullptr) ||
127 MI->definesRegister(Reg: PPC::CTR8, /*TRI=*/nullptr);
128 }
129
130 if (MI->modifiesRegister(Reg: PPC::CTR, /*TRI=*/nullptr) ||
131 MI->modifiesRegister(Reg: PPC::CTR8, /*TRI=*/nullptr))
132 return true;
133
134 if (MI->getDesc().isCall())
135 return true;
136
137 // We define the CTR in the loop preheader, so if there is any CTR reader in
138 // the loop, we also can not use CTR loop form.
139 if (MI->readsRegister(Reg: PPC::CTR, /*TRI=*/nullptr) ||
140 MI->readsRegister(Reg: PPC::CTR8, /*TRI=*/nullptr))
141 return true;
142
143 return false;
144}
145
146bool PPCCTRLoops::processLoop(MachineLoop *ML) {
147 bool Changed = false;
148
149 // Align with HardwareLoop pass, process inner loops first.
150 for (MachineLoop *I : *ML)
151 Changed |= processLoop(ML: I);
152
153 // If any inner loop is changed, outter loop must be without hardware loop
154 // intrinsics.
155 if (Changed)
156 return true;
157
158 auto IsLoopStart = [](MachineInstr &MI) {
159 return MI.getOpcode() == PPC::MTCTRloop ||
160 MI.getOpcode() == PPC::MTCTR8loop;
161 };
162
163 auto SearchForStart =
164 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
165 for (auto &MI : *MBB) {
166 if (IsLoopStart(MI))
167 return &MI;
168 }
169 return nullptr;
170 };
171
172 MachineInstr *Start = nullptr;
173 MachineInstr *Dec = nullptr;
174 bool InvalidCTRLoop = false;
175
176 MachineBasicBlock *Preheader = ML->getLoopPreheader();
177 // If there is no preheader for this loop, there must be no MTCTRloop
178 // either.
179 if (!Preheader)
180 return false;
181
182 Start = SearchForStart(Preheader);
183 // This is not a CTR loop candidate.
184 if (!Start)
185 return false;
186
187 // If CTR is live to the preheader, we can not redefine the CTR register.
188 if (Preheader->isLiveIn(Reg: PPC::CTR) || Preheader->isLiveIn(Reg: PPC::CTR8))
189 InvalidCTRLoop = true;
190
191 // Make sure there is also no CTR clobber in the block preheader between the
192 // begin and MTCTR.
193 for (MachineBasicBlock::reverse_instr_iterator I =
194 std::next(x: Start->getReverseIterator());
195 I != Preheader->instr_rend(); ++I)
196 // Only check the definitions of CTR. If there is non-dead definition for
197 // the CTR, we conservatively don't generate a CTR loop.
198 if (isCTRClobber(MI: &*I, /* CheckReads */ false)) {
199 InvalidCTRLoop = true;
200 break;
201 }
202
203 // Make sure there is also no CTR clobber/user in the block preheader between
204 // MTCTR and the end.
205 for (MachineBasicBlock::instr_iterator I = std::next(x: Start->getIterator());
206 I != Preheader->instr_end(); ++I)
207 if (isCTRClobber(MI: &*I, /* CheckReads */ true)) {
208 InvalidCTRLoop = true;
209 break;
210 }
211
212 // Find the CTR loop components and decide whether or not to fall back to a
213 // normal loop.
214 for (auto *MBB : reverse(C: ML->getBlocks())) {
215 for (auto &MI : *MBB) {
216 if (MI.getOpcode() == PPC::DecreaseCTRloop ||
217 MI.getOpcode() == PPC::DecreaseCTR8loop)
218 Dec = &MI;
219 else if (!InvalidCTRLoop)
220 // If any instruction clobber CTR, then we can not generate a CTR loop.
221 InvalidCTRLoop |= isCTRClobber(MI: &MI, /* CheckReads */ true);
222 }
223 if (Dec && InvalidCTRLoop)
224 break;
225 }
226
227 assert(Dec && "CTR loop is not complete!");
228
229 if (InvalidCTRLoop) {
230 expandNormalLoops(ML, Start, Dec);
231 ++NumNormalLoops;
232 }
233 else {
234 expandCTRLoops(ML, Start, Dec);
235 ++NumCTRLoops;
236 }
237 return true;
238}
239
240void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
241 MachineInstr *Dec) {
242 bool Is64Bit =
243 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
244
245 MachineBasicBlock *Preheader = Start->getParent();
246 MachineBasicBlock *Exiting = Dec->getParent();
247 assert((Preheader && Exiting) &&
248 "Preheader and exiting should exist for CTR loop!");
249
250 assert(Dec->getOperand(1).getImm() == 1 &&
251 "Loop decrement stride must be 1");
252
253 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
254 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
255
256 Register PHIDef =
257 MRI->createVirtualRegister(RegClass: Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
258 : &PPC::GPRC_and_GPRC_NOR0RegClass);
259
260 Start->getParent()->getParent()->getProperties().reset(
261 P: MachineFunctionProperties::Property::NoPHIs);
262
263 // Generate "PHI" in the header block.
264 auto PHIMIB = BuildMI(BB&: *ML->getHeader(), I: ML->getHeader()->getFirstNonPHI(),
265 MIMD: DebugLoc(), MCID: TII->get(Opcode: TargetOpcode::PHI), DestReg: PHIDef);
266 PHIMIB.addReg(RegNo: Start->getOperand(i: 0).getReg()).addMBB(MBB: Preheader);
267
268 Register ADDIDef =
269 MRI->createVirtualRegister(RegClass: Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
270 : &PPC::GPRC_and_GPRC_NOR0RegClass);
271 // Generate "addi -1" in the exiting block.
272 BuildMI(BB&: *Exiting, I: Dec, MIMD: Dec->getDebugLoc(), MCID: TII->get(Opcode: ADDIOpcode), DestReg: ADDIDef)
273 .addReg(RegNo: PHIDef)
274 .addImm(Val: -1);
275
276 // Add other inputs for the PHI node.
277 if (ML->isLoopLatch(BB: Exiting)) {
278 // There must be only two predecessors for the loop header, one is the
279 // Preheader and the other one is loop latch Exiting. In hardware loop
280 // insertion pass, the block containing DecreaseCTRloop must dominate all
281 // loop latches. So there must be only one latch.
282 assert(ML->getHeader()->pred_size() == 2 &&
283 "Loop header predecessor is not right!");
284 PHIMIB.addReg(RegNo: ADDIDef).addMBB(MBB: Exiting);
285 } else {
286 // If the block containing DecreaseCTRloop is not a loop latch, we can use
287 // ADDIDef as the value for all other blocks for the PHI. In hardware loop
288 // insertion pass, the block containing DecreaseCTRloop must dominate all
289 // loop latches.
290 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
291 if (ML->contains(BB: P)) {
292 assert(ML->isLoopLatch(P) &&
293 "Loop's header in-loop predecessor is not loop latch!");
294 PHIMIB.addReg(RegNo: ADDIDef).addMBB(MBB: P);
295 } else
296 assert(P == Preheader &&
297 "CTR loop should not be generated for irreducible loop!");
298 }
299 }
300
301 // Generate the compare in the exiting block.
302 Register CMPDef = MRI->createVirtualRegister(RegClass: &PPC::CRRCRegClass);
303 auto CMPMIB =
304 BuildMI(BB&: *Exiting, I: Dec, MIMD: Dec->getDebugLoc(), MCID: TII->get(Opcode: CMPOpcode), DestReg: CMPDef)
305 .addReg(RegNo: ADDIDef)
306 .addImm(Val: 0);
307
308 BuildMI(BB&: *Exiting, I: Dec, MIMD: Dec->getDebugLoc(), MCID: TII->get(Opcode: TargetOpcode::COPY),
309 DestReg: Dec->getOperand(i: 0).getReg())
310 .addReg(RegNo: CMPMIB->getOperand(i: 0).getReg(), flags: 0, SubReg: PPC::sub_gt);
311
312 // Remove the pseudo instructions.
313 Start->eraseFromParent();
314 Dec->eraseFromParent();
315}
316
317void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
318 MachineInstr *Dec) {
319 bool Is64Bit =
320 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
321
322 MachineBasicBlock *Preheader = Start->getParent();
323 MachineBasicBlock *Exiting = Dec->getParent();
324
325 (void)Preheader;
326 assert((Preheader && Exiting) &&
327 "Preheader and exiting should exist for CTR loop!");
328
329 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
330
331 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
332 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
333 auto BrInstr = MRI->use_instr_begin(RegNo: Dec->getOperand(i: 0).getReg());
334 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
335 "There should be only one user for loop decrement pseudo!");
336
337 unsigned Opcode = 0;
338 switch (BrInstr->getOpcode()) {
339 case PPC::BC:
340 Opcode = BDNZOpcode;
341 (void) ML;
342 assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
343 "Invalid ctr loop!");
344 break;
345 case PPC::BCn:
346 Opcode = BDZOpcode;
347 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
348 "Invalid ctr loop!");
349 break;
350 default:
351 llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
352 }
353
354 // Generate "bdnz/bdz" in the exiting block just before the terminator.
355 BuildMI(BB&: *Exiting, I: &*BrInstr, MIMD: BrInstr->getDebugLoc(), MCID: TII->get(Opcode))
356 .addMBB(MBB: BrInstr->getOperand(i: 1).getMBB());
357
358 // Remove the pseudo instructions.
359 BrInstr->eraseFromParent();
360 Dec->eraseFromParent();
361}
362