1//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass mutates the form of VSX FMA instructions to avoid unnecessary
10// copies.
11//
12//===----------------------------------------------------------------------===//
13
14#include "PPC.h"
15#include "PPCInstrInfo.h"
16#include "PPCTargetMachine.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/CodeGen/LiveIntervals.h"
20#include "llvm/CodeGen/MachineDominators.h"
21#include "llvm/CodeGen/MachineFrameInfo.h"
22#include "llvm/CodeGen/MachineFunctionPass.h"
23#include "llvm/CodeGen/MachineMemOperand.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/PseudoSourceValue.h"
26#include "llvm/CodeGen/ScheduleDAG.h"
27#include "llvm/CodeGen/SlotIndexes.h"
28#include "llvm/MC/TargetRegistry.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/Support/raw_ostream.h"
33
34using namespace llvm;
35
36// Temporarily disable FMA mutation by default, since it doesn't handle
37// cross-basic-block intervals well.
38// See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html
39// http://reviews.llvm.org/D17087
40static cl::opt<bool> DisableVSXFMAMutate(
41 "disable-ppc-vsx-fma-mutation",
42 cl::desc("Disable VSX FMA instruction mutation"), cl::init(Val: true),
43 cl::Hidden);
44
45#define DEBUG_TYPE "ppc-vsx-fma-mutate"
46
47namespace llvm { namespace PPC {
48 int getAltVSXFMAOpcode(uint16_t Opcode);
49} }
50
51namespace {
52 // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
53 // (Altivec and scalar floating-point registers), we need to transform the
54 // copies into subregister copies with other restrictions.
55 struct PPCVSXFMAMutate : public MachineFunctionPass {
56 static char ID;
57 PPCVSXFMAMutate() : MachineFunctionPass(ID) {}
58
59 LiveIntervals *LIS;
60 const PPCInstrInfo *TII;
61
62protected:
63 bool processBlock(MachineBasicBlock &MBB) {
64 bool Changed = false;
65
66 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
67 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
68 for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
69 I != IE; ++I) {
70 MachineInstr &MI = *I;
71
72 // The default (A-type) VSX FMA form kills the addend (it is taken from
73 // the target register, which is then updated to reflect the result of
74 // the FMA). If the instruction, however, kills one of the registers
75 // used for the product, then we can use the M-form instruction (which
76 // will take that value from the to-be-defined register).
77
78 int AltOpc = PPC::getAltVSXFMAOpcode(Opcode: MI.getOpcode());
79 if (AltOpc == -1)
80 continue;
81
82 // This pass is run after register coalescing, and so we're looking for
83 // a situation like this:
84 // ...
85 // %5 = COPY %9; VSLRC:%5,%9
86 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
87 // implicit %rm; VSLRC:%5,%17,%16
88 // ...
89 // %9<def,tied1> = XSMADDADP %9<tied0>, %17, %19,
90 // implicit %rm; VSLRC:%9,%17,%19
91 // ...
92 // Where we can eliminate the copy by changing from the A-type to the
93 // M-type instruction. Specifically, for this example, this means:
94 // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16,
95 // implicit %rm; VSLRC:%5,%17,%16
96 // is replaced by:
97 // %16<def,tied1> = XSMADDMDP %16<tied0>, %18, %9,
98 // implicit %rm; VSLRC:%16,%18,%9
99 // and we remove: %5 = COPY %9; VSLRC:%5,%9
100
101 SlotIndex FMAIdx = LIS->getInstructionIndex(Instr: MI);
102
103 VNInfo *AddendValNo =
104 LIS->getInterval(Reg: MI.getOperand(i: 1).getReg()).Query(Idx: FMAIdx).valueIn();
105
106 // This can be null if the register is undef.
107 if (!AddendValNo)
108 continue;
109
110 MachineInstr *AddendMI = LIS->getInstructionFromIndex(index: AddendValNo->def);
111
112 // The addend and this instruction must be in the same block.
113
114 if (!AddendMI || AddendMI->getParent() != MI.getParent())
115 continue;
116
117 // The addend must be a full copy within the same register class.
118
119 if (!AddendMI->isFullCopy())
120 continue;
121
122 Register AddendSrcReg = AddendMI->getOperand(i: 1).getReg();
123 if (AddendSrcReg.isVirtual()) {
124 if (MRI.getRegClass(Reg: AddendMI->getOperand(i: 0).getReg()) !=
125 MRI.getRegClass(Reg: AddendSrcReg))
126 continue;
127 } else {
128 // If AddendSrcReg is a physical register, make sure the destination
129 // register class contains it.
130 if (!MRI.getRegClass(Reg: AddendMI->getOperand(i: 0).getReg())
131 ->contains(Reg: AddendSrcReg))
132 continue;
133 }
134
135 // In theory, there could be other uses of the addend copy before this
136 // fma. We could deal with this, but that would require additional
137 // logic below and I suspect it will not occur in any relevant
138 // situations. Additionally, check whether the copy source is killed
139 // prior to the fma. In order to replace the addend here with the
140 // source of the copy, it must still be live here. We can't use
141 // interval testing for a physical register, so as long as we're
142 // walking the MIs we may as well test liveness here.
143 //
144 // FIXME: There is a case that occurs in practice, like this:
145 // %9 = COPY %f1; VSSRC:%9
146 // ...
147 // %6 = COPY %9; VSSRC:%6,%9
148 // %7 = COPY %9; VSSRC:%7,%9
149 // %9<def,tied1> = XSMADDASP %9<tied0>, %1, %4; VSSRC:
150 // %6<def,tied1> = XSMADDASP %6<tied0>, %1, %2; VSSRC:
151 // %7<def,tied1> = XSMADDASP %7<tied0>, %1, %3; VSSRC:
152 // which prevents an otherwise-profitable transformation.
153 bool OtherUsers = false, KillsAddendSrc = false;
154 for (auto J = std::prev(x: I), JE = MachineBasicBlock::iterator(AddendMI);
155 J != JE; --J) {
156 if (J->readsVirtualRegister(Reg: AddendMI->getOperand(i: 0).getReg())) {
157 OtherUsers = true;
158 break;
159 }
160 if (J->modifiesRegister(Reg: AddendSrcReg, TRI) ||
161 J->killsRegister(Reg: AddendSrcReg, TRI)) {
162 KillsAddendSrc = true;
163 break;
164 }
165 }
166
167 if (OtherUsers || KillsAddendSrc)
168 continue;
169
170
171 // The transformation doesn't work well with things like:
172 // %5 = A-form-op %5, %11, %5;
173 // unless %11 is also a kill, so skip when it is not,
174 // and check operand 3 to see it is also a kill to handle the case:
175 // %5 = A-form-op %5, %5, %11;
176 // where %5 and %11 are both kills. This case would be skipped
177 // otherwise.
178 Register OldFMAReg = MI.getOperand(i: 0).getReg();
179
180 // Find one of the product operands that is killed by this instruction.
181 unsigned KilledProdOp = 0, OtherProdOp = 0;
182 Register Reg2 = MI.getOperand(i: 2).getReg();
183 Register Reg3 = MI.getOperand(i: 3).getReg();
184 if (LIS->getInterval(Reg: Reg2).Query(Idx: FMAIdx).isKill()
185 && Reg2 != OldFMAReg) {
186 KilledProdOp = 2;
187 OtherProdOp = 3;
188 } else if (LIS->getInterval(Reg: Reg3).Query(Idx: FMAIdx).isKill()
189 && Reg3 != OldFMAReg) {
190 KilledProdOp = 3;
191 OtherProdOp = 2;
192 }
193
194 // If there are no usable killed product operands, then this
195 // transformation is likely not profitable.
196 if (!KilledProdOp)
197 continue;
198
199 // If the addend copy is used only by this MI, then the addend source
200 // register is likely not live here. This could be fixed (based on the
201 // legality checks above, the live range for the addend source register
202 // could be extended), but it seems likely that such a trivial copy can
203 // be coalesced away later, and thus is not worth the effort.
204 if (AddendSrcReg.isVirtual() &&
205 !LIS->getInterval(Reg: AddendSrcReg).liveAt(index: FMAIdx))
206 continue;
207
208 // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
209
210 Register KilledProdReg = MI.getOperand(i: KilledProdOp).getReg();
211 Register OtherProdReg = MI.getOperand(i: OtherProdOp).getReg();
212
213 unsigned AddSubReg = AddendMI->getOperand(i: 1).getSubReg();
214 unsigned KilledProdSubReg = MI.getOperand(i: KilledProdOp).getSubReg();
215 unsigned OtherProdSubReg = MI.getOperand(i: OtherProdOp).getSubReg();
216
217 bool AddRegKill = AddendMI->getOperand(i: 1).isKill();
218 bool KilledProdRegKill = MI.getOperand(i: KilledProdOp).isKill();
219 bool OtherProdRegKill = MI.getOperand(i: OtherProdOp).isKill();
220
221 bool AddRegUndef = AddendMI->getOperand(i: 1).isUndef();
222 bool KilledProdRegUndef = MI.getOperand(i: KilledProdOp).isUndef();
223 bool OtherProdRegUndef = MI.getOperand(i: OtherProdOp).isUndef();
224
225 // If there isn't a class that fits, we can't perform the transform.
226 // This is needed for correctness with a mixture of VSX and Altivec
227 // instructions to make sure that a low VSX register is not assigned to
228 // the Altivec instruction.
229 if (!MRI.constrainRegClass(Reg: KilledProdReg,
230 RC: MRI.getRegClass(Reg: OldFMAReg)))
231 continue;
232
233 assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
234 "Addend copy not tied to old FMA output!");
235
236 LLVM_DEBUG(dbgs() << "VSX FMA Mutation:\n " << MI);
237
238 MI.getOperand(i: 0).setReg(KilledProdReg);
239 MI.getOperand(i: 1).setReg(KilledProdReg);
240 MI.getOperand(i: 3).setReg(AddendSrcReg);
241
242 MI.getOperand(i: 0).setSubReg(KilledProdSubReg);
243 MI.getOperand(i: 1).setSubReg(KilledProdSubReg);
244 MI.getOperand(i: 3).setSubReg(AddSubReg);
245
246 MI.getOperand(i: 1).setIsKill(KilledProdRegKill);
247 MI.getOperand(i: 3).setIsKill(AddRegKill);
248
249 MI.getOperand(i: 1).setIsUndef(KilledProdRegUndef);
250 MI.getOperand(i: 3).setIsUndef(AddRegUndef);
251
252 MI.setDesc(TII->get(Opcode: AltOpc));
253
254 // If the addend is also a multiplicand, replace it with the addend
255 // source in both places.
256 if (OtherProdReg == AddendMI->getOperand(i: 0).getReg()) {
257 MI.getOperand(i: 2).setReg(AddendSrcReg);
258 MI.getOperand(i: 2).setSubReg(AddSubReg);
259 MI.getOperand(i: 2).setIsKill(AddRegKill);
260 MI.getOperand(i: 2).setIsUndef(AddRegUndef);
261 } else {
262 MI.getOperand(i: 2).setReg(OtherProdReg);
263 MI.getOperand(i: 2).setSubReg(OtherProdSubReg);
264 MI.getOperand(i: 2).setIsKill(OtherProdRegKill);
265 MI.getOperand(i: 2).setIsUndef(OtherProdRegUndef);
266 }
267
268 LLVM_DEBUG(dbgs() << " -> " << MI);
269
270 // The killed product operand was killed here, so we can reuse it now
271 // for the result of the fma.
272
273 LiveInterval &FMAInt = LIS->getInterval(Reg: OldFMAReg);
274 VNInfo *FMAValNo = FMAInt.getVNInfoAt(Idx: FMAIdx.getRegSlot());
275 for (auto UI = MRI.reg_nodbg_begin(RegNo: OldFMAReg), UE = MRI.reg_nodbg_end();
276 UI != UE;) {
277 MachineOperand &UseMO = *UI;
278 MachineInstr *UseMI = UseMO.getParent();
279 ++UI;
280
281 // Don't replace the result register of the copy we're about to erase.
282 if (UseMI == AddendMI)
283 continue;
284
285 UseMO.substVirtReg(Reg: KilledProdReg, SubIdx: KilledProdSubReg, *TRI);
286 }
287
288 // Recalculate the live intervals of the killed product operand.
289 LIS->removeInterval(Reg: KilledProdReg);
290 LiveInterval &NewFMAInt =
291 LIS->createAndComputeVirtRegInterval(Reg: KilledProdReg);
292
293 LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
294 (void)NewFMAInt;
295
296 // Extend the live interval of the addend source (it might end at the
297 // copy to be removed, or somewhere in between there and here). This
298 // is necessary only if it is a physical register.
299 if (!AddendSrcReg.isVirtual())
300 for (MCRegUnit Unit : TRI->regunits(Reg: AddendSrcReg.asMCReg())) {
301 LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
302 AddendSrcRange.extendInBlock(StartIdx: LIS->getMBBStartIdx(mbb: &MBB),
303 Kill: FMAIdx.getRegSlot());
304 LLVM_DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
305 }
306
307 FMAInt.removeValNo(ValNo: FMAValNo);
308 LLVM_DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
309
310 // Remove the (now unused) copy.
311
312 LLVM_DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
313 LIS->RemoveMachineInstrFromMaps(MI&: *AddendMI);
314 AddendMI->eraseFromParent();
315
316 Changed = true;
317 }
318
319 return Changed;
320 }
321
322public:
323 bool runOnMachineFunction(MachineFunction &MF) override {
324 if (skipFunction(F: MF.getFunction()))
325 return false;
326
327 // If we don't have VSX then go ahead and return without doing
328 // anything.
329 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
330 if (!STI.hasVSX())
331 return false;
332
333 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
334
335 TII = STI.getInstrInfo();
336
337 bool Changed = false;
338
339 if (DisableVSXFMAMutate)
340 return Changed;
341
342 for (MachineBasicBlock &B : llvm::make_early_inc_range(Range&: MF))
343 if (processBlock(MBB&: B))
344 Changed = true;
345
346 return Changed;
347 }
348
349 void getAnalysisUsage(AnalysisUsage &AU) const override {
350 AU.addRequired<LiveIntervalsWrapperPass>();
351 AU.addPreserved<LiveIntervalsWrapperPass>();
352 AU.addRequired<SlotIndexesWrapperPass>();
353 AU.addPreserved<SlotIndexesWrapperPass>();
354 AU.addRequired<MachineDominatorTreeWrapperPass>();
355 AU.addPreserved<MachineDominatorTreeWrapperPass>();
356 MachineFunctionPass::getAnalysisUsage(AU);
357 }
358 };
359}
360
361INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
362 "PowerPC VSX FMA Mutation", false, false)
363INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
364INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
365INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
366INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
367 "PowerPC VSX FMA Mutation", false, false)
368
369char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
370
371char PPCVSXFMAMutate::ID = 0;
372FunctionPass *llvm::createPPCVSXFMAMutatePass() {
373 return new PPCVSXFMAMutate();
374}
375