1//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions
2// optimizer ------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9//
10// Basic infrastructure for optimizing intermediate conversion instructions
11// generated while performing vector floating point operations.
// Currently run at the start of code generation for Hexagon, this pass cleans
13// up redundant conversion instructions and replaces the uses of conversion
14// with appropriate machine operand. Liveness is preserved after this pass.
15//
16// @note: The redundant conversion instructions are not eliminated in this pass.
17// In this pass, we are only trying to replace the uses of conversion
18// instructions with its appropriate QFP instruction. We are leaving the job to
19// Dead instruction Elimination pass to remove redundant conversion
20// instructions.
21//
22// Brief overview of working of this QFP optimizer.
// This version of the Hexagon QFP optimizer iterates over each instruction
// and checks whether it belongs to the Hexagon floating-point HVX arithmetic
// instruction category (Add, Sub, Mul). It then finds the unique definition
// of each machine operand of that instruction.
27//
28// Example:
// Let MachineInstr *MI be the HVX vadd instruction
// MI -> $v0 = V6_vadd_sf $v1, $v2
// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
//
// In the above example, DefMI1 and DefMI2 give the unique definitions
// corresponding to the operands ($v1 and $v2 respectively) of instruction MI.
36//
// If neither of the definitions is a conversion instruction (V6_vconv_sf_qf32,
// V6_vconv_hf_qf16), then the pass skips optimizing the current instruction
// and moves on to the next instruction.
//
// If one of the definitions is a conversion instruction, then the pass replaces
// the arithmetic instruction with its corresponding mix variant.
43// In the above example, if $v1 is conversion instruction
44// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
45// After Transformation:
46// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
47//
48// If both the definitions are conversion instructions then the instruction will
49// be replaced with its qf variant
50// In the above example, if $v1 and $v2 are conversion instructions
51// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
52// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
53// After Transformation:
54// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
55// with $v4)
56//
57// Currently, in this pass, we are not handling the case when the definitions
58// are PHI inst.
59//
60//===----------------------------------------------------------------------===//
61
62#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
63
64#include "Hexagon.h"
65#include "HexagonInstrInfo.h"
66#include "HexagonSubtarget.h"
67#include "llvm/ADT/SmallVector.h"
68#include "llvm/ADT/StringRef.h"
69#include "llvm/CodeGen/MachineBasicBlock.h"
70#include "llvm/CodeGen/MachineFunction.h"
71#include "llvm/CodeGen/MachineFunctionPass.h"
72#include "llvm/CodeGen/MachineInstr.h"
73#include "llvm/CodeGen/MachineOperand.h"
74#include "llvm/CodeGen/Passes.h"
75#include "llvm/Pass.h"
76#include "llvm/Support/CommandLine.h"
77#include "llvm/Support/Debug.h"
78#include "llvm/Support/raw_ostream.h"
79#include <map>
80
81#define DEBUG_TYPE "hexagon-qfp-optimizer"
82
83using namespace llvm;
84
85cl::opt<bool>
86 DisableQFOptimizer("disable-qfp-opt", cl::init(Val: false),
87 cl::desc("Disable optimization of Qfloat operations."));
88cl::opt<bool> DisableQFOptForMul(
89 "disable-qfp-opt-mul", cl::init(Val: true),
90 cl::desc("Disable optimization of Qfloat operations for multiply."));
91
92namespace {
93const std::map<unsigned short, unsigned short> QFPInstMap{
94 {Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},
95 {Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},
96 {Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},
97 {Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},
98 {Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},
99 {Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},
100 {Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},
101 {Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},
102 {Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},
103 {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
104 {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
105 {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
106 {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32},
107 {Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32},
108 {Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16},
109 {Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32},
110 {Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16},
111 {Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32},
112 {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}};
113} // namespace
114
115namespace {
116struct HexagonQFPOptimizer : public MachineFunctionPass {
117public:
118 static char ID;
119
120 HexagonQFPOptimizer() : MachineFunctionPass(ID) {}
121
122 bool runOnMachineFunction(MachineFunction &MF) override;
123
124 bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
125
126 bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB);
127
128 bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB);
129
130 StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
131
132 void getAnalysisUsage(AnalysisUsage &AU) const override {
133 AU.setPreservesCFG();
134 MachineFunctionPass::getAnalysisUsage(AU);
135 }
136
137private:
138 const HexagonSubtarget *HST = nullptr;
139 const HexagonInstrInfo *HII = nullptr;
140 const MachineRegisterInfo *MRI = nullptr;
141};
142
143char HexagonQFPOptimizer::ID = 0;
144} // namespace
145
// Registers HexagonQFPOptimizer under the command-line name
// "hexagon-qfp-optimizer" with HEXAGON_QFP_OPTIMIZER as its description.
// NOTE(review): the trailing false/false are presumably the "CFG only" and
// "is analysis" flags of INITIALIZE_PASS - confirm against PassSupport.h.
INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer",
                HEXAGON_QFP_OPTIMIZER, false, false)
148
/// Factory for the QFP optimizer pass. Returns a newly heap-allocated
/// HexagonQFPOptimizer as a FunctionPass pointer.
FunctionPass *llvm::createHexagonQFPOptimizer() {
  return new HexagonQFPOptimizer();
}
152
153bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
154 MachineBasicBlock *MBB) {
155
156 if (MI->getNumOperands() == 2)
157 return optimizeQfpOneOp(MI, MBB);
158 else if (MI->getNumOperands() == 3)
159 return optimizeQfpTwoOp(MI, MBB);
160 else
161 return false;
162}
163
164bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI,
165 MachineBasicBlock *MBB) {
166
167 RegState Op0F = {};
168 auto It = QFPInstMap.find(x: MI->getOpcode());
169 if (It == QFPInstMap.end())
170 return false;
171
172 unsigned short InstTy = It->second;
173 // Get the reachind defs of MI
174 MachineInstr *DefMI = MRI->getVRegDef(Reg: MI->getOperand(i: 1).getReg());
175 MachineOperand &Res = MI->getOperand(i: 0);
176 if (!Res.isReg())
177 return false;
178
179 LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump());
180 MachineInstr *ReachDefDef = nullptr;
181
182 // Get the reaching def of the reaching def to check for W reg def
183 if (DefMI->getNumOperands() > 1 && DefMI->getOperand(i: 1).isReg() &&
184 DefMI->getOperand(i: 1).getReg().isVirtual())
185 ReachDefDef = MRI->getVRegDef(Reg: DefMI->getOperand(i: 1).getReg());
186 unsigned ReachDefOp = DefMI->getOpcode();
187 MachineInstrBuilder MIB;
188
189 // Check if the reaching def is a conversion
190 if (ReachDefOp == Hexagon::V6_vconv_sf_qf32 ||
191 ReachDefOp == Hexagon::V6_vconv_hf_qf16) {
192
193 // Return if the reaching def of reaching def is W type
194 if (ReachDefDef && MRI->getRegClass(Reg: ReachDefDef->getOperand(i: 0).getReg()) ==
195 &Hexagon::HvxWRRegClass)
196 return false;
197
198 // Analyze the use operands of the conversion to get their KILL status
199 MachineOperand &SrcOp = DefMI->getOperand(i: 1);
200 Op0F = getKillRegState(B: SrcOp.isKill());
201 SrcOp.setIsKill(false);
202 MIB = BuildMI(BB&: *MBB, I: MI, MIMD: MI->getDebugLoc(), MCID: HII->get(Opcode: InstTy), DestReg: Res.getReg())
203 .addReg(RegNo: SrcOp.getReg(), Flags: Op0F, SubReg: SrcOp.getSubReg());
204 LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
205 return true;
206 }
207 return false;
208}
209
210bool HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI,
211 MachineBasicBlock *MBB) {
212
213 RegState Op0F = {};
214 RegState Op1F = {};
215 auto It = QFPInstMap.find(x: MI->getOpcode());
216 if (It == QFPInstMap.end())
217 return false;
218 unsigned short InstTy = It->second;
219 // Get the reaching defs of MI, DefMI1 and DefMI2
220 MachineInstr *DefMI1 = nullptr;
221 MachineInstr *DefMI2 = nullptr;
222
223 if (MI->getOperand(i: 1).isReg())
224 DefMI1 = MRI->getVRegDef(Reg: MI->getOperand(i: 1).getReg());
225 if (MI->getOperand(i: 2).isReg())
226 DefMI2 = MRI->getVRegDef(Reg: MI->getOperand(i: 2).getReg());
227 if (!DefMI1 || !DefMI2)
228 return false;
229
230 MachineOperand &Res = MI->getOperand(i: 0);
231 if (!Res.isReg())
232 return false;
233
234 MachineInstr *Inst1 = nullptr;
235 MachineInstr *Inst2 = nullptr;
236 LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
237 DefMI2->dump());
238
239 // Get the reaching defs of DefMI
240 if (DefMI1->getNumOperands() > 1 && DefMI1->getOperand(i: 1).isReg() &&
241 DefMI1->getOperand(i: 1).getReg().isVirtual())
242 Inst1 = MRI->getVRegDef(Reg: DefMI1->getOperand(i: 1).getReg());
243
244 if (DefMI2->getNumOperands() > 1 && DefMI2->getOperand(i: 1).isReg() &&
245 DefMI2->getOperand(i: 1).getReg().isVirtual())
246 Inst2 = MRI->getVRegDef(Reg: DefMI2->getOperand(i: 1).getReg());
247
248 unsigned Def1OP = DefMI1->getOpcode();
249 unsigned Def2OP = DefMI2->getOpcode();
250
251 MachineInstrBuilder MIB;
252
253 // Check if the both the reaching defs of MI are qf to sf/hf conversions
254 if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
255 Def2OP == Hexagon::V6_vconv_sf_qf32) ||
256 (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
257 Def2OP == Hexagon::V6_vconv_hf_qf16)) {
258
259 // If the reaching defs of DefMI are W register type, we return
260 if ((Inst1 && Inst1->getNumOperands() > 0 && Inst1->getOperand(i: 0).isReg() &&
261 MRI->getRegClass(Reg: Inst1->getOperand(i: 0).getReg()) ==
262 &Hexagon::HvxWRRegClass) ||
263 (Inst2 && Inst2->getNumOperands() > 0 && Inst2->getOperand(i: 0).isReg() &&
264 MRI->getRegClass(Reg: Inst2->getOperand(i: 0).getReg()) ==
265 &Hexagon::HvxWRRegClass))
266 return false;
267
268 // Analyze the use operands of the conversion to get their KILL status
269 MachineOperand &Src1 = DefMI1->getOperand(i: 1);
270 MachineOperand &Src2 = DefMI2->getOperand(i: 1);
271
272 Op0F = getKillRegState(B: Src1.isKill());
273 Src1.setIsKill(false);
274
275 Op1F = getKillRegState(B: Src2.isKill());
276 Src2.setIsKill(false);
277
278 if (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf) {
279 auto OuterIt = QFPInstMap.find(x: MI->getOpcode());
280 if (OuterIt == QFPInstMap.end())
281 return false;
282 auto InnerIt = QFPInstMap.find(x: OuterIt->second);
283 if (InnerIt == QFPInstMap.end())
284 return false;
285 InstTy = InnerIt->second;
286 }
287
288 MIB = BuildMI(BB&: *MBB, I: MI, MIMD: MI->getDebugLoc(), MCID: HII->get(Opcode: InstTy), DestReg: Res.getReg())
289 .addReg(RegNo: Src1.getReg(), Flags: Op0F, SubReg: Src1.getSubReg())
290 .addReg(RegNo: Src2.getReg(), Flags: Op1F, SubReg: Src2.getSubReg());
291 LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
292 return true;
293
294 // Check if left operand's reaching def is a conversion to sf/hf
295 } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
296 Def2OP != Hexagon::V6_vconv_sf_qf32) ||
297 (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
298 Def2OP != Hexagon::V6_vconv_hf_qf16)) &&
299 !DefMI2->isPHI() &&
300 (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
301
302 if (Inst1 && MRI->getRegClass(Reg: Inst1->getOperand(i: 0).getReg()) ==
303 &Hexagon::HvxWRRegClass)
304 return false;
305
306 MachineOperand &Src1 = DefMI1->getOperand(i: 1);
307 MachineOperand &Src2 = MI->getOperand(i: 2);
308
309 Op0F = getKillRegState(B: Src1.isKill());
310 Src1.setIsKill(false);
311 Op1F = getKillRegState(B: Src2.isKill());
312 MIB = BuildMI(BB&: *MBB, I: MI, MIMD: MI->getDebugLoc(), MCID: HII->get(Opcode: InstTy), DestReg: Res.getReg())
313 .addReg(RegNo: Src1.getReg(), Flags: Op0F, SubReg: Src1.getSubReg())
314 .addReg(RegNo: Src2.getReg(), Flags: Op1F, SubReg: Src2.getSubReg());
315 LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
316 return true;
317
318 // Check if right operand's reaching def is a conversion to sf/hf
319 } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
320 Def2OP == Hexagon::V6_vconv_sf_qf32) ||
321 (Def1OP != Hexagon::V6_vconv_hf_qf16 &&
322 Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
323 !DefMI1->isPHI() &&
324 (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
325 // The second operand of original instruction is converted.
326 if (Inst2 && MRI->getRegClass(Reg: Inst2->getOperand(i: 0).getReg()) ==
327 &Hexagon::HvxWRRegClass)
328 return false;
329
330 MachineOperand &Src1 = MI->getOperand(i: 1);
331 MachineOperand &Src2 = DefMI2->getOperand(i: 1);
332
333 Op1F = getKillRegState(B: Src2.isKill());
334 Src2.setIsKill(false);
335 Op0F = getKillRegState(B: Src1.isKill());
336 if (InstTy == Hexagon::V6_vsub_qf16_mix ||
337 InstTy == Hexagon::V6_vsub_qf32_mix) {
338 if (!HST->useHVXV81Ops())
339 // vsub_(hf|sf)_mix insts are only avlbl on hvx81+
340 return false;
341 // vsub is not commutative w.r.t. operands -> treat it as a special case
342 // to choose the correct mix instruction.
343 if (Def2OP == Hexagon::V6_vconv_sf_qf32)
344 InstTy = Hexagon::V6_vsub_sf_mix;
345 else if (Def2OP == Hexagon::V6_vconv_hf_qf16)
346 InstTy = Hexagon::V6_vsub_hf_mix;
347 MIB = BuildMI(BB&: *MBB, I: MI, MIMD: MI->getDebugLoc(), MCID: HII->get(Opcode: InstTy), DestReg: Res.getReg())
348 .addReg(RegNo: Src1.getReg(), Flags: Op0F, SubReg: Src1.getSubReg())
349 .addReg(RegNo: Src2.getReg(), Flags: Op1F, SubReg: Src2.getSubReg());
350 } else {
351 MIB = BuildMI(BB&: *MBB, I: MI, MIMD: MI->getDebugLoc(), MCID: HII->get(Opcode: InstTy), DestReg: Res.getReg())
352 .addReg(RegNo: Src2.getReg(), Flags: Op1F,
353 SubReg: Src2.getSubReg()) // Notice the operands are flipped.
354 .addReg(RegNo: Src1.getReg(), Flags: Op0F, SubReg: Src1.getSubReg());
355 }
356 LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
357 return true;
358 }
359
360 return false;
361}
362
363bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
364
365 bool Changed = false;
366
367 if (DisableQFOptimizer)
368 return Changed;
369
370 HST = &MF.getSubtarget<HexagonSubtarget>();
371 if (!HST->useHVXV68Ops() || !HST->usePackets() ||
372 skipFunction(F: MF.getFunction()))
373 return false;
374 HII = HST->getInstrInfo();
375 MRI = &MF.getRegInfo();
376
377 MachineFunction::iterator MBBI = MF.begin();
378 LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName()
379 << " Optimize intermediate conversions ===\n");
380 while (MBBI != MF.end()) {
381 MachineBasicBlock *MBB = &*MBBI;
382 MachineBasicBlock::iterator MII = MBBI->instr_begin();
383 while (MII != MBBI->instr_end()) {
384 MachineInstr *MI = &*MII;
385 ++MII; // As MI might be removed.
386 if (QFPInstMap.count(x: MI->getOpcode())) {
387 auto OpC = MI->getOpcode();
388 if (DisableQFOptForMul && HII->isQFPMul(MF: MI))
389 continue;
390 if (OpC != Hexagon::V6_vconv_sf_qf32 &&
391 OpC != Hexagon::V6_vconv_hf_qf16) {
392 LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
393 if (optimizeQfp(MI, MBB)) {
394 MI->eraseFromParent();
395 LLVM_DEBUG(dbgs() << "\t....Removing....");
396 Changed = true;
397 }
398 }
399 }
400 }
401 ++MBBI;
402 }
403 return Changed;
404}
405