1 | //===-- X86FixupInstTunings.cpp - replace instructions -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This file does a tuning pass replacing slower machine instructions
// with faster ones. We do this here, as opposed to during normal ISel, as
// attempting to get the "right" instruction can break patterns. This pass
// is not meant to search for special cases where an instruction can be
// transformed to another; it is only meant to do transformations where the old
// instruction is always replaceable with the new instructions. For example:
15 | // |
16 | // `vpermq ymm` -> `vshufd ymm` |
17 | // -- BAD, not always valid (lane cross/non-repeated mask) |
18 | // |
19 | // `vpermilps ymm` -> `vshufd ymm` |
20 | // -- GOOD, always replaceable |
21 | // |
22 | //===----------------------------------------------------------------------===// |
23 | |
24 | #include "X86.h" |
25 | #include "X86InstrInfo.h" |
26 | #include "X86Subtarget.h" |
27 | #include "llvm/ADT/Statistic.h" |
28 | #include "llvm/CodeGen/MachineFunctionPass.h" |
29 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
30 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
31 | |
32 | using namespace llvm; |
33 | |
34 | #define DEBUG_TYPE "x86-fixup-inst-tuning" |
35 | |
36 | STATISTIC(NumInstChanges, "Number of instructions changes" ); |
37 | |
38 | namespace { |
39 | class X86FixupInstTuningPass : public MachineFunctionPass { |
40 | public: |
41 | static char ID; |
42 | |
43 | X86FixupInstTuningPass() : MachineFunctionPass(ID) {} |
44 | |
45 | StringRef getPassName() const override { return "X86 Fixup Inst Tuning" ; } |
46 | |
47 | bool runOnMachineFunction(MachineFunction &MF) override; |
48 | bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB, |
49 | MachineBasicBlock::iterator &I); |
50 | |
51 | // This pass runs after regalloc and doesn't support VReg operands. |
52 | MachineFunctionProperties getRequiredProperties() const override { |
53 | return MachineFunctionProperties().set( |
54 | MachineFunctionProperties::Property::NoVRegs); |
55 | } |
56 | |
57 | private: |
58 | const X86InstrInfo *TII = nullptr; |
59 | const X86Subtarget *ST = nullptr; |
60 | const MCSchedModel *SM = nullptr; |
61 | }; |
62 | } // end anonymous namespace |
63 | |
64 | char X86FixupInstTuningPass::ID = 0; |
65 | |
66 | INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false) |
67 | |
68 | FunctionPass *llvm::createX86FixupInstTuning() { |
69 | return new X86FixupInstTuningPass(); |
70 | } |
71 | |
/// Three-way helper for comparing two optional metrics.
///
/// \returns `*NewVal < *CurVal` when both optionals hold a value and those
/// values differ; `std::nullopt` when either value is missing or when the two
/// values compare equal (i.e. the comparison is inconclusive).
template <typename T>
static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {
  // A missing value on either side makes the comparison inconclusive.
  if (!NewVal.has_value() || !CurVal.has_value())
    return std::nullopt;

  // Equal values are a tie, which is also reported as "no answer".
  if (!(*NewVal != *CurVal))
    return std::nullopt;

  return *NewVal < *CurVal;
}
79 | |
80 | bool X86FixupInstTuningPass::processInstruction( |
81 | MachineFunction &MF, MachineBasicBlock &MBB, |
82 | MachineBasicBlock::iterator &I) { |
83 | MachineInstr &MI = *I; |
84 | unsigned Opc = MI.getOpcode(); |
85 | unsigned NumOperands = MI.getDesc().getNumOperands(); |
86 | |
87 | auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> { |
88 | // We already checked that SchedModel exists in `NewOpcPreferable`. |
89 | return MCSchedModel::getReciprocalThroughput( |
90 | STI: *ST, SCDesc: *(SM->getSchedClassDesc(SchedClassIdx: TII->get(Opcode).getSchedClass()))); |
91 | }; |
92 | |
93 | auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> { |
94 | // We already checked that SchedModel exists in `NewOpcPreferable`. |
95 | return MCSchedModel::computeInstrLatency( |
96 | STI: *ST, SCDesc: *(SM->getSchedClassDesc(SchedClassIdx: TII->get(Opcode).getSchedClass()))); |
97 | }; |
98 | |
99 | auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> { |
100 | if (unsigned Size = TII->get(Opcode).getSize()) |
101 | return Size; |
102 | // Zero size means we where unable to compute it. |
103 | return std::nullopt; |
104 | }; |
105 | |
106 | auto NewOpcPreferable = [&](unsigned NewOpc, |
107 | bool ReplaceInTie = true) -> bool { |
108 | std::optional<bool> Res; |
109 | if (SM->hasInstrSchedModel()) { |
110 | // Compare tput -> lat -> code size. |
111 | Res = CmpOptionals(NewVal: GetInstTput(NewOpc), CurVal: GetInstTput(Opc)); |
112 | if (Res.has_value()) |
113 | return *Res; |
114 | |
115 | Res = CmpOptionals(NewVal: GetInstLat(NewOpc), CurVal: GetInstLat(Opc)); |
116 | if (Res.has_value()) |
117 | return *Res; |
118 | } |
119 | |
120 | Res = CmpOptionals(NewVal: GetInstSize(Opc), CurVal: GetInstSize(NewOpc)); |
121 | if (Res.has_value()) |
122 | return *Res; |
123 | |
124 | // We either have either were unable to get tput/lat/codesize or all values |
125 | // were equal. Return specified option for a tie. |
126 | return ReplaceInTie; |
127 | }; |
128 | |
129 | // `vpermilpd r, i` -> `vshufpd r, r, i` |
130 | // `vpermilpd r, i, k` -> `vshufpd r, r, i, k` |
131 | // `vshufpd` is always as fast or faster than `vpermilpd` and takes |
132 | // 1 less byte of code size for VEX and EVEX encoding. |
133 | auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool { |
134 | if (!NewOpcPreferable(NewOpc)) |
135 | return false; |
136 | unsigned MaskImm = MI.getOperand(i: NumOperands - 1).getImm(); |
137 | MI.removeOperand(OpNo: NumOperands - 1); |
138 | MI.addOperand(Op: MI.getOperand(i: NumOperands - 2)); |
139 | MI.setDesc(TII->get(Opcode: NewOpc)); |
140 | MI.addOperand(Op: MachineOperand::CreateImm(Val: MaskImm)); |
141 | return true; |
142 | }; |
143 | |
144 | // `vpermilps r, i` -> `vshufps r, r, i` |
145 | // `vpermilps r, i, k` -> `vshufps r, r, i, k` |
146 | // `vshufps` is always as fast or faster than `vpermilps` and takes |
147 | // 1 less byte of code size for VEX and EVEX encoding. |
148 | auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool { |
149 | if (!NewOpcPreferable(NewOpc)) |
150 | return false; |
151 | unsigned MaskImm = MI.getOperand(i: NumOperands - 1).getImm(); |
152 | MI.removeOperand(OpNo: NumOperands - 1); |
153 | MI.addOperand(Op: MI.getOperand(i: NumOperands - 2)); |
154 | MI.setDesc(TII->get(Opcode: NewOpc)); |
155 | MI.addOperand(Op: MachineOperand::CreateImm(Val: MaskImm)); |
156 | return true; |
157 | }; |
158 | |
159 | // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles. |
160 | // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less |
161 | // byte of code size. |
162 | auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool { |
163 | // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as |
164 | // `vpshufd` saves a byte of code size. |
165 | if (!ST->hasNoDomainDelayShuffle() || |
166 | !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) |
167 | return false; |
168 | MI.setDesc(TII->get(Opcode: NewOpc)); |
169 | return true; |
170 | }; |
171 | |
172 | // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00` |
173 | // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff` |
174 | // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00` |
175 | // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff` |
176 | // `vunpcklpd r, m` -> `vunpcklqdq r, m, k` |
177 | // `vunpckhpd r, m` -> `vunpckhqdq r, m, k` |
178 | // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k` |
179 | // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k` |
180 | // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd` |
181 | // -> `vunpck{l|h}qdq` |
182 | // 2) If `vshufpd` faster than `vunpck{l|h}pd` |
183 | // -> `vshufpd` |
184 | // |
185 | // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay) |
186 | auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool { |
187 | if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) |
188 | return false; |
189 | |
190 | MI.setDesc(TII->get(Opcode: NewOpc)); |
191 | MI.addOperand(Op: MachineOperand::CreateImm(Val: MaskImm)); |
192 | return true; |
193 | }; |
194 | |
195 | auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool { |
196 | // TODO it may be worth it to set ReplaceInTie to `true` as there is no real |
197 | // downside to the integer unpck, but if someone doesn't specify exact |
198 | // target we won't find it faster. |
199 | if (!ST->hasNoDomainDelayShuffle() || |
200 | !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) |
201 | return false; |
202 | MI.setDesc(TII->get(Opcode: NewOpc)); |
203 | return true; |
204 | }; |
205 | |
206 | auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain, |
207 | unsigned NewOpc) -> bool { |
208 | if (ProcessUNPCKToIntDomain(NewOpcIntDomain)) |
209 | return true; |
210 | return ProcessUNPCK(NewOpc, 0x00); |
211 | }; |
212 | auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain, |
213 | unsigned NewOpc) -> bool { |
214 | if (ProcessUNPCKToIntDomain(NewOpcIntDomain)) |
215 | return true; |
216 | return ProcessUNPCK(NewOpc, 0xff); |
217 | }; |
218 | |
219 | auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool { |
220 | return ProcessUNPCKToIntDomain(NewOpcIntDomain); |
221 | }; |
222 | |
223 | auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool { |
224 | return ProcessUNPCKToIntDomain(NewOpc); |
225 | }; |
226 | |
227 | switch (Opc) { |
228 | case X86::VPERMILPDri: |
229 | return ProcessVPERMILPDri(X86::VSHUFPDrri); |
230 | case X86::VPERMILPDYri: |
231 | return ProcessVPERMILPDri(X86::VSHUFPDYrri); |
232 | case X86::VPERMILPDZ128ri: |
233 | return ProcessVPERMILPDri(X86::VSHUFPDZ128rri); |
234 | case X86::VPERMILPDZ256ri: |
235 | return ProcessVPERMILPDri(X86::VSHUFPDZ256rri); |
236 | case X86::VPERMILPDZri: |
237 | return ProcessVPERMILPDri(X86::VSHUFPDZrri); |
238 | case X86::VPERMILPDZ128rikz: |
239 | return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz); |
240 | case X86::VPERMILPDZ256rikz: |
241 | return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz); |
242 | case X86::VPERMILPDZrikz: |
243 | return ProcessVPERMILPDri(X86::VSHUFPDZrrikz); |
244 | case X86::VPERMILPDZ128rik: |
245 | return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik); |
246 | case X86::VPERMILPDZ256rik: |
247 | return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik); |
248 | case X86::VPERMILPDZrik: |
249 | return ProcessVPERMILPDri(X86::VSHUFPDZrrik); |
250 | |
251 | case X86::VPERMILPSri: |
252 | return ProcessVPERMILPSri(X86::VSHUFPSrri); |
253 | case X86::VPERMILPSYri: |
254 | return ProcessVPERMILPSri(X86::VSHUFPSYrri); |
255 | case X86::VPERMILPSZ128ri: |
256 | return ProcessVPERMILPSri(X86::VSHUFPSZ128rri); |
257 | case X86::VPERMILPSZ256ri: |
258 | return ProcessVPERMILPSri(X86::VSHUFPSZ256rri); |
259 | case X86::VPERMILPSZri: |
260 | return ProcessVPERMILPSri(X86::VSHUFPSZrri); |
261 | case X86::VPERMILPSZ128rikz: |
262 | return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz); |
263 | case X86::VPERMILPSZ256rikz: |
264 | return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz); |
265 | case X86::VPERMILPSZrikz: |
266 | return ProcessVPERMILPSri(X86::VSHUFPSZrrikz); |
267 | case X86::VPERMILPSZ128rik: |
268 | return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik); |
269 | case X86::VPERMILPSZ256rik: |
270 | return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik); |
271 | case X86::VPERMILPSZrik: |
272 | return ProcessVPERMILPSri(X86::VSHUFPSZrrik); |
273 | case X86::VPERMILPSmi: |
274 | return ProcessVPERMILPSmi(X86::VPSHUFDmi); |
275 | case X86::VPERMILPSYmi: |
276 | // TODO: See if there is a more generic way we can test if the replacement |
277 | // instruction is supported. |
278 | return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false; |
279 | case X86::VPERMILPSZ128mi: |
280 | return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi); |
281 | case X86::VPERMILPSZ256mi: |
282 | return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi); |
283 | case X86::VPERMILPSZmi: |
284 | return ProcessVPERMILPSmi(X86::VPSHUFDZmi); |
285 | case X86::VPERMILPSZ128mikz: |
286 | return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz); |
287 | case X86::VPERMILPSZ256mikz: |
288 | return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz); |
289 | case X86::VPERMILPSZmikz: |
290 | return ProcessVPERMILPSmi(X86::VPSHUFDZmikz); |
291 | case X86::VPERMILPSZ128mik: |
292 | return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik); |
293 | case X86::VPERMILPSZ256mik: |
294 | return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik); |
295 | case X86::VPERMILPSZmik: |
296 | return ProcessVPERMILPSmi(X86::VPSHUFDZmik); |
297 | |
298 | case X86::MOVLHPSrr: |
299 | case X86::UNPCKLPDrr: |
300 | return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri); |
301 | case X86::VMOVLHPSrr: |
302 | case X86::VUNPCKLPDrr: |
303 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri); |
304 | case X86::VUNPCKLPDYrr: |
305 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri); |
306 | // VMOVLHPS is always 128 bits. |
307 | case X86::VMOVLHPSZrr: |
308 | case X86::VUNPCKLPDZ128rr: |
309 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri); |
310 | case X86::VUNPCKLPDZ256rr: |
311 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri); |
312 | case X86::VUNPCKLPDZrr: |
313 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri); |
314 | case X86::VUNPCKLPDZ128rrk: |
315 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik); |
316 | case X86::VUNPCKLPDZ256rrk: |
317 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik); |
318 | case X86::VUNPCKLPDZrrk: |
319 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik); |
320 | case X86::VUNPCKLPDZ128rrkz: |
321 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz); |
322 | case X86::VUNPCKLPDZ256rrkz: |
323 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz); |
324 | case X86::VUNPCKLPDZrrkz: |
325 | return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz); |
326 | case X86::UNPCKHPDrr: |
327 | return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri); |
328 | case X86::VUNPCKHPDrr: |
329 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri); |
330 | case X86::VUNPCKHPDYrr: |
331 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri); |
332 | case X86::VUNPCKHPDZ128rr: |
333 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri); |
334 | case X86::VUNPCKHPDZ256rr: |
335 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri); |
336 | case X86::VUNPCKHPDZrr: |
337 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri); |
338 | case X86::VUNPCKHPDZ128rrk: |
339 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik); |
340 | case X86::VUNPCKHPDZ256rrk: |
341 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik); |
342 | case X86::VUNPCKHPDZrrk: |
343 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik); |
344 | case X86::VUNPCKHPDZ128rrkz: |
345 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz); |
346 | case X86::VUNPCKHPDZ256rrkz: |
347 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz); |
348 | case X86::VUNPCKHPDZrrkz: |
349 | return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz); |
350 | case X86::UNPCKLPDrm: |
351 | return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm); |
352 | case X86::VUNPCKLPDrm: |
353 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm); |
354 | case X86::VUNPCKLPDYrm: |
355 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm); |
356 | case X86::VUNPCKLPDZ128rm: |
357 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm); |
358 | case X86::VUNPCKLPDZ256rm: |
359 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm); |
360 | case X86::VUNPCKLPDZrm: |
361 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm); |
362 | case X86::VUNPCKLPDZ128rmk: |
363 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk); |
364 | case X86::VUNPCKLPDZ256rmk: |
365 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk); |
366 | case X86::VUNPCKLPDZrmk: |
367 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk); |
368 | case X86::VUNPCKLPDZ128rmkz: |
369 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz); |
370 | case X86::VUNPCKLPDZ256rmkz: |
371 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz); |
372 | case X86::VUNPCKLPDZrmkz: |
373 | return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz); |
374 | case X86::UNPCKHPDrm: |
375 | return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm); |
376 | case X86::VUNPCKHPDrm: |
377 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm); |
378 | case X86::VUNPCKHPDYrm: |
379 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm); |
380 | case X86::VUNPCKHPDZ128rm: |
381 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm); |
382 | case X86::VUNPCKHPDZ256rm: |
383 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm); |
384 | case X86::VUNPCKHPDZrm: |
385 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm); |
386 | case X86::VUNPCKHPDZ128rmk: |
387 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk); |
388 | case X86::VUNPCKHPDZ256rmk: |
389 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk); |
390 | case X86::VUNPCKHPDZrmk: |
391 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk); |
392 | case X86::VUNPCKHPDZ128rmkz: |
393 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz); |
394 | case X86::VUNPCKHPDZ256rmkz: |
395 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz); |
396 | case X86::VUNPCKHPDZrmkz: |
397 | return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz); |
398 | |
399 | case X86::UNPCKLPSrr: |
400 | return ProcessUNPCKPS(X86::PUNPCKLDQrr); |
401 | case X86::VUNPCKLPSrr: |
402 | return ProcessUNPCKPS(X86::VPUNPCKLDQrr); |
403 | case X86::VUNPCKLPSYrr: |
404 | return ProcessUNPCKPS(X86::VPUNPCKLDQYrr); |
405 | case X86::VUNPCKLPSZ128rr: |
406 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr); |
407 | case X86::VUNPCKLPSZ256rr: |
408 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr); |
409 | case X86::VUNPCKLPSZrr: |
410 | return ProcessUNPCKPS(X86::VPUNPCKLDQZrr); |
411 | case X86::VUNPCKLPSZ128rrk: |
412 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk); |
413 | case X86::VUNPCKLPSZ256rrk: |
414 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk); |
415 | case X86::VUNPCKLPSZrrk: |
416 | return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk); |
417 | case X86::VUNPCKLPSZ128rrkz: |
418 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz); |
419 | case X86::VUNPCKLPSZ256rrkz: |
420 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz); |
421 | case X86::VUNPCKLPSZrrkz: |
422 | return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz); |
423 | case X86::UNPCKHPSrr: |
424 | return ProcessUNPCKPS(X86::PUNPCKHDQrr); |
425 | case X86::VUNPCKHPSrr: |
426 | return ProcessUNPCKPS(X86::VPUNPCKHDQrr); |
427 | case X86::VUNPCKHPSYrr: |
428 | return ProcessUNPCKPS(X86::VPUNPCKHDQYrr); |
429 | case X86::VUNPCKHPSZ128rr: |
430 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr); |
431 | case X86::VUNPCKHPSZ256rr: |
432 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr); |
433 | case X86::VUNPCKHPSZrr: |
434 | return ProcessUNPCKPS(X86::VPUNPCKHDQZrr); |
435 | case X86::VUNPCKHPSZ128rrk: |
436 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk); |
437 | case X86::VUNPCKHPSZ256rrk: |
438 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk); |
439 | case X86::VUNPCKHPSZrrk: |
440 | return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk); |
441 | case X86::VUNPCKHPSZ128rrkz: |
442 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz); |
443 | case X86::VUNPCKHPSZ256rrkz: |
444 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz); |
445 | case X86::VUNPCKHPSZrrkz: |
446 | return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz); |
447 | case X86::UNPCKLPSrm: |
448 | return ProcessUNPCKPS(X86::PUNPCKLDQrm); |
449 | case X86::VUNPCKLPSrm: |
450 | return ProcessUNPCKPS(X86::VPUNPCKLDQrm); |
451 | case X86::VUNPCKLPSYrm: |
452 | return ProcessUNPCKPS(X86::VPUNPCKLDQYrm); |
453 | case X86::VUNPCKLPSZ128rm: |
454 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm); |
455 | case X86::VUNPCKLPSZ256rm: |
456 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm); |
457 | case X86::VUNPCKLPSZrm: |
458 | return ProcessUNPCKPS(X86::VPUNPCKLDQZrm); |
459 | case X86::VUNPCKLPSZ128rmk: |
460 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk); |
461 | case X86::VUNPCKLPSZ256rmk: |
462 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk); |
463 | case X86::VUNPCKLPSZrmk: |
464 | return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk); |
465 | case X86::VUNPCKLPSZ128rmkz: |
466 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz); |
467 | case X86::VUNPCKLPSZ256rmkz: |
468 | return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz); |
469 | case X86::VUNPCKLPSZrmkz: |
470 | return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz); |
471 | case X86::UNPCKHPSrm: |
472 | return ProcessUNPCKPS(X86::PUNPCKHDQrm); |
473 | case X86::VUNPCKHPSrm: |
474 | return ProcessUNPCKPS(X86::VPUNPCKHDQrm); |
475 | case X86::VUNPCKHPSYrm: |
476 | return ProcessUNPCKPS(X86::VPUNPCKHDQYrm); |
477 | case X86::VUNPCKHPSZ128rm: |
478 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm); |
479 | case X86::VUNPCKHPSZ256rm: |
480 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm); |
481 | case X86::VUNPCKHPSZrm: |
482 | return ProcessUNPCKPS(X86::VPUNPCKHDQZrm); |
483 | case X86::VUNPCKHPSZ128rmk: |
484 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk); |
485 | case X86::VUNPCKHPSZ256rmk: |
486 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk); |
487 | case X86::VUNPCKHPSZrmk: |
488 | return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk); |
489 | case X86::VUNPCKHPSZ128rmkz: |
490 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz); |
491 | case X86::VUNPCKHPSZ256rmkz: |
492 | return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz); |
493 | case X86::VUNPCKHPSZrmkz: |
494 | return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz); |
495 | default: |
496 | return false; |
497 | } |
498 | } |
499 | |
500 | bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) { |
501 | LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n" ;); |
502 | bool Changed = false; |
503 | ST = &MF.getSubtarget<X86Subtarget>(); |
504 | TII = ST->getInstrInfo(); |
505 | SM = &ST->getSchedModel(); |
506 | |
507 | for (MachineBasicBlock &MBB : MF) { |
508 | for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { |
509 | if (processInstruction(MF, MBB, I)) { |
510 | ++NumInstChanges; |
511 | Changed = true; |
512 | } |
513 | } |
514 | } |
515 | LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n" ;); |
516 | return Changed; |
517 | } |
518 | |