1//===-- X86FixupInstTunings.cpp - replace instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file does a tuning pass replacing slower machine instructions
10// with faster ones. We do this here, as opposed to during normal ISel, as
11// attempting to get the "right" instruction can break patterns. This pass
// is not meant to search for special cases where an instruction can be
// transformed to another; it is only meant to do transformations where the old
// instruction is always replaceable with the new instructions. For example:
15//
16// `vpermq ymm` -> `vshufd ymm`
17// -- BAD, not always valid (lane cross/non-repeated mask)
18//
19// `vpermilps ymm` -> `vshufd ymm`
20// -- GOOD, always replaceable
21//
22//===----------------------------------------------------------------------===//
23
24#include "X86.h"
25#include "X86InstrInfo.h"
26#include "X86Subtarget.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/CodeGen/MachineFunctionPass.h"
29#include "llvm/CodeGen/MachineInstrBuilder.h"
30
31using namespace llvm;
32
33#define DEBUG_TYPE "x86-fixup-inst-tuning"
34
35STATISTIC(NumInstChanges, "Number of instructions changes");
36
37namespace {
38class X86FixupInstTuningPass : public MachineFunctionPass {
39public:
40 static char ID;
41
42 X86FixupInstTuningPass() : MachineFunctionPass(ID) {}
43
44 StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }
45
46 bool runOnMachineFunction(MachineFunction &MF) override;
47 bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
48 MachineBasicBlock::iterator &I);
49
50 // This pass runs after regalloc and doesn't support VReg operands.
51 MachineFunctionProperties getRequiredProperties() const override {
52 return MachineFunctionProperties().setNoVRegs();
53 }
54
55private:
56 const X86InstrInfo *TII = nullptr;
57 const X86Subtarget *ST = nullptr;
58 const MCSchedModel *SM = nullptr;
59};
60} // end anonymous namespace
61
62char X86FixupInstTuningPass::ID = 0;
63
64INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
65
66FunctionPass *llvm::createX86FixupInstTuning() {
67 return new X86FixupInstTuningPass();
68}
69
/// Three-way "is the new value better (smaller)?" comparison on optionals.
///
/// \returns true/false for `*NewVal < *CurVal` when both operands hold a value
/// and those values differ; std::nullopt when either operand is empty or the
/// two values compare equal (i.e. the comparison is inconclusive).
template <typename T>
static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {
  // Nothing to compare if either side is unknown.
  if (!NewVal.has_value() || !CurVal.has_value())
    return std::nullopt;

  if (*NewVal != *CurVal)
    return *NewVal < *CurVal;

  // Equal values: leave the decision to the caller.
  return std::nullopt;
}
77
78bool X86FixupInstTuningPass::processInstruction(
79 MachineFunction &MF, MachineBasicBlock &MBB,
80 MachineBasicBlock::iterator &I) {
81 MachineInstr &MI = *I;
82 unsigned Opc = MI.getOpcode();
83 unsigned NumOperands = MI.getDesc().getNumOperands();
84 bool OptSize = MF.getFunction().hasOptSize();
85
86 auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> {
87 // We already checked that SchedModel exists in `NewOpcPreferable`.
88 return MCSchedModel::getReciprocalThroughput(
89 STI: *ST, SCDesc: *(SM->getSchedClassDesc(SchedClassIdx: TII->get(Opcode).getSchedClass())));
90 };
91
92 auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> {
93 // We already checked that SchedModel exists in `NewOpcPreferable`.
94 return MCSchedModel::computeInstrLatency(
95 STI: *ST, SCDesc: *(SM->getSchedClassDesc(SchedClassIdx: TII->get(Opcode).getSchedClass())));
96 };
97
98 auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> {
99 if (unsigned Size = TII->get(Opcode).getSize())
100 return Size;
101 // Zero size means we where unable to compute it.
102 return std::nullopt;
103 };
104
105 auto NewOpcPreferable = [&](unsigned NewOpc,
106 bool ReplaceInTie = true) -> bool {
107 std::optional<bool> Res;
108 if (SM->hasInstrSchedModel()) {
109 // Compare tput -> lat -> code size.
110 Res = CmpOptionals(NewVal: GetInstTput(NewOpc), CurVal: GetInstTput(Opc));
111 if (Res.has_value())
112 return *Res;
113
114 Res = CmpOptionals(NewVal: GetInstLat(NewOpc), CurVal: GetInstLat(Opc));
115 if (Res.has_value())
116 return *Res;
117 }
118
119 Res = CmpOptionals(NewVal: GetInstSize(Opc), CurVal: GetInstSize(NewOpc));
120 if (Res.has_value())
121 return *Res;
122
123 // We either have either were unable to get tput/lat/codesize or all values
124 // were equal. Return specified option for a tie.
125 return ReplaceInTie;
126 };
127
128 // `vpermilpd r, i` -> `vshufpd r, r, i`
129 // `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
130 // `vshufpd` is always as fast or faster than `vpermilpd` and takes
131 // 1 less byte of code size for VEX and EVEX encoding.
132 auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {
133 if (!NewOpcPreferable(NewOpc))
134 return false;
135 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
136 {
137 unsigned MaskImm = MI.getOperand(i: NumOperands - 1).getImm();
138 MI.removeOperand(OpNo: NumOperands - 1);
139 MI.addOperand(Op: MI.getOperand(i: NumOperands - 2));
140 MI.setDesc(TII->get(Opcode: NewOpc));
141 MI.addOperand(Op: MachineOperand::CreateImm(Val: MaskImm));
142 }
143 LLVM_DEBUG(dbgs() << " With: " << MI);
144 return true;
145 };
146
147 // `vpermilps r, i` -> `vshufps r, r, i`
148 // `vpermilps r, i, k` -> `vshufps r, r, i, k`
149 // `vshufps` is always as fast or faster than `vpermilps` and takes
150 // 1 less byte of code size for VEX and EVEX encoding.
151 auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {
152 if (!NewOpcPreferable(NewOpc))
153 return false;
154 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
155 {
156 unsigned MaskImm = MI.getOperand(i: NumOperands - 1).getImm();
157 MI.removeOperand(OpNo: NumOperands - 1);
158 MI.addOperand(Op: MI.getOperand(i: NumOperands - 2));
159 MI.setDesc(TII->get(Opcode: NewOpc));
160 MI.addOperand(Op: MachineOperand::CreateImm(Val: MaskImm));
161 }
162 LLVM_DEBUG(dbgs() << " With: " << MI);
163 return true;
164 };
165
166 // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
167 // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
168 // byte of code size.
169 auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
170 // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as
171 // `vpshufd` saves a byte of code size.
172 if (!ST->hasNoDomainDelayShuffle() ||
173 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
174 return false;
175 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
176 {
177 MI.setDesc(TII->get(Opcode: NewOpc));
178 }
179 LLVM_DEBUG(dbgs() << " With: " << MI);
180 return true;
181 };
182
183 // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`
184 // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`
185 // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
186 // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
187 // `vunpcklpd r, m` -> `vunpcklqdq r, m, k`
188 // `vunpckhpd r, m` -> `vunpckhqdq r, m, k`
189 // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`
190 // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`
191 // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`
192 // -> `vunpck{l|h}qdq`
193 // 2) If `vshufpd` faster than `vunpck{l|h}pd`
194 // -> `vshufpd`
195 //
196 // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)
197 auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {
198 if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
199 return false;
200 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
201 {
202 MI.setDesc(TII->get(Opcode: NewOpc));
203 MI.addOperand(Op: MachineOperand::CreateImm(Val: MaskImm));
204 }
205 LLVM_DEBUG(dbgs() << " With: " << MI);
206 return true;
207 };
208
209 auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {
210 // TODO it may be worth it to set ReplaceInTie to `true` as there is no real
211 // downside to the integer unpck, but if someone doesn't specify exact
212 // target we won't find it faster.
213 if (!ST->hasNoDomainDelayShuffle() ||
214 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
215 return false;
216 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
217 {
218 MI.setDesc(TII->get(Opcode: NewOpc));
219 }
220 LLVM_DEBUG(dbgs() << " With: " << MI);
221 return true;
222 };
223
224 auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,
225 unsigned NewOpc) -> bool {
226 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
227 return true;
228 return ProcessUNPCK(NewOpc, 0x00);
229 };
230 auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,
231 unsigned NewOpc) -> bool {
232 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
233 return true;
234 return ProcessUNPCK(NewOpc, 0xff);
235 };
236
237 auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {
238 return ProcessUNPCKToIntDomain(NewOpcIntDomain);
239 };
240
241 auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
242 return ProcessUNPCKToIntDomain(NewOpc);
243 };
244
245 auto ProcessBLENDWToBLENDD = [&](unsigned MovOpc, unsigned NumElts) -> bool {
246 if (!ST->hasAVX2() || !NewOpcPreferable(MovOpc))
247 return false;
248 // Convert to VPBLENDD if scaling the VPBLENDW mask down/up loses no bits.
249 APInt MaskW =
250 APInt(8, MI.getOperand(i: NumOperands - 1).getImm(), /*IsSigned=*/false);
251 APInt MaskD = APIntOps::ScaleBitMask(A: MaskW, NewBitWidth: 4, /*MatchAllBits=*/true);
252 if (MaskW != APIntOps::ScaleBitMask(A: MaskD, NewBitWidth: 8, /*MatchAllBits=*/true))
253 return false;
254 APInt NewMaskD = APInt::getSplat(NewLen: NumElts, V: MaskD);
255 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
256 {
257 MI.setDesc(TII->get(Opcode: MovOpc));
258 MI.removeOperand(OpNo: NumOperands - 1);
259 MI.addOperand(Op: MachineOperand::CreateImm(Val: NewMaskD.getZExtValue()));
260 }
261 LLVM_DEBUG(dbgs() << " With: " << MI);
262 return true;
263 };
264
265 auto ProcessBLENDToMOV = [&](unsigned MovOpc, unsigned Mask,
266 unsigned MovImm) -> bool {
267 if ((MI.getOperand(i: NumOperands - 1).getImm() & Mask) != MovImm)
268 return false;
269 if (!OptSize && !NewOpcPreferable(MovOpc))
270 return false;
271 LLVM_DEBUG(dbgs() << "Replacing: " << MI);
272 {
273 MI.setDesc(TII->get(Opcode: MovOpc));
274 MI.removeOperand(OpNo: NumOperands - 1);
275 }
276 LLVM_DEBUG(dbgs() << " With: " << MI);
277 return true;
278 };
279
280 switch (Opc) {
281 case X86::BLENDPDrri:
282 return ProcessBLENDToMOV(X86::MOVSDrr, 0x3, 0x1);
283 case X86::VBLENDPDrri:
284 return ProcessBLENDToMOV(X86::VMOVSDrr, 0x3, 0x1);
285
286 case X86::BLENDPSrri:
287 return ProcessBLENDToMOV(X86::MOVSSrr, 0xF, 0x1) ||
288 ProcessBLENDToMOV(X86::MOVSDrr, 0xF, 0x3);
289 case X86::VBLENDPSrri:
290 return ProcessBLENDToMOV(X86::VMOVSSrr, 0xF, 0x1) ||
291 ProcessBLENDToMOV(X86::VMOVSDrr, 0xF, 0x3);
292
293 case X86::VPBLENDWrri:
294 // TODO: Add X86::VPBLENDWrmi handling
295 // TODO: Add X86::VPBLENDWYrri handling
296 // TODO: Add X86::VPBLENDWYrmi handling
297 return ProcessBLENDWToBLENDD(X86::VPBLENDDrri, 4);
298
299 case X86::VPERMILPDri:
300 return ProcessVPERMILPDri(X86::VSHUFPDrri);
301 case X86::VPERMILPDYri:
302 return ProcessVPERMILPDri(X86::VSHUFPDYrri);
303 case X86::VPERMILPDZ128ri:
304 return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
305 case X86::VPERMILPDZ256ri:
306 return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
307 case X86::VPERMILPDZri:
308 return ProcessVPERMILPDri(X86::VSHUFPDZrri);
309 case X86::VPERMILPDZ128rikz:
310 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
311 case X86::VPERMILPDZ256rikz:
312 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
313 case X86::VPERMILPDZrikz:
314 return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
315 case X86::VPERMILPDZ128rik:
316 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
317 case X86::VPERMILPDZ256rik:
318 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
319 case X86::VPERMILPDZrik:
320 return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
321
322 case X86::VPERMILPSri:
323 return ProcessVPERMILPSri(X86::VSHUFPSrri);
324 case X86::VPERMILPSYri:
325 return ProcessVPERMILPSri(X86::VSHUFPSYrri);
326 case X86::VPERMILPSZ128ri:
327 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
328 case X86::VPERMILPSZ256ri:
329 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
330 case X86::VPERMILPSZri:
331 return ProcessVPERMILPSri(X86::VSHUFPSZrri);
332 case X86::VPERMILPSZ128rikz:
333 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
334 case X86::VPERMILPSZ256rikz:
335 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
336 case X86::VPERMILPSZrikz:
337 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
338 case X86::VPERMILPSZ128rik:
339 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
340 case X86::VPERMILPSZ256rik:
341 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
342 case X86::VPERMILPSZrik:
343 return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
344 case X86::VPERMILPSmi:
345 return ProcessVPERMILPSmi(X86::VPSHUFDmi);
346 case X86::VPERMILPSYmi:
347 // TODO: See if there is a more generic way we can test if the replacement
348 // instruction is supported.
349 return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;
350 case X86::VPERMILPSZ128mi:
351 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
352 case X86::VPERMILPSZ256mi:
353 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
354 case X86::VPERMILPSZmi:
355 return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
356 case X86::VPERMILPSZ128mikz:
357 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
358 case X86::VPERMILPSZ256mikz:
359 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
360 case X86::VPERMILPSZmikz:
361 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
362 case X86::VPERMILPSZ128mik:
363 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
364 case X86::VPERMILPSZ256mik:
365 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
366 case X86::VPERMILPSZmik:
367 return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
368
369 case X86::MOVLHPSrr:
370 case X86::UNPCKLPDrr:
371 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
372 case X86::VMOVLHPSrr:
373 case X86::VUNPCKLPDrr:
374 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
375 case X86::VUNPCKLPDYrr:
376 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
377 // VMOVLHPS is always 128 bits.
378 case X86::VMOVLHPSZrr:
379 case X86::VUNPCKLPDZ128rr:
380 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
381 case X86::VUNPCKLPDZ256rr:
382 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
383 case X86::VUNPCKLPDZrr:
384 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
385 case X86::VUNPCKLPDZ128rrk:
386 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
387 case X86::VUNPCKLPDZ256rrk:
388 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
389 case X86::VUNPCKLPDZrrk:
390 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
391 case X86::VUNPCKLPDZ128rrkz:
392 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
393 case X86::VUNPCKLPDZ256rrkz:
394 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
395 case X86::VUNPCKLPDZrrkz:
396 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
397 case X86::UNPCKHPDrr:
398 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
399 case X86::VUNPCKHPDrr:
400 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
401 case X86::VUNPCKHPDYrr:
402 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
403 case X86::VUNPCKHPDZ128rr:
404 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
405 case X86::VUNPCKHPDZ256rr:
406 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
407 case X86::VUNPCKHPDZrr:
408 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
409 case X86::VUNPCKHPDZ128rrk:
410 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
411 case X86::VUNPCKHPDZ256rrk:
412 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
413 case X86::VUNPCKHPDZrrk:
414 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
415 case X86::VUNPCKHPDZ128rrkz:
416 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
417 case X86::VUNPCKHPDZ256rrkz:
418 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
419 case X86::VUNPCKHPDZrrkz:
420 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
421 case X86::UNPCKLPDrm:
422 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
423 case X86::VUNPCKLPDrm:
424 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
425 case X86::VUNPCKLPDYrm:
426 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
427 case X86::VUNPCKLPDZ128rm:
428 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
429 case X86::VUNPCKLPDZ256rm:
430 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
431 case X86::VUNPCKLPDZrm:
432 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
433 case X86::VUNPCKLPDZ128rmk:
434 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
435 case X86::VUNPCKLPDZ256rmk:
436 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
437 case X86::VUNPCKLPDZrmk:
438 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
439 case X86::VUNPCKLPDZ128rmkz:
440 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
441 case X86::VUNPCKLPDZ256rmkz:
442 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
443 case X86::VUNPCKLPDZrmkz:
444 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
445 case X86::UNPCKHPDrm:
446 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
447 case X86::VUNPCKHPDrm:
448 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
449 case X86::VUNPCKHPDYrm:
450 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
451 case X86::VUNPCKHPDZ128rm:
452 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
453 case X86::VUNPCKHPDZ256rm:
454 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
455 case X86::VUNPCKHPDZrm:
456 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
457 case X86::VUNPCKHPDZ128rmk:
458 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
459 case X86::VUNPCKHPDZ256rmk:
460 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
461 case X86::VUNPCKHPDZrmk:
462 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
463 case X86::VUNPCKHPDZ128rmkz:
464 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
465 case X86::VUNPCKHPDZ256rmkz:
466 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
467 case X86::VUNPCKHPDZrmkz:
468 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
469
470 case X86::UNPCKLPSrr:
471 return ProcessUNPCKPS(X86::PUNPCKLDQrr);
472 case X86::VUNPCKLPSrr:
473 return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
474 case X86::VUNPCKLPSYrr:
475 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
476 case X86::VUNPCKLPSZ128rr:
477 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
478 case X86::VUNPCKLPSZ256rr:
479 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
480 case X86::VUNPCKLPSZrr:
481 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
482 case X86::VUNPCKLPSZ128rrk:
483 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
484 case X86::VUNPCKLPSZ256rrk:
485 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
486 case X86::VUNPCKLPSZrrk:
487 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
488 case X86::VUNPCKLPSZ128rrkz:
489 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
490 case X86::VUNPCKLPSZ256rrkz:
491 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
492 case X86::VUNPCKLPSZrrkz:
493 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
494 case X86::UNPCKHPSrr:
495 return ProcessUNPCKPS(X86::PUNPCKHDQrr);
496 case X86::VUNPCKHPSrr:
497 return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
498 case X86::VUNPCKHPSYrr:
499 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
500 case X86::VUNPCKHPSZ128rr:
501 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
502 case X86::VUNPCKHPSZ256rr:
503 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
504 case X86::VUNPCKHPSZrr:
505 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
506 case X86::VUNPCKHPSZ128rrk:
507 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
508 case X86::VUNPCKHPSZ256rrk:
509 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
510 case X86::VUNPCKHPSZrrk:
511 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
512 case X86::VUNPCKHPSZ128rrkz:
513 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
514 case X86::VUNPCKHPSZ256rrkz:
515 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
516 case X86::VUNPCKHPSZrrkz:
517 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
518 case X86::UNPCKLPSrm:
519 return ProcessUNPCKPS(X86::PUNPCKLDQrm);
520 case X86::VUNPCKLPSrm:
521 return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
522 case X86::VUNPCKLPSYrm:
523 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
524 case X86::VUNPCKLPSZ128rm:
525 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
526 case X86::VUNPCKLPSZ256rm:
527 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
528 case X86::VUNPCKLPSZrm:
529 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
530 case X86::VUNPCKLPSZ128rmk:
531 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
532 case X86::VUNPCKLPSZ256rmk:
533 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
534 case X86::VUNPCKLPSZrmk:
535 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
536 case X86::VUNPCKLPSZ128rmkz:
537 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
538 case X86::VUNPCKLPSZ256rmkz:
539 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
540 case X86::VUNPCKLPSZrmkz:
541 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
542 case X86::UNPCKHPSrm:
543 return ProcessUNPCKPS(X86::PUNPCKHDQrm);
544 case X86::VUNPCKHPSrm:
545 return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
546 case X86::VUNPCKHPSYrm:
547 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
548 case X86::VUNPCKHPSZ128rm:
549 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
550 case X86::VUNPCKHPSZ256rm:
551 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
552 case X86::VUNPCKHPSZrm:
553 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
554 case X86::VUNPCKHPSZ128rmk:
555 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
556 case X86::VUNPCKHPSZ256rmk:
557 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
558 case X86::VUNPCKHPSZrmk:
559 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
560 case X86::VUNPCKHPSZ128rmkz:
561 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
562 case X86::VUNPCKHPSZ256rmkz:
563 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
564 case X86::VUNPCKHPSZrmkz:
565 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
566 default:
567 return false;
568 }
569}
570
571bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) {
572 LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);
573 bool Changed = false;
574 ST = &MF.getSubtarget<X86Subtarget>();
575 TII = ST->getInstrInfo();
576 SM = &ST->getSchedModel();
577
578 for (MachineBasicBlock &MBB : MF) {
579 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
580 if (processInstruction(MF, MBB, I)) {
581 ++NumInstChanges;
582 Changed = true;
583 }
584 }
585 }
586 LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);
587 return Changed;
588}
589