//===-- X86FixupVectorConstants.cpp - optimize constant generation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file examines all full size vector constant pool loads and attempts to
// replace them with smaller constant pool entries, including:
// * Converting AVX512 memory-fold instructions to their broadcast-fold form.
// * Using vzload scalar loads.
// * Broadcasting of full width loads.
// * Sign/Zero extension of full width loads.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrFoldTables.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"

using namespace llvm;

#define DEBUG_TYPE "x86-fixup-vector-constants"

STATISTIC(NumInstChanges, "Number of instruction changes");

namespace {
class X86FixupVectorConstantsImpl {
public:
  bool runOnMachineFunction(MachineFunction &MF);

private:
  bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineInstr &MI);

  const X86InstrInfo *TII = nullptr;
  const X86Subtarget *ST = nullptr;
  const MCSchedModel *SM = nullptr;
};

class X86FixupVectorConstantsLegacy : public MachineFunctionPass {
public:
  static char ID;

  X86FixupVectorConstantsLegacy() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "X86 Fixup Vector Constants";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().setNoVRegs();
  }
};
} // end anonymous namespace

char X86FixupVectorConstantsLegacy::ID = 0;

INITIALIZE_PASS(X86FixupVectorConstantsLegacy, DEBUG_TYPE, DEBUG_TYPE, false,
                false)

FunctionPass *llvm::createX86FixupVectorConstantsLegacyPass() {
  return new X86FixupVectorConstantsLegacy();
}

/// Normally, we only allow poison in vector splats. However, as this is part
/// of the backend, and working with the DAG representation, which currently
/// only natively represents undef values, we need to accept undefs here.
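/// e.g. <i32 3, i32 undef, i32 3> has splat value i32 3, while
/// <i32 3, i32 4, i32 3> has no common splat value and returns nullptr.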
static Constant *getSplatValueAllowUndef(const ConstantVector *C) {
  Constant *Res = nullptr;
  for (Value *Op : C->operands()) {
    Constant *OpC = cast<Constant>(Op);
    if (isa<UndefValue>(OpC))
      continue;
    if (!Res)
      Res = OpC;
    else if (Res != OpC)
      return nullptr;
  }
  return Res;
}

// Attempt to extract the full width of bits data from the constant.
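// e.g. <2 x i32> <i32 1, i32 2> yields a 64-bit APInt with element 0 in the
// lowest 32 bits (0x0000000200000001); undef elements contribute zero bits.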
static std::optional<APInt> extractConstantBits(const Constant *C) {
  unsigned NumBits = C->getType()->getPrimitiveSizeInBits();

  if (isa<UndefValue>(C))
    return APInt::getZero(NumBits);

  if (auto *CInt = dyn_cast<ConstantInt>(C)) {
    if (isa<VectorType>(CInt->getType()))
      return APInt::getSplat(NumBits, CInt->getValue());

    return CInt->getValue();
  }

  if (auto *CFP = dyn_cast<ConstantFP>(C)) {
    if (isa<VectorType>(CFP->getType()))
      return APInt::getSplat(NumBits, CFP->getValue().bitcastToAPInt());

    return CFP->getValue().bitcastToAPInt();
  }

  if (auto *CV = dyn_cast<ConstantVector>(C)) {
    if (auto *CVSplat = getSplatValueAllowUndef(CV)) {
      if (std::optional<APInt> Bits = extractConstantBits(CVSplat)) {
        assert((NumBits % Bits->getBitWidth()) == 0 && "Illegal splat");
        return APInt::getSplat(NumBits, *Bits);
      }
    }

    APInt Bits = APInt::getZero(NumBits);
    for (unsigned I = 0, E = CV->getNumOperands(); I != E; ++I) {
      Constant *Elt = CV->getOperand(I);
      std::optional<APInt> SubBits = extractConstantBits(Elt);
      if (!SubBits)
        return std::nullopt;
      assert(NumBits == (E * SubBits->getBitWidth()) &&
             "Illegal vector element size");
      Bits.insertBits(*SubBits, I * SubBits->getBitWidth());
    }
    return Bits;
  }

  if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
    bool IsInteger = CDS->getElementType()->isIntegerTy();
    bool IsFloat = CDS->getElementType()->isHalfTy() ||
                   CDS->getElementType()->isBFloatTy() ||
                   CDS->getElementType()->isFloatTy() ||
                   CDS->getElementType()->isDoubleTy();
    if (IsInteger || IsFloat) {
      APInt Bits = APInt::getZero(NumBits);
      unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (IsInteger)
          Bits.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
        else
          Bits.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
                          I * EltBits);
      }
      return Bits;
    }
  }

  return std::nullopt;
}

static std::optional<APInt> extractConstantBits(const Constant *C,
                                                unsigned NumBits) {
  if (std::optional<APInt> Bits = extractConstantBits(C))
    return Bits->zextOrTrunc(NumBits);
  return std::nullopt;
}

// Attempt to compute the splat width of bits data by normalizing the splat to
// remove undefs.
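// e.g. <4 x i32> <i32 42, i32 undef, i32 42, i32 undef> with SplatBitWidth=32
// is a valid splat of the 32-bit value 42, even though the raw bits (with the
// undef lanes read as zero) are not themselves a repeating 32-bit pattern.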
static std::optional<APInt> getSplatableConstant(const Constant *C,
                                                 unsigned SplatBitWidth) {
  const Type *Ty = C->getType();
  assert((Ty->getPrimitiveSizeInBits() % SplatBitWidth) == 0 &&
         "Illegal splat width");

  if (std::optional<APInt> Bits = extractConstantBits(C))
    if (Bits->isSplat(SplatBitWidth))
      return Bits->trunc(SplatBitWidth);

  // Detect general splats with undefs.
  // TODO: Do we need to handle NumEltsBits > SplatBitWidth splitting?
  if (auto *CV = dyn_cast<ConstantVector>(C)) {
    unsigned NumOps = CV->getNumOperands();
    unsigned NumEltsBits = Ty->getScalarSizeInBits();
    unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
    if ((SplatBitWidth % NumEltsBits) == 0) {
      // Collect the elements and ensure that within the repeated splat
      // sequence they either match or are undef.
      SmallVector<Constant *, 16> Sequence(NumScaleOps, nullptr);
      for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
        if (Constant *Elt = CV->getAggregateElement(Idx)) {
          if (isa<UndefValue>(Elt))
            continue;
          unsigned SplatIdx = Idx % NumScaleOps;
          if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
            Sequence[SplatIdx] = Elt;
            continue;
          }
        }
        return std::nullopt;
      }
      // Extract the constant bits forming the splat and insert into the bits
      // data, leave undef as zero.
      APInt SplatBits = APInt::getZero(SplatBitWidth);
      for (unsigned I = 0; I != NumScaleOps; ++I) {
        if (!Sequence[I])
          continue;
        if (std::optional<APInt> Bits = extractConstantBits(Sequence[I])) {
          SplatBits.insertBits(*Bits, I * Bits->getBitWidth());
          continue;
        }
        return std::nullopt;
      }
      return SplatBits;
    }
  }

  return std::nullopt;
}

// Split raw bits into a constant vector of elements of a specific bit width.
// NOTE: We don't always bother converting to scalars if the vector length is 1.
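// e.g. rebuilding the 64-bit pattern 0x0000002A0000002A with NumSclBits=32 and
// an i32 scalar type produces the <2 x i32> constant <i32 42, i32 42>.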
static Constant *rebuildConstant(LLVMContext &Ctx, Type *SclTy,
                                 const APInt &Bits, unsigned NumSclBits) {
  unsigned BitWidth = Bits.getBitWidth();

  if (NumSclBits == 8) {
    SmallVector<uint8_t> RawBits;
    for (unsigned I = 0; I != BitWidth; I += 8)
      RawBits.push_back(Bits.extractBits(8, I).getZExtValue());
    return ConstantDataVector::get(Ctx, RawBits);
  }

  if (NumSclBits == 16) {
    SmallVector<uint16_t> RawBits;
    for (unsigned I = 0; I != BitWidth; I += 16)
      RawBits.push_back(Bits.extractBits(16, I).getZExtValue());
    if (SclTy->is16bitFPTy())
      return ConstantDataVector::getFP(SclTy, RawBits);
    return ConstantDataVector::get(Ctx, RawBits);
  }

  if (NumSclBits == 32) {
    SmallVector<uint32_t> RawBits;
    for (unsigned I = 0; I != BitWidth; I += 32)
      RawBits.push_back(Bits.extractBits(32, I).getZExtValue());
    if (SclTy->isFloatTy())
      return ConstantDataVector::getFP(SclTy, RawBits);
    return ConstantDataVector::get(Ctx, RawBits);
  }

  assert(NumSclBits == 64 && "Unhandled vector element width");

  SmallVector<uint64_t> RawBits;
  for (unsigned I = 0; I != BitWidth; I += 64)
    RawBits.push_back(Bits.extractBits(64, I).getZExtValue());
  if (SclTy->isDoubleTy())
    return ConstantDataVector::getFP(SclTy, RawBits);
  return ConstantDataVector::get(Ctx, RawBits);
}

// Attempt to rebuild a normalized splat vector constant of the requested splat
// width, built up of potentially smaller scalar values.
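// e.g. a 256-bit constant that splats a single 64-bit pattern is rebuilt as a
// 64-bit constant holding just that pattern, which the caller then references
// from a 64-bit broadcast instruction.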
static Constant *rebuildSplatCst(const Constant *C, unsigned /*NumBits*/,
                                 unsigned /*NumElts*/, unsigned SplatBitWidth) {
  // TODO: Truncate to NumBits once ConvertToBroadcastAVX512 supports this.
  std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth);
  if (!Splat)
    return nullptr;

  // Determine scalar size to use for the constant splat vector, clamping as we
  // might have found a splat smaller than the original constant data.
  Type *SclTy = C->getType()->getScalarType();
  unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
  NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);

  // Fallback to i64 / double.
  NumSclBits = (NumSclBits == 8 || NumSclBits == 16 || NumSclBits == 32)
                   ? NumSclBits
                   : 64;

  // Extract per-element bits.
  return rebuildConstant(C->getContext(), SclTy, *Splat, NumSclBits);
}

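// Attempt to rebuild a constant where only the lowest ScalarBitWidth bits are
// non-zero, so the full width value can be recreated with a zero-extending
// scalar (vzload) load of the smaller constant.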
static Constant *rebuildZeroUpperCst(const Constant *C, unsigned NumBits,
                                     unsigned /*NumElts*/,
                                     unsigned ScalarBitWidth) {
  Type *SclTy = C->getType()->getScalarType();
  unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
  LLVMContext &Ctx = C->getContext();

  if (NumBits > ScalarBitWidth) {
    // Determine if the upper bits are all zero.
    if (std::optional<APInt> Bits = extractConstantBits(C, NumBits)) {
      if (Bits->countLeadingZeros() >= (NumBits - ScalarBitWidth)) {
        // If the original constant was made of smaller elements, try to retain
        // those types.
        if (ScalarBitWidth > NumSclBits && (ScalarBitWidth % NumSclBits) == 0)
          return rebuildConstant(Ctx, SclTy, *Bits, NumSclBits);

        // Fallback to raw integer bits.
        APInt RawBits = Bits->zextOrTrunc(ScalarBitWidth);
        return ConstantInt::get(Ctx, RawBits);
      }
    }
  }

  return nullptr;
}

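// Attempt to rebuild the constant as a narrower vector of SrcEltBitWidth
// elements that recreates the original value when each element is sign/zero
// extended back to DstEltBitWidth (NumBits / NumElts) bits.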
static Constant *rebuildExtCst(const Constant *C, bool IsSExt,
                               unsigned NumBits, unsigned NumElts,
                               unsigned SrcEltBitWidth) {
  unsigned DstEltBitWidth = NumBits / NumElts;
  assert((NumBits % NumElts) == 0 && (NumBits % SrcEltBitWidth) == 0 &&
         (DstEltBitWidth % SrcEltBitWidth) == 0 &&
         (DstEltBitWidth > SrcEltBitWidth) && "Illegal extension width");

  if (std::optional<APInt> Bits = extractConstantBits(C, NumBits)) {
    assert((Bits->getBitWidth() / DstEltBitWidth) == NumElts &&
           (Bits->getBitWidth() % DstEltBitWidth) == 0 &&
           "Unexpected constant extension");

    // Ensure every vector element can be represented by the src bitwidth.
    APInt TruncBits = APInt::getZero(NumElts * SrcEltBitWidth);
    for (unsigned I = 0; I != NumElts; ++I) {
      APInt Elt = Bits->extractBits(DstEltBitWidth, I * DstEltBitWidth);
      if ((IsSExt && Elt.getSignificantBits() > SrcEltBitWidth) ||
          (!IsSExt && Elt.getActiveBits() > SrcEltBitWidth))
        return nullptr;
      TruncBits.insertBits(Elt.trunc(SrcEltBitWidth), I * SrcEltBitWidth);
    }

    Type *Ty = C->getType();
    return rebuildConstant(Ty->getContext(), Ty->getScalarType(), TruncBits,
                           SrcEltBitWidth);
  }

  return nullptr;
}
static Constant *rebuildSExtCst(const Constant *C, unsigned NumBits,
                                unsigned NumElts, unsigned SrcEltBitWidth) {
  return rebuildExtCst(C, true, NumBits, NumElts, SrcEltBitWidth);
}
static Constant *rebuildZExtCst(const Constant *C, unsigned NumBits,
                                unsigned NumElts, unsigned SrcEltBitWidth) {
  return rebuildExtCst(C, false, NumBits, NumElts, SrcEltBitWidth);
}

bool X86FixupVectorConstantsImpl::processInstruction(MachineFunction &MF,
                                                     MachineBasicBlock &MBB,
                                                     MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
  bool HasSSE2 = ST->hasSSE2();
  bool HasSSE41 = ST->hasSSE41();
  bool HasAVX2 = ST->hasAVX2();
  bool HasDQI = ST->hasDQI();
  bool HasBWI = ST->hasBWI();
  bool HasVLX = ST->hasVLX();
  bool MultiDomain = ST->hasAVX512() || ST->hasNoDomainDelayMov();
  bool OptSize = MF.getFunction().hasOptSize();

  struct FixupEntry {
    int Op;          // New opcode to use (0 if unavailable for this subtarget).
    int NumCstElts;  // Number of elements in the replacement constant.
    int MemBitWidth; // Bit width of each element loaded from memory.
    // Builds the smaller replacement constant (or returns nullptr on failure).
    std::function<Constant *(const Constant *, unsigned, unsigned, unsigned)>
        RebuildConstant;
  };

  auto NewOpcPreferable = [&](const FixupEntry &Fixup,
                              unsigned RegBitWidth) -> bool {
    if (SM->hasInstrSchedModel()) {
      unsigned NewOpc = Fixup.Op;
      auto *OldDesc = SM->getSchedClassDesc(TII->get(Opc).getSchedClass());
      auto *NewDesc = SM->getSchedClassDesc(TII->get(NewOpc).getSchedClass());
      unsigned BitsSaved = RegBitWidth - (Fixup.NumCstElts * Fixup.MemBitWidth);

      // Compare tput/lat - avoid any regressions, but allow extra cycle of
      // latency in exchange for each 128-bit (or less) constant pool reduction
      // (this is a very simple cost:benefit estimate - there will probably be
      // better ways to calculate this).
      double OldTput = MCSchedModel::getReciprocalThroughput(*ST, *OldDesc);
      double NewTput = MCSchedModel::getReciprocalThroughput(*ST, *NewDesc);
      if (OldTput != NewTput)
        return NewTput < OldTput;

      int LatTol = (BitsSaved + 127) / 128;
      int OldLat = MCSchedModel::computeInstrLatency(*ST, *OldDesc);
      int NewLat = MCSchedModel::computeInstrLatency(*ST, *NewDesc);
      if (OldLat != NewLat)
        return NewLat < (OldLat + LatTol);
    }

    // We either were unable to get tput/lat or all values were equal.
    // Prefer the new opcode for reduced constant pool size.
    return true;
  };

  auto FixupConstant = [&](ArrayRef<FixupEntry> Fixups, unsigned RegBitWidth,
                           unsigned OperandNo) {
#ifdef EXPENSIVE_CHECKS
    assert(llvm::is_sorted(Fixups,
                           [](const FixupEntry &A, const FixupEntry &B) {
                             return (A.NumCstElts * A.MemBitWidth) <
                                    (B.NumCstElts * B.MemBitWidth);
                           }) &&
           "Constant fixup table not sorted in ascending constant size");
#endif
    assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
           "Unexpected number of operands!");
    if (auto *C = X86::getConstantFromPool(MI, OperandNo)) {
      unsigned CstBitWidth = C->getType()->getPrimitiveSizeInBits();
      RegBitWidth = RegBitWidth ? RegBitWidth : CstBitWidth;
      for (const FixupEntry &Fixup : Fixups) {
        // Always use the smallest possible constant load with opt/minsize,
        // otherwise use the smallest instruction that doesn't affect
        // performance.
        // TODO: If constant has been hoisted from loop, use smallest constant.
        if (Fixup.Op && (OptSize || NewOpcPreferable(Fixup, RegBitWidth))) {
          // Construct a suitable constant and adjust the MI to use the new
          // constant pool entry.
          if (Constant *NewCst = Fixup.RebuildConstant(
                  C, RegBitWidth, Fixup.NumCstElts, Fixup.MemBitWidth)) {
            unsigned NewCPI =
                CP->getConstantPoolIndex(NewCst, Align(Fixup.MemBitWidth / 8));
            MI.setDesc(TII->get(Fixup.Op));
            MI.getOperand(OperandNo + X86::AddrDisp).setIndex(NewCPI);
            return true;
          }
        }
      }
    }
    return false;
  };

  // Attempt to detect a suitable vzload/broadcast/vextload from increasing
  // constant bitwidths. Prefer vzload/broadcast/vextload for same bitwidth:
  // - vzload shouldn't ever need a shuffle port to zero the upper elements and
  //   the fp/int domain versions are equally available so we don't introduce a
  //   domain crossing penalty.
  // - broadcast sometimes needs a shuffle port (especially for 8/16-bit
  //   variants), AVX1 only has fp domain broadcasts but AVX2+ have good fp/int
  //   domain equivalents.
  // - vextload always needs a shuffle port and is only ever int domain.
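  // e.g. on AVX, a 128-bit constant that splats i32 1 and is loaded with
  // VMOVUPSrm/VMOVDQUrm can instead be loaded with VBROADCASTSSrm (or
  // VPBROADCASTDrm with AVX2) from a 4-byte constant pool entry.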
  switch (Opc) {
  /* FP Loads */
  case X86::MOVAPDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPDrm:
  case X86::MOVUPSrm: {
    // TODO: SSE3 MOVDDUP Handling
    FixupEntry Fixups[] = {
        {X86::MOVSSrm, 1, 32, rebuildZeroUpperCst},
        {HasSSE2 ? X86::MOVSDrm : 0, 1, 64, rebuildZeroUpperCst}};
    return FixupConstant(Fixups, 128, 1);
  }
  case X86::VMOVAPDrm:
  case X86::VMOVAPSrm:
  case X86::VMOVUPDrm:
  case X86::VMOVUPSrm: {
    FixupEntry Fixups[] = {
        {MultiDomain ? X86::VPMOVSXBQrm : 0, 2, 8, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXBQrm : 0, 2, 8, rebuildZExtCst},
        {X86::VMOVSSrm, 1, 32, rebuildZeroUpperCst},
        {X86::VBROADCASTSSrm, 1, 32, rebuildSplatCst},
        {MultiDomain ? X86::VPMOVSXBDrm : 0, 4, 8, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXBDrm : 0, 4, 8, rebuildZExtCst},
        {MultiDomain ? X86::VPMOVSXWQrm : 0, 2, 16, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXWQrm : 0, 2, 16, rebuildZExtCst},
        {X86::VMOVSDrm, 1, 64, rebuildZeroUpperCst},
        {X86::VMOVDDUPrm, 1, 64, rebuildSplatCst},
        {MultiDomain ? X86::VPMOVSXWDrm : 0, 4, 16, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXWDrm : 0, 4, 16, rebuildZExtCst},
        {MultiDomain ? X86::VPMOVSXDQrm : 0, 2, 32, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXDQrm : 0, 2, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 128, 1);
  }
  case X86::VMOVAPDYrm:
  case X86::VMOVAPSYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVUPSYrm: {
    FixupEntry Fixups[] = {
        {X86::VBROADCASTSSYrm, 1, 32, rebuildSplatCst},
        {HasAVX2 && MultiDomain ? X86::VPMOVSXBQYrm : 0, 4, 8, rebuildSExtCst},
        {HasAVX2 && MultiDomain ? X86::VPMOVZXBQYrm : 0, 4, 8, rebuildZExtCst},
        {X86::VBROADCASTSDYrm, 1, 64, rebuildSplatCst},
        {HasAVX2 && MultiDomain ? X86::VPMOVSXBDYrm : 0, 8, 8, rebuildSExtCst},
        {HasAVX2 && MultiDomain ? X86::VPMOVZXBDYrm : 0, 8, 8, rebuildZExtCst},
        {HasAVX2 && MultiDomain ? X86::VPMOVSXWQYrm : 0, 4, 16, rebuildSExtCst},
        {HasAVX2 && MultiDomain ? X86::VPMOVZXWQYrm : 0, 4, 16, rebuildZExtCst},
        {X86::VBROADCASTF128rm, 1, 128, rebuildSplatCst},
        {HasAVX2 && MultiDomain ? X86::VPMOVSXWDYrm : 0, 8, 16, rebuildSExtCst},
        {HasAVX2 && MultiDomain ? X86::VPMOVZXWDYrm : 0, 8, 16, rebuildZExtCst},
        {HasAVX2 && MultiDomain ? X86::VPMOVSXDQYrm : 0, 4, 32, rebuildSExtCst},
        {HasAVX2 && MultiDomain ? X86::VPMOVZXDQYrm : 0, 4, 32,
         rebuildZExtCst}};
    return FixupConstant(Fixups, 256, 1);
  }
  case X86::VMOVAPDZ128rm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVUPSZ128rm: {
    FixupEntry Fixups[] = {
        {MultiDomain ? X86::VPMOVSXBQZ128rm : 0, 2, 8, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXBQZ128rm : 0, 2, 8, rebuildZExtCst},
        {X86::VMOVSSZrm, 1, 32, rebuildZeroUpperCst},
        {X86::VBROADCASTSSZ128rm, 1, 32, rebuildSplatCst},
        {MultiDomain ? X86::VPMOVSXBDZ128rm : 0, 4, 8, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXBDZ128rm : 0, 4, 8, rebuildZExtCst},
        {MultiDomain ? X86::VPMOVSXWQZ128rm : 0, 2, 16, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXWQZ128rm : 0, 2, 16, rebuildZExtCst},
        {X86::VMOVSDZrm, 1, 64, rebuildZeroUpperCst},
        {X86::VMOVDDUPZ128rm, 1, 64, rebuildSplatCst},
        {MultiDomain ? X86::VPMOVSXWDZ128rm : 0, 4, 16, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXWDZ128rm : 0, 4, 16, rebuildZExtCst},
        {MultiDomain ? X86::VPMOVSXDQZ128rm : 0, 2, 32, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXDQZ128rm : 0, 2, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 128, 1);
  }
  case X86::VMOVAPDZ256rm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVUPSZ256rm: {
    FixupEntry Fixups[] = {
        {X86::VBROADCASTSSZ256rm, 1, 32, rebuildSplatCst},
        {MultiDomain ? X86::VPMOVSXBQZ256rm : 0, 4, 8, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXBQZ256rm : 0, 4, 8, rebuildZExtCst},
        {X86::VBROADCASTSDZ256rm, 1, 64, rebuildSplatCst},
        {MultiDomain ? X86::VPMOVSXBDZ256rm : 0, 8, 8, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXBDZ256rm : 0, 8, 8, rebuildZExtCst},
        {MultiDomain ? X86::VPMOVSXWQZ256rm : 0, 4, 16, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXWQZ256rm : 0, 4, 16, rebuildZExtCst},
        {X86::VBROADCASTF32X4Z256rm, 1, 128, rebuildSplatCst},
        {MultiDomain ? X86::VPMOVSXWDZ256rm : 0, 8, 16, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXWDZ256rm : 0, 8, 16, rebuildZExtCst},
        {MultiDomain ? X86::VPMOVSXDQZ256rm : 0, 4, 32, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXDQZ256rm : 0, 4, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 256, 1);
  }
  case X86::VMOVAPDZrm:
  case X86::VMOVAPSZrm:
  case X86::VMOVUPDZrm:
  case X86::VMOVUPSZrm: {
    FixupEntry Fixups[] = {
        {X86::VBROADCASTSSZrm, 1, 32, rebuildSplatCst},
        {X86::VBROADCASTSDZrm, 1, 64, rebuildSplatCst},
        {MultiDomain ? X86::VPMOVSXBQZrm : 0, 8, 8, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXBQZrm : 0, 8, 8, rebuildZExtCst},
        {X86::VBROADCASTF32X4Zrm, 1, 128, rebuildSplatCst},
        {MultiDomain ? X86::VPMOVSXBDZrm : 0, 16, 8, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXBDZrm : 0, 16, 8, rebuildZExtCst},
        {MultiDomain ? X86::VPMOVSXWQZrm : 0, 8, 16, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXWQZrm : 0, 8, 16, rebuildZExtCst},
        {X86::VBROADCASTF64X4Zrm, 1, 256, rebuildSplatCst},
        {MultiDomain ? X86::VPMOVSXWDZrm : 0, 16, 16, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXWDZrm : 0, 16, 16, rebuildZExtCst},
        {MultiDomain ? X86::VPMOVSXDQZrm : 0, 8, 32, rebuildSExtCst},
        {MultiDomain ? X86::VPMOVZXDQZrm : 0, 8, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 512, 1);
  }
  /* Integer Loads */
  case X86::MOVDQArm:
  case X86::MOVDQUrm: {
    FixupEntry Fixups[] = {
        {HasSSE41 ? X86::PMOVSXBQrm : 0, 2, 8, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXBQrm : 0, 2, 8, rebuildZExtCst},
        {X86::MOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
        {HasSSE41 ? X86::PMOVSXBDrm : 0, 4, 8, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXBDrm : 0, 4, 8, rebuildZExtCst},
        {HasSSE41 ? X86::PMOVSXWQrm : 0, 2, 16, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXWQrm : 0, 2, 16, rebuildZExtCst},
        {X86::MOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
        {HasSSE41 ? X86::PMOVSXBWrm : 0, 8, 8, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXBWrm : 0, 8, 8, rebuildZExtCst},
        {HasSSE41 ? X86::PMOVSXWDrm : 0, 4, 16, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXWDrm : 0, 4, 16, rebuildZExtCst},
        {HasSSE41 ? X86::PMOVSXDQrm : 0, 2, 32, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXDQrm : 0, 2, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 128, 1);
  }
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm: {
    FixupEntry Fixups[] = {
        {HasAVX2 ? X86::VPBROADCASTBrm : 0, 1, 8, rebuildSplatCst},
        {HasAVX2 ? X86::VPBROADCASTWrm : 0, 1, 16, rebuildSplatCst},
        {X86::VPMOVSXBQrm, 2, 8, rebuildSExtCst},
        {X86::VPMOVZXBQrm, 2, 8, rebuildZExtCst},
        {X86::VMOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
        {HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm, 1, 32,
         rebuildSplatCst},
        {X86::VPMOVSXBDrm, 4, 8, rebuildSExtCst},
        {X86::VPMOVZXBDrm, 4, 8, rebuildZExtCst},
        {X86::VPMOVSXWQrm, 2, 16, rebuildSExtCst},
        {X86::VPMOVZXWQrm, 2, 16, rebuildZExtCst},
        {X86::VMOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
        {HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm, 1, 64,
         rebuildSplatCst},
        {X86::VPMOVSXBWrm, 8, 8, rebuildSExtCst},
        {X86::VPMOVZXBWrm, 8, 8, rebuildZExtCst},
        {X86::VPMOVSXWDrm, 4, 16, rebuildSExtCst},
        {X86::VPMOVZXWDrm, 4, 16, rebuildZExtCst},
        {X86::VPMOVSXDQrm, 2, 32, rebuildSExtCst},
        {X86::VPMOVZXDQrm, 2, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 128, 1);
  }
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm: {
    FixupEntry Fixups[] = {
        {HasAVX2 ? X86::VPBROADCASTBYrm : 0, 1, 8, rebuildSplatCst},
        {HasAVX2 ? X86::VPBROADCASTWYrm : 0, 1, 16, rebuildSplatCst},
        {HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm, 1, 32,
         rebuildSplatCst},
        {HasAVX2 ? X86::VPMOVSXBQYrm : 0, 4, 8, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXBQYrm : 0, 4, 8, rebuildZExtCst},
        {HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm, 1, 64,
         rebuildSplatCst},
        {HasAVX2 ? X86::VPMOVSXBDYrm : 0, 8, 8, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXBDYrm : 0, 8, 8, rebuildZExtCst},
        {HasAVX2 ? X86::VPMOVSXWQYrm : 0, 4, 16, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXWQYrm : 0, 4, 16, rebuildZExtCst},
        {HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm, 1, 128,
         rebuildSplatCst},
        {HasAVX2 ? X86::VPMOVSXBWYrm : 0, 16, 8, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXBWYrm : 0, 16, 8, rebuildZExtCst},
        {HasAVX2 ? X86::VPMOVSXWDYrm : 0, 8, 16, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXWDYrm : 0, 8, 16, rebuildZExtCst},
        {HasAVX2 ? X86::VPMOVSXDQYrm : 0, 4, 32, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXDQYrm : 0, 4, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 256, 1);
  }
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQU64Z128rm: {
    FixupEntry Fixups[] = {
        {HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1, 8, rebuildSplatCst},
        {HasBWI ? X86::VPBROADCASTWZ128rm : 0, 1, 16, rebuildSplatCst},
        {X86::VPMOVSXBQZ128rm, 2, 8, rebuildSExtCst},
        {X86::VPMOVZXBQZ128rm, 2, 8, rebuildZExtCst},
        {X86::VMOVDI2PDIZrm, 1, 32, rebuildZeroUpperCst},
        {X86::VPBROADCASTDZ128rm, 1, 32, rebuildSplatCst},
        {X86::VPMOVSXBDZ128rm, 4, 8, rebuildSExtCst},
        {X86::VPMOVZXBDZ128rm, 4, 8, rebuildZExtCst},
        {X86::VPMOVSXWQZ128rm, 2, 16, rebuildSExtCst},
        {X86::VPMOVZXWQZ128rm, 2, 16, rebuildZExtCst},
        {X86::VMOVQI2PQIZrm, 1, 64, rebuildZeroUpperCst},
        {X86::VPBROADCASTQZ128rm, 1, 64, rebuildSplatCst},
        {HasBWI ? X86::VPMOVSXBWZ128rm : 0, 8, 8, rebuildSExtCst},
        {HasBWI ? X86::VPMOVZXBWZ128rm : 0, 8, 8, rebuildZExtCst},
        {X86::VPMOVSXWDZ128rm, 4, 16, rebuildSExtCst},
        {X86::VPMOVZXWDZ128rm, 4, 16, rebuildZExtCst},
        {X86::VPMOVSXDQZ128rm, 2, 32, rebuildSExtCst},
        {X86::VPMOVZXDQZ128rm, 2, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 128, 1);
  }
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQU64Z256rm: {
    FixupEntry Fixups[] = {
        {HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1, 8, rebuildSplatCst},
        {HasBWI ? X86::VPBROADCASTWZ256rm : 0, 1, 16, rebuildSplatCst},
        {X86::VPBROADCASTDZ256rm, 1, 32, rebuildSplatCst},
        {X86::VPMOVSXBQZ256rm, 4, 8, rebuildSExtCst},
        {X86::VPMOVZXBQZ256rm, 4, 8, rebuildZExtCst},
        {X86::VPBROADCASTQZ256rm, 1, 64, rebuildSplatCst},
        {X86::VPMOVSXBDZ256rm, 8, 8, rebuildSExtCst},
        {X86::VPMOVZXBDZ256rm, 8, 8, rebuildZExtCst},
        {X86::VPMOVSXWQZ256rm, 4, 16, rebuildSExtCst},
        {X86::VPMOVZXWQZ256rm, 4, 16, rebuildZExtCst},
        {X86::VBROADCASTI32X4Z256rm, 1, 128, rebuildSplatCst},
        {HasBWI ? X86::VPMOVSXBWZ256rm : 0, 16, 8, rebuildSExtCst},
        {HasBWI ? X86::VPMOVZXBWZ256rm : 0, 16, 8, rebuildZExtCst},
        {X86::VPMOVSXWDZ256rm, 8, 16, rebuildSExtCst},
        {X86::VPMOVZXWDZ256rm, 8, 16, rebuildZExtCst},
        {X86::VPMOVSXDQZ256rm, 4, 32, rebuildSExtCst},
        {X86::VPMOVZXDQZ256rm, 4, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 256, 1);
  }
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQU64Zrm: {
    FixupEntry Fixups[] = {
        {HasBWI ? X86::VPBROADCASTBZrm : 0, 1, 8, rebuildSplatCst},
        {HasBWI ? X86::VPBROADCASTWZrm : 0, 1, 16, rebuildSplatCst},
        {X86::VPBROADCASTDZrm, 1, 32, rebuildSplatCst},
        {X86::VPBROADCASTQZrm, 1, 64, rebuildSplatCst},
        {X86::VPMOVSXBQZrm, 8, 8, rebuildSExtCst},
        {X86::VPMOVZXBQZrm, 8, 8, rebuildZExtCst},
        {X86::VBROADCASTI32X4Zrm, 1, 128, rebuildSplatCst},
        {X86::VPMOVSXBDZrm, 16, 8, rebuildSExtCst},
        {X86::VPMOVZXBDZrm, 16, 8, rebuildZExtCst},
        {X86::VPMOVSXWQZrm, 8, 16, rebuildSExtCst},
        {X86::VPMOVZXWQZrm, 8, 16, rebuildZExtCst},
        {X86::VBROADCASTI64X4Zrm, 1, 256, rebuildSplatCst},
        {HasBWI ? X86::VPMOVSXBWZrm : 0, 32, 8, rebuildSExtCst},
        {HasBWI ? X86::VPMOVZXBWZrm : 0, 32, 8, rebuildZExtCst},
        {X86::VPMOVSXWDZrm, 16, 16, rebuildSExtCst},
        {X86::VPMOVZXWDZrm, 16, 16, rebuildZExtCst},
        {X86::VPMOVSXDQZrm, 8, 32, rebuildSExtCst},
        {X86::VPMOVZXDQZrm, 8, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 512, 1);
  }
  }

  auto ConvertToBroadcast = [&](unsigned OpSrc, int BW) {
    if (OpSrc) {
      if (const X86FoldTableEntry *Mem2Bcst =
              llvm::lookupBroadcastFoldTableBySize(OpSrc, BW)) {
        unsigned OpBcst = Mem2Bcst->DstOp;
        unsigned OpNoBcst = Mem2Bcst->Flags & TB_INDEX_MASK;
        FixupEntry Fixups[] = {{(int)OpBcst, 1, BW, rebuildSplatCst}};
        // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
        // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
        return FixupConstant(Fixups, 0, OpNoBcst);
      }
    }
    return false;
  };

  // Attempt to find an AVX512 mapping from a full width memory-fold
  // instruction to a broadcast-fold instruction variant.
  if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX)
    return ConvertToBroadcast(Opc, 32) || ConvertToBroadcast(Opc, 64);

  // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
  // conversion to see if we can convert to a broadcasted (integer) logic op.
  if (HasVLX && !HasDQI) {
    unsigned OpSrc32 = 0, OpSrc64 = 0;
    switch (Opc) {
    case X86::VANDPDrm:
    case X86::VANDPSrm:
    case X86::VPANDrm:
      OpSrc32 = X86::VPANDDZ128rm;
      OpSrc64 = X86::VPANDQZ128rm;
      break;
    case X86::VANDPDYrm:
    case X86::VANDPSYrm:
    case X86::VPANDYrm:
      OpSrc32 = X86::VPANDDZ256rm;
      OpSrc64 = X86::VPANDQZ256rm;
      break;
    case X86::VANDNPDrm:
    case X86::VANDNPSrm:
    case X86::VPANDNrm:
      OpSrc32 = X86::VPANDNDZ128rm;
      OpSrc64 = X86::VPANDNQZ128rm;
      break;
    case X86::VANDNPDYrm:
    case X86::VANDNPSYrm:
    case X86::VPANDNYrm:
      OpSrc32 = X86::VPANDNDZ256rm;
      OpSrc64 = X86::VPANDNQZ256rm;
      break;
    case X86::VORPDrm:
    case X86::VORPSrm:
    case X86::VPORrm:
      OpSrc32 = X86::VPORDZ128rm;
      OpSrc64 = X86::VPORQZ128rm;
      break;
    case X86::VORPDYrm:
    case X86::VORPSYrm:
    case X86::VPORYrm:
      OpSrc32 = X86::VPORDZ256rm;
      OpSrc64 = X86::VPORQZ256rm;
      break;
    case X86::VXORPDrm:
    case X86::VXORPSrm:
    case X86::VPXORrm:
      OpSrc32 = X86::VPXORDZ128rm;
      OpSrc64 = X86::VPXORQZ128rm;
      break;
    case X86::VXORPDYrm:
    case X86::VXORPSYrm:
    case X86::VPXORYrm:
      OpSrc32 = X86::VPXORDZ256rm;
      OpSrc64 = X86::VPXORQZ256rm;
      break;
    }
    if (OpSrc32 || OpSrc64)
      return ConvertToBroadcast(OpSrc32, 32) || ConvertToBroadcast(OpSrc64, 64);
  }

  return false;
}

bool X86FixupVectorConstantsImpl::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "Start X86FixupVectorConstants\n";);
  bool Changed = false;
  ST = &MF.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  SM = &ST->getSchedModel();

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (processInstruction(MF, MBB, MI)) {
        ++NumInstChanges;
        Changed = true;
      }
    }
  }
  LLVM_DEBUG(dbgs() << "End X86FixupVectorConstants\n";);
  return Changed;
}

bool X86FixupVectorConstantsLegacy::runOnMachineFunction(MachineFunction &MF) {
  X86FixupVectorConstantsImpl Impl;
  return Impl.runOnMachineFunction(MF);
}

PreservedAnalyses
X86FixupVectorConstantsPass::run(MachineFunction &MF,
                                 MachineFunctionAnalysisManager &MFAM) {
  X86FixupVectorConstantsImpl Impl;
  return Impl.runOnMachineFunction(MF)
             ? getMachineFunctionPassPreservedAnalyses()
                   .preserveSet<CFGAnalyses>()
             : PreservedAnalyses::all();
}