//===-- X86FixupVectorConstants.cpp - optimize constant generation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file examines all full size vector constant pool loads and attempts to
// replace them with smaller constant pool entries, including:
// * Converting AVX512 memory-fold instructions to their broadcast-fold form.
// * Using vzload scalar loads.
// * Broadcasting of full width loads.
// * Sign/Zero extension of full width loads.
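//
// For example, a constant pool load of a <4 x i32> splat can instead be stored
// as a single i32 element and loaded with a 32-bit broadcast.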
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrFoldTables.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"

using namespace llvm;

#define DEBUG_TYPE "x86-fixup-vector-constants"

STATISTIC(NumInstChanges, "Number of instruction changes");

namespace {
class X86FixupVectorConstantsPass : public MachineFunctionPass {
public:
  static char ID;

  X86FixupVectorConstantsPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "X86 Fixup Vector Constants";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
                          MachineInstr &MI);

  // This pass runs after regalloc and doesn't support VReg operands.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  const X86InstrInfo *TII = nullptr;
  const X86Subtarget *ST = nullptr;
  const MCSchedModel *SM = nullptr;
};
} // end anonymous namespace

char X86FixupVectorConstantsPass::ID = 0;

INITIALIZE_PASS(X86FixupVectorConstantsPass, DEBUG_TYPE, DEBUG_TYPE, false,
                false)

FunctionPass *llvm::createX86FixupVectorConstants() {
  return new X86FixupVectorConstantsPass();
}

/// Normally, we only allow poison in vector splats. However, as this is part
/// of the backend, and working with the DAG representation, which currently
/// only natively represents undef values, we need to accept undefs here.
static Constant *getSplatValueAllowUndef(const ConstantVector *C) {
  Constant *Res = nullptr;
  for (Value *Op : C->operands()) {
    Constant *OpC = cast<Constant>(Op);
    if (isa<UndefValue>(OpC))
      continue;
    if (!Res)
      Res = OpC;
    else if (Res != OpC)
      return nullptr;
  }
  return Res;
}

// Attempt to extract the full width of bits data from the constant.
static std::optional<APInt> extractConstantBits(const Constant *C) {
  unsigned NumBits = C->getType()->getPrimitiveSizeInBits();

  if (isa<UndefValue>(C))
    return APInt::getZero(NumBits);

  if (auto *CInt = dyn_cast<ConstantInt>(C))
    return CInt->getValue();

  if (auto *CFP = dyn_cast<ConstantFP>(C))
    return CFP->getValue().bitcastToAPInt();

  if (auto *CV = dyn_cast<ConstantVector>(C)) {
    if (auto *CVSplat = getSplatValueAllowUndef(CV)) {
      if (std::optional<APInt> Bits = extractConstantBits(CVSplat)) {
        assert((NumBits % Bits->getBitWidth()) == 0 && "Illegal splat");
        return APInt::getSplat(NumBits, *Bits);
      }
    }

    APInt Bits = APInt::getZero(NumBits);
    for (unsigned I = 0, E = CV->getNumOperands(); I != E; ++I) {
      Constant *Elt = CV->getOperand(I);
      std::optional<APInt> SubBits = extractConstantBits(Elt);
      if (!SubBits)
        return std::nullopt;
      assert(NumBits == (E * SubBits->getBitWidth()) &&
             "Illegal vector element size");
      Bits.insertBits(*SubBits, I * SubBits->getBitWidth());
    }
    return Bits;
  }

  if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
    bool IsInteger = CDS->getElementType()->isIntegerTy();
    bool IsFloat = CDS->getElementType()->isHalfTy() ||
                   CDS->getElementType()->isBFloatTy() ||
                   CDS->getElementType()->isFloatTy() ||
                   CDS->getElementType()->isDoubleTy();
    if (IsInteger || IsFloat) {
      APInt Bits = APInt::getZero(NumBits);
      unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (IsInteger)
          Bits.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
        else
          Bits.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
                          I * EltBits);
      }
      return Bits;
    }
  }

  return std::nullopt;
}

static std::optional<APInt> extractConstantBits(const Constant *C,
                                                unsigned NumBits) {
  if (std::optional<APInt> Bits = extractConstantBits(C))
    return Bits->zextOrTrunc(NumBits);
  return std::nullopt;
}

// Attempt to compute the splat width of bits data by normalizing the splat to
// remove undefs.
static std::optional<APInt> getSplatableConstant(const Constant *C,
                                                 unsigned SplatBitWidth) {
  const Type *Ty = C->getType();
  assert((Ty->getPrimitiveSizeInBits() % SplatBitWidth) == 0 &&
         "Illegal splat width");

  if (std::optional<APInt> Bits = extractConstantBits(C))
    if (Bits->isSplat(SplatBitWidth))
      return Bits->trunc(SplatBitWidth);

  // Detect general splats with undefs.
  // TODO: Do we need to handle NumEltsBits > SplatBitWidth splitting?
  if (auto *CV = dyn_cast<ConstantVector>(C)) {
    unsigned NumOps = CV->getNumOperands();
    unsigned NumEltsBits = Ty->getScalarSizeInBits();
    unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
    if ((SplatBitWidth % NumEltsBits) == 0) {
      // Collect the elements and ensure that within the repeated splat
      // sequence they either match or are undef.
      SmallVector<Constant *, 16> Sequence(NumScaleOps, nullptr);
      for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
        if (Constant *Elt = CV->getAggregateElement(Idx)) {
          if (isa<UndefValue>(Elt))
            continue;
          unsigned SplatIdx = Idx % NumScaleOps;
          if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
            Sequence[SplatIdx] = Elt;
            continue;
          }
        }
        return std::nullopt;
      }
      // Extract the constant bits forming the splat and insert into the bits
      // data, leave undef as zero.
      APInt SplatBits = APInt::getZero(SplatBitWidth);
      for (unsigned I = 0; I != NumScaleOps; ++I) {
        if (!Sequence[I])
          continue;
        if (std::optional<APInt> Bits = extractConstantBits(Sequence[I])) {
          SplatBits.insertBits(*Bits, I * Bits->getBitWidth());
          continue;
        }
        return std::nullopt;
      }
      return SplatBits;
    }
  }

  return std::nullopt;
}

// Split raw bits into a constant vector of elements of a specific bit width.
// NOTE: We don't always bother converting to scalars if the vector length is 1.
static Constant *rebuildConstant(LLVMContext &Ctx, Type *SclTy,
                                 const APInt &Bits, unsigned NumSclBits) {
  unsigned BitWidth = Bits.getBitWidth();

  if (NumSclBits == 8) {
    SmallVector<uint8_t> RawBits;
    for (unsigned I = 0; I != BitWidth; I += 8)
      RawBits.push_back(Bits.extractBits(8, I).getZExtValue());
    return ConstantDataVector::get(Ctx, RawBits);
  }

  if (NumSclBits == 16) {
    SmallVector<uint16_t> RawBits;
    for (unsigned I = 0; I != BitWidth; I += 16)
      RawBits.push_back(Bits.extractBits(16, I).getZExtValue());
    if (SclTy->is16bitFPTy())
      return ConstantDataVector::getFP(SclTy, RawBits);
    return ConstantDataVector::get(Ctx, RawBits);
  }

  if (NumSclBits == 32) {
    SmallVector<uint32_t> RawBits;
    for (unsigned I = 0; I != BitWidth; I += 32)
      RawBits.push_back(Bits.extractBits(32, I).getZExtValue());
    if (SclTy->isFloatTy())
      return ConstantDataVector::getFP(SclTy, RawBits);
    return ConstantDataVector::get(Ctx, RawBits);
  }

  assert(NumSclBits == 64 && "Unhandled vector element width");

  SmallVector<uint64_t> RawBits;
  for (unsigned I = 0; I != BitWidth; I += 64)
    RawBits.push_back(Bits.extractBits(64, I).getZExtValue());
  if (SclTy->isDoubleTy())
    return ConstantDataVector::getFP(SclTy, RawBits);
  return ConstantDataVector::get(Ctx, RawBits);
}

// Attempt to rebuild a normalized splat vector constant of the requested splat
// width, built up of potentially smaller scalar values.
static Constant *rebuildSplatCst(const Constant *C, unsigned /*NumBits*/,
                                 unsigned /*NumElts*/, unsigned SplatBitWidth) {
  // TODO: Truncate to NumBits once ConvertToBroadcastAVX512 supports this.
  std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth);
  if (!Splat)
    return nullptr;

  // Determine scalar size to use for the constant splat vector, clamping as we
  // might have found a splat smaller than the original constant data.
  Type *SclTy = C->getType()->getScalarType();
  unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
  NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);

  // Fallback to i64 / double.
  NumSclBits = (NumSclBits == 8 || NumSclBits == 16 || NumSclBits == 32)
                   ? NumSclBits
                   : 64;

  // Extract per-element bits.
  return rebuildConstant(C->getContext(), SclTy, *Splat, NumSclBits);
}

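// Attempt to rebuild a scalar constant that, when zero extended to the full
// register width, reproduces the original constant, i.e. the upper elements of
// the original constant must all be zero (matching a vzload style scalar load).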
static Constant *rebuildZeroUpperCst(const Constant *C, unsigned NumBits,
                                     unsigned /*NumElts*/,
                                     unsigned ScalarBitWidth) {
  Type *SclTy = C->getType()->getScalarType();
  unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
  LLVMContext &Ctx = C->getContext();

  if (NumBits > ScalarBitWidth) {
    // Determine if the upper bits are all zero.
    if (std::optional<APInt> Bits = extractConstantBits(C, NumBits)) {
      if (Bits->countLeadingZeros() >= (NumBits - ScalarBitWidth)) {
        // If the original constant was made of smaller elements, try to retain
        // those types.
        if (ScalarBitWidth > NumSclBits && (ScalarBitWidth % NumSclBits) == 0)
          return rebuildConstant(Ctx, SclTy, *Bits, NumSclBits);

        // Fallback to raw integer bits.
        APInt RawBits = Bits->zextOrTrunc(ScalarBitWidth);
        return ConstantInt::get(Ctx, RawBits);
      }
    }
  }

  return nullptr;
}

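// Attempt to rebuild a narrower constant whose elements sign/zero extend to the
// elements of the original full width constant, so that the load can instead be
// performed with an extending (PMOVSX/PMOVZX style) load.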
static Constant *rebuildExtCst(const Constant *C, bool IsSExt,
                               unsigned NumBits, unsigned NumElts,
                               unsigned SrcEltBitWidth) {
  unsigned DstEltBitWidth = NumBits / NumElts;
  assert((NumBits % NumElts) == 0 && (NumBits % SrcEltBitWidth) == 0 &&
         (DstEltBitWidth % SrcEltBitWidth) == 0 &&
         (DstEltBitWidth > SrcEltBitWidth) && "Illegal extension width");

  if (std::optional<APInt> Bits = extractConstantBits(C, NumBits)) {
    assert((Bits->getBitWidth() / DstEltBitWidth) == NumElts &&
           (Bits->getBitWidth() % DstEltBitWidth) == 0 &&
           "Unexpected constant extension");

    // Ensure every vector element can be represented by the src bitwidth.
    APInt TruncBits = APInt::getZero(NumElts * SrcEltBitWidth);
    for (unsigned I = 0; I != NumElts; ++I) {
      APInt Elt = Bits->extractBits(DstEltBitWidth, I * DstEltBitWidth);
      if ((IsSExt && Elt.getSignificantBits() > SrcEltBitWidth) ||
          (!IsSExt && Elt.getActiveBits() > SrcEltBitWidth))
        return nullptr;
      TruncBits.insertBits(Elt.trunc(SrcEltBitWidth), I * SrcEltBitWidth);
    }

    Type *Ty = C->getType();
    return rebuildConstant(Ty->getContext(), Ty->getScalarType(), TruncBits,
                           SrcEltBitWidth);
  }

  return nullptr;
}
static Constant *rebuildSExtCst(const Constant *C, unsigned NumBits,
                                unsigned NumElts, unsigned SrcEltBitWidth) {
  return rebuildExtCst(C, /*IsSExt=*/true, NumBits, NumElts, SrcEltBitWidth);
}
static Constant *rebuildZExtCst(const Constant *C, unsigned NumBits,
                                unsigned NumElts, unsigned SrcEltBitWidth) {
  return rebuildExtCst(C, /*IsSExt=*/false, NumBits, NumElts, SrcEltBitWidth);
}

bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
                                                     MachineBasicBlock &MBB,
                                                     MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
  bool HasSSE41 = ST->hasSSE41();
  bool HasAVX2 = ST->hasAVX2();
  bool HasDQI = ST->hasDQI();
  bool HasBWI = ST->hasBWI();
  bool HasVLX = ST->hasVLX();

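  // A candidate replacement: Op is the new opcode (0 if unavailable on this
  // subtarget), NumCstElts x MemBitWidth describes the shape of the smaller
  // constant pool entry, and RebuildConstant attempts to build that entry
  // (returning nullptr on failure).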
  struct FixupEntry {
    int Op;
    int NumCstElts;
    int MemBitWidth;
    std::function<Constant *(const Constant *, unsigned, unsigned, unsigned)>
        RebuildConstant;
  };
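  // Scan the fixup table (sorted in ascending constant size) and rewrite MI to
  // use the first alternative for which a suitable smaller constant pool entry
  // can be built.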
  auto FixupConstant = [&](ArrayRef<FixupEntry> Fixups, unsigned RegBitWidth,
                           unsigned OperandNo) {
#ifdef EXPENSIVE_CHECKS
    assert(llvm::is_sorted(Fixups,
                           [](const FixupEntry &A, const FixupEntry &B) {
                             return (A.NumCstElts * A.MemBitWidth) <
                                    (B.NumCstElts * B.MemBitWidth);
                           }) &&
           "Constant fixup table not sorted in ascending constant size");
#endif
    assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
           "Unexpected number of operands!");
    if (auto *C = X86::getConstantFromPool(MI, OperandNo)) {
      RegBitWidth =
          RegBitWidth ? RegBitWidth : C->getType()->getPrimitiveSizeInBits();
      for (const FixupEntry &Fixup : Fixups) {
        if (Fixup.Op) {
          // Construct a suitable constant and adjust the MI to use the new
          // constant pool entry.
          if (Constant *NewCst = Fixup.RebuildConstant(
                  C, RegBitWidth, Fixup.NumCstElts, Fixup.MemBitWidth)) {
            unsigned NewCPI =
                CP->getConstantPoolIndex(NewCst, Align(Fixup.MemBitWidth / 8));
            MI.setDesc(TII->get(Fixup.Op));
            MI.getOperand(OperandNo + X86::AddrDisp).setIndex(NewCPI);
            return true;
          }
        }
      }
    }
    return false;
  };

  // Attempt to detect a suitable vzload/broadcast/vextload from increasing
  // constant bitwidths. Prefer vzload/broadcast/vextload for same bitwidth:
  // - vzload shouldn't ever need a shuffle port to zero the upper elements and
  //   the fp/int domain versions are equally available so we don't introduce a
  //   domain crossing penalty.
  // - broadcasts sometimes need a shuffle port (especially for 8/16-bit
  //   variants), AVX1 only has fp domain broadcasts but AVX2+ have good fp/int
  //   domain equivalents.
  // - vextload always needs a shuffle port and is only ever int domain.
  switch (Opc) {
  /* FP Loads */
  case X86::MOVAPDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPDrm:
  case X86::MOVUPSrm:
    // TODO: SSE3 MOVDDUP Handling
    return FixupConstant({{X86::MOVSSrm, 1, 32, rebuildZeroUpperCst},
                          {X86::MOVSDrm, 1, 64, rebuildZeroUpperCst}},
                         128, 1);
  case X86::VMOVAPDrm:
  case X86::VMOVAPSrm:
  case X86::VMOVUPDrm:
  case X86::VMOVUPSrm:
    return FixupConstant({{X86::VMOVSSrm, 1, 32, rebuildZeroUpperCst},
                          {X86::VBROADCASTSSrm, 1, 32, rebuildSplatCst},
                          {X86::VMOVSDrm, 1, 64, rebuildZeroUpperCst},
                          {X86::VMOVDDUPrm, 1, 64, rebuildSplatCst}},
                         128, 1);
  case X86::VMOVAPDYrm:
  case X86::VMOVAPSYrm:
  case X86::VMOVUPDYrm:
  case X86::VMOVUPSYrm:
    return FixupConstant({{X86::VBROADCASTSSYrm, 1, 32, rebuildSplatCst},
                          {X86::VBROADCASTSDYrm, 1, 64, rebuildSplatCst},
                          {X86::VBROADCASTF128rm, 1, 128, rebuildSplatCst}},
                         256, 1);
  case X86::VMOVAPDZ128rm:
  case X86::VMOVAPSZ128rm:
  case X86::VMOVUPDZ128rm:
  case X86::VMOVUPSZ128rm:
    return FixupConstant({{X86::VMOVSSZrm, 1, 32, rebuildZeroUpperCst},
                          {X86::VBROADCASTSSZ128rm, 1, 32, rebuildSplatCst},
                          {X86::VMOVSDZrm, 1, 64, rebuildZeroUpperCst},
                          {X86::VMOVDDUPZ128rm, 1, 64, rebuildSplatCst}},
                         128, 1);
  case X86::VMOVAPDZ256rm:
  case X86::VMOVAPSZ256rm:
  case X86::VMOVUPDZ256rm:
  case X86::VMOVUPSZ256rm:
    return FixupConstant(
        {{X86::VBROADCASTSSZ256rm, 1, 32, rebuildSplatCst},
         {X86::VBROADCASTSDZ256rm, 1, 64, rebuildSplatCst},
         {X86::VBROADCASTF32X4Z256rm, 1, 128, rebuildSplatCst}},
        256, 1);
  case X86::VMOVAPDZrm:
  case X86::VMOVAPSZrm:
  case X86::VMOVUPDZrm:
  case X86::VMOVUPSZrm:
    return FixupConstant({{X86::VBROADCASTSSZrm, 1, 32, rebuildSplatCst},
                          {X86::VBROADCASTSDZrm, 1, 64, rebuildSplatCst},
                          {X86::VBROADCASTF32X4rm, 1, 128, rebuildSplatCst},
                          {X86::VBROADCASTF64X4rm, 1, 256, rebuildSplatCst}},
                         512, 1);
  /* Integer Loads */
  case X86::MOVDQArm:
  case X86::MOVDQUrm: {
    FixupEntry Fixups[] = {
        {HasSSE41 ? X86::PMOVSXBQrm : 0, 2, 8, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXBQrm : 0, 2, 8, rebuildZExtCst},
        {X86::MOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
        {HasSSE41 ? X86::PMOVSXBDrm : 0, 4, 8, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXBDrm : 0, 4, 8, rebuildZExtCst},
        {HasSSE41 ? X86::PMOVSXWQrm : 0, 2, 16, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXWQrm : 0, 2, 16, rebuildZExtCst},
        {X86::MOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
        {HasSSE41 ? X86::PMOVSXBWrm : 0, 8, 8, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXBWrm : 0, 8, 8, rebuildZExtCst},
        {HasSSE41 ? X86::PMOVSXWDrm : 0, 4, 16, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXWDrm : 0, 4, 16, rebuildZExtCst},
        {HasSSE41 ? X86::PMOVSXDQrm : 0, 2, 32, rebuildSExtCst},
        {HasSSE41 ? X86::PMOVZXDQrm : 0, 2, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 128, 1);
  }
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm: {
    FixupEntry Fixups[] = {
        {HasAVX2 ? X86::VPBROADCASTBrm : 0, 1, 8, rebuildSplatCst},
        {HasAVX2 ? X86::VPBROADCASTWrm : 0, 1, 16, rebuildSplatCst},
        {X86::VPMOVSXBQrm, 2, 8, rebuildSExtCst},
        {X86::VPMOVZXBQrm, 2, 8, rebuildZExtCst},
        {X86::VMOVDI2PDIrm, 1, 32, rebuildZeroUpperCst},
        {HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm, 1, 32,
         rebuildSplatCst},
        {X86::VPMOVSXBDrm, 4, 8, rebuildSExtCst},
        {X86::VPMOVZXBDrm, 4, 8, rebuildZExtCst},
        {X86::VPMOVSXWQrm, 2, 16, rebuildSExtCst},
        {X86::VPMOVZXWQrm, 2, 16, rebuildZExtCst},
        {X86::VMOVQI2PQIrm, 1, 64, rebuildZeroUpperCst},
        {HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm, 1, 64,
         rebuildSplatCst},
        {X86::VPMOVSXBWrm, 8, 8, rebuildSExtCst},
        {X86::VPMOVZXBWrm, 8, 8, rebuildZExtCst},
        {X86::VPMOVSXWDrm, 4, 16, rebuildSExtCst},
        {X86::VPMOVZXWDrm, 4, 16, rebuildZExtCst},
        {X86::VPMOVSXDQrm, 2, 32, rebuildSExtCst},
        {X86::VPMOVZXDQrm, 2, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 128, 1);
  }
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm: {
    FixupEntry Fixups[] = {
        {HasAVX2 ? X86::VPBROADCASTBYrm : 0, 1, 8, rebuildSplatCst},
        {HasAVX2 ? X86::VPBROADCASTWYrm : 0, 1, 16, rebuildSplatCst},
        {HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm, 1, 32,
         rebuildSplatCst},
        {HasAVX2 ? X86::VPMOVSXBQYrm : 0, 4, 8, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXBQYrm : 0, 4, 8, rebuildZExtCst},
        {HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm, 1, 64,
         rebuildSplatCst},
        {HasAVX2 ? X86::VPMOVSXBDYrm : 0, 8, 8, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXBDYrm : 0, 8, 8, rebuildZExtCst},
        {HasAVX2 ? X86::VPMOVSXWQYrm : 0, 4, 16, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXWQYrm : 0, 4, 16, rebuildZExtCst},
        {HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm, 1, 128,
         rebuildSplatCst},
        {HasAVX2 ? X86::VPMOVSXBWYrm : 0, 16, 8, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXBWYrm : 0, 16, 8, rebuildZExtCst},
        {HasAVX2 ? X86::VPMOVSXWDYrm : 0, 8, 16, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXWDYrm : 0, 8, 16, rebuildZExtCst},
        {HasAVX2 ? X86::VPMOVSXDQYrm : 0, 4, 32, rebuildSExtCst},
        {HasAVX2 ? X86::VPMOVZXDQYrm : 0, 4, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 256, 1);
  }
  case X86::VMOVDQA32Z128rm:
  case X86::VMOVDQA64Z128rm:
  case X86::VMOVDQU32Z128rm:
  case X86::VMOVDQU64Z128rm: {
    FixupEntry Fixups[] = {
        {HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1, 8, rebuildSplatCst},
        {HasBWI ? X86::VPBROADCASTWZ128rm : 0, 1, 16, rebuildSplatCst},
        {X86::VPMOVSXBQZ128rm, 2, 8, rebuildSExtCst},
        {X86::VPMOVZXBQZ128rm, 2, 8, rebuildZExtCst},
        {X86::VMOVDI2PDIZrm, 1, 32, rebuildZeroUpperCst},
        {X86::VPBROADCASTDZ128rm, 1, 32, rebuildSplatCst},
        {X86::VPMOVSXBDZ128rm, 4, 8, rebuildSExtCst},
        {X86::VPMOVZXBDZ128rm, 4, 8, rebuildZExtCst},
        {X86::VPMOVSXWQZ128rm, 2, 16, rebuildSExtCst},
        {X86::VPMOVZXWQZ128rm, 2, 16, rebuildZExtCst},
        {X86::VMOVQI2PQIZrm, 1, 64, rebuildZeroUpperCst},
        {X86::VPBROADCASTQZ128rm, 1, 64, rebuildSplatCst},
        {HasBWI ? X86::VPMOVSXBWZ128rm : 0, 8, 8, rebuildSExtCst},
        {HasBWI ? X86::VPMOVZXBWZ128rm : 0, 8, 8, rebuildZExtCst},
        {X86::VPMOVSXWDZ128rm, 4, 16, rebuildSExtCst},
        {X86::VPMOVZXWDZ128rm, 4, 16, rebuildZExtCst},
        {X86::VPMOVSXDQZ128rm, 2, 32, rebuildSExtCst},
        {X86::VPMOVZXDQZ128rm, 2, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 128, 1);
  }
  case X86::VMOVDQA32Z256rm:
  case X86::VMOVDQA64Z256rm:
  case X86::VMOVDQU32Z256rm:
  case X86::VMOVDQU64Z256rm: {
    FixupEntry Fixups[] = {
        {HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1, 8, rebuildSplatCst},
        {HasBWI ? X86::VPBROADCASTWZ256rm : 0, 1, 16, rebuildSplatCst},
        {X86::VPBROADCASTDZ256rm, 1, 32, rebuildSplatCst},
        {X86::VPMOVSXBQZ256rm, 4, 8, rebuildSExtCst},
        {X86::VPMOVZXBQZ256rm, 4, 8, rebuildZExtCst},
        {X86::VPBROADCASTQZ256rm, 1, 64, rebuildSplatCst},
        {X86::VPMOVSXBDZ256rm, 8, 8, rebuildSExtCst},
        {X86::VPMOVZXBDZ256rm, 8, 8, rebuildZExtCst},
        {X86::VPMOVSXWQZ256rm, 4, 16, rebuildSExtCst},
        {X86::VPMOVZXWQZ256rm, 4, 16, rebuildZExtCst},
        {X86::VBROADCASTI32X4Z256rm, 1, 128, rebuildSplatCst},
        {HasBWI ? X86::VPMOVSXBWZ256rm : 0, 16, 8, rebuildSExtCst},
        {HasBWI ? X86::VPMOVZXBWZ256rm : 0, 16, 8, rebuildZExtCst},
        {X86::VPMOVSXWDZ256rm, 8, 16, rebuildSExtCst},
        {X86::VPMOVZXWDZ256rm, 8, 16, rebuildZExtCst},
        {X86::VPMOVSXDQZ256rm, 4, 32, rebuildSExtCst},
        {X86::VPMOVZXDQZ256rm, 4, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 256, 1);
  }
  case X86::VMOVDQA32Zrm:
  case X86::VMOVDQA64Zrm:
  case X86::VMOVDQU32Zrm:
  case X86::VMOVDQU64Zrm: {
    FixupEntry Fixups[] = {
        {HasBWI ? X86::VPBROADCASTBZrm : 0, 1, 8, rebuildSplatCst},
        {HasBWI ? X86::VPBROADCASTWZrm : 0, 1, 16, rebuildSplatCst},
        {X86::VPBROADCASTDZrm, 1, 32, rebuildSplatCst},
        {X86::VPBROADCASTQZrm, 1, 64, rebuildSplatCst},
        {X86::VPMOVSXBQZrm, 8, 8, rebuildSExtCst},
        {X86::VPMOVZXBQZrm, 8, 8, rebuildZExtCst},
        {X86::VBROADCASTI32X4rm, 1, 128, rebuildSplatCst},
        {X86::VPMOVSXBDZrm, 16, 8, rebuildSExtCst},
        {X86::VPMOVZXBDZrm, 16, 8, rebuildZExtCst},
        {X86::VPMOVSXWQZrm, 8, 16, rebuildSExtCst},
        {X86::VPMOVZXWQZrm, 8, 16, rebuildZExtCst},
        {X86::VBROADCASTI64X4rm, 1, 256, rebuildSplatCst},
        {HasBWI ? X86::VPMOVSXBWZrm : 0, 32, 8, rebuildSExtCst},
        {HasBWI ? X86::VPMOVZXBWZrm : 0, 32, 8, rebuildZExtCst},
        {X86::VPMOVSXWDZrm, 16, 16, rebuildSExtCst},
        {X86::VPMOVZXWDZrm, 16, 16, rebuildZExtCst},
        {X86::VPMOVSXDQZrm, 8, 32, rebuildSExtCst},
        {X86::VPMOVZXDQZrm, 8, 32, rebuildZExtCst}};
    return FixupConstant(Fixups, 512, 1);
  }
  }

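  // Attempt to convert the given pair of 32/64-bit element memory-fold opcodes
  // to their broadcast-fold equivalents if the constant operand can be rebuilt
  // as a 32/64-bit splat.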
  auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
    unsigned OpBcst32 = 0, OpBcst64 = 0;
    unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
    if (OpSrc32) {
      if (const X86FoldTableEntry *Mem2Bcst =
              llvm::lookupBroadcastFoldTableBySize(OpSrc32, 32)) {
        OpBcst32 = Mem2Bcst->DstOp;
        OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
      }
    }
    if (OpSrc64) {
      if (const X86FoldTableEntry *Mem2Bcst =
              llvm::lookupBroadcastFoldTableBySize(OpSrc64, 64)) {
        OpBcst64 = Mem2Bcst->DstOp;
        OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
      }
    }
    assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
           "OperandNo mismatch");

    if (OpBcst32 || OpBcst64) {
      unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
      FixupEntry Fixups[] = {{(int)OpBcst32, 32, 32, rebuildSplatCst},
                             {(int)OpBcst64, 64, 64, rebuildSplatCst}};
      // TODO: Add support for RegBitWidth, but currently rebuildSplatCst
      // doesn't require it (defaults to Constant::getPrimitiveSizeInBits).
      return FixupConstant(Fixups, 0, OpNo);
    }
    return false;
  };

  // Attempt to find an AVX512 mapping from a full width memory-fold instruction
  // to a broadcast-fold instruction variant.
  if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX)
    return ConvertToBroadcastAVX512(Opc, Opc);

  // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
  // conversion to see if we can convert to a broadcasted (integer) logic op.
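  // For example, a VPANDrm that the domain-fixing code produced from
  // VPANDDZ128rm can be switched back to an EVEX opcode so that a splat
  // constant operand can be folded as a 32/64-bit embedded broadcast.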
  if (HasVLX && !HasDQI) {
    unsigned OpSrc32 = 0, OpSrc64 = 0;
    switch (Opc) {
    case X86::VANDPDrm:
    case X86::VANDPSrm:
    case X86::VPANDrm:
      OpSrc32 = X86::VPANDDZ128rm;
      OpSrc64 = X86::VPANDQZ128rm;
      break;
    case X86::VANDPDYrm:
    case X86::VANDPSYrm:
    case X86::VPANDYrm:
      OpSrc32 = X86::VPANDDZ256rm;
      OpSrc64 = X86::VPANDQZ256rm;
      break;
    case X86::VANDNPDrm:
    case X86::VANDNPSrm:
    case X86::VPANDNrm:
      OpSrc32 = X86::VPANDNDZ128rm;
      OpSrc64 = X86::VPANDNQZ128rm;
      break;
    case X86::VANDNPDYrm:
    case X86::VANDNPSYrm:
    case X86::VPANDNYrm:
      OpSrc32 = X86::VPANDNDZ256rm;
      OpSrc64 = X86::VPANDNQZ256rm;
      break;
    case X86::VORPDrm:
    case X86::VORPSrm:
    case X86::VPORrm:
      OpSrc32 = X86::VPORDZ128rm;
      OpSrc64 = X86::VPORQZ128rm;
      break;
    case X86::VORPDYrm:
    case X86::VORPSYrm:
    case X86::VPORYrm:
      OpSrc32 = X86::VPORDZ256rm;
      OpSrc64 = X86::VPORQZ256rm;
      break;
    case X86::VXORPDrm:
    case X86::VXORPSrm:
    case X86::VPXORrm:
      OpSrc32 = X86::VPXORDZ128rm;
      OpSrc64 = X86::VPXORQZ128rm;
      break;
    case X86::VXORPDYrm:
    case X86::VXORPSYrm:
    case X86::VPXORYrm:
      OpSrc32 = X86::VPXORDZ256rm;
      OpSrc64 = X86::VPXORQZ256rm;
      break;
    }
    if (OpSrc32 || OpSrc64)
      return ConvertToBroadcastAVX512(OpSrc32, OpSrc64);
  }

  return false;
}

bool X86FixupVectorConstantsPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "Start X86FixupVectorConstants\n";);
  bool Changed = false;
  ST = &MF.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  SM = &ST->getSchedModel();

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (processInstruction(MF, MBB, MI)) {
        ++NumInstChanges;
        Changed = true;
      }
    }
  }
  LLVM_DEBUG(dbgs() << "End X86FixupVectorConstants\n";);
  return Changed;
}