1//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization lowering for instructions.
11///
12/// This is used to offload pattern matching from the selector.
13///
14/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15/// a G_ZIP, G_UZP, etc.
16///
17/// General optimization combines should be handled by either the
18/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
22#include "AArch64ExpandImm.h"
23#include "AArch64GlobalISelUtils.h"
24#include "AArch64PerfectShuffle.h"
25#include "AArch64Subtarget.h"
26#include "GISel/AArch64LegalizerInfo.h"
27#include "MCTargetDesc/AArch64MCTargetDesc.h"
28#include "Utils/AArch64BaseInfo.h"
29#include "llvm/CodeGen/GlobalISel/Combiner.h"
30#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
31#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
32#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
33#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
34#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
35#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
36#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
37#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
38#include "llvm/CodeGen/GlobalISel/Utils.h"
39#include "llvm/CodeGen/MachineFrameInfo.h"
40#include "llvm/CodeGen/MachineFunctionPass.h"
41#include "llvm/CodeGen/MachineInstrBuilder.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/CodeGen/TargetOpcodes.h"
44#include "llvm/CodeGen/TargetPassConfig.h"
45#include "llvm/IR/InstrTypes.h"
46#include "llvm/Support/ErrorHandling.h"
47#include <optional>
48
49#define GET_GICOMBINER_DEPS
50#include "AArch64GenPostLegalizeGILowering.inc"
51#undef GET_GICOMBINER_DEPS
52
53#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
54
55using namespace llvm;
56using namespace MIPatternMatch;
57using namespace AArch64GISelUtils;
58
59namespace {
60
61#define GET_GICOMBINER_TYPES
62#include "AArch64GenPostLegalizeGILowering.inc"
63#undef GET_GICOMBINER_TYPES
64
65/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
66///
67/// Used for matching target-supported shuffles before codegen.
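///
/// For example, matchZip may record {AArch64::G_ZIP1, %dst, {%v1, %v2}}, which
/// applyShuffleVectorPseudo then emits as %dst = G_ZIP1 %v1, %v2 (register
/// names here are illustrative).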
68struct ShuffleVectorPseudo {
69 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
70 Register Dst; ///< Destination register.
71 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
72 ShuffleVectorPseudo(unsigned Opc, Register Dst,
73 std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
75 ShuffleVectorPseudo() = default;
76};
77
78/// Return true if a G_FCONSTANT instruction is known to be better-represented
79/// as a G_CONSTANT.
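///
/// For example (illustrative MIR; the constant is only used by stores):
///   %cst:_(s64) = G_FCONSTANT double 1.0
///   G_STORE %cst(s64), %ptr(p0)
/// is better represented as
///   %cst:_(s64) = G_CONSTANT i64 4607182418800017408   (the bit pattern of 1.0)
///   G_STORE %cst(s64), %ptr(p0)
/// so the constant can be materialized on the GPR bank instead of needing an
/// FMOV.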
80bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
81 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
82 Register DstReg = MI.getOperand(i: 0).getReg();
83 const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits();
84 if (DstSize != 16 && DstSize != 32 && DstSize != 64)
85 return false;
86
87 // When we're storing a value, it doesn't matter what register bank it's on.
88 // Since not all floating point constants can be materialized using a fmov,
89 // it makes more sense to just use a GPR.
90 return all_of(Range: MRI.use_nodbg_instructions(Reg: DstReg),
91 P: [](const MachineInstr &Use) { return Use.mayStore(); });
92}
93
94/// Change a G_FCONSTANT into a G_CONSTANT.
95void applyFConstantToConstant(MachineInstr &MI) {
96 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
97 MachineIRBuilder MIB(MI);
98 const APFloat &ImmValAPF = MI.getOperand(i: 1).getFPImm()->getValueAPF();
99 MIB.buildConstant(Res: MI.getOperand(i: 0).getReg(), Val: ImmValAPF.bitcastToAPInt());
100 MI.eraseFromParent();
101}
102
103/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
104/// sources of the shuffle are different.
105std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
106 unsigned NumElts) {
107 // Look for the first non-undef element.
108 auto FirstRealElt = find_if(Range&: M, P: [](int Elt) { return Elt >= 0; });
109 if (FirstRealElt == M.end())
110 return std::nullopt;
111
112 // Use APInt to handle overflow when calculating expected element.
113 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
114 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);
115
116 // The following shuffle indices must be the successive elements after the
117 // first real element.
118 if (any_of(
119 Range: make_range(x: std::next(x: FirstRealElt), y: M.end()),
120 P: [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
121 return std::nullopt;
122
123 // The index of an EXT is the first element if it is not UNDEF.
124 // Watch out for the beginning UNDEFs. The EXT index should be the expected
125 // value of the first element. E.g.
126 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
127 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
128 // ExpectedElt is the last mask index plus 1.
129 uint64_t Imm = ExpectedElt.getZExtValue();
130 bool ReverseExt = false;
131
  // There are two different cases that require reversing the input vectors.
  // For example, for vector <4 x i32> we have the following cases:
  //   Case 1: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, -1, 0>)
  //   Case 2: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, 7, 0>)
  // In both cases we end up using the mask <5, 6, 7, 0>, which requires
  // reversing (swapping) the two input vectors.
138 if (Imm < NumElts)
139 ReverseExt = true;
140 else
141 Imm -= NumElts;
142 return std::make_pair(x&: ReverseExt, y&: Imm);
143}
144
145/// Helper function for matchINS.
146///
147/// \returns a value when \p M is an ins mask for \p NumInputElements.
148///
149/// First element of the returned pair is true when the produced
150/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
151///
152/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
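///
/// For example, with \p NumInputElements = 4, the mask <0, 1, 6, 3> matches:
/// every lane except lane 2 is the corresponding LHS element, so the result is
/// the LHS with RHS element 2 (mask value 6) inserted into lane 2, i.e.
/// (true, 2).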
153std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
154 int NumInputElements) {
155 if (M.size() != static_cast<size_t>(NumInputElements))
156 return std::nullopt;
157 int NumLHSMatch = 0, NumRHSMatch = 0;
158 int LastLHSMismatch = -1, LastRHSMismatch = -1;
159 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
160 if (M[Idx] == -1) {
161 ++NumLHSMatch;
162 ++NumRHSMatch;
163 continue;
164 }
165 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
166 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
167 }
168 const int NumNeededToMatch = NumInputElements - 1;
169 if (NumLHSMatch == NumNeededToMatch)
170 return std::make_pair(x: true, y&: LastLHSMismatch);
171 if (NumRHSMatch == NumNeededToMatch)
172 return std::make_pair(x: false, y&: LastRHSMismatch);
173 return std::nullopt;
174}
175
/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_REV instruction. Returns the appropriate G_REV opcode in \p MatchInfo.
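///
/// For example, a <4 x s32> shuffle with mask <1, 0, 3, 2> reverses the two
/// 32-bit elements within each 64-bit block and so maps to G_REV64.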
178bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
179 ShuffleVectorPseudo &MatchInfo) {
180 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
181 ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask();
182 Register Dst = MI.getOperand(i: 0).getReg();
183 Register Src = MI.getOperand(i: 1).getReg();
184 LLT Ty = MRI.getType(Reg: Dst);
185 unsigned EltSize = Ty.getScalarSizeInBits();
186
187 // Element size for a rev cannot be 64.
188 if (EltSize == 64)
189 return false;
190
191 unsigned NumElts = Ty.getNumElements();
192
193 // Try to produce a G_REV instruction
194 for (unsigned LaneSize : {64U, 32U, 16U}) {
195 if (isREVMask(M: ShuffleMask, EltSize, NumElts, BlockSize: LaneSize)) {
196 unsigned Opcode;
197 if (LaneSize == 64U)
198 Opcode = AArch64::G_REV64;
199 else if (LaneSize == 32U)
200 Opcode = AArch64::G_REV32;
201 else
202 Opcode = AArch64::G_REV16;
203
204 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
205 return true;
206 }
207 }
208
209 return false;
210}
211
212/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
213/// a G_TRN1 or G_TRN2 instruction.
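///
/// For a <4 x s32> shuffle, TRN1 corresponds to mask <0, 4, 2, 6> (even lanes
/// interleaved) and TRN2 to mask <1, 5, 3, 7> (odd lanes interleaved).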
214bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
215 ShuffleVectorPseudo &MatchInfo) {
216 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
217 unsigned WhichResult;
218 unsigned OperandOrder;
219 ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask();
220 Register Dst = MI.getOperand(i: 0).getReg();
221 unsigned NumElts = MRI.getType(Reg: Dst).getNumElements();
222 if (!isTRNMask(M: ShuffleMask, NumElts, WhichResultOut&: WhichResult, OperandOrderOut&: OperandOrder))
223 return false;
224 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
225 Register V1 = MI.getOperand(i: OperandOrder == 0 ? 1 : 2).getReg();
226 Register V2 = MI.getOperand(i: OperandOrder == 0 ? 2 : 1).getReg();
227 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
228 return true;
229}
230
231/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
232/// a G_UZP1 or G_UZP2 instruction.
233///
234/// \param [in] MI - The shuffle vector instruction.
235/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
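///
/// For a <4 x s32> shuffle, UZP1 corresponds to mask <0, 2, 4, 6> (the even
/// elements of both sources) and UZP2 to mask <1, 3, 5, 7> (the odd elements).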
236bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
237 ShuffleVectorPseudo &MatchInfo) {
238 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
239 unsigned WhichResult;
240 ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask();
241 Register Dst = MI.getOperand(i: 0).getReg();
242 unsigned NumElts = MRI.getType(Reg: Dst).getNumElements();
243 if (!isUZPMask(M: ShuffleMask, NumElts, WhichResultOut&: WhichResult))
244 return false;
245 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
246 Register V1 = MI.getOperand(i: 1).getReg();
247 Register V2 = MI.getOperand(i: 2).getReg();
248 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
249 return true;
250}
251
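/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_ZIP1 or G_ZIP2 instruction. For a <4 x s32> shuffle, ZIP1 interleaves
/// the low halves of the two sources (mask <0, 4, 1, 5>) and ZIP2 the high
/// halves (mask <2, 6, 3, 7>).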
252bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
253 ShuffleVectorPseudo &MatchInfo) {
254 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
255 unsigned WhichResult;
256 unsigned OperandOrder;
257 ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask();
258 Register Dst = MI.getOperand(i: 0).getReg();
259 unsigned NumElts = MRI.getType(Reg: Dst).getNumElements();
260 if (!isZIPMask(M: ShuffleMask, NumElts, WhichResultOut&: WhichResult, OperandOrderOut&: OperandOrder))
261 return false;
262 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
263 Register V1 = MI.getOperand(i: OperandOrder == 0 ? 1 : 2).getReg();
264 Register V2 = MI.getOperand(i: OperandOrder == 0 ? 2 : 1).getReg();
265 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
266 return true;
267}
268
269/// Helper function for matchDup.
270bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
271 MachineRegisterInfo &MRI,
272 ShuffleVectorPseudo &MatchInfo) {
273 if (Lane != 0)
274 return false;
275
276 // Try to match a vector splat operation into a dup instruction.
277 // We're looking for this pattern:
278 //
279 // %scalar:gpr(s64) = COPY $x0
280 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
281 // %cst0:gpr(s32) = G_CONSTANT i32 0
282 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
283 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
284 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
285 // %zerovec(<2 x s32>)
286 //
287 // ...into:
288 // %splat = G_DUP %scalar
289
290 // Begin matching the insert.
291 auto *InsMI = getOpcodeDef(Opcode: TargetOpcode::G_INSERT_VECTOR_ELT,
292 Reg: MI.getOperand(i: 1).getReg(), MRI);
293 if (!InsMI)
294 return false;
295 // Match the undef vector operand.
296 if (!getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: InsMI->getOperand(i: 1).getReg(),
297 MRI))
298 return false;
299
300 // Match the index constant 0.
301 if (!mi_match(R: InsMI->getOperand(i: 3).getReg(), MRI, P: m_ZeroInt()))
302 return false;
303
304 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(i: 0).getReg(),
305 {InsMI->getOperand(i: 2).getReg()});
306 return true;
307}
308
309/// Helper function for matchDup.
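/// Matches the case where the splatted lane is defined by a G_BUILD_VECTOR,
/// e.g. a lane-2 splat of %v = G_BUILD_VECTOR %a, %b, %c, %d becomes
/// G_DUP %c.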
310bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
311 MachineRegisterInfo &MRI,
312 ShuffleVectorPseudo &MatchInfo) {
  assert(Lane >= 0 && "Expected non-negative lane?");
314 int NumElements = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements();
  // Test if the source vector that holds the lane (the LHS if Lane <
  // NumElements, otherwise the RHS) is a G_BUILD_VECTOR. If it is, we can just
  // reference the lane's definition directly.
317 auto *BuildVecMI =
318 getOpcodeDef(Opcode: TargetOpcode::G_BUILD_VECTOR,
319 Reg: MI.getOperand(i: Lane < NumElements ? 1 : 2).getReg(), MRI);
  // If Lane >= NumElements, it indexes into the RHS, so rebase it onto that
  // vector.
321 if (NumElements <= Lane)
322 Lane -= NumElements;
323
324 if (!BuildVecMI)
325 return false;
326 Register Reg = BuildVecMI->getOperand(i: Lane + 1).getReg();
327 MatchInfo =
328 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(i: 0).getReg(), {Reg});
329 return true;
330}
331
332bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
333 ShuffleVectorPseudo &MatchInfo) {
334 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
335 auto MaybeLane = getSplatIndex(MI);
336 if (!MaybeLane)
337 return false;
338 int Lane = *MaybeLane;
  // If this is an undef splat, generate it via a plain DUP of lane 0, if
  // possible.
340 if (Lane < 0)
341 Lane = 0;
342 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
343 return true;
344 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
345 return true;
346 return false;
347}
348
349// Check if an EXT instruction can handle the shuffle mask when the vector
350// sources of the shuffle are the same.
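// For example, for a <4 x s32> single-source shuffle, the mask <2, 3, 0, 1> is
// an EXT starting at element 2; indices are allowed to wrap back around to 0.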
351bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
352 unsigned NumElts = Ty.getNumElements();
353
354 // Assume that the first shuffle index is not UNDEF. Fail if it is.
355 if (M[0] < 0)
356 return false;
357
358 // If this is a VEXT shuffle, the immediate value is the index of the first
359 // element. The other shuffle indices must be the successive elements after
360 // the first one.
361 unsigned ExpectedElt = M[0];
362 for (unsigned I = 1; I < NumElts; ++I) {
363 // Increment the expected index. If it wraps around, just follow it
364 // back to index zero and keep going.
365 ++ExpectedElt;
366 if (ExpectedElt == NumElts)
367 ExpectedElt = 0;
368
369 if (M[I] < 0)
370 continue; // Ignore UNDEF indices.
371 if (ExpectedElt != static_cast<unsigned>(M[I]))
372 return false;
373 }
374
375 return true;
376}
377
378bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
379 ShuffleVectorPseudo &MatchInfo) {
380 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
381 Register Dst = MI.getOperand(i: 0).getReg();
382 LLT DstTy = MRI.getType(Reg: Dst);
383 Register V1 = MI.getOperand(i: 1).getReg();
384 Register V2 = MI.getOperand(i: 2).getReg();
385 auto Mask = MI.getOperand(i: 3).getShuffleMask();
386 uint64_t Imm;
387 auto ExtInfo = getExtMask(M: Mask, NumElts: DstTy.getNumElements());
388 uint64_t ExtFactor = MRI.getType(Reg: V1).getScalarSizeInBits() / 8;
389
390 if (!ExtInfo) {
391 if (!getOpcodeDef<GImplicitDef>(Reg: V2, MRI) ||
392 !isSingletonExtMask(M: Mask, Ty: DstTy))
393 return false;
394
395 Imm = Mask[0] * ExtFactor;
396 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
397 return true;
398 }
399 bool ReverseExt;
400 std::tie(args&: ReverseExt, args&: Imm) = *ExtInfo;
401 if (ReverseExt)
402 std::swap(a&: V1, b&: V2);
403 Imm *= ExtFactor;
404 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
405 return true;
406}
407
408/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
409/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
410void applyShuffleVectorPseudo(MachineInstr &MI,
411 ShuffleVectorPseudo &MatchInfo) {
412 MachineIRBuilder MIRBuilder(MI);
413 MIRBuilder.buildInstr(Opc: MatchInfo.Opc, DstOps: {MatchInfo.Dst}, SrcOps: MatchInfo.SrcOps);
414 MI.eraseFromParent();
415}
416
417/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
418/// Special-cased because the constant operand must be emitted as a G_CONSTANT
419/// for the imported tablegen patterns to work.
420void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
421 MachineIRBuilder MIRBuilder(MI);
422 if (MatchInfo.SrcOps[2].getImm() == 0)
423 MIRBuilder.buildCopy(Res: MatchInfo.Dst, Op: MatchInfo.SrcOps[0]);
424 else {
425 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
426 auto Cst =
427 MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: MatchInfo.SrcOps[2].getImm());
428 MIRBuilder.buildInstr(Opc: MatchInfo.Opc, DstOps: {MatchInfo.Dst},
429 SrcOps: {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
430 }
431 MI.eraseFromParent();
432}
433
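/// Reverse the element order of a full 128-bit vector: G_REV64 reverses the
/// elements within each 64-bit half, and a G_EXT by 8 bytes then swaps the two
/// halves.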
434void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
435 Register Dst = MI.getOperand(i: 0).getReg();
436 Register Src = MI.getOperand(i: 1).getReg();
437 LLT DstTy = MRI.getType(Reg: Dst);
438 assert(DstTy.getSizeInBits() == 128 &&
439 "Expected 128bit vector in applyFullRev");
440 MachineIRBuilder MIRBuilder(MI);
441 auto Cst = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: 8);
442 auto Rev = MIRBuilder.buildInstr(Opc: AArch64::G_REV64, DstOps: {DstTy}, SrcOps: {Src});
443 MIRBuilder.buildInstr(Opc: AArch64::G_EXT, DstOps: {Dst}, SrcOps: {Rev, Rev, Cst});
444 MI.eraseFromParent();
445}
446
447bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
448 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
449
450 auto ValAndVReg =
451 getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 3).getReg(), MRI);
452 return !ValAndVReg;
453}
454
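/// Lower a G_INSERT_VECTOR_ELT with a non-constant index by spilling the
/// vector to a stack slot, storing the new element at the (masked, in-bounds)
/// element offset, and reloading the whole vector.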
455void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
456 MachineIRBuilder &Builder) {
457 auto &Insert = cast<GInsertVectorElement>(Val&: MI);
458 Builder.setInstrAndDebugLoc(Insert);
459
460 Register Offset = Insert.getIndexReg();
461 LLT VecTy = MRI.getType(Reg: Insert.getReg(Idx: 0));
462 LLT EltTy = MRI.getType(Reg: Insert.getElementReg());
463 LLT IdxTy = MRI.getType(Reg: Insert.getIndexReg());
464
465 if (VecTy.isScalableVector())
466 return;
467
468 // Create a stack slot and store the vector into it
469 MachineFunction &MF = Builder.getMF();
470 Align Alignment(
471 std::min<uint64_t>(a: VecTy.getSizeInBytes().getKnownMinValue(), b: 16));
472 int FrameIdx = MF.getFrameInfo().CreateStackObject(Size: VecTy.getSizeInBytes(),
473 Alignment, isSpillSlot: false);
474 LLT FramePtrTy = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
475 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIdx);
476 auto StackTemp = Builder.buildFrameIndex(Res: FramePtrTy, Idx: FrameIdx);
477
478 Builder.buildStore(Val: Insert.getOperand(i: 1), Addr: StackTemp, PtrInfo, Alignment: Align(8));
479
480 // Get the pointer to the element, and be sure not to hit undefined behavior
481 // if the index is out of bounds.
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-of-2 vector size");
484 auto Mask = Builder.buildConstant(Res: IdxTy, Val: VecTy.getNumElements() - 1);
485 Register And = Builder.buildAnd(Dst: IdxTy, Src0: Offset, Src1: Mask).getReg(Idx: 0);
486 auto EltSize = Builder.buildConstant(Res: IdxTy, Val: EltTy.getSizeInBytes());
487 Register Mul = Builder.buildMul(Dst: IdxTy, Src0: And, Src1: EltSize).getReg(Idx: 0);
488 Register EltPtr =
489 Builder.buildPtrAdd(Res: MRI.getType(Reg: StackTemp.getReg(Idx: 0)), Op0: StackTemp, Op1: Mul)
490 .getReg(Idx: 0);
491
492 // Write the inserted element
493 Builder.buildStore(Val: Insert.getElementReg(), Addr: EltPtr, PtrInfo, Alignment: Align(1));
494 // Reload the whole vector.
495 Builder.buildLoad(Res: Insert.getReg(Idx: 0), Addr: StackTemp, PtrInfo, Alignment: Align(8));
496 Insert.eraseFromParent();
497}
498
499/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
500/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
501///
502/// e.g.
503/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
504///
505/// Can be represented as
506///
507/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
508/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
509///
510bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
511 std::tuple<Register, int, Register, int> &MatchInfo) {
512 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
513 ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask();
514 Register Dst = MI.getOperand(i: 0).getReg();
515 int NumElts = MRI.getType(Reg: Dst).getNumElements();
516 auto DstIsLeftAndDstLane = isINSMask(M: ShuffleMask, NumInputElements: NumElts);
517 if (!DstIsLeftAndDstLane)
518 return false;
519 bool DstIsLeft;
520 int DstLane;
521 std::tie(args&: DstIsLeft, args&: DstLane) = *DstIsLeftAndDstLane;
522 Register Left = MI.getOperand(i: 1).getReg();
523 Register Right = MI.getOperand(i: 2).getReg();
524 Register DstVec = DstIsLeft ? Left : Right;
525 Register SrcVec = Left;
526
527 int SrcLane = ShuffleMask[DstLane];
528 if (SrcLane >= NumElts) {
529 SrcVec = Right;
530 SrcLane -= NumElts;
531 }
532
533 MatchInfo = std::make_tuple(args&: DstVec, args&: DstLane, args&: SrcVec, args&: SrcLane);
534 return true;
535}
536
537void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
538 MachineIRBuilder &Builder,
539 std::tuple<Register, int, Register, int> &MatchInfo) {
540 Builder.setInstrAndDebugLoc(MI);
541 Register Dst = MI.getOperand(i: 0).getReg();
542 auto ScalarTy = MRI.getType(Reg: Dst).getElementType();
543 Register DstVec, SrcVec;
544 int DstLane, SrcLane;
545 std::tie(args&: DstVec, args&: DstLane, args&: SrcVec, args&: SrcLane) = MatchInfo;
546 auto SrcCst = Builder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: SrcLane);
547 auto Extract = Builder.buildExtractVectorElement(Res: ScalarTy, Val: SrcVec, Idx: SrcCst);
548 auto DstCst = Builder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: DstLane);
549 Builder.buildInsertVectorElement(Res: Dst, Val: DstVec, Elt: Extract, Idx: DstCst);
550 MI.eraseFromParent();
551}
552
/// isVShiftRImm - Check if this is a valid vector splat for the immediate
/// operand of a vector shift right operation. The value must be in the range
/// 1 <= Value <= ElementBits.
556bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
557 int64_t &Cnt) {
558 assert(Ty.isVector() && "vector shift count is not a vector type");
559 MachineInstr *MI = MRI.getVRegDef(Reg);
560 auto Cst = getAArch64VectorSplatScalar(MI: *MI, MRI);
561 if (!Cst)
562 return false;
563 Cnt = *Cst;
564 int64_t ElementBits = Ty.getScalarSizeInBits();
565 return Cnt >= 1 && Cnt <= ElementBits;
566}
567
568/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
569bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
570 int64_t &Imm) {
571 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
572 MI.getOpcode() == TargetOpcode::G_LSHR);
573 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
574 if (!Ty.isVector())
575 return false;
576 return isVShiftRImm(Reg: MI.getOperand(i: 2).getReg(), MRI, Ty, Cnt&: Imm);
577}
578
579void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
580 int64_t &Imm) {
581 unsigned Opc = MI.getOpcode();
582 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
583 unsigned NewOpc =
584 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
585 MachineIRBuilder MIB(MI);
586 MIB.buildInstr(Opc: NewOpc, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 1)}).addImm(Val: Imm);
587 MI.eraseFromParent();
588}
589
590/// Determine if it is possible to modify the \p RHS and predicate \p P of a
591/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
592///
593/// \returns A pair containing the updated immediate and predicate which may
594/// be used to optimize the instruction.
595///
596/// \note This assumes that the comparison has been legalized.
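///
/// For example, 4097 (0x1001) is not a legal arithmetic immediate, so
/// "x ult 4097" cannot be selected directly; adjusting it to "x ule 4096"
/// (0x1000, i.e. 1 shifted left by 12) yields an encodable immediate.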
597std::optional<std::pair<uint64_t, CmpInst::Predicate>>
598tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
599 const MachineRegisterInfo &MRI) {
600 const auto &Ty = MRI.getType(Reg: RHS);
601 if (Ty.isVector())
602 return std::nullopt;
603 unsigned Size = Ty.getSizeInBits();
604 assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");
605
606 // If the RHS is not a constant, or the RHS is already a valid arithmetic
607 // immediate, then there is nothing to change.
608 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
609 if (!ValAndVReg)
610 return std::nullopt;
611 uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
612 uint64_t C = OriginalC;
613 if (isLegalArithImmed(C))
614 return std::nullopt;
615
616 // We have a non-arithmetic immediate. Check if adjusting the immediate and
617 // adjusting the predicate will result in a legal arithmetic immediate.
618 switch (P) {
619 default:
620 return std::nullopt;
621 case CmpInst::ICMP_SLT:
622 case CmpInst::ICMP_SGE:
623 // Check for
624 //
625 // x slt c => x sle c - 1
626 // x sge c => x sgt c - 1
627 //
628 // When c is not the smallest possible negative number.
629 if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
630 (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
631 return std::nullopt;
632 P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
633 C -= 1;
634 break;
635 case CmpInst::ICMP_ULT:
636 case CmpInst::ICMP_UGE:
637 // Check for
638 //
639 // x ult c => x ule c - 1
640 // x uge c => x ugt c - 1
641 //
642 // When c is not zero.
643 assert(C != 0 && "C should not be zero here!");
644 P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
645 C -= 1;
646 break;
647 case CmpInst::ICMP_SLE:
648 case CmpInst::ICMP_SGT:
649 // Check for
650 //
651 // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
653 //
654 // When c is not the largest possible signed integer.
655 if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
656 (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
657 return std::nullopt;
658 P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
659 C += 1;
660 break;
661 case CmpInst::ICMP_ULE:
662 case CmpInst::ICMP_UGT:
663 // Check for
664 //
665 // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
667 //
668 // When c is not the largest possible unsigned integer.
669 if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
670 (Size == 64 && C == UINT64_MAX))
671 return std::nullopt;
672 P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
673 C += 1;
674 break;
675 }
676
677 // Check if the new constant is valid, and return the updated constant and
678 // predicate if it is.
679 if (Size == 32)
680 C = static_cast<uint32_t>(C);
681 if (isLegalArithImmed(C))
682 return {{C, P}};
683
684 auto NumberOfInstrToLoadImm = [=](uint64_t Imm) {
685 SmallVector<AArch64_IMM::ImmInsnModel> Insn;
686 AArch64_IMM::expandMOVImm(Imm, BitSize: 32, Insn);
687 return Insn.size();
688 };
689
690 if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(C))
691 return {{C, P}};
692
693 return std::nullopt;
694}
695
696/// Determine whether or not it is possible to update the RHS and predicate of
697/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
698/// immediate.
699///
700/// \p MI - The G_ICMP instruction
701/// \p MatchInfo - The new RHS immediate and predicate on success
702///
703/// See tryAdjustICmpImmAndPred for valid transformations.
704bool matchAdjustICmpImmAndPred(
705 MachineInstr &MI, const MachineRegisterInfo &MRI,
706 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
707 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
708 Register RHS = MI.getOperand(i: 3).getReg();
709 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
710 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, P: Pred, MRI)) {
711 MatchInfo = *MaybeNewImmAndPred;
712 return true;
713 }
714 return false;
715}
716
717void applyAdjustICmpImmAndPred(
718 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
719 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
720 MIB.setInstrAndDebugLoc(MI);
721 MachineOperand &RHS = MI.getOperand(i: 3);
722 MachineRegisterInfo &MRI = *MIB.getMRI();
723 auto Cst = MIB.buildConstant(Res: MRI.cloneVirtualRegister(VReg: RHS.getReg()),
724 Val: MatchInfo.first);
725 Observer.changingInstr(MI);
726 RHS.setReg(Cst->getOperand(i: 0).getReg());
727 MI.getOperand(i: 1).setPredicate(MatchInfo.second);
728 Observer.changedInstr(MI);
729}
730
731bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
732 std::pair<unsigned, int> &MatchInfo) {
733 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
734 Register Src1Reg = MI.getOperand(i: 1).getReg();
735 const LLT SrcTy = MRI.getType(Reg: Src1Reg);
736 const LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
737
738 auto LaneIdx = getSplatIndex(MI);
739 if (!LaneIdx)
740 return false;
741
742 // The lane idx should be within the first source vector.
743 if (*LaneIdx >= SrcTy.getNumElements())
744 return false;
745
746 if (DstTy != SrcTy)
747 return false;
748
749 LLT ScalarTy = SrcTy.getElementType();
750 unsigned ScalarSize = ScalarTy.getSizeInBits();
751
752 unsigned Opc = 0;
753 switch (SrcTy.getNumElements()) {
754 case 2:
755 if (ScalarSize == 64)
756 Opc = AArch64::G_DUPLANE64;
757 else if (ScalarSize == 32)
758 Opc = AArch64::G_DUPLANE32;
759 break;
760 case 4:
761 if (ScalarSize == 32)
762 Opc = AArch64::G_DUPLANE32;
763 else if (ScalarSize == 16)
764 Opc = AArch64::G_DUPLANE16;
765 break;
766 case 8:
767 if (ScalarSize == 8)
768 Opc = AArch64::G_DUPLANE8;
769 else if (ScalarSize == 16)
770 Opc = AArch64::G_DUPLANE16;
771 break;
772 case 16:
773 if (ScalarSize == 8)
774 Opc = AArch64::G_DUPLANE8;
775 break;
776 default:
777 break;
778 }
779 if (!Opc)
780 return false;
781
782 MatchInfo.first = Opc;
783 MatchInfo.second = *LaneIdx;
784 return true;
785}
786
787void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
788 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
789 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
790 Register Src1Reg = MI.getOperand(i: 1).getReg();
791 const LLT SrcTy = MRI.getType(Reg: Src1Reg);
792
793 B.setInstrAndDebugLoc(MI);
794 auto Lane = B.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: MatchInfo.second);
795
796 Register DupSrc = MI.getOperand(i: 1).getReg();
797 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
798 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
799 if (SrcTy.getSizeInBits() == 64) {
800 auto Undef = B.buildUndef(Res: SrcTy);
801 DupSrc = B.buildConcatVectors(Res: SrcTy.multiplyElements(Factor: 2),
802 Ops: {Src1Reg, Undef.getReg(Idx: 0)})
803 .getReg(Idx: 0);
804 }
805 B.buildInstr(Opc: MatchInfo.first, DstOps: {MI.getOperand(i: 0).getReg()}, SrcOps: {DupSrc, Lane});
806 MI.eraseFromParent();
807}
808
809bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
810 auto &Unmerge = cast<GUnmerge>(Val&: MI);
811 Register Src1Reg = Unmerge.getReg(Idx: Unmerge.getNumOperands() - 1);
812 const LLT SrcTy = MRI.getType(Reg: Src1Reg);
813 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
814 return false;
815 return SrcTy.isVector() && !SrcTy.isScalable() &&
816 Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
817}
818
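/// Scalarize a full-width G_UNMERGE_VALUES of a fixed-length vector, e.g. an
/// unmerge of a <4 x s32> into four s32 defs becomes four
/// G_EXTRACT_VECTOR_ELTs with constant indices 0..3.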
819void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
820 MachineIRBuilder &B) {
821 auto &Unmerge = cast<GUnmerge>(Val&: MI);
822 Register Src1Reg = Unmerge.getReg(Idx: Unmerge.getNumOperands() - 1);
823 const LLT SrcTy = MRI.getType(Reg: Src1Reg);
824 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
825 "Expected a fixed length vector");
826
827 for (int I = 0; I < SrcTy.getNumElements(); ++I)
828 B.buildExtractVectorElementConstant(Res: Unmerge.getReg(Idx: I), Val: Src1Reg, Idx: I);
829 MI.eraseFromParent();
830}
831
832bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
833 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
834
835 // Later, during selection, we'll try to match imported patterns using
836 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
837 // G_BUILD_VECTORs which could match those patterns.
838 if (isBuildVectorAllZeros(MI, MRI) || isBuildVectorAllOnes(MI, MRI))
839 return false;
840
841 return getAArch64VectorSplat(MI, MRI).has_value();
842}
843
844void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
845 MachineIRBuilder &B) {
846 B.setInstrAndDebugLoc(MI);
847 B.buildInstr(Opc: AArch64::G_DUP, DstOps: {MI.getOperand(i: 0).getReg()},
848 SrcOps: {MI.getOperand(i: 1).getReg()});
849 MI.eraseFromParent();
850}
851
852/// \returns how many instructions would be saved by folding a G_ICMP's shift
853/// and/or extension operations.
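///
/// For example, "cmp w0, w1, sxth" folds a supported extend into the compare
/// (one instruction saved), and "cmp w0, w1, sxth #2" folds an extend plus a
/// shift of up to 4 (two instructions saved).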
854unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
855 // No instructions to save if there's more than one use or no uses.
856 if (!MRI.hasOneNonDBGUse(RegNo: CmpOp))
857 return 0;
858
859 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
860 auto IsSupportedExtend = [&](const MachineInstr &MI) {
861 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
862 return true;
863 if (MI.getOpcode() != TargetOpcode::G_AND)
864 return false;
865 auto ValAndVReg =
866 getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI);
867 if (!ValAndVReg)
868 return false;
869 uint64_t Mask = ValAndVReg->Value.getZExtValue();
870 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
871 };
872
873 MachineInstr *Def = getDefIgnoringCopies(Reg: CmpOp, MRI);
874 if (IsSupportedExtend(*Def))
875 return 1;
876
877 unsigned Opc = Def->getOpcode();
878 if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
879 Opc != TargetOpcode::G_LSHR)
880 return 0;
881
882 auto MaybeShiftAmt =
883 getIConstantVRegValWithLookThrough(VReg: Def->getOperand(i: 2).getReg(), MRI);
884 if (!MaybeShiftAmt)
885 return 0;
886 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
887 MachineInstr *ShiftLHS =
888 getDefIgnoringCopies(Reg: Def->getOperand(i: 1).getReg(), MRI);
889
890 // Check if we can fold an extend and a shift.
891 // FIXME: This is duplicated with the selector. (See:
892 // selectArithExtendedRegister)
893 if (IsSupportedExtend(*ShiftLHS))
894 return (ShiftAmt <= 4) ? 2 : 1;
895
896 LLT Ty = MRI.getType(Reg: Def->getOperand(i: 0).getReg());
897 if (Ty.isVector())
898 return 0;
899 unsigned ShiftSize = Ty.getSizeInBits();
900 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
901 (ShiftSize == 64 && ShiftAmt <= 63))
902 return 1;
903 return 0;
904}
905
906/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
907/// instruction \p MI.
908bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
909 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
910 // Swap the operands if it would introduce a profitable folding opportunity.
911 // (e.g. a shift + extend).
912 //
913 // For example:
914 // lsl w13, w11, #1
915 // cmp w13, w12
916 // can be turned into:
917 // cmp w12, w11, lsl #1
918
919 // Don't swap if there's a constant on the RHS, because we know we can fold
920 // that.
921 Register RHS = MI.getOperand(i: 3).getReg();
922 auto RHSCst = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
923 if (RHSCst && isLegalArithImmed(C: RHSCst->Value.getSExtValue()))
924 return false;
925
926 Register LHS = MI.getOperand(i: 2).getReg();
927 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
928 auto GetRegForProfit = [&](Register Reg) {
929 MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
930 return isCMN(MaybeSub: Def, Pred, MRI) ? Def->getOperand(i: 2).getReg() : Reg;
931 };
932
933 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
934 // compare, would we be able to fold more instructions?
935 Register TheLHS = GetRegForProfit(LHS);
936 Register TheRHS = GetRegForProfit(RHS);
937
938 // If the LHS is more likely to give us a folding opportunity, then swap the
939 // LHS and RHS.
940 return (getCmpOperandFoldingProfit(CmpOp: TheLHS, MRI) >
941 getCmpOperandFoldingProfit(CmpOp: TheRHS, MRI));
942}
943
944void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
945 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
946 Register LHS = MI.getOperand(i: 2).getReg();
947 Register RHS = MI.getOperand(i: 3).getReg();
948 Observer.changedInstr(MI);
949 MI.getOperand(i: 1).setPredicate(CmpInst::getSwappedPredicate(pred: Pred));
950 MI.getOperand(i: 2).setReg(RHS);
951 MI.getOperand(i: 3).setReg(LHS);
952 Observer.changedInstr(MI);
953}
954
955/// \returns a function which builds a vector floating point compare instruction
956/// for a condition code \p CC.
957/// \param [in] NoNans - True if the target has NoNansFPMath.
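///
/// Note that NEON has no direct "not equal" vector compare, so NE is built as
/// a G_FCMEQ followed by a NOT, and LS/MI are built by swapping the operands
/// of G_FCMGE/G_FCMGT.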
958std::function<Register(MachineIRBuilder &)>
959getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
960 MachineRegisterInfo &MRI) {
961 LLT DstTy = MRI.getType(Reg: LHS);
962 assert(DstTy.isVector() && "Expected vector types only?");
963 assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
964 switch (CC) {
965 default:
966 llvm_unreachable("Unexpected condition code!");
967 case AArch64CC::NE:
968 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
969 auto FCmp = MIB.buildInstr(Opc: AArch64::G_FCMEQ, DstOps: {DstTy}, SrcOps: {LHS, RHS});
970 return MIB.buildNot(Dst: DstTy, Src0: FCmp).getReg(Idx: 0);
971 };
972 case AArch64CC::EQ:
973 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
974 return MIB.buildInstr(Opc: AArch64::G_FCMEQ, DstOps: {DstTy}, SrcOps: {LHS, RHS}).getReg(Idx: 0);
975 };
976 case AArch64CC::GE:
977 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
978 return MIB.buildInstr(Opc: AArch64::G_FCMGE, DstOps: {DstTy}, SrcOps: {LHS, RHS}).getReg(Idx: 0);
979 };
980 case AArch64CC::GT:
981 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
982 return MIB.buildInstr(Opc: AArch64::G_FCMGT, DstOps: {DstTy}, SrcOps: {LHS, RHS}).getReg(Idx: 0);
983 };
984 case AArch64CC::LS:
985 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
986 return MIB.buildInstr(Opc: AArch64::G_FCMGE, DstOps: {DstTy}, SrcOps: {RHS, LHS}).getReg(Idx: 0);
987 };
988 case AArch64CC::MI:
989 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
990 return MIB.buildInstr(Opc: AArch64::G_FCMGT, DstOps: {DstTy}, SrcOps: {RHS, LHS}).getReg(Idx: 0);
991 };
992 }
993}
994
995/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
996bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
997 MachineIRBuilder &MIB) {
998 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
999 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
1000
1001 Register Dst = MI.getOperand(i: 0).getReg();
1002 LLT DstTy = MRI.getType(Reg: Dst);
1003 if (!DstTy.isVector() || !ST.hasNEON())
1004 return false;
1005 Register LHS = MI.getOperand(i: 2).getReg();
1006 unsigned EltSize = MRI.getType(Reg: LHS).getScalarSizeInBits();
1007 if (EltSize == 16 && !ST.hasFullFP16())
1008 return false;
1009 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
1010 return false;
1011
1012 return true;
1013}
1014
1015/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
1016void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1017 MachineIRBuilder &MIB) {
1018 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1019 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
1020
1021 const auto &CmpMI = cast<GFCmp>(Val&: MI);
1022
1023 Register Dst = CmpMI.getReg(Idx: 0);
1024 CmpInst::Predicate Pred = CmpMI.getCond();
1025 Register LHS = CmpMI.getLHSReg();
1026 Register RHS = CmpMI.getRHSReg();
1027
1028 LLT DstTy = MRI.getType(Reg: Dst);
1029
1030 bool Invert = false;
1031 AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
1032 if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1033 Pred == CmpInst::Predicate::FCMP_UNO) &&
1034 isBuildVectorAllZeros(MI: *MRI.getVRegDef(Reg: RHS), MRI)) {
1035 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1036 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1037 // "ord" normally would.
1038 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1039 // thus equivalent to a != a.
1040 RHS = LHS;
1041 CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
1042 } else
1043 changeVectorFCMPPredToAArch64CC(P: Pred, CondCode&: CC, CondCode2&: CC2, Invert);
1044
1045 // Instead of having an apply function, just build here to simplify things.
1046 MIB.setInstrAndDebugLoc(MI);
1047
1048 const bool NoNans =
1049 ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
1050
1051 auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
1052 Register CmpRes;
1053 if (CC2 == AArch64CC::AL)
1054 CmpRes = Cmp(MIB);
1055 else {
1056 auto Cmp2 = getVectorFCMP(CC: CC2, LHS, RHS, NoNans, MRI);
1057 auto Cmp2Dst = Cmp2(MIB);
1058 auto Cmp1Dst = Cmp(MIB);
1059 CmpRes = MIB.buildOr(Dst: DstTy, Src0: Cmp1Dst, Src1: Cmp2Dst).getReg(Idx: 0);
1060 }
1061 if (Invert)
1062 CmpRes = MIB.buildNot(Dst: DstTy, Src0: CmpRes).getReg(Idx: 0);
1063 MRI.replaceRegWith(FromReg: Dst, ToReg: CmpRes);
1064 MI.eraseFromParent();
1065}
1066
1067// Matches G_BUILD_VECTOR where at least one source operand is not a constant
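// Such build vectors are lowered (in applyLowerBuildToInsertVecElt) to an
// implicit def followed by a chain of G_INSERT_VECTOR_ELTs, e.g.
//   %v:_(<2 x s32>) = G_BUILD_VECTOR %runtime(s32), %c1(s32)
// becomes an undef <2 x s32> with %runtime inserted at index 0 and %c1 at
// index 1 (register names are illustrative).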
1068bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
1069 auto *GBuildVec = cast<GBuildVector>(Val: &MI);
1070
  // The lowering applies if at least one source value is not a constant.
1072 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1073 auto ConstVal =
1074 getAnyConstantVRegValWithLookThrough(VReg: GBuildVec->getSourceReg(I), MRI);
1075
1076 if (!ConstVal.has_value())
1077 return true;
1078 }
1079
1080 return false;
1081}
1082
1083void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
1084 MachineIRBuilder &B) {
1085 auto *GBuildVec = cast<GBuildVector>(Val: &MI);
1086 LLT DstTy = MRI.getType(Reg: GBuildVec->getReg(Idx: 0));
1087 Register DstReg = B.buildUndef(Res: DstTy).getReg(Idx: 0);
1088
1089 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1090 Register SrcReg = GBuildVec->getSourceReg(I);
1091 if (mi_match(R: SrcReg, MRI, P: m_GImplicitDef()))
1092 continue;
1093 auto IdxReg = B.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: I);
1094 DstReg =
1095 B.buildInsertVectorElement(Res: DstTy, Val: DstReg, Elt: SrcReg, Idx: IdxReg).getReg(Idx: 0);
1096 }
1097 B.buildCopy(Res: GBuildVec->getReg(Idx: 0), Op: DstReg);
1098 GBuildVec->eraseFromParent();
1099}
1100
1101bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1102 Register &SrcReg) {
1103 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1104 Register DstReg = MI.getOperand(i: 0).getReg();
1105 if (MRI.getType(Reg: DstReg).isVector())
1106 return false;
1107 // Match a store of a truncate.
1108 if (!mi_match(R: DstReg, MRI, P: m_GTrunc(Src: m_Reg(R&: SrcReg))))
1109 return false;
1110 // Only form truncstores for value types of max 64b.
1111 return MRI.getType(Reg: SrcReg).getSizeInBits() <= 64;
1112}
1113
1114void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1115 MachineIRBuilder &B, GISelChangeObserver &Observer,
1116 Register &SrcReg) {
1117 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1118 Observer.changingInstr(MI);
1119 MI.getOperand(i: 0).setReg(SrcReg);
1120 Observer.changedInstr(MI);
1121}
1122
// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
// form in the first place for combine opportunities, so any remaining ones at
// this stage need to be lowered back.
1126bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1127 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1128 Register DstReg = MI.getOperand(i: 0).getReg();
1129 LLT DstTy = MRI.getType(Reg: DstReg);
1130 return DstTy.isVector();
1131}
1132
1133void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1134 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1135 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1136 B.setInstrAndDebugLoc(MI);
1137 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1138 Helper.lower(MI, TypeIdx: 0, /* Unused hint type */ Ty: LLT());
1139}
1140
1141/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1142/// => unused, <N x t> = unmerge v
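///
/// For example, with a <16 x s8> v: the low half of (G_EXT v, undef, 8) is the
/// high half of v, so the single used result can instead be taken from the
/// second def of a plain unmerge of v.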
1143bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1144 Register &MatchInfo) {
1145 auto &Unmerge = cast<GUnmerge>(Val&: MI);
1146 if (Unmerge.getNumDefs() != 2)
1147 return false;
1148 if (!MRI.use_nodbg_empty(RegNo: Unmerge.getReg(Idx: 1)))
1149 return false;
1150
1151 LLT DstTy = MRI.getType(Reg: Unmerge.getReg(Idx: 0));
1152 if (!DstTy.isVector())
1153 return false;
1154
1155 MachineInstr *Ext = getOpcodeDef(Opcode: AArch64::G_EXT, Reg: Unmerge.getSourceReg(), MRI);
1156 if (!Ext)
1157 return false;
1158
1159 Register ExtSrc1 = Ext->getOperand(i: 1).getReg();
1160 Register ExtSrc2 = Ext->getOperand(i: 2).getReg();
1161 auto LowestVal =
1162 getIConstantVRegValWithLookThrough(VReg: Ext->getOperand(i: 3).getReg(), MRI);
1163 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1164 return false;
1165
1166 if (!getOpcodeDef<GImplicitDef>(Reg: ExtSrc2, MRI))
1167 return false;
1168
1169 MatchInfo = ExtSrc1;
1170 return true;
1171}
1172
1173void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1174 MachineIRBuilder &B,
1175 GISelChangeObserver &Observer, Register &SrcReg) {
1176 Observer.changingInstr(MI);
1177 // Swap dst registers.
1178 Register Dst1 = MI.getOperand(i: 0).getReg();
1179 MI.getOperand(i: 0).setReg(MI.getOperand(i: 1).getReg());
1180 MI.getOperand(i: 1).setReg(Dst1);
1181 MI.getOperand(i: 2).setReg(SrcReg);
1182 Observer.changedInstr(MI);
1183}
1184
// Match mul({z/s}ext, {z/s}ext) => {u/s}mull, or match v2s64 mul
// instructions, which will then be scalarised later on (NEON has no 64-bit
// element vector multiply). These two matches are kept in a fixed order so
// that matching is deterministic: MUL is lowered to MULL before we try to
// scalarise if needed.
1190bool matchMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI) {
  // Only <2 x s64> multiplies need this lowering.
1192 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1193 return DstTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
1194}
1195
1196void applyMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI,
1197 MachineIRBuilder &B, GISelChangeObserver &Observer) {
1198 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1199 "Expected a G_MUL instruction");
1200
  // Split the <2 x s64> multiply so it can be selected as scalar multiplies.
1202 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1203 assert(DstTy == LLT::fixed_vector(2, 64) && "Expected v2s64 Mul");
1204 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1205 Helper.fewerElementsVector(
1206 MI, TypeIdx: 0,
1207 NarrowTy: DstTy.changeElementCount(EC: DstTy.getElementCount().divideCoefficientBy(RHS: 2)));
1208}
1209
1210class AArch64PostLegalizerLoweringImpl : public Combiner {
1211protected:
1212 const CombinerHelper Helper;
1213 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1214 const AArch64Subtarget &STI;
1215
1216public:
1217 AArch64PostLegalizerLoweringImpl(
1218 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1219 GISelCSEInfo *CSEInfo,
1220 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1221 const AArch64Subtarget &STI);
1222
  static const char *getName() { return "AArch64PostLegalizerLowering"; }
1224
1225 bool tryCombineAll(MachineInstr &I) const override;
1226
1227private:
1228#define GET_GICOMBINER_CLASS_MEMBERS
1229#include "AArch64GenPostLegalizeGILowering.inc"
1230#undef GET_GICOMBINER_CLASS_MEMBERS
1231};
1232
1233#define GET_GICOMBINER_IMPL
1234#include "AArch64GenPostLegalizeGILowering.inc"
1235#undef GET_GICOMBINER_IMPL
1236
1237AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1238 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
1239 GISelCSEInfo *CSEInfo,
1240 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1241 const AArch64Subtarget &STI)
1242 : Combiner(MF, CInfo, TPC, /*VT*/ nullptr, CSEInfo),
1243 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
1244 STI(STI),
1245#define GET_GICOMBINER_CONSTRUCTOR_INITS
1246#include "AArch64GenPostLegalizeGILowering.inc"
1247#undef GET_GICOMBINER_CONSTRUCTOR_INITS
1248{
1249}
1250
1251class AArch64PostLegalizerLowering : public MachineFunctionPass {
1252public:
1253 static char ID;
1254
1255 AArch64PostLegalizerLowering();
1256
1257 StringRef getPassName() const override {
1258 return "AArch64PostLegalizerLowering";
1259 }
1260
1261 bool runOnMachineFunction(MachineFunction &MF) override;
1262 void getAnalysisUsage(AnalysisUsage &AU) const override;
1263
1264private:
1265 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1266};
1267} // end anonymous namespace
1268
1269void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
1270 AU.addRequired<TargetPassConfig>();
1271 AU.setPreservesCFG();
1272 getSelectionDAGFallbackAnalysisUsage(AU);
1273 MachineFunctionPass::getAnalysisUsage(AU);
1274}
1275
1276AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
1277 : MachineFunctionPass(ID) {
1278 if (!RuleConfig.parseCommandLineOption())
1279 report_fatal_error(reason: "Invalid rule identifier");
1280}
1281
1282bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
1283 if (MF.getProperties().hasFailedISel())
1284 return false;
1285 assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
1286 auto *TPC = &getAnalysis<TargetPassConfig>();
1287 const Function &F = MF.getFunction();
1288
1289 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
1290 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
1291 /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
1292 F.hasOptSize(), F.hasMinSize());
1293 // Disable fixed-point iteration to reduce compile-time
1294 CInfo.MaxIterations = 1;
1295 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
1296 // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
1297 CInfo.EnableFullDCE = false;
1298 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
1299 RuleConfig, ST);
1300 return Impl.combineMachineInstrs();
1301}
1302
1303char AArch64PostLegalizerLowering::ID = 0;
1304INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
1305 "Lower AArch64 MachineInstrs after legalization", false,
1306 false)
1307INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
1308INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
1309 "Lower AArch64 MachineInstrs after legalization", false,
1310 false)
1311
1312namespace llvm {
1313FunctionPass *createAArch64PostLegalizerLowering() {
1314 return new AArch64PostLegalizerLowering();
1315}
1316} // end namespace llvm
1317