1 | //=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// Post-legalization lowering for instructions. |
11 | /// |
12 | /// This is used to offload pattern matching from the selector. |
13 | /// |
14 | /// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually |
15 | /// a G_ZIP, G_UZP, etc. |
16 | /// |
17 | /// General optimization combines should be handled by either the |
18 | /// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner. |
19 | /// |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #include "AArch64ExpandImm.h" |
23 | #include "AArch64GlobalISelUtils.h" |
24 | #include "AArch64PerfectShuffle.h" |
25 | #include "AArch64Subtarget.h" |
26 | #include "AArch64TargetMachine.h" |
27 | #include "GISel/AArch64LegalizerInfo.h" |
28 | #include "MCTargetDesc/AArch64MCTargetDesc.h" |
29 | #include "TargetInfo/AArch64TargetInfo.h" |
30 | #include "Utils/AArch64BaseInfo.h" |
31 | #include "llvm/CodeGen/GlobalISel/Combiner.h" |
32 | #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" |
33 | #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" |
34 | #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" |
35 | #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" |
36 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
37 | #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" |
38 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
39 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
40 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
41 | #include "llvm/CodeGen/MachineFrameInfo.h" |
42 | #include "llvm/CodeGen/MachineFunctionPass.h" |
43 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
44 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
45 | #include "llvm/CodeGen/TargetOpcodes.h" |
46 | #include "llvm/CodeGen/TargetPassConfig.h" |
47 | #include "llvm/IR/InstrTypes.h" |
48 | #include "llvm/InitializePasses.h" |
49 | #include "llvm/Support/Debug.h" |
50 | #include "llvm/Support/ErrorHandling.h" |
51 | #include <optional> |
52 | |
53 | #define GET_GICOMBINER_DEPS |
54 | #include "AArch64GenPostLegalizeGILowering.inc" |
55 | #undef GET_GICOMBINER_DEPS |
56 | |
57 | #define DEBUG_TYPE "aarch64-postlegalizer-lowering" |
58 | |
59 | using namespace llvm; |
60 | using namespace MIPatternMatch; |
61 | using namespace AArch64GISelUtils; |
62 | |
63 | namespace { |
64 | |
65 | #define GET_GICOMBINER_TYPES |
66 | #include "AArch64GenPostLegalizeGILowering.inc" |
67 | #undef GET_GICOMBINER_TYPES |
68 | |
69 | /// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR. |
70 | /// |
71 | /// Used for matching target-supported shuffles before codegen. |
72 | struct ShuffleVectorPseudo { |
73 | unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1) |
74 | Register Dst; ///< Destination register. |
75 | SmallVector<SrcOp, 2> SrcOps; ///< Source registers. |
76 | ShuffleVectorPseudo(unsigned Opc, Register Dst, |
77 | std::initializer_list<SrcOp> SrcOps) |
78 | : Opc(Opc), Dst(Dst), SrcOps(SrcOps){}; |
79 | ShuffleVectorPseudo() = default; |
80 | }; |
81 | |
82 | /// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector |
83 | /// sources of the shuffle are different. |
84 | std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M, |
85 | unsigned NumElts) { |
86 | // Look for the first non-undef element. |
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
88 | if (FirstRealElt == M.end()) |
89 | return std::nullopt; |
90 | |
91 | // Use APInt to handle overflow when calculating expected element. |
92 | unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); |
93 | APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); |
94 | |
95 | // The following shuffle indices must be the successive elements after the |
96 | // first real element. |
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
100 | return std::nullopt; |
101 | |
102 | // The index of an EXT is the first element if it is not UNDEF. |
103 | // Watch out for the beginning UNDEFs. The EXT index should be the expected |
104 | // value of the first element. E.g. |
105 | // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. |
106 | // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. |
107 | // ExpectedElt is the last mask index plus 1. |
108 | uint64_t Imm = ExpectedElt.getZExtValue(); |
109 | bool ReverseExt = false; |
110 | |
  // There are two different cases that require reversing the input vectors.
  // For example, for vector <4 x i32> we have the following cases:
  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
  // In both cases we end up using the mask <5, 6, 7, 0>, which requires
  // reversing the two input vectors.
117 | if (Imm < NumElts) |
118 | ReverseExt = true; |
119 | else |
120 | Imm -= NumElts; |
121 | return std::make_pair(x&: ReverseExt, y&: Imm); |
122 | } |
123 | |
124 | /// Helper function for matchINS. |
125 | /// |
126 | /// \returns a value when \p M is an ins mask for \p NumInputElements. |
127 | /// |
128 | /// First element of the returned pair is true when the produced |
129 | /// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR. |
130 | /// |
131 | /// Second element is the destination lane for the G_INSERT_VECTOR_ELT. |
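///
/// E.g. for \p NumInputElements == 4, the mask <0, 1, 6, 3> matches every LHS
/// lane except lane 2, so this returns (true, 2): insert into lane 2 of the
/// LHS, taking the source from mask element 6 (lane 2 of the RHS).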
132 | std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M, |
133 | int NumInputElements) { |
134 | if (M.size() != static_cast<size_t>(NumInputElements)) |
135 | return std::nullopt; |
136 | int NumLHSMatch = 0, NumRHSMatch = 0; |
137 | int LastLHSMismatch = -1, LastRHSMismatch = -1; |
138 | for (int Idx = 0; Idx < NumInputElements; ++Idx) { |
139 | if (M[Idx] == -1) { |
140 | ++NumLHSMatch; |
141 | ++NumRHSMatch; |
142 | continue; |
143 | } |
144 | M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx; |
145 | M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx; |
146 | } |
147 | const int NumNeededToMatch = NumInputElements - 1; |
148 | if (NumLHSMatch == NumNeededToMatch) |
149 | return std::make_pair(x: true, y&: LastLHSMismatch); |
150 | if (NumRHSMatch == NumNeededToMatch) |
151 | return std::make_pair(x: false, y&: LastRHSMismatch); |
152 | return std::nullopt; |
153 | } |
154 | |
/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_REV instruction. Returns the appropriate G_REV opcode in \p MatchInfo.
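///
/// E.g. for <4 x s32>, the mask <1, 0, 3, 2> reverses the 32-bit elements
/// within each 64-bit chunk and is lowered to G_REV64.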
157 | bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, |
158 | ShuffleVectorPseudo &MatchInfo) { |
159 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
160 | ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask(); |
161 | Register Dst = MI.getOperand(i: 0).getReg(); |
162 | Register Src = MI.getOperand(i: 1).getReg(); |
163 | LLT Ty = MRI.getType(Reg: Dst); |
164 | unsigned EltSize = Ty.getScalarSizeInBits(); |
165 | |
166 | // Element size for a rev cannot be 64. |
167 | if (EltSize == 64) |
168 | return false; |
169 | |
170 | unsigned NumElts = Ty.getNumElements(); |
171 | |
172 | // Try to produce a G_REV instruction |
173 | for (unsigned LaneSize : {64U, 32U, 16U}) { |
174 | if (isREVMask(M: ShuffleMask, EltSize, NumElts, BlockSize: LaneSize)) { |
175 | unsigned Opcode; |
176 | if (LaneSize == 64U) |
177 | Opcode = AArch64::G_REV64; |
178 | else if (LaneSize == 32U) |
179 | Opcode = AArch64::G_REV32; |
180 | else |
181 | Opcode = AArch64::G_REV16; |
182 | |
183 | MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src}); |
184 | return true; |
185 | } |
186 | } |
187 | |
188 | return false; |
189 | } |
190 | |
191 | /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with |
192 | /// a G_TRN1 or G_TRN2 instruction. |
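///
/// E.g. for <4 x s32>, the mask <0, 4, 2, 6> interleaves the even-numbered
/// lanes of the two sources (G_TRN1), and <1, 5, 3, 7> the odd-numbered lanes
/// (G_TRN2).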
193 | bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, |
194 | ShuffleVectorPseudo &MatchInfo) { |
195 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
196 | unsigned WhichResult; |
197 | ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask(); |
198 | Register Dst = MI.getOperand(i: 0).getReg(); |
199 | unsigned NumElts = MRI.getType(Reg: Dst).getNumElements(); |
200 | if (!isTRNMask(M: ShuffleMask, NumElts, WhichResult)) |
201 | return false; |
202 | unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; |
203 | Register V1 = MI.getOperand(i: 1).getReg(); |
204 | Register V2 = MI.getOperand(i: 2).getReg(); |
205 | MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); |
206 | return true; |
207 | } |
208 | |
209 | /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with |
210 | /// a G_UZP1 or G_UZP2 instruction. |
211 | /// |
212 | /// \param [in] MI - The shuffle vector instruction. |
213 | /// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success. |
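///
/// E.g. for <4 x s32>, the mask <0, 2, 4, 6> takes the even-numbered elements
/// of the concatenated sources (G_UZP1), and <1, 3, 5, 7> the odd-numbered
/// ones (G_UZP2).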
214 | bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI, |
215 | ShuffleVectorPseudo &MatchInfo) { |
216 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
217 | unsigned WhichResult; |
218 | ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask(); |
219 | Register Dst = MI.getOperand(i: 0).getReg(); |
220 | unsigned NumElts = MRI.getType(Reg: Dst).getNumElements(); |
221 | if (!isUZPMask(M: ShuffleMask, NumElts, WhichResultOut&: WhichResult)) |
222 | return false; |
223 | unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2; |
224 | Register V1 = MI.getOperand(i: 1).getReg(); |
225 | Register V2 = MI.getOperand(i: 2).getReg(); |
226 | MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); |
227 | return true; |
228 | } |
229 | |
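/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_ZIP1 or G_ZIP2 instruction.
///
/// E.g. for <4 x s32>, the mask <0, 4, 1, 5> interleaves the low halves of the
/// two sources (G_ZIP1), and <2, 6, 3, 7> the high halves (G_ZIP2).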
230 | bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI, |
231 | ShuffleVectorPseudo &MatchInfo) { |
232 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
233 | unsigned WhichResult; |
234 | ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask(); |
235 | Register Dst = MI.getOperand(i: 0).getReg(); |
236 | unsigned NumElts = MRI.getType(Reg: Dst).getNumElements(); |
237 | if (!isZIPMask(M: ShuffleMask, NumElts, WhichResultOut&: WhichResult)) |
238 | return false; |
239 | unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2; |
240 | Register V1 = MI.getOperand(i: 1).getReg(); |
241 | Register V2 = MI.getOperand(i: 2).getReg(); |
242 | MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); |
243 | return true; |
244 | } |
245 | |
246 | /// Helper function for matchDup. |
247 | bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI, |
248 | MachineRegisterInfo &MRI, |
249 | ShuffleVectorPseudo &MatchInfo) { |
250 | if (Lane != 0) |
251 | return false; |
252 | |
253 | // Try to match a vector splat operation into a dup instruction. |
254 | // We're looking for this pattern: |
255 | // |
256 | // %scalar:gpr(s64) = COPY $x0 |
257 | // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF |
258 | // %cst0:gpr(s32) = G_CONSTANT i32 0 |
259 | // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32) |
260 | // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32) |
261 | // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, |
262 | // %zerovec(<2 x s32>) |
263 | // |
264 | // ...into: |
265 | // %splat = G_DUP %scalar |
266 | |
267 | // Begin matching the insert. |
268 | auto *InsMI = getOpcodeDef(Opcode: TargetOpcode::G_INSERT_VECTOR_ELT, |
269 | Reg: MI.getOperand(i: 1).getReg(), MRI); |
270 | if (!InsMI) |
271 | return false; |
272 | // Match the undef vector operand. |
273 | if (!getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: InsMI->getOperand(i: 1).getReg(), |
274 | MRI)) |
275 | return false; |
276 | |
277 | // Match the index constant 0. |
278 | if (!mi_match(R: InsMI->getOperand(i: 3).getReg(), MRI, P: m_ZeroInt())) |
279 | return false; |
280 | |
281 | MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(i: 0).getReg(), |
282 | {InsMI->getOperand(i: 2).getReg()}); |
283 | return true; |
284 | } |
285 | |
286 | /// Helper function for matchDup. |
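///
/// E.g. a splat of lane 1 of G_BUILD_VECTOR %a, %b, %c, %d can reference %b
/// directly and becomes G_DUP %b.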
287 | bool matchDupFromBuildVector(int Lane, MachineInstr &MI, |
288 | MachineRegisterInfo &MRI, |
289 | ShuffleVectorPseudo &MatchInfo) { |
  assert(Lane >= 0 && "Expected a non-negative lane?");
291 | // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the |
292 | // lane's definition directly. |
293 | auto *BuildVecMI = getOpcodeDef(Opcode: TargetOpcode::G_BUILD_VECTOR, |
294 | Reg: MI.getOperand(i: 1).getReg(), MRI); |
295 | if (!BuildVecMI) |
296 | return false; |
297 | Register Reg = BuildVecMI->getOperand(i: Lane + 1).getReg(); |
298 | MatchInfo = |
299 | ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(i: 0).getReg(), {Reg}); |
300 | return true; |
301 | } |
302 | |
303 | bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI, |
304 | ShuffleVectorPseudo &MatchInfo) { |
305 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
306 | auto MaybeLane = getSplatIndex(MI); |
307 | if (!MaybeLane) |
308 | return false; |
309 | int Lane = *MaybeLane; |
  // If the splat lane is undef, treat it as a splat of lane 0 and lower it as
  // a plain dup, if possible.
311 | if (Lane < 0) |
312 | Lane = 0; |
313 | if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo)) |
314 | return true; |
315 | if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo)) |
316 | return true; |
317 | return false; |
318 | } |
319 | |
320 | // Check if an EXT instruction can handle the shuffle mask when the vector |
321 | // sources of the shuffle are the same. |
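//
// E.g. for <8 x s8>, the mask <3, 4, 5, 6, 7, 0, 1, 2> rotates the single
// source by three elements, which matchEXT lowers to a G_EXT with an
// immediate of 3 (bytes).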
322 | bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) { |
323 | unsigned NumElts = Ty.getNumElements(); |
324 | |
325 | // Assume that the first shuffle index is not UNDEF. Fail if it is. |
326 | if (M[0] < 0) |
327 | return false; |
328 | |
329 | // If this is a VEXT shuffle, the immediate value is the index of the first |
330 | // element. The other shuffle indices must be the successive elements after |
331 | // the first one. |
332 | unsigned ExpectedElt = M[0]; |
333 | for (unsigned I = 1; I < NumElts; ++I) { |
334 | // Increment the expected index. If it wraps around, just follow it |
335 | // back to index zero and keep going. |
336 | ++ExpectedElt; |
337 | if (ExpectedElt == NumElts) |
338 | ExpectedElt = 0; |
339 | |
340 | if (M[I] < 0) |
341 | continue; // Ignore UNDEF indices. |
342 | if (ExpectedElt != static_cast<unsigned>(M[I])) |
343 | return false; |
344 | } |
345 | |
346 | return true; |
347 | } |
348 | |
349 | bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI, |
350 | ShuffleVectorPseudo &MatchInfo) { |
351 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
352 | Register Dst = MI.getOperand(i: 0).getReg(); |
353 | LLT DstTy = MRI.getType(Reg: Dst); |
354 | Register V1 = MI.getOperand(i: 1).getReg(); |
355 | Register V2 = MI.getOperand(i: 2).getReg(); |
356 | auto Mask = MI.getOperand(i: 3).getShuffleMask(); |
357 | uint64_t Imm; |
358 | auto ExtInfo = getExtMask(M: Mask, NumElts: DstTy.getNumElements()); |
359 | uint64_t ExtFactor = MRI.getType(Reg: V1).getScalarSizeInBits() / 8; |
360 | |
361 | if (!ExtInfo) { |
362 | if (!getOpcodeDef<GImplicitDef>(Reg: V2, MRI) || |
363 | !isSingletonExtMask(M: Mask, Ty: DstTy)) |
364 | return false; |
365 | |
366 | Imm = Mask[0] * ExtFactor; |
367 | MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm}); |
368 | return true; |
369 | } |
370 | bool ReverseExt; |
371 | std::tie(args&: ReverseExt, args&: Imm) = *ExtInfo; |
372 | if (ReverseExt) |
373 | std::swap(a&: V1, b&: V2); |
374 | Imm *= ExtFactor; |
375 | MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm}); |
376 | return true; |
377 | } |
378 | |
379 | /// Replace a G_SHUFFLE_VECTOR instruction with a pseudo. |
380 | /// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR. |
381 | void applyShuffleVectorPseudo(MachineInstr &MI, |
382 | ShuffleVectorPseudo &MatchInfo) { |
383 | MachineIRBuilder MIRBuilder(MI); |
384 | MIRBuilder.buildInstr(Opc: MatchInfo.Opc, DstOps: {MatchInfo.Dst}, SrcOps: MatchInfo.SrcOps); |
385 | MI.eraseFromParent(); |
386 | } |
387 | |
388 | /// Replace a G_SHUFFLE_VECTOR instruction with G_EXT. |
389 | /// Special-cased because the constant operand must be emitted as a G_CONSTANT |
390 | /// for the imported tablegen patterns to work. |
391 | void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { |
392 | MachineIRBuilder MIRBuilder(MI); |
393 | if (MatchInfo.SrcOps[2].getImm() == 0) |
394 | MIRBuilder.buildCopy(Res: MatchInfo.Dst, Op: MatchInfo.SrcOps[0]); |
395 | else { |
396 | // Tablegen patterns expect an i32 G_CONSTANT as the final op. |
397 | auto Cst = |
398 | MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: MatchInfo.SrcOps[2].getImm()); |
399 | MIRBuilder.buildInstr(Opc: MatchInfo.Opc, DstOps: {MatchInfo.Dst}, |
400 | SrcOps: {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst}); |
401 | } |
402 | MI.eraseFromParent(); |
403 | } |
404 | |
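/// Match a G_INSERT_VECTOR_ELT whose lane index is not a compile-time
/// constant. applyNonConstInsert below lowers such inserts through a stack
/// temporary, since selecting a vector insert generally requires an immediate
/// lane index.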
405 | bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) { |
406 | assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); |
407 | |
408 | auto ValAndVReg = |
409 | getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 3).getReg(), MRI); |
410 | return !ValAndVReg; |
411 | } |
412 | |
413 | void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI, |
414 | MachineIRBuilder &Builder) { |
415 | auto &Insert = cast<GInsertVectorElement>(Val&: MI); |
416 | Builder.setInstrAndDebugLoc(Insert); |
417 | |
418 | Register Offset = Insert.getIndexReg(); |
419 | LLT VecTy = MRI.getType(Reg: Insert.getReg(Idx: 0)); |
420 | LLT EltTy = MRI.getType(Reg: Insert.getElementReg()); |
421 | LLT IdxTy = MRI.getType(Reg: Insert.getIndexReg()); |
422 | |
423 | // Create a stack slot and store the vector into it |
424 | MachineFunction &MF = Builder.getMF(); |
425 | Align Alignment( |
426 | std::min<uint64_t>(a: VecTy.getSizeInBytes().getKnownMinValue(), b: 16)); |
427 | int FrameIdx = MF.getFrameInfo().CreateStackObject(Size: VecTy.getSizeInBytes(), |
428 | Alignment, isSpillSlot: false); |
429 | LLT FramePtrTy = LLT::pointer(AddressSpace: 0, SizeInBits: 64); |
430 | MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIdx); |
431 | auto StackTemp = Builder.buildFrameIndex(Res: FramePtrTy, Idx: FrameIdx); |
432 | |
433 | Builder.buildStore(Val: Insert.getOperand(i: 1), Addr: StackTemp, PtrInfo, Alignment: Align(8)); |
434 | |
435 | // Get the pointer to the element, and be sure not to hit undefined behavior |
436 | // if the index is out of bounds. |
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-of-2 vector size");
439 | auto Mask = Builder.buildConstant(Res: IdxTy, Val: VecTy.getNumElements() - 1); |
440 | Register And = Builder.buildAnd(Dst: IdxTy, Src0: Offset, Src1: Mask).getReg(Idx: 0); |
441 | auto EltSize = Builder.buildConstant(Res: IdxTy, Val: EltTy.getSizeInBytes()); |
442 | Register Mul = Builder.buildMul(Dst: IdxTy, Src0: And, Src1: EltSize).getReg(Idx: 0); |
443 | Register EltPtr = |
444 | Builder.buildPtrAdd(Res: MRI.getType(Reg: StackTemp.getReg(Idx: 0)), Op0: StackTemp, Op1: Mul) |
445 | .getReg(Idx: 0); |
446 | |
447 | // Write the inserted element |
448 | Builder.buildStore(Val: Insert.getElementReg(), Addr: EltPtr, PtrInfo, Alignment: Align(1)); |
449 | // Reload the whole vector. |
450 | Builder.buildLoad(Res: Insert.getReg(Idx: 0), Addr: StackTemp, PtrInfo, Alignment: Align(8)); |
451 | Insert.eraseFromParent(); |
452 | } |
453 | |
454 | /// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a |
455 | /// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair. |
456 | /// |
457 | /// e.g. |
458 | /// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0) |
459 | /// |
460 | /// Can be represented as |
461 | /// |
462 | /// %extract = G_EXTRACT_VECTOR_ELT %left, 0 |
463 | /// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1 |
464 | /// |
465 | bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI, |
466 | std::tuple<Register, int, Register, int> &MatchInfo) { |
467 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
468 | ArrayRef<int> ShuffleMask = MI.getOperand(i: 3).getShuffleMask(); |
469 | Register Dst = MI.getOperand(i: 0).getReg(); |
470 | int NumElts = MRI.getType(Reg: Dst).getNumElements(); |
471 | auto DstIsLeftAndDstLane = isINSMask(M: ShuffleMask, NumInputElements: NumElts); |
472 | if (!DstIsLeftAndDstLane) |
473 | return false; |
474 | bool DstIsLeft; |
475 | int DstLane; |
476 | std::tie(args&: DstIsLeft, args&: DstLane) = *DstIsLeftAndDstLane; |
477 | Register Left = MI.getOperand(i: 1).getReg(); |
478 | Register Right = MI.getOperand(i: 2).getReg(); |
479 | Register DstVec = DstIsLeft ? Left : Right; |
480 | Register SrcVec = Left; |
481 | |
482 | int SrcLane = ShuffleMask[DstLane]; |
483 | if (SrcLane >= NumElts) { |
484 | SrcVec = Right; |
485 | SrcLane -= NumElts; |
486 | } |
487 | |
488 | MatchInfo = std::make_tuple(args&: DstVec, args&: DstLane, args&: SrcVec, args&: SrcLane); |
489 | return true; |
490 | } |
491 | |
492 | void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI, |
493 | MachineIRBuilder &Builder, |
494 | std::tuple<Register, int, Register, int> &MatchInfo) { |
495 | Builder.setInstrAndDebugLoc(MI); |
496 | Register Dst = MI.getOperand(i: 0).getReg(); |
497 | auto ScalarTy = MRI.getType(Reg: Dst).getElementType(); |
498 | Register DstVec, SrcVec; |
499 | int DstLane, SrcLane; |
500 | std::tie(args&: DstVec, args&: DstLane, args&: SrcVec, args&: SrcLane) = MatchInfo; |
501 | auto SrcCst = Builder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: SrcLane); |
  auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
503 | auto DstCst = Builder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: DstLane); |
504 | Builder.buildInsertVectorElement(Res: Dst, Val: DstVec, Elt: Extract, Idx: DstCst); |
505 | MI.eraseFromParent(); |
506 | } |
507 | |
/// isVShiftRImm - Check if \p Reg is a valid splat-vector immediate for the
/// shift-amount operand of a vector shift-right operation. The value must be
/// in the range 1 <= Value <= ElementBits.
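///
/// E.g. a G_ASHR of a <4 x s32> vector by a splat of 3 passes this check and
/// is rebuilt by applyVAshrLshrImm as G_VASHR with a scalar immediate of 3.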
511 | bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty, |
512 | int64_t &Cnt) { |
513 | assert(Ty.isVector() && "vector shift count is not a vector type" ); |
514 | MachineInstr *MI = MRI.getVRegDef(Reg); |
515 | auto Cst = getAArch64VectorSplatScalar(MI: *MI, MRI); |
516 | if (!Cst) |
517 | return false; |
518 | Cnt = *Cst; |
519 | int64_t ElementBits = Ty.getScalarSizeInBits(); |
520 | return Cnt >= 1 && Cnt <= ElementBits; |
521 | } |
522 | |
523 | /// Match a vector G_ASHR or G_LSHR with a valid immediate shift. |
524 | bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, |
525 | int64_t &Imm) { |
526 | assert(MI.getOpcode() == TargetOpcode::G_ASHR || |
527 | MI.getOpcode() == TargetOpcode::G_LSHR); |
528 | LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg()); |
529 | if (!Ty.isVector()) |
530 | return false; |
531 | return isVShiftRImm(Reg: MI.getOperand(i: 2).getReg(), MRI, Ty, Cnt&: Imm); |
532 | } |
533 | |
534 | void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI, |
535 | int64_t &Imm) { |
536 | unsigned Opc = MI.getOpcode(); |
537 | assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR); |
538 | unsigned NewOpc = |
539 | Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR; |
540 | MachineIRBuilder MIB(MI); |
541 | auto ImmDef = MIB.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: Imm); |
542 | MIB.buildInstr(Opc: NewOpc, DstOps: {MI.getOperand(i: 0)}, SrcOps: {MI.getOperand(i: 1), ImmDef}); |
543 | MI.eraseFromParent(); |
544 | } |
545 | |
546 | /// Determine if it is possible to modify the \p RHS and predicate \p P of a |
547 | /// G_ICMP instruction such that the right-hand side is an arithmetic immediate. |
548 | /// |
549 | /// \returns A pair containing the updated immediate and predicate which may |
550 | /// be used to optimize the instruction. |
551 | /// |
552 | /// \note This assumes that the comparison has been legalized. |
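///
/// E.g. "icmp slt %x, 4097" cannot encode 4097 as an arithmetic immediate,
/// but the equivalent "icmp sle %x, 4096" can, since 4096 is a shifted 12-bit
/// immediate.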
553 | std::optional<std::pair<uint64_t, CmpInst::Predicate>> |
554 | tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P, |
555 | const MachineRegisterInfo &MRI) { |
556 | const auto &Ty = MRI.getType(Reg: RHS); |
557 | if (Ty.isVector()) |
558 | return std::nullopt; |
559 | unsigned Size = Ty.getSizeInBits(); |
560 | assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?" ); |
561 | |
562 | // If the RHS is not a constant, or the RHS is already a valid arithmetic |
563 | // immediate, then there is nothing to change. |
564 | auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS, MRI); |
565 | if (!ValAndVReg) |
566 | return std::nullopt; |
567 | uint64_t OriginalC = ValAndVReg->Value.getZExtValue(); |
568 | uint64_t C = OriginalC; |
569 | if (isLegalArithImmed(C)) |
570 | return std::nullopt; |
571 | |
572 | // We have a non-arithmetic immediate. Check if adjusting the immediate and |
573 | // adjusting the predicate will result in a legal arithmetic immediate. |
574 | switch (P) { |
575 | default: |
576 | return std::nullopt; |
577 | case CmpInst::ICMP_SLT: |
578 | case CmpInst::ICMP_SGE: |
579 | // Check for |
580 | // |
581 | // x slt c => x sle c - 1 |
582 | // x sge c => x sgt c - 1 |
583 | // |
584 | // When c is not the smallest possible negative number. |
585 | if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) || |
586 | (Size == 32 && static_cast<int32_t>(C) == INT32_MIN)) |
587 | return std::nullopt; |
588 | P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT; |
589 | C -= 1; |
590 | break; |
591 | case CmpInst::ICMP_ULT: |
592 | case CmpInst::ICMP_UGE: |
593 | // Check for |
594 | // |
595 | // x ult c => x ule c - 1 |
596 | // x uge c => x ugt c - 1 |
597 | // |
598 | // When c is not zero. |
599 | if (C == 0) |
600 | return std::nullopt; |
601 | P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; |
602 | C -= 1; |
603 | break; |
604 | case CmpInst::ICMP_SLE: |
605 | case CmpInst::ICMP_SGT: |
606 | // Check for |
607 | // |
608 | // x sle c => x slt c + 1 |
609 | // x sgt c => s sge c + 1 |
610 | // |
611 | // When c is not the largest possible signed integer. |
612 | if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) || |
613 | (Size == 64 && static_cast<int64_t>(C) == INT64_MAX)) |
614 | return std::nullopt; |
615 | P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE; |
616 | C += 1; |
617 | break; |
618 | case CmpInst::ICMP_ULE: |
619 | case CmpInst::ICMP_UGT: |
620 | // Check for |
621 | // |
622 | // x ule c => x ult c + 1 |
623 | // x ugt c => s uge c + 1 |
624 | // |
625 | // When c is not the largest possible unsigned integer. |
626 | if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) || |
627 | (Size == 64 && C == UINT64_MAX)) |
628 | return std::nullopt; |
629 | P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; |
630 | C += 1; |
631 | break; |
632 | } |
633 | |
634 | // Check if the new constant is valid, and return the updated constant and |
635 | // predicate if it is. |
636 | if (Size == 32) |
637 | C = static_cast<uint32_t>(C); |
638 | if (isLegalArithImmed(C)) |
639 | return {{C, P}}; |
640 | |
641 | auto IsMaterializableInSingleInstruction = [=](uint64_t Imm) { |
642 | SmallVector<AArch64_IMM::ImmInsnModel> Insn; |
643 | AArch64_IMM::expandMOVImm(Imm, BitSize: 32, Insn); |
644 | return Insn.size() == 1; |
645 | }; |
646 | |
647 | if (!IsMaterializableInSingleInstruction(OriginalC) && |
648 | IsMaterializableInSingleInstruction(C)) |
649 | return {{C, P}}; |
650 | |
651 | return std::nullopt; |
652 | } |
653 | |
654 | /// Determine whether or not it is possible to update the RHS and predicate of |
655 | /// a G_ICMP instruction such that the RHS will be selected as an arithmetic |
656 | /// immediate. |
657 | /// |
658 | /// \p MI - The G_ICMP instruction |
659 | /// \p MatchInfo - The new RHS immediate and predicate on success |
660 | /// |
661 | /// See tryAdjustICmpImmAndPred for valid transformations. |
662 | bool matchAdjustICmpImmAndPred( |
663 | MachineInstr &MI, const MachineRegisterInfo &MRI, |
664 | std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) { |
665 | assert(MI.getOpcode() == TargetOpcode::G_ICMP); |
666 | Register RHS = MI.getOperand(i: 3).getReg(); |
667 | auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate()); |
668 | if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, P: Pred, MRI)) { |
669 | MatchInfo = *MaybeNewImmAndPred; |
670 | return true; |
671 | } |
672 | return false; |
673 | } |
674 | |
675 | void applyAdjustICmpImmAndPred( |
676 | MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo, |
677 | MachineIRBuilder &MIB, GISelChangeObserver &Observer) { |
678 | MIB.setInstrAndDebugLoc(MI); |
679 | MachineOperand &RHS = MI.getOperand(i: 3); |
680 | MachineRegisterInfo &MRI = *MIB.getMRI(); |
681 | auto Cst = MIB.buildConstant(Res: MRI.cloneVirtualRegister(VReg: RHS.getReg()), |
682 | Val: MatchInfo.first); |
683 | Observer.changingInstr(MI); |
684 | RHS.setReg(Cst->getOperand(i: 0).getReg()); |
685 | MI.getOperand(i: 1).setPredicate(MatchInfo.second); |
686 | Observer.changedInstr(MI); |
687 | } |
688 | |
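/// Match a G_SHUFFLE_VECTOR which splats a single lane of its first source so
/// that it can be selected as a lane-indexed duplicate (G_DUPLANE8/16/32/64).
///
/// E.g. a <4 x s16> shuffle splatting lane 2 of its first source becomes
/// G_DUPLANE16 with a constant lane index of 2.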
689 | bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, |
690 | std::pair<unsigned, int> &MatchInfo) { |
691 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
692 | Register Src1Reg = MI.getOperand(i: 1).getReg(); |
693 | const LLT SrcTy = MRI.getType(Reg: Src1Reg); |
694 | const LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg()); |
695 | |
696 | auto LaneIdx = getSplatIndex(MI); |
697 | if (!LaneIdx) |
698 | return false; |
699 | |
700 | // The lane idx should be within the first source vector. |
701 | if (*LaneIdx >= SrcTy.getNumElements()) |
702 | return false; |
703 | |
704 | if (DstTy != SrcTy) |
705 | return false; |
706 | |
707 | LLT ScalarTy = SrcTy.getElementType(); |
708 | unsigned ScalarSize = ScalarTy.getSizeInBits(); |
709 | |
710 | unsigned Opc = 0; |
711 | switch (SrcTy.getNumElements()) { |
712 | case 2: |
713 | if (ScalarSize == 64) |
714 | Opc = AArch64::G_DUPLANE64; |
715 | else if (ScalarSize == 32) |
716 | Opc = AArch64::G_DUPLANE32; |
717 | break; |
718 | case 4: |
719 | if (ScalarSize == 32) |
720 | Opc = AArch64::G_DUPLANE32; |
721 | else if (ScalarSize == 16) |
722 | Opc = AArch64::G_DUPLANE16; |
723 | break; |
724 | case 8: |
725 | if (ScalarSize == 8) |
726 | Opc = AArch64::G_DUPLANE8; |
727 | else if (ScalarSize == 16) |
728 | Opc = AArch64::G_DUPLANE16; |
729 | break; |
730 | case 16: |
731 | if (ScalarSize == 8) |
732 | Opc = AArch64::G_DUPLANE8; |
733 | break; |
734 | default: |
735 | break; |
736 | } |
737 | if (!Opc) |
738 | return false; |
739 | |
740 | MatchInfo.first = Opc; |
741 | MatchInfo.second = *LaneIdx; |
742 | return true; |
743 | } |
744 | |
745 | void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI, |
746 | MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) { |
747 | assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); |
748 | Register Src1Reg = MI.getOperand(i: 1).getReg(); |
749 | const LLT SrcTy = MRI.getType(Reg: Src1Reg); |
750 | |
751 | B.setInstrAndDebugLoc(MI); |
752 | auto Lane = B.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: MatchInfo.second); |
753 | |
754 | Register DupSrc = MI.getOperand(i: 1).getReg(); |
755 | // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source. |
756 | // To do this, we can use a G_CONCAT_VECTORS to do the widening. |
757 | if (SrcTy.getSizeInBits() == 64) { |
758 | auto Undef = B.buildUndef(Res: SrcTy); |
759 | DupSrc = B.buildConcatVectors(Res: SrcTy.multiplyElements(Factor: 2), |
760 | Ops: {Src1Reg, Undef.getReg(Idx: 0)}) |
761 | .getReg(Idx: 0); |
762 | } |
763 | B.buildInstr(Opc: MatchInfo.first, DstOps: {MI.getOperand(i: 0).getReg()}, SrcOps: {DupSrc, Lane}); |
764 | MI.eraseFromParent(); |
765 | } |
766 | |
767 | bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) { |
768 | auto &Unmerge = cast<GUnmerge>(Val&: MI); |
769 | Register Src1Reg = Unmerge.getReg(Idx: Unmerge.getNumOperands() - 1); |
770 | const LLT SrcTy = MRI.getType(Reg: Src1Reg); |
771 | if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64) |
772 | return false; |
773 | return SrcTy.isVector() && !SrcTy.isScalable() && |
774 | Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1; |
775 | } |
776 | |
777 | void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, |
778 | MachineIRBuilder &B) { |
779 | auto &Unmerge = cast<GUnmerge>(Val&: MI); |
780 | Register Src1Reg = Unmerge.getReg(Idx: Unmerge.getNumOperands() - 1); |
781 | const LLT SrcTy = MRI.getType(Reg: Src1Reg); |
782 | assert((SrcTy.isVector() && !SrcTy.isScalable()) && |
783 | "Expected a fixed length vector" ); |
784 | |
785 | for (int I = 0; I < SrcTy.getNumElements(); ++I) |
786 | B.buildExtractVectorElementConstant(Res: Unmerge.getReg(Idx: I), Val: Src1Reg, Idx: I); |
787 | MI.eraseFromParent(); |
788 | } |
789 | |
790 | bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) { |
791 | assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); |
792 | auto Splat = getAArch64VectorSplat(MI, MRI); |
793 | if (!Splat) |
794 | return false; |
795 | if (Splat->isReg()) |
796 | return true; |
797 | // Later, during selection, we'll try to match imported patterns using |
798 | // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower |
799 | // G_BUILD_VECTORs which could match those patterns. |
800 | int64_t Cst = Splat->getCst(); |
801 | return (Cst != 0 && Cst != -1); |
802 | } |
803 | |
804 | void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI, |
805 | MachineIRBuilder &B) { |
806 | B.setInstrAndDebugLoc(MI); |
807 | B.buildInstr(Opc: AArch64::G_DUP, DstOps: {MI.getOperand(i: 0).getReg()}, |
808 | SrcOps: {MI.getOperand(i: 1).getReg()}); |
809 | MI.eraseFromParent(); |
810 | } |
811 | |
812 | /// \returns how many instructions would be saved by folding a G_ICMP's shift |
813 | /// and/or extension operations. |
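///
/// E.g. folding a supported extend into the compare saves one instruction,
/// and folding both an extend and a shift by at most 4 saves two.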
814 | unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) { |
815 | // No instructions to save if there's more than one use or no uses. |
816 | if (!MRI.hasOneNonDBGUse(RegNo: CmpOp)) |
817 | return 0; |
818 | |
819 | // FIXME: This is duplicated with the selector. (See: selectShiftedRegister) |
820 | auto IsSupportedExtend = [&](const MachineInstr &MI) { |
821 | if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG) |
822 | return true; |
823 | if (MI.getOpcode() != TargetOpcode::G_AND) |
824 | return false; |
825 | auto ValAndVReg = |
826 | getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI); |
827 | if (!ValAndVReg) |
828 | return false; |
829 | uint64_t Mask = ValAndVReg->Value.getZExtValue(); |
830 | return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF); |
831 | }; |
832 | |
833 | MachineInstr *Def = getDefIgnoringCopies(Reg: CmpOp, MRI); |
834 | if (IsSupportedExtend(*Def)) |
835 | return 1; |
836 | |
837 | unsigned Opc = Def->getOpcode(); |
838 | if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR && |
839 | Opc != TargetOpcode::G_LSHR) |
840 | return 0; |
841 | |
842 | auto MaybeShiftAmt = |
843 | getIConstantVRegValWithLookThrough(VReg: Def->getOperand(i: 2).getReg(), MRI); |
844 | if (!MaybeShiftAmt) |
845 | return 0; |
846 | uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue(); |
847 | MachineInstr *ShiftLHS = |
848 | getDefIgnoringCopies(Reg: Def->getOperand(i: 1).getReg(), MRI); |
849 | |
850 | // Check if we can fold an extend and a shift. |
851 | // FIXME: This is duplicated with the selector. (See: |
852 | // selectArithExtendedRegister) |
853 | if (IsSupportedExtend(*ShiftLHS)) |
854 | return (ShiftAmt <= 4) ? 2 : 1; |
855 | |
856 | LLT Ty = MRI.getType(Reg: Def->getOperand(i: 0).getReg()); |
857 | if (Ty.isVector()) |
858 | return 0; |
859 | unsigned ShiftSize = Ty.getSizeInBits(); |
860 | if ((ShiftSize == 32 && ShiftAmt <= 31) || |
861 | (ShiftSize == 64 && ShiftAmt <= 63)) |
862 | return 1; |
863 | return 0; |
864 | } |
865 | |
866 | /// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP |
867 | /// instruction \p MI. |
868 | bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) { |
869 | assert(MI.getOpcode() == TargetOpcode::G_ICMP); |
870 | // Swap the operands if it would introduce a profitable folding opportunity. |
871 | // (e.g. a shift + extend). |
872 | // |
873 | // For example: |
874 | // lsl w13, w11, #1 |
875 | // cmp w13, w12 |
876 | // can be turned into: |
877 | // cmp w12, w11, lsl #1 |
878 | |
879 | // Don't swap if there's a constant on the RHS, because we know we can fold |
880 | // that. |
881 | Register RHS = MI.getOperand(i: 3).getReg(); |
882 | auto RHSCst = getIConstantVRegValWithLookThrough(VReg: RHS, MRI); |
883 | if (RHSCst && isLegalArithImmed(C: RHSCst->Value.getSExtValue())) |
884 | return false; |
885 | |
886 | Register LHS = MI.getOperand(i: 2).getReg(); |
887 | auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate()); |
888 | auto GetRegForProfit = [&](Register Reg) { |
889 | MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); |
890 | return isCMN(MaybeSub: Def, Pred, MRI) ? Def->getOperand(i: 2).getReg() : Reg; |
891 | }; |
892 | |
893 | // Don't have a constant on the RHS. If we swap the LHS and RHS of the |
894 | // compare, would we be able to fold more instructions? |
895 | Register TheLHS = GetRegForProfit(LHS); |
896 | Register TheRHS = GetRegForProfit(RHS); |
897 | |
898 | // If the LHS is more likely to give us a folding opportunity, then swap the |
899 | // LHS and RHS. |
900 | return (getCmpOperandFoldingProfit(CmpOp: TheLHS, MRI) > |
901 | getCmpOperandFoldingProfit(CmpOp: TheRHS, MRI)); |
902 | } |
903 | |
904 | void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) { |
905 | auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate()); |
906 | Register LHS = MI.getOperand(i: 2).getReg(); |
907 | Register RHS = MI.getOperand(i: 3).getReg(); |
908 | Observer.changedInstr(MI); |
909 | MI.getOperand(i: 1).setPredicate(CmpInst::getSwappedPredicate(pred: Pred)); |
910 | MI.getOperand(i: 2).setReg(RHS); |
911 | MI.getOperand(i: 3).setReg(LHS); |
912 | Observer.changedInstr(MI); |
913 | } |
914 | |
915 | /// \returns a function which builds a vector floating point compare instruction |
916 | /// for a condition code \p CC. |
917 | /// \param [in] IsZero - True if the comparison is against 0. |
918 | /// \param [in] NoNans - True if the target has NoNansFPMath. |
919 | std::function<Register(MachineIRBuilder &)> |
920 | getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero, |
921 | bool NoNans, MachineRegisterInfo &MRI) { |
922 | LLT DstTy = MRI.getType(Reg: LHS); |
923 | assert(DstTy.isVector() && "Expected vector types only?" ); |
924 | assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!" ); |
925 | switch (CC) { |
926 | default: |
927 | llvm_unreachable("Unexpected condition code!" ); |
928 | case AArch64CC::NE: |
929 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
930 | auto FCmp = IsZero |
931 | ? MIB.buildInstr(Opc: AArch64::G_FCMEQZ, DstOps: {DstTy}, SrcOps: {LHS}) |
932 | : MIB.buildInstr(Opc: AArch64::G_FCMEQ, DstOps: {DstTy}, SrcOps: {LHS, RHS}); |
933 | return MIB.buildNot(Dst: DstTy, Src0: FCmp).getReg(Idx: 0); |
934 | }; |
935 | case AArch64CC::EQ: |
936 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
937 | return IsZero |
938 | ? MIB.buildInstr(Opc: AArch64::G_FCMEQZ, DstOps: {DstTy}, SrcOps: {LHS}).getReg(Idx: 0) |
939 | : MIB.buildInstr(Opc: AArch64::G_FCMEQ, DstOps: {DstTy}, SrcOps: {LHS, RHS}) |
940 | .getReg(Idx: 0); |
941 | }; |
942 | case AArch64CC::GE: |
943 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
944 | return IsZero |
945 | ? MIB.buildInstr(Opc: AArch64::G_FCMGEZ, DstOps: {DstTy}, SrcOps: {LHS}).getReg(Idx: 0) |
946 | : MIB.buildInstr(Opc: AArch64::G_FCMGE, DstOps: {DstTy}, SrcOps: {LHS, RHS}) |
947 | .getReg(Idx: 0); |
948 | }; |
949 | case AArch64CC::GT: |
950 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
951 | return IsZero |
952 | ? MIB.buildInstr(Opc: AArch64::G_FCMGTZ, DstOps: {DstTy}, SrcOps: {LHS}).getReg(Idx: 0) |
953 | : MIB.buildInstr(Opc: AArch64::G_FCMGT, DstOps: {DstTy}, SrcOps: {LHS, RHS}) |
954 | .getReg(Idx: 0); |
955 | }; |
956 | case AArch64CC::LS: |
957 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
958 | return IsZero |
959 | ? MIB.buildInstr(Opc: AArch64::G_FCMLEZ, DstOps: {DstTy}, SrcOps: {LHS}).getReg(Idx: 0) |
960 | : MIB.buildInstr(Opc: AArch64::G_FCMGE, DstOps: {DstTy}, SrcOps: {RHS, LHS}) |
961 | .getReg(Idx: 0); |
962 | }; |
963 | case AArch64CC::MI: |
964 | return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) { |
965 | return IsZero |
966 | ? MIB.buildInstr(Opc: AArch64::G_FCMLTZ, DstOps: {DstTy}, SrcOps: {LHS}).getReg(Idx: 0) |
967 | : MIB.buildInstr(Opc: AArch64::G_FCMGT, DstOps: {DstTy}, SrcOps: {RHS, LHS}) |
968 | .getReg(Idx: 0); |
969 | }; |
970 | } |
971 | } |
972 | |
973 | /// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo. |
974 | bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI, |
975 | MachineIRBuilder &MIB) { |
976 | assert(MI.getOpcode() == TargetOpcode::G_FCMP); |
977 | const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>(); |
978 | |
979 | Register Dst = MI.getOperand(i: 0).getReg(); |
980 | LLT DstTy = MRI.getType(Reg: Dst); |
981 | if (!DstTy.isVector() || !ST.hasNEON()) |
982 | return false; |
983 | Register LHS = MI.getOperand(i: 2).getReg(); |
984 | unsigned EltSize = MRI.getType(Reg: LHS).getScalarSizeInBits(); |
985 | if (EltSize == 16 && !ST.hasFullFP16()) |
986 | return false; |
987 | if (EltSize != 16 && EltSize != 32 && EltSize != 64) |
988 | return false; |
989 | |
990 | return true; |
991 | } |
992 | |
993 | /// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo. |
994 | void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI, |
995 | MachineIRBuilder &MIB) { |
996 | assert(MI.getOpcode() == TargetOpcode::G_FCMP); |
997 | const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>(); |
998 | |
999 | const auto &CmpMI = cast<GFCmp>(Val&: MI); |
1000 | |
1001 | Register Dst = CmpMI.getReg(Idx: 0); |
1002 | CmpInst::Predicate Pred = CmpMI.getCond(); |
1003 | Register LHS = CmpMI.getLHSReg(); |
1004 | Register RHS = CmpMI.getRHSReg(); |
1005 | |
1006 | LLT DstTy = MRI.getType(Reg: Dst); |
1007 | |
1008 | auto Splat = getAArch64VectorSplat(MI: *MRI.getVRegDef(Reg: RHS), MRI); |
1009 | |
1010 | // Compares against 0 have special target-specific pseudos. |
1011 | bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0; |
1012 | |
1013 | bool Invert = false; |
1014 | AArch64CC::CondCode CC, CC2 = AArch64CC::AL; |
1015 | if ((Pred == CmpInst::Predicate::FCMP_ORD || |
1016 | Pred == CmpInst::Predicate::FCMP_UNO) && |
1017 | IsZero) { |
1018 | // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't |
1019 | // NaN, so equivalent to a == a and doesn't need the two comparisons an |
1020 | // "ord" normally would. |
1021 | // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is |
1022 | // thus equivalent to a != a. |
1023 | RHS = LHS; |
1024 | IsZero = false; |
1025 | CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE; |
1026 | } else |
1027 | changeVectorFCMPPredToAArch64CC(P: Pred, CondCode&: CC, CondCode2&: CC2, Invert); |
1028 | |
1029 | // Instead of having an apply function, just build here to simplify things. |
1030 | MIB.setInstrAndDebugLoc(MI); |
1031 | |
1032 | const bool NoNans = |
1033 | ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath; |
1034 | |
1035 | auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI); |
1036 | Register CmpRes; |
1037 | if (CC2 == AArch64CC::AL) |
1038 | CmpRes = Cmp(MIB); |
1039 | else { |
1040 | auto Cmp2 = getVectorFCMP(CC: CC2, LHS, RHS, IsZero, NoNans, MRI); |
1041 | auto Cmp2Dst = Cmp2(MIB); |
1042 | auto Cmp1Dst = Cmp(MIB); |
1043 | CmpRes = MIB.buildOr(Dst: DstTy, Src0: Cmp1Dst, Src1: Cmp2Dst).getReg(Idx: 0); |
1044 | } |
1045 | if (Invert) |
1046 | CmpRes = MIB.buildNot(Dst: DstTy, Src0: CmpRes).getReg(Idx: 0); |
1047 | MRI.replaceRegWith(FromReg: Dst, ToReg: CmpRes); |
1048 | MI.eraseFromParent(); |
1049 | } |
1050 | |
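/// Match a G_STORE of a G_TRUNC so that it can be turned into a truncating
/// store.
///
/// E.g. storing the s32 result of a G_TRUNC of an s64 value becomes a
/// truncating G_STORE of the s64 value itself, keeping the original 32-bit
/// memory operand.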
1051 | bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI, |
1052 | Register &SrcReg) { |
1053 | assert(MI.getOpcode() == TargetOpcode::G_STORE); |
1054 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1055 | if (MRI.getType(Reg: DstReg).isVector()) |
1056 | return false; |
1057 | // Match a store of a truncate. |
1058 | if (!mi_match(R: DstReg, MRI, P: m_GTrunc(Src: m_Reg(R&: SrcReg)))) |
1059 | return false; |
1060 | // Only form truncstores for value types of max 64b. |
1061 | return MRI.getType(Reg: SrcReg).getSizeInBits() <= 64; |
1062 | } |
1063 | |
1064 | void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI, |
1065 | MachineIRBuilder &B, GISelChangeObserver &Observer, |
1066 | Register &SrcReg) { |
1067 | assert(MI.getOpcode() == TargetOpcode::G_STORE); |
1068 | Observer.changingInstr(MI); |
1069 | MI.getOperand(i: 0).setReg(SrcReg); |
1070 | Observer.changedInstr(MI); |
1071 | } |
1072 | |
// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
// form in the first place for combine opportunities, so any remaining ones at
// this stage need to be lowered back.
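//
// E.g. a G_SEXT_INREG of a <4 x s32> vector from 8 bits is lowered back to a
// shift pair: a left shift by 24 followed by an arithmetic shift right by 24
// on each lane.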
1076 | bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) { |
1077 | assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); |
1078 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1079 | LLT DstTy = MRI.getType(Reg: DstReg); |
1080 | return DstTy.isVector(); |
1081 | } |
1082 | |
1083 | void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI, |
1084 | MachineIRBuilder &B, GISelChangeObserver &Observer) { |
1085 | assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); |
1086 | B.setInstrAndDebugLoc(MI); |
1087 | LegalizerHelper Helper(*MI.getMF(), Observer, B); |
1088 | Helper.lower(MI, TypeIdx: 0, /* Unused hint type */ Ty: LLT()); |
1089 | } |
1090 | |
1091 | /// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N) |
1092 | /// => unused, <N x t> = unmerge v |
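///
/// E.g. if only the first <2 x s32> half of unmerge(G_EXT %v:(<4 x s32>),
/// undef, 8) is used, that half is just the high half of %v, so the G_EXT can
/// be dropped and the unmerge can read %v directly.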
1093 | bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, |
1094 | Register &MatchInfo) { |
1095 | auto &Unmerge = cast<GUnmerge>(Val&: MI); |
1096 | if (Unmerge.getNumDefs() != 2) |
1097 | return false; |
1098 | if (!MRI.use_nodbg_empty(RegNo: Unmerge.getReg(Idx: 1))) |
1099 | return false; |
1100 | |
1101 | LLT DstTy = MRI.getType(Reg: Unmerge.getReg(Idx: 0)); |
1102 | if (!DstTy.isVector()) |
1103 | return false; |
1104 | |
1105 | MachineInstr *Ext = getOpcodeDef(Opcode: AArch64::G_EXT, Reg: Unmerge.getSourceReg(), MRI); |
1106 | if (!Ext) |
1107 | return false; |
1108 | |
1109 | Register ExtSrc1 = Ext->getOperand(i: 1).getReg(); |
1110 | Register ExtSrc2 = Ext->getOperand(i: 2).getReg(); |
1111 | auto LowestVal = |
1112 | getIConstantVRegValWithLookThrough(VReg: Ext->getOperand(i: 3).getReg(), MRI); |
1113 | if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes()) |
1114 | return false; |
1115 | |
1116 | if (!getOpcodeDef<GImplicitDef>(Reg: ExtSrc2, MRI)) |
1117 | return false; |
1118 | |
1119 | MatchInfo = ExtSrc1; |
1120 | return true; |
1121 | } |
1122 | |
1123 | void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, |
1124 | MachineIRBuilder &B, |
1125 | GISelChangeObserver &Observer, Register &SrcReg) { |
1126 | Observer.changingInstr(MI); |
1127 | // Swap dst registers. |
1128 | Register Dst1 = MI.getOperand(i: 0).getReg(); |
1129 | MI.getOperand(i: 0).setReg(MI.getOperand(i: 1).getReg()); |
1130 | MI.getOperand(i: 1).setReg(Dst1); |
1131 | MI.getOperand(i: 2).setReg(SrcReg); |
1132 | Observer.changedInstr(MI); |
1133 | } |
1134 | |
// Match either mul({z/s}ext, {z/s}ext), which becomes {u/s}mull, or a v2s64
// mul, which will be scalarised later on. Both are matched in one function so
// that the order of matching is always the same: try lowering MUL to MULL
// before trying to scalarise, if needed.
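//
// E.g. a <4 x s32> G_MUL of two G_ZEXTs from <4 x s16> becomes a G_UMULL of
// the narrow operands, while a <2 x s64> G_MUL with no such extends is instead
// broken down into scalar multiplies.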
1140 | bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) { |
1141 | // Get the instructions that defined the source operand |
1142 | LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg()); |
1143 | MachineInstr *I1 = getDefIgnoringCopies(Reg: MI.getOperand(i: 1).getReg(), MRI); |
1144 | MachineInstr *I2 = getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI); |
1145 | |
1146 | if (DstTy.isVector()) { |
1147 | // If the source operands were EXTENDED before, then {U/S}MULL can be used |
1148 | unsigned I1Opc = I1->getOpcode(); |
1149 | unsigned I2Opc = I2->getOpcode(); |
1150 | if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) || |
1151 | (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) && |
1152 | (MRI.getType(Reg: I1->getOperand(i: 0).getReg()).getScalarSizeInBits() == |
1153 | MRI.getType(Reg: I1->getOperand(i: 1).getReg()).getScalarSizeInBits() * 2) && |
1154 | (MRI.getType(Reg: I2->getOperand(i: 0).getReg()).getScalarSizeInBits() == |
1155 | MRI.getType(Reg: I2->getOperand(i: 1).getReg()).getScalarSizeInBits() * 2)) { |
1156 | return true; |
1157 | } |
1158 | // If result type is v2s64, scalarise the instruction |
1159 | else if (DstTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1160 | return true; |
1161 | } |
1162 | } |
1163 | return false; |
1164 | } |
1165 | |
1166 | void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI, |
1167 | MachineIRBuilder &B, GISelChangeObserver &Observer) { |
1168 | assert(MI.getOpcode() == TargetOpcode::G_MUL && |
1169 | "Expected a G_MUL instruction" ); |
1170 | |
1171 | // Get the instructions that defined the source operand |
1172 | LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg()); |
1173 | MachineInstr *I1 = getDefIgnoringCopies(Reg: MI.getOperand(i: 1).getReg(), MRI); |
1174 | MachineInstr *I2 = getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI); |
1175 | |
1176 | // If the source operands were EXTENDED before, then {U/S}MULL can be used |
1177 | unsigned I1Opc = I1->getOpcode(); |
1178 | unsigned I2Opc = I2->getOpcode(); |
1179 | if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) || |
1180 | (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) && |
1181 | (MRI.getType(Reg: I1->getOperand(i: 0).getReg()).getScalarSizeInBits() == |
1182 | MRI.getType(Reg: I1->getOperand(i: 1).getReg()).getScalarSizeInBits() * 2) && |
1183 | (MRI.getType(Reg: I2->getOperand(i: 0).getReg()).getScalarSizeInBits() == |
1184 | MRI.getType(Reg: I2->getOperand(i: 1).getReg()).getScalarSizeInBits() * 2)) { |
1185 | |
1186 | B.setInstrAndDebugLoc(MI); |
1187 | B.buildInstr(Opc: I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL |
1188 | : AArch64::G_SMULL, |
1189 | DstOps: {MI.getOperand(i: 0).getReg()}, |
1190 | SrcOps: {I1->getOperand(i: 1).getReg(), I2->getOperand(i: 1).getReg()}); |
1191 | MI.eraseFromParent(); |
1192 | } |
1193 | // If result type is v2s64, scalarise the instruction |
1194 | else if (DstTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) { |
1195 | LegalizerHelper Helper(*MI.getMF(), Observer, B); |
1196 | B.setInstrAndDebugLoc(MI); |
1197 | Helper.fewerElementsVector( |
1198 | MI, TypeIdx: 0, |
1199 | NarrowTy: DstTy.changeElementCount( |
1200 | EC: DstTy.getElementCount().divideCoefficientBy(RHS: 2))); |
1201 | } |
1202 | } |
1203 | |
1204 | class AArch64PostLegalizerLoweringImpl : public Combiner { |
1205 | protected: |
1206 | // TODO: Make CombinerHelper methods const. |
1207 | mutable CombinerHelper Helper; |
1208 | const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig; |
1209 | const AArch64Subtarget &STI; |
1210 | |
1211 | public: |
1212 | AArch64PostLegalizerLoweringImpl( |
1213 | MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, |
1214 | GISelCSEInfo *CSEInfo, |
1215 | const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig, |
1216 | const AArch64Subtarget &STI); |
1217 | |
  static const char *getName() { return "AArch64PostLegalizerLowering"; }
1219 | |
1220 | bool tryCombineAll(MachineInstr &I) const override; |
1221 | |
1222 | private: |
1223 | #define GET_GICOMBINER_CLASS_MEMBERS |
1224 | #include "AArch64GenPostLegalizeGILowering.inc" |
1225 | #undef GET_GICOMBINER_CLASS_MEMBERS |
1226 | }; |
1227 | |
1228 | #define GET_GICOMBINER_IMPL |
1229 | #include "AArch64GenPostLegalizeGILowering.inc" |
1230 | #undef GET_GICOMBINER_IMPL |
1231 | |
1232 | AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl( |
1233 | MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, |
1234 | GISelCSEInfo *CSEInfo, |
1235 | const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig, |
1236 | const AArch64Subtarget &STI) |
1237 | : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo), |
1238 | Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig), |
1239 | STI(STI), |
1240 | #define GET_GICOMBINER_CONSTRUCTOR_INITS |
1241 | #include "AArch64GenPostLegalizeGILowering.inc" |
1242 | #undef GET_GICOMBINER_CONSTRUCTOR_INITS |
1243 | { |
1244 | } |
1245 | |
1246 | class AArch64PostLegalizerLowering : public MachineFunctionPass { |
1247 | public: |
1248 | static char ID; |
1249 | |
1250 | AArch64PostLegalizerLowering(); |
1251 | |
1252 | StringRef getPassName() const override { |
1253 | return "AArch64PostLegalizerLowering" ; |
1254 | } |
1255 | |
1256 | bool runOnMachineFunction(MachineFunction &MF) override; |
1257 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
1258 | |
1259 | private: |
1260 | AArch64PostLegalizerLoweringImplRuleConfig RuleConfig; |
1261 | }; |
1262 | } // end anonymous namespace |
1263 | |
1264 | void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const { |
1265 | AU.addRequired<TargetPassConfig>(); |
1266 | AU.setPreservesCFG(); |
1267 | getSelectionDAGFallbackAnalysisUsage(AU); |
1268 | MachineFunctionPass::getAnalysisUsage(AU); |
1269 | } |
1270 | |
1271 | AArch64PostLegalizerLowering::AArch64PostLegalizerLowering() |
1272 | : MachineFunctionPass(ID) { |
1273 | initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry()); |
1274 | |
1275 | if (!RuleConfig.parseCommandLineOption()) |
1276 | report_fatal_error(reason: "Invalid rule identifier" ); |
1277 | } |
1278 | |
1279 | bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) { |
1280 | if (MF.getProperties().hasProperty( |
1281 | P: MachineFunctionProperties::Property::FailedISel)) |
1282 | return false; |
1283 | assert(MF.getProperties().hasProperty( |
1284 | MachineFunctionProperties::Property::Legalized) && |
1285 | "Expected a legalized function?" ); |
1286 | auto *TPC = &getAnalysis<TargetPassConfig>(); |
1287 | const Function &F = MF.getFunction(); |
1288 | |
1289 | const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>(); |
1290 | CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, |
1291 | /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true, |
1292 | F.hasOptSize(), F.hasMinSize()); |
1293 | AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr, |
1294 | RuleConfig, ST); |
1295 | return Impl.combineMachineInstrs(); |
1296 | } |
1297 | |
1298 | char AArch64PostLegalizerLowering::ID = 0; |
1299 | INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE, |
1300 | "Lower AArch64 MachineInstrs after legalization" , false, |
1301 | false) |
1302 | INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) |
1303 | INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE, |
1304 | "Lower AArch64 MachineInstrs after legalization" , false, |
1305 | false) |
1306 | |
1307 | namespace llvm { |
1308 | FunctionPass *createAArch64PostLegalizerLowering() { |
1309 | return new AArch64PostLegalizerLowering(); |
1310 | } |
1311 | } // end namespace llvm |
1312 | |