//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// before the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AArch64GlobalISelUtils.h"
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"

#define GET_GICOMBINER_DEPS
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "aarch64-prelegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {

#define GET_GICOMBINER_TYPES
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

/// Return true if a G_FCONSTANT instruction is known to be better-represented
/// as a G_CONSTANT.
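///
/// A minimal MIR sketch of the intended rewrite (register names are invented;
/// 1065353216 is the bit pattern of 1.0f). The constant is only ever stored,
/// so materializing it as an integer constant in a GPR is preferable:
///
///   %fv:_(s32) = G_FCONSTANT float 1.000000e+00
///   G_STORE %fv(s32), %ptr(p0)
/// becomes
///   %iv:_(s32) = G_CONSTANT i32 1065353216
///   G_STORE %iv(s32), %ptr(p0)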
bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  Register DstReg = MI.getOperand(0).getReg();
  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
  if (DstSize != 32 && DstSize != 64)
    return false;

  // When we're storing a value, it doesn't matter what register bank it's on.
  // Since not all floating point constants can be materialized using a fmov,
  // it makes more sense to just use a GPR.
  return all_of(MRI.use_nodbg_instructions(DstReg),
                [](const MachineInstr &Use) { return Use.mayStore(); });
}

/// Change a G_FCONSTANT into a G_CONSTANT.
void applyFConstantToConstant(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  MachineIRBuilder MIB(MI);
  const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
  MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
  MI.eraseFromParent();
}

/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
/// are sign bits. In this case, we can transform the G_ICMP to directly compare
/// the wide value with a zero.
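///
/// Illustrative MIR sketch (names invented; here the truncated bits of %wide
/// are known to be sign bits because it comes from a G_SEXT):
///
///   %wide:_(s64) = G_SEXT %x(s32)
///   %lhs:_(s32) = G_TRUNC %wide(s64)
///   %zero:_(s32) = G_CONSTANT i32 0
///   %cmp:_(s1) = G_ICMP intpred(eq), %lhs(s32), %zero
/// becomes
///   %zero64:_(s64) = G_CONSTANT i64 0
///   %cmp:_(s1) = G_ICMP intpred(eq), %wide(s64), %zero64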
bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                             GISelKnownBits *KB, Register &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);

  auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
  if (!ICmpInst::isEquality(Pred))
    return false;

  Register LHS = MI.getOperand(2).getReg();
  LLT LHSTy = MRI.getType(LHS);
  if (!LHSTy.isScalar())
    return false;

  Register RHS = MI.getOperand(3).getReg();
  Register WideReg;

  if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
      !mi_match(RHS, MRI, m_SpecificICst(0)))
    return false;

  LLT WideTy = MRI.getType(WideReg);
  if (KB->computeNumSignBits(WideReg) <=
      WideTy.getSizeInBits() - LHSTy.getSizeInBits())
    return false;

  MatchInfo = WideReg;
  return true;
}

void applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                             MachineIRBuilder &Builder,
                             GISelChangeObserver &Observer, Register &WideReg) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);

  LLT WideTy = MRI.getType(WideReg);
  // We're going to directly use the wide register as the LHS, and then use an
  // equivalent size zero for RHS.
  Builder.setInstrAndDebugLoc(MI);
  auto WideZero = Builder.buildConstant(WideTy, 0);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(WideReg);
  MI.getOperand(3).setReg(WideZero.getReg(0));
  Observer.changedInstr(MI);
}

/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
///
/// e.g.
///
/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                           std::pair<uint64_t, uint64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  MachineFunction &MF = *MI.getMF();
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
    return false;

  // Don't allow anything that could represent offsets etc.
  if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
          GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
    return false;

  // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // Identify the *smallest* constant. We want to be able to form this:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...
  Register Dst = MI.getOperand(0).getReg();
  uint64_t MinOffset = -1ull;
  for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
    if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
      return false;
    auto Cst = getIConstantVRegValWithLookThrough(
        UseInstr.getOperand(2).getReg(), MRI);
    if (!Cst)
      return false;
    MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
  }

  // Require that the new offset is larger than the existing one to avoid
  // infinite loops.
  uint64_t CurrOffset = GlobalOp.getOffset();
  uint64_t NewOffset = MinOffset + CurrOffset;
  if (NewOffset <= CurrOffset)
    return false;

  // Check whether folding this offset is legal. It must not go out of bounds of
  // the referenced object to avoid violating the code model, and must be
  // smaller than 2^20 because this is the largest offset expressible in all
  // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
  // stores an immediate signed 21 bit offset.)
  //
  // This check also prevents us from folding negative offsets, which will end
  // up being treated in the same way as large positive ones. They could also
  // cause code model violations, and aren't really common enough to matter.
  if (NewOffset >= (1 << 20))
    return false;

  Type *T = GV->getValueType();
  if (!T->isSized() ||
      NewOffset > GV->getDataLayout().getTypeAllocSize(T))
    return false;
  MatchInfo = std::make_pair(NewOffset, MinOffset);
  return true;
}

void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                           MachineIRBuilder &B, GISelChangeObserver &Observer,
                           std::pair<uint64_t, uint64_t> &MatchInfo) {
  // Change:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // To:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // Then, the original G_PTR_ADDs should be folded later on so that they look
  // like this:
  //
  //  %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
  uint64_t Offset, MinOffset;
  std::tie(Offset, MinOffset) = MatchInfo;
  B.setInstrAndDebugLoc(*std::next(MI.getIterator()));
  Observer.changingInstr(MI);
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
  Register Dst = MI.getOperand(0).getReg();
  Register NewGVDst = MRI.cloneVirtualRegister(Dst);
  MI.getOperand(0).setReg(NewGVDst);
  Observer.changedInstr(MI);
  B.buildPtrAdd(
      Dst, NewGVDst,
      B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
}

// Combines vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add(udot(x, y))
// Or vecreduce_add(ext(x)) -> vecreduce_add(udot(x, 1))
// Similar to performVecReduceAddCombine in SelectionDAG
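//
// Illustrative sketch of the mul case for <16 x s8> inputs (register names
// are invented and the zero accumulator is shown schematically):
//
//   %xe:_(<16 x s32>) = G_ZEXT %x(<16 x s8>)
//   %ye:_(<16 x s32>) = G_ZEXT %y(<16 x s8>)
//   %m:_(<16 x s32>) = G_MUL %xe, %ye
//   %sum:_(s32) = G_VECREDUCE_ADD %m(<16 x s32>)
// becomes
//   %zeroes:_(<4 x s32>) = (vector of zeroes)
//   %dot:_(<4 x s32>) = G_UDOT %zeroes, %x, %y
//   %sum:_(s32) = G_VECREDUCE_ADD %dot(<4 x s32>)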
bool matchExtAddvToUdotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64Subtarget &STI,
                            std::tuple<Register, Register, bool> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
         "Expected a G_VECREDUCE_ADD instruction");
  assert(STI.hasDotProd() && "Target should have Dot Product feature");

  MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  Register DstReg = MI.getOperand(0).getReg();
  Register MidReg = I1->getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT MidTy = MRI.getType(MidReg);
  if (DstTy.getScalarSizeInBits() != 32 || MidTy.getScalarSizeInBits() != 32)
    return false;

  LLT SrcTy;
  auto I1Opc = I1->getOpcode();
  if (I1Opc == TargetOpcode::G_MUL) {
    // If the result of the G_MUL has more than one use, then there is no
    // point in creating a udot instruction.
    if (!MRI.hasOneNonDBGUse(MidReg))
      return false;

    MachineInstr *ExtMI1 =
        getDefIgnoringCopies(I1->getOperand(1).getReg(), MRI);
    MachineInstr *ExtMI2 =
        getDefIgnoringCopies(I1->getOperand(2).getReg(), MRI);
    LLT Ext1DstTy = MRI.getType(ExtMI1->getOperand(0).getReg());
    LLT Ext2DstTy = MRI.getType(ExtMI2->getOperand(0).getReg());

    if (ExtMI1->getOpcode() != ExtMI2->getOpcode() || Ext1DstTy != Ext2DstTy)
      return false;
    I1Opc = ExtMI1->getOpcode();
    SrcTy = MRI.getType(ExtMI1->getOperand(1).getReg());
    std::get<0>(MatchInfo) = ExtMI1->getOperand(1).getReg();
    std::get<1>(MatchInfo) = ExtMI2->getOperand(1).getReg();
  } else {
    SrcTy = MRI.getType(I1->getOperand(1).getReg());
    std::get<0>(MatchInfo) = I1->getOperand(1).getReg();
    std::get<1>(MatchInfo) = 0;
  }

  if (I1Opc == TargetOpcode::G_ZEXT)
    std::get<2>(MatchInfo) = 0;
  else if (I1Opc == TargetOpcode::G_SEXT)
    std::get<2>(MatchInfo) = 1;
  else
    return false;

  if (SrcTy.getScalarSizeInBits() != 8 || SrcTy.getNumElements() % 8 != 0)
    return false;

  return true;
}

void applyExtAddvToUdotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &Builder,
                            GISelChangeObserver &Observer,
                            const AArch64Subtarget &STI,
                            std::tuple<Register, Register, bool> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
         "Expected a G_VECREDUCE_ADD instruction");
  assert(STI.hasDotProd() && "Target should have Dot Product feature");

  // Initialise the variables
  unsigned DotOpcode =
      std::get<2>(MatchInfo) ? AArch64::G_SDOT : AArch64::G_UDOT;
  Register Ext1SrcReg = std::get<0>(MatchInfo);

  // If there is only one source register, create a vector of 1s as the second
  // source register, so that the dot product reduces to a plain sum.
  Register Ext2SrcReg;
  if (std::get<1>(MatchInfo) == 0)
    Ext2SrcReg = Builder.buildConstant(MRI.getType(Ext1SrcReg), 1)
                     ->getOperand(0)
                     .getReg();
  else
    Ext2SrcReg = std::get<1>(MatchInfo);

  // Find out how many DOT instructions are needed
  LLT SrcTy = MRI.getType(Ext1SrcReg);
  LLT MidTy;
  unsigned NumOfDotMI;
  if (SrcTy.getNumElements() % 16 == 0) {
    NumOfDotMI = SrcTy.getNumElements() / 16;
    MidTy = LLT::fixed_vector(4, 32);
  } else if (SrcTy.getNumElements() % 8 == 0) {
    NumOfDotMI = SrcTy.getNumElements() / 8;
    MidTy = LLT::fixed_vector(2, 32);
  } else {
    llvm_unreachable("Source type number of elements is not multiple of 8");
  }

  // Handle case where one DOT instruction is needed
  if (NumOfDotMI == 1) {
    auto Zeroes = Builder.buildConstant(MidTy, 0)->getOperand(0).getReg();
    auto Dot = Builder.buildInstr(DotOpcode, {MidTy},
                                  {Zeroes, Ext1SrcReg, Ext2SrcReg});
    Builder.buildVecReduceAdd(MI.getOperand(0), Dot->getOperand(0));
  } else {
    // If not, pad the last v8 element with 0s to form a v16
    SmallVector<Register, 4> Ext1UnmergeReg;
    SmallVector<Register, 4> Ext2UnmergeReg;
    if (SrcTy.getNumElements() % 16 != 0) {
      SmallVector<Register> Leftover1;
      SmallVector<Register> Leftover2;

      // Split the elements into v16i8 and v8i8
      LLT MainTy = LLT::fixed_vector(16, 8);
      LLT LeftoverTy1, LeftoverTy2;
      if ((!extractParts(Ext1SrcReg, MRI.getType(Ext1SrcReg), MainTy,
                         LeftoverTy1, Ext1UnmergeReg, Leftover1, Builder,
                         MRI)) ||
          (!extractParts(Ext2SrcReg, MRI.getType(Ext2SrcReg), MainTy,
                         LeftoverTy2, Ext2UnmergeReg, Leftover2, Builder,
                         MRI))) {
        llvm_unreachable("Unable to split this vector properly");
      }

      // Pad the leftover v8i8 vector with a register of 0s of type v8i8
      Register v8Zeroes = Builder.buildConstant(LLT::fixed_vector(8, 8), 0)
                              ->getOperand(0)
                              .getReg();

      Ext1UnmergeReg.push_back(
          Builder
              .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
                                   {Leftover1[0], v8Zeroes})
              .getReg(0));
      Ext2UnmergeReg.push_back(
          Builder
              .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
                                   {Leftover2[0], v8Zeroes})
              .getReg(0));

    } else {
      // Unmerge the source vectors to v16i8
      unsigned SrcNumElts = SrcTy.getNumElements();
      extractParts(Ext1SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
                   Ext1UnmergeReg, Builder, MRI);
      extractParts(Ext2SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
                   Ext2UnmergeReg, Builder, MRI);
    }

    // Build the UDOT instructions
    SmallVector<Register, 2> DotReg;
    unsigned NumElements = 0;
    for (unsigned i = 0; i < Ext1UnmergeReg.size(); i++) {
      LLT ZeroesLLT;
      // Check if it is 16 or 8 elements. Set Zeroes to the corresponding size.
      if (MRI.getType(Ext1UnmergeReg[i]).getNumElements() == 16) {
        ZeroesLLT = LLT::fixed_vector(4, 32);
        NumElements += 4;
      } else {
        ZeroesLLT = LLT::fixed_vector(2, 32);
        NumElements += 2;
      }
      auto Zeroes = Builder.buildConstant(ZeroesLLT, 0)->getOperand(0).getReg();
      DotReg.push_back(
          Builder
              .buildInstr(DotOpcode, {MRI.getType(Zeroes)},
                          {Zeroes, Ext1UnmergeReg[i], Ext2UnmergeReg[i]})
              .getReg(0));
    }

    // Merge the output
    auto ConcatMI =
        Builder.buildConcatVectors(LLT::fixed_vector(NumElements, 32), DotReg);

    // Put it through a vector reduction
    Builder.buildVecReduceAdd(MI.getOperand(0).getReg(),
                              ConcatMI->getOperand(0).getReg());
  }

  // Erase the dead instructions
  MI.eraseFromParent();
}

// Matches {U/S}ADDV(ext(x)) => {U/S}ADDLV(x)
// Ensure that the type coming from the extend instruction is the right size
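//
// Illustrative sketch (invented names; a <8 x s8> zext feeding a 16-bit
// reduction is one of the accepted shapes):
//
//   %ext:_(<8 x s16>) = G_ZEXT %x(<8 x s8>)
//   %sum:_(s16) = G_VECREDUCE_ADD %ext(<8 x s16>)
// becomes (conceptually)
//   %addlv:_(<4 x s32>) = G_UADDLV %x(<8 x s8>)
//   %elt:_(s32) = G_EXTRACT_VECTOR_ELT %addlv, lane 0
//   %sum:_(s16) = G_TRUNC %elt(s32)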
bool matchExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
                           std::pair<Register, bool> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
         "Expected G_VECREDUCE_ADD Opcode");

  // Check if the last instruction is an extend
  MachineInstr *ExtMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  auto ExtOpc = ExtMI->getOpcode();

  if (ExtOpc == TargetOpcode::G_ZEXT)
    std::get<1>(MatchInfo) = 0;
  else if (ExtOpc == TargetOpcode::G_SEXT)
    std::get<1>(MatchInfo) = 1;
  else
    return false;

  // Check if the source register is a valid type
  Register ExtSrcReg = ExtMI->getOperand(1).getReg();
  LLT ExtSrcTy = MRI.getType(ExtSrcReg);
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  if ((DstTy.getScalarSizeInBits() == 16 &&
       ExtSrcTy.getNumElements() % 8 == 0 && ExtSrcTy.getNumElements() < 256) ||
      (DstTy.getScalarSizeInBits() == 32 &&
       ExtSrcTy.getNumElements() % 4 == 0) ||
      (DstTy.getScalarSizeInBits() == 64 &&
       ExtSrcTy.getNumElements() % 4 == 0)) {
    std::get<0>(MatchInfo) = ExtSrcReg;
    return true;
  }
  return false;
}

void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
                           MachineIRBuilder &B, GISelChangeObserver &Observer,
                           std::pair<Register, bool> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
         "Expected G_VECREDUCE_ADD Opcode");

  unsigned Opc = std::get<1>(MatchInfo) ? AArch64::G_SADDLV : AArch64::G_UADDLV;
  Register SrcReg = std::get<0>(MatchInfo);
  Register DstReg = MI.getOperand(0).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  LLT DstTy = MRI.getType(DstReg);

  // If SrcTy has more elements than expected, split them into multiple
  // instructions and sum the results
  LLT MainTy;
  SmallVector<Register, 1> WorkingRegisters;
  unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
  unsigned SrcNumElem = SrcTy.getNumElements();
  if ((SrcScalSize == 8 && SrcNumElem > 16) ||
      (SrcScalSize == 16 && SrcNumElem > 8) ||
      (SrcScalSize == 32 && SrcNumElem > 4)) {

    LLT LeftoverTy;
    SmallVector<Register, 4> LeftoverRegs;
    if (SrcScalSize == 8)
      MainTy = LLT::fixed_vector(16, 8);
    else if (SrcScalSize == 16)
      MainTy = LLT::fixed_vector(8, 16);
    else if (SrcScalSize == 32)
      MainTy = LLT::fixed_vector(4, 32);
    else
      llvm_unreachable("Source's Scalar Size not supported");

    // Extract the parts, put each extracted source through U/SADDLV, and
    // collect the values in a small vector
    extractParts(SrcReg, SrcTy, MainTy, LeftoverTy, WorkingRegisters,
                 LeftoverRegs, B, MRI);
    for (unsigned I = 0; I < LeftoverRegs.size(); I++) {
      WorkingRegisters.push_back(LeftoverRegs[I]);
    }
  } else {
    WorkingRegisters.push_back(SrcReg);
    MainTy = SrcTy;
  }

  unsigned MidScalarSize = MainTy.getScalarSizeInBits() * 2;
  LLT MidScalarLLT = LLT::scalar(MidScalarSize);
  Register zeroReg = B.buildConstant(LLT::scalar(64), 0).getReg(0);
  for (unsigned I = 0; I < WorkingRegisters.size(); I++) {
    // If the number of elements is too small to build an instruction, extend
    // its size before applying addlv
    LLT WorkingRegTy = MRI.getType(WorkingRegisters[I]);
    if ((WorkingRegTy.getScalarSizeInBits() == 8) &&
        (WorkingRegTy.getNumElements() == 4)) {
      WorkingRegisters[I] =
          B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
                                              : TargetOpcode::G_ZEXT,
                       {LLT::fixed_vector(4, 16)}, {WorkingRegisters[I]})
              .getReg(0);
    }

    // Generate the {U/S}ADDLV instruction, whose output is always double the
    // width of the source's scalar type
    LLT addlvTy = MidScalarSize <= 32 ? LLT::fixed_vector(4, 32)
                                      : LLT::fixed_vector(2, 64);
    Register addlvReg =
        B.buildInstr(Opc, {addlvTy}, {WorkingRegisters[I]}).getReg(0);

    // The output from {U/S}ADDLV gets placed in the lowest lane of a v4i32 or
    // v2i64 register:
    //   i16 and i32 results use a v4i32 register
    //   i64 results use a v2i64 register
    // Therefore we have to extract/truncate the value to the right type
    if (MidScalarSize == 32 || MidScalarSize == 64) {
      WorkingRegisters[I] = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
                                         {MidScalarLLT}, {addlvReg, zeroReg})
                                .getReg(0);
    } else {
      Register extractReg = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
                                         {LLT::scalar(32)}, {addlvReg, zeroReg})
                                .getReg(0);
      WorkingRegisters[I] =
          B.buildTrunc({MidScalarLLT}, {extractReg}).getReg(0);
    }
  }

  Register outReg;
  if (WorkingRegisters.size() > 1) {
    outReg = B.buildAdd(MidScalarLLT, WorkingRegisters[0], WorkingRegisters[1])
                 .getReg(0);
    for (unsigned I = 2; I < WorkingRegisters.size(); I++) {
      outReg = B.buildAdd(MidScalarLLT, outReg, WorkingRegisters[I]).getReg(0);
    }
  } else {
    outReg = WorkingRegisters[0];
  }

  if (DstTy.getScalarSizeInBits() > MidScalarSize) {
    // Handle the scalar value if the DstTy's scalar size is more than double
    // the source's scalar size
    B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
                                        : TargetOpcode::G_ZEXT,
                 {DstReg}, {outReg});
  } else {
    B.buildCopy(DstReg, outReg);
  }

  MI.eraseFromParent();
}

// Pushes ADD/SUB through extend instructions to decrease the number of extend
// instructions at the end by allowing selection of {s|u}addl sooner.

// i32 add(i32 ext i8, i32 ext i8) => i32 ext(i16 add(i16 ext i8, i16 ext i8))
bool matchPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
                        Register DstReg, Register SrcReg1, Register SrcReg2) {
  assert((MI.getOpcode() == TargetOpcode::G_ADD ||
          MI.getOpcode() == TargetOpcode::G_SUB) &&
         "Expected a G_ADD or G_SUB instruction\n");

  // Deal with vector types only
  LLT DstTy = MRI.getType(DstReg);
  if (!DstTy.isVector())
    return false;

  // Return true only if the G_{S|Z}EXT destination is more than twice the
  // size of the source, and both sources have the same type.
  Register ExtDstReg = MI.getOperand(1).getReg();
  LLT Ext1SrcTy = MRI.getType(SrcReg1);
  LLT Ext2SrcTy = MRI.getType(SrcReg2);
  unsigned ExtDstScal = MRI.getType(ExtDstReg).getScalarSizeInBits();
  unsigned Ext1SrcScal = Ext1SrcTy.getScalarSizeInBits();
  if (((Ext1SrcScal == 8 && ExtDstScal == 32) ||
       ((Ext1SrcScal == 8 || Ext1SrcScal == 16) && ExtDstScal == 64)) &&
      Ext1SrcTy == Ext2SrcTy)
    return true;

  return false;
}

void applyPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
                        MachineIRBuilder &B, bool isSExt, Register DstReg,
                        Register SrcReg1, Register SrcReg2) {
  LLT SrcTy = MRI.getType(SrcReg1);
  LLT MidTy = SrcTy.changeElementSize(SrcTy.getScalarSizeInBits() * 2);
  unsigned Opc = isSExt ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Register Ext1Reg = B.buildInstr(Opc, {MidTy}, {SrcReg1}).getReg(0);
  Register Ext2Reg = B.buildInstr(Opc, {MidTy}, {SrcReg2}).getReg(0);
  Register AddReg =
      B.buildInstr(MI.getOpcode(), {MidTy}, {Ext1Reg, Ext2Reg}).getReg(0);

  // G_SUB has to sign-extend the result.
  // G_ADD needs to sext from sext and can sext or zext from zext, so the
  // original opcode is used.
  if (MI.getOpcode() == TargetOpcode::G_ADD)
    B.buildInstr(Opc, {DstReg}, {AddReg});
  else
    B.buildSExt(DstReg, AddReg);

  MI.eraseFromParent();
}

bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
                        CombinerHelper &Helper, GISelChangeObserver &Observer) {
  // Try to simplify a G_UADDO with 8 or 16 bit operands to a wide G_ADD and a
  // TBNZ if the result is only used in the no-overflow case. It is restricted
  // to cases where we know that the high-bits of the operands are 0. If there's
  // an overflow, then the 9th or 17th bit must be set, which can be checked
  // using TBNZ.
  //
  // Change (for UADDOs on 8 and 16 bits):
  //
  //   %z0 = G_ASSERT_ZEXT _
  //   %op0 = G_TRUNC %z0
  //   %z1 = G_ASSERT_ZEXT _
  //   %op1 = G_TRUNC %z1
  //   %val, %cond = G_UADDO %op0, %op1
  //   G_BRCOND %cond, %error.bb
  //
  // error.bb:
  //   (no successors and no uses of %val)
  //
  // To:
  //
  //   %z0 = G_ASSERT_ZEXT _
  //   %z1 = G_ASSERT_ZEXT _
  //   %add = G_ADD %z0, %z1
  //   %val = G_TRUNC %add
  //   %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
  //   %cond = G_ICMP NE, %bit, 0
  //   G_BRCOND %cond, %error.bb

  auto &MRI = *B.getMRI();

  MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
  MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
  Register Op0Wide;
  Register Op1Wide;
  if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
      !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
    return false;
  LLT WideTy0 = MRI.getType(Op0Wide);
  LLT WideTy1 = MRI.getType(Op1Wide);
  Register ResVal = MI.getOperand(0).getReg();
  LLT OpTy = MRI.getType(ResVal);
  MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
  MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);

  unsigned OpTySize = OpTy.getScalarSizeInBits();
  // First check that the G_TRUNC feeding the G_UADDO are no-ops, because the
  // inputs have been zero-extended.
  if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      OpTySize != Op0WideDef->getOperand(2).getImm() ||
      OpTySize != Op1WideDef->getOperand(2).getImm())
    return false;

  // Only scalar UADDO with either 8 or 16 bit operands are handled.
  if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
      OpTySize >= WideTy0.getScalarSizeInBits() ||
      (OpTySize != 8 && OpTySize != 16))
    return false;

  // The overflow-status result must be used by a branch only.
  Register ResStatus = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(ResStatus))
    return false;
  MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
  if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Make sure the computed result is only used in the no-overflow blocks.
  MachineBasicBlock *CurrentMBB = MI.getParent();
  MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
  if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
    return false;
  if (any_of(MRI.use_nodbg_instructions(ResVal),
             [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
               return &MI != &I &&
                      (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
             }))
    return false;

  // Remove the G_UADDO.
  B.setInstrAndDebugLoc(*MI.getNextNode());
  MI.eraseFromParent();

  // Emit wide add.
  Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
  B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});

  // Emit check of the 9th or 17th bit and update users (the branch). This will
  // later be folded to TBNZ.
  Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
  B.buildAnd(
      CondBit, AddDst,
      B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
  B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
              B.buildConstant(LLT::scalar(32), 0));

  // Update ZEXT users of the result value. Because all uses are in the
  // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
  B.buildZExtOrTrunc(ResVal, AddDst);
  for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
    Register WideReg;
    if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
      auto OldR = U.getParent()->getOperand(0).getReg();
      Observer.erasingInstr(*U.getParent());
      U.getParent()->eraseFromParent();
      Helper.replaceRegWith(MRI, OldR, AddDst);
    }
  }

  return true;
}

class AArch64PreLegalizerCombinerImpl : public Combiner {
protected:
  // TODO: Make CombinerHelper methods const.
  mutable CombinerHelper Helper;
  const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
  const AArch64Subtarget &STI;

public:
  AArch64PreLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AArch64PreLegalizerCombiner"; }

  bool tryCombineAll(MachineInstr &I) const override;

  bool tryCombineAllImpl(MachineInstr &I) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};

#define GET_GICOMBINER_IMPL
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_IMPL

AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI, MachineDominatorTree *MDT,
    const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
      Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
      RuleConfig(RuleConfig), STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return Helper.tryCombineShuffleVector(MI);
  case TargetOpcode::G_UADDO:
    return tryToSimplifyUADDO(MI, B, Helper, Observer);
  case TargetOpcode::G_MEMCPY_INLINE:
    return Helper.tryEmitMemcpyInline(MI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
    // heuristics decide.
    unsigned MaxLen = CInfo.EnableOpt ? 0 : 32;
    // Try to inline memcpy type calls if optimizations are enabled.
    if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
      return true;
    if (Opc == TargetOpcode::G_MEMSET)
      return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, CInfo.EnableMinSize);
    return false;
  }
  }

  return false;
}

// Pass boilerplate
// ================

class AArch64PreLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AArch64PreLegalizerCombiner();

  StringRef getPassName() const override {
    return "AArch64PreLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  AU.addRequired<MachineDominatorTreeWrapperPass>();
  AU.addPreserved<MachineDominatorTreeWrapperPass>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addPreserved<GISelCSEAnalysisWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
    : MachineFunctionPass(ID) {
  initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto &TPC = getAnalysis<TargetPassConfig>();

  // Enable CSE.
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  const auto *LI = ST.getLegalizerInfo();

  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
                     F.hasMinSize());
  AArch64PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB, CSEInfo,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AArch64PreLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
                      "Combine AArch64 machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
                    "Combine AArch64 machine instrs before legalization", false,
                    false)

namespace llvm {
FunctionPass *createAArch64PreLegalizerCombiner() {
  return new AArch64PreLegalizerCombiner();
}
} // end namespace llvm