//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the following peephole optimizations at the MIR level.
//
//   1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//      MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
//   2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//      MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
//   3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//      MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
//      The mov pseudo instruction could be expanded to multiple mov
//      instructions later. In this case, we could try to split the constant
//      operand of the mov instruction into two immediates which can be
//      directly encoded into *Wri/*Xri instructions. That makes two
//      AND/ADD/SUB instructions instead of multiple `mov` + `and/add/sub`
//      instructions.
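//
//      For example (an illustrative case for 1.), 0x00200400 is not a valid
//      bitmask immediate, but it is the AND of two bitmask immediates:
//
//        mov  w8, #0x00200400
//        and  w0, w1, w8
//      ==>
//        and  w0, w1, #0x003ffc00
//        and  w0, w0, #0xffe007ff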
//
//   4. Remove redundant ORRWrs which is generated by zero-extend.
//
//      %3:gpr32 = ORRWrs $wzr, %2, 0
//      %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//      If an AArch64 32-bit form of instruction defines the source operand of
//      the ORRWrs, we can remove the ORRWrs because the upper 32 bits of the
//      source operand are set to zero.
//
//   5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//      ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
//   6. %intermediate:gpr32 = COPY %src:fpr128
//      %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
//      ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
//
//      In cases where a source FPR is copied to a GPR in order to be copied
//      to a destination FPR, we can directly copy the values between the FPRs,
//      eliminating the use of the integer unit. When we match a pattern of
//      INSvi[X]gpr that is preceded by a chain of COPY instructions from an
//      FPR source, we use INSvi[X]lane to replace the COPY and INSvi[X]gpr
//      instructions.
//
//   7. If MI sets zero for high 64-bits implicitly, remove `mov 0` for high
//      64-bits. For example,
//
//      %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//      %2:fpr64 = MOVID 0
//      %4:fpr128 = IMPLICIT_DEF
//      %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
//      %6:fpr128 = IMPLICIT_DEF
//      %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//      %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
//      ==>
//      %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//      %6:fpr128 = IMPLICIT_DEF
//      %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//
//   8. Remove redundant CSELs that select between identical registers, by
//      replacing them with unconditional moves.
//
//   9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32
//      bit LSR or LSL alias of UBFM.
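//
//      For example (illustrative), UBFMXri %src, 2, 31 extracts bits 31:2 of
//      %src, which is "lsr w, #2" zero-extended to 64 bits, so it can be
//      rewritten as a UBFMWri feeding a SUBREG_TO_REG.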
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-mi-peephole-opt"

namespace {

struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {}

  const AArch64InstrInfo *TII;
  const AArch64RegisterInfo *TRI;
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

  using OpcodePair = std::pair<unsigned, unsigned>;
  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
                         Register, Register, Register)>;
  /// For instructions where an immediate operand could be split into two
  /// separate immediate instructions, use splitTwoPartImm to handle the
  /// optimization.
  ///
  /// To implement, the following function types must be passed to
  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
  /// splitting the immediate is valid and returns the associated new opcode. A
  /// BuildMIFunc must be implemented to build the two immediate instructions.
  ///
  /// Example Pattern (where IMM would require 2+ MOV instructions):
  ///   %dst = <Instr>rr %src IMM [...]
  /// becomes:
  ///   %tmp = <Instr>ri %src (encode half IMM) [...]
  ///   %dst = <Instr>ri %tmp (encode half IMM) [...]
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);

  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                        MachineInstr *&SubregToRegMI);

  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);

  template <typename T>
  bool visitAND(unsigned Opc, MachineInstr &MI);
  bool visitORR(MachineInstr &MI);
  bool visitCSEL(MachineInstr &MI);
  bool visitINSERT(MachineInstr &MI);
  bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
  bool visitINSvi64lane(MachineInstr &MI);
  bool visitFMOVDr(MachineInstr &MI);
  bool visitUBFMXri(MachineInstr &MI);
  bool visitCopy(MachineInstr &MI);
  bool runOnMachineFunction(MachineFunction &MF) override;
  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

char AArch64MIPeepholeOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)

template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
    return false;

  // If this immediate can be handled by one instruction, do not split it.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // A bitmask immediate consists of consecutive ones. For example, the
  // constant 0b00000000001000000000010000000000 does not consist of
  // consecutive ones, but it can be split into two bitmask immediates,
  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111,
  // whose AND reproduces the original constant.
  unsigned LowestBitSet = llvm::countr_zero(UImm);
  unsigned HighestBitSet = Log2_64(UImm);

  // Create a mask which is filled with one from the position of lowest bit set
  // to the position of highest bit set.
  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a mask which is filled with one outside the position of lowest bit
  // set and the position of highest bit set.
  T NewImm2 = UImm | ~NewImm1;
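  // For the example constant above, LowestBitSet = 10 and HighestBitSet = 21,
  // giving NewImm1 = 0x003ffc00 (ones in bits 21..10) and
  // NewImm2 = 0xffe007ff.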

  // If the split value is not a valid bitmask immediate, do not split this
  // constant.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI) {
  // Try the below transformation.
  //
  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two bitmask immediates. That makes only two AND instructions instead of
  // multiple mov + and instructions.

  return splitTwoPartImm<T>(
      MI,
      [Opc](T Imm, unsigned RegSize, T &Imm0,
            T &Imm1) -> std::optional<OpcodePair> {
        if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(Opc, Opc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1);
      });
}

bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  // Check whether this ORR comes from the below zero-extend pattern.
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If an AArch64 32-bit form of instruction defines the source operand of the
  // zero-extend, we do not need the zero-extend. Check that the opcode is a
  // real AArch64 instruction; if it is not, conservatively do not process it.
  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());

    // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
    // that the upper bits are zero.
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
          RC != &AArch64::ZPRRegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc;
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    } else {
      CpySrc = SrcMI->getOperand(1).getReg();
    }
    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
        .addReg(CpySrc);
    SrcMI->eraseFromParent();
  } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
  // Replace CSEL with MOV when both inputs are the same register.
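  //
  // For example (illustrative MIR):
  //   %0:gpr32 = CSELWr %1:gpr32, %1:gpr32, 0, implicit $nzcv
  // ==>
  //   %0:gpr32 = ORRWrs $wzr, %1:gpr32, 0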
  if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
    return false;

  auto ZeroReg =
      MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
  auto OrOpcode =
      MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
      .addReg(MI.getOperand(0).getReg(), RegState::Define)
      .addReg(ZeroReg)
      .addReg(MI.getOperand(1).getReg())
      .addImm(0);

  MI.eraseFromParent();
  return true;
}

bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  // Check whether this INSERT_SUBREG comes from the below zero-extend pattern.
  //
  // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
  // To   %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
  //
  // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
  // COPY would destroy the upper part of the register anyway.
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If an AArch64 32-bit form of instruction defines the source operand of the
  // zero-extend, we do not need the zero-extend. Check that the opcode is a
  // real AArch64 instruction; if it is not, conservatively do not process it.
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build a SUBREG_TO_REG instruction.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n");
  (void)SubregMI;
  MI.eraseFromParent();

  return true;
}

template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
  // imm0 and imm1 are non-zero 12-bit unsigned ints.
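  // For example (an illustrative value), Imm = 0x123456 satisfies these checks
  // and splits into Imm0 = 0x123 and Imm1 = 0x456, i.e. (0x123 << 12) + 0x456.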
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~static_cast<T>(0xffffff)) != 0)
    return false;

  // The immediate cannot be composed via a single instruction.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // Split Imm into (Imm0 << 12) + Imm1.
  Imm0 = (Imm >> 12) & 0xfff;
  Imm1 = Imm & 0xfff;
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(unsigned PosOpc, unsigned NegOpc,
                                       MachineInstr &MI) {
  // Try the below transformation.
  //
  // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
  // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
  //
  // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
  // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two legal add/sub immediates. That makes only two ADD/SUB instructions
  // instead of multiple `mov` + `add/sub` instructions.
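  //
  // For example (illustrative):
  //   mov  w8, #0x123456
  //   add  w0, w1, w8
  // ==>
  //   add  w0, w1, #0x123, lsl #12
  //   add  w0, w0, #0x456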

  // We can sometimes have ADDWrr WZR, MOVi32imm that has not been constant
  // folded. Make sure that we don't generate invalid instructions that use XZR
  // in those cases.
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(PosOpc, PosOpc);
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return std::make_pair(NegOpc, NegOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}

template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(OpcodePair PosOpcs,
                                         OpcodePair NegOpcs,
                                         MachineInstr &MI) {
  // Try the same transformation as ADDSUB, with the additional requirement
  // that the condition code usages are only for Equal and Not Equal.

  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        OpcodePair OP;
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          OP = PosOpcs;
        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          OP = NegOpcs;
        else
          return std::nullopt;
        // Check conditional uses last since it is expensive to scan the
        // following instructions.
        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        return OP;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}

// Checks whether the corresponding MOV immediate instruction is applicable for
// this peephole optimization.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  // Check whether the current MBB is in a loop and the instruction is loop
  // invariant.
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  // Check whether the current MI's operand is a MOV with immediate.
  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!MovMI)
    return false;

  // If it is SUBREG_TO_REG, check its operand.
  SubregToRegMI = nullptr;
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;

  // If the MOV has multiple uses, do not split the immediate because it causes
  // more instructions.
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;
  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
    return false;

  // It is OK to perform this peephole optimization.
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI, SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against the current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate into Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // For the 32-bit form of the instruction, the upper 32 bits of the
  // destination register are set to zero. If there is a SUBREG_TO_REG, set the
  // upper 32 bits of Imm to zero. This is essential if the immediate value was
  // a negative number, since it was sign-extended when assigned to the 64-bit
  // Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Create new MIs using the first and second opcodes. The opcodes might
  // differ for flag-setting operations that should only set flags on the
  // second instruction.
  // NewTmpReg = Opcode.first SrcReg Imm0
  // NewDstReg = Opcode.second NewTmpReg Imm1

  // Determine register classes for destinations and register operands.
  MachineFunction *MF = MI.getMF();
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);

  // Get the old destination register and create the new ones.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // In the situation that DstReg is not virtual (likely WZR or XZR), we want
  // to reuse that same destination register.
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain the registers based on their new uses.
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Call the delegating operation to build the instructions.
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith changes MI's definition register. Keep it for SSA form
  // until MI is deleted, but only if we made a new destination register.
  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  // Remove the MIs that are now dead.
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
  // Check whether this INSvi[X]gpr comes from a COPY of a source FPR128.
  //
  // From
  //   %intermediate1:gpr64 = COPY %src:fpr128
  //   %intermediate2:gpr32 = COPY %intermediate1:gpr64
  //   %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
  // To
  //   %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
  //   src_index
  // where src_index = 0, X = [8|16|32|64]

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());

  // For a chain of COPY instructions, find the initial source register
  // and check whether it's an FPR128.
  while (true) {
    if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
      return false;

    if (!SrcMI->getOperand(1).getReg().isVirtual())
      return false;

    if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
        &AArch64::FPR128RegClass) {
      break;
    }
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
  }

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = SrcMI->getOperand(1).getReg();
  MachineInstr *INSvilaneMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
          .add(MI.getOperand(1))
          .add(MI.getOperand(2))
          .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
          .addImm(0);

  LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *INSvilaneMI << "\n");
  (void)INSvilaneMI;
  MI.eraseFromParent();
  return true;
}

// All instructions that set an FPR64 will implicitly zero the top bits of the
// register.
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
                                        MachineRegisterInfo *MRI) {
  if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
    return false;
  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
  if (RC != &AArch64::FPR64RegClass)
    return false;
  return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
}

bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
  // Check that the MI defining the low 64 bits implicitly sets the high
  // 64 bits to zero. We expect the case below.
  //
  // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
  // %6:fpr128 = IMPLICIT_DEF
  // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
  // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Check that there is a `mov 0` MI for the high 64 bits. We expect the
  // cases below.
  //
  // %2:fpr64 = MOVID 0
  // %4:fpr128 = IMPLICIT_DEF
  // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
  // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
  // or
  // %5:fpr128 = MOVIv2d_ns 0
  // %6:fpr64 = COPY %5.dsub:fpr128
  // %8:fpr128 = IMPLICIT_DEF
  // %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
  // %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
  MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
  if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
  if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
    High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
  if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
                    High64MI->getOpcode() != AArch64::MOVIv2d_ns))
    return false;
  if (High64MI->getOperand(1).getImm() != 0)
    return false;

  // Let's remove the MIs for the high 64 bits.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
  // An FMOVDr sets the high 64-bits to zero implicitly, similar to ORR for GPR.
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Let's remove the MIs for the high 64 bits.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
  MRI->clearKillFlags(OldDef);
  MRI->clearKillFlags(NewDef);
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
  // Check if the instruction is equivalent to a 32 bit LSR or LSL alias of
  // UBFM, and replace the UBFMXri instruction with its 32 bit variant,
  // UBFMWri.
  int64_t Immr = MI.getOperand(2).getImm();
  int64_t Imms = MI.getOperand(3).getImm();

  bool IsLSR = Imms == 31 && Immr <= Imms;
  bool IsLSL = Immr == Imms + 33;
  if (!IsLSR && !IsLSL)
    return false;

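  // For LSL #s, UBFMXri encodes Immr = 64 - s and Imms = 31 - s, hence the
  // Imms + 33 check above; subtracting 32 from Immr yields the corresponding
  // UBFMWri encoding, Immr = 32 - s.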
  if (IsLSL) {
    Immr -= 32;
  }

  const TargetRegisterClass *DstRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI, *MI.getMF());
  const TargetRegisterClass *DstRC32 =
      TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  const TargetRegisterClass *SrcRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI, *MI.getMF());
  const TargetRegisterClass *SrcRC32 =
      TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  Register DstReg64 = MI.getOperand(0).getReg();
  Register DstReg32 = MRI->createVirtualRegister(DstRC32);
  Register SrcReg64 = MI.getOperand(1).getReg();
  Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::COPY),
          SrcReg32)
      .addReg(SrcReg64, 0, AArch64::sub_32);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri),
          DstReg32)
      .addReg(SrcReg32)
      .addImm(Immr)
      .addImm(Imms);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
          TII->get(AArch64::SUBREG_TO_REG), DstReg64)
      .addImm(0)
      .addReg(DstReg32)
      .addImm(AArch64::sub_32);
  MI.eraseFromParent();
  return true;
}

// Across a basic block we might have an i32 extract from a value that only
// modifies the upper bits (for example a sxtw). We can replace the COPY with
// a new version that skips the sxtw.
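//
// For example (illustrative MIR), where the SBFMXri is the sxtw:
//   %1:gpr64 = SBFMXri %0:gpr64, 0, 31
//   %2:gpr32 = COPY %1.sub_32
// ==>
//   %2:gpr32 = COPY %0.sub_32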
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  SmallPtrSet<MachineInstr *, 4> DeadInstrs;
  DeadInstrs.insert(SrcMI);
  while (SrcMI && SrcMI->isFullCopy() &&
         MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
    DeadInstrs.insert(SrcMI);
  }

  if (!SrcMI)
    return false;

  // Look for SXTW(X) and return Reg.
  auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SBFMXri ||
        SrcMI->getOperand(2).getImm() != 0 ||
        SrcMI->getOperand(3).getImm() != 31)
      return AArch64::NoRegister;
    return SrcMI->getOperand(1).getReg();
  };
  // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32))).
  auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
        SrcMI->getOperand(3).getImm() != AArch64::sub_32 ||
        !MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg());
    if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
        Orr->getOperand(1).getReg() != AArch64::WZR ||
        !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
    if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
        Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
      return AArch64::NoRegister;
    DeadInstrs.insert(Orr);
    return Cpy->getOperand(1).getReg();
  };

  Register SrcReg = getSXTWSrcReg(SrcMI);
  if (!SrcReg)
    SrcReg = getUXTWSrcReg(SrcMI);
  if (!SrcReg)
    return false;

  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
  MI.getOperand(1).setReg(SrcReg);
  LLVM_DEBUG(dbgs() << "        to: " << MI);
  for (auto *DeadMI : DeadInstrs) {
    LLVM_DEBUG(dbgs() << "  Removing: " << *DeadMI);
    DeadMI->eraseFromParent();
  }
  return true;
}

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      case AArch64::ANDWrr:
        Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);
        break;
      case AArch64::ANDXrr:
        Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
        break;
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                    {AArch64::SUBWri, AArch64::SUBSWri}, MI);
        break;
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                    {AArch64::ADDWri, AArch64::ADDSWri}, MI);
        break;
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                    {AArch64::SUBXri, AArch64::SUBSXri}, MI);
        break;
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                    {AArch64::ADDXri, AArch64::ADDSXri}, MI);
        break;
      case AArch64::CSELWr:
      case AArch64::CSELXr:
        Changed |= visitCSEL(MI);
        break;
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
        break;
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
        break;
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
        break;
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
        break;
      case AArch64::INSvi64lane:
        Changed |= visitINSvi64lane(MI);
        break;
      case AArch64::FMOVDr:
        Changed |= visitFMOVDr(MI);
        break;
      case AArch64::UBFMXri:
        Changed |= visitUBFMXri(MI);
        break;
      case AArch64::COPY:
        Changed |= visitCopy(MI);
        break;
      }
    }
  }

  return Changed;
}

FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}