1//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs below peephole optimizations on MIR level.
10//
11// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
12// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
13//
14// 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
15// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
16//
17// 3. MOVi32imm + SUBWrr ==> SUBWRi + SUBWRi
18// MOVi64imm + SUBXrr ==> SUBXri + SUBXri
19//
20// The mov pseudo instruction could be expanded to multiple mov instructions
21// later. In this case, we could try to split the constant operand of mov
22// instruction into two immediates which can be directly encoded into
23// *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
24// multiple `mov` + `and/add/sub` instructions.
25//
26// 4. Remove redundant ORRWrs which is generated by zero-extend.
27//
28// %3:gpr32 = ORRWrs $wzr, %2, 0
29// %4:gpr64 = SUBREG_TO_REG %3, %subreg.sub_32
30//
31// If AArch64's 32-bit form of instruction defines the source operand of
32// ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
33// operand are set to zero.
34//
35// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
36// ==> %reg:subidx = SUBREG_TO_REG %subreg, subidx
37//
38// 6. %intermediate:gpr32 = COPY %src:fpr128
39// %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
40// ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
41//
42// In cases where a source FPR is copied to a GPR in order to be copied
43// to a destination FPR, we can directly copy the values between the FPRs,
44// eliminating the use of the Integer unit. When we match a pattern of
45// INSvi[X]gpr that is preceded by a chain of COPY instructions from a FPR
46// source, we use the INSvi[X]lane to replace the COPY & INSvi[X]gpr
47// instructions.
48//
49// 7. If MI sets zero for high 64-bits implicitly, remove `mov 0` for high
50// 64-bits. For example,
51//
52// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
53// %2:fpr64 = MOVID 0
54// %4:fpr128 = IMPLICIT_DEF
55// %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
56// %6:fpr128 = IMPLICIT_DEF
57// %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
58// %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
59// ==>
60// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
61// %6:fpr128 = IMPLICIT_DEF
62// %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
63//
64// 8. Remove redundant CSELs that select between identical registers, by
65// replacing them with unconditional moves.
66//
67// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit
68// LSR or LSL alias of UBFM.
69//
70//===----------------------------------------------------------------------===//
71
72#include "AArch64ExpandImm.h"
73#include "AArch64InstrInfo.h"
74#include "MCTargetDesc/AArch64AddressingModes.h"
75#include "llvm/CodeGen/MachineDominators.h"
76#include "llvm/CodeGen/MachineLoopInfo.h"
77
78using namespace llvm;
79
80#define DEBUG_TYPE "aarch64-mi-peephole-opt"
81
82namespace {
83
84struct AArch64MIPeepholeOpt : public MachineFunctionPass {
85 static char ID;
86
87 AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {}
88
89 const AArch64InstrInfo *TII;
90 const AArch64RegisterInfo *TRI;
91 MachineLoopInfo *MLI;
92 MachineRegisterInfo *MRI;
93
94 using OpcodePair = std::pair<unsigned, unsigned>;
95 template <typename T>
96 using SplitAndOpcFunc =
97 std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
98 using BuildMIFunc =
99 std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
100 Register, Register, Register)>;
101
102 /// For instructions where an immediate operand could be split into two
103 /// separate immediate instructions, use the splitTwoPartImm two handle the
104 /// optimization.
105 ///
106 /// To implement, the following function types must be passed to
107 /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
108 /// splitting the immediate is valid and returns the associated new opcode. A
109 /// BuildMIFunc must be implemented to build the two immediate instructions.
110 ///
111 /// Example Pattern (where IMM would require 2+ MOV instructions):
112 /// %dst = <Instr>rr %src IMM [...]
113 /// becomes:
114 /// %tmp = <Instr>ri %src (encode half IMM) [...]
115 /// %dst = <Instr>ri %tmp (encode half IMM) [...]
116 template <typename T>
117 bool splitTwoPartImm(MachineInstr &MI,
118 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
119
120 bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
121 MachineInstr *&SubregToRegMI);
122
123 template <typename T>
124 bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
125 template <typename T>
126 bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
127
128 // Strategy used to split logical immediate bitmasks.
129 enum class SplitStrategy {
130 Intersect,
131 Disjoint,
132 };
133 template <typename T>
134 bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
135 SplitStrategy Strategy, unsigned OtherOpc = 0);
136 bool visitORR(MachineInstr &MI);
137 bool visitCSEL(MachineInstr &MI);
138 bool visitINSERT(MachineInstr &MI);
139 bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
140 bool visitINSvi64lane(MachineInstr &MI);
141 bool visitFMOVDr(MachineInstr &MI);
142 bool visitUBFMXri(MachineInstr &MI);
143 bool visitCopy(MachineInstr &MI);
144 bool runOnMachineFunction(MachineFunction &MF) override;
145
146 StringRef getPassName() const override {
147 return "AArch64 MI Peephole Optimization pass";
148 }
149
150 void getAnalysisUsage(AnalysisUsage &AU) const override {
151 AU.setPreservesCFG();
152 AU.addRequired<MachineLoopInfoWrapperPass>();
153 MachineFunctionPass::getAnalysisUsage(AU);
154 }
155};
156
157char AArch64MIPeepholeOpt::ID = 0;
158
159} // end anonymous namespace
160
161INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
162 "AArch64 MI Peephole Optimization", false, false)
163
164template <typename T>
165static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
166 T UImm = static_cast<T>(Imm);
167 assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!");
168
169 // The bitmask immediate consists of consecutive ones. Let's say there is
170 // constant 0b00000000001000000000010000000000 which does not consist of
171 // consecutive ones. We can split it in to two bitmask immediate like
172 // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
173 // If we do AND with these two bitmask immediate, we can see original one.
174 unsigned LowestBitSet = llvm::countr_zero(UImm);
175 unsigned HighestBitSet = Log2_64(UImm);
176
177 // Create a mask which is filled with one from the position of lowest bit set
178 // to the position of highest bit set.
179 T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
180 (static_cast<T>(1) << LowestBitSet);
181 // Create a mask which is filled with one outside the position of lowest bit
182 // set and the position of highest bit set.
183 T NewImm2 = UImm | ~NewImm1;
184
185 // If the split value is not valid bitmask immediate, do not split this
186 // constant.
187 if (!AArch64_AM::isLogicalImmediate(imm: NewImm2, regSize: RegSize))
188 return false;
189
190 Imm1Enc = AArch64_AM::encodeLogicalImmediate(imm: NewImm1, regSize: RegSize);
191 Imm2Enc = AArch64_AM::encodeLogicalImmediate(imm: NewImm2, regSize: RegSize);
192 return true;
193}
194
195template <typename T>
196static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
197 T &Imm2Enc) {
198 assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");
199
200 // Try to split a bitmask of the form 0b00000000011000000000011110000000 into
201 // two disjoint masks such as 0b00000000011000000000000000000000 and
202 // 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the
203 // new masks match the original mask.
204 unsigned LowestBitSet = llvm::countr_zero(Imm);
205 unsigned LowestGapBitUnset =
206 LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);
207
208 // Create a mask for the least significant group of consecutive ones.
209 assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
210 T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
211 (static_cast<T>(1) << LowestBitSet);
212 // Create a disjoint mask for the remaining ones.
213 T NewImm2 = Imm & ~NewImm1;
214
215 // Do not split if NewImm2 is not a valid bitmask immediate.
216 if (!AArch64_AM::isLogicalImmediate(imm: NewImm2, regSize: RegSize))
217 return false;
218
219 Imm1Enc = AArch64_AM::encodeLogicalImmediate(imm: NewImm1, regSize: RegSize);
220 Imm2Enc = AArch64_AM::encodeLogicalImmediate(imm: NewImm2, regSize: RegSize);
221 return true;
222}
223
224template <typename T>
225bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
226 SplitStrategy Strategy,
227 unsigned OtherOpc) {
228 // Try below transformations.
229 //
230 // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
231 // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
232 //
233 // The mov pseudo instruction could be expanded to multiple mov instructions
234 // later. Let's try to split the constant operand of mov instruction into two
235 // bitmask immediates based on the given split strategy. It makes only two
236 // logical instructions instead of multiple mov + logic instructions.
237
238 return splitTwoPartImm<T>(
239 MI,
240 [Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
241 T &Imm1) -> std::optional<OpcodePair> {
242 // If this immediate is already a suitable bitmask, don't split it.
243 // TODO: Should we just combine the two instructions in this case?
244 if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize))
245 return std::nullopt;
246
247 // If this immediate can be handled by one instruction, don't split it.
248 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
249 AArch64_IMM::expandMOVImm(Imm, BitSize: RegSize, Insn);
250 if (Insn.size() == 1)
251 return std::nullopt;
252
253 bool SplitSucc = false;
254 switch (Strategy) {
255 case SplitStrategy::Intersect:
256 SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
257 break;
258 case SplitStrategy::Disjoint:
259 SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
260 break;
261 }
262 if (SplitSucc)
263 return std::make_pair(x: Opc, y: !OtherOpc ? Opc : OtherOpc);
264 return std::nullopt;
265 },
266 [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
267 unsigned Imm1, Register SrcReg, Register NewTmpReg,
268 Register NewDstReg) {
269 DebugLoc DL = MI.getDebugLoc();
270 MachineBasicBlock *MBB = MI.getParent();
271 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.first), DestReg: NewTmpReg)
272 .addReg(RegNo: SrcReg)
273 .addImm(Val: Imm0);
274 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.second), DestReg: NewDstReg)
275 .addReg(RegNo: NewTmpReg)
276 .addImm(Val: Imm1);
277 });
278}
279
280bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
281 // Check this ORR comes from below zero-extend pattern.
282 //
283 // def : Pat<(i64 (zext GPR32:$src)),
284 // (SUBREG_TO_REG (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
285 if (MI.getOperand(i: 3).getImm() != 0)
286 return false;
287
288 if (MI.getOperand(i: 1).getReg() != AArch64::WZR)
289 return false;
290
291 MachineInstr *SrcMI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: 2).getReg());
292 if (!SrcMI)
293 return false;
294
295 // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
296 //
297 // When you use the 32-bit form of an instruction, the upper 32 bits of the
298 // source registers are ignored and the upper 32 bits of the destination
299 // register are set to zero.
300 //
301 // If AArch64's 32-bit form of instruction defines the source operand of
302 // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
303 // real AArch64 instruction and if it is not, do not process the opcode
304 // conservatively.
305 if (SrcMI->getOpcode() == TargetOpcode::COPY &&
306 SrcMI->getOperand(i: 1).getReg().isVirtual()) {
307 const TargetRegisterClass *RC =
308 MRI->getRegClass(Reg: SrcMI->getOperand(i: 1).getReg());
309
310 // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
311 // that the upper bits are zero.
312 if (RC != &AArch64::FPR32RegClass &&
313 ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
314 RC != &AArch64::ZPRRegClass) ||
315 SrcMI->getOperand(i: 1).getSubReg() != AArch64::ssub))
316 return false;
317 Register CpySrc;
318 if (SrcMI->getOperand(i: 1).getSubReg() == AArch64::ssub) {
319 CpySrc = MRI->createVirtualRegister(RegClass: &AArch64::FPR32RegClass);
320 BuildMI(BB&: *SrcMI->getParent(), I: SrcMI, MIMD: SrcMI->getDebugLoc(),
321 MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: CpySrc)
322 .add(MO: SrcMI->getOperand(i: 1));
323 } else {
324 CpySrc = SrcMI->getOperand(i: 1).getReg();
325 }
326 BuildMI(BB&: *SrcMI->getParent(), I: SrcMI, MIMD: SrcMI->getDebugLoc(),
327 MCID: TII->get(Opcode: AArch64::FMOVSWr), DestReg: SrcMI->getOperand(i: 0).getReg())
328 .addReg(RegNo: CpySrc);
329 SrcMI->eraseFromParent();
330 }
331 else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
332 return false;
333
334 Register DefReg = MI.getOperand(i: 0).getReg();
335 Register SrcReg = MI.getOperand(i: 2).getReg();
336 MRI->replaceRegWith(FromReg: DefReg, ToReg: SrcReg);
337 MRI->clearKillFlags(Reg: SrcReg);
338 LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
339 MI.eraseFromParent();
340
341 return true;
342}
343
344bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
345 // Replace CSEL with MOV when both inputs are the same register.
346 if (MI.getOperand(i: 1).getReg() != MI.getOperand(i: 2).getReg())
347 return false;
348
349 auto ZeroReg =
350 MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
351 auto OrOpcode =
352 MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;
353
354 BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: OrOpcode))
355 .addReg(RegNo: MI.getOperand(i: 0).getReg(), Flags: RegState::Define)
356 .addReg(RegNo: ZeroReg)
357 .addReg(RegNo: MI.getOperand(i: 1).getReg())
358 .addImm(Val: 0);
359
360 MI.eraseFromParent();
361 return true;
362}
363
364bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
365 // Check this INSERT_SUBREG comes from below zero-extend pattern.
366 //
367 // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
368 // To %reg:subidx = SUBREG_TO_REG %subreg, subidx
369 //
370 // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
371 // COPY would destroy the upper part of the register anyway
372 if (!MI.isRegTiedToDefOperand(UseOpIdx: 1))
373 return false;
374
375 Register DstReg = MI.getOperand(i: 0).getReg();
376 const TargetRegisterClass *RC = MRI->getRegClass(Reg: DstReg);
377 MachineInstr *SrcMI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: 2).getReg());
378 if (!SrcMI)
379 return false;
380
381 // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
382 //
383 // When you use the 32-bit form of an instruction, the upper 32 bits of the
384 // source registers are ignored and the upper 32 bits of the destination
385 // register are set to zero.
386 //
387 // If AArch64's 32-bit form of instruction defines the source operand of
388 // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
389 // real AArch64 instruction and if it is not, do not process the opcode
390 // conservatively.
391 if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
392 !AArch64::GPR64allRegClass.hasSubClassEq(RC))
393 return false;
394
395 // Build a SUBREG_TO_REG instruction
396 MachineInstr *SubregMI =
397 BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(),
398 MCID: TII->get(Opcode: TargetOpcode::SUBREG_TO_REG), DestReg: DstReg)
399 .add(MO: MI.getOperand(i: 2))
400 .add(MO: MI.getOperand(i: 3));
401 LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n");
402 (void)SubregMI;
403 MI.eraseFromParent();
404
405 return true;
406}
407
408template <typename T>
409static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
410 // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
411 // imm0 and imm1 are non-zero 12-bit unsigned int.
412 if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
413 (Imm & ~static_cast<T>(0xffffff)) != 0)
414 return false;
415
416 // The immediate can not be composed via a single instruction.
417 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
418 AArch64_IMM::expandMOVImm(Imm, BitSize: RegSize, Insn);
419 if (Insn.size() == 1)
420 return false;
421
422 // Split Imm into (Imm0 << 12) + Imm1;
423 Imm0 = (Imm >> 12) & 0xfff;
424 Imm1 = Imm & 0xfff;
425 return true;
426}
427
428template <typename T>
429bool AArch64MIPeepholeOpt::visitADDSUB(
430 unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
431 // Try below transformation.
432 //
433 // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
434 // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
435 //
436 // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
437 // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
438 //
439 // The mov pseudo instruction could be expanded to multiple mov instructions
440 // later. Let's try to split the constant operand of mov instruction into two
441 // legal add/sub immediates. It makes only two ADD/SUB instructions instead of
442 // multiple `mov` + `and/sub` instructions.
443
444 // We can sometimes have ADDWrr WZR, MULi32imm that have not been constant
445 // folded. Make sure that we don't generate invalid instructions that use XZR
446 // in those cases.
447 if (MI.getOperand(i: 1).getReg() == AArch64::XZR ||
448 MI.getOperand(i: 1).getReg() == AArch64::WZR)
449 return false;
450
451 return splitTwoPartImm<T>(
452 MI,
453 [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
454 T &Imm1) -> std::optional<OpcodePair> {
455 if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
456 return std::make_pair(x: PosOpc, y: PosOpc);
457 if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
458 return std::make_pair(x: NegOpc, y: NegOpc);
459 return std::nullopt;
460 },
461 [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
462 unsigned Imm1, Register SrcReg, Register NewTmpReg,
463 Register NewDstReg) {
464 DebugLoc DL = MI.getDebugLoc();
465 MachineBasicBlock *MBB = MI.getParent();
466 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.first), DestReg: NewTmpReg)
467 .addReg(RegNo: SrcReg)
468 .addImm(Val: Imm0)
469 .addImm(Val: 12);
470 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.second), DestReg: NewDstReg)
471 .addReg(RegNo: NewTmpReg)
472 .addImm(Val: Imm1)
473 .addImm(Val: 0);
474 });
475}
476
477template <typename T>
478bool AArch64MIPeepholeOpt::visitADDSSUBS(
479 OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
480 // Try the same transformation as ADDSUB but with additional requirement
481 // that the condition code usages are only for Equal and Not Equal
482
483 if (MI.getOperand(i: 1).getReg() == AArch64::XZR ||
484 MI.getOperand(i: 1).getReg() == AArch64::WZR)
485 return false;
486
487 return splitTwoPartImm<T>(
488 MI,
489 [PosOpcs, NegOpcs, &MI, &TRI = TRI,
490 &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
491 T &Imm1) -> std::optional<OpcodePair> {
492 OpcodePair OP;
493 if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
494 OP = PosOpcs;
495 else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
496 OP = NegOpcs;
497 else
498 return std::nullopt;
499 // Check conditional uses last since it is expensive for scanning
500 // proceeding instructions
501 MachineInstr &SrcMI = *MRI->getUniqueVRegDef(Reg: MI.getOperand(i: 1).getReg());
502 std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(MI&: SrcMI, CmpInstr&: MI, TRI: *TRI);
503 if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
504 return std::nullopt;
505 return OP;
506 },
507 [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
508 unsigned Imm1, Register SrcReg, Register NewTmpReg,
509 Register NewDstReg) {
510 DebugLoc DL = MI.getDebugLoc();
511 MachineBasicBlock *MBB = MI.getParent();
512 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.first), DestReg: NewTmpReg)
513 .addReg(RegNo: SrcReg)
514 .addImm(Val: Imm0)
515 .addImm(Val: 12);
516 BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.second), DestReg: NewDstReg)
517 .addReg(RegNo: NewTmpReg)
518 .addImm(Val: Imm1)
519 .addImm(Val: 0);
520 });
521}
522
523// Checks if the corresponding MOV immediate instruction is applicable for
524// this peephole optimization.
525bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
526 MachineInstr *&MovMI,
527 MachineInstr *&SubregToRegMI) {
528 // Check whether current MBB is in loop and the AND is loop invariant.
529 MachineBasicBlock *MBB = MI.getParent();
530 MachineLoop *L = MLI->getLoopFor(BB: MBB);
531 if (L && !L->isLoopInvariant(I&: MI))
532 return false;
533
534 // Check whether current MI's operand is MOV with immediate.
535 MovMI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: 2).getReg());
536 if (!MovMI)
537 return false;
538
539 // If it is SUBREG_TO_REG, check its operand.
540 SubregToRegMI = nullptr;
541 if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
542 SubregToRegMI = MovMI;
543 MovMI = MRI->getUniqueVRegDef(Reg: MovMI->getOperand(i: 1).getReg());
544 if (!MovMI)
545 return false;
546 }
547
548 if (MovMI->getOpcode() != AArch64::MOVi32imm &&
549 MovMI->getOpcode() != AArch64::MOVi64imm)
550 return false;
551
552 // If the MOV has multiple uses, do not split the immediate because it causes
553 // more instructions.
554 if (!MRI->hasOneUse(RegNo: MovMI->getOperand(i: 0).getReg()))
555 return false;
556 if (SubregToRegMI && !MRI->hasOneUse(RegNo: SubregToRegMI->getOperand(i: 0).getReg()))
557 return false;
558
559 // It is OK to perform this peephole optimization.
560 return true;
561}
562
563template <typename T>
564bool AArch64MIPeepholeOpt::splitTwoPartImm(
565 MachineInstr &MI,
566 SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
567 unsigned RegSize = sizeof(T) * 8;
568 assert((RegSize == 32 || RegSize == 64) &&
569 "Invalid RegSize for legal immediate peephole optimization");
570
571 // Perform several essential checks against current MI.
572 MachineInstr *MovMI, *SubregToRegMI;
573 if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
574 return false;
575
576 // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
577 T Imm = static_cast<T>(MovMI->getOperand(i: 1).getImm()), Imm0, Imm1;
578 // For the 32 bit form of instruction, the upper 32 bits of the destination
579 // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
580 // of Imm to zero. This is essential if the Immediate value was a negative
581 // number since it was sign extended when we assign to the 64-bit Imm.
582 if (SubregToRegMI)
583 Imm &= 0xFFFFFFFF;
584 OpcodePair Opcode;
585 if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
586 Opcode = *R;
587 else
588 return false;
589
590 // Create new MIs using the first and second opcodes. Opcodes might differ for
591 // flag setting operations that should only set flags on second instruction.
592 // NewTmpReg = Opcode.first SrcReg Imm0
593 // NewDstReg = Opcode.second NewTmpReg Imm1
594
595 // Determine register classes for destinations and register operands
596 const TargetRegisterClass *FirstInstrDstRC =
597 TII->getRegClass(MCID: TII->get(Opcode: Opcode.first), OpNum: 0);
598 const TargetRegisterClass *FirstInstrOperandRC =
599 TII->getRegClass(MCID: TII->get(Opcode: Opcode.first), OpNum: 1);
600 const TargetRegisterClass *SecondInstrDstRC =
601 (Opcode.first == Opcode.second)
602 ? FirstInstrDstRC
603 : TII->getRegClass(MCID: TII->get(Opcode: Opcode.second), OpNum: 0);
604 const TargetRegisterClass *SecondInstrOperandRC =
605 (Opcode.first == Opcode.second)
606 ? FirstInstrOperandRC
607 : TII->getRegClass(MCID: TII->get(Opcode: Opcode.second), OpNum: 1);
608
609 // Get old registers destinations and new register destinations
610 Register DstReg = MI.getOperand(i: 0).getReg();
611 Register SrcReg = MI.getOperand(i: 1).getReg();
612 Register NewTmpReg = MRI->createVirtualRegister(RegClass: FirstInstrDstRC);
613 // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
614 // reuse that same destination register.
615 Register NewDstReg = DstReg.isVirtual()
616 ? MRI->createVirtualRegister(RegClass: SecondInstrDstRC)
617 : DstReg;
618
619 // Constrain registers based on their new uses
620 MRI->constrainRegClass(Reg: SrcReg, RC: FirstInstrOperandRC);
621 MRI->constrainRegClass(Reg: NewTmpReg, RC: SecondInstrOperandRC);
622 if (DstReg != NewDstReg)
623 MRI->constrainRegClass(Reg: NewDstReg, RC: MRI->getRegClass(Reg: DstReg));
624
625 // Call the delegating operation to build the instruction
626 BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
627
628 // replaceRegWith changes MI's definition register. Keep it for SSA form until
629 // deleting MI. Only if we made a new destination register.
630 if (DstReg != NewDstReg) {
631 MRI->replaceRegWith(FromReg: DstReg, ToReg: NewDstReg);
632 MI.getOperand(i: 0).setReg(DstReg);
633 }
634
635 // Record the MIs need to be removed.
636 MI.eraseFromParent();
637 if (SubregToRegMI)
638 SubregToRegMI->eraseFromParent();
639 MovMI->eraseFromParent();
640
641 return true;
642}
643
644bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
645 // Check if this INSvi[X]gpr comes from COPY of a source FPR128
646 //
647 // From
648 // %intermediate1:gpr64 = COPY %src:fpr128
649 // %intermediate2:gpr32 = COPY %intermediate1:gpr64
650 // %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
651 // To
652 // %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
653 // src_index
654 // where src_index = 0, X = [8|16|32|64]
655
656 MachineInstr *SrcMI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: 3).getReg());
657
658 // For a chain of COPY instructions, find the initial source register
659 // and check if it's an FPR128
660 while (true) {
661 if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
662 return false;
663
664 if (!SrcMI->getOperand(i: 1).getReg().isVirtual())
665 return false;
666
667 if (MRI->getRegClass(Reg: SrcMI->getOperand(i: 1).getReg()) ==
668 &AArch64::FPR128RegClass) {
669 break;
670 }
671 SrcMI = MRI->getUniqueVRegDef(Reg: SrcMI->getOperand(i: 1).getReg());
672 }
673
674 Register DstReg = MI.getOperand(i: 0).getReg();
675 Register SrcReg = SrcMI->getOperand(i: 1).getReg();
676 MachineInstr *INSvilaneMI =
677 BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: Opc), DestReg: DstReg)
678 .add(MO: MI.getOperand(i: 1))
679 .add(MO: MI.getOperand(i: 2))
680 .addUse(RegNo: SrcReg, Flags: getRegState(RegOp: SrcMI->getOperand(i: 1)))
681 .addImm(Val: 0);
682
683 LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *INSvilaneMI << "\n");
684 (void)INSvilaneMI;
685 MI.eraseFromParent();
686 return true;
687}
688
689// All instructions that set a FPR64 will implicitly zero the top bits of the
690// register.
691static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
692 MachineRegisterInfo *MRI) {
693 if (!MI->getOperand(i: 0).isReg() || !MI->getOperand(i: 0).isDef())
694 return false;
695 const TargetRegisterClass *RC = MRI->getRegClass(Reg: MI->getOperand(i: 0).getReg());
696 if (RC != &AArch64::FPR64RegClass)
697 return false;
698 return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
699}
700
701bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
702 // Check the MI for low 64-bits sets zero for high 64-bits implicitly.
703 // We are expecting below case.
704 //
705 // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
706 // %6:fpr128 = IMPLICIT_DEF
707 // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
708 // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
709 MachineInstr *Low64MI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: 1).getReg());
710 if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
711 return false;
712 Low64MI = MRI->getUniqueVRegDef(Reg: Low64MI->getOperand(i: 2).getReg());
713 if (!Low64MI || !is64bitDefwithZeroHigh64bit(MI: Low64MI, MRI))
714 return false;
715
716 // Check there is `mov 0` MI for high 64-bits.
717 // We are expecting below cases.
718 //
719 // %2:fpr64 = MOVID 0
720 // %4:fpr128 = IMPLICIT_DEF
721 // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
722 // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
723 // or
724 // %5:fpr128 = MOVIv2d_ns 0
725 // %6:fpr64 = COPY %5.dsub:fpr128
726 // %8:fpr128 = IMPLICIT_DEF
727 // %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
728 // %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
729 MachineInstr *High64MI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: 3).getReg());
730 if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
731 return false;
732 High64MI = MRI->getUniqueVRegDef(Reg: High64MI->getOperand(i: 2).getReg());
733 if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
734 High64MI = MRI->getUniqueVRegDef(Reg: High64MI->getOperand(i: 1).getReg());
735 if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
736 High64MI->getOpcode() != AArch64::MOVIv2d_ns))
737 return false;
738 if (High64MI->getOperand(i: 1).getImm() != 0)
739 return false;
740
741 // Let's remove MIs for high 64-bits.
742 Register OldDef = MI.getOperand(i: 0).getReg();
743 Register NewDef = MI.getOperand(i: 1).getReg();
744 MRI->constrainRegClass(Reg: NewDef, RC: MRI->getRegClass(Reg: OldDef));
745 MRI->replaceRegWith(FromReg: OldDef, ToReg: NewDef);
746 MI.eraseFromParent();
747
748 return true;
749}
750
751bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
752 // An FMOVDr sets the high 64-bits to zero implicitly, similar to ORR for GPR.
753 MachineInstr *Low64MI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: 1).getReg());
754 if (!Low64MI || !is64bitDefwithZeroHigh64bit(MI: Low64MI, MRI))
755 return false;
756
757 // Let's remove MIs for high 64-bits.
758 Register OldDef = MI.getOperand(i: 0).getReg();
759 Register NewDef = MI.getOperand(i: 1).getReg();
760 LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
761 MRI->clearKillFlags(Reg: OldDef);
762 MRI->clearKillFlags(Reg: NewDef);
763 MRI->constrainRegClass(Reg: NewDef, RC: MRI->getRegClass(Reg: OldDef));
764 MRI->replaceRegWith(FromReg: OldDef, ToReg: NewDef);
765 MI.eraseFromParent();
766
767 return true;
768}
769
bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
  // Check if the instruction is equivalent to a 32 bit LSR or LSL alias of
  // UBFM, and replace the UBFMXri instruction with its 32 bit variant,
  // UBFMWri. Operands 2 and 3 are the immr/imms immediates of the bitfield
  // move.
  int64_t Immr = MI.getOperand(i: 2).getImm();
  int64_t Imms = MI.getOperand(i: 3).getImm();

  // Imms == 31 with Immr <= Imms is the form treated here as a 32-bit LSR;
  // Immr == Imms + 33 is the form treated as a 32-bit LSL. Any other
  // immediate combination is left alone.
  bool IsLSR = Imms == 31 && Immr <= Imms;
  bool IsLSL = Immr == Imms + 33;
  if (!IsLSR && !IsLSL)
    return false;

  if (IsLSL) {
    // Rebase immr from the 64-bit encoding to its 32-bit equivalent.
    Immr -= 32;
  }

  // Derive the 32-bit destination register class from the 64-bit operand
  // class via its sub_32 subregister.
  const TargetRegisterClass *DstRC64 =
      TII->getRegClass(MCID: TII->get(Opcode: MI.getOpcode()), OpNum: 0);
  const TargetRegisterClass *DstRC32 =
      TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  // Same derivation for the source operand.
  const TargetRegisterClass *SrcRC64 =
      TII->getRegClass(MCID: TII->get(Opcode: MI.getOpcode()), OpNum: 1);
  const TargetRegisterClass *SrcRC32 =
      TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  Register DstReg64 = MI.getOperand(i: 0).getReg();
  Register DstReg32 = MRI->createVirtualRegister(RegClass: DstRC32);
  Register SrcReg64 = MI.getOperand(i: 1).getReg();
  Register SrcReg32 = MRI->createVirtualRegister(RegClass: SrcRC32);

  // Emit: copy the low 32 bits of the source, do the shift as UBFMWri, then
  // widen the 32-bit result back to 64 bits with SUBREG_TO_REG.
  BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::COPY),
          DestReg: SrcReg32)
      .addReg(RegNo: SrcReg64, Flags: {}, SubReg: AArch64::sub_32);
  BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::UBFMWri),
          DestReg: DstReg32)
      .addReg(RegNo: SrcReg32)
      .addImm(Val: Immr)
      .addImm(Val: Imms);
  BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(),
          MCID: TII->get(Opcode: AArch64::SUBREG_TO_REG), DestReg: DstReg64)
      .addReg(RegNo: DstReg32)
      .addImm(Val: AArch64::sub_32);
  MI.eraseFromParent();
  return true;
}
819
// Across a basic block we might have an i32 extract from a value that only
// operates on upper bits (for example a sxtw). We can replace the COPY with a
// new version skipping the sxtw.
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  // Only handle COPYs that read the sub_32 subregister of a single-use
  // 64-bit value; multiple uses would keep the extend live anyway.
  Register InputReg = MI.getOperand(i: 1).getReg();
  if (MI.getOperand(i: 1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(RegNo: InputReg))
    return false;

  // Walk backwards through a chain of single-use full copies to the real
  // defining instruction, remembering each hop as dead-on-success.
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(Reg: InputReg);
  SmallPtrSet<MachineInstr *, 4> DeadInstrs;
  DeadInstrs.insert(Ptr: SrcMI);
  while (SrcMI && SrcMI->isFullCopy() &&
         MRI->hasOneNonDBGUse(RegNo: SrcMI->getOperand(i: 1).getReg())) {
    SrcMI = MRI->getUniqueVRegDef(Reg: SrcMI->getOperand(i: 1).getReg());
    DeadInstrs.insert(Ptr: SrcMI);
  }

  if (!SrcMI)
    return false;

  // Look for SXTW(X) (SBFMXri with immr == 0, imms == 31) and return Reg,
  // the pre-extend source.
  auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SBFMXri ||
        SrcMI->getOperand(i: 2).getImm() != 0 ||
        SrcMI->getOperand(i: 3).getImm() != 31)
      return AArch64::NoRegister;
    return SrcMI->getOperand(i: 1).getReg();
  };
  // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32))) -- the zero-extend
  // idiom -- checking each intermediate value has a single non-debug use so
  // the chain really dies with the rewrite. The ORR is added to DeadInstrs;
  // the inner COPY's source is returned.
  auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
        SrcMI->getOperand(i: 2).getImm() != AArch64::sub_32 ||
        !MRI->hasOneNonDBGUse(RegNo: SrcMI->getOperand(i: 1).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Orr = MRI->getUniqueVRegDef(Reg: SrcMI->getOperand(i: 1).getReg());
    if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
        Orr->getOperand(i: 1).getReg() != AArch64::WZR ||
        !MRI->hasOneNonDBGUse(RegNo: Orr->getOperand(i: 2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Cpy = MRI->getUniqueVRegDef(Reg: Orr->getOperand(i: 2).getReg());
    if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
        Cpy->getOperand(i: 1).getSubReg() != AArch64::sub_32)
      return AArch64::NoRegister;
    DeadInstrs.insert(Ptr: Orr);
    return Cpy->getOperand(i: 1).getReg();
  };

  // Try the sign-extend pattern first, then the zero-extend pattern.
  Register SrcReg = getSXTWSrcReg(SrcMI);
  if (!SrcReg)
    SrcReg = getUXTWSrcReg(SrcMI);
  if (!SrcReg)
    return false;

  // Retarget the COPY at the pre-extend register and delete the now-dead
  // extend/copy chain.
  MRI->constrainRegClass(Reg: SrcReg, RC: MRI->getRegClass(Reg: InputReg));
  LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
  MI.getOperand(i: 1).setReg(SrcReg);
  LLVM_DEBUG(dbgs() << " to: " << MI);
  for (auto *DeadMI : DeadInstrs) {
    LLVM_DEBUG(dbgs() << " Removing: " << *DeadMI);
    DeadMI->eraseFromParent();
  }
  return true;
}
884
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(F: MF.getFunction()))
    return false;

  // Cache target/analysis handles used by the per-instruction visitors.
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  // Single pass over every instruction; early-inc iteration keeps the walk
  // valid while visitors erase the current (or nearby dead) instructions.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(Range&: MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      // Logical ops: try splitting a movi-produced immediate into two
      // directly-encodable *ri immediates. AND-style ops use the Intersect
      // strategy; EOR/ORR use Disjoint. ANDS additionally passes the
      // flag-setting opcode for the final instruction.
      case AArch64::ANDWrr:
        Changed |= trySplitLogicalImm<uint32_t>(Opc: AArch64::ANDWri, MI,
                                                Strategy: SplitStrategy::Intersect);
        break;
      case AArch64::ANDXrr:
        Changed |= trySplitLogicalImm<uint64_t>(Opc: AArch64::ANDXri, MI,
                                                Strategy: SplitStrategy::Intersect);
        break;
      case AArch64::ANDSWrr:
        Changed |= trySplitLogicalImm<uint32_t>(
            Opc: AArch64::ANDWri, MI, Strategy: SplitStrategy::Intersect, OtherOpc: AArch64::ANDSWri);
        break;
      case AArch64::ANDSXrr:
        Changed |= trySplitLogicalImm<uint64_t>(
            Opc: AArch64::ANDXri, MI, Strategy: SplitStrategy::Intersect, OtherOpc: AArch64::ANDSXri);
        break;
      case AArch64::EORWrr:
        Changed |= trySplitLogicalImm<uint32_t>(Opc: AArch64::EORWri, MI,
                                                Strategy: SplitStrategy::Disjoint);
        break;
      case AArch64::EORXrr:
        Changed |= trySplitLogicalImm<uint64_t>(Opc: AArch64::EORXri, MI,
                                                Strategy: SplitStrategy::Disjoint);
        break;
      case AArch64::ORRWrr:
        Changed |= trySplitLogicalImm<uint32_t>(Opc: AArch64::ORRWri, MI,
                                                Strategy: SplitStrategy::Disjoint);
        break;
      case AArch64::ORRXrr:
        Changed |= trySplitLogicalImm<uint64_t>(Opc: AArch64::ORRXri, MI,
                                                Strategy: SplitStrategy::Disjoint);
        break;
      // Shifted ORR from zero-extension; may be removable entirely.
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      // ADD/SUB: split a movi immediate into two addends. The negated
      // opcode is passed so a negative constant can flip ADD<->SUB.
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(PosOpc: AArch64::ADDWri, NegOpc: AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(PosOpc: AArch64::SUBWri, NegOpc: AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(PosOpc: AArch64::ADDXri, NegOpc: AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(PosOpc: AArch64::SUBXri, NegOpc: AArch64::ADDXri, MI);
        break;
      // Flag-setting variants take {non-flag-setting, flag-setting} opcode
      // pairs: only the final instruction of the split may set flags.
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>(PosOpcs: {AArch64::ADDWri, AArch64::ADDSWri},
                                    NegOpcs: {AArch64::SUBWri, AArch64::SUBSWri}, MI);
        break;
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>(PosOpcs: {AArch64::SUBWri, AArch64::SUBSWri},
                                    NegOpcs: {AArch64::ADDWri, AArch64::ADDSWri}, MI);
        break;
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>(PosOpcs: {AArch64::ADDXri, AArch64::ADDSXri},
                                    NegOpcs: {AArch64::SUBXri, AArch64::SUBSXri}, MI);
        break;
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>(PosOpcs: {AArch64::SUBXri, AArch64::SUBSXri},
                                    NegOpcs: {AArch64::ADDXri, AArch64::ADDSXri}, MI);
        break;
      case AArch64::CSELWr:
      case AArch64::CSELXr:
        Changed |= visitCSEL(MI);
        break;
      // GPR-sourced vector inserts: rewrite to lane-to-lane inserts when the
      // GPR value originated in an FPR, avoiding the integer unit.
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, Opc: AArch64::INSvi64lane);
        break;
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, Opc: AArch64::INSvi32lane);
        break;
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, Opc: AArch64::INSvi16lane);
        break;
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, Opc: AArch64::INSvi8lane);
        break;
      case AArch64::INSvi64lane:
        Changed |= visitINSvi64lane(MI);
        break;
      case AArch64::FMOVDr:
        Changed |= visitFMOVDr(MI);
        break;
      case AArch64::UBFMXri:
        Changed |= visitUBFMXri(MI);
        break;
      case AArch64::COPY:
        Changed |= visitCopy(MI);
        break;
      }
    }
  }

  return Changed;
}
1008
1009FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
1010 return new AArch64MIPeepholeOpt();
1011}
1012