AArch64MIPeepholeOpt.cpp source code [llvm_projects/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp]

1	//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass performs below peephole optimizations on MIR level.
10	//
11	// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
12	// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
13	//
14	// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
15	// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
16	//
17	// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
18	// MOVi64imm + SUBXrr ==> SUBXri + SUBXri
19	//
20	// The mov pseudo instruction could be expanded to multiple mov instructions
21	// later. In this case, we could try to split the constant operand of mov
22	// instruction into two immediates which can be directly encoded into
23	// Wri/Xri instructions. It makes two AND/ADD/SUB instructions instead of
24	// multiple `mov` + `and/add/sub` instructions.
25	//
26	// 4. Remove redundant ORRWrs which is generated by zero-extend.
27	//
28	// %3:gpr32 = ORRWrs $wzr, %2, 0
29	// %4:gpr64 = SUBREG_TO_REG %3, %subreg.sub_32
30	//
31	// If AArch64's 32-bit form of instruction defines the source operand of
32	// ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
33	// operand are set to zero.
34	//
35	// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
36	// ==> %reg:subidx = SUBREG_TO_REG %subreg, subidx
37	//
38	// 6. %intermediate:gpr32 = COPY %src:fpr128
39	// %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
40	// ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
41	//
42	// In cases where a source FPR is copied to a GPR in order to be copied
43	// to a destination FPR, we can directly copy the values between the FPRs,
44	// eliminating the use of the Integer unit. When we match a pattern of
45	// INSvi[X]gpr that is preceded by a chain of COPY instructions from a FPR
46	// source, we use the INSvi[X]lane to replace the COPY & INSvi[X]gpr
47	// instructions.
48	//
49	// 7. If MI sets zero for high 64-bits implicitly, remove `mov 0` for high
50	// 64-bits. For example,
51	//
52	// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
53	// %2:fpr64 = MOVID 0
54	// %4:fpr128 = IMPLICIT_DEF
55	// %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
56	// %6:fpr128 = IMPLICIT_DEF
57	// %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
58	// %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
59	// ==>
60	// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
61	// %6:fpr128 = IMPLICIT_DEF
62	// %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
63	//
64	// 8. Remove redundant CSELs that select between identical registers, by
65	// replacing them with unconditional moves.
66	//
67	// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit
68	// LSR or LSL alias of UBFM.
69	//
70	//===----------------------------------------------------------------------===//
71
72	#include "AArch64ExpandImm.h"
73	#include "AArch64InstrInfo.h"
74	#include "MCTargetDesc/AArch64AddressingModes.h"
75	#include "llvm/CodeGen/MachineDominators.h"
76	#include "llvm/CodeGen/MachineLoopInfo.h"
77
78	using namespace llvm;
79
80	#define DEBUG_TYPE "aarch64-mi-peephole-opt"
81
82	namespace {
83
84	class AArch64MIPeepholeOptImpl {
85	public:
86	const AArch64InstrInfo *TII;
87	const AArch64RegisterInfo *TRI;
88	MachineLoopInfo *MLI;
89	MachineRegisterInfo *MRI;
90
91	explicit AArch64MIPeepholeOptImpl(MachineLoopInfo &MLI) : MLI(&MLI) {}
92
93	bool run(MachineFunction &MF);
94
95	private:
96	using OpcodePair = std::pair<unsigned, unsigned>;
97	template <typename T>
98	using SplitAndOpcFunc =
99	std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
100	using BuildMIFunc =
101	std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
102	Register, Register, Register)>;
103
104	/// For instructions where an immediate operand could be split into two
105	/// separate immediate instructions, use the splitTwoPartImm two handle the
106	/// optimization.
107	///
108	/// To implement, the following function types must be passed to
109	/// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
110	/// splitting the immediate is valid and returns the associated new opcode. A
111	/// BuildMIFunc must be implemented to build the two immediate instructions.
112	///
113	/// Example Pattern (where IMM would require 2+ MOV instructions):
114	/// %dst = <Instr>rr %src IMM [...]
115	/// becomes:
116	/// %tmp = <Instr>ri %src (encode half IMM) [...]
117	/// %dst = <Instr>ri %tmp (encode half IMM) [...]
118	template <typename T>
119	bool splitTwoPartImm(MachineInstr &MI,
120	SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
121
122	bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
123	MachineInstr *&SubregToRegMI);
124
125	template <typename T>
126	bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
127	template <typename T>
128	bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
129
130	// Strategy used to split logical immediate bitmasks.
131	enum class SplitStrategy {
132	Intersect,
133	Disjoint,
134	};
135	template <typename T>
136	bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
137	SplitStrategy Strategy, unsigned OtherOpc = `0`);
138	bool visitORR(MachineInstr &MI);
139	bool visitCSEL(MachineInstr &MI);
140	bool visitINSERT(MachineInstr &MI);
141	bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
142	bool visitINSvi64lane(MachineInstr &MI);
143	bool visitFMOVDr(MachineInstr &MI);
144	bool visitUBFMXri(MachineInstr &MI);
145	bool visitCopy(MachineInstr &MI);
146	};
147
148	struct AArch64MIPeepholeOptLegacy : public MachineFunctionPass {
149	static char ID;
150
151	AArch64MIPeepholeOptLegacy() : MachineFunctionPass (ID) {}
152
153	bool runOnMachineFunction(MachineFunction &MF) override;
154
155	StringRef getPassName() const override {
156	return "AArch64 MI Peephole Optimization pass";
157	}
158
159	void getAnalysisUsage(AnalysisUsage &AU) const override {
160	AU.setPreservesCFG();
161	AU.addRequired<MachineLoopInfoWrapperPass>();
162	MachineFunctionPass::getAnalysisUsage(AU);
163	}
164	};
165
166	char AArch64MIPeepholeOptLegacy::ID = `0`;
167
168	} // end anonymous namespace
169
// Registers the legacy pass under the command-line name
// "aarch64-mi-peephole-opt" with the description string below.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS -- confirm against
// the macro definition.
INITIALIZE_PASS(AArch64MIPeepholeOptLegacy, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)
172
173	template <typename T>
174	static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
175	T UImm = static_cast<T>(Imm);
176	assert(UImm && (UImm != ~static_cast<T>(`0`)) && "Invalid immediate!");
177
178	// The bitmask immediate consists of consecutive ones. Let's say there is
179	// constant 0b00000000001000000000010000000000 which does not consist of
180	// consecutive ones. We can split it in to two bitmask immediate like
181	// 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
182	// If we do AND with these two bitmask immediate, we can see original one.
183	unsigned LowestBitSet = llvm::countr_zero(UImm);
184	unsigned HighestBitSet = Log2_64(UImm);
185
186	// Create a mask which is filled with one from the position of lowest bit set
187	// to the position of highest bit set.
188	T NewImm1 = (static_cast<T>(`2`) << HighestBitSet) -
189	(static_cast<T>(`1`) << LowestBitSet);
190	// Create a mask which is filled with one outside the position of lowest bit
191	// set and the position of highest bit set.
192	T NewImm2 = UImm \| ~NewImm1;
193
194	// If the split value is not valid bitmask immediate, do not split this
195	// constant.
196	if (!AArch64_AM::isLogicalImmediate(imm: NewImm2, regSize: RegSize))
197	return false;
198
199	Imm1Enc = AArch64_AM::encodeLogicalImmediate(imm: NewImm1, regSize: RegSize);
200	Imm2Enc = AArch64_AM::encodeLogicalImmediate(imm: NewImm2, regSize: RegSize);
201	return true;
202	}
203
204	template <typename T>
205	static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
206	T &Imm2Enc) {
207	assert(Imm && (Imm != ~static_cast<T>(`0`)) && "Invalid immediate!");
208
209	// Try to split a bitmask of the form 0b00000000011000000000011110000000 into
210	// two disjoint masks such as 0b00000000011000000000000000000000 and
211	// 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the
212	// new masks match the original mask.
213	unsigned LowestBitSet = llvm::countr_zero(Imm);
214	unsigned LowestGapBitUnset =
215	LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);
216
217	// Create a mask for the least significant group of consecutive ones.
218	assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
219	T NewImm1 = (static_cast<T>(`1`) << LowestGapBitUnset) -
220	(static_cast<T>(`1`) << LowestBitSet);
221	// Create a disjoint mask for the remaining ones.
222	T NewImm2 = Imm & ~NewImm1;
223
224	// Do not split if NewImm2 is not a valid bitmask immediate.
225	if (!AArch64_AM::isLogicalImmediate(imm: NewImm2, regSize: RegSize))
226	return false;
227
228	Imm1Enc = AArch64_AM::encodeLogicalImmediate(imm: NewImm1, regSize: RegSize);
229	Imm2Enc = AArch64_AM::encodeLogicalImmediate(imm: NewImm2, regSize: RegSize);
230	return true;
231	}
232
233	template <typename T>
234	bool AArch64MIPeepholeOptImpl::trySplitLogicalImm(unsigned Opc,
235	MachineInstr &MI,
236	SplitStrategy Strategy,
237	unsigned OtherOpc) {
238	// Try below transformations.
239	//
240	// MOVi32imm + (ANDS?\|EOR\|ORR)Wrr ==> (AND\|EOR\|ORR)Wri + (ANDS?\|EOR\|ORR)Wri
241	// MOVi64imm + (ANDS?\|EOR\|ORR)Xrr ==> (AND\|EOR\|ORR)Xri + (ANDS?\|EOR\|ORR)Xri
242	//
243	// The mov pseudo instruction could be expanded to multiple mov instructions
244	// later. Let's try to split the constant operand of mov instruction into two
245	// bitmask immediates based on the given split strategy. It makes only two
246	// logical instructions instead of multiple mov + logic instructions.
247
248	return splitTwoPartImm<T>(
249	MI,
250	[Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
251	T &Imm1) -> std::optional<OpcodePair> {
252	// If this immediate is already a suitable bitmask, don't split it.
253	// TODO: Should we just combine the two instructions in this case?
254	if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize))
255	return std::nullopt;
256
257	// If this immediate can be handled by one instruction, don't split it.
258	SmallVector<AArch64_IMM::ImmInsnModel, `4`> Insn;
259	AArch64_IMM::expandMOVImm(Imm, BitSize: RegSize, Insn);
260	if (Insn.size() == `1`)
261	return std::nullopt;
262
263	bool SplitSucc = false;
264	switch (Strategy) {
265	case SplitStrategy::Intersect:
266	SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
267	break;
268	case SplitStrategy::Disjoint:
269	SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
270	break;
271	}
272	if (SplitSucc)
273	return std::make_pair(x: Opc, y: !OtherOpc ? Opc : OtherOpc);
274	return std::nullopt;
275	},
276	[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
277	unsigned Imm1, Register SrcReg, Register NewTmpReg,
278	Register NewDstReg) {
279	DebugLoc DL = MI.getDebugLoc();
280	MachineBasicBlock *MBB = MI.getParent();
281	BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.first), DestReg: NewTmpReg)
282	.addReg(RegNo: SrcReg)
283	.addImm(Val: Imm0);
284	BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.second), DestReg: NewDstReg)
285	.addReg(RegNo: NewTmpReg)
286	.addImm(Val: Imm1);
287	});
288	}
289
290	bool AArch64MIPeepholeOptImpl::visitORR(MachineInstr &MI) {
291	// Check this ORR comes from below zero-extend pattern.
292	//
293	// def : Pat<(i64 (zext GPR32:$src)),
294	// (SUBREG_TO_REG (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
295	if (MI.getOperand(i: `3`).getImm() != `0`)
296	return false;
297
298	if (MI.getOperand(i: `1`).getReg() != AArch64::WZR)
299	return false;
300
301	if (MI.getOperand(i: `2`).getSubReg())
302	return false;
303
304	MachineInstr *SrcMI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: `2`).getReg());
305	if (!SrcMI)
306	return false;
307
308	// From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
309	//
310	// When you use the 32-bit form of an instruction, the upper 32 bits of the
311	// source registers are ignored and the upper 32 bits of the destination
312	// register are set to zero.
313	//
314	// If AArch64's 32-bit form of instruction defines the source operand of
315	// zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
316	// real AArch64 instruction and if it is not, do not process the opcode
317	// conservatively.
318	if (SrcMI->getOpcode() == TargetOpcode::COPY &&
319	SrcMI->getOperand(i: `1`).getReg().isVirtual()) {
320	const TargetRegisterClass *RC =
321	MRI->getRegClass(Reg: SrcMI->getOperand(i: `1`).getReg());
322
323	// A COPY from an FPR will become a FMOVSWr, so do so now so that we know
324	// that the upper bits are zero.
325	if (RC != &AArch64::FPR32RegClass &&
326	((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
327	RC != &AArch64::ZPRRegClass) \|\|
328	SrcMI->getOperand(i: `1`).getSubReg() != AArch64::ssub))
329	return false;
330	Register CpySrc;
331	if (SrcMI->getOperand(i: `1`).getSubReg() == AArch64::ssub) {
332	CpySrc = MRI->createVirtualRegister(RegClass: &AArch64::FPR32RegClass);
333	BuildMI(BB&: *SrcMI->getParent(), I: SrcMI, MIMD: SrcMI->getDebugLoc(),
334	MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: CpySrc)
335	.add(MO: SrcMI->getOperand(i: `1`));
336	} else {
337	CpySrc = SrcMI->getOperand(i: `1`).getReg();
338	}
339	BuildMI(BB&: *SrcMI->getParent(), I: SrcMI, MIMD: SrcMI->getDebugLoc(),
340	MCID: TII->get(Opcode: AArch64::FMOVSWr), DestReg: SrcMI->getOperand(i: `0`).getReg())
341	.addReg(RegNo: CpySrc);
342	SrcMI->eraseFromParent();
343	}
344	else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
345	return false;
346
347	Register DefReg = MI.getOperand(i: `0`).getReg();
348	Register SrcReg = MI.getOperand(i: `2`).getReg();
349	MRI->replaceRegWith(FromReg: DefReg, ToReg: SrcReg);
350	MRI->clearKillFlags(Reg: SrcReg);
351	LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
352	MI.eraseFromParent();
353
354	return true;
355	}
356
357	bool AArch64MIPeepholeOptImpl::visitCSEL(MachineInstr &MI) {
358	// Replace CSEL with MOV when both inputs are the same register.
359	if (MI.getOperand(i: `1`).getReg() != MI.getOperand(i: `2`).getReg())
360	return false;
361
362	auto ZeroReg =
363	MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
364	auto OrOpcode =
365	MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;
366
367	BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: OrOpcode))
368	.addReg(RegNo: MI.getOperand(i: `0`).getReg(), Flags: RegState::Define)
369	.addReg(RegNo: ZeroReg)
370	.addReg(RegNo: MI.getOperand(i: `1`).getReg())
371	.addImm(Val: `0`);
372
373	MI.eraseFromParent();
374	return true;
375	}
376
377	bool AArch64MIPeepholeOptImpl::visitINSERT(MachineInstr &MI) {
378	// Check this INSERT_SUBREG comes from below zero-extend pattern.
379	//
380	// From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
381	// To %reg:subidx = SUBREG_TO_REG %subreg, subidx
382	//
383	// We're assuming the first operand to INSERT_SUBREG is irrelevant because a
384	// COPY would destroy the upper part of the register anyway
385	if (!MI.isRegTiedToDefOperand(UseOpIdx: `1`))
386	return false;
387
388	Register DstReg = MI.getOperand(i: `0`).getReg();
389	const TargetRegisterClass *RC = MRI->getRegClass(Reg: DstReg);
390	MachineInstr *SrcMI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: `2`).getReg());
391	if (!SrcMI)
392	return false;
393
394	// From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
395	//
396	// When you use the 32-bit form of an instruction, the upper 32 bits of the
397	// source registers are ignored and the upper 32 bits of the destination
398	// register are set to zero.
399	//
400	// If AArch64's 32-bit form of instruction defines the source operand of
401	// zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
402	// real AArch64 instruction and if it is not, do not process the opcode
403	// conservatively.
404	if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) \|\|
405	!AArch64::GPR64allRegClass.hasSubClassEq(RC))
406	return false;
407
408	// Build a SUBREG_TO_REG instruction
409	MachineInstr *SubregMI =
410	BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(),
411	MCID: TII->get(Opcode: TargetOpcode::SUBREG_TO_REG), DestReg: DstReg)
412	.add(MO: MI.getOperand(i: `2`))
413	.add(MO: MI.getOperand(i: `3`));
414	LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n");
415	(void)SubregMI;
416	MI.eraseFromParent();
417
418	return true;
419	}
420
421	template <typename T>
422	static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
423	// The immediate must be in the form of ((imm0 << 12) + imm1), in which both
424	// imm0 and imm1 are non-zero 12-bit unsigned int.
425	if ((Imm & `0xfff000`) == `0` \|\| (Imm & `0xfff`) == `0` \|\|
426	(Imm & ~static_cast<T>(`0xffffff`)) != `0`)
427	return false;
428
429	// The immediate can not be composed via a single instruction.
430	SmallVector<AArch64_IMM::ImmInsnModel, `4`> Insn;
431	AArch64_IMM::expandMOVImm(Imm, BitSize: RegSize, Insn);
432	if (Insn.size() == `1`)
433	return false;
434
435	// Split Imm into (Imm0 << 12) + Imm1;
436	Imm0 = (Imm >> `12`) & `0xfff`;
437	Imm1 = Imm & `0xfff`;
438	return true;
439	}
440
441	template <typename T>
442	bool AArch64MIPeepholeOptImpl::visitADDSUB(unsigned PosOpc, unsigned NegOpc,
443	MachineInstr &MI) {
444	// Try below transformation.
445	//
446	// ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
447	// ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
448	//
449	// SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
450	// SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
451	//
452	// The mov pseudo instruction could be expanded to multiple mov instructions
453	// later. Let's try to split the constant operand of mov instruction into two
454	// legal add/sub immediates. It makes only two ADD/SUB instructions instead of
455	// multiple `mov` + `and/sub` instructions.
456
457	// We can sometimes have ADDWrr WZR, MULi32imm that have not been constant
458	// folded. Make sure that we don't generate invalid instructions that use XZR
459	// in those cases.
460	if (MI.getOperand(i: `1`).getReg() == AArch64::XZR \|\|
461	MI.getOperand(i: `1`).getReg() == AArch64::WZR)
462	return false;
463
464	return splitTwoPartImm<T>(
465	MI,
466	[PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
467	T &Imm1) -> std::optional<OpcodePair> {
468	if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
469	return std::make_pair(x: PosOpc, y: PosOpc);
470	if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
471	return std::make_pair(x: NegOpc, y: NegOpc);
472	return std::nullopt;
473	},
474	[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
475	unsigned Imm1, Register SrcReg, Register NewTmpReg,
476	Register NewDstReg) {
477	DebugLoc DL = MI.getDebugLoc();
478	MachineBasicBlock *MBB = MI.getParent();
479	BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.first), DestReg: NewTmpReg)
480	.addReg(RegNo: SrcReg)
481	.addImm(Val: Imm0)
482	.addImm(Val: `12`);
483	BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.second), DestReg: NewDstReg)
484	.addReg(RegNo: NewTmpReg)
485	.addImm(Val: Imm1)
486	.addImm(Val: `0`);
487	});
488	}
489
490	template <typename T>
491	bool AArch64MIPeepholeOptImpl::visitADDSSUBS(OpcodePair PosOpcs,
492	OpcodePair NegOpcs,
493	MachineInstr &MI) {
494	// Try the same transformation as ADDSUB but with additional requirement
495	// that the condition code usages are only for Equal and Not Equal
496
497	if (MI.getOperand(i: `1`).getReg() == AArch64::XZR \|\|
498	MI.getOperand(i: `1`).getReg() == AArch64::WZR)
499	return false;
500
501	return splitTwoPartImm<T>(
502	MI,
503	[PosOpcs, NegOpcs, &MI, &TRI = TRI,
504	&MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
505	T &Imm1) -> std::optional<OpcodePair> {
506	OpcodePair OP;
507	if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
508	OP = PosOpcs;
509	else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
510	OP = NegOpcs;
511	else
512	return std::nullopt;
513	// Check conditional uses last since it is expensive for scanning
514	// proceeding instructions
515	MachineInstr &SrcMI = *MRI->getUniqueVRegDef(Reg: MI.getOperand(i: `1`).getReg());
516	std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(MI&: SrcMI, CmpInstr&: MI, TRI: *TRI);
517	if (!NZCVUsed \|\| NZCVUsed ->C \|\| NZCVUsed ->V)
518	return std::nullopt;
519	return OP;
520	},
521	[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
522	unsigned Imm1, Register SrcReg, Register NewTmpReg,
523	Register NewDstReg) {
524	DebugLoc DL = MI.getDebugLoc();
525	MachineBasicBlock *MBB = MI.getParent();
526	BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.first), DestReg: NewTmpReg)
527	.addReg(RegNo: SrcReg)
528	.addImm(Val: Imm0)
529	.addImm(Val: `12`);
530	BuildMI(BB&: *MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: Opcode.second), DestReg: NewDstReg)
531	.addReg(RegNo: NewTmpReg)
532	.addImm(Val: Imm1)
533	.addImm(Val: `0`);
534	});
535	}
536
537	// Checks if the corresponding MOV immediate instruction is applicable for
538	// this peephole optimization.
539	bool AArch64MIPeepholeOptImpl::checkMovImmInstr(MachineInstr &MI,
540	MachineInstr *&MovMI,
541	MachineInstr *&SubregToRegMI) {
542	// Check whether current MBB is in loop and the AND is loop invariant.
543	MachineBasicBlock *MBB = MI.getParent();
544	MachineLoop *L = MLI->getLoopFor(BB: MBB);
545	if (L && !L->isLoopInvariant(I&: MI))
546	return false;
547
548	// Check whether current MI's operand is MOV with immediate.
549	MovMI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: `2`).getReg());
550	if (!MovMI)
551	return false;
552
553	// If it is SUBREG_TO_REG, check its operand.
554	SubregToRegMI = nullptr;
555	if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
556	SubregToRegMI = MovMI;
557	MovMI = MRI->getUniqueVRegDef(Reg: MovMI->getOperand(i: `1`).getReg());
558	if (!MovMI)
559	return false;
560	}
561
562	if (MovMI->getOpcode() != AArch64::MOVi32imm &&
563	MovMI->getOpcode() != AArch64::MOVi64imm)
564	return false;
565
566	// If the MOV has multiple uses, do not split the immediate because it causes
567	// more instructions.
568	if (!MRI->hasOneUse(RegNo: MovMI->getOperand(i: `0`).getReg()))
569	return false;
570	if (SubregToRegMI && !MRI->hasOneUse(RegNo: SubregToRegMI->getOperand(i: `0`).getReg()))
571	return false;
572
573	// It is OK to perform this peephole optimization.
574	return true;
575	}
576
577	template <typename T>
578	bool AArch64MIPeepholeOptImpl::splitTwoPartImm(MachineInstr &MI,
579	SplitAndOpcFunc<T> SplitAndOpc,
580	BuildMIFunc BuildInstr) {
581	unsigned RegSize = sizeof(T) * `8`;
582	assert((RegSize == `32` \|\| RegSize == `64`) &&
583	"Invalid RegSize for legal immediate peephole optimization");
584
585	// Perform several essential checks against current MI.
586	MachineInstr MovMI, SubregToRegMI;
587	if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
588	return false;
589
590	// Split the immediate to Imm0 and Imm1, and calculate the Opcode.
591	T Imm = static_cast<T>(MovMI->getOperand(i: `1`).getImm()), Imm0, Imm1;
592	// For the 32 bit form of instruction, the upper 32 bits of the destination
593	// register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
594	// of Imm to zero. This is essential if the Immediate value was a negative
595	// number since it was sign extended when we assign to the 64-bit Imm.
596	if (SubregToRegMI)
597	Imm &= `0xFFFFFFFF`;
598	OpcodePair Opcode;
599	if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
600	Opcode = *R;
601	else
602	return false;
603
604	// Create new MIs using the first and second opcodes. Opcodes might differ for
605	// flag setting operations that should only set flags on second instruction.
606	// NewTmpReg = Opcode.first SrcReg Imm0
607	// NewDstReg = Opcode.second NewTmpReg Imm1
608
609	// Determine register classes for destinations and register operands
610	const TargetRegisterClass *FirstInstrDstRC =
611	TII->getRegClass(MCID: TII->get(Opcode: Opcode.first), OpNum: `0`);
612	const TargetRegisterClass *FirstInstrOperandRC =
613	TII->getRegClass(MCID: TII->get(Opcode: Opcode.first), OpNum: `1`);
614	const TargetRegisterClass *SecondInstrDstRC =
615	(Opcode.first == Opcode.second)
616	? FirstInstrDstRC
617	: TII->getRegClass(MCID: TII->get(Opcode: Opcode.second), OpNum: `0`);
618	const TargetRegisterClass *SecondInstrOperandRC =
619	(Opcode.first == Opcode.second)
620	? FirstInstrOperandRC
621	: TII->getRegClass(MCID: TII->get(Opcode: Opcode.second), OpNum: `1`);
622
623	// Get old registers destinations and new register destinations
624	Register DstReg = MI.getOperand(i: `0`).getReg();
625	Register SrcReg = MI.getOperand(i: `1`).getReg();
626	Register NewTmpReg = MRI->createVirtualRegister(RegClass: FirstInstrDstRC);
627	// In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
628	// reuse that same destination register.
629	Register NewDstReg = DstReg.isVirtual()
630	? MRI->createVirtualRegister(RegClass: SecondInstrDstRC)
631	: DstReg;
632
633	// Constrain registers based on their new uses
634	MRI->constrainRegClass(Reg: SrcReg, RC: FirstInstrOperandRC);
635	MRI->constrainRegClass(Reg: NewTmpReg, RC: SecondInstrOperandRC);
636	if (DstReg != NewDstReg)
637	MRI->constrainRegClass(Reg: NewDstReg, RC: MRI->getRegClass(Reg: DstReg));
638
639	// Call the delegating operation to build the instruction
640	BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
641
642	// replaceRegWith changes MI's definition register. Keep it for SSA form until
643	// deleting MI. Only if we made a new destination register.
644	if (DstReg != NewDstReg) {
645	MRI->replaceRegWith(FromReg: DstReg, ToReg: NewDstReg);
646	MI.getOperand(i: `0`).setReg(DstReg);
647	}
648
649	// Record the MIs need to be removed.
650	MI.eraseFromParent();
651	if (SubregToRegMI)
652	SubregToRegMI->eraseFromParent();
653	MovMI->eraseFromParent();
654
655	return true;
656	}
657
658	bool AArch64MIPeepholeOptImpl::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
659	// Check if this INSvi[X]gpr comes from COPY of a source FPR128
660	//
661	// From
662	// %intermediate1:gpr64 = COPY %src:fpr128
663	// %intermediate2:gpr32 = COPY %intermediate1:gpr64
664	// %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
665	// To
666	// %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
667	// src_index
668	// where src_index = 0, X = [8\|16\|32\|64]
669
670	MachineInstr *SrcMI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: `3`).getReg());
671
672	// For a chain of COPY instructions, find the initial source register
673	// and check if it's an FPR128
674	while (true) {
675	if (!SrcMI \|\| SrcMI->getOpcode() != TargetOpcode::COPY)
676	return false;
677
678	if (!SrcMI->getOperand(i: `1`).getReg().isVirtual())
679	return false;
680
681	if (MRI->getRegClass(Reg: SrcMI->getOperand(i: `1`).getReg()) ==
682	&AArch64::FPR128RegClass) {
683	break;
684	}
685	SrcMI = MRI->getUniqueVRegDef(Reg: SrcMI->getOperand(i: `1`).getReg());
686	}
687
688	Register DstReg = MI.getOperand(i: `0`).getReg();
689	Register SrcReg = SrcMI->getOperand(i: `1`).getReg();
690	MachineInstr *INSvilaneMI =
691	BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: Opc), DestReg: DstReg)
692	.add(MO: MI.getOperand(i: `1`))
693	.add(MO: MI.getOperand(i: `2`))
694	.addUse(RegNo: SrcReg, Flags: getRegState(RegOp: SrcMI->getOperand(i: `1`)))
695	.addImm(Val: `0`);
696
697	LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *INSvilaneMI << "\n");
698	(void)INSvilaneMI;
699	MI.eraseFromParent();
700	return true;
701	}
702
703	// All instructions that set a FPR64 will implicitly zero the top bits of the
704	// register. When the def is expressed as a COPY from a GPR, turn it into an
705	// explicit FMOV so it cannot be elided later in further passes.
706	static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
707	MachineRegisterInfo *MRI,
708	const AArch64InstrInfo *TII) {
709	if (!MI->getOperand(i: `0`).isReg() \|\| !MI->getOperand(i: `0`).isDef())
710	return false;
711	const TargetRegisterClass *RC = MRI->getRegClass(Reg: MI->getOperand(i: `0`).getReg());
712	if (RC != &AArch64::FPR64RegClass)
713	return false;
714	if (MI->getOpcode() == TargetOpcode::COPY) {
715	MachineOperand &SrcOp = MI->getOperand(i: `1`);
716	if (!SrcOp.isReg())
717	return false;
718	if (SrcOp.getSubReg())
719	return false;
720	Register SrcReg = SrcOp.getReg();
721	auto IsGPR64Like = [&]() -> bool {
722	if (SrcReg.isVirtual())
723	return AArch64::GPR64allRegClass.hasSubClassEq(
724	RC: MRI->getRegClass(Reg: SrcReg));
725	return AArch64::GPR64allRegClass.contains(Reg: SrcReg);
726	};
727	if (!IsGPR64Like ())
728	return false;
729	assert(TII && "Expected InstrInfo when materializing COPYs");
730	// FMOVXDr insists on strict GPR64 operands, so fix up the COPY source.
731	MachineOperand &SrcMO = MI->getOperand(i: `1`);
732	bool SrcKill = SrcMO.isKill();
733	if (SrcReg.isVirtual()) {
734	if (MRI->getRegClass(Reg: SrcReg) != &AArch64::GPR64RegClass) {
735	// Pass the value through a temporary GPR64 vreg to satisfy the
736	// verifier.
737	Register NewSrc = MRI->createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
738	BuildMI(BB&: *MI->getParent(), I: MI, MIMD: MI->getDebugLoc(),
739	MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: NewSrc)
740	.addReg(RegNo: SrcReg, Flags: getKillRegState(B: SrcKill));
741	SrcReg = NewSrc;
742	SrcKill = true;
743	}
744	} else if (!AArch64::GPR64RegClass.contains(Reg: SrcReg)) {
745	return false;
746	}
747	SrcMO.setReg(SrcReg);
748	SrcMO.setSubReg(`0`);
749	SrcMO.setIsKill(SrcKill);
750	// Replace the COPY with an explicit FMOV so the zeroing behaviour stays
751	// visible.
752	MI->setDesc(TII->get(Opcode: AArch64::FMOVXDr));
753	return true;
754	}
755	return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
756	}
757
758	bool AArch64MIPeepholeOptImpl::visitINSvi64lane(MachineInstr &MI) {
759	// Check the MI for low 64-bits sets zero for high 64-bits implicitly.
760	// We are expecting below case.
761	//
762	// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
763	// %6:fpr128 = IMPLICIT_DEF
764	// %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
765	// %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
766	MachineInstr *Low64MI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: `1`).getReg());
767	if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
768	return false;
769	Low64MI = MRI->getUniqueVRegDef(Reg: Low64MI->getOperand(i: `2`).getReg());
770	if (!Low64MI \|\| !is64bitDefwithZeroHigh64bit(MI: Low64MI, MRI, TII))
771	return false;
772
773	// Check there is `mov 0` MI for high 64-bits.
774	// We are expecting below cases.
775	//
776	// %2:fpr64 = MOVID 0
777	// %4:fpr128 = IMPLICIT_DEF
778	// %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
779	// %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
780	// or
781	// %5:fpr128 = MOVIv2d_ns 0
782	// %6:fpr64 = COPY %5.dsub:fpr128
783	// %8:fpr128 = IMPLICIT_DEF
784	// %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
785	// %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
786	MachineInstr *High64MI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: `3`).getReg());
787	if (!High64MI \|\| High64MI->getOpcode() != AArch64::INSERT_SUBREG)
788	return false;
789	High64MI = MRI->getUniqueVRegDef(Reg: High64MI->getOperand(i: `2`).getReg());
790	if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
791	High64MI = MRI->getUniqueVRegDef(Reg: High64MI->getOperand(i: `1`).getReg());
792	if (!High64MI \|\| (High64MI->getOpcode() != AArch64::MOVID &&
793	High64MI->getOpcode() != AArch64::MOVIv2d_ns))
794	return false;
795	if (High64MI->getOperand(i: `1`).getImm() != `0`)
796	return false;
797
798	// Let's remove MIs for high 64-bits.
799	Register OldDef = MI.getOperand(i: `0`).getReg();
800	Register NewDef = MI.getOperand(i: `1`).getReg();
801	LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
802	MRI->constrainRegClass(Reg: NewDef, RC: MRI->getRegClass(Reg: OldDef));
803	MRI->replaceRegWith(FromReg: OldDef, ToReg: NewDef);
804	MRI->clearKillFlags(Reg: NewDef);
805	MI.eraseFromParent();
806
807	return true;
808	}
809
810	bool AArch64MIPeepholeOptImpl::visitFMOVDr(MachineInstr &MI) {
811	// An FMOVDr sets the high 64-bits to zero implicitly, similar to ORR for GPR.
812	MachineInstr *Low64MI = MRI->getUniqueVRegDef(Reg: MI.getOperand(i: `1`).getReg());
813	if (!Low64MI \|\| !is64bitDefwithZeroHigh64bit(MI: Low64MI, MRI, TII))
814	return false;
815
816	// Let's remove MIs for high 64-bits.
817	Register OldDef = MI.getOperand(i: `0`).getReg();
818	Register NewDef = MI.getOperand(i: `1`).getReg();
819	LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
820	MRI->clearKillFlags(Reg: OldDef);
821	MRI->clearKillFlags(Reg: NewDef);
822	MRI->constrainRegClass(Reg: NewDef, RC: MRI->getRegClass(Reg: OldDef));
823	MRI->replaceRegWith(FromReg: OldDef, ToReg: NewDef);
824	MI.eraseFromParent();
825
826	return true;
827	}
828
829	bool AArch64MIPeepholeOptImpl::visitUBFMXri(MachineInstr &MI) {
830	// Check if the instruction is equivalent to a 32 bit LSR or LSL alias of
831	// UBFM, and replace the UBFMXri instruction with its 32 bit variant, UBFMWri.
832	int64_t Immr = MI.getOperand(i: `2`).getImm();
833	int64_t Imms = MI.getOperand(i: `3`).getImm();
834
835	bool IsLSR = Imms == `31` && Immr <= Imms;
836	bool IsLSL = Immr == Imms + `33`;
837	if (!IsLSR && !IsLSL)
838	return false;
839
840	if (IsLSL) {
841	Immr -= `32`;
842	}
843
844	const TargetRegisterClass *DstRC64 =
845	TII->getRegClass(MCID: TII->get(Opcode: MI.getOpcode()), OpNum: `0`);
846	const TargetRegisterClass *DstRC32 =
847	TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
848	assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
849	"sub_32 subregister class");
850
851	const TargetRegisterClass *SrcRC64 =
852	TII->getRegClass(MCID: TII->get(Opcode: MI.getOpcode()), OpNum: `1`);
853	const TargetRegisterClass *SrcRC32 =
854	TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
855	assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
856	"subregister class");
857
858	Register DstReg64 = MI.getOperand(i: `0`).getReg();
859	Register DstReg32 = MRI->createVirtualRegister(RegClass: DstRC32);
860	Register SrcReg64 = MI.getOperand(i: `1`).getReg();
861	Register SrcReg32 = MRI->createVirtualRegister(RegClass: SrcRC32);
862
863	BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::COPY),
864	DestReg: SrcReg32)
865	.addReg(RegNo: SrcReg64, Flags: {}, SubReg: AArch64::sub_32);
866	BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::UBFMWri),
867	DestReg: DstReg32)
868	.addReg(RegNo: SrcReg32)
869	.addImm(Val: Immr)
870	.addImm(Val: Imms);
871	BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(),
872	MCID: TII->get(Opcode: AArch64::SUBREG_TO_REG), DestReg: DstReg64)
873	.addReg(RegNo: DstReg32)
874	.addImm(Val: AArch64::sub_32);
875	MI.eraseFromParent();
876	return true;
877	}
878
879	// Across a basic-block we might have in i32 extract from a value that only
880	// operates on upper bits (for example a sxtw). We can replace the COPY with a
881	// new version skipping the sxtw.
882	bool AArch64MIPeepholeOptImpl::visitCopy(MachineInstr &MI) {
883	Register InputReg = MI.getOperand(i: `1`).getReg();
884	if (MI.getOperand(i: `1`).getSubReg() != AArch64::sub_32 \|\|
885	!MRI->hasOneNonDBGUse(RegNo: InputReg))
886	return false;
887
888	MachineInstr *SrcMI = MRI->getUniqueVRegDef(Reg: InputReg);
889	SmallPtrSet<MachineInstr *, `4`> DeadInstrs;
890	DeadInstrs.insert(Ptr: SrcMI);
891	while (SrcMI && SrcMI->isFullCopy() &&
892	MRI->hasOneNonDBGUse(RegNo: SrcMI->getOperand(i: `1`).getReg())) {
893	SrcMI = MRI->getUniqueVRegDef(Reg: SrcMI->getOperand(i: `1`).getReg());
894	DeadInstrs.insert(Ptr: SrcMI);
895	}
896
897	if (!SrcMI)
898	return false;
899
900	// Look for SXTW(X) and return Reg.
901	auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
902	if (SrcMI->getOpcode() != AArch64::SBFMXri \|\|
903	SrcMI->getOperand(i: `2`).getImm() != `0` \|\|
904	SrcMI->getOperand(i: `3`).getImm() != `31`)
905	return AArch64::NoRegister;
906	return SrcMI->getOperand(i: `1`).getReg();
907	};
908	// Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32)))
909	auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
910	if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG \|\|
911	SrcMI->getOperand(i: `2`).getImm() != AArch64::sub_32 \|\|
912	!MRI->hasOneNonDBGUse(RegNo: SrcMI->getOperand(i: `1`).getReg()))
913	return AArch64::NoRegister;
914	MachineInstr *Orr = MRI->getUniqueVRegDef(Reg: SrcMI->getOperand(i: `1`).getReg());
915	if (!Orr \|\| Orr->getOpcode() != AArch64::ORRWrr \|\|
916	Orr->getOperand(i: `1`).getReg() != AArch64::WZR \|\|
917	!MRI->hasOneNonDBGUse(RegNo: Orr->getOperand(i: `2`).getReg()))
918	return AArch64::NoRegister;
919	MachineInstr *Cpy = MRI->getUniqueVRegDef(Reg: Orr->getOperand(i: `2`).getReg());
920	if (!Cpy \|\| Cpy->getOpcode() != AArch64::COPY \|\|
921	Cpy->getOperand(i: `1`).getSubReg() != AArch64::sub_32)
922	return AArch64::NoRegister;
923	DeadInstrs.insert(Ptr: Orr);
924	return Cpy->getOperand(i: `1`).getReg();
925	};
926
927	Register SrcReg = getSXTWSrcReg (SrcMI);
928	if (!SrcReg)
929	SrcReg = getUXTWSrcReg (SrcMI);
930	if (!SrcReg)
931	return false;
932
933	MRI->constrainRegClass(Reg: SrcReg, RC: MRI->getRegClass(Reg: InputReg));
934	LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
935	MI.getOperand(i: `1`).setReg(SrcReg);
936	LLVM_DEBUG(dbgs() << " to: " << MI);
937	for (auto *DeadMI : DeadInstrs) {
938	LLVM_DEBUG(dbgs() << " Removing: " << *DeadMI);
939	DeadMI->eraseFromParent();
940	}
941	return true;
942	}
943
944	bool AArch64MIPeepholeOptImpl::run(MachineFunction &MF) {
945	TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
946	TRI = static_cast<const AArch64RegisterInfo *>(
947	MF.getSubtarget().getRegisterInfo());
948	MRI = &MF.getRegInfo();
949
950	assert(MRI->isSSA() && "Expected to be run on SSA form!");
951
952	bool Changed = false;
953
954	for (MachineBasicBlock &MBB : MF) {
955	for (MachineInstr &MI : make_early_inc_range(Range&: MBB)) {
956	switch (MI.getOpcode()) {
957	default:
958	break;
959	case AArch64::INSERT_SUBREG:
960	Changed \|= visitINSERT(MI);
961	break;
962	case AArch64::ANDWrr:
963	Changed \|= trySplitLogicalImm<uint32_t>(Opc: AArch64::ANDWri, MI,
964	Strategy: SplitStrategy::Intersect);
965	break;
966	case AArch64::ANDXrr:
967	Changed \|= trySplitLogicalImm<uint64_t>(Opc: AArch64::ANDXri, MI,
968	Strategy: SplitStrategy::Intersect);
969	break;
970	case AArch64::ANDSWrr:
971	Changed \|= trySplitLogicalImm<uint32_t>(
972	Opc: AArch64::ANDWri, MI, Strategy: SplitStrategy::Intersect, OtherOpc: AArch64::ANDSWri);
973	break;
974	case AArch64::ANDSXrr:
975	Changed \|= trySplitLogicalImm<uint64_t>(
976	Opc: AArch64::ANDXri, MI, Strategy: SplitStrategy::Intersect, OtherOpc: AArch64::ANDSXri);
977	break;
978	case AArch64::EORWrr:
979	Changed \|= trySplitLogicalImm<uint32_t>(Opc: AArch64::EORWri, MI,
980	Strategy: SplitStrategy::Disjoint);
981	break;
982	case AArch64::EORXrr:
983	Changed \|= trySplitLogicalImm<uint64_t>(Opc: AArch64::EORXri, MI,
984	Strategy: SplitStrategy::Disjoint);
985	break;
986	case AArch64::ORRWrr:
987	Changed \|= trySplitLogicalImm<uint32_t>(Opc: AArch64::ORRWri, MI,
988	Strategy: SplitStrategy::Disjoint);
989	break;
990	case AArch64::ORRXrr:
991	Changed \|= trySplitLogicalImm<uint64_t>(Opc: AArch64::ORRXri, MI,
992	Strategy: SplitStrategy::Disjoint);
993	break;
994	case AArch64::ORRWrs:
995	Changed \|= visitORR(MI);
996	break;
997	case AArch64::ADDWrr:
998	Changed \|= visitADDSUB<uint32_t>(PosOpc: AArch64::ADDWri, NegOpc: AArch64::SUBWri, MI);
999	break;
1000	case AArch64::SUBWrr:
1001	Changed \|= visitADDSUB<uint32_t>(PosOpc: AArch64::SUBWri, NegOpc: AArch64::ADDWri, MI);
1002	break;
1003	case AArch64::ADDXrr:
1004	Changed \|= visitADDSUB<uint64_t>(PosOpc: AArch64::ADDXri, NegOpc: AArch64::SUBXri, MI);
1005	break;
1006	case AArch64::SUBXrr:
1007	Changed \|= visitADDSUB<uint64_t>(PosOpc: AArch64::SUBXri, NegOpc: AArch64::ADDXri, MI);
1008	break;
1009	case AArch64::ADDSWrr:
1010	Changed \|=
1011	visitADDSSUBS<uint32_t>(PosOpcs: {AArch64::ADDWri, AArch64::ADDSWri},
1012	NegOpcs: {AArch64::SUBWri, AArch64::SUBSWri}, MI);
1013	break;
1014	case AArch64::SUBSWrr:
1015	Changed \|=
1016	visitADDSSUBS<uint32_t>(PosOpcs: {AArch64::SUBWri, AArch64::SUBSWri},
1017	NegOpcs: {AArch64::ADDWri, AArch64::ADDSWri}, MI);
1018	break;
1019	case AArch64::ADDSXrr:
1020	Changed \|=
1021	visitADDSSUBS<uint64_t>(PosOpcs: {AArch64::ADDXri, AArch64::ADDSXri},
1022	NegOpcs: {AArch64::SUBXri, AArch64::SUBSXri}, MI);
1023	break;
1024	case AArch64::SUBSXrr:
1025	Changed \|=
1026	visitADDSSUBS<uint64_t>(PosOpcs: {AArch64::SUBXri, AArch64::SUBSXri},
1027	NegOpcs: {AArch64::ADDXri, AArch64::ADDSXri}, MI);
1028	break;
1029	case AArch64::CSELWr:
1030	case AArch64::CSELXr:
1031	Changed \|= visitCSEL(MI);
1032	break;
1033	case AArch64::INSvi64gpr:
1034	Changed \|= visitINSviGPR(MI, Opc: AArch64::INSvi64lane);
1035	break;
1036	case AArch64::INSvi32gpr:
1037	Changed \|= visitINSviGPR(MI, Opc: AArch64::INSvi32lane);
1038	break;
1039	case AArch64::INSvi16gpr:
1040	Changed \|= visitINSviGPR(MI, Opc: AArch64::INSvi16lane);
1041	break;
1042	case AArch64::INSvi8gpr:
1043	Changed \|= visitINSviGPR(MI, Opc: AArch64::INSvi8lane);
1044	break;
1045	case AArch64::INSvi64lane:
1046	Changed \|= visitINSvi64lane(MI);
1047	break;
1048	case AArch64::FMOVDr:
1049	Changed \|= visitFMOVDr(MI);
1050	break;
1051	case AArch64::UBFMXri:
1052	Changed \|= visitUBFMXri(MI);
1053	break;
1054	case AArch64::COPY:
1055	Changed \|= visitCopy(MI);
1056	break;
1057	}
1058	}
1059	}
1060
1061	return Changed;
1062	}
1063
1064	bool AArch64MIPeepholeOptLegacy::runOnMachineFunction(MachineFunction &MF) {
1065	if (skipFunction(F: MF.getFunction()))
1066	return false;
1067
1068	MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1069	return AArch64MIPeepholeOptImpl (MLI).run(MF);
1070	}
1071
1072	FunctionPass *llvm::createAArch64MIPeepholeOptLegacyPass() {
1073	return new AArch64MIPeepholeOptLegacy ();
1074	}
1075
1076	PreservedAnalyses
1077	AArch64MIPeepholeOptPass::run(MachineFunction &MF,
1078	MachineFunctionAnalysisManager &MFAM) {
1079	MachineLoopInfo &MLI = MFAM.getResult<MachineLoopAnalysis>(IR&: MF);
1080	const bool Changed = AArch64MIPeepholeOptImpl (MLI).run(MF);
1081	if (!Changed)
1082	return PreservedAnalyses::all();
1083	PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
1084	PA.preserveSet<CFGAnalyses>();
1085	return PA;
1086	}
1087

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp