1//=- AArch64RedundantCopyElimination.cpp - Remove useless copy for AArch64 -=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// This pass removes unnecessary copies/moves in BBs based on a dominating
8// condition.
9//
10// We handle three cases:
11// 1. For BBs that are targets of CBZ/CBNZ instructions, we know the value of
12// the CBZ/CBNZ source register is zero on the taken/not-taken path. For
13// instance, the copy instruction in the code below can be removed because
14// the CBZW jumps to %bb.2 when w0 is zero.
15//
16// %bb.1:
17// cbz w0, .LBB0_2
18// .LBB0_2:
19// mov w0, wzr ; <-- redundant
20//
21// 2. If the flag setting instruction defines a register other than WZR/XZR, we
22// can remove a zero copy in some cases.
23//
24// %bb.0:
25// subs w0, w1, w2
26// str w0, [x1]
27// b.ne .LBB0_2
28// %bb.1:
29// mov w0, wzr ; <-- redundant
30// str w0, [x2]
31// .LBB0_2
32//
33// 3. Finally, if the flag setting instruction is a comparison against a
34// constant (i.e., ADDS[W|X]ri, SUBS[W|X]ri), we can remove a mov immediate
35// in some cases.
36//
37// %bb.0:
38// subs xzr, x0, #1
39// b.eq .LBB0_1
40// .LBB0_1:
41// orr x0, xzr, #0x1 ; <-- redundant
42//
43// This pass should be run after register allocation.
44//
45// FIXME: This could also be extended to check the whole dominance subtree below
46// the comparison if the compile time regression is acceptable.
47//
48// FIXME: Add support for handling CCMP instructions.
49// FIXME: If the known register value is zero, we should be able to rewrite uses
50// to use WZR/XZR directly in some cases.
51//===----------------------------------------------------------------------===//
52#include "AArch64.h"
53#include "AArch64InstrInfo.h"
54#include "llvm/ADT/SetVector.h"
55#include "llvm/ADT/Statistic.h"
56#include "llvm/ADT/iterator_range.h"
57#include "llvm/CodeGen/LiveRegUnits.h"
58#include "llvm/CodeGen/MachineFunctionPass.h"
59#include "llvm/CodeGen/MachineRegisterInfo.h"
60#include "llvm/Support/Debug.h"
61
62using namespace llvm;
63
64#define DEBUG_TYPE "aarch64-copyelim"
65
66STATISTIC(NumCopiesRemoved, "Number of copies removed.");
67
68namespace {
69class AArch64RedundantCopyEliminationImpl {
70public:
71 bool run(MachineFunction &MF);
72
73private:
74 const MachineRegisterInfo *MRI;
75 const TargetRegisterInfo *TRI;
76
77 // DomBBClobberedRegs is used when computing known values in the dominating
78 // BB.
79 LiveRegUnits DomBBClobberedRegs, DomBBUsedRegs;
80
81 // OptBBClobberedRegs is used when optimizing away redundant copies/moves.
82 LiveRegUnits OptBBClobberedRegs, OptBBUsedRegs;
83
84 struct RegImm {
85 MCPhysReg Reg;
86 int32_t Imm;
87 RegImm(MCPhysReg Reg, int32_t Imm) : Reg(Reg), Imm(Imm) {}
88 };
89
90 bool knownRegValInBlock(MachineInstr &CondBr, MachineBasicBlock *MBB,
91 SmallVectorImpl<RegImm> &KnownRegs,
92 MachineBasicBlock::iterator &FirstUse);
93 bool optimizeBlock(MachineBasicBlock *MBB);
94};
95
96class AArch64RedundantCopyEliminationLegacy : public MachineFunctionPass {
97public:
98 static char ID;
99 AArch64RedundantCopyEliminationLegacy() : MachineFunctionPass(ID) {}
100
101 bool runOnMachineFunction(MachineFunction &MF) override;
102
103 MachineFunctionProperties getRequiredProperties() const override {
104 return MachineFunctionProperties().setNoVRegs();
105 }
106 StringRef getPassName() const override {
107 return "AArch64 Redundant Copy Elimination";
108 }
109};
110char AArch64RedundantCopyEliminationLegacy::ID = 0;
111} // end anonymous namespace
112
113INITIALIZE_PASS(AArch64RedundantCopyEliminationLegacy, "aarch64-copyelim",
114 "AArch64 redundant copy elimination pass", false, false)
115
116/// It's possible to determine the value of a register based on a dominating
117/// condition. To do so, this function checks to see if the basic block \p MBB
118/// is the target of a conditional branch \p CondBr with an equality comparison.
119/// If the branch is a CBZ/CBNZ, we know the value of its source operand is zero
120/// in \p MBB for some cases. Otherwise, we find and inspect the NZCV setting
121/// instruction (e.g., SUBS, ADDS). If this instruction defines a register
122/// other than WZR/XZR, we know the value of the destination register is zero in
123/// \p MMB for some cases. In addition, if the NZCV setting instruction is
124/// comparing against a constant we know the other source register is equal to
125/// the constant in \p MBB for some cases. If we find any constant values, push
126/// a physical register and constant value pair onto the KnownRegs vector and
127/// return true. Otherwise, return false if no known values were found.
128bool AArch64RedundantCopyEliminationImpl::knownRegValInBlock(
129 MachineInstr &CondBr, MachineBasicBlock *MBB,
130 SmallVectorImpl<RegImm> &KnownRegs, MachineBasicBlock::iterator &FirstUse) {
131 unsigned Opc = CondBr.getOpcode();
132
133 // Check if the current basic block is the target block to which the
134 // CBZ/CBNZ instruction jumps when its Wt/Xt is zero.
135 if (((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
136 MBB == CondBr.getOperand(i: 1).getMBB()) ||
137 ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
138 MBB != CondBr.getOperand(i: 1).getMBB())) {
139 FirstUse = CondBr;
140 KnownRegs.push_back(Elt: RegImm(CondBr.getOperand(i: 0).getReg(), 0));
141 return true;
142 }
143
144 // Otherwise, must be a conditional branch.
145 if (Opc != AArch64::Bcc)
146 return false;
147
148 // Must be an equality check (i.e., == or !=).
149 AArch64CC::CondCode CC = (AArch64CC::CondCode)CondBr.getOperand(i: 0).getImm();
150 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
151 return false;
152
153 MachineBasicBlock *BrTarget = CondBr.getOperand(i: 1).getMBB();
154 if ((CC == AArch64CC::EQ && BrTarget != MBB) ||
155 (CC == AArch64CC::NE && BrTarget == MBB))
156 return false;
157
158 // Stop if we get to the beginning of PredMBB.
159 MachineBasicBlock *PredMBB = *MBB->pred_begin();
160 assert(PredMBB == CondBr.getParent() &&
161 "Conditional branch not in predecessor block!");
162 if (CondBr == PredMBB->begin())
163 return false;
164
165 // Registers clobbered in PredMBB between CondBr instruction and current
166 // instruction being checked in loop.
167 DomBBClobberedRegs.clear();
168 DomBBUsedRegs.clear();
169
170 // Find compare instruction that sets NZCV used by CondBr.
171 MachineBasicBlock::reverse_iterator RIt = CondBr.getReverseIterator();
172 for (MachineInstr &PredI : make_range(x: std::next(x: RIt), y: PredMBB->rend())) {
173
174 bool IsCMN = false;
175 switch (PredI.getOpcode()) {
176 default:
177 break;
178
179 // CMN is an alias for ADDS with a dead destination register.
180 case AArch64::ADDSWri:
181 case AArch64::ADDSXri:
182 IsCMN = true;
183 [[fallthrough]];
184 // CMP is an alias for SUBS with a dead destination register.
185 case AArch64::SUBSWri:
186 case AArch64::SUBSXri: {
187 // Sometimes the first operand is a FrameIndex. Bail if tht happens.
188 if (!PredI.getOperand(i: 1).isReg())
189 return false;
190 MCPhysReg DstReg = PredI.getOperand(i: 0).getReg();
191 MCPhysReg SrcReg = PredI.getOperand(i: 1).getReg();
192
193 bool Res = false;
194 // If we're comparing against a non-symbolic immediate and the source
195 // register of the compare is not modified (including a self-clobbering
196 // compare) between the compare and conditional branch we known the value
197 // of the 1st source operand.
198 if (PredI.getOperand(i: 2).isImm() && DomBBClobberedRegs.available(Reg: SrcReg) &&
199 SrcReg != DstReg) {
200 // We've found the instruction that sets NZCV.
201 int32_t KnownImm = PredI.getOperand(i: 2).getImm();
202 int32_t Shift = PredI.getOperand(i: 3).getImm();
203 KnownImm <<= Shift;
204 if (IsCMN)
205 KnownImm = -KnownImm;
206 FirstUse = PredI;
207 KnownRegs.push_back(Elt: RegImm(SrcReg, KnownImm));
208 Res = true;
209 }
210
211 // If this instructions defines something other than WZR/XZR, we know it's
212 // result is zero in some cases.
213 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
214 return Res;
215
216 // The destination register must not be modified between the NZCV setting
217 // instruction and the conditional branch.
218 if (!DomBBClobberedRegs.available(Reg: DstReg))
219 return Res;
220
221 FirstUse = PredI;
222 KnownRegs.push_back(Elt: RegImm(DstReg, 0));
223 return true;
224 }
225
226 // Look for NZCV setting instructions that define something other than
227 // WZR/XZR.
228 case AArch64::ADCSWr:
229 case AArch64::ADCSXr:
230 case AArch64::ADDSWrr:
231 case AArch64::ADDSWrs:
232 case AArch64::ADDSWrx:
233 case AArch64::ADDSXrr:
234 case AArch64::ADDSXrs:
235 case AArch64::ADDSXrx:
236 case AArch64::ADDSXrx64:
237 case AArch64::ANDSWri:
238 case AArch64::ANDSWrr:
239 case AArch64::ANDSWrs:
240 case AArch64::ANDSXri:
241 case AArch64::ANDSXrr:
242 case AArch64::ANDSXrs:
243 case AArch64::BICSWrr:
244 case AArch64::BICSWrs:
245 case AArch64::BICSXrs:
246 case AArch64::BICSXrr:
247 case AArch64::SBCSWr:
248 case AArch64::SBCSXr:
249 case AArch64::SUBSWrr:
250 case AArch64::SUBSWrs:
251 case AArch64::SUBSWrx:
252 case AArch64::SUBSXrr:
253 case AArch64::SUBSXrs:
254 case AArch64::SUBSXrx:
255 case AArch64::SUBSXrx64: {
256 MCPhysReg DstReg = PredI.getOperand(i: 0).getReg();
257 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
258 return false;
259
260 // The destination register of the NZCV setting instruction must not be
261 // modified before the conditional branch.
262 if (!DomBBClobberedRegs.available(Reg: DstReg))
263 return false;
264
265 // We've found the instruction that sets NZCV whose DstReg == 0.
266 FirstUse = PredI;
267 KnownRegs.push_back(Elt: RegImm(DstReg, 0));
268 return true;
269 }
270 }
271
272 // Bail if we see an instruction that defines NZCV that we don't handle.
273 if (PredI.definesRegister(Reg: AArch64::NZCV, /*TRI=*/nullptr))
274 return false;
275
276 // Track clobbered and used registers.
277 LiveRegUnits::accumulateUsedDefed(MI: PredI, ModifiedRegUnits&: DomBBClobberedRegs, UsedRegUnits&: DomBBUsedRegs,
278 TRI);
279 }
280 return false;
281}
282
283bool AArch64RedundantCopyEliminationImpl::optimizeBlock(
284 MachineBasicBlock *MBB) {
285 // Check if the current basic block has a single predecessor.
286 if (MBB->pred_size() != 1)
287 return false;
288
289 // Check if the predecessor has two successors, implying the block ends in a
290 // conditional branch.
291 MachineBasicBlock *PredMBB = *MBB->pred_begin();
292 if (PredMBB->succ_size() != 2)
293 return false;
294
295 MachineBasicBlock::iterator CondBr = PredMBB->getLastNonDebugInstr();
296 if (CondBr == PredMBB->end())
297 return false;
298
299 // Keep track of the earliest point in the PredMBB block where kill markers
300 // need to be removed if a COPY is removed.
301 MachineBasicBlock::iterator FirstUse;
302 // After calling knownRegValInBlock, FirstUse will either point to a CBZ/CBNZ
303 // or a compare (i.e., SUBS). In the latter case, we must take care when
304 // updating FirstUse when scanning for COPY instructions. In particular, if
305 // there's a COPY in between the compare and branch the COPY should not
306 // update FirstUse.
307 bool SeenFirstUse = false;
308 // Registers that contain a known value at the start of MBB.
309 SmallVector<RegImm, 4> KnownRegs;
310
311 MachineBasicBlock::iterator Itr = std::next(x: CondBr);
312 do {
313 --Itr;
314
315 if (!knownRegValInBlock(CondBr&: *Itr, MBB, KnownRegs, FirstUse))
316 continue;
317
318 // Reset the clobbered and used register units.
319 OptBBClobberedRegs.clear();
320 OptBBUsedRegs.clear();
321
322 // Look backward in PredMBB for COPYs from the known reg to find other
323 // registers that are known to be a constant value.
324 for (auto PredI = Itr;; --PredI) {
325 if (FirstUse == PredI)
326 SeenFirstUse = true;
327
328 if (PredI->isCopy()) {
329 MCPhysReg CopyDstReg = PredI->getOperand(i: 0).getReg();
330 MCPhysReg CopySrcReg = PredI->getOperand(i: 1).getReg();
331 for (auto &KnownReg : KnownRegs) {
332 if (!OptBBClobberedRegs.available(Reg: KnownReg.Reg))
333 continue;
334 // If we have X = COPY Y, and Y is known to be zero, then now X is
335 // known to be zero.
336 if (CopySrcReg == KnownReg.Reg &&
337 OptBBClobberedRegs.available(Reg: CopyDstReg)) {
338 KnownRegs.push_back(Elt: RegImm(CopyDstReg, KnownReg.Imm));
339 if (SeenFirstUse)
340 FirstUse = PredI;
341 break;
342 }
343 // If we have X = COPY Y, and X is known to be zero, then now Y is
344 // known to be zero.
345 if (CopyDstReg == KnownReg.Reg &&
346 OptBBClobberedRegs.available(Reg: CopySrcReg)) {
347 KnownRegs.push_back(Elt: RegImm(CopySrcReg, KnownReg.Imm));
348 if (SeenFirstUse)
349 FirstUse = PredI;
350 break;
351 }
352 }
353 }
354
355 // Stop if we get to the beginning of PredMBB.
356 if (PredI == PredMBB->begin())
357 break;
358
359 LiveRegUnits::accumulateUsedDefed(MI: *PredI, ModifiedRegUnits&: OptBBClobberedRegs,
360 UsedRegUnits&: OptBBUsedRegs, TRI);
361 // Stop if all of the known-zero regs have been clobbered.
362 if (all_of(Range&: KnownRegs, P: [&](RegImm KnownReg) {
363 return !OptBBClobberedRegs.available(Reg: KnownReg.Reg);
364 }))
365 break;
366 }
367 break;
368
369 } while (Itr != PredMBB->begin() && Itr->isTerminator());
370
371 // We've not found a registers with a known value, time to bail out.
372 if (KnownRegs.empty())
373 return false;
374
375 bool Changed = false;
376 // UsedKnownRegs is the set of KnownRegs that have had uses added to MBB.
377 SmallSetVector<unsigned, 4> UsedKnownRegs;
378 MachineBasicBlock::iterator LastChange = MBB->begin();
379 // Remove redundant copy/move instructions unless KnownReg is modified.
380 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
381 MachineInstr *MI = &*I;
382 ++I;
383 bool RemovedMI = false;
384 bool IsCopy = MI->isCopy();
385 bool IsMoveImm = MI->isMoveImmediate();
386 if (IsCopy || IsMoveImm) {
387 Register DefReg = MI->getOperand(i: 0).getReg();
388 Register SrcReg = IsCopy ? MI->getOperand(i: 1).getReg() : Register();
389 int64_t SrcImm = IsMoveImm ? MI->getOperand(i: 1).getImm() : 0;
390 if (!MRI->isReserved(PhysReg: DefReg) &&
391 ((IsCopy && (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR)) ||
392 IsMoveImm)) {
393 for (RegImm &KnownReg : KnownRegs) {
394 if (KnownReg.Reg != DefReg &&
395 !TRI->isSuperRegister(RegA: DefReg, RegB: KnownReg.Reg))
396 continue;
397
398 // For a copy, the known value must be a zero.
399 if (IsCopy && KnownReg.Imm != 0)
400 continue;
401
402 if (IsMoveImm) {
403 // For a move immediate, the known immediate must match the source
404 // immediate.
405 if (KnownReg.Imm != SrcImm)
406 continue;
407
408 // Don't remove a move immediate that implicitly defines the upper
409 // bits when only the lower 32 bits are known.
410 MCPhysReg CmpReg = KnownReg.Reg;
411 if (any_of(Range: MI->implicit_operands(), P: [CmpReg](MachineOperand &O) {
412 return !O.isDead() && O.isReg() && O.isDef() &&
413 O.getReg() != CmpReg;
414 }))
415 continue;
416
417 // Don't remove a move immediate that implicitly defines the upper
418 // bits as different.
419 if (TRI->isSuperRegister(RegA: DefReg, RegB: KnownReg.Reg) && KnownReg.Imm < 0)
420 continue;
421 }
422
423 if (IsCopy)
424 LLVM_DEBUG(dbgs() << "Remove redundant Copy : " << *MI);
425 else
426 LLVM_DEBUG(dbgs() << "Remove redundant Move : " << *MI);
427
428 MI->eraseFromParent();
429 Changed = true;
430 LastChange = I;
431 NumCopiesRemoved++;
432 UsedKnownRegs.insert(X: KnownReg.Reg);
433 RemovedMI = true;
434 break;
435 }
436 }
437 }
438
439 // Skip to the next instruction if we removed the COPY/MovImm.
440 if (RemovedMI)
441 continue;
442
443 // Remove any regs the MI clobbers from the KnownConstRegs set.
444 for (unsigned RI = 0; RI < KnownRegs.size();)
445 if (MI->modifiesRegister(Reg: KnownRegs[RI].Reg, TRI)) {
446 std::swap(a&: KnownRegs[RI], b&: KnownRegs[KnownRegs.size() - 1]);
447 KnownRegs.pop_back();
448 // Don't increment RI since we need to now check the swapped-in
449 // KnownRegs[RI].
450 } else {
451 ++RI;
452 }
453
454 // Continue until the KnownRegs set is empty.
455 if (KnownRegs.empty())
456 break;
457 }
458
459 if (!Changed)
460 return false;
461
462 // Add newly used regs to the block's live-in list if they aren't there
463 // already.
464 for (MCPhysReg KnownReg : UsedKnownRegs)
465 if (!MBB->isLiveIn(Reg: KnownReg))
466 MBB->addLiveIn(PhysReg: KnownReg);
467
468 // Clear kills in the range where changes were made. This is conservative,
469 // but should be okay since kill markers are being phased out.
470 LLVM_DEBUG(dbgs() << "Clearing kill flags.\n\tFirstUse: " << *FirstUse
471 << "\tLastChange: ";
472 if (LastChange == MBB->end()) dbgs() << "<end>\n";
473 else dbgs() << *LastChange);
474 for (MachineInstr &MMI : make_range(x: FirstUse, y: PredMBB->end()))
475 MMI.clearKillInfo();
476 for (MachineInstr &MMI : make_range(x: MBB->begin(), y: LastChange))
477 MMI.clearKillInfo();
478
479 return true;
480}
481
482bool AArch64RedundantCopyEliminationImpl::run(MachineFunction &MF) {
483 TRI = MF.getSubtarget().getRegisterInfo();
484 MRI = &MF.getRegInfo();
485 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
486
487 // Resize the clobbered and used register unit trackers. We do this once per
488 // function.
489 DomBBClobberedRegs.init(TRI: *TRI);
490 DomBBUsedRegs.init(TRI: *TRI);
491 OptBBClobberedRegs.init(TRI: *TRI);
492 OptBBUsedRegs.init(TRI: *TRI);
493
494 bool Changed = false;
495 for (MachineBasicBlock &MBB : MF) {
496 Changed |= optimizeTerminators(MBB: &MBB, TII);
497 Changed |= optimizeBlock(MBB: &MBB);
498 }
499 return Changed;
500}
501
502bool AArch64RedundantCopyEliminationLegacy::runOnMachineFunction(
503 MachineFunction &MF) {
504 if (skipFunction(F: MF.getFunction()))
505 return false;
506 return AArch64RedundantCopyEliminationImpl().run(MF);
507}
508
509PreservedAnalyses
510AArch64RedundantCopyEliminationPass::run(MachineFunction &MF,
511 MachineFunctionAnalysisManager &MFAM) {
512 const bool Changed = AArch64RedundantCopyEliminationImpl().run(MF);
513 if (!Changed)
514 return PreservedAnalyses::all();
515 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
516 PA.preserveSet<CFGAnalyses>();
517 return PA;
518}
519
520FunctionPass *llvm::createAArch64RedundantCopyEliminationPass() {
521 return new AArch64RedundantCopyEliminationLegacy();
522}
523