1//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10// This pass tries to make consecutive comparisons of values use the same
11// operands to allow the CSE pass to remove duplicate instructions. It adjusts
12// comparisons with immediate values by converting between inclusive and
13// exclusive forms (GE <-> GT, LE <-> LT) and correcting immediate values to
14// make them equal.
15//
16// The pass handles:
17// * Cross-block: SUBS/ADDS followed by conditional branches
18// * Intra-block: CSINC conditional instructions
19//
20//
21// Consider the following example in C:
22//
23// if ((a < 5 && ...) || (a > 5 && ...)) {
24// ~~~~~ ~~~~~
25// ^ ^
26// x y
27//
28// Here both "x" and "y" expressions compare "a" with "5". When "x" evaluates
29// to "false", "y" can just check flags set by the first comparison. As a
30// result of the canonicalization employed by
31// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific
32// code, assembly ends up in the form that is not CSE friendly:
33//
34// ...
35// cmp w8, #4
36// b.gt .LBB0_3
37// ...
38// .LBB0_3:
39// cmp w8, #6
40// b.lt .LBB0_6
41// ...
42//
43// Same assembly after the pass:
44//
45// ...
46// cmp w8, #5
47// b.ge .LBB0_3
48// ...
49// .LBB0_3:
50// cmp w8, #5 // <-- CSE pass removes this instruction
51// b.le .LBB0_6
52// ...
53//
54// See optimizeCrossBlock() and optimizeIntraBlock() for implementation details.
55//
56// TODO: maybe handle TBNZ/TBZ the same way as CMP when used instead for "a < 0"
57// TODO: For cross-block:
58// - handle other conditional instructions (e.g. CSET)
59// - allow second branching to be anything if it doesn't require adjusting
60// TODO: For intra-block:
61// - handle CINC and CSET (CSINC aliases) as their conditions are inverted
62// compared to CSINC.
63// - handle other non-CSINC conditional instructions
64//
65//===----------------------------------------------------------------------===//
66
67#include "AArch64.h"
68#include "MCTargetDesc/AArch64AddressingModes.h"
69#include "Utils/AArch64BaseInfo.h"
70#include "llvm/ADT/ArrayRef.h"
71#include "llvm/ADT/DepthFirstIterator.h"
72#include "llvm/ADT/SmallVector.h"
73#include "llvm/ADT/Statistic.h"
74#include "llvm/CodeGen/MachineBasicBlock.h"
75#include "llvm/CodeGen/MachineDominators.h"
76#include "llvm/CodeGen/MachineFunction.h"
77#include "llvm/CodeGen/MachineFunctionPass.h"
78#include "llvm/CodeGen/MachineInstr.h"
79#include "llvm/CodeGen/MachineOperand.h"
80#include "llvm/CodeGen/MachineRegisterInfo.h"
81#include "llvm/CodeGen/TargetInstrInfo.h"
82#include "llvm/CodeGen/TargetRegisterInfo.h"
83#include "llvm/CodeGen/TargetSubtargetInfo.h"
84#include "llvm/InitializePasses.h"
85#include "llvm/Pass.h"
86#include "llvm/Support/Debug.h"
87#include "llvm/Support/ErrorHandling.h"
88#include "llvm/Support/raw_ostream.h"
89#include <cassert>
90#include <cstdlib>
91
92using namespace llvm;
93
94#define DEBUG_TYPE "aarch64-condopt"
95
96STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");
97
98namespace {
99
/// Bundles the parameters needed to adjust a comparison instruction.
/// Produced by getAdjustedCmpInfo() and consumed by applyCmpAdjustment().
struct CmpInfo {
  int Imm;                // Immediate to install on the compare.
  unsigned Opc;           // Compare opcode (SUBS/ADDS immediate form).
  AArch64CC::CondCode CC; // Condition code for the dependent conditional.
};
106
/// Shared implementation driven by both the legacy and new pass-manager
/// wrappers; holds per-function target hooks while run() is active.
class AArch64ConditionOptimizerImpl {
  const TargetInstrInfo *TII;     // Opcode descriptions for setDesc rewrites.
  const TargetRegisterInfo *TRI;  // Used to look through COPY-like defs.
  MachineDominatorTree *DomTree;  // Drives the pre-order block walk in run().
  const MachineRegisterInfo *MRI; // Use lists for dead-destination checks.

public:
  /// Optimize comparisons in MF; returns true if anything changed.
  bool run(MachineFunction &MF, MachineDominatorTree &MDT);

private:
  // Safety predicates: adjustable immediate and dead destination, matching
  // compared registers, and no NZCV live-out from the block.
  bool canAdjustCmp(MachineInstr &CmpMI);
  bool registersMatch(MachineInstr *FirstMI, MachineInstr *SecondMI);
  bool nzcvLivesOut(MachineBasicBlock *MBB);
  // Search helpers: a block's Bcc terminator and the flag-setting compare
  // controlling a given conditional instruction.
  MachineInstr *getBccTerminator(MachineBasicBlock *MBB);
  MachineInstr *findAdjustableCmp(MachineInstr *CondMI);
  // Computes the inclusive<->exclusive adjustment and applies it to a
  // compare / conditional instruction pair.
  CmpInfo getAdjustedCmpInfo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
  void updateCmpInstr(MachineInstr *CmpMI, int NewImm, unsigned NewOpc);
  void updateCondInstr(MachineInstr *CondMI, AArch64CC::CondCode NewCC);
  void applyCmpAdjustment(MachineInstr *CmpMI, MachineInstr *CondMI,
                          const CmpInfo &Info);
  bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
                int ToImm);
  // The two optimization strategies (see the file header comment).
  bool optimizeIntraBlock(MachineBasicBlock &MBB);
  bool optimizeCrossBlock(MachineBasicBlock &HBB);
};
132
/// Legacy pass-manager wrapper around AArch64ConditionOptimizerImpl.
class AArch64ConditionOptimizerLegacy : public MachineFunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid.
  AArch64ConditionOptimizerLegacy() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 Condition Optimizer";
  }
};
145
146} // end anonymous namespace
147
char AArch64ConditionOptimizerLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizerLegacy, "aarch64-condopt",
                      "AArch64 CondOpt Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(AArch64ConditionOptimizerLegacy, "aarch64-condopt",
                    "AArch64 CondOpt Pass", false, false)

// Factory used by the AArch64 target to add this pass to the legacy pipeline.
FunctionPass *llvm::createAArch64ConditionOptimizerLegacyPass() {
  return new AArch64ConditionOptimizerLegacy();
}
159
void AArch64ConditionOptimizerLegacy::getAnalysisUsage(
    AnalysisUsage &AU) const {
  // The dominator tree is required for the pre-order walk in run(). This pass
  // only rewrites immediates and condition codes (no CFG changes), so the
  // tree remains valid and is declared preserved.
  AU.addRequired<MachineDominatorTreeWrapperPass>();
  AU.addPreserved<MachineDominatorTreeWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
166
167// Verify that the MI's immediate is adjustable and it only sets flags (pure
168// cmp)
169bool AArch64ConditionOptimizerImpl::canAdjustCmp(MachineInstr &CmpMI) {
170 unsigned ShiftAmt = AArch64_AM::getShiftValue(Imm: CmpMI.getOperand(i: 3).getImm());
171 if (!CmpMI.getOperand(i: 2).isImm()) {
172 LLVM_DEBUG(dbgs() << "Immediate of cmp is symbolic, " << CmpMI << '\n');
173 return false;
174 } else if (CmpMI.getOperand(i: 2).getImm() << ShiftAmt >= 0xfff) {
175 LLVM_DEBUG(dbgs() << "Immediate of cmp may be out of range, " << CmpMI
176 << '\n');
177 return false;
178 } else if (!MRI->use_nodbg_empty(RegNo: CmpMI.getOperand(i: 0).getReg())) {
179 LLVM_DEBUG(dbgs() << "Destination of cmp is not dead, " << CmpMI << '\n');
180 return false;
181 }
182
183 return true;
184}
185
186// Ensure both compare MIs use the same register, tracing through copies.
187bool AArch64ConditionOptimizerImpl::registersMatch(MachineInstr *FirstMI,
188 MachineInstr *SecondMI) {
189 Register FirstReg = FirstMI->getOperand(i: 1).getReg();
190 Register SecondReg = SecondMI->getOperand(i: 1).getReg();
191 Register FirstCmpReg =
192 FirstReg.isVirtual() ? TRI->lookThruCopyLike(SrcReg: FirstReg, MRI) : FirstReg;
193 Register SecondCmpReg =
194 SecondReg.isVirtual() ? TRI->lookThruCopyLike(SrcReg: SecondReg, MRI) : SecondReg;
195 if (FirstCmpReg != SecondCmpReg) {
196 LLVM_DEBUG(dbgs() << "CMPs compare different registers\n");
197 return false;
198 }
199
200 return true;
201}
202
203// Check if NZCV lives out to any successor block.
204bool AArch64ConditionOptimizerImpl::nzcvLivesOut(MachineBasicBlock *MBB) {
205 for (auto *SuccBB : MBB->successors()) {
206 if (SuccBB->isLiveIn(Reg: AArch64::NZCV)) {
207 LLVM_DEBUG(dbgs() << "NZCV live into successor "
208 << printMBBReference(*SuccBB) << " from "
209 << printMBBReference(*MBB) << '\n');
210 return true;
211 }
212 }
213 return false;
214}
215
216// Returns true if the opcode is a comparison instruction (CMP/CMN).
217static bool isCmpInstruction(unsigned Opc) {
218 switch (Opc) {
219 // cmp is an alias for SUBS with a dead destination register.
220 case AArch64::SUBSWri:
221 case AArch64::SUBSXri:
222 // cmp is an alias for ADDS with a dead destination register.
223 case AArch64::ADDSWri:
224 case AArch64::ADDSXri:
225 return true;
226 default:
227 return false;
228 }
229}
230
231static bool isCSINCInstruction(unsigned Opc) {
232 return Opc == AArch64::CSINCWr || Opc == AArch64::CSINCXr;
233}
234
235// Returns the Bcc terminator if present, otherwise nullptr.
236MachineInstr *
237AArch64ConditionOptimizerImpl::getBccTerminator(MachineBasicBlock *MBB) {
238 MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
239 if (Term == MBB->end()) {
240 LLVM_DEBUG(dbgs() << "No terminator in " << printMBBReference(*MBB)
241 << '\n');
242 return nullptr;
243 }
244
245 if (Term->getOpcode() != AArch64::Bcc) {
246 LLVM_DEBUG(dbgs() << "Non-Bcc terminator in " << printMBBReference(*MBB)
247 << ": " << *Term);
248 return nullptr;
249 }
250
251 return &*Term;
252}
253
// Find the CMP instruction controlling the given conditional instruction and
// ensure it can be adjusted for CSE optimization. Searches backward from
// CondMI, ensuring no NZCV interference. Returns nullptr if no suitable CMP
// is found or if adjustments are not safe.
MachineInstr *
AArch64ConditionOptimizerImpl::findAdjustableCmp(MachineInstr *CondMI) {
  assert(CondMI && "CondMI cannot be null");
  MachineBasicBlock *MBB = CondMI->getParent();

  // Search backward from the conditional to find the instruction controlling
  // it.
  for (MachineBasicBlock::iterator B = MBB->begin(),
                                   It = MachineBasicBlock::iterator(CondMI);
       It != B;) {
    // Step over debug instructions so they never influence the search.
    It = prev_nodbg(It, Begin: B);
    MachineInstr &I = *It;
    assert(!I.isTerminator() && "Spurious terminator");
    // Ensure there is no use of NZCV between CMP and conditional. Another
    // flag reader would also be affected by an immediate/opcode rewrite.
    if (I.readsRegister(Reg: AArch64::NZCV, /*TRI=*/nullptr))
      return nullptr;

    // The first flag-setting compare found walking backward is the one
    // controlling CondMI; accept it only if its immediate is adjustable and
    // its destination is dead.
    if (isCmpInstruction(Opc: I.getOpcode())) {
      if (!canAdjustCmp(CmpMI&: I)) {
        return nullptr;
      }
      return &I;
    }

    // Any other NZCV-clobbering instruction shields CondMI from an earlier
    // compare, so give up.
    if (I.modifiesRegister(Reg: AArch64::NZCV, /*TRI=*/nullptr))
      return nullptr;
  }
  LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
                    << '\n');
  return nullptr;
}
289
290// Changes opcode adds <-> subs considering register operand width.
291static int getComplementOpc(int Opc) {
292 switch (Opc) {
293 case AArch64::ADDSWri: return AArch64::SUBSWri;
294 case AArch64::ADDSXri: return AArch64::SUBSXri;
295 case AArch64::SUBSWri: return AArch64::ADDSWri;
296 case AArch64::SUBSXri: return AArch64::ADDSXri;
297 default:
298 llvm_unreachable("Unexpected opcode");
299 }
300}
301
302// Changes form of comparison inclusive <-> exclusive.
303static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
304 switch (Cmp) {
305 case AArch64CC::GT:
306 return AArch64CC::GE;
307 case AArch64CC::GE:
308 return AArch64CC::GT;
309 case AArch64CC::LT:
310 return AArch64CC::LE;
311 case AArch64CC::LE:
312 return AArch64CC::LT;
313 case AArch64CC::HI:
314 return AArch64CC::HS;
315 case AArch64CC::HS:
316 return AArch64CC::HI;
317 case AArch64CC::LO:
318 return AArch64CC::LS;
319 case AArch64CC::LS:
320 return AArch64CC::LO;
321 default:
322 llvm_unreachable("Unexpected condition code");
323 }
324}
325
// Returns the adjusted immediate, opcode, and condition code for switching
// between inclusive/exclusive forms (GT <-> GE, LT <-> LE).
// When no safe adjustment exists (CMN #0, or an unsigned immediate wrap),
// the original {Imm, Opc, CC} triple is returned unchanged.
CmpInfo
AArch64ConditionOptimizerImpl::getAdjustedCmpInfo(MachineInstr *CmpMI,
                                                  AArch64CC::CondCode Cmp) {
  unsigned Opc = CmpMI->getOpcode();

  // Signed predicates permit crossing zero via a CMP<->CMN opcode swap;
  // unsigned ones do not (see the wrap check below).
  bool IsSigned = Cmp == AArch64CC::GT || Cmp == AArch64CC::GE ||
                  Cmp == AArch64CC::LT || Cmp == AArch64CC::LE;

  // CMN (compare with negative immediate) is an alias to ADDS (as
  // "operand - negative" == "operand + positive")
  bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);

  // Strict predicates (GT/HI) become non-strict by bumping the immediate up;
  // the non-strict ones by bumping it down.
  int Correction = (Cmp == AArch64CC::GT || Cmp == AArch64CC::HI) ? 1 : -1;
  // Negate Correction value for comparison with negative immediate (CMN).
  if (Negative) {
    Correction = -Correction;
  }

  const int OldImm = (int)CmpMI->getOperand(i: 2).getImm();
  const int NewImm = std::abs(x: OldImm + Correction);

  // Bail out on cmn 0 (ADDS with immediate 0). It is a valid instruction but
  // doesn't set flags in a way we can safely transform, so skip optimization.
  if (OldImm == 0 && Negative)
    return {.Imm: OldImm, .Opc: Opc, .CC: Cmp};

  if ((OldImm == 1 && Negative && Correction == -1) ||
      (OldImm == 0 && Correction == -1)) {
    // If we change opcodes for unsigned comparisons, this means we did an
    // unsigned wrap (e.g., 0 wrapping to 0xFFFFFFFF), so return the old cmp.
    // Note: For signed comparisons, opcode changes (cmn 1 ↔ cmp 0) are valid.
    if (!IsSigned)
      return {.Imm: OldImm, .Opc: Opc, .CC: Cmp};
    Opc = getComplementOpc(Opc);
  }

  return {.Imm: NewImm, .Opc: Opc, .CC: getAdjustedCmp(Cmp)};
}
366
367// Modifies a comparison instruction's immediate and opcode.
368void AArch64ConditionOptimizerImpl::updateCmpInstr(MachineInstr *CmpMI,
369 int NewImm,
370 unsigned NewOpc) {
371 CmpMI->getOperand(i: 2).setImm(NewImm);
372 CmpMI->setDesc(TII->get(Opcode: NewOpc));
373}
374
375// Modifies the condition code of a conditional instruction.
376void AArch64ConditionOptimizerImpl::updateCondInstr(MachineInstr *CondMI,
377 AArch64CC::CondCode NewCC) {
378 // Get the correct operand index for the conditional instruction
379 unsigned CondOpIdx;
380 switch (CondMI->getOpcode()) {
381 case AArch64::Bcc:
382 CondOpIdx = 0;
383 break;
384 case AArch64::CSINCWr:
385 case AArch64::CSINCXr:
386 CondOpIdx = 3;
387 break;
388 default:
389 llvm_unreachable("Unsupported conditional instruction");
390 }
391 CondMI->getOperand(i: CondOpIdx).setImm(NewCC);
392 ++NumConditionsAdjusted;
393}
394
// Applies a comparison adjustment to a cmp/cond instruction pair.
// The two updates must happen together so the pair stays semantically
// equivalent to the original compare + conditional.
void AArch64ConditionOptimizerImpl::applyCmpAdjustment(MachineInstr *CmpMI,
                                                       MachineInstr *CondMI,
                                                       const CmpInfo &Info) {
  updateCmpInstr(CmpMI, NewImm: Info.Imm, NewOpc: Info.Opc);
  updateCondInstr(CondMI, NewCC: Info.CC);
}
402
403// Extracts the condition code from the result of analyzeBranch.
404// Returns the CondCode or Invalid if the format is not a simple br.cond.
405static AArch64CC::CondCode parseCondCode(ArrayRef<MachineOperand> Cond) {
406 assert(!Cond.empty() && "Expected non-empty condition from analyzeBranch");
407 // A normal br.cond simply has the condition code.
408 if (Cond[0].getImm() != -1) {
409 assert(Cond.size() == 1 && "Unknown Cond array format");
410 return (AArch64CC::CondCode)(int)Cond[0].getImm();
411 }
412 return AArch64CC::CondCode::Invalid;
413}
414
415// Adjusts one cmp instruction to another one if result of adjustment will allow
416// CSE. Returns true if compare instruction was changed, otherwise false is
417// returned.
418
419bool AArch64ConditionOptimizerImpl::adjustTo(MachineInstr *CmpMI,
420 AArch64CC::CondCode Cmp,
421 MachineInstr *To, int ToImm) {
422 CmpInfo Info = getAdjustedCmpInfo(CmpMI, Cmp);
423 if (Info.Imm == ToImm && Info.Opc == To->getOpcode()) {
424 MachineInstr &BrMI = *CmpMI->getParent()->getFirstTerminator();
425 applyCmpAdjustment(CmpMI, CondMI: &BrMI, Info);
426 return true;
427 }
428 return false;
429}
430
// True for strict greater-than conditions: signed GT or unsigned HI.
static bool isGreaterThan(AArch64CC::CondCode Cmp) {
  return Cmp == AArch64CC::GT || Cmp == AArch64CC::HI;
}
434
// True for strict less-than conditions: signed LT or unsigned LO.
static bool isLessThan(AArch64CC::CondCode Cmp) {
  return Cmp == AArch64CC::LT || Cmp == AArch64CC::LO;
}
438
// This function transforms two CMP+CSINC pairs within the same basic block
// when both conditions are the same (GT/GT or LT/LT) and immediates differ
// by 1.
//
// Example transformation:
//   cmp w8, #10
//   csinc w9, w0, w1, gt   ; w9 = (w8 > 10) ? w0 : w1+1
//   cmp w8, #9
//   csinc w10, w0, w1, gt  ; w10 = (w8 > 9) ? w0 : w1+1
//
// Into:
//   cmp w8, #10
//   csinc w9, w0, w1, gt   ; w9 = (w8 > 10) ? w0 : w1+1
//   csinc w10, w0, w1, ge  ; w10 = (w8 >= 10) ? w0 : w1+1
//
// The second CMP is eliminated, enabling CSE to remove the redundant
// comparison.
//
// Returns true if an adjustment was made; only the first two CSINCs in the
// block are considered.
bool AArch64ConditionOptimizerImpl::optimizeIntraBlock(MachineBasicBlock &MBB) {
  MachineInstr *FirstCSINC = nullptr;
  MachineInstr *SecondCSINC = nullptr;

  // Find two CSINC instructions
  for (MachineInstr &MI : MBB) {
    if (isCSINCInstruction(Opc: MI.getOpcode())) {
      if (!FirstCSINC) {
        FirstCSINC = &MI;
      } else if (!SecondCSINC) {
        SecondCSINC = &MI;
        break; // Found both
      }
    }
  }

  if (!FirstCSINC || !SecondCSINC) {
    return false;
  }

  // Since we may modify cmps in this MBB, make sure NZCV does not live out.
  if (nzcvLivesOut(MBB: &MBB))
    return false;

  // Find the CMPs controlling each CSINC
  MachineInstr *FirstCmpMI = findAdjustableCmp(CondMI: FirstCSINC);
  MachineInstr *SecondCmpMI = findAdjustableCmp(CondMI: SecondCSINC);
  if (!FirstCmpMI || !SecondCmpMI)
    return false;

  // Ensure we have two distinct CMPs
  if (FirstCmpMI == SecondCmpMI) {
    LLVM_DEBUG(dbgs() << "Both CSINCs already controlled by same CMP\n");
    return false;
  }

  if (!registersMatch(FirstMI: FirstCmpMI, SecondMI: SecondCmpMI))
    return false;

  // Check that nothing else modifies the flags between the first CMP and second
  // conditional
  for (auto It = std::next(x: MachineBasicBlock::iterator(FirstCmpMI));
       It != std::next(x: MachineBasicBlock::iterator(SecondCSINC)); ++It) {
    if (&*It != SecondCmpMI &&
        It->modifiesRegister(Reg: AArch64::NZCV, /*TRI=*/nullptr)) {
      LLVM_DEBUG(dbgs() << "Flags modified between CMPs by: " << *It << '\n');
      return false;
    }
  }

  // Check flags aren't read after second conditional within the same block
  // (cross-block consumers are already excluded by the live-out check above).
  for (auto It = std::next(x: MachineBasicBlock::iterator(SecondCSINC));
       It != MBB.end(); ++It) {
    if (It->readsRegister(Reg: AArch64::NZCV, /*TRI=*/nullptr)) {
      LLVM_DEBUG(dbgs() << "Flags read after second CSINC by: " << *It << '\n');
      return false;
    }
  }

  // Extract condition codes from both CSINCs (operand 3)
  AArch64CC::CondCode FirstCond =
      (AArch64CC::CondCode)(int)FirstCSINC->getOperand(i: 3).getImm();
  AArch64CC::CondCode SecondCond =
      (AArch64CC::CondCode)(int)SecondCSINC->getOperand(i: 3).getImm();

  const int FirstImm = (int)FirstCmpMI->getOperand(i: 2).getImm();
  const int SecondImm = (int)SecondCmpMI->getOperand(i: 2).getImm();

  LLVM_DEBUG(dbgs() << "Comparing intra-block CSINCs: "
                    << AArch64CC::getCondCodeName(FirstCond) << " #" << FirstImm
                    << " and " << AArch64CC::getCondCodeName(SecondCond) << " #"
                    << SecondImm << '\n');

  // Check if both conditions are the same (GT/GT, LT/LT, HI/HI, LO/LO)
  // and immediates differ by 1.
  if (FirstCond == SecondCond &&
      (isGreaterThan(Cmp: FirstCond) || isLessThan(Cmp: FirstCond)) &&
      std::abs(x: SecondImm - FirstImm) == 1) {
    // Pick which comparison to adjust to match the other
    // For GT/HI: adjust the one with smaller immediate
    // For LT/LO: adjust the one with larger immediate
    bool adjustFirst = (FirstImm < SecondImm);
    if (isLessThan(Cmp: FirstCond)) {
      adjustFirst = !adjustFirst;
    }

    MachineInstr *CmpToAdjust = adjustFirst ? FirstCmpMI : SecondCmpMI;
    MachineInstr *CSINCToAdjust = adjustFirst ? FirstCSINC : SecondCSINC;
    AArch64CC::CondCode CondToAdjust = adjustFirst ? FirstCond : SecondCond;
    int TargetImm = adjustFirst ? SecondImm : FirstImm;

    CmpInfo Adj = getAdjustedCmpInfo(CmpMI: CmpToAdjust, Cmp: CondToAdjust);

    // Only rewrite when the adjusted cmp becomes identical to the other one,
    // so CSE can later delete the duplicate.
    if (Adj.Imm == TargetImm &&
        Adj.Opc == (adjustFirst ? SecondCmpMI : FirstCmpMI)->getOpcode()) {
      LLVM_DEBUG(dbgs() << "Successfully optimizing intra-block CSINC pair\n");

      // Modify the selected CMP and CSINC
      applyCmpAdjustment(CmpMI: CmpToAdjust, CondMI: CSINCToAdjust, Info: Adj);

      return true;
    }
  }

  return false;
}
562
// Optimize across blocks: adjust the CMP+Bcc pair in the head block (HBB)
// and/or in its taken successor (TBB) so that both blocks end up comparing
// the same register against the same immediate, letting a later CSE pass
// remove the duplicated compare. Returns true if anything was changed.
bool AArch64ConditionOptimizerImpl::optimizeCrossBlock(MachineBasicBlock &HBB) {
  SmallVector<MachineOperand, 4> HeadCond;
  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  // analyzeBranch returns true on failure (unanalyzable terminators).
  if (TII->analyzeBranch(MBB&: HBB, TBB, FBB, Cond&: HeadCond)) {
    return false;
  }

  // Equivalence check is to skip loops.
  if (!TBB || TBB == &HBB) {
    return false;
  }

  SmallVector<MachineOperand, 4> TrueCond;
  MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
  if (TII->analyzeBranch(MBB&: *TBB, TBB&: TBB_TBB, FBB&: TBB_FBB, Cond&: TrueCond)) {
    return false;
  }

  // Both blocks must end in a conditional branch we know how to retarget.
  MachineInstr *HeadBrMI = getBccTerminator(MBB: &HBB);
  MachineInstr *TrueBrMI = getBccTerminator(MBB: TBB);
  if (!HeadBrMI || !TrueBrMI)
    return false;

  // Since we may modify cmps in these blocks, make sure NZCV does not live out.
  if (nzcvLivesOut(MBB: &HBB) || nzcvLivesOut(MBB: TBB))
    return false;

  MachineInstr *HeadCmpMI = findAdjustableCmp(CondMI: HeadBrMI);
  MachineInstr *TrueCmpMI = findAdjustableCmp(CondMI: TrueBrMI);
  if (!HeadCmpMI || !TrueCmpMI)
    return false;

  if (!registersMatch(FirstMI: HeadCmpMI, SecondMI: TrueCmpMI))
    return false;

  AArch64CC::CondCode HeadCmp = parseCondCode(Cond: HeadCond);
  AArch64CC::CondCode TrueCmp = parseCondCode(Cond: TrueCond);
  if (HeadCmp == AArch64CC::CondCode::Invalid ||
      TrueCmp == AArch64CC::CondCode::Invalid) {
    return false;
  }

  const int HeadImm = (int)HeadCmpMI->getOperand(i: 2).getImm();
  const int TrueImm = (int)TrueCmpMI->getOperand(i: 2).getImm();

  // "True value" is the mathematically compared value: for CMN (ADDS) the
  // encoded immediate represents its negation.
  int HeadImmTrueValue = HeadImm;
  int TrueImmTrueValue = TrueImm;

  LLVM_DEBUG(dbgs() << "Head branch:\n");
  LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
                    << '\n');
  LLVM_DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');

  LLVM_DEBUG(dbgs() << "True branch:\n");
  LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(TrueCmp)
                    << '\n');
  LLVM_DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');

  unsigned Opc = HeadCmpMI->getOpcode();
  if (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri)
    HeadImmTrueValue = -HeadImmTrueValue;

  Opc = TrueCmpMI->getOpcode();
  if (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri)
    TrueImmTrueValue = -TrueImmTrueValue;

  if (((isGreaterThan(Cmp: HeadCmp) && isLessThan(Cmp: TrueCmp)) ||
       (isLessThan(Cmp: HeadCmp) && isGreaterThan(Cmp: TrueCmp))) &&
      std::abs(x: TrueImmTrueValue - HeadImmTrueValue) == 2) {
    // This branch transforms machine instructions that correspond to
    //
    // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
    // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
    //
    // into
    //
    // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
    // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
    // Both compares are adjusted toward the middle value, so both must land
    // on the same immediate and opcode for the rewrite to be worthwhile.

    CmpInfo HeadCmpInfo = getAdjustedCmpInfo(CmpMI: HeadCmpMI, Cmp: HeadCmp);
    CmpInfo TrueCmpInfo = getAdjustedCmpInfo(CmpMI: TrueCmpMI, Cmp: TrueCmp);
    if (HeadCmpInfo.Imm == TrueCmpInfo.Imm &&
        HeadCmpInfo.Opc == TrueCmpInfo.Opc) {
      applyCmpAdjustment(CmpMI: HeadCmpMI, CondMI: HeadBrMI, Info: HeadCmpInfo);
      applyCmpAdjustment(CmpMI: TrueCmpMI, CondMI: TrueBrMI, Info: TrueCmpInfo);
      return true;
    }
  } else if (((isGreaterThan(Cmp: HeadCmp) && isGreaterThan(Cmp: TrueCmp)) ||
              (isLessThan(Cmp: HeadCmp) && isLessThan(Cmp: TrueCmp))) &&
             std::abs(x: TrueImmTrueValue - HeadImmTrueValue) == 1) {
    // This branch transforms machine instructions that correspond to
    //
    // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
    // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
    //
    // into
    //
    // 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
    // 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)

    // GT -> GE transformation increases immediate value, so picking the
    // smaller one; LT -> LE decreases immediate value so invert the choice.
    bool adjustHeadCond = (HeadImmTrueValue < TrueImmTrueValue);
    if (isLessThan(Cmp: HeadCmp)) {
      adjustHeadCond = !adjustHeadCond;
    }

    // Only one side needs adjusting; adjustTo verifies the result matches
    // the untouched compare before committing.
    if (adjustHeadCond) {
      return adjustTo(CmpMI: HeadCmpMI, Cmp: HeadCmp, To: TrueCmpMI, ToImm: TrueImm);
    } else {
      return adjustTo(CmpMI: TrueCmpMI, Cmp: TrueCmp, To: HeadCmpMI, ToImm: HeadImm);
    }
  }
  // Other transformation cases almost never occur due to generation of < or >
  // comparisons instead of <= and >=.

  return false;
}
682
683bool AArch64ConditionOptimizerLegacy::runOnMachineFunction(
684 MachineFunction &MF) {
685 if (skipFunction(F: MF.getFunction()))
686 return false;
687 MachineDominatorTree &MDT =
688 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
689 return AArch64ConditionOptimizerImpl().run(MF, MDT);
690}
691
692bool AArch64ConditionOptimizerImpl::run(MachineFunction &MF,
693 MachineDominatorTree &MDT) {
694 LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
695 << "********** Function: " << MF.getName() << '\n');
696
697 TII = MF.getSubtarget().getInstrInfo();
698 TRI = MF.getSubtarget().getRegisterInfo();
699 DomTree = &MDT;
700 MRI = &MF.getRegInfo();
701
702 bool Changed = false;
703
704 // Visit blocks in dominator tree pre-order. The pre-order enables multiple
705 // cmp-conversions from the same head block.
706 // Note that updateDomTree() modifies the children of the DomTree node
707 // currently being visited. The df_iterator supports that; it doesn't look at
708 // child_begin() / child_end() until after a node has been visited.
709 for (MachineDomTreeNode *I : depth_first(G: DomTree)) {
710 MachineBasicBlock *HBB = I->getBlock();
711 Changed |= optimizeIntraBlock(MBB&: *HBB);
712 Changed |= optimizeCrossBlock(HBB&: *HBB);
713 }
714
715 return Changed;
716}
717
718PreservedAnalyses
719AArch64ConditionOptimizerPass::run(MachineFunction &MF,
720 MachineFunctionAnalysisManager &MFAM) {
721 auto &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
722 bool Changed = AArch64ConditionOptimizerImpl().run(MF, MDT);
723 if (!Changed)
724 return PreservedAnalyses::all();
725 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
726 PA.preserveSet<CFGAnalyses>();
727 return PA;
728}
729