1//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64ConditionalCompares pass which reduces
10// branching and code size by using the conditional compare instructions CCMP,
11// CCMN, and FCMP.
12//
13// The CFG transformations for forming conditional compares are very similar to
14// if-conversion, and this pass should run immediately before the early
15// if-conversion pass.
16//
17//===----------------------------------------------------------------------===//
18
19#include "AArch64.h"
20#include "llvm/ADT/DepthFirstIterator.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
23#include "llvm/CodeGen/MachineDominators.h"
24#include "llvm/CodeGen/MachineFunction.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
28#include "llvm/CodeGen/MachinePassManager.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/MachineTraceMetrics.h"
31#include "llvm/CodeGen/Passes.h"
32#include "llvm/CodeGen/TargetInstrInfo.h"
33#include "llvm/CodeGen/TargetRegisterInfo.h"
34#include "llvm/CodeGen/TargetSubtargetInfo.h"
35#include "llvm/InitializePasses.h"
36#include "llvm/Support/CommandLine.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/raw_ostream.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "aarch64-ccmp"
43
44// Absolute maximum number of instructions allowed per speculated block.
45// This bypasses all other heuristics, so it should be set fairly high.
46static cl::opt<unsigned> BlockInstrLimit(
47 "aarch64-ccmp-limit", cl::init(Val: 30), cl::Hidden,
48 cl::desc("Maximum number of instructions per speculated block."));
49
50// Stress testing mode - disable heuristics.
51static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
52 cl::desc("Turn all knobs to 11"));
53
54STATISTIC(NumConsidered, "Number of ccmps considered");
55STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
56STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
57STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
58STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
59STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
60STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
61STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
62STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
63STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
64STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
65
66STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
67
68STATISTIC(NumConverted, "Number of ccmp instructions created");
69STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
70
71//===----------------------------------------------------------------------===//
72// SSACCmpConv
73//===----------------------------------------------------------------------===//
74//
75// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
76// after determining if it is possible. The class contains no heuristics;
77// external code should be used to determine when ccmp-conversion is a good
78// idea.
79//
80// CCmp-formation works on a CFG representing chained conditions, typically
81// from C's short-circuit || and && operators:
82//
83// From: Head To: Head
84// / | CmpBB
85// / | / |
86// | CmpBB / |
87// | / | Tail |
88// | / | | |
89// Tail | | |
90// | | | |
91// ... ... ... ...
92//
93// The Head block is terminated by a br.cond instruction, and the CmpBB block
94// contains compare + br.cond. Tail must be a successor of both.
95//
96// The cmp-conversion turns the compare instruction in CmpBB into a conditional
97// compare, and merges CmpBB into Head, speculatively executing its
98// instructions. The AArch64 conditional compare instructions have an immediate
99// operand that specifies the NZCV flag values when the condition is false and
100// the compare isn't executed. This makes it possible to chain compares with
101// different condition codes.
102//
103// Example:
104//
105// if (a == 5 || b == 17)
106// foo();
107//
108// Head:
109// cmp w0, #5
110// b.eq Tail
111// CmpBB:
112// cmp w1, #17
113// b.eq Tail
114// ...
115// Tail:
116// bl _foo
117//
118// Becomes:
119//
120// Head:
121// cmp w0, #5
122// ccmp w1, #17, 4, ne ; 4 = nZcv
123// b.eq Tail
124// ...
125// Tail:
126// bl _foo
127//
128// The ccmp condition code is the one that would cause the Head terminator to
129// branch to CmpBB.
130//
131// FIXME: It should also be possible to speculate a block on the critical edge
132// between Head and Tail, just like if-converting a diamond.
133//
134// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
135
136namespace {
137class SSACCmpConv {
138 MachineFunction *MF;
139 const TargetInstrInfo *TII;
140 const TargetRegisterInfo *TRI;
141 MachineRegisterInfo *MRI;
142 const MachineBranchProbabilityInfo *MBPI;
143
144public:
145 /// The first block containing a conditional branch, dominating everything
146 /// else.
147 MachineBasicBlock *Head;
148
149 /// The block containing cmp+br.cond with a successor shared with Head.
150 MachineBasicBlock *CmpBB;
151
152 /// The common successor for Head and CmpBB.
153 MachineBasicBlock *Tail;
154
155 /// The compare instruction in CmpBB that can be converted to a ccmp.
156 MachineInstr *CmpMI;
157
158private:
159 /// The branch condition in Head as determined by analyzeBranch.
160 SmallVector<MachineOperand, 4> HeadCond;
161
162 /// The condition code that makes Head branch to CmpBB.
163 AArch64CC::CondCode HeadCmpBBCC;
164
165 /// The branch condition in CmpBB.
166 SmallVector<MachineOperand, 4> CmpBBCond;
167
168 /// The condition code that makes CmpBB branch to Tail.
169 AArch64CC::CondCode CmpBBTailCC;
170
171 /// Check if the Tail PHIs are trivially convertible.
172 bool trivialTailPHIs();
173
174 /// Remove CmpBB from the Tail PHIs.
175 void updateTailPHIs();
176
177 /// Check if an operand defining DstReg is dead.
178 bool isDeadDef(unsigned DstReg);
179
180 /// Find the compare instruction in MBB that controls the conditional branch.
181 /// Return NULL if a convertible instruction can't be found.
182 MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
183
184 /// Return true if all non-terminator instructions in MBB can be safely
185 /// speculated.
186 bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
187
188public:
189 /// runOnMachineFunction - Initialize per-function data structures.
190 void runOnMachineFunction(MachineFunction &MF,
191 const MachineBranchProbabilityInfo *MBPI) {
192 this->MF = &MF;
193 this->MBPI = MBPI;
194 TII = MF.getSubtarget().getInstrInfo();
195 TRI = MF.getSubtarget().getRegisterInfo();
196 MRI = &MF.getRegInfo();
197 }
198
199 /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
200 /// internal state, and return true.
201 bool canConvert(MachineBasicBlock *MBB);
202
203 /// Cmo-convert the last block passed to canConvertCmp(), assuming
204 /// it is possible. Add any erased blocks to RemovedBlocks.
205 void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
206
207 /// Return the expected code size delta if the conversion into a
208 /// conditional compare is performed.
209 int expectedCodeSizeDelta() const;
210};
211} // end anonymous namespace
212
213static Register lookThroughCopies(Register Reg, MachineRegisterInfo *MRI) {
214 MachineInstr *MI;
215 while ((MI = MRI->getUniqueVRegDef(Reg)) &&
216 MI->getOpcode() == TargetOpcode::COPY) {
217 if (MI->getOperand(i: 1).getReg().isPhysical())
218 break;
219 Reg = MI->getOperand(i: 1).getReg();
220 }
221 return Reg;
222}
223
224// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
225// This means that no if-conversion is required when merging CmpBB into Head.
226bool SSACCmpConv::trivialTailPHIs() {
227 for (auto &I : *Tail) {
228 if (!I.isPHI())
229 break;
230 unsigned HeadReg = 0, CmpBBReg = 0;
231 // PHI operands come in (VReg, MBB) pairs.
232 for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
233 MachineBasicBlock *MBB = I.getOperand(i: oi + 1).getMBB();
234 Register Reg = lookThroughCopies(Reg: I.getOperand(i: oi).getReg(), MRI);
235 if (MBB == Head) {
236 assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
237 HeadReg = Reg;
238 }
239 if (MBB == CmpBB) {
240 assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
241 CmpBBReg = Reg;
242 }
243 }
244 if (HeadReg != CmpBBReg)
245 return false;
246 }
247 return true;
248}
249
250// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
251// removing the CmpBB operands. The Head operands will be identical.
252void SSACCmpConv::updateTailPHIs() {
253 for (auto &I : *Tail) {
254 if (!I.isPHI())
255 break;
256 // I is a PHI. It can have multiple entries for CmpBB.
257 for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
258 // PHI operands are (Reg, MBB) at (oi-2, oi-1).
259 if (I.getOperand(i: oi - 1).getMBB() == CmpBB) {
260 I.removeOperand(OpNo: oi - 1);
261 I.removeOperand(OpNo: oi - 2);
262 }
263 }
264 }
265}
266
267// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
268// are still writing virtual registers without any uses.
269bool SSACCmpConv::isDeadDef(unsigned DstReg) {
270 // Writes to the zero register are dead.
271 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
272 return true;
273 if (!Register::isVirtualRegister(Reg: DstReg))
274 return false;
275 // A virtual register def without any uses will be marked dead later, and
276 // eventually replaced by the zero register.
277 return MRI->use_nodbg_empty(RegNo: DstReg);
278}
279
280// Parse a condition code returned by analyzeBranch, and compute the CondCode
281// corresponding to TBB.
282// Return
283static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
284 // A normal br.cond simply has the condition code.
285 if (Cond[0].getImm() != -1) {
286 assert(Cond.size() == 1 && "Unknown Cond array format");
287 CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
288 return true;
289 }
290 // For tbz and cbz instruction, the opcode is next.
291 switch (Cond[1].getImm()) {
292 default:
293 // This includes tbz / tbnz branches which can't be converted to
294 // ccmp + br.cond.
295 return false;
296 case AArch64::CBZW:
297 case AArch64::CBZX:
298 assert(Cond.size() == 3 && "Unknown Cond array format");
299 CC = AArch64CC::EQ;
300 return true;
301 case AArch64::CBNZW:
302 case AArch64::CBNZX:
303 assert(Cond.size() == 3 && "Unknown Cond array format");
304 CC = AArch64CC::NE;
305 return true;
306 }
307}
308
309MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
310 MachineBasicBlock::iterator I = MBB->getFirstTerminator();
311 if (I == MBB->end())
312 return nullptr;
313 // The terminator must be controlled by the flags.
314 if (!I->readsRegister(Reg: AArch64::NZCV, /*TRI=*/nullptr)) {
315 switch (I->getOpcode()) {
316 case AArch64::CBZW:
317 case AArch64::CBZX:
318 case AArch64::CBNZW:
319 case AArch64::CBNZX:
320 // These can be converted into a ccmp against #0.
321 return &*I;
322 }
323 ++NumCmpTermRejs;
324 LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
325 return nullptr;
326 }
327
328 // Now find the instruction controlling the terminator.
329 for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
330 I = prev_nodbg(It: I, Begin: MBB->begin());
331 assert(!I->isTerminator() && "Spurious terminator");
332 switch (I->getOpcode()) {
333 // cmp is an alias for subs with a dead destination register.
334 case AArch64::SUBSWri:
335 case AArch64::SUBSXri:
336 // cmn is an alias for adds with a dead destination register.
337 case AArch64::ADDSWri:
338 case AArch64::ADDSXri:
339 // Check that the immediate operand is within range, ccmp wants a uimm5.
340 // Rd = SUBSri Rn, imm, shift
341 if (I->getOperand(i: 3).getImm() || !isUInt<5>(x: I->getOperand(i: 2).getImm())) {
342 LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
343 ++NumImmRangeRejs;
344 return nullptr;
345 }
346 [[fallthrough]];
347 case AArch64::SUBSWrr:
348 case AArch64::SUBSXrr:
349 case AArch64::ADDSWrr:
350 case AArch64::ADDSXrr:
351 if (isDeadDef(DstReg: I->getOperand(i: 0).getReg()))
352 return &*I;
353 LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
354 << *I);
355 ++NumLiveDstRejs;
356 return nullptr;
357 case AArch64::FCMPSrr:
358 case AArch64::FCMPDrr:
359 case AArch64::FCMPESrr:
360 case AArch64::FCMPEDrr:
361 return &*I;
362 }
363
364 // Check for flag reads and clobbers.
365 PhysRegInfo PRI = AnalyzePhysRegInBundle(MI: *I, Reg: AArch64::NZCV, TRI);
366
367 if (PRI.Read) {
368 // The ccmp doesn't produce exactly the same flags as the original
369 // compare, so reject the transform if there are uses of the flags
370 // besides the terminators.
371 LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
372 ++NumMultNZCVUses;
373 return nullptr;
374 }
375
376 if (PRI.Defined || PRI.Clobbered) {
377 LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
378 ++NumUnknNZCVDefs;
379 return nullptr;
380 }
381 }
382 LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
383 << '\n');
384 return nullptr;
385}
386
387/// Determine if all the instructions in MBB can safely
388/// be speculated. The terminators are not considered.
389///
390/// Only CmpMI is allowed to clobber the flags.
391///
392bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
393 const MachineInstr *CmpMI) {
394 // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
395 // get right.
396 if (!MBB->livein_empty()) {
397 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
398 return false;
399 }
400
401 unsigned InstrCount = 0;
402
403 // Check all instructions, except the terminators. It is assumed that
404 // terminators never have side effects or define any used register values.
405 for (auto &I : make_range(x: MBB->begin(), y: MBB->getFirstTerminator())) {
406 if (I.isDebugInstr())
407 continue;
408
409 if (++InstrCount > BlockInstrLimit && !Stress) {
410 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
411 << BlockInstrLimit << " instructions.\n");
412 return false;
413 }
414
415 // There shouldn't normally be any phis in a single-predecessor block.
416 if (I.isPHI()) {
417 LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
418 return false;
419 }
420
421 // Don't speculate loads. Note that it may be possible and desirable to
422 // speculate GOT or constant pool loads that are guaranteed not to trap,
423 // but we don't support that for now.
424 if (I.mayLoad()) {
425 LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
426 return false;
427 }
428
429 // We never speculate stores, so an AA pointer isn't necessary.
430 bool DontMoveAcrossStore = true;
431 if (!I.isSafeToMove(SawStore&: DontMoveAcrossStore)) {
432 LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
433 return false;
434 }
435
436 // Only CmpMI is allowed to clobber the flags.
437 if (&I != CmpMI && I.modifiesRegister(Reg: AArch64::NZCV, TRI)) {
438 LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
439 return false;
440 }
441 }
442 return true;
443}
444
445/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
446/// candidate for cmp-conversion. Fill out the internal state.
447///
448bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
449 Head = MBB;
450 Tail = CmpBB = nullptr;
451
452 if (Head->succ_size() != 2)
453 return false;
454 MachineBasicBlock *Succ0 = Head->succ_begin()[0];
455 MachineBasicBlock *Succ1 = Head->succ_begin()[1];
456
457 // CmpBB can only have a single predecessor. Tail is allowed many.
458 if (Succ0->pred_size() != 1)
459 std::swap(a&: Succ0, b&: Succ1);
460
461 // Succ0 is our candidate for CmpBB.
462 if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
463 return false;
464
465 CmpBB = Succ0;
466 Tail = Succ1;
467
468 if (!CmpBB->isSuccessor(MBB: Tail))
469 return false;
470
471 // The CFG topology checks out.
472 LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
473 << printMBBReference(*CmpBB) << " -> "
474 << printMBBReference(*Tail) << '\n');
475 ++NumConsidered;
476
477 // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
478 //
479 // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
480 // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
481 // always be safe to sink the ccmp down to immediately before the CmpBB
482 // terminators.
483 if (!trivialTailPHIs()) {
484 LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
485 ++NumPhiRejs;
486 return false;
487 }
488
489 if (!Tail->livein_empty()) {
490 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
491 ++NumPhysRejs;
492 return false;
493 }
494
495 // CmpBB should never have PHIs since Head is its only predecessor.
496 // FIXME: Clean them up if it happens.
497 if (!CmpBB->empty() && CmpBB->front().isPHI()) {
498 LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
499 ++NumPhi2Rejs;
500 return false;
501 }
502
503 if (!CmpBB->livein_empty()) {
504 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
505 ++NumPhysRejs;
506 return false;
507 }
508
509 // The branch we're looking to eliminate must be analyzable.
510 HeadCond.clear();
511 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
512 if (TII->analyzeBranch(MBB&: *Head, TBB, FBB, Cond&: HeadCond)) {
513 LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
514 ++NumHeadBranchRejs;
515 return false;
516 }
517
518 // This is weird, probably some sort of degenerate CFG, or an edge to a
519 // landing pad.
520 if (!TBB || HeadCond.empty()) {
521 LLVM_DEBUG(
522 dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
523 ++NumHeadBranchRejs;
524 return false;
525 }
526
527 if (!parseCond(Cond: HeadCond, CC&: HeadCmpBBCC)) {
528 LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
529 ++NumHeadBranchRejs;
530 return false;
531 }
532
533 // Make sure the branch direction is right.
534 if (TBB != CmpBB) {
535 assert(TBB == Tail && "Unexpected TBB");
536 HeadCmpBBCC = AArch64CC::getInvertedCondCode(Code: HeadCmpBBCC);
537 }
538
539 CmpBBCond.clear();
540 TBB = FBB = nullptr;
541 if (TII->analyzeBranch(MBB&: *CmpBB, TBB, FBB, Cond&: CmpBBCond)) {
542 LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
543 ++NumCmpBranchRejs;
544 return false;
545 }
546
547 if (!TBB || CmpBBCond.empty()) {
548 LLVM_DEBUG(
549 dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
550 ++NumCmpBranchRejs;
551 return false;
552 }
553
554 if (!parseCond(Cond: CmpBBCond, CC&: CmpBBTailCC)) {
555 LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
556 ++NumCmpBranchRejs;
557 return false;
558 }
559
560 if (TBB != Tail)
561 CmpBBTailCC = AArch64CC::getInvertedCondCode(Code: CmpBBTailCC);
562
563 LLVM_DEBUG(dbgs() << "Head->CmpBB on "
564 << AArch64CC::getCondCodeName(HeadCmpBBCC)
565 << ", CmpBB->Tail on "
566 << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
567
568 CmpMI = findConvertibleCompare(MBB: CmpBB);
569 if (!CmpMI)
570 return false;
571
572 if (!canSpeculateInstrs(MBB: CmpBB, CmpMI)) {
573 ++NumSpeculateRejs;
574 return false;
575 }
576 return true;
577}
578
579void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
580 LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
581 << printMBBReference(*Head) << ":\n"
582 << *CmpBB);
583
584 // All CmpBB instructions are moved into Head, and CmpBB is deleted.
585 // Update the CFG first.
586 updateTailPHIs();
587
588 // Save successor probabilities before removing CmpBB and Tail from their
589 // parents.
590 BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Src: Head, Dst: CmpBB);
591 BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(Src: CmpBB, Dst: Tail);
592
593 Head->removeSuccessor(Succ: CmpBB);
594 CmpBB->removeSuccessor(Succ: Tail);
595
596 // If Head and CmpBB had successor probabilities, update the probabilities to
597 // reflect the ccmp-conversion.
598 if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
599
600 // Head is allowed two successors. We've removed CmpBB, so the remaining
601 // successor is Tail. We need to increase the successor probability for
602 // Tail to account for the CmpBB path we removed.
603 //
604 // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
605 assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
606 BranchProbability Head2Tail = MBPI->getEdgeProbability(Src: Head, Dst: Tail);
607 Head->setSuccProbability(I: Head->succ_begin(),
608 Prob: Head2Tail + Head2CmpBB * CmpBB2Tail);
609
610 // We will transfer successors of CmpBB to Head in a moment without
611 // normalizing the successor probabilities. Set the successor probabilities
612 // before doing so.
613 //
614 // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
615 for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
616 BranchProbability CmpBB2I = MBPI->getEdgeProbability(Src: CmpBB, Dst: *I);
617 CmpBB->setSuccProbability(I, Prob: Head2CmpBB * CmpBB2I);
618 }
619 }
620
621 Head->transferSuccessorsAndUpdatePHIs(FromMBB: CmpBB);
622 DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
623 TII->removeBranch(MBB&: *Head);
624
625 // If the Head terminator was one of the cbz / tbz branches with built-in
626 // compare, we need to insert an explicit compare instruction in its place.
627 if (HeadCond[0].getImm() == -1) {
628 ++NumCompBranches;
629 unsigned Opc = 0;
630 switch (HeadCond[1].getImm()) {
631 case AArch64::CBZW:
632 case AArch64::CBNZW:
633 Opc = AArch64::SUBSWri;
634 break;
635 case AArch64::CBZX:
636 case AArch64::CBNZX:
637 Opc = AArch64::SUBSXri;
638 break;
639 default:
640 llvm_unreachable("Cannot convert Head branch");
641 }
642 const MCInstrDesc &MCID = TII->get(Opcode: Opc);
643 // Create a dummy virtual register for the SUBS def.
644 Register DestReg = MRI->createVirtualRegister(RegClass: TII->getRegClass(MCID, OpNum: 0));
645 // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
646 BuildMI(BB&: *Head, I: Head->end(), MIMD: TermDL, MCID)
647 .addReg(RegNo: DestReg, Flags: RegState::Define | RegState::Dead)
648 .add(MO: HeadCond[2])
649 .addImm(Val: 0)
650 .addImm(Val: 0);
651 // SUBS uses the GPR*sp register classes.
652 MRI->constrainRegClass(Reg: HeadCond[2].getReg(), RC: TII->getRegClass(MCID, OpNum: 1));
653 }
654
655 Head->splice(Where: Head->end(), Other: CmpBB, From: CmpBB->begin(), To: CmpBB->end());
656
657 // Now replace CmpMI with a ccmp instruction that also considers the incoming
658 // flags.
659 unsigned Opc = 0;
660 unsigned FirstOp = 1; // First CmpMI operand to copy.
661 bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
662 switch (CmpMI->getOpcode()) {
663 default:
664 llvm_unreachable("Unknown compare opcode");
665 case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
666 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
667 case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
668 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
669 case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
670 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
671 case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
672 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
673 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
674 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
675 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
676 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
677 case AArch64::CBZW:
678 case AArch64::CBNZW:
679 Opc = AArch64::CCMPWi;
680 FirstOp = 0;
681 isZBranch = true;
682 break;
683 case AArch64::CBZX:
684 case AArch64::CBNZX:
685 Opc = AArch64::CCMPXi;
686 FirstOp = 0;
687 isZBranch = true;
688 break;
689 }
690
691 // The ccmp instruction should set the flags according to the comparison when
692 // Head would have branched to CmpBB.
693 // The NZCV immediate operand should provide flags for the case where Head
694 // would have branched to Tail. These flags should cause the new Head
695 // terminator to branch to tail.
696 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: CmpBBTailCC);
697 const MCInstrDesc &MCID = TII->get(Opcode: Opc);
698 MRI->constrainRegClass(Reg: CmpMI->getOperand(i: FirstOp).getReg(),
699 RC: TII->getRegClass(MCID, OpNum: 0));
700 if (CmpMI->getOperand(i: FirstOp + 1).isReg())
701 MRI->constrainRegClass(Reg: CmpMI->getOperand(i: FirstOp + 1).getReg(),
702 RC: TII->getRegClass(MCID, OpNum: 1));
703 MachineInstrBuilder MIB = BuildMI(BB&: *Head, I: CmpMI, MIMD: CmpMI->getDebugLoc(), MCID)
704 .add(MO: CmpMI->getOperand(i: FirstOp)); // Register Rn
705 if (isZBranch)
706 MIB.addImm(Val: 0); // cbz/cbnz Rn -> ccmp Rn, #0
707 else
708 MIB.add(MO: CmpMI->getOperand(i: FirstOp + 1)); // Register Rm / Immediate
709 MIB.addImm(Val: NZCV).addImm(Val: HeadCmpBBCC);
710
711 // If CmpMI was a terminator, we need a new conditional branch to replace it.
712 // This now becomes a Head terminator.
713 if (isZBranch) {
714 bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
715 CmpMI->getOpcode() == AArch64::CBNZX;
716 BuildMI(BB&: *Head, I: CmpMI, MIMD: CmpMI->getDebugLoc(), MCID: TII->get(Opcode: AArch64::Bcc))
717 .addImm(Val: isNZ ? AArch64CC::NE : AArch64CC::EQ)
718 .add(MO: CmpMI->getOperand(i: 1)); // Branch target.
719 }
720 CmpMI->eraseFromParent();
721 Head->updateTerminator(PreviousLayoutSuccessor: CmpBB->getNextNode());
722
723 RemovedBlocks.push_back(Elt: CmpBB);
724 LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
725 ++NumConverted;
726}
727
728int SSACCmpConv::expectedCodeSizeDelta() const {
729 int delta = 0;
730 // If the Head terminator was one of the cbz / tbz branches with built-in
731 // compare, we need to insert an explicit compare instruction in its place
732 // plus a branch instruction.
733 if (HeadCond[0].getImm() == -1) {
734 switch (HeadCond[1].getImm()) {
735 case AArch64::CBZW:
736 case AArch64::CBNZW:
737 case AArch64::CBZX:
738 case AArch64::CBNZX:
739 // Therefore delta += 1
740 delta = 1;
741 break;
742 default:
743 llvm_unreachable("Cannot convert Head branch");
744 }
745 }
746 // If the Cmp terminator was one of the cbz / tbz branches with
747 // built-in compare, it will be turned into a compare instruction
748 // into Head, but we do not save any instruction.
749 // Otherwise, we save the branch instruction.
750 switch (CmpMI->getOpcode()) {
751 default:
752 --delta;
753 break;
754 case AArch64::CBZW:
755 case AArch64::CBNZW:
756 case AArch64::CBZX:
757 case AArch64::CBNZX:
758 break;
759 }
760 return delta;
761}
762
763//===----------------------------------------------------------------------===//
764// AArch64ConditionalCompares Pass
765//===----------------------------------------------------------------------===//
766
767namespace {
768class AArch64ConditionalComparesImpl {
769 const MachineBranchProbabilityInfo *MBPI;
770 const TargetInstrInfo *TII;
771 const TargetRegisterInfo *TRI;
772 MCSchedModel SchedModel;
773 // Does the proceeded function has Oz attribute.
774 bool MinSize;
775 MachineRegisterInfo *MRI;
776 MachineDominatorTree *DomTree;
777 MachineLoopInfo *Loops;
778 MachineTraceMetrics *Traces;
779 MachineTraceMetrics::Ensemble *MinInstr;
780 SSACCmpConv CmpConv;
781
782public:
783 AArch64ConditionalComparesImpl(const MachineBranchProbabilityInfo *MBPI,
784 MachineDominatorTree *DomTree,
785 MachineLoopInfo *Loops,
786 MachineTraceMetrics *Traces)
787 : MBPI(MBPI), DomTree(DomTree), Loops(Loops), Traces(Traces) {}
788
789 bool run(MachineFunction &MF);
790
791private:
792 bool tryConvert(MachineBasicBlock *);
793 void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
794 void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
795 void invalidateTraces();
796 bool shouldConvert();
797};
798
799class AArch64ConditionalComparesLegacy : public MachineFunctionPass {
800public:
801 static char ID;
802 AArch64ConditionalComparesLegacy() : MachineFunctionPass(ID) {
803 initializeAArch64ConditionalComparesLegacyPass(
804 *PassRegistry::getPassRegistry());
805 }
806 void getAnalysisUsage(AnalysisUsage &AU) const override;
807 bool runOnMachineFunction(MachineFunction &MF) override;
808 StringRef getPassName() const override {
809 return "AArch64 Conditional Compares";
810 }
811};
812} // end anonymous namespace
813
814char AArch64ConditionalComparesLegacy::ID = 0;
815
816INITIALIZE_PASS_BEGIN(AArch64ConditionalComparesLegacy, "aarch64-ccmp",
817 "AArch64 CCMP Pass", false, false)
818INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
819INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
820INITIALIZE_PASS_DEPENDENCY(MachineTraceMetricsWrapperPass)
821INITIALIZE_PASS_END(AArch64ConditionalComparesLegacy, "aarch64-ccmp",
822 "AArch64 CCMP Pass", false, false)
823
824FunctionPass *llvm::createAArch64ConditionalCompares() {
825 return new AArch64ConditionalComparesLegacy();
826}
827
828void AArch64ConditionalComparesLegacy::getAnalysisUsage(
829 AnalysisUsage &AU) const {
830 AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
831 AU.addRequired<MachineDominatorTreeWrapperPass>();
832 AU.addPreserved<MachineDominatorTreeWrapperPass>();
833 AU.addRequired<MachineLoopInfoWrapperPass>();
834 AU.addPreserved<MachineLoopInfoWrapperPass>();
835 AU.addRequired<MachineTraceMetricsWrapperPass>();
836 AU.addPreserved<MachineTraceMetricsWrapperPass>();
837 MachineFunctionPass::getAnalysisUsage(AU);
838}
839
840/// Update the dominator tree after if-conversion erased some blocks.
841void AArch64ConditionalComparesImpl::updateDomTree(
842 ArrayRef<MachineBasicBlock *> Removed) {
843 // convert() removes CmpBB which was previously dominated by Head.
844 // CmpBB children should be transferred to Head.
845 MachineDomTreeNode *HeadNode = DomTree->getNode(BB: CmpConv.Head);
846 for (MachineBasicBlock *RemovedMBB : Removed) {
847 MachineDomTreeNode *Node = DomTree->getNode(BB: RemovedMBB);
848 assert(Node != HeadNode && "Cannot erase the head node");
849 assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
850 while (!Node->isLeaf())
851 DomTree->changeImmediateDominator(N: *Node->begin(), NewIDom: HeadNode);
852 DomTree->eraseNode(BB: RemovedMBB);
853 }
854}
855
856/// Update LoopInfo after if-conversion.
857void AArch64ConditionalComparesImpl::updateLoops(
858 ArrayRef<MachineBasicBlock *> Removed) {
859 if (!Loops)
860 return;
861 for (MachineBasicBlock *RemovedMBB : Removed)
862 Loops->removeBlock(BB: RemovedMBB);
863}
864
865/// Invalidate MachineTraceMetrics before if-conversion.
866void AArch64ConditionalComparesImpl::invalidateTraces() {
867 Traces->invalidate(MBB: CmpConv.Head);
868 Traces->invalidate(MBB: CmpConv.CmpBB);
869}
870
871/// Apply cost model and heuristics to the if-conversion in IfConv.
872/// Return true if the conversion is a good idea.
873///
874bool AArch64ConditionalComparesImpl::shouldConvert() {
875 // Stress testing mode disables all cost considerations.
876 if (Stress)
877 return true;
878 if (!MinInstr)
879 MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
880
881 // Head dominates CmpBB, so it is always included in its trace.
882 MachineTraceMetrics::Trace Trace = MinInstr->getTrace(MBB: CmpConv.CmpBB);
883
884 // If code size is the main concern
885 if (MinSize) {
886 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
887 LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
888 // If we are minimizing the code size, do the conversion whatever
889 // the cost is.
890 if (CodeSizeDelta < 0)
891 return true;
892 if (CodeSizeDelta > 0) {
893 LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
894 return false;
895 }
896 // CodeSizeDelta == 0, continue with the regular heuristics
897 }
898
899 // Heuristic: The compare conversion delays the execution of the branch
900 // instruction because we must wait for the inputs to the second compare as
901 // well. The branch has no dependent instructions, but delaying it increases
902 // the cost of a misprediction.
903 //
904 // Set a limit on the delay we will accept.
905 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
906
907 // Instruction depths can be computed for all trace instructions above CmpBB.
908 unsigned HeadDepth =
909 Trace.getInstrCycles(MI: *CmpConv.Head->getFirstTerminator()).Depth;
910 unsigned CmpBBDepth =
911 Trace.getInstrCycles(MI: *CmpConv.CmpBB->getFirstTerminator()).Depth;
912 LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
913 << "\nCmpBB depth: " << CmpBBDepth << '\n');
914 if (CmpBBDepth > HeadDepth + DelayLimit) {
915 LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
916 << " cycles.\n");
917 return false;
918 }
919
920 // Check the resource depth at the bottom of CmpBB - these instructions will
921 // be speculated.
922 unsigned ResDepth = Trace.getResourceDepth(Bottom: true);
923 LLVM_DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
924
925 // Heuristic: The speculatively executed instructions must all be able to
926 // merge into the Head block. The Head critical path should dominate the
927 // resource cost of the speculated instructions.
928 if (ResDepth > HeadDepth) {
929 LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
930 return false;
931 }
932 return true;
933}
934
935bool AArch64ConditionalComparesImpl::tryConvert(MachineBasicBlock *MBB) {
936 bool Changed = false;
937 while (CmpConv.canConvert(MBB) && shouldConvert()) {
938 invalidateTraces();
939 SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
940 CmpConv.convert(RemovedBlocks);
941 Changed = true;
942 updateDomTree(Removed: RemovedBlocks);
943 updateLoops(Removed: RemovedBlocks);
944 for (MachineBasicBlock *MBB : RemovedBlocks)
945 MBB->eraseFromParent();
946 }
947 return Changed;
948}
949
950bool AArch64ConditionalComparesImpl::run(MachineFunction &MF) {
951 LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
952 << "********** Function: " << MF.getName() << '\n');
953
954 TII = MF.getSubtarget().getInstrInfo();
955 TRI = MF.getSubtarget().getRegisterInfo();
956 SchedModel = MF.getSubtarget().getSchedModel();
957 MRI = &MF.getRegInfo();
958 MinInstr = nullptr;
959 MinSize = MF.getFunction().hasMinSize();
960
961 bool Changed = false;
962 CmpConv.runOnMachineFunction(MF, MBPI);
963
964 // Visit blocks in dominator tree pre-order. The pre-order enables multiple
965 // cmp-conversions from the same head block.
966 // Note that updateDomTree() modifies the children of the DomTree node
967 // currently being visited. The df_iterator supports that; it doesn't look at
968 // child_begin() / child_end() until after a node has been visited.
969 for (auto *I : depth_first(G: DomTree))
970 if (tryConvert(MBB: I->getBlock()))
971 Changed = true;
972
973 return Changed;
974}
975
976bool AArch64ConditionalComparesLegacy::runOnMachineFunction(
977 MachineFunction &MF) {
978 if (skipFunction(F: MF.getFunction()))
979 return false;
980
981 const MachineBranchProbabilityInfo *MBPI =
982 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
983 MachineDominatorTree *DomTree =
984 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
985 MachineLoopInfo *Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
986 MachineTraceMetrics *Traces =
987 &getAnalysis<MachineTraceMetricsWrapperPass>().getMTM();
988
989 AArch64ConditionalComparesImpl Impl(MBPI, DomTree, Loops, Traces);
990 return Impl.run(MF);
991}
992
993PreservedAnalyses
994AArch64ConditionalComparesPass::run(MachineFunction &MF,
995 MachineFunctionAnalysisManager &MFAM) {
996 const MachineBranchProbabilityInfo *MBPI =
997 &MFAM.getResult<MachineBranchProbabilityAnalysis>(IR&: MF);
998 MachineDominatorTree *DomTree =
999 &MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
1000 MachineLoopInfo *Loops = &MFAM.getResult<MachineLoopAnalysis>(IR&: MF);
1001 MachineTraceMetrics *Traces =
1002 &MFAM.getResult<MachineTraceMetricsAnalysis>(IR&: MF);
1003
1004 AArch64ConditionalComparesImpl Impl(MBPI, DomTree, Loops, Traces);
1005 bool Changed = Impl.run(MF);
1006 if (!Changed)
1007 return PreservedAnalyses::all();
1008
1009 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
1010 PA.preserve<MachineDominatorTreeAnalysis>();
1011 PA.preserve<MachineLoopAnalysis>();
1012 PA.preserve<MachineTraceMetricsAnalysis>();
1013 return PA;
1014}
1015