1//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64ConditionalCompares pass which reduces
10// branching and code size by using the conditional compare instructions CCMP,
11// CCMN, and FCMP.
12//
13// The CFG transformations for forming conditional compares are very similar to
14// if-conversion, and this pass should run immediately before the early
15// if-conversion pass.
16//
17//===----------------------------------------------------------------------===//
18
19#include "AArch64.h"
20#include "llvm/ADT/DepthFirstIterator.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
23#include "llvm/CodeGen/MachineDominators.h"
24#include "llvm/CodeGen/MachineFunction.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/CodeGen/MachineTraceMetrics.h"
30#include "llvm/CodeGen/Passes.h"
31#include "llvm/CodeGen/TargetInstrInfo.h"
32#include "llvm/CodeGen/TargetRegisterInfo.h"
33#include "llvm/CodeGen/TargetSubtargetInfo.h"
34#include "llvm/InitializePasses.h"
35#include "llvm/Support/CommandLine.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/raw_ostream.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "aarch64-ccmp"
42
43// Absolute maximum number of instructions allowed per speculated block.
44// This bypasses all other heuristics, so it should be set fairly high.
45static cl::opt<unsigned> BlockInstrLimit(
46 "aarch64-ccmp-limit", cl::init(Val: 30), cl::Hidden,
47 cl::desc("Maximum number of instructions per speculated block."));
48
49// Stress testing mode - disable heuristics.
50static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
51 cl::desc("Turn all knobs to 11"));
52
53STATISTIC(NumConsidered, "Number of ccmps considered");
54STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
55STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
56STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
57STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
58STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
59STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
60STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
61STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
62STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
63STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
64
65STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
66
67STATISTIC(NumConverted, "Number of ccmp instructions created");
68STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
69
70//===----------------------------------------------------------------------===//
71// SSACCmpConv
72//===----------------------------------------------------------------------===//
73//
74// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
75// after determining if it is possible. The class contains no heuristics;
76// external code should be used to determine when ccmp-conversion is a good
77// idea.
78//
79// CCmp-formation works on a CFG representing chained conditions, typically
80// from C's short-circuit || and && operators:
81//
82// From: Head To: Head
83// / | CmpBB
84// / | / |
85// | CmpBB / |
86// | / | Tail |
87// | / | | |
88// Tail | | |
89// | | | |
90// ... ... ... ...
91//
92// The Head block is terminated by a br.cond instruction, and the CmpBB block
93// contains compare + br.cond. Tail must be a successor of both.
94//
95// The cmp-conversion turns the compare instruction in CmpBB into a conditional
96// compare, and merges CmpBB into Head, speculatively executing its
97// instructions. The AArch64 conditional compare instructions have an immediate
98// operand that specifies the NZCV flag values when the condition is false and
99// the compare isn't executed. This makes it possible to chain compares with
100// different condition codes.
101//
102// Example:
103//
104// if (a == 5 || b == 17)
105// foo();
106//
107// Head:
108// cmp w0, #5
109// b.eq Tail
110// CmpBB:
111// cmp w1, #17
112// b.eq Tail
113// ...
114// Tail:
115// bl _foo
116//
117// Becomes:
118//
119// Head:
120// cmp w0, #5
121// ccmp w1, #17, 4, ne ; 4 = nZcv
122// b.eq Tail
123// ...
124// Tail:
125// bl _foo
126//
127// The ccmp condition code is the one that would cause the Head terminator to
128// branch to CmpBB.
129//
130// FIXME: It should also be possible to speculate a block on the critical edge
131// between Head and Tail, just like if-converting a diamond.
132//
133// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
134
135namespace {
136class SSACCmpConv {
137 MachineFunction *MF;
138 const TargetInstrInfo *TII;
139 const TargetRegisterInfo *TRI;
140 MachineRegisterInfo *MRI;
141 const MachineBranchProbabilityInfo *MBPI;
142
143public:
144 /// The first block containing a conditional branch, dominating everything
145 /// else.
146 MachineBasicBlock *Head;
147
148 /// The block containing cmp+br.cond with a successor shared with Head.
149 MachineBasicBlock *CmpBB;
150
151 /// The common successor for Head and CmpBB.
152 MachineBasicBlock *Tail;
153
154 /// The compare instruction in CmpBB that can be converted to a ccmp.
155 MachineInstr *CmpMI;
156
157private:
158 /// The branch condition in Head as determined by analyzeBranch.
159 SmallVector<MachineOperand, 4> HeadCond;
160
161 /// The condition code that makes Head branch to CmpBB.
162 AArch64CC::CondCode HeadCmpBBCC;
163
164 /// The branch condition in CmpBB.
165 SmallVector<MachineOperand, 4> CmpBBCond;
166
167 /// The condition code that makes CmpBB branch to Tail.
168 AArch64CC::CondCode CmpBBTailCC;
169
170 /// Check if the Tail PHIs are trivially convertible.
171 bool trivialTailPHIs();
172
173 /// Remove CmpBB from the Tail PHIs.
174 void updateTailPHIs();
175
176 /// Check if an operand defining DstReg is dead.
177 bool isDeadDef(unsigned DstReg);
178
179 /// Find the compare instruction in MBB that controls the conditional branch.
180 /// Return NULL if a convertible instruction can't be found.
181 MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
182
183 /// Return true if all non-terminator instructions in MBB can be safely
184 /// speculated.
185 bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
186
187public:
188 /// runOnMachineFunction - Initialize per-function data structures.
189 void runOnMachineFunction(MachineFunction &MF,
190 const MachineBranchProbabilityInfo *MBPI) {
191 this->MF = &MF;
192 this->MBPI = MBPI;
193 TII = MF.getSubtarget().getInstrInfo();
194 TRI = MF.getSubtarget().getRegisterInfo();
195 MRI = &MF.getRegInfo();
196 }
197
198 /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
199 /// internal state, and return true.
200 bool canConvert(MachineBasicBlock *MBB);
201
202 /// Cmo-convert the last block passed to canConvertCmp(), assuming
203 /// it is possible. Add any erased blocks to RemovedBlocks.
204 void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
205
206 /// Return the expected code size delta if the conversion into a
207 /// conditional compare is performed.
208 int expectedCodeSizeDelta() const;
209};
210} // end anonymous namespace
211
212static Register lookThroughCopies(Register Reg, MachineRegisterInfo *MRI) {
213 MachineInstr *MI;
214 while ((MI = MRI->getUniqueVRegDef(Reg)) &&
215 MI->getOpcode() == TargetOpcode::COPY) {
216 if (MI->getOperand(i: 1).getReg().isPhysical())
217 break;
218 Reg = MI->getOperand(i: 1).getReg();
219 }
220 return Reg;
221}
222
223// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
224// This means that no if-conversion is required when merging CmpBB into Head.
225bool SSACCmpConv::trivialTailPHIs() {
226 for (auto &I : *Tail) {
227 if (!I.isPHI())
228 break;
229 unsigned HeadReg = 0, CmpBBReg = 0;
230 // PHI operands come in (VReg, MBB) pairs.
231 for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
232 MachineBasicBlock *MBB = I.getOperand(i: oi + 1).getMBB();
233 Register Reg = lookThroughCopies(Reg: I.getOperand(i: oi).getReg(), MRI);
234 if (MBB == Head) {
235 assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
236 HeadReg = Reg;
237 }
238 if (MBB == CmpBB) {
239 assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
240 CmpBBReg = Reg;
241 }
242 }
243 if (HeadReg != CmpBBReg)
244 return false;
245 }
246 return true;
247}
248
249// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
250// removing the CmpBB operands. The Head operands will be identical.
251void SSACCmpConv::updateTailPHIs() {
252 for (auto &I : *Tail) {
253 if (!I.isPHI())
254 break;
255 // I is a PHI. It can have multiple entries for CmpBB.
256 for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
257 // PHI operands are (Reg, MBB) at (oi-2, oi-1).
258 if (I.getOperand(i: oi - 1).getMBB() == CmpBB) {
259 I.removeOperand(OpNo: oi - 1);
260 I.removeOperand(OpNo: oi - 2);
261 }
262 }
263 }
264}
265
266// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
267// are still writing virtual registers without any uses.
268bool SSACCmpConv::isDeadDef(unsigned DstReg) {
269 // Writes to the zero register are dead.
270 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
271 return true;
272 if (!Register::isVirtualRegister(Reg: DstReg))
273 return false;
274 // A virtual register def without any uses will be marked dead later, and
275 // eventually replaced by the zero register.
276 return MRI->use_nodbg_empty(RegNo: DstReg);
277}
278
279// Parse a condition code returned by analyzeBranch, and compute the CondCode
280// corresponding to TBB.
281// Return
282static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
283 // A normal br.cond simply has the condition code.
284 if (Cond[0].getImm() != -1) {
285 assert(Cond.size() == 1 && "Unknown Cond array format");
286 CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
287 return true;
288 }
289 // For tbz and cbz instruction, the opcode is next.
290 switch (Cond[1].getImm()) {
291 default:
292 // This includes tbz / tbnz branches which can't be converted to
293 // ccmp + br.cond.
294 return false;
295 case AArch64::CBZW:
296 case AArch64::CBZX:
297 assert(Cond.size() == 3 && "Unknown Cond array format");
298 CC = AArch64CC::EQ;
299 return true;
300 case AArch64::CBNZW:
301 case AArch64::CBNZX:
302 assert(Cond.size() == 3 && "Unknown Cond array format");
303 CC = AArch64CC::NE;
304 return true;
305 }
306}
307
308MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
309 MachineBasicBlock::iterator I = MBB->getFirstTerminator();
310 if (I == MBB->end())
311 return nullptr;
312 // The terminator must be controlled by the flags.
313 if (!I->readsRegister(Reg: AArch64::NZCV, /*TRI=*/nullptr)) {
314 switch (I->getOpcode()) {
315 case AArch64::CBZW:
316 case AArch64::CBZX:
317 case AArch64::CBNZW:
318 case AArch64::CBNZX:
319 // These can be converted into a ccmp against #0.
320 return &*I;
321 }
322 ++NumCmpTermRejs;
323 LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
324 return nullptr;
325 }
326
327 // Now find the instruction controlling the terminator.
328 for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
329 I = prev_nodbg(It: I, Begin: MBB->begin());
330 assert(!I->isTerminator() && "Spurious terminator");
331 switch (I->getOpcode()) {
332 // cmp is an alias for subs with a dead destination register.
333 case AArch64::SUBSWri:
334 case AArch64::SUBSXri:
335 // cmn is an alias for adds with a dead destination register.
336 case AArch64::ADDSWri:
337 case AArch64::ADDSXri:
338 // Check that the immediate operand is within range, ccmp wants a uimm5.
339 // Rd = SUBSri Rn, imm, shift
340 if (I->getOperand(i: 3).getImm() || !isUInt<5>(x: I->getOperand(i: 2).getImm())) {
341 LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
342 ++NumImmRangeRejs;
343 return nullptr;
344 }
345 [[fallthrough]];
346 case AArch64::SUBSWrr:
347 case AArch64::SUBSXrr:
348 case AArch64::ADDSWrr:
349 case AArch64::ADDSXrr:
350 if (isDeadDef(DstReg: I->getOperand(i: 0).getReg()))
351 return &*I;
352 LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
353 << *I);
354 ++NumLiveDstRejs;
355 return nullptr;
356 case AArch64::FCMPSrr:
357 case AArch64::FCMPDrr:
358 case AArch64::FCMPESrr:
359 case AArch64::FCMPEDrr:
360 return &*I;
361 }
362
363 // Check for flag reads and clobbers.
364 PhysRegInfo PRI = AnalyzePhysRegInBundle(MI: *I, Reg: AArch64::NZCV, TRI);
365
366 if (PRI.Read) {
367 // The ccmp doesn't produce exactly the same flags as the original
368 // compare, so reject the transform if there are uses of the flags
369 // besides the terminators.
370 LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
371 ++NumMultNZCVUses;
372 return nullptr;
373 }
374
375 if (PRI.Defined || PRI.Clobbered) {
376 LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
377 ++NumUnknNZCVDefs;
378 return nullptr;
379 }
380 }
381 LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
382 << '\n');
383 return nullptr;
384}
385
386/// Determine if all the instructions in MBB can safely
387/// be speculated. The terminators are not considered.
388///
389/// Only CmpMI is allowed to clobber the flags.
390///
391bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
392 const MachineInstr *CmpMI) {
393 // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
394 // get right.
395 if (!MBB->livein_empty()) {
396 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
397 return false;
398 }
399
400 unsigned InstrCount = 0;
401
402 // Check all instructions, except the terminators. It is assumed that
403 // terminators never have side effects or define any used register values.
404 for (auto &I : make_range(x: MBB->begin(), y: MBB->getFirstTerminator())) {
405 if (I.isDebugInstr())
406 continue;
407
408 if (++InstrCount > BlockInstrLimit && !Stress) {
409 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
410 << BlockInstrLimit << " instructions.\n");
411 return false;
412 }
413
414 // There shouldn't normally be any phis in a single-predecessor block.
415 if (I.isPHI()) {
416 LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
417 return false;
418 }
419
420 // Don't speculate loads. Note that it may be possible and desirable to
421 // speculate GOT or constant pool loads that are guaranteed not to trap,
422 // but we don't support that for now.
423 if (I.mayLoad()) {
424 LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
425 return false;
426 }
427
428 // We never speculate stores, so an AA pointer isn't necessary.
429 bool DontMoveAcrossStore = true;
430 if (!I.isSafeToMove(SawStore&: DontMoveAcrossStore)) {
431 LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
432 return false;
433 }
434
435 // Only CmpMI is allowed to clobber the flags.
436 if (&I != CmpMI && I.modifiesRegister(Reg: AArch64::NZCV, TRI)) {
437 LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
438 return false;
439 }
440 }
441 return true;
442}
443
444/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
445/// candidate for cmp-conversion. Fill out the internal state.
446///
447bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
448 Head = MBB;
449 Tail = CmpBB = nullptr;
450
451 if (Head->succ_size() != 2)
452 return false;
453 MachineBasicBlock *Succ0 = Head->succ_begin()[0];
454 MachineBasicBlock *Succ1 = Head->succ_begin()[1];
455
456 // CmpBB can only have a single predecessor. Tail is allowed many.
457 if (Succ0->pred_size() != 1)
458 std::swap(a&: Succ0, b&: Succ1);
459
460 // Succ0 is our candidate for CmpBB.
461 if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
462 return false;
463
464 CmpBB = Succ0;
465 Tail = Succ1;
466
467 if (!CmpBB->isSuccessor(MBB: Tail))
468 return false;
469
470 // The CFG topology checks out.
471 LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
472 << printMBBReference(*CmpBB) << " -> "
473 << printMBBReference(*Tail) << '\n');
474 ++NumConsidered;
475
476 // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
477 //
478 // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
479 // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
480 // always be safe to sink the ccmp down to immediately before the CmpBB
481 // terminators.
482 if (!trivialTailPHIs()) {
483 LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
484 ++NumPhiRejs;
485 return false;
486 }
487
488 if (!Tail->livein_empty()) {
489 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
490 ++NumPhysRejs;
491 return false;
492 }
493
494 // CmpBB should never have PHIs since Head is its only predecessor.
495 // FIXME: Clean them up if it happens.
496 if (!CmpBB->empty() && CmpBB->front().isPHI()) {
497 LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
498 ++NumPhi2Rejs;
499 return false;
500 }
501
502 if (!CmpBB->livein_empty()) {
503 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
504 ++NumPhysRejs;
505 return false;
506 }
507
508 // The branch we're looking to eliminate must be analyzable.
509 HeadCond.clear();
510 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
511 if (TII->analyzeBranch(MBB&: *Head, TBB, FBB, Cond&: HeadCond)) {
512 LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
513 ++NumHeadBranchRejs;
514 return false;
515 }
516
517 // This is weird, probably some sort of degenerate CFG, or an edge to a
518 // landing pad.
519 if (!TBB || HeadCond.empty()) {
520 LLVM_DEBUG(
521 dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
522 ++NumHeadBranchRejs;
523 return false;
524 }
525
526 if (!parseCond(Cond: HeadCond, CC&: HeadCmpBBCC)) {
527 LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
528 ++NumHeadBranchRejs;
529 return false;
530 }
531
532 // Make sure the branch direction is right.
533 if (TBB != CmpBB) {
534 assert(TBB == Tail && "Unexpected TBB");
535 HeadCmpBBCC = AArch64CC::getInvertedCondCode(Code: HeadCmpBBCC);
536 }
537
538 CmpBBCond.clear();
539 TBB = FBB = nullptr;
540 if (TII->analyzeBranch(MBB&: *CmpBB, TBB, FBB, Cond&: CmpBBCond)) {
541 LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
542 ++NumCmpBranchRejs;
543 return false;
544 }
545
546 if (!TBB || CmpBBCond.empty()) {
547 LLVM_DEBUG(
548 dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
549 ++NumCmpBranchRejs;
550 return false;
551 }
552
553 if (!parseCond(Cond: CmpBBCond, CC&: CmpBBTailCC)) {
554 LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
555 ++NumCmpBranchRejs;
556 return false;
557 }
558
559 if (TBB != Tail)
560 CmpBBTailCC = AArch64CC::getInvertedCondCode(Code: CmpBBTailCC);
561
562 LLVM_DEBUG(dbgs() << "Head->CmpBB on "
563 << AArch64CC::getCondCodeName(HeadCmpBBCC)
564 << ", CmpBB->Tail on "
565 << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
566
567 CmpMI = findConvertibleCompare(MBB: CmpBB);
568 if (!CmpMI)
569 return false;
570
571 if (!canSpeculateInstrs(MBB: CmpBB, CmpMI)) {
572 ++NumSpeculateRejs;
573 return false;
574 }
575 return true;
576}
577
578void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
579 LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
580 << printMBBReference(*Head) << ":\n"
581 << *CmpBB);
582
583 // All CmpBB instructions are moved into Head, and CmpBB is deleted.
584 // Update the CFG first.
585 updateTailPHIs();
586
587 // Save successor probabilities before removing CmpBB and Tail from their
588 // parents.
589 BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Src: Head, Dst: CmpBB);
590 BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(Src: CmpBB, Dst: Tail);
591
592 Head->removeSuccessor(Succ: CmpBB);
593 CmpBB->removeSuccessor(Succ: Tail);
594
595 // If Head and CmpBB had successor probabilities, update the probabilities to
596 // reflect the ccmp-conversion.
597 if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
598
599 // Head is allowed two successors. We've removed CmpBB, so the remaining
600 // successor is Tail. We need to increase the successor probability for
601 // Tail to account for the CmpBB path we removed.
602 //
603 // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
604 assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
605 BranchProbability Head2Tail = MBPI->getEdgeProbability(Src: Head, Dst: Tail);
606 Head->setSuccProbability(I: Head->succ_begin(),
607 Prob: Head2Tail + Head2CmpBB * CmpBB2Tail);
608
609 // We will transfer successors of CmpBB to Head in a moment without
610 // normalizing the successor probabilities. Set the successor probabilities
611 // before doing so.
612 //
613 // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
614 for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
615 BranchProbability CmpBB2I = MBPI->getEdgeProbability(Src: CmpBB, Dst: *I);
616 CmpBB->setSuccProbability(I, Prob: Head2CmpBB * CmpBB2I);
617 }
618 }
619
620 Head->transferSuccessorsAndUpdatePHIs(FromMBB: CmpBB);
621 DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
622 TII->removeBranch(MBB&: *Head);
623
624 // If the Head terminator was one of the cbz / tbz branches with built-in
625 // compare, we need to insert an explicit compare instruction in its place.
626 if (HeadCond[0].getImm() == -1) {
627 ++NumCompBranches;
628 unsigned Opc = 0;
629 switch (HeadCond[1].getImm()) {
630 case AArch64::CBZW:
631 case AArch64::CBNZW:
632 Opc = AArch64::SUBSWri;
633 break;
634 case AArch64::CBZX:
635 case AArch64::CBNZX:
636 Opc = AArch64::SUBSXri;
637 break;
638 default:
639 llvm_unreachable("Cannot convert Head branch");
640 }
641 const MCInstrDesc &MCID = TII->get(Opcode: Opc);
642 // Create a dummy virtual register for the SUBS def.
643 Register DestReg = MRI->createVirtualRegister(RegClass: TII->getRegClass(MCID, OpNum: 0));
644 // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
645 BuildMI(BB&: *Head, I: Head->end(), MIMD: TermDL, MCID)
646 .addReg(RegNo: DestReg, Flags: RegState::Define | RegState::Dead)
647 .add(MO: HeadCond[2])
648 .addImm(Val: 0)
649 .addImm(Val: 0);
650 // SUBS uses the GPR*sp register classes.
651 MRI->constrainRegClass(Reg: HeadCond[2].getReg(), RC: TII->getRegClass(MCID, OpNum: 1));
652 }
653
654 Head->splice(Where: Head->end(), Other: CmpBB, From: CmpBB->begin(), To: CmpBB->end());
655
656 // Now replace CmpMI with a ccmp instruction that also considers the incoming
657 // flags.
658 unsigned Opc = 0;
659 unsigned FirstOp = 1; // First CmpMI operand to copy.
660 bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
661 switch (CmpMI->getOpcode()) {
662 default:
663 llvm_unreachable("Unknown compare opcode");
664 case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
665 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
666 case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
667 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
668 case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
669 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
670 case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
671 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
672 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
673 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
674 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
675 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
676 case AArch64::CBZW:
677 case AArch64::CBNZW:
678 Opc = AArch64::CCMPWi;
679 FirstOp = 0;
680 isZBranch = true;
681 break;
682 case AArch64::CBZX:
683 case AArch64::CBNZX:
684 Opc = AArch64::CCMPXi;
685 FirstOp = 0;
686 isZBranch = true;
687 break;
688 }
689
690 // The ccmp instruction should set the flags according to the comparison when
691 // Head would have branched to CmpBB.
692 // The NZCV immediate operand should provide flags for the case where Head
693 // would have branched to Tail. These flags should cause the new Head
694 // terminator to branch to tail.
695 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: CmpBBTailCC);
696 const MCInstrDesc &MCID = TII->get(Opcode: Opc);
697 MRI->constrainRegClass(Reg: CmpMI->getOperand(i: FirstOp).getReg(),
698 RC: TII->getRegClass(MCID, OpNum: 0));
699 if (CmpMI->getOperand(i: FirstOp + 1).isReg())
700 MRI->constrainRegClass(Reg: CmpMI->getOperand(i: FirstOp + 1).getReg(),
701 RC: TII->getRegClass(MCID, OpNum: 1));
702 MachineInstrBuilder MIB = BuildMI(BB&: *Head, I: CmpMI, MIMD: CmpMI->getDebugLoc(), MCID)
703 .add(MO: CmpMI->getOperand(i: FirstOp)); // Register Rn
704 if (isZBranch)
705 MIB.addImm(Val: 0); // cbz/cbnz Rn -> ccmp Rn, #0
706 else
707 MIB.add(MO: CmpMI->getOperand(i: FirstOp + 1)); // Register Rm / Immediate
708 MIB.addImm(Val: NZCV).addImm(Val: HeadCmpBBCC);
709
710 // If CmpMI was a terminator, we need a new conditional branch to replace it.
711 // This now becomes a Head terminator.
712 if (isZBranch) {
713 bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
714 CmpMI->getOpcode() == AArch64::CBNZX;
715 BuildMI(BB&: *Head, I: CmpMI, MIMD: CmpMI->getDebugLoc(), MCID: TII->get(Opcode: AArch64::Bcc))
716 .addImm(Val: isNZ ? AArch64CC::NE : AArch64CC::EQ)
717 .add(MO: CmpMI->getOperand(i: 1)); // Branch target.
718 }
719 CmpMI->eraseFromParent();
720 Head->updateTerminator(PreviousLayoutSuccessor: CmpBB->getNextNode());
721
722 RemovedBlocks.push_back(Elt: CmpBB);
723 LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
724 ++NumConverted;
725}
726
727int SSACCmpConv::expectedCodeSizeDelta() const {
728 int delta = 0;
729 // If the Head terminator was one of the cbz / tbz branches with built-in
730 // compare, we need to insert an explicit compare instruction in its place
731 // plus a branch instruction.
732 if (HeadCond[0].getImm() == -1) {
733 switch (HeadCond[1].getImm()) {
734 case AArch64::CBZW:
735 case AArch64::CBNZW:
736 case AArch64::CBZX:
737 case AArch64::CBNZX:
738 // Therefore delta += 1
739 delta = 1;
740 break;
741 default:
742 llvm_unreachable("Cannot convert Head branch");
743 }
744 }
745 // If the Cmp terminator was one of the cbz / tbz branches with
746 // built-in compare, it will be turned into a compare instruction
747 // into Head, but we do not save any instruction.
748 // Otherwise, we save the branch instruction.
749 switch (CmpMI->getOpcode()) {
750 default:
751 --delta;
752 break;
753 case AArch64::CBZW:
754 case AArch64::CBNZW:
755 case AArch64::CBZX:
756 case AArch64::CBNZX:
757 break;
758 }
759 return delta;
760}
761
762//===----------------------------------------------------------------------===//
763// AArch64ConditionalCompares Pass
764//===----------------------------------------------------------------------===//
765
766namespace {
767class AArch64ConditionalCompares : public MachineFunctionPass {
768 const MachineBranchProbabilityInfo *MBPI;
769 const TargetInstrInfo *TII;
770 const TargetRegisterInfo *TRI;
771 MCSchedModel SchedModel;
772 // Does the proceeded function has Oz attribute.
773 bool MinSize;
774 MachineRegisterInfo *MRI;
775 MachineDominatorTree *DomTree;
776 MachineLoopInfo *Loops;
777 MachineTraceMetrics *Traces;
778 MachineTraceMetrics::Ensemble *MinInstr;
779 SSACCmpConv CmpConv;
780
781public:
782 static char ID;
783 AArch64ConditionalCompares() : MachineFunctionPass(ID) {}
784 void getAnalysisUsage(AnalysisUsage &AU) const override;
785 bool runOnMachineFunction(MachineFunction &MF) override;
786 StringRef getPassName() const override {
787 return "AArch64 Conditional Compares";
788 }
789
790private:
791 bool tryConvert(MachineBasicBlock *);
792 void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
793 void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
794 void invalidateTraces();
795 bool shouldConvert();
796};
797} // end anonymous namespace
798
799char AArch64ConditionalCompares::ID = 0;
800
801INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
802 "AArch64 CCMP Pass", false, false)
803INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
804INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
805INITIALIZE_PASS_DEPENDENCY(MachineTraceMetricsWrapperPass)
806INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
807 "AArch64 CCMP Pass", false, false)
808
809FunctionPass *llvm::createAArch64ConditionalCompares() {
810 return new AArch64ConditionalCompares();
811}
812
813void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
814 AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
815 AU.addRequired<MachineDominatorTreeWrapperPass>();
816 AU.addPreserved<MachineDominatorTreeWrapperPass>();
817 AU.addRequired<MachineLoopInfoWrapperPass>();
818 AU.addPreserved<MachineLoopInfoWrapperPass>();
819 AU.addRequired<MachineTraceMetricsWrapperPass>();
820 AU.addPreserved<MachineTraceMetricsWrapperPass>();
821 MachineFunctionPass::getAnalysisUsage(AU);
822}
823
824/// Update the dominator tree after if-conversion erased some blocks.
825void AArch64ConditionalCompares::updateDomTree(
826 ArrayRef<MachineBasicBlock *> Removed) {
827 // convert() removes CmpBB which was previously dominated by Head.
828 // CmpBB children should be transferred to Head.
829 MachineDomTreeNode *HeadNode = DomTree->getNode(BB: CmpConv.Head);
830 for (MachineBasicBlock *RemovedMBB : Removed) {
831 MachineDomTreeNode *Node = DomTree->getNode(BB: RemovedMBB);
832 assert(Node != HeadNode && "Cannot erase the head node");
833 assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
834 while (!Node->isLeaf())
835 DomTree->changeImmediateDominator(N: *Node->begin(), NewIDom: HeadNode);
836 DomTree->eraseNode(BB: RemovedMBB);
837 }
838}
839
840/// Update LoopInfo after if-conversion.
841void
842AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
843 if (!Loops)
844 return;
845 for (MachineBasicBlock *RemovedMBB : Removed)
846 Loops->removeBlock(BB: RemovedMBB);
847}
848
849/// Invalidate MachineTraceMetrics before if-conversion.
850void AArch64ConditionalCompares::invalidateTraces() {
851 Traces->invalidate(MBB: CmpConv.Head);
852 Traces->invalidate(MBB: CmpConv.CmpBB);
853}
854
855/// Apply cost model and heuristics to the if-conversion in IfConv.
856/// Return true if the conversion is a good idea.
857///
858bool AArch64ConditionalCompares::shouldConvert() {
859 // Stress testing mode disables all cost considerations.
860 if (Stress)
861 return true;
862 if (!MinInstr)
863 MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
864
865 // Head dominates CmpBB, so it is always included in its trace.
866 MachineTraceMetrics::Trace Trace = MinInstr->getTrace(MBB: CmpConv.CmpBB);
867
868 // If code size is the main concern
869 if (MinSize) {
870 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
871 LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
872 // If we are minimizing the code size, do the conversion whatever
873 // the cost is.
874 if (CodeSizeDelta < 0)
875 return true;
876 if (CodeSizeDelta > 0) {
877 LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
878 return false;
879 }
880 // CodeSizeDelta == 0, continue with the regular heuristics
881 }
882
883 // Heuristic: The compare conversion delays the execution of the branch
884 // instruction because we must wait for the inputs to the second compare as
885 // well. The branch has no dependent instructions, but delaying it increases
886 // the cost of a misprediction.
887 //
888 // Set a limit on the delay we will accept.
889 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
890
891 // Instruction depths can be computed for all trace instructions above CmpBB.
892 unsigned HeadDepth =
893 Trace.getInstrCycles(MI: *CmpConv.Head->getFirstTerminator()).Depth;
894 unsigned CmpBBDepth =
895 Trace.getInstrCycles(MI: *CmpConv.CmpBB->getFirstTerminator()).Depth;
896 LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
897 << "\nCmpBB depth: " << CmpBBDepth << '\n');
898 if (CmpBBDepth > HeadDepth + DelayLimit) {
899 LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
900 << " cycles.\n");
901 return false;
902 }
903
904 // Check the resource depth at the bottom of CmpBB - these instructions will
905 // be speculated.
906 unsigned ResDepth = Trace.getResourceDepth(Bottom: true);
907 LLVM_DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
908
909 // Heuristic: The speculatively executed instructions must all be able to
910 // merge into the Head block. The Head critical path should dominate the
911 // resource cost of the speculated instructions.
912 if (ResDepth > HeadDepth) {
913 LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
914 return false;
915 }
916 return true;
917}
918
919bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
920 bool Changed = false;
921 while (CmpConv.canConvert(MBB) && shouldConvert()) {
922 invalidateTraces();
923 SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
924 CmpConv.convert(RemovedBlocks);
925 Changed = true;
926 updateDomTree(Removed: RemovedBlocks);
927 for (MachineBasicBlock *MBB : RemovedBlocks)
928 MBB->eraseFromParent();
929 updateLoops(Removed: RemovedBlocks);
930 }
931 return Changed;
932}
933
934bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
935 LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
936 << "********** Function: " << MF.getName() << '\n');
937 if (skipFunction(F: MF.getFunction()))
938 return false;
939
940 TII = MF.getSubtarget().getInstrInfo();
941 TRI = MF.getSubtarget().getRegisterInfo();
942 SchedModel = MF.getSubtarget().getSchedModel();
943 MRI = &MF.getRegInfo();
944 DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
945 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
946 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
947 Traces = &getAnalysis<MachineTraceMetricsWrapperPass>().getMTM();
948 MinInstr = nullptr;
949 MinSize = MF.getFunction().hasMinSize();
950
951 bool Changed = false;
952 CmpConv.runOnMachineFunction(MF, MBPI);
953
954 // Visit blocks in dominator tree pre-order. The pre-order enables multiple
955 // cmp-conversions from the same head block.
956 // Note that updateDomTree() modifies the children of the DomTree node
957 // currently being visited. The df_iterator supports that; it doesn't look at
958 // child_begin() / child_end() until after a node has been visited.
959 for (auto *I : depth_first(G: DomTree))
960 if (tryConvert(MBB: I->getBlock()))
961 Changed = true;
962
963 return Changed;
964}
965