1 | //===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Early if-conversion is for out-of-order CPUs that don't have a lot of |
10 | // predicable instructions. The goal is to eliminate conditional branches that |
11 | // may mispredict. |
12 | // |
13 | // Instructions from both sides of the branch are executed specutatively, and a |
14 | // cmov instruction selects the result. |
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #include "llvm/ADT/BitVector.h" |
19 | #include "llvm/ADT/PostOrderIterator.h" |
20 | #include "llvm/ADT/SmallPtrSet.h" |
21 | #include "llvm/ADT/SparseSet.h" |
22 | #include "llvm/ADT/Statistic.h" |
23 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
24 | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
25 | #include "llvm/CodeGen/MachineDominators.h" |
26 | #include "llvm/CodeGen/MachineFunction.h" |
27 | #include "llvm/CodeGen/MachineFunctionPass.h" |
28 | #include "llvm/CodeGen/MachineInstr.h" |
29 | #include "llvm/CodeGen/MachineLoopInfo.h" |
30 | #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" |
31 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
32 | #include "llvm/CodeGen/MachineTraceMetrics.h" |
33 | #include "llvm/CodeGen/TargetInstrInfo.h" |
34 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
35 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
36 | #include "llvm/InitializePasses.h" |
37 | #include "llvm/Support/CommandLine.h" |
38 | #include "llvm/Support/Debug.h" |
39 | #include "llvm/Support/raw_ostream.h" |
40 | |
41 | using namespace llvm; |
42 | |
43 | #define DEBUG_TYPE "early-ifcvt" |
44 | |
45 | // Absolute maximum number of instructions allowed per speculated block. |
46 | // This bypasses all other heuristics, so it should be set fairly high. |
47 | static cl::opt<unsigned> |
48 | BlockInstrLimit("early-ifcvt-limit" , cl::init(Val: 30), cl::Hidden, |
49 | cl::desc("Maximum number of instructions per speculated block." )); |
50 | |
51 | // Stress testing mode - disable heuristics. |
52 | static cl::opt<bool> Stress("stress-early-ifcvt" , cl::Hidden, |
53 | cl::desc("Turn all knobs to 11" )); |
54 | |
55 | STATISTIC(NumDiamondsSeen, "Number of diamonds" ); |
56 | STATISTIC(NumDiamondsConv, "Number of diamonds converted" ); |
57 | STATISTIC(NumTrianglesSeen, "Number of triangles" ); |
58 | STATISTIC(NumTrianglesConv, "Number of triangles converted" ); |
59 | |
60 | //===----------------------------------------------------------------------===// |
61 | // SSAIfConv |
62 | //===----------------------------------------------------------------------===// |
63 | // |
64 | // The SSAIfConv class performs if-conversion on SSA form machine code after |
65 | // determining if it is possible. The class contains no heuristics; external |
66 | // code should be used to determine when if-conversion is a good idea. |
67 | // |
68 | // SSAIfConv can convert both triangles and diamonds: |
69 | // |
70 | // Triangle: Head Diamond: Head |
71 | // | \ / \_ |
72 | // | \ / | |
73 | // | [TF]BB FBB TBB |
74 | // | / \ / |
75 | // | / \ / |
76 | // Tail Tail |
77 | // |
78 | // Instructions in the conditional blocks TBB and/or FBB are spliced into the |
79 | // Head block, and phis in the Tail block are converted to select instructions. |
80 | // |
81 | namespace { |
82 | class SSAIfConv { |
83 | const TargetInstrInfo *TII; |
84 | const TargetRegisterInfo *TRI; |
85 | MachineRegisterInfo *MRI; |
86 | |
87 | public: |
88 | /// The block containing the conditional branch. |
89 | MachineBasicBlock *Head; |
90 | |
91 | /// The block containing phis after the if-then-else. |
92 | MachineBasicBlock *Tail; |
93 | |
94 | /// The 'true' conditional block as determined by analyzeBranch. |
95 | MachineBasicBlock *TBB; |
96 | |
97 | /// The 'false' conditional block as determined by analyzeBranch. |
98 | MachineBasicBlock *FBB; |
99 | |
100 | /// isTriangle - When there is no 'else' block, either TBB or FBB will be |
101 | /// equal to Tail. |
102 | bool isTriangle() const { return TBB == Tail || FBB == Tail; } |
103 | |
104 | /// Returns the Tail predecessor for the True side. |
105 | MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } |
106 | |
107 | /// Returns the Tail predecessor for the False side. |
108 | MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } |
109 | |
110 | /// Information about each phi in the Tail block. |
111 | struct PHIInfo { |
112 | MachineInstr *PHI; |
113 | unsigned TReg = 0, FReg = 0; |
114 | // Latencies from Cond+Branch, TReg, and FReg to DstReg. |
115 | int CondCycles = 0, TCycles = 0, FCycles = 0; |
116 | |
117 | PHIInfo(MachineInstr *phi) : PHI(phi) {} |
118 | }; |
119 | |
120 | SmallVector<PHIInfo, 8> PHIs; |
121 | |
122 | /// The branch condition determined by analyzeBranch. |
123 | SmallVector<MachineOperand, 4> Cond; |
124 | |
125 | private: |
126 | /// Instructions in Head that define values used by the conditional blocks. |
127 | /// The hoisted instructions must be inserted after these instructions. |
128 | SmallPtrSet<MachineInstr*, 8> InsertAfter; |
129 | |
130 | /// Register units clobbered by the conditional blocks. |
131 | BitVector ClobberedRegUnits; |
132 | |
133 | // Scratch pad for findInsertionPoint. |
134 | SparseSet<unsigned> LiveRegUnits; |
135 | |
136 | /// Insertion point in Head for speculatively executed instructions form TBB |
137 | /// and FBB. |
138 | MachineBasicBlock::iterator InsertionPoint; |
139 | |
140 | /// Return true if all non-terminator instructions in MBB can be safely |
141 | /// speculated. |
142 | bool canSpeculateInstrs(MachineBasicBlock *MBB); |
143 | |
144 | /// Return true if all non-terminator instructions in MBB can be safely |
145 | /// predicated. |
146 | bool canPredicateInstrs(MachineBasicBlock *MBB); |
147 | |
148 | /// Scan through instruction dependencies and update InsertAfter array. |
149 | /// Return false if any dependency is incompatible with if conversion. |
150 | bool InstrDependenciesAllowIfConv(MachineInstr *I); |
151 | |
152 | /// Predicate all instructions of the basic block with current condition |
153 | /// except for terminators. Reverse the condition if ReversePredicate is set. |
154 | void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate); |
155 | |
156 | /// Find a valid insertion point in Head. |
157 | bool findInsertionPoint(); |
158 | |
159 | /// Replace PHI instructions in Tail with selects. |
160 | void replacePHIInstrs(); |
161 | |
162 | /// Insert selects and rewrite PHI operands to use them. |
163 | void rewritePHIOperands(); |
164 | |
165 | public: |
166 | /// runOnMachineFunction - Initialize per-function data structures. |
167 | void runOnMachineFunction(MachineFunction &MF) { |
168 | TII = MF.getSubtarget().getInstrInfo(); |
169 | TRI = MF.getSubtarget().getRegisterInfo(); |
170 | MRI = &MF.getRegInfo(); |
171 | LiveRegUnits.clear(); |
172 | LiveRegUnits.setUniverse(TRI->getNumRegUnits()); |
173 | ClobberedRegUnits.clear(); |
174 | ClobberedRegUnits.resize(N: TRI->getNumRegUnits()); |
175 | } |
176 | |
177 | /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, |
178 | /// initialize the internal state, and return true. |
179 | /// If predicate is set try to predicate the block otherwise try to |
180 | /// speculatively execute it. |
181 | bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false); |
182 | |
183 | /// convertIf - If-convert the last block passed to canConvertIf(), assuming |
184 | /// it is possible. Add any erased blocks to RemovedBlocks. |
185 | void convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks, |
186 | bool Predicate = false); |
187 | }; |
188 | } // end anonymous namespace |
189 | |
190 | |
191 | /// canSpeculateInstrs - Returns true if all the instructions in MBB can safely |
192 | /// be speculated. The terminators are not considered. |
193 | /// |
194 | /// If instructions use any values that are defined in the head basic block, |
195 | /// the defining instructions are added to InsertAfter. |
196 | /// |
197 | /// Any clobbered regunits are added to ClobberedRegUnits. |
198 | /// |
199 | bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { |
200 | // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to |
201 | // get right. |
202 | if (!MBB->livein_empty()) { |
203 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n" ); |
204 | return false; |
205 | } |
206 | |
207 | unsigned InstrCount = 0; |
208 | |
209 | // Check all instructions, except the terminators. It is assumed that |
210 | // terminators never have side effects or define any used register values. |
211 | for (MachineInstr &MI : |
212 | llvm::make_range(x: MBB->begin(), y: MBB->getFirstTerminator())) { |
213 | if (MI.isDebugInstr()) |
214 | continue; |
215 | |
216 | if (++InstrCount > BlockInstrLimit && !Stress) { |
217 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " |
218 | << BlockInstrLimit << " instructions.\n" ); |
219 | return false; |
220 | } |
221 | |
222 | // There shouldn't normally be any phis in a single-predecessor block. |
223 | if (MI.isPHI()) { |
224 | LLVM_DEBUG(dbgs() << "Can't hoist: " << MI); |
225 | return false; |
226 | } |
227 | |
228 | // Don't speculate loads. Note that it may be possible and desirable to |
229 | // speculate GOT or constant pool loads that are guaranteed not to trap, |
230 | // but we don't support that for now. |
231 | if (MI.mayLoad()) { |
232 | LLVM_DEBUG(dbgs() << "Won't speculate load: " << MI); |
233 | return false; |
234 | } |
235 | |
236 | // We never speculate stores, so an AA pointer isn't necessary. |
237 | bool DontMoveAcrossStore = true; |
238 | if (!MI.isSafeToMove(AA: nullptr, SawStore&: DontMoveAcrossStore)) { |
239 | LLVM_DEBUG(dbgs() << "Can't speculate: " << MI); |
240 | return false; |
241 | } |
242 | |
243 | // Check for any dependencies on Head instructions. |
244 | if (!InstrDependenciesAllowIfConv(I: &MI)) |
245 | return false; |
246 | } |
247 | return true; |
248 | } |
249 | |
250 | /// Check that there is no dependencies preventing if conversion. |
251 | /// |
252 | /// If instruction uses any values that are defined in the head basic block, |
253 | /// the defining instructions are added to InsertAfter. |
254 | bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) { |
255 | for (const MachineOperand &MO : I->operands()) { |
256 | if (MO.isRegMask()) { |
257 | LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I); |
258 | return false; |
259 | } |
260 | if (!MO.isReg()) |
261 | continue; |
262 | Register Reg = MO.getReg(); |
263 | |
264 | // Remember clobbered regunits. |
265 | if (MO.isDef() && Reg.isPhysical()) |
266 | for (MCRegUnit Unit : TRI->regunits(Reg: Reg.asMCReg())) |
267 | ClobberedRegUnits.set(Unit); |
268 | |
269 | if (!MO.readsReg() || !Reg.isVirtual()) |
270 | continue; |
271 | MachineInstr *DefMI = MRI->getVRegDef(Reg); |
272 | if (!DefMI || DefMI->getParent() != Head) |
273 | continue; |
274 | if (InsertAfter.insert(Ptr: DefMI).second) |
275 | LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on " |
276 | << *DefMI); |
277 | if (DefMI->isTerminator()) { |
278 | LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n" ); |
279 | return false; |
280 | } |
281 | } |
282 | return true; |
283 | } |
284 | |
285 | /// canPredicateInstrs - Returns true if all the instructions in MBB can safely |
286 | /// be predicates. The terminators are not considered. |
287 | /// |
288 | /// If instructions use any values that are defined in the head basic block, |
289 | /// the defining instructions are added to InsertAfter. |
290 | /// |
291 | /// Any clobbered regunits are added to ClobberedRegUnits. |
292 | /// |
293 | bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) { |
294 | // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to |
295 | // get right. |
296 | if (!MBB->livein_empty()) { |
297 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n" ); |
298 | return false; |
299 | } |
300 | |
301 | unsigned InstrCount = 0; |
302 | |
303 | // Check all instructions, except the terminators. It is assumed that |
304 | // terminators never have side effects or define any used register values. |
305 | for (MachineBasicBlock::iterator I = MBB->begin(), |
306 | E = MBB->getFirstTerminator(); |
307 | I != E; ++I) { |
308 | if (I->isDebugInstr()) |
309 | continue; |
310 | |
311 | if (++InstrCount > BlockInstrLimit && !Stress) { |
312 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " |
313 | << BlockInstrLimit << " instructions.\n" ); |
314 | return false; |
315 | } |
316 | |
317 | // There shouldn't normally be any phis in a single-predecessor block. |
318 | if (I->isPHI()) { |
319 | LLVM_DEBUG(dbgs() << "Can't predicate: " << *I); |
320 | return false; |
321 | } |
322 | |
323 | // Check that instruction is predicable |
324 | if (!TII->isPredicable(MI: *I)) { |
325 | LLVM_DEBUG(dbgs() << "Isn't predicable: " << *I); |
326 | return false; |
327 | } |
328 | |
329 | // Check that instruction is not already predicated. |
330 | if (TII->isPredicated(MI: *I) && !TII->canPredicatePredicatedInstr(MI: *I)) { |
331 | LLVM_DEBUG(dbgs() << "Is already predicated: " << *I); |
332 | return false; |
333 | } |
334 | |
335 | // Check for any dependencies on Head instructions. |
336 | if (!InstrDependenciesAllowIfConv(I: &(*I))) |
337 | return false; |
338 | } |
339 | return true; |
340 | } |
341 | |
342 | // Apply predicate to all instructions in the machine block. |
343 | void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) { |
344 | auto Condition = Cond; |
345 | if (ReversePredicate) { |
346 | bool CanRevCond = !TII->reverseBranchCondition(Cond&: Condition); |
347 | assert(CanRevCond && "Reversed predicate is not supported" ); |
348 | (void)CanRevCond; |
349 | } |
350 | // Terminators don't need to be predicated as they will be removed. |
351 | for (MachineBasicBlock::iterator I = MBB->begin(), |
352 | E = MBB->getFirstTerminator(); |
353 | I != E; ++I) { |
354 | if (I->isDebugInstr()) |
355 | continue; |
356 | TII->PredicateInstruction(MI&: *I, Pred: Condition); |
357 | } |
358 | } |
359 | |
360 | /// Find an insertion point in Head for the speculated instructions. The |
361 | /// insertion point must be: |
362 | /// |
363 | /// 1. Before any terminators. |
364 | /// 2. After any instructions in InsertAfter. |
365 | /// 3. Not have any clobbered regunits live. |
366 | /// |
367 | /// This function sets InsertionPoint and returns true when successful, it |
368 | /// returns false if no valid insertion point could be found. |
369 | /// |
370 | bool SSAIfConv::findInsertionPoint() { |
371 | // Keep track of live regunits before the current position. |
372 | // Only track RegUnits that are also in ClobberedRegUnits. |
373 | LiveRegUnits.clear(); |
374 | SmallVector<MCRegister, 8> Reads; |
375 | MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); |
376 | MachineBasicBlock::iterator I = Head->end(); |
377 | MachineBasicBlock::iterator B = Head->begin(); |
378 | while (I != B) { |
379 | --I; |
380 | // Some of the conditional code depends in I. |
381 | if (InsertAfter.count(Ptr: &*I)) { |
382 | LLVM_DEBUG(dbgs() << "Can't insert code after " << *I); |
383 | return false; |
384 | } |
385 | |
386 | // Update live regunits. |
387 | for (const MachineOperand &MO : I->operands()) { |
388 | // We're ignoring regmask operands. That is conservatively correct. |
389 | if (!MO.isReg()) |
390 | continue; |
391 | Register Reg = MO.getReg(); |
392 | if (!Reg.isPhysical()) |
393 | continue; |
394 | // I clobbers Reg, so it isn't live before I. |
395 | if (MO.isDef()) |
396 | for (MCRegUnit Unit : TRI->regunits(Reg: Reg.asMCReg())) |
397 | LiveRegUnits.erase(Key: Unit); |
398 | // Unless I reads Reg. |
399 | if (MO.readsReg()) |
400 | Reads.push_back(Elt: Reg.asMCReg()); |
401 | } |
402 | // Anything read by I is live before I. |
403 | while (!Reads.empty()) |
404 | for (MCRegUnit Unit : TRI->regunits(Reg: Reads.pop_back_val())) |
405 | if (ClobberedRegUnits.test(Idx: Unit)) |
406 | LiveRegUnits.insert(Val: Unit); |
407 | |
408 | // We can't insert before a terminator. |
409 | if (I != FirstTerm && I->isTerminator()) |
410 | continue; |
411 | |
412 | // Some of the clobbered registers are live before I, not a valid insertion |
413 | // point. |
414 | if (!LiveRegUnits.empty()) { |
415 | LLVM_DEBUG({ |
416 | dbgs() << "Would clobber" ; |
417 | for (unsigned LRU : LiveRegUnits) |
418 | dbgs() << ' ' << printRegUnit(LRU, TRI); |
419 | dbgs() << " live before " << *I; |
420 | }); |
421 | continue; |
422 | } |
423 | |
424 | // This is a valid insertion point. |
425 | InsertionPoint = I; |
426 | LLVM_DEBUG(dbgs() << "Can insert before " << *I); |
427 | return true; |
428 | } |
429 | LLVM_DEBUG(dbgs() << "No legal insertion point found.\n" ); |
430 | return false; |
431 | } |
432 | |
433 | |
434 | |
435 | /// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is |
436 | /// a potential candidate for if-conversion. Fill out the internal state. |
437 | /// |
438 | bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { |
439 | Head = MBB; |
440 | TBB = FBB = Tail = nullptr; |
441 | |
442 | if (Head->succ_size() != 2) |
443 | return false; |
444 | MachineBasicBlock *Succ0 = Head->succ_begin()[0]; |
445 | MachineBasicBlock *Succ1 = Head->succ_begin()[1]; |
446 | |
447 | // Canonicalize so Succ0 has MBB as its single predecessor. |
448 | if (Succ0->pred_size() != 1) |
449 | std::swap(a&: Succ0, b&: Succ1); |
450 | |
451 | if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) |
452 | return false; |
453 | |
454 | Tail = Succ0->succ_begin()[0]; |
455 | |
456 | // This is not a triangle. |
457 | if (Tail != Succ1) { |
458 | // Check for a diamond. We won't deal with any critical edges. |
459 | if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || |
460 | Succ1->succ_begin()[0] != Tail) |
461 | return false; |
462 | LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " |
463 | << printMBBReference(*Succ0) << "/" |
464 | << printMBBReference(*Succ1) << " -> " |
465 | << printMBBReference(*Tail) << '\n'); |
466 | |
467 | // Live-in physregs are tricky to get right when speculating code. |
468 | if (!Tail->livein_empty()) { |
469 | LLVM_DEBUG(dbgs() << "Tail has live-ins.\n" ); |
470 | return false; |
471 | } |
472 | } else { |
473 | LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " |
474 | << printMBBReference(*Succ0) << " -> " |
475 | << printMBBReference(*Tail) << '\n'); |
476 | } |
477 | |
478 | // This is a triangle or a diamond. |
479 | // Skip if we cannot predicate and there are no phis skip as there must be |
480 | // side effects that can only be handled with predication. |
481 | if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) { |
482 | LLVM_DEBUG(dbgs() << "No phis in tail.\n" ); |
483 | return false; |
484 | } |
485 | |
486 | // The branch we're looking to eliminate must be analyzable. |
487 | Cond.clear(); |
488 | if (TII->analyzeBranch(MBB&: *Head, TBB, FBB, Cond)) { |
489 | LLVM_DEBUG(dbgs() << "Branch not analyzable.\n" ); |
490 | return false; |
491 | } |
492 | |
493 | // This is weird, probably some sort of degenerate CFG. |
494 | if (!TBB) { |
495 | LLVM_DEBUG(dbgs() << "analyzeBranch didn't find conditional branch.\n" ); |
496 | return false; |
497 | } |
498 | |
499 | // Make sure the analyzed branch is conditional; one of the successors |
500 | // could be a landing pad. (Empty landing pads can be generated on Windows.) |
501 | if (Cond.empty()) { |
502 | LLVM_DEBUG(dbgs() << "analyzeBranch found an unconditional branch.\n" ); |
503 | return false; |
504 | } |
505 | |
506 | // analyzeBranch doesn't set FBB on a fall-through branch. |
507 | // Make sure it is always set. |
508 | FBB = TBB == Succ0 ? Succ1 : Succ0; |
509 | |
510 | // Any phis in the tail block must be convertible to selects. |
511 | PHIs.clear(); |
512 | MachineBasicBlock *TPred = getTPred(); |
513 | MachineBasicBlock *FPred = getFPred(); |
514 | for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); |
515 | I != E && I->isPHI(); ++I) { |
516 | PHIs.push_back(Elt: &*I); |
517 | PHIInfo &PI = PHIs.back(); |
518 | // Find PHI operands corresponding to TPred and FPred. |
519 | for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) { |
520 | if (PI.PHI->getOperand(i: i+1).getMBB() == TPred) |
521 | PI.TReg = PI.PHI->getOperand(i).getReg(); |
522 | if (PI.PHI->getOperand(i: i+1).getMBB() == FPred) |
523 | PI.FReg = PI.PHI->getOperand(i).getReg(); |
524 | } |
525 | assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI" ); |
526 | assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI" ); |
527 | |
528 | // Get target information. |
529 | if (!TII->canInsertSelect(MBB: *Head, Cond, DstReg: PI.PHI->getOperand(i: 0).getReg(), |
530 | TrueReg: PI.TReg, FalseReg: PI.FReg, CondCycles&: PI.CondCycles, TrueCycles&: PI.TCycles, |
531 | FalseCycles&: PI.FCycles)) { |
532 | LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI); |
533 | return false; |
534 | } |
535 | } |
536 | |
537 | // Check that the conditional instructions can be speculated. |
538 | InsertAfter.clear(); |
539 | ClobberedRegUnits.reset(); |
540 | if (Predicate) { |
541 | if (TBB != Tail && !canPredicateInstrs(MBB: TBB)) |
542 | return false; |
543 | if (FBB != Tail && !canPredicateInstrs(MBB: FBB)) |
544 | return false; |
545 | } else { |
546 | if (TBB != Tail && !canSpeculateInstrs(MBB: TBB)) |
547 | return false; |
548 | if (FBB != Tail && !canSpeculateInstrs(MBB: FBB)) |
549 | return false; |
550 | } |
551 | |
552 | // Try to find a valid insertion point for the speculated instructions in the |
553 | // head basic block. |
554 | if (!findInsertionPoint()) |
555 | return false; |
556 | |
557 | if (isTriangle()) |
558 | ++NumTrianglesSeen; |
559 | else |
560 | ++NumDiamondsSeen; |
561 | return true; |
562 | } |
563 | |
564 | /// \return true iff the two registers are known to have the same value. |
565 | static bool hasSameValue(const MachineRegisterInfo &MRI, |
566 | const TargetInstrInfo *TII, Register TReg, |
567 | Register FReg) { |
568 | if (TReg == FReg) |
569 | return true; |
570 | |
571 | if (!TReg.isVirtual() || !FReg.isVirtual()) |
572 | return false; |
573 | |
574 | const MachineInstr *TDef = MRI.getUniqueVRegDef(Reg: TReg); |
575 | const MachineInstr *FDef = MRI.getUniqueVRegDef(Reg: FReg); |
576 | if (!TDef || !FDef) |
577 | return false; |
578 | |
579 | // If there are side-effects, all bets are off. |
580 | if (TDef->hasUnmodeledSideEffects()) |
581 | return false; |
582 | |
583 | // If the instruction could modify memory, or there may be some intervening |
584 | // store between the two, we can't consider them to be equal. |
585 | if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad()) |
586 | return false; |
587 | |
588 | // We also can't guarantee that they are the same if, for example, the |
589 | // instructions are both a copy from a physical reg, because some other |
590 | // instruction may have modified the value in that reg between the two |
591 | // defining insts. |
592 | if (any_of(Range: TDef->uses(), P: [](const MachineOperand &MO) { |
593 | return MO.isReg() && MO.getReg().isPhysical(); |
594 | })) |
595 | return false; |
596 | |
597 | // Check whether the two defining instructions produce the same value(s). |
598 | if (!TII->produceSameValue(MI0: *TDef, MI1: *FDef, MRI: &MRI)) |
599 | return false; |
600 | |
601 | // Further, check that the two defs come from corresponding operands. |
602 | int TIdx = TDef->findRegisterDefOperandIdx(Reg: TReg, /*TRI=*/nullptr); |
603 | int FIdx = FDef->findRegisterDefOperandIdx(Reg: FReg, /*TRI=*/nullptr); |
604 | if (TIdx == -1 || FIdx == -1) |
605 | return false; |
606 | |
607 | return TIdx == FIdx; |
608 | } |
609 | |
610 | /// replacePHIInstrs - Completely replace PHI instructions with selects. |
611 | /// This is possible when the only Tail predecessors are the if-converted |
612 | /// blocks. |
613 | void SSAIfConv::replacePHIInstrs() { |
614 | assert(Tail->pred_size() == 2 && "Cannot replace PHIs" ); |
615 | MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); |
616 | assert(FirstTerm != Head->end() && "No terminators" ); |
617 | DebugLoc HeadDL = FirstTerm->getDebugLoc(); |
618 | |
619 | // Convert all PHIs to select instructions inserted before FirstTerm. |
620 | for (PHIInfo &PI : PHIs) { |
621 | LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); |
622 | Register DstReg = PI.PHI->getOperand(i: 0).getReg(); |
623 | if (hasSameValue(MRI: *MRI, TII, TReg: PI.TReg, FReg: PI.FReg)) { |
624 | // We do not need the select instruction if both incoming values are |
625 | // equal, but we do need a COPY. |
626 | BuildMI(BB&: *Head, I: FirstTerm, MIMD: HeadDL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg) |
627 | .addReg(RegNo: PI.TReg); |
628 | } else { |
629 | TII->insertSelect(MBB&: *Head, I: FirstTerm, DL: HeadDL, DstReg, Cond, TrueReg: PI.TReg, |
630 | FalseReg: PI.FReg); |
631 | } |
632 | LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); |
633 | PI.PHI->eraseFromParent(); |
634 | PI.PHI = nullptr; |
635 | } |
636 | } |
637 | |
638 | /// rewritePHIOperands - When there are additional Tail predecessors, insert |
639 | /// select instructions in Head and rewrite PHI operands to use the selects. |
640 | /// Keep the PHI instructions in Tail to handle the other predecessors. |
641 | void SSAIfConv::rewritePHIOperands() { |
642 | MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); |
643 | assert(FirstTerm != Head->end() && "No terminators" ); |
644 | DebugLoc HeadDL = FirstTerm->getDebugLoc(); |
645 | |
646 | // Convert all PHIs to select instructions inserted before FirstTerm. |
647 | for (PHIInfo &PI : PHIs) { |
648 | unsigned DstReg = 0; |
649 | |
650 | LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); |
651 | if (hasSameValue(MRI: *MRI, TII, TReg: PI.TReg, FReg: PI.FReg)) { |
652 | // We do not need the select instruction if both incoming values are |
653 | // equal. |
654 | DstReg = PI.TReg; |
655 | } else { |
656 | Register PHIDst = PI.PHI->getOperand(i: 0).getReg(); |
657 | DstReg = MRI->createVirtualRegister(RegClass: MRI->getRegClass(Reg: PHIDst)); |
658 | TII->insertSelect(MBB&: *Head, I: FirstTerm, DL: HeadDL, |
659 | DstReg, Cond, TrueReg: PI.TReg, FalseReg: PI.FReg); |
660 | LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); |
661 | } |
662 | |
663 | // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. |
664 | for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { |
665 | MachineBasicBlock *MBB = PI.PHI->getOperand(i: i-1).getMBB(); |
666 | if (MBB == getTPred()) { |
667 | PI.PHI->getOperand(i: i-1).setMBB(Head); |
668 | PI.PHI->getOperand(i: i-2).setReg(DstReg); |
669 | } else if (MBB == getFPred()) { |
670 | PI.PHI->removeOperand(OpNo: i-1); |
671 | PI.PHI->removeOperand(OpNo: i-2); |
672 | } |
673 | } |
674 | LLVM_DEBUG(dbgs() << " --> " << *PI.PHI); |
675 | } |
676 | } |
677 | |
678 | /// convertIf - Execute the if conversion after canConvertIf has determined the |
679 | /// feasibility. |
680 | /// |
681 | /// Any basic blocks erased will be added to RemovedBlocks. |
682 | /// |
683 | void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks, |
684 | bool Predicate) { |
685 | assert(Head && Tail && TBB && FBB && "Call canConvertIf first." ); |
686 | |
687 | // Update statistics. |
688 | if (isTriangle()) |
689 | ++NumTrianglesConv; |
690 | else |
691 | ++NumDiamondsConv; |
692 | |
693 | // Move all instructions into Head, except for the terminators. |
694 | if (TBB != Tail) { |
695 | if (Predicate) |
696 | PredicateBlock(MBB: TBB, /*ReversePredicate=*/false); |
697 | Head->splice(Where: InsertionPoint, Other: TBB, From: TBB->begin(), To: TBB->getFirstTerminator()); |
698 | } |
699 | if (FBB != Tail) { |
700 | if (Predicate) |
701 | PredicateBlock(MBB: FBB, /*ReversePredicate=*/true); |
702 | Head->splice(Where: InsertionPoint, Other: FBB, From: FBB->begin(), To: FBB->getFirstTerminator()); |
703 | } |
704 | // Are there extra Tail predecessors? |
705 | bool = Tail->pred_size() != 2; |
706 | if (ExtraPreds) |
707 | rewritePHIOperands(); |
708 | else |
709 | replacePHIInstrs(); |
710 | |
711 | // Fix up the CFG, temporarily leave Head without any successors. |
712 | Head->removeSuccessor(Succ: TBB); |
713 | Head->removeSuccessor(Succ: FBB, NormalizeSuccProbs: true); |
714 | if (TBB != Tail) |
715 | TBB->removeSuccessor(Succ: Tail, NormalizeSuccProbs: true); |
716 | if (FBB != Tail) |
717 | FBB->removeSuccessor(Succ: Tail, NormalizeSuccProbs: true); |
718 | |
719 | // Fix up Head's terminators. |
720 | // It should become a single branch or a fallthrough. |
721 | DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); |
722 | TII->removeBranch(MBB&: *Head); |
723 | |
724 | // Erase the now empty conditional blocks. It is likely that Head can fall |
725 | // through to Tail, and we can join the two blocks. |
726 | if (TBB != Tail) { |
727 | RemovedBlocks.push_back(Elt: TBB); |
728 | TBB->eraseFromParent(); |
729 | } |
730 | if (FBB != Tail) { |
731 | RemovedBlocks.push_back(Elt: FBB); |
732 | FBB->eraseFromParent(); |
733 | } |
734 | |
735 | assert(Head->succ_empty() && "Additional head successors?" ); |
736 | if (!ExtraPreds && Head->isLayoutSuccessor(MBB: Tail)) { |
737 | // Splice Tail onto the end of Head. |
738 | LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) |
739 | << " into head " << printMBBReference(*Head) << '\n'); |
740 | Head->splice(Where: Head->end(), Other: Tail, |
741 | From: Tail->begin(), To: Tail->end()); |
742 | Head->transferSuccessorsAndUpdatePHIs(FromMBB: Tail); |
743 | RemovedBlocks.push_back(Elt: Tail); |
744 | Tail->eraseFromParent(); |
745 | } else { |
746 | // We need a branch to Tail, let code placement work it out later. |
747 | LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n" ); |
748 | SmallVector<MachineOperand, 0> EmptyCond; |
749 | TII->insertBranch(MBB&: *Head, TBB: Tail, FBB: nullptr, Cond: EmptyCond, DL: HeadDL); |
750 | Head->addSuccessor(Succ: Tail); |
751 | } |
752 | LLVM_DEBUG(dbgs() << *Head); |
753 | } |
754 | |
755 | //===----------------------------------------------------------------------===// |
756 | // EarlyIfConverter Pass |
757 | //===----------------------------------------------------------------------===// |
758 | |
759 | namespace { |
760 | class EarlyIfConverter : public MachineFunctionPass { |
761 | const TargetInstrInfo *TII = nullptr; |
762 | const TargetRegisterInfo *TRI = nullptr; |
763 | MCSchedModel SchedModel; |
764 | MachineRegisterInfo *MRI = nullptr; |
765 | MachineDominatorTree *DomTree = nullptr; |
766 | MachineLoopInfo *Loops = nullptr; |
767 | MachineTraceMetrics *Traces = nullptr; |
768 | MachineTraceMetrics::Ensemble *MinInstr = nullptr; |
769 | SSAIfConv IfConv; |
770 | |
771 | public: |
772 | static char ID; |
773 | EarlyIfConverter() : MachineFunctionPass(ID) {} |
774 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
775 | bool runOnMachineFunction(MachineFunction &MF) override; |
776 | StringRef getPassName() const override { return "Early If-Conversion" ; } |
777 | |
778 | private: |
779 | bool tryConvertIf(MachineBasicBlock*); |
780 | void invalidateTraces(); |
781 | bool shouldConvertIf(); |
782 | }; |
783 | } // end anonymous namespace |
784 | |
785 | char EarlyIfConverter::ID = 0; |
786 | char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; |
787 | |
788 | INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE, |
789 | "Early If Converter" , false, false) |
790 | INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) |
791 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) |
792 | INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) |
793 | INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE, |
794 | "Early If Converter" , false, false) |
795 | |
796 | void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { |
797 | AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); |
798 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
799 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
800 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
801 | AU.addPreserved<MachineLoopInfoWrapperPass>(); |
802 | AU.addRequired<MachineTraceMetrics>(); |
803 | AU.addPreserved<MachineTraceMetrics>(); |
804 | MachineFunctionPass::getAnalysisUsage(AU); |
805 | } |
806 | |
807 | namespace { |
808 | /// Update the dominator tree after if-conversion erased some blocks. |
809 | void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv, |
810 | ArrayRef<MachineBasicBlock *> Removed) { |
811 | // convertIf can remove TBB, FBB, and Tail can be merged into Head. |
812 | // TBB and FBB should not dominate any blocks. |
813 | // Tail children should be transferred to Head. |
814 | MachineDomTreeNode *HeadNode = DomTree->getNode(BB: IfConv.Head); |
815 | for (auto *B : Removed) { |
816 | MachineDomTreeNode *Node = DomTree->getNode(BB: B); |
817 | assert(Node != HeadNode && "Cannot erase the head node" ); |
818 | while (Node->getNumChildren()) { |
819 | assert(Node->getBlock() == IfConv.Tail && "Unexpected children" ); |
820 | DomTree->changeImmediateDominator(N: Node->back(), NewIDom: HeadNode); |
821 | } |
822 | DomTree->eraseNode(BB: B); |
823 | } |
824 | } |
825 | |
826 | /// Update LoopInfo after if-conversion. |
827 | void updateLoops(MachineLoopInfo *Loops, |
828 | ArrayRef<MachineBasicBlock *> Removed) { |
829 | // If-conversion doesn't change loop structure, and it doesn't mess with back |
830 | // edges, so updating LoopInfo is simply removing the dead blocks. |
831 | for (auto *B : Removed) |
832 | Loops->removeBlock(BB: B); |
833 | } |
834 | } // namespace |
835 | |
836 | /// Invalidate MachineTraceMetrics before if-conversion. |
837 | void EarlyIfConverter::invalidateTraces() { |
838 | Traces->verifyAnalysis(); |
839 | Traces->invalidate(MBB: IfConv.Head); |
840 | Traces->invalidate(MBB: IfConv.Tail); |
841 | Traces->invalidate(MBB: IfConv.TBB); |
842 | Traces->invalidate(MBB: IfConv.FBB); |
843 | Traces->verifyAnalysis(); |
844 | } |
845 | |
846 | // Adjust cycles with downward saturation. |
847 | static unsigned adjCycles(unsigned Cyc, int Delta) { |
848 | if (Delta < 0 && Cyc + Delta > Cyc) |
849 | return 0; |
850 | return Cyc + Delta; |
851 | } |
852 | |
853 | namespace { |
854 | /// Helper class to simplify emission of cycle counts into optimization remarks. |
855 | struct Cycles { |
856 | const char *Key; |
857 | unsigned Value; |
858 | }; |
859 | template <typename Remark> Remark &operator<<(Remark &R, Cycles C) { |
860 | return R << ore::NV(C.Key, C.Value) << (C.Value == 1 ? " cycle" : " cycles" ); |
861 | } |
862 | } // anonymous namespace |
863 | |
864 | /// Apply cost model and heuristics to the if-conversion in IfConv. |
865 | /// Return true if the conversion is a good idea. |
866 | /// |
867 | bool EarlyIfConverter::shouldConvertIf() { |
868 | // Stress testing mode disables all cost considerations. |
869 | if (Stress) |
870 | return true; |
871 | |
872 | // Do not try to if-convert if the condition has a high chance of being |
873 | // predictable. |
874 | MachineLoop *CurrentLoop = Loops->getLoopFor(BB: IfConv.Head); |
875 | // If the condition is in a loop, consider it predictable if the condition |
876 | // itself or all its operands are loop-invariant. E.g. this considers a load |
877 | // from a loop-invariant address predictable; we were unable to prove that it |
878 | // doesn't alias any of the memory-writes in the loop, but it is likely to |
879 | // read to same value multiple times. |
880 | if (CurrentLoop && any_of(Range&: IfConv.Cond, P: [&](MachineOperand &MO) { |
881 | if (!MO.isReg() || !MO.isUse()) |
882 | return false; |
883 | Register Reg = MO.getReg(); |
884 | if (Register::isPhysicalRegister(Reg)) |
885 | return false; |
886 | |
887 | MachineInstr *Def = MRI->getVRegDef(Reg); |
888 | return CurrentLoop->isLoopInvariant(I&: *Def) || |
889 | all_of(Range: Def->operands(), P: [&](MachineOperand &Op) { |
890 | if (Op.isImm()) |
891 | return true; |
892 | if (!MO.isReg() || !MO.isUse()) |
893 | return false; |
894 | Register Reg = MO.getReg(); |
895 | if (Register::isPhysicalRegister(Reg)) |
896 | return false; |
897 | |
898 | MachineInstr *Def = MRI->getVRegDef(Reg); |
899 | return CurrentLoop->isLoopInvariant(I&: *Def); |
900 | }); |
901 | })) |
902 | return false; |
903 | |
904 | if (!MinInstr) |
905 | MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount); |
906 | |
907 | MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(MBB: IfConv.getTPred()); |
908 | MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(MBB: IfConv.getFPred()); |
909 | LLVM_DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); |
910 | unsigned MinCrit = std::min(a: TBBTrace.getCriticalPath(), |
911 | b: FBBTrace.getCriticalPath()); |
912 | |
913 | // Set a somewhat arbitrary limit on the critical path extension we accept. |
914 | unsigned CritLimit = SchedModel.MispredictPenalty/2; |
915 | |
916 | MachineBasicBlock &MBB = *IfConv.Head; |
917 | MachineOptimizationRemarkEmitter MORE(*MBB.getParent(), nullptr); |
918 | |
919 | // If-conversion only makes sense when there is unexploited ILP. Compute the |
920 | // maximum-ILP resource length of the trace after if-conversion. Compare it |
921 | // to the shortest critical path. |
922 | SmallVector<const MachineBasicBlock*, 1> ; |
923 | if (IfConv.TBB != IfConv.Tail) |
924 | ExtraBlocks.push_back(Elt: IfConv.TBB); |
925 | unsigned ResLength = FBBTrace.getResourceLength(Extrablocks: ExtraBlocks); |
926 | LLVM_DEBUG(dbgs() << "Resource length " << ResLength |
927 | << ", minimal critical path " << MinCrit << '\n'); |
928 | if (ResLength > MinCrit + CritLimit) { |
929 | LLVM_DEBUG(dbgs() << "Not enough available ILP.\n" ); |
930 | MORE.emit(RemarkBuilder: [&]() { |
931 | MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion" , |
932 | MBB.findDebugLoc(MBBI: MBB.back()), &MBB); |
933 | R << "did not if-convert branch: the resulting critical path (" |
934 | << Cycles{.Key: "ResLength" , .Value: ResLength} |
935 | << ") would extend the shorter leg's critical path (" |
936 | << Cycles{.Key: "MinCrit" , .Value: MinCrit} << ") by more than the threshold of " |
937 | << Cycles{.Key: "CritLimit" , .Value: CritLimit} |
938 | << ", which cannot be hidden by available ILP." ; |
939 | return R; |
940 | }); |
941 | return false; |
942 | } |
943 | |
944 | // Assume that the depth of the first head terminator will also be the depth |
945 | // of the select instruction inserted, as determined by the flag dependency. |
946 | // TBB / FBB data dependencies may delay the select even more. |
947 | MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(MBB: IfConv.Head); |
948 | unsigned BranchDepth = |
949 | HeadTrace.getInstrCycles(MI: *IfConv.Head->getFirstTerminator()).Depth; |
950 | LLVM_DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); |
951 | |
952 | // Look at all the tail phis, and compute the critical path extension caused |
953 | // by inserting select instructions. |
954 | MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(MBB: IfConv.Tail); |
955 | struct CriticalPathInfo { |
956 | unsigned ; // Count of extra cycles that the component adds. |
957 | unsigned Depth; // Absolute depth of the component in cycles. |
958 | }; |
959 | CriticalPathInfo Cond{}; |
960 | CriticalPathInfo TBlock{}; |
961 | CriticalPathInfo FBlock{}; |
962 | bool ShouldConvert = true; |
963 | for (SSAIfConv::PHIInfo &PI : IfConv.PHIs) { |
964 | unsigned Slack = TailTrace.getInstrSlack(MI: *PI.PHI); |
965 | unsigned MaxDepth = Slack + TailTrace.getInstrCycles(MI: *PI.PHI).Depth; |
966 | LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); |
967 | |
968 | // The condition is pulled into the critical path. |
969 | unsigned CondDepth = adjCycles(Cyc: BranchDepth, Delta: PI.CondCycles); |
970 | if (CondDepth > MaxDepth) { |
971 | unsigned = CondDepth - MaxDepth; |
972 | LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n" ); |
973 | if (Extra > Cond.Extra) |
974 | Cond = {.Extra: Extra, .Depth: CondDepth}; |
975 | if (Extra > CritLimit) { |
976 | LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); |
977 | ShouldConvert = false; |
978 | } |
979 | } |
980 | |
981 | // The TBB value is pulled into the critical path. |
982 | unsigned TDepth = adjCycles(Cyc: TBBTrace.getPHIDepth(PHI: *PI.PHI), Delta: PI.TCycles); |
983 | if (TDepth > MaxDepth) { |
984 | unsigned = TDepth - MaxDepth; |
985 | LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n" ); |
986 | if (Extra > TBlock.Extra) |
987 | TBlock = {.Extra: Extra, .Depth: TDepth}; |
988 | if (Extra > CritLimit) { |
989 | LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); |
990 | ShouldConvert = false; |
991 | } |
992 | } |
993 | |
994 | // The FBB value is pulled into the critical path. |
995 | unsigned FDepth = adjCycles(Cyc: FBBTrace.getPHIDepth(PHI: *PI.PHI), Delta: PI.FCycles); |
996 | if (FDepth > MaxDepth) { |
997 | unsigned = FDepth - MaxDepth; |
998 | LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n" ); |
999 | if (Extra > FBlock.Extra) |
1000 | FBlock = {.Extra: Extra, .Depth: FDepth}; |
1001 | if (Extra > CritLimit) { |
1002 | LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); |
1003 | ShouldConvert = false; |
1004 | } |
1005 | } |
1006 | } |
1007 | |
1008 | // Organize by "short" and "long" legs, since the diagnostics get confusing |
1009 | // when referring to the "true" and "false" sides of the branch, given that |
1010 | // those don't always correlate with what the user wrote in source-terms. |
1011 | const CriticalPathInfo Short = TBlock.Extra > FBlock.Extra ? FBlock : TBlock; |
1012 | const CriticalPathInfo Long = TBlock.Extra > FBlock.Extra ? TBlock : FBlock; |
1013 | |
1014 | if (ShouldConvert) { |
1015 | MORE.emit(RemarkBuilder: [&]() { |
1016 | MachineOptimizationRemark R(DEBUG_TYPE, "IfConversion" , |
1017 | MBB.back().getDebugLoc(), &MBB); |
1018 | R << "performing if-conversion on branch: the condition adds " |
1019 | << Cycles{.Key: "CondCycles" , .Value: Cond.Extra} << " to the critical path" ; |
1020 | if (Short.Extra > 0) |
1021 | R << ", and the short leg adds another " |
1022 | << Cycles{.Key: "ShortCycles" , .Value: Short.Extra}; |
1023 | if (Long.Extra > 0) |
1024 | R << ", and the long leg adds another " |
1025 | << Cycles{.Key: "LongCycles" , .Value: Long.Extra}; |
1026 | R << ", each staying under the threshold of " |
1027 | << Cycles{.Key: "CritLimit" , .Value: CritLimit} << "." ; |
1028 | return R; |
1029 | }); |
1030 | } else { |
1031 | MORE.emit(RemarkBuilder: [&]() { |
1032 | MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion" , |
1033 | MBB.back().getDebugLoc(), &MBB); |
1034 | R << "did not if-convert branch: the condition would add " |
1035 | << Cycles{.Key: "CondCycles" , .Value: Cond.Extra} << " to the critical path" ; |
1036 | if (Cond.Extra > CritLimit) |
1037 | R << " exceeding the limit of " << Cycles{.Key: "CritLimit" , .Value: CritLimit}; |
1038 | if (Short.Extra > 0) { |
1039 | R << ", and the short leg would add another " |
1040 | << Cycles{.Key: "ShortCycles" , .Value: Short.Extra}; |
1041 | if (Short.Extra > CritLimit) |
1042 | R << " exceeding the limit of " << Cycles{.Key: "CritLimit" , .Value: CritLimit}; |
1043 | } |
1044 | if (Long.Extra > 0) { |
1045 | R << ", and the long leg would add another " |
1046 | << Cycles{.Key: "LongCycles" , .Value: Long.Extra}; |
1047 | if (Long.Extra > CritLimit) |
1048 | R << " exceeding the limit of " << Cycles{.Key: "CritLimit" , .Value: CritLimit}; |
1049 | } |
1050 | R << "." ; |
1051 | return R; |
1052 | }); |
1053 | } |
1054 | |
1055 | return ShouldConvert; |
1056 | } |
1057 | |
1058 | /// Attempt repeated if-conversion on MBB, return true if successful. |
1059 | /// |
1060 | bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { |
1061 | bool Changed = false; |
1062 | while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { |
1063 | // If-convert MBB and update analyses. |
1064 | invalidateTraces(); |
1065 | SmallVector<MachineBasicBlock*, 4> RemovedBlocks; |
1066 | IfConv.convertIf(RemovedBlocks); |
1067 | Changed = true; |
1068 | updateDomTree(DomTree, IfConv, Removed: RemovedBlocks); |
1069 | updateLoops(Loops, Removed: RemovedBlocks); |
1070 | } |
1071 | return Changed; |
1072 | } |
1073 | |
1074 | bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { |
1075 | LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" |
1076 | << "********** Function: " << MF.getName() << '\n'); |
1077 | if (skipFunction(F: MF.getFunction())) |
1078 | return false; |
1079 | |
1080 | // Only run if conversion if the target wants it. |
1081 | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
1082 | if (!STI.enableEarlyIfConversion()) |
1083 | return false; |
1084 | |
1085 | TII = STI.getInstrInfo(); |
1086 | TRI = STI.getRegisterInfo(); |
1087 | SchedModel = STI.getSchedModel(); |
1088 | MRI = &MF.getRegInfo(); |
1089 | DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
1090 | Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); |
1091 | Traces = &getAnalysis<MachineTraceMetrics>(); |
1092 | MinInstr = nullptr; |
1093 | |
1094 | bool Changed = false; |
1095 | IfConv.runOnMachineFunction(MF); |
1096 | |
1097 | // Visit blocks in dominator tree post-order. The post-order enables nested |
1098 | // if-conversion in a single pass. The tryConvertIf() function may erase |
1099 | // blocks, but only blocks dominated by the head block. This makes it safe to |
1100 | // update the dominator tree while the post-order iterator is still active. |
1101 | for (auto *DomNode : post_order(G: DomTree)) |
1102 | if (tryConvertIf(MBB: DomNode->getBlock())) |
1103 | Changed = true; |
1104 | |
1105 | return Changed; |
1106 | } |
1107 | |
1108 | //===----------------------------------------------------------------------===// |
1109 | // EarlyIfPredicator Pass |
1110 | //===----------------------------------------------------------------------===// |
1111 | |
1112 | namespace { |
1113 | class EarlyIfPredicator : public MachineFunctionPass { |
1114 | const TargetInstrInfo *TII = nullptr; |
1115 | const TargetRegisterInfo *TRI = nullptr; |
1116 | TargetSchedModel SchedModel; |
1117 | MachineRegisterInfo *MRI = nullptr; |
1118 | MachineDominatorTree *DomTree = nullptr; |
1119 | MachineBranchProbabilityInfo *MBPI = nullptr; |
1120 | MachineLoopInfo *Loops = nullptr; |
1121 | SSAIfConv IfConv; |
1122 | |
1123 | public: |
1124 | static char ID; |
1125 | EarlyIfPredicator() : MachineFunctionPass(ID) {} |
1126 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
1127 | bool runOnMachineFunction(MachineFunction &MF) override; |
1128 | StringRef getPassName() const override { return "Early If-predicator" ; } |
1129 | |
1130 | protected: |
1131 | bool tryConvertIf(MachineBasicBlock *); |
1132 | bool shouldConvertIf(); |
1133 | }; |
1134 | } // end anonymous namespace |
1135 | |
1136 | #undef DEBUG_TYPE |
1137 | #define DEBUG_TYPE "early-if-predicator" |
1138 | |
1139 | char EarlyIfPredicator::ID = 0; |
1140 | char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID; |
1141 | |
1142 | INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator" , |
1143 | false, false) |
1144 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) |
1145 | INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) |
1146 | INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator" , false, |
1147 | false) |
1148 | |
1149 | void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { |
1150 | AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); |
1151 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
1152 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
1153 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
1154 | AU.addPreserved<MachineLoopInfoWrapperPass>(); |
1155 | MachineFunctionPass::getAnalysisUsage(AU); |
1156 | } |
1157 | |
1158 | /// Apply the target heuristic to decide if the transformation is profitable. |
1159 | bool EarlyIfPredicator::shouldConvertIf() { |
1160 | auto TrueProbability = MBPI->getEdgeProbability(Src: IfConv.Head, Dst: IfConv.TBB); |
1161 | if (IfConv.isTriangle()) { |
1162 | MachineBasicBlock &IfBlock = |
1163 | (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB; |
1164 | |
1165 | unsigned = 0; |
1166 | unsigned Cycles = 0; |
1167 | for (MachineInstr &I : IfBlock) { |
1168 | unsigned NumCycles = SchedModel.computeInstrLatency(MI: &I, UseDefaultDefLatency: false); |
1169 | if (NumCycles > 1) |
1170 | Cycles += NumCycles - 1; |
1171 | ExtraPredCost += TII->getPredicationCost(MI: I); |
1172 | } |
1173 | |
1174 | return TII->isProfitableToIfCvt(MBB&: IfBlock, NumCycles: Cycles, ExtraPredCycles: ExtraPredCost, |
1175 | Probability: TrueProbability); |
1176 | } |
1177 | unsigned = 0; |
1178 | unsigned = 0; |
1179 | unsigned TCycle = 0; |
1180 | unsigned FCycle = 0; |
1181 | for (MachineInstr &I : *IfConv.TBB) { |
1182 | unsigned NumCycles = SchedModel.computeInstrLatency(MI: &I, UseDefaultDefLatency: false); |
1183 | if (NumCycles > 1) |
1184 | TCycle += NumCycles - 1; |
1185 | TExtra += TII->getPredicationCost(MI: I); |
1186 | } |
1187 | for (MachineInstr &I : *IfConv.FBB) { |
1188 | unsigned NumCycles = SchedModel.computeInstrLatency(MI: &I, UseDefaultDefLatency: false); |
1189 | if (NumCycles > 1) |
1190 | FCycle += NumCycles - 1; |
1191 | FExtra += TII->getPredicationCost(MI: I); |
1192 | } |
1193 | return TII->isProfitableToIfCvt(TMBB&: *IfConv.TBB, NumTCycles: TCycle, ExtraTCycles: TExtra, FMBB&: *IfConv.FBB, |
1194 | NumFCycles: FCycle, ExtraFCycles: FExtra, Probability: TrueProbability); |
1195 | } |
1196 | |
1197 | /// Attempt repeated if-conversion on MBB, return true if successful. |
1198 | /// |
1199 | bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) { |
1200 | bool Changed = false; |
1201 | while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) { |
1202 | // If-convert MBB and update analyses. |
1203 | SmallVector<MachineBasicBlock *, 4> RemovedBlocks; |
1204 | IfConv.convertIf(RemovedBlocks, /*Predicate*/ true); |
1205 | Changed = true; |
1206 | updateDomTree(DomTree, IfConv, Removed: RemovedBlocks); |
1207 | updateLoops(Loops, Removed: RemovedBlocks); |
1208 | } |
1209 | return Changed; |
1210 | } |
1211 | |
1212 | bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { |
1213 | LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n" |
1214 | << "********** Function: " << MF.getName() << '\n'); |
1215 | if (skipFunction(F: MF.getFunction())) |
1216 | return false; |
1217 | |
1218 | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
1219 | TII = STI.getInstrInfo(); |
1220 | TRI = STI.getRegisterInfo(); |
1221 | MRI = &MF.getRegInfo(); |
1222 | SchedModel.init(TSInfo: &STI); |
1223 | DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
1224 | Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); |
1225 | MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); |
1226 | |
1227 | bool Changed = false; |
1228 | IfConv.runOnMachineFunction(MF); |
1229 | |
1230 | // Visit blocks in dominator tree post-order. The post-order enables nested |
1231 | // if-conversion in a single pass. The tryConvertIf() function may erase |
1232 | // blocks, but only blocks dominated by the head block. This makes it safe to |
1233 | // update the dominator tree while the post-order iterator is still active. |
1234 | for (auto *DomNode : post_order(G: DomTree)) |
1235 | if (tryConvertIf(MBB: DomNode->getBlock())) |
1236 | Changed = true; |
1237 | |
1238 | return Changed; |
1239 | } |
1240 | |