1 | //===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Early if-conversion is for out-of-order CPUs that don't have a lot of |
10 | // predicable instructions. The goal is to eliminate conditional branches that |
11 | // may mispredict. |
12 | // |
13 | // Instructions from both sides of the branch are executed specutatively, and a |
14 | // cmov instruction selects the result. |
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #include "llvm/CodeGen/EarlyIfConversion.h" |
19 | #include "llvm/ADT/BitVector.h" |
20 | #include "llvm/ADT/DenseSet.h" |
21 | #include "llvm/ADT/PostOrderIterator.h" |
22 | #include "llvm/ADT/SmallPtrSet.h" |
23 | #include "llvm/ADT/SparseSet.h" |
24 | #include "llvm/ADT/Statistic.h" |
25 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
26 | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
27 | #include "llvm/CodeGen/MachineDominators.h" |
28 | #include "llvm/CodeGen/MachineFunction.h" |
29 | #include "llvm/CodeGen/MachineFunctionPass.h" |
30 | #include "llvm/CodeGen/MachineInstr.h" |
31 | #include "llvm/CodeGen/MachineLoopInfo.h" |
32 | #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" |
33 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
34 | #include "llvm/CodeGen/MachineTraceMetrics.h" |
35 | #include "llvm/CodeGen/Register.h" |
36 | #include "llvm/CodeGen/TargetInstrInfo.h" |
37 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
38 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
39 | #include "llvm/InitializePasses.h" |
40 | #include "llvm/Support/CommandLine.h" |
41 | #include "llvm/Support/Debug.h" |
42 | #include "llvm/Support/raw_ostream.h" |
43 | |
44 | using namespace llvm; |
45 | |
46 | #define DEBUG_TYPE "early-ifcvt" |
47 | |
48 | // Absolute maximum number of instructions allowed per speculated block. |
49 | // This bypasses all other heuristics, so it should be set fairly high. |
50 | static cl::opt<unsigned> |
51 | BlockInstrLimit("early-ifcvt-limit" , cl::init(Val: 30), cl::Hidden, |
52 | cl::desc("Maximum number of instructions per speculated block." )); |
53 | |
54 | // Stress testing mode - disable heuristics. |
55 | static cl::opt<bool> Stress("stress-early-ifcvt" , cl::Hidden, |
56 | cl::desc("Turn all knobs to 11" )); |
57 | |
58 | STATISTIC(NumDiamondsSeen, "Number of diamonds" ); |
59 | STATISTIC(NumDiamondsConv, "Number of diamonds converted" ); |
60 | STATISTIC(NumTrianglesSeen, "Number of triangles" ); |
61 | STATISTIC(NumTrianglesConv, "Number of triangles converted" ); |
62 | |
63 | //===----------------------------------------------------------------------===// |
64 | // SSAIfConv |
65 | //===----------------------------------------------------------------------===// |
66 | // |
67 | // The SSAIfConv class performs if-conversion on SSA form machine code after |
68 | // determining if it is possible. The class contains no heuristics; external |
69 | // code should be used to determine when if-conversion is a good idea. |
70 | // |
71 | // SSAIfConv can convert both triangles and diamonds: |
72 | // |
73 | // Triangle: Head Diamond: Head |
74 | // | \ / \_ |
75 | // | \ / | |
76 | // | [TF]BB FBB TBB |
77 | // | / \ / |
78 | // | / \ / |
79 | // Tail Tail |
80 | // |
81 | // Instructions in the conditional blocks TBB and/or FBB are spliced into the |
82 | // Head block, and phis in the Tail block are converted to select instructions. |
83 | // |
84 | namespace { |
85 | class SSAIfConv { |
86 | const TargetInstrInfo *TII; |
87 | const TargetRegisterInfo *TRI; |
88 | MachineRegisterInfo *MRI; |
89 | |
90 | public: |
91 | /// The block containing the conditional branch. |
92 | MachineBasicBlock *Head; |
93 | |
94 | /// The block containing phis after the if-then-else. |
95 | MachineBasicBlock *Tail; |
96 | |
97 | /// The 'true' conditional block as determined by analyzeBranch. |
98 | MachineBasicBlock *TBB; |
99 | |
100 | /// The 'false' conditional block as determined by analyzeBranch. |
101 | MachineBasicBlock *FBB; |
102 | |
103 | /// isTriangle - When there is no 'else' block, either TBB or FBB will be |
104 | /// equal to Tail. |
105 | bool isTriangle() const { return TBB == Tail || FBB == Tail; } |
106 | |
107 | /// Returns the Tail predecessor for the True side. |
108 | MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } |
109 | |
110 | /// Returns the Tail predecessor for the False side. |
111 | MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } |
112 | |
113 | /// Information about each phi in the Tail block. |
114 | struct PHIInfo { |
115 | MachineInstr *PHI; |
116 | Register TReg, FReg; |
117 | // Latencies from Cond+Branch, TReg, and FReg to DstReg. |
118 | int CondCycles = 0, TCycles = 0, FCycles = 0; |
119 | |
120 | PHIInfo(MachineInstr *phi) : PHI(phi) {} |
121 | }; |
122 | |
123 | SmallVector<PHIInfo, 8> PHIs; |
124 | |
125 | /// The branch condition determined by analyzeBranch. |
126 | SmallVector<MachineOperand, 4> Cond; |
127 | |
128 | private: |
129 | /// Instructions in Head that define values used by the conditional blocks. |
130 | /// The hoisted instructions must be inserted after these instructions. |
131 | SmallPtrSet<MachineInstr*, 8> InsertAfter; |
132 | |
133 | /// Register units clobbered by the conditional blocks. |
134 | BitVector ClobberedRegUnits; |
135 | |
136 | // Scratch pad for findInsertionPoint. |
137 | SparseSet<unsigned> LiveRegUnits; |
138 | |
139 | /// Insertion point in Head for speculatively executed instructions form TBB |
140 | /// and FBB. |
141 | MachineBasicBlock::iterator InsertionPoint; |
142 | |
143 | /// Return true if all non-terminator instructions in MBB can be safely |
144 | /// speculated. |
145 | bool canSpeculateInstrs(MachineBasicBlock *MBB); |
146 | |
147 | /// Return true if all non-terminator instructions in MBB can be safely |
148 | /// predicated. |
149 | bool canPredicateInstrs(MachineBasicBlock *MBB); |
150 | |
151 | /// Scan through instruction dependencies and update InsertAfter array. |
152 | /// Return false if any dependency is incompatible with if conversion. |
153 | bool InstrDependenciesAllowIfConv(MachineInstr *I); |
154 | |
155 | /// Predicate all instructions of the basic block with current condition |
156 | /// except for terminators. Reverse the condition if ReversePredicate is set. |
157 | void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate); |
158 | |
159 | /// Find a valid insertion point in Head. |
160 | bool findInsertionPoint(); |
161 | |
162 | /// Replace PHI instructions in Tail with selects. |
163 | void replacePHIInstrs(); |
164 | |
165 | /// Insert selects and rewrite PHI operands to use them. |
166 | void rewritePHIOperands(); |
167 | |
168 | /// If virtual register has "killed" flag in TBB and FBB basic blocks, remove |
169 | /// the flag in TBB instruction. |
170 | void clearRepeatedKillFlagsFromTBB(MachineBasicBlock *TBB, |
171 | MachineBasicBlock *FBB); |
172 | |
173 | public: |
174 | /// init - Initialize per-function data structures. |
175 | void init(MachineFunction &MF) { |
176 | TII = MF.getSubtarget().getInstrInfo(); |
177 | TRI = MF.getSubtarget().getRegisterInfo(); |
178 | MRI = &MF.getRegInfo(); |
179 | LiveRegUnits.clear(); |
180 | LiveRegUnits.setUniverse(TRI->getNumRegUnits()); |
181 | ClobberedRegUnits.clear(); |
182 | ClobberedRegUnits.resize(N: TRI->getNumRegUnits()); |
183 | } |
184 | |
185 | /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, |
186 | /// initialize the internal state, and return true. |
187 | /// If predicate is set try to predicate the block otherwise try to |
188 | /// speculatively execute it. |
189 | bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false); |
190 | |
191 | /// convertIf - If-convert the last block passed to canConvertIf(), assuming |
192 | /// it is possible. Add any blocks that are to be erased to RemoveBlocks. |
193 | void convertIf(SmallVectorImpl<MachineBasicBlock *> &RemoveBlocks, |
194 | bool Predicate = false); |
195 | }; |
196 | } // end anonymous namespace |
197 | |
198 | /// canSpeculateInstrs - Returns true if all the instructions in MBB can safely |
199 | /// be speculated. The terminators are not considered. |
200 | /// |
201 | /// If instructions use any values that are defined in the head basic block, |
202 | /// the defining instructions are added to InsertAfter. |
203 | /// |
204 | /// Any clobbered regunits are added to ClobberedRegUnits. |
205 | /// |
206 | bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { |
207 | // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to |
208 | // get right. |
209 | if (!MBB->livein_empty()) { |
210 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n" ); |
211 | return false; |
212 | } |
213 | |
214 | unsigned InstrCount = 0; |
215 | |
216 | // Check all instructions, except the terminators. It is assumed that |
217 | // terminators never have side effects or define any used register values. |
218 | for (MachineInstr &MI : |
219 | llvm::make_range(x: MBB->begin(), y: MBB->getFirstTerminator())) { |
220 | if (MI.isDebugInstr()) |
221 | continue; |
222 | |
223 | if (++InstrCount > BlockInstrLimit && !Stress) { |
224 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " |
225 | << BlockInstrLimit << " instructions.\n" ); |
226 | return false; |
227 | } |
228 | |
229 | // There shouldn't normally be any phis in a single-predecessor block. |
230 | if (MI.isPHI()) { |
231 | LLVM_DEBUG(dbgs() << "Can't hoist: " << MI); |
232 | return false; |
233 | } |
234 | |
235 | // Don't speculate loads. Note that it may be possible and desirable to |
236 | // speculate GOT or constant pool loads that are guaranteed not to trap, |
237 | // but we don't support that for now. |
238 | if (MI.mayLoad()) { |
239 | LLVM_DEBUG(dbgs() << "Won't speculate load: " << MI); |
240 | return false; |
241 | } |
242 | |
243 | // We never speculate stores, so an AA pointer isn't necessary. |
244 | bool DontMoveAcrossStore = true; |
245 | if (!MI.isSafeToMove(SawStore&: DontMoveAcrossStore)) { |
246 | LLVM_DEBUG(dbgs() << "Can't speculate: " << MI); |
247 | return false; |
248 | } |
249 | |
250 | // Check for any dependencies on Head instructions. |
251 | if (!InstrDependenciesAllowIfConv(I: &MI)) |
252 | return false; |
253 | } |
254 | return true; |
255 | } |
256 | |
257 | /// Check that there is no dependencies preventing if conversion. |
258 | /// |
259 | /// If instruction uses any values that are defined in the head basic block, |
260 | /// the defining instructions are added to InsertAfter. |
261 | bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) { |
262 | for (const MachineOperand &MO : I->operands()) { |
263 | if (MO.isRegMask()) { |
264 | LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I); |
265 | return false; |
266 | } |
267 | if (!MO.isReg()) |
268 | continue; |
269 | Register Reg = MO.getReg(); |
270 | |
271 | // Remember clobbered regunits. |
272 | if (MO.isDef() && Reg.isPhysical()) |
273 | for (MCRegUnit Unit : TRI->regunits(Reg: Reg.asMCReg())) |
274 | ClobberedRegUnits.set(Unit); |
275 | |
276 | if (!MO.readsReg() || !Reg.isVirtual()) |
277 | continue; |
278 | MachineInstr *DefMI = MRI->getVRegDef(Reg); |
279 | if (!DefMI || DefMI->getParent() != Head) |
280 | continue; |
281 | if (InsertAfter.insert(Ptr: DefMI).second) |
282 | LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on " |
283 | << *DefMI); |
284 | if (DefMI->isTerminator()) { |
285 | LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n" ); |
286 | return false; |
287 | } |
288 | } |
289 | return true; |
290 | } |
291 | |
292 | /// canPredicateInstrs - Returns true if all the instructions in MBB can safely |
293 | /// be predicates. The terminators are not considered. |
294 | /// |
295 | /// If instructions use any values that are defined in the head basic block, |
296 | /// the defining instructions are added to InsertAfter. |
297 | /// |
298 | /// Any clobbered regunits are added to ClobberedRegUnits. |
299 | /// |
300 | bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) { |
301 | // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to |
302 | // get right. |
303 | if (!MBB->livein_empty()) { |
304 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n" ); |
305 | return false; |
306 | } |
307 | |
308 | unsigned InstrCount = 0; |
309 | |
310 | // Check all instructions, except the terminators. It is assumed that |
311 | // terminators never have side effects or define any used register values. |
312 | for (MachineBasicBlock::iterator I = MBB->begin(), |
313 | E = MBB->getFirstTerminator(); |
314 | I != E; ++I) { |
315 | if (I->isDebugInstr()) |
316 | continue; |
317 | |
318 | if (++InstrCount > BlockInstrLimit && !Stress) { |
319 | LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " |
320 | << BlockInstrLimit << " instructions.\n" ); |
321 | return false; |
322 | } |
323 | |
324 | // There shouldn't normally be any phis in a single-predecessor block. |
325 | if (I->isPHI()) { |
326 | LLVM_DEBUG(dbgs() << "Can't predicate: " << *I); |
327 | return false; |
328 | } |
329 | |
330 | // Check that instruction is predicable |
331 | if (!TII->isPredicable(MI: *I)) { |
332 | LLVM_DEBUG(dbgs() << "Isn't predicable: " << *I); |
333 | return false; |
334 | } |
335 | |
336 | // Check that instruction is not already predicated. |
337 | if (TII->isPredicated(MI: *I) && !TII->canPredicatePredicatedInstr(MI: *I)) { |
338 | LLVM_DEBUG(dbgs() << "Is already predicated: " << *I); |
339 | return false; |
340 | } |
341 | |
342 | // Check for any dependencies on Head instructions. |
343 | if (!InstrDependenciesAllowIfConv(I: &(*I))) |
344 | return false; |
345 | } |
346 | return true; |
347 | } |
348 | |
349 | // Apply predicate to all instructions in the machine block. |
350 | void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) { |
351 | auto Condition = Cond; |
352 | if (ReversePredicate) { |
353 | bool CanRevCond = !TII->reverseBranchCondition(Cond&: Condition); |
354 | assert(CanRevCond && "Reversed predicate is not supported" ); |
355 | (void)CanRevCond; |
356 | } |
357 | // Terminators don't need to be predicated as they will be removed. |
358 | for (MachineBasicBlock::iterator I = MBB->begin(), |
359 | E = MBB->getFirstTerminator(); |
360 | I != E; ++I) { |
361 | if (I->isDebugInstr()) |
362 | continue; |
363 | TII->PredicateInstruction(MI&: *I, Pred: Condition); |
364 | } |
365 | } |
366 | |
367 | /// Find an insertion point in Head for the speculated instructions. The |
368 | /// insertion point must be: |
369 | /// |
370 | /// 1. Before any terminators. |
371 | /// 2. After any instructions in InsertAfter. |
372 | /// 3. Not have any clobbered regunits live. |
373 | /// |
374 | /// This function sets InsertionPoint and returns true when successful, it |
375 | /// returns false if no valid insertion point could be found. |
376 | /// |
377 | bool SSAIfConv::findInsertionPoint() { |
378 | // Keep track of live regunits before the current position. |
379 | // Only track RegUnits that are also in ClobberedRegUnits. |
380 | LiveRegUnits.clear(); |
381 | SmallVector<MCRegister, 8> Reads; |
382 | MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); |
383 | MachineBasicBlock::iterator I = Head->end(); |
384 | MachineBasicBlock::iterator B = Head->begin(); |
385 | while (I != B) { |
386 | --I; |
387 | // Some of the conditional code depends in I. |
388 | if (InsertAfter.count(Ptr: &*I)) { |
389 | LLVM_DEBUG(dbgs() << "Can't insert code after " << *I); |
390 | return false; |
391 | } |
392 | |
393 | // Update live regunits. |
394 | for (const MachineOperand &MO : I->operands()) { |
395 | // We're ignoring regmask operands. That is conservatively correct. |
396 | if (!MO.isReg()) |
397 | continue; |
398 | Register Reg = MO.getReg(); |
399 | if (!Reg.isPhysical()) |
400 | continue; |
401 | // I clobbers Reg, so it isn't live before I. |
402 | if (MO.isDef()) |
403 | for (MCRegUnit Unit : TRI->regunits(Reg: Reg.asMCReg())) |
404 | LiveRegUnits.erase(Key: Unit); |
405 | // Unless I reads Reg. |
406 | if (MO.readsReg()) |
407 | Reads.push_back(Elt: Reg.asMCReg()); |
408 | } |
409 | // Anything read by I is live before I. |
410 | while (!Reads.empty()) |
411 | for (MCRegUnit Unit : TRI->regunits(Reg: Reads.pop_back_val())) |
412 | if (ClobberedRegUnits.test(Idx: Unit)) |
413 | LiveRegUnits.insert(Val: Unit); |
414 | |
415 | // We can't insert before a terminator. |
416 | if (I != FirstTerm && I->isTerminator()) |
417 | continue; |
418 | |
419 | // Some of the clobbered registers are live before I, not a valid insertion |
420 | // point. |
421 | if (!LiveRegUnits.empty()) { |
422 | LLVM_DEBUG({ |
423 | dbgs() << "Would clobber" ; |
424 | for (unsigned LRU : LiveRegUnits) |
425 | dbgs() << ' ' << printRegUnit(LRU, TRI); |
426 | dbgs() << " live before " << *I; |
427 | }); |
428 | continue; |
429 | } |
430 | |
431 | // This is a valid insertion point. |
432 | InsertionPoint = I; |
433 | LLVM_DEBUG(dbgs() << "Can insert before " << *I); |
434 | return true; |
435 | } |
436 | LLVM_DEBUG(dbgs() << "No legal insertion point found.\n" ); |
437 | return false; |
438 | } |
439 | |
440 | |
441 | |
442 | /// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is |
443 | /// a potential candidate for if-conversion. Fill out the internal state. |
444 | /// |
445 | bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { |
446 | Head = MBB; |
447 | TBB = FBB = Tail = nullptr; |
448 | |
449 | if (Head->succ_size() != 2) |
450 | return false; |
451 | MachineBasicBlock *Succ0 = Head->succ_begin()[0]; |
452 | MachineBasicBlock *Succ1 = Head->succ_begin()[1]; |
453 | |
454 | // Canonicalize so Succ0 has MBB as its single predecessor. |
455 | if (Succ0->pred_size() != 1) |
456 | std::swap(a&: Succ0, b&: Succ1); |
457 | |
458 | if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) |
459 | return false; |
460 | |
461 | Tail = Succ0->succ_begin()[0]; |
462 | |
463 | // This is not a triangle. |
464 | if (Tail != Succ1) { |
465 | // Check for a diamond. We won't deal with any critical edges. |
466 | if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || |
467 | Succ1->succ_begin()[0] != Tail) |
468 | return false; |
469 | LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " |
470 | << printMBBReference(*Succ0) << "/" |
471 | << printMBBReference(*Succ1) << " -> " |
472 | << printMBBReference(*Tail) << '\n'); |
473 | |
474 | // Live-in physregs are tricky to get right when speculating code. |
475 | if (!Tail->livein_empty()) { |
476 | LLVM_DEBUG(dbgs() << "Tail has live-ins.\n" ); |
477 | return false; |
478 | } |
479 | } else { |
480 | LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " |
481 | << printMBBReference(*Succ0) << " -> " |
482 | << printMBBReference(*Tail) << '\n'); |
483 | } |
484 | |
485 | // This is a triangle or a diamond. |
486 | // Skip if we cannot predicate and there are no phis skip as there must be |
487 | // side effects that can only be handled with predication. |
488 | if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) { |
489 | LLVM_DEBUG(dbgs() << "No phis in tail.\n" ); |
490 | return false; |
491 | } |
492 | |
493 | // The branch we're looking to eliminate must be analyzable. |
494 | Cond.clear(); |
495 | if (TII->analyzeBranch(MBB&: *Head, TBB, FBB, Cond)) { |
496 | LLVM_DEBUG(dbgs() << "Branch not analyzable.\n" ); |
497 | return false; |
498 | } |
499 | |
500 | // This is weird, probably some sort of degenerate CFG. |
501 | if (!TBB) { |
502 | LLVM_DEBUG(dbgs() << "analyzeBranch didn't find conditional branch.\n" ); |
503 | return false; |
504 | } |
505 | |
506 | // Make sure the analyzed branch is conditional; one of the successors |
507 | // could be a landing pad. (Empty landing pads can be generated on Windows.) |
508 | if (Cond.empty()) { |
509 | LLVM_DEBUG(dbgs() << "analyzeBranch found an unconditional branch.\n" ); |
510 | return false; |
511 | } |
512 | |
513 | // analyzeBranch doesn't set FBB on a fall-through branch. |
514 | // Make sure it is always set. |
515 | FBB = TBB == Succ0 ? Succ1 : Succ0; |
516 | |
517 | // Any phis in the tail block must be convertible to selects. |
518 | PHIs.clear(); |
519 | MachineBasicBlock *TPred = getTPred(); |
520 | MachineBasicBlock *FPred = getFPred(); |
521 | for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); |
522 | I != E && I->isPHI(); ++I) { |
523 | PHIs.push_back(Elt: &*I); |
524 | PHIInfo &PI = PHIs.back(); |
525 | // Find PHI operands corresponding to TPred and FPred. |
526 | for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) { |
527 | if (PI.PHI->getOperand(i: i+1).getMBB() == TPred) |
528 | PI.TReg = PI.PHI->getOperand(i).getReg(); |
529 | if (PI.PHI->getOperand(i: i+1).getMBB() == FPred) |
530 | PI.FReg = PI.PHI->getOperand(i).getReg(); |
531 | } |
532 | assert(PI.TReg.isVirtual() && "Bad PHI" ); |
533 | assert(PI.FReg.isVirtual() && "Bad PHI" ); |
534 | |
535 | // Get target information. |
536 | if (!TII->canInsertSelect(MBB: *Head, Cond, DstReg: PI.PHI->getOperand(i: 0).getReg(), |
537 | TrueReg: PI.TReg, FalseReg: PI.FReg, CondCycles&: PI.CondCycles, TrueCycles&: PI.TCycles, |
538 | FalseCycles&: PI.FCycles)) { |
539 | LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI); |
540 | return false; |
541 | } |
542 | } |
543 | |
544 | // Check that the conditional instructions can be speculated. |
545 | InsertAfter.clear(); |
546 | ClobberedRegUnits.reset(); |
547 | if (Predicate) { |
548 | if (TBB != Tail && !canPredicateInstrs(MBB: TBB)) |
549 | return false; |
550 | if (FBB != Tail && !canPredicateInstrs(MBB: FBB)) |
551 | return false; |
552 | } else { |
553 | if (TBB != Tail && !canSpeculateInstrs(MBB: TBB)) |
554 | return false; |
555 | if (FBB != Tail && !canSpeculateInstrs(MBB: FBB)) |
556 | return false; |
557 | } |
558 | |
559 | // Try to find a valid insertion point for the speculated instructions in the |
560 | // head basic block. |
561 | if (!findInsertionPoint()) |
562 | return false; |
563 | |
564 | if (isTriangle()) |
565 | ++NumTrianglesSeen; |
566 | else |
567 | ++NumDiamondsSeen; |
568 | return true; |
569 | } |
570 | |
571 | /// \return true iff the two registers are known to have the same value. |
572 | static bool hasSameValue(const MachineRegisterInfo &MRI, |
573 | const TargetInstrInfo *TII, Register TReg, |
574 | Register FReg) { |
575 | if (TReg == FReg) |
576 | return true; |
577 | |
578 | if (!TReg.isVirtual() || !FReg.isVirtual()) |
579 | return false; |
580 | |
581 | const MachineInstr *TDef = MRI.getUniqueVRegDef(Reg: TReg); |
582 | const MachineInstr *FDef = MRI.getUniqueVRegDef(Reg: FReg); |
583 | if (!TDef || !FDef) |
584 | return false; |
585 | |
586 | // If there are side-effects, all bets are off. |
587 | if (TDef->hasUnmodeledSideEffects()) |
588 | return false; |
589 | |
590 | // If the instruction could modify memory, or there may be some intervening |
591 | // store between the two, we can't consider them to be equal. |
592 | if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad()) |
593 | return false; |
594 | |
595 | // We also can't guarantee that they are the same if, for example, the |
596 | // instructions are both a copy from a physical reg, because some other |
597 | // instruction may have modified the value in that reg between the two |
598 | // defining insts. |
599 | if (any_of(Range: TDef->uses(), P: [](const MachineOperand &MO) { |
600 | return MO.isReg() && MO.getReg().isPhysical(); |
601 | })) |
602 | return false; |
603 | |
604 | // Check whether the two defining instructions produce the same value(s). |
605 | if (!TII->produceSameValue(MI0: *TDef, MI1: *FDef, MRI: &MRI)) |
606 | return false; |
607 | |
608 | // Further, check that the two defs come from corresponding operands. |
609 | int TIdx = TDef->findRegisterDefOperandIdx(Reg: TReg, /*TRI=*/nullptr); |
610 | int FIdx = FDef->findRegisterDefOperandIdx(Reg: FReg, /*TRI=*/nullptr); |
611 | if (TIdx == -1 || FIdx == -1) |
612 | return false; |
613 | |
614 | return TIdx == FIdx; |
615 | } |
616 | |
617 | /// replacePHIInstrs - Completely replace PHI instructions with selects. |
618 | /// This is possible when the only Tail predecessors are the if-converted |
619 | /// blocks. |
620 | void SSAIfConv::replacePHIInstrs() { |
621 | assert(Tail->pred_size() == 2 && "Cannot replace PHIs" ); |
622 | MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); |
623 | assert(FirstTerm != Head->end() && "No terminators" ); |
624 | DebugLoc HeadDL = FirstTerm->getDebugLoc(); |
625 | |
626 | // Convert all PHIs to select instructions inserted before FirstTerm. |
627 | for (PHIInfo &PI : PHIs) { |
628 | LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); |
629 | Register DstReg = PI.PHI->getOperand(i: 0).getReg(); |
630 | if (hasSameValue(MRI: *MRI, TII, TReg: PI.TReg, FReg: PI.FReg)) { |
631 | // We do not need the select instruction if both incoming values are |
632 | // equal, but we do need a COPY. |
633 | BuildMI(BB&: *Head, I: FirstTerm, MIMD: HeadDL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: DstReg) |
634 | .addReg(RegNo: PI.TReg); |
635 | } else { |
636 | TII->insertSelect(MBB&: *Head, I: FirstTerm, DL: HeadDL, DstReg, Cond, TrueReg: PI.TReg, |
637 | FalseReg: PI.FReg); |
638 | } |
639 | LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); |
640 | PI.PHI->eraseFromParent(); |
641 | PI.PHI = nullptr; |
642 | } |
643 | } |
644 | |
645 | /// rewritePHIOperands - When there are additional Tail predecessors, insert |
646 | /// select instructions in Head and rewrite PHI operands to use the selects. |
647 | /// Keep the PHI instructions in Tail to handle the other predecessors. |
648 | void SSAIfConv::rewritePHIOperands() { |
649 | MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); |
650 | assert(FirstTerm != Head->end() && "No terminators" ); |
651 | DebugLoc HeadDL = FirstTerm->getDebugLoc(); |
652 | |
653 | // Convert all PHIs to select instructions inserted before FirstTerm. |
654 | for (PHIInfo &PI : PHIs) { |
655 | Register DstReg; |
656 | |
657 | LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); |
658 | if (hasSameValue(MRI: *MRI, TII, TReg: PI.TReg, FReg: PI.FReg)) { |
659 | // We do not need the select instruction if both incoming values are |
660 | // equal. |
661 | DstReg = PI.TReg; |
662 | } else { |
663 | Register PHIDst = PI.PHI->getOperand(i: 0).getReg(); |
664 | DstReg = MRI->createVirtualRegister(RegClass: MRI->getRegClass(Reg: PHIDst)); |
665 | TII->insertSelect(MBB&: *Head, I: FirstTerm, DL: HeadDL, |
666 | DstReg, Cond, TrueReg: PI.TReg, FalseReg: PI.FReg); |
667 | LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); |
668 | } |
669 | |
670 | // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. |
671 | for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { |
672 | MachineBasicBlock *MBB = PI.PHI->getOperand(i: i-1).getMBB(); |
673 | if (MBB == getTPred()) { |
674 | PI.PHI->getOperand(i: i-1).setMBB(Head); |
675 | PI.PHI->getOperand(i: i-2).setReg(DstReg); |
676 | } else if (MBB == getFPred()) { |
677 | PI.PHI->removeOperand(OpNo: i-1); |
678 | PI.PHI->removeOperand(OpNo: i-2); |
679 | } |
680 | } |
681 | LLVM_DEBUG(dbgs() << " --> " << *PI.PHI); |
682 | } |
683 | } |
684 | |
685 | void SSAIfConv::clearRepeatedKillFlagsFromTBB(MachineBasicBlock *TBB, |
686 | MachineBasicBlock *FBB) { |
687 | assert(TBB != FBB); |
688 | |
689 | // Collect virtual registers killed in FBB. |
690 | SmallDenseSet<Register> FBBKilledRegs; |
691 | for (MachineInstr &MI : FBB->instrs()) { |
692 | for (MachineOperand &MO : MI.operands()) { |
693 | if (MO.isReg() && MO.isKill() && MO.getReg().isVirtual()) |
694 | FBBKilledRegs.insert(V: MO.getReg()); |
695 | } |
696 | } |
697 | |
698 | if (FBBKilledRegs.empty()) |
699 | return; |
700 | |
701 | // Find the same killed registers in TBB and clear kill flags for them. |
702 | for (MachineInstr &MI : TBB->instrs()) { |
703 | for (MachineOperand &MO : MI.operands()) { |
704 | if (MO.isReg() && MO.isKill() && FBBKilledRegs.contains(V: MO.getReg())) |
705 | MO.setIsKill(false); |
706 | } |
707 | } |
708 | } |
709 | |
710 | /// convertIf - Execute the if conversion after canConvertIf has determined the |
711 | /// feasibility. |
712 | /// |
713 | /// Any basic blocks that need to be erased will be added to RemoveBlocks. |
714 | /// |
715 | void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock *> &RemoveBlocks, |
716 | bool Predicate) { |
717 | assert(Head && Tail && TBB && FBB && "Call canConvertIf first." ); |
718 | |
719 | // Update statistics. |
720 | if (isTriangle()) |
721 | ++NumTrianglesConv; |
722 | else |
723 | ++NumDiamondsConv; |
724 | |
725 | // If both blocks are going to be merged into Head, remove "killed" flag in |
726 | // TBB for registers, which are killed in TBB and FBB. Otherwise, register |
727 | // will be killed twice in Head after splice. Register killed twice is an |
728 | // incorrect MIR. |
729 | if (TBB != Tail && FBB != Tail) |
730 | clearRepeatedKillFlagsFromTBB(TBB, FBB); |
731 | |
732 | // Move all instructions into Head, except for the terminators. |
733 | if (TBB != Tail) { |
734 | if (Predicate) |
735 | PredicateBlock(MBB: TBB, /*ReversePredicate=*/false); |
736 | Head->splice(Where: InsertionPoint, Other: TBB, From: TBB->begin(), To: TBB->getFirstTerminator()); |
737 | } |
738 | if (FBB != Tail) { |
739 | if (Predicate) |
740 | PredicateBlock(MBB: FBB, /*ReversePredicate=*/true); |
741 | Head->splice(Where: InsertionPoint, Other: FBB, From: FBB->begin(), To: FBB->getFirstTerminator()); |
742 | } |
743 | // Are there extra Tail predecessors? |
744 | bool = Tail->pred_size() != 2; |
745 | if (ExtraPreds) |
746 | rewritePHIOperands(); |
747 | else |
748 | replacePHIInstrs(); |
749 | |
750 | // Fix up the CFG, temporarily leave Head without any successors. |
751 | Head->removeSuccessor(Succ: TBB); |
752 | Head->removeSuccessor(Succ: FBB, NormalizeSuccProbs: true); |
753 | if (TBB != Tail) |
754 | TBB->removeSuccessor(Succ: Tail, NormalizeSuccProbs: true); |
755 | if (FBB != Tail) |
756 | FBB->removeSuccessor(Succ: Tail, NormalizeSuccProbs: true); |
757 | |
758 | // Fix up Head's terminators. |
759 | // It should become a single branch or a fallthrough. |
760 | DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); |
761 | TII->removeBranch(MBB&: *Head); |
762 | |
763 | // Mark the now empty conditional blocks for removal and move them to the end. |
764 | // It is likely that Head can fall |
765 | // through to Tail, and we can join the two blocks. |
766 | if (TBB != Tail) { |
767 | RemoveBlocks.push_back(Elt: TBB); |
768 | if (TBB != &TBB->getParent()->back()) |
769 | TBB->moveAfter(NewBefore: &TBB->getParent()->back()); |
770 | } |
771 | if (FBB != Tail) { |
772 | RemoveBlocks.push_back(Elt: FBB); |
773 | if (FBB != &FBB->getParent()->back()) |
774 | FBB->moveAfter(NewBefore: &FBB->getParent()->back()); |
775 | } |
776 | |
777 | assert(Head->succ_empty() && "Additional head successors?" ); |
778 | if (!ExtraPreds && Head->isLayoutSuccessor(MBB: Tail)) { |
779 | // Splice Tail onto the end of Head. |
780 | LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) |
781 | << " into head " << printMBBReference(*Head) << '\n'); |
782 | Head->splice(Where: Head->end(), Other: Tail, |
783 | From: Tail->begin(), To: Tail->end()); |
784 | Head->transferSuccessorsAndUpdatePHIs(FromMBB: Tail); |
785 | RemoveBlocks.push_back(Elt: Tail); |
786 | if (Tail != &Tail->getParent()->back()) |
787 | Tail->moveAfter(NewBefore: &Tail->getParent()->back()); |
788 | } else { |
789 | // We need a branch to Tail, let code placement work it out later. |
790 | LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n" ); |
791 | SmallVector<MachineOperand, 0> EmptyCond; |
792 | TII->insertBranch(MBB&: *Head, TBB: Tail, FBB: nullptr, Cond: EmptyCond, DL: HeadDL); |
793 | Head->addSuccessor(Succ: Tail); |
794 | } |
795 | LLVM_DEBUG(dbgs() << *Head); |
796 | } |
797 | |
798 | //===----------------------------------------------------------------------===// |
799 | // EarlyIfConverter Pass |
800 | //===----------------------------------------------------------------------===// |
801 | |
802 | namespace { |
803 | class EarlyIfConverter { |
804 | const TargetInstrInfo *TII = nullptr; |
805 | const TargetRegisterInfo *TRI = nullptr; |
806 | MCSchedModel SchedModel; |
807 | MachineRegisterInfo *MRI = nullptr; |
808 | MachineDominatorTree *DomTree = nullptr; |
809 | MachineLoopInfo *Loops = nullptr; |
810 | MachineTraceMetrics *Traces = nullptr; |
811 | MachineTraceMetrics::Ensemble *MinInstr = nullptr; |
812 | SSAIfConv IfConv; |
813 | |
814 | public: |
815 | EarlyIfConverter(MachineDominatorTree &DT, MachineLoopInfo &LI, |
816 | MachineTraceMetrics &MTM) |
817 | : DomTree(&DT), Loops(&LI), Traces(&MTM) {} |
818 | EarlyIfConverter() = delete; |
819 | |
820 | bool run(MachineFunction &MF); |
821 | |
822 | private: |
823 | bool tryConvertIf(MachineBasicBlock *); |
824 | void invalidateTraces(); |
825 | bool shouldConvertIf(); |
826 | }; |
827 | |
828 | class EarlyIfConverterLegacy : public MachineFunctionPass { |
829 | public: |
830 | static char ID; |
831 | EarlyIfConverterLegacy() : MachineFunctionPass(ID) {} |
832 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
833 | bool runOnMachineFunction(MachineFunction &MF) override; |
834 | StringRef getPassName() const override { return "Early If-Conversion" ; } |
835 | }; |
836 | } // end anonymous namespace |
837 | |
838 | char EarlyIfConverterLegacy::ID = 0; |
839 | char &llvm::EarlyIfConverterLegacyID = EarlyIfConverterLegacy::ID; |
840 | |
841 | INITIALIZE_PASS_BEGIN(EarlyIfConverterLegacy, DEBUG_TYPE, "Early If Converter" , |
842 | false, false) |
843 | INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) |
844 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) |
845 | INITIALIZE_PASS_DEPENDENCY(MachineTraceMetricsWrapperPass) |
846 | INITIALIZE_PASS_END(EarlyIfConverterLegacy, DEBUG_TYPE, "Early If Converter" , |
847 | false, false) |
848 | |
849 | void EarlyIfConverterLegacy::getAnalysisUsage(AnalysisUsage &AU) const { |
850 | AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); |
851 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
852 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
853 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
854 | AU.addPreserved<MachineLoopInfoWrapperPass>(); |
855 | AU.addRequired<MachineTraceMetricsWrapperPass>(); |
856 | AU.addPreserved<MachineTraceMetricsWrapperPass>(); |
857 | MachineFunctionPass::getAnalysisUsage(AU); |
858 | } |
859 | |
860 | namespace { |
861 | /// Update the dominator tree after if-conversion erased some blocks. |
862 | void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv, |
863 | ArrayRef<MachineBasicBlock *> Removed) { |
864 | // convertIf can remove TBB, FBB, and Tail can be merged into Head. |
865 | // TBB and FBB should not dominate any blocks. |
866 | // Tail children should be transferred to Head. |
867 | MachineDomTreeNode *HeadNode = DomTree->getNode(BB: IfConv.Head); |
868 | for (auto *B : Removed) { |
869 | MachineDomTreeNode *Node = DomTree->getNode(BB: B); |
870 | assert(Node != HeadNode && "Cannot erase the head node" ); |
871 | while (Node->getNumChildren()) { |
872 | assert(Node->getBlock() == IfConv.Tail && "Unexpected children" ); |
873 | DomTree->changeImmediateDominator(N: Node->back(), NewIDom: HeadNode); |
874 | } |
875 | DomTree->eraseNode(BB: B); |
876 | } |
877 | } |
878 | |
879 | /// Update LoopInfo after if-conversion. |
880 | void updateLoops(MachineLoopInfo *Loops, |
881 | ArrayRef<MachineBasicBlock *> Removed) { |
882 | // If-conversion doesn't change loop structure, and it doesn't mess with back |
883 | // edges, so updating LoopInfo is simply removing the dead blocks. |
884 | for (auto *B : Removed) |
885 | Loops->removeBlock(BB: B); |
886 | } |
887 | } // namespace |
888 | |
889 | /// Invalidate MachineTraceMetrics before if-conversion. |
890 | void EarlyIfConverter::invalidateTraces() { |
891 | Traces->verifyAnalysis(); |
892 | Traces->invalidate(MBB: IfConv.Head); |
893 | Traces->invalidate(MBB: IfConv.Tail); |
894 | Traces->invalidate(MBB: IfConv.TBB); |
895 | Traces->invalidate(MBB: IfConv.FBB); |
896 | Traces->verifyAnalysis(); |
897 | } |
898 | |
899 | // Adjust cycles with downward saturation. |
900 | static unsigned adjCycles(unsigned Cyc, int Delta) { |
901 | if (Delta < 0 && Cyc + Delta > Cyc) |
902 | return 0; |
903 | return Cyc + Delta; |
904 | } |
905 | |
906 | namespace { |
907 | /// Helper class to simplify emission of cycle counts into optimization remarks. |
908 | struct Cycles { |
909 | const char *Key; |
910 | unsigned Value; |
911 | }; |
912 | template <typename Remark> Remark &operator<<(Remark &R, Cycles C) { |
913 | return R << ore::NV(C.Key, C.Value) << (C.Value == 1 ? " cycle" : " cycles" ); |
914 | } |
915 | } // anonymous namespace |
916 | |
917 | /// Apply cost model and heuristics to the if-conversion in IfConv. |
918 | /// Return true if the conversion is a good idea. |
919 | /// |
920 | bool EarlyIfConverter::shouldConvertIf() { |
921 | // Stress testing mode disables all cost considerations. |
922 | if (Stress) |
923 | return true; |
924 | |
925 | // Do not try to if-convert if the condition has a high chance of being |
926 | // predictable. |
927 | MachineLoop *CurrentLoop = Loops->getLoopFor(BB: IfConv.Head); |
928 | // If the condition is in a loop, consider it predictable if the condition |
929 | // itself or all its operands are loop-invariant. E.g. this considers a load |
930 | // from a loop-invariant address predictable; we were unable to prove that it |
931 | // doesn't alias any of the memory-writes in the loop, but it is likely to |
932 | // read to same value multiple times. |
933 | if (CurrentLoop && any_of(Range&: IfConv.Cond, P: [&](MachineOperand &MO) { |
934 | if (!MO.isReg() || !MO.isUse()) |
935 | return false; |
936 | Register Reg = MO.getReg(); |
937 | if (Reg.isPhysical()) |
938 | return false; |
939 | |
940 | MachineInstr *Def = MRI->getVRegDef(Reg); |
941 | return CurrentLoop->isLoopInvariant(I&: *Def) || |
942 | all_of(Range: Def->operands(), P: [&](MachineOperand &Op) { |
943 | if (Op.isImm()) |
944 | return true; |
945 | if (!MO.isReg() || !MO.isUse()) |
946 | return false; |
947 | Register Reg = MO.getReg(); |
948 | if (Reg.isPhysical()) |
949 | return false; |
950 | |
951 | MachineInstr *Def = MRI->getVRegDef(Reg); |
952 | return CurrentLoop->isLoopInvariant(I&: *Def); |
953 | }); |
954 | })) |
955 | return false; |
956 | |
957 | if (!MinInstr) |
958 | MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount); |
959 | |
960 | MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(MBB: IfConv.getTPred()); |
961 | MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(MBB: IfConv.getFPred()); |
962 | LLVM_DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); |
963 | unsigned MinCrit = std::min(a: TBBTrace.getCriticalPath(), |
964 | b: FBBTrace.getCriticalPath()); |
965 | |
966 | // Set a somewhat arbitrary limit on the critical path extension we accept. |
967 | unsigned CritLimit = SchedModel.MispredictPenalty/2; |
968 | |
969 | MachineBasicBlock &MBB = *IfConv.Head; |
970 | MachineOptimizationRemarkEmitter MORE(*MBB.getParent(), nullptr); |
971 | |
972 | // If-conversion only makes sense when there is unexploited ILP. Compute the |
973 | // maximum-ILP resource length of the trace after if-conversion. Compare it |
974 | // to the shortest critical path. |
975 | SmallVector<const MachineBasicBlock*, 1> ; |
976 | if (IfConv.TBB != IfConv.Tail) |
977 | ExtraBlocks.push_back(Elt: IfConv.TBB); |
978 | unsigned ResLength = FBBTrace.getResourceLength(Extrablocks: ExtraBlocks); |
979 | LLVM_DEBUG(dbgs() << "Resource length " << ResLength |
980 | << ", minimal critical path " << MinCrit << '\n'); |
981 | if (ResLength > MinCrit + CritLimit) { |
982 | LLVM_DEBUG(dbgs() << "Not enough available ILP.\n" ); |
983 | MORE.emit(RemarkBuilder: [&]() { |
984 | MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion" , |
985 | MBB.findDebugLoc(MBBI: MBB.back()), &MBB); |
986 | R << "did not if-convert branch: the resulting critical path (" |
987 | << Cycles{.Key: "ResLength" , .Value: ResLength} |
988 | << ") would extend the shorter leg's critical path (" |
989 | << Cycles{.Key: "MinCrit" , .Value: MinCrit} << ") by more than the threshold of " |
990 | << Cycles{.Key: "CritLimit" , .Value: CritLimit} |
991 | << ", which cannot be hidden by available ILP." ; |
992 | return R; |
993 | }); |
994 | return false; |
995 | } |
996 | |
997 | // Assume that the depth of the first head terminator will also be the depth |
998 | // of the select instruction inserted, as determined by the flag dependency. |
999 | // TBB / FBB data dependencies may delay the select even more. |
1000 | MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(MBB: IfConv.Head); |
1001 | unsigned BranchDepth = |
1002 | HeadTrace.getInstrCycles(MI: *IfConv.Head->getFirstTerminator()).Depth; |
1003 | LLVM_DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); |
1004 | |
1005 | // Look at all the tail phis, and compute the critical path extension caused |
1006 | // by inserting select instructions. |
1007 | MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(MBB: IfConv.Tail); |
1008 | struct CriticalPathInfo { |
1009 | unsigned ; // Count of extra cycles that the component adds. |
1010 | unsigned Depth; // Absolute depth of the component in cycles. |
1011 | }; |
1012 | CriticalPathInfo Cond{}; |
1013 | CriticalPathInfo TBlock{}; |
1014 | CriticalPathInfo FBlock{}; |
1015 | bool ShouldConvert = true; |
1016 | for (SSAIfConv::PHIInfo &PI : IfConv.PHIs) { |
1017 | unsigned Slack = TailTrace.getInstrSlack(MI: *PI.PHI); |
1018 | unsigned MaxDepth = Slack + TailTrace.getInstrCycles(MI: *PI.PHI).Depth; |
1019 | LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); |
1020 | |
1021 | // The condition is pulled into the critical path. |
1022 | unsigned CondDepth = adjCycles(Cyc: BranchDepth, Delta: PI.CondCycles); |
1023 | if (CondDepth > MaxDepth) { |
1024 | unsigned = CondDepth - MaxDepth; |
1025 | LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n" ); |
1026 | if (Extra > Cond.Extra) |
1027 | Cond = {.Extra: Extra, .Depth: CondDepth}; |
1028 | if (Extra > CritLimit) { |
1029 | LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); |
1030 | ShouldConvert = false; |
1031 | } |
1032 | } |
1033 | |
1034 | // The TBB value is pulled into the critical path. |
1035 | unsigned TDepth = adjCycles(Cyc: TBBTrace.getPHIDepth(PHI: *PI.PHI), Delta: PI.TCycles); |
1036 | if (TDepth > MaxDepth) { |
1037 | unsigned = TDepth - MaxDepth; |
1038 | LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n" ); |
1039 | if (Extra > TBlock.Extra) |
1040 | TBlock = {.Extra: Extra, .Depth: TDepth}; |
1041 | if (Extra > CritLimit) { |
1042 | LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); |
1043 | ShouldConvert = false; |
1044 | } |
1045 | } |
1046 | |
1047 | // The FBB value is pulled into the critical path. |
1048 | unsigned FDepth = adjCycles(Cyc: FBBTrace.getPHIDepth(PHI: *PI.PHI), Delta: PI.FCycles); |
1049 | if (FDepth > MaxDepth) { |
1050 | unsigned = FDepth - MaxDepth; |
1051 | LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n" ); |
1052 | if (Extra > FBlock.Extra) |
1053 | FBlock = {.Extra: Extra, .Depth: FDepth}; |
1054 | if (Extra > CritLimit) { |
1055 | LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); |
1056 | ShouldConvert = false; |
1057 | } |
1058 | } |
1059 | } |
1060 | |
1061 | // Organize by "short" and "long" legs, since the diagnostics get confusing |
1062 | // when referring to the "true" and "false" sides of the branch, given that |
1063 | // those don't always correlate with what the user wrote in source-terms. |
1064 | const CriticalPathInfo Short = TBlock.Extra > FBlock.Extra ? FBlock : TBlock; |
1065 | const CriticalPathInfo Long = TBlock.Extra > FBlock.Extra ? TBlock : FBlock; |
1066 | |
1067 | if (ShouldConvert) { |
1068 | MORE.emit(RemarkBuilder: [&]() { |
1069 | MachineOptimizationRemark R(DEBUG_TYPE, "IfConversion" , |
1070 | MBB.back().getDebugLoc(), &MBB); |
1071 | R << "performing if-conversion on branch: the condition adds " |
1072 | << Cycles{.Key: "CondCycles" , .Value: Cond.Extra} << " to the critical path" ; |
1073 | if (Short.Extra > 0) |
1074 | R << ", and the short leg adds another " |
1075 | << Cycles{.Key: "ShortCycles" , .Value: Short.Extra}; |
1076 | if (Long.Extra > 0) |
1077 | R << ", and the long leg adds another " |
1078 | << Cycles{.Key: "LongCycles" , .Value: Long.Extra}; |
1079 | R << ", each staying under the threshold of " |
1080 | << Cycles{.Key: "CritLimit" , .Value: CritLimit} << "." ; |
1081 | return R; |
1082 | }); |
1083 | } else { |
1084 | MORE.emit(RemarkBuilder: [&]() { |
1085 | MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion" , |
1086 | MBB.back().getDebugLoc(), &MBB); |
1087 | R << "did not if-convert branch: the condition would add " |
1088 | << Cycles{.Key: "CondCycles" , .Value: Cond.Extra} << " to the critical path" ; |
1089 | if (Cond.Extra > CritLimit) |
1090 | R << " exceeding the limit of " << Cycles{.Key: "CritLimit" , .Value: CritLimit}; |
1091 | if (Short.Extra > 0) { |
1092 | R << ", and the short leg would add another " |
1093 | << Cycles{.Key: "ShortCycles" , .Value: Short.Extra}; |
1094 | if (Short.Extra > CritLimit) |
1095 | R << " exceeding the limit of " << Cycles{.Key: "CritLimit" , .Value: CritLimit}; |
1096 | } |
1097 | if (Long.Extra > 0) { |
1098 | R << ", and the long leg would add another " |
1099 | << Cycles{.Key: "LongCycles" , .Value: Long.Extra}; |
1100 | if (Long.Extra > CritLimit) |
1101 | R << " exceeding the limit of " << Cycles{.Key: "CritLimit" , .Value: CritLimit}; |
1102 | } |
1103 | R << "." ; |
1104 | return R; |
1105 | }); |
1106 | } |
1107 | |
1108 | return ShouldConvert; |
1109 | } |
1110 | |
1111 | /// Attempt repeated if-conversion on MBB, return true if successful. |
1112 | /// |
1113 | bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { |
1114 | bool Changed = false; |
1115 | while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { |
1116 | // If-convert MBB and update analyses. |
1117 | invalidateTraces(); |
1118 | SmallVector<MachineBasicBlock *, 4> RemoveBlocks; |
1119 | IfConv.convertIf(RemoveBlocks); |
1120 | Changed = true; |
1121 | updateDomTree(DomTree, IfConv, Removed: RemoveBlocks); |
1122 | for (MachineBasicBlock *MBB : RemoveBlocks) |
1123 | MBB->eraseFromParent(); |
1124 | updateLoops(Loops, Removed: RemoveBlocks); |
1125 | } |
1126 | return Changed; |
1127 | } |
1128 | |
1129 | bool EarlyIfConverter::run(MachineFunction &MF) { |
1130 | LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" |
1131 | << "********** Function: " << MF.getName() << '\n'); |
1132 | |
1133 | // Only run if conversion if the target wants it. |
1134 | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
1135 | if (!STI.enableEarlyIfConversion()) |
1136 | return false; |
1137 | |
1138 | TII = STI.getInstrInfo(); |
1139 | TRI = STI.getRegisterInfo(); |
1140 | SchedModel = STI.getSchedModel(); |
1141 | MRI = &MF.getRegInfo(); |
1142 | MinInstr = nullptr; |
1143 | |
1144 | bool Changed = false; |
1145 | IfConv.init(MF); |
1146 | |
1147 | // Visit blocks in dominator tree post-order. The post-order enables nested |
1148 | // if-conversion in a single pass. The tryConvertIf() function may erase |
1149 | // blocks, but only blocks dominated by the head block. This makes it safe to |
1150 | // update the dominator tree while the post-order iterator is still active. |
1151 | for (auto *DomNode : post_order(G: DomTree)) |
1152 | if (tryConvertIf(MBB: DomNode->getBlock())) |
1153 | Changed = true; |
1154 | |
1155 | return Changed; |
1156 | } |
1157 | |
1158 | PreservedAnalyses |
1159 | EarlyIfConverterPass::run(MachineFunction &MF, |
1160 | MachineFunctionAnalysisManager &MFAM) { |
1161 | MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF); |
1162 | MachineLoopInfo &LI = MFAM.getResult<MachineLoopAnalysis>(IR&: MF); |
1163 | MachineTraceMetrics &MTM = MFAM.getResult<MachineTraceMetricsAnalysis>(IR&: MF); |
1164 | |
1165 | EarlyIfConverter Impl(MDT, LI, MTM); |
1166 | bool Changed = Impl.run(MF); |
1167 | if (!Changed) |
1168 | return PreservedAnalyses::all(); |
1169 | |
1170 | auto PA = getMachineFunctionPassPreservedAnalyses(); |
1171 | PA.preserve<MachineDominatorTreeAnalysis>(); |
1172 | PA.preserve<MachineLoopAnalysis>(); |
1173 | PA.preserve<MachineTraceMetricsAnalysis>(); |
1174 | return PA; |
1175 | } |
1176 | |
1177 | bool EarlyIfConverterLegacy::runOnMachineFunction(MachineFunction &MF) { |
1178 | if (skipFunction(F: MF.getFunction())) |
1179 | return false; |
1180 | |
1181 | MachineDominatorTree &MDT = |
1182 | getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
1183 | MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); |
1184 | MachineTraceMetrics &MTM = |
1185 | getAnalysis<MachineTraceMetricsWrapperPass>().getMTM(); |
1186 | |
1187 | return EarlyIfConverter(MDT, LI, MTM).run(MF); |
1188 | } |
1189 | |
1190 | //===----------------------------------------------------------------------===// |
1191 | // EarlyIfPredicator Pass |
1192 | //===----------------------------------------------------------------------===// |
1193 | |
1194 | namespace { |
1195 | class EarlyIfPredicator : public MachineFunctionPass { |
1196 | const TargetInstrInfo *TII = nullptr; |
1197 | const TargetRegisterInfo *TRI = nullptr; |
1198 | TargetSchedModel SchedModel; |
1199 | MachineRegisterInfo *MRI = nullptr; |
1200 | MachineDominatorTree *DomTree = nullptr; |
1201 | MachineBranchProbabilityInfo *MBPI = nullptr; |
1202 | MachineLoopInfo *Loops = nullptr; |
1203 | SSAIfConv IfConv; |
1204 | |
1205 | public: |
1206 | static char ID; |
1207 | EarlyIfPredicator() : MachineFunctionPass(ID) {} |
1208 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
1209 | bool runOnMachineFunction(MachineFunction &MF) override; |
1210 | StringRef getPassName() const override { return "Early If-predicator" ; } |
1211 | |
1212 | protected: |
1213 | bool tryConvertIf(MachineBasicBlock *); |
1214 | bool shouldConvertIf(); |
1215 | }; |
1216 | } // end anonymous namespace |
1217 | |
1218 | #undef DEBUG_TYPE |
1219 | #define DEBUG_TYPE "early-if-predicator" |
1220 | |
1221 | char EarlyIfPredicator::ID = 0; |
1222 | char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID; |
1223 | |
1224 | INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator" , |
1225 | false, false) |
1226 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) |
1227 | INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) |
1228 | INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator" , false, |
1229 | false) |
1230 | |
1231 | void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { |
1232 | AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); |
1233 | AU.addRequired<MachineDominatorTreeWrapperPass>(); |
1234 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
1235 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
1236 | AU.addPreserved<MachineLoopInfoWrapperPass>(); |
1237 | MachineFunctionPass::getAnalysisUsage(AU); |
1238 | } |
1239 | |
1240 | /// Apply the target heuristic to decide if the transformation is profitable. |
1241 | bool EarlyIfPredicator::shouldConvertIf() { |
1242 | auto TrueProbability = MBPI->getEdgeProbability(Src: IfConv.Head, Dst: IfConv.TBB); |
1243 | if (IfConv.isTriangle()) { |
1244 | MachineBasicBlock &IfBlock = |
1245 | (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB; |
1246 | |
1247 | unsigned = 0; |
1248 | unsigned Cycles = 0; |
1249 | for (MachineInstr &I : IfBlock) { |
1250 | unsigned NumCycles = SchedModel.computeInstrLatency(MI: &I, UseDefaultDefLatency: false); |
1251 | if (NumCycles > 1) |
1252 | Cycles += NumCycles - 1; |
1253 | ExtraPredCost += TII->getPredicationCost(MI: I); |
1254 | } |
1255 | |
1256 | return TII->isProfitableToIfCvt(MBB&: IfBlock, NumCycles: Cycles, ExtraPredCycles: ExtraPredCost, |
1257 | Probability: TrueProbability); |
1258 | } |
1259 | unsigned = 0; |
1260 | unsigned = 0; |
1261 | unsigned TCycle = 0; |
1262 | unsigned FCycle = 0; |
1263 | for (MachineInstr &I : *IfConv.TBB) { |
1264 | unsigned NumCycles = SchedModel.computeInstrLatency(MI: &I, UseDefaultDefLatency: false); |
1265 | if (NumCycles > 1) |
1266 | TCycle += NumCycles - 1; |
1267 | TExtra += TII->getPredicationCost(MI: I); |
1268 | } |
1269 | for (MachineInstr &I : *IfConv.FBB) { |
1270 | unsigned NumCycles = SchedModel.computeInstrLatency(MI: &I, UseDefaultDefLatency: false); |
1271 | if (NumCycles > 1) |
1272 | FCycle += NumCycles - 1; |
1273 | FExtra += TII->getPredicationCost(MI: I); |
1274 | } |
1275 | return TII->isProfitableToIfCvt(TMBB&: *IfConv.TBB, NumTCycles: TCycle, ExtraTCycles: TExtra, FMBB&: *IfConv.FBB, |
1276 | NumFCycles: FCycle, ExtraFCycles: FExtra, Probability: TrueProbability); |
1277 | } |
1278 | |
1279 | /// Attempt repeated if-conversion on MBB, return true if successful. |
1280 | /// |
1281 | bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) { |
1282 | bool Changed = false; |
1283 | while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) { |
1284 | // If-convert MBB and update analyses. |
1285 | SmallVector<MachineBasicBlock *, 4> RemoveBlocks; |
1286 | IfConv.convertIf(RemoveBlocks, /*Predicate*/ true); |
1287 | Changed = true; |
1288 | updateDomTree(DomTree, IfConv, Removed: RemoveBlocks); |
1289 | for (MachineBasicBlock *MBB : RemoveBlocks) |
1290 | MBB->eraseFromParent(); |
1291 | updateLoops(Loops, Removed: RemoveBlocks); |
1292 | } |
1293 | return Changed; |
1294 | } |
1295 | |
1296 | bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { |
1297 | LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n" |
1298 | << "********** Function: " << MF.getName() << '\n'); |
1299 | if (skipFunction(F: MF.getFunction())) |
1300 | return false; |
1301 | |
1302 | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
1303 | TII = STI.getInstrInfo(); |
1304 | TRI = STI.getRegisterInfo(); |
1305 | MRI = &MF.getRegInfo(); |
1306 | SchedModel.init(TSInfo: &STI); |
1307 | DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
1308 | Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); |
1309 | MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); |
1310 | |
1311 | bool Changed = false; |
1312 | IfConv.init(MF); |
1313 | |
1314 | // Visit blocks in dominator tree post-order. The post-order enables nested |
1315 | // if-conversion in a single pass. The tryConvertIf() function may erase |
1316 | // blocks, but only blocks dominated by the head block. This makes it safe to |
1317 | // update the dominator tree while the post-order iterator is still active. |
1318 | for (auto *DomNode : post_order(G: DomTree)) |
1319 | if (tryConvertIf(MBB: DomNode->getBlock())) |
1320 | Changed = true; |
1321 | |
1322 | return Changed; |
1323 | } |
1324 | |