1//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global common subexpression elimination on machine
10// instructions using a scoped hash table based value numbering scheme. It
11// must be run while the machine function is still in SSA form.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/CodeGen/MachineCSE.h"
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/ScopedHashTable.h"
18#include "llvm/ADT/SmallPtrSet.h"
19#include "llvm/ADT/SmallSet.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/CFG.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
25#include "llvm/CodeGen/MachineDominators.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineFunctionPass.h"
28#include "llvm/CodeGen/MachineInstr.h"
29#include "llvm/CodeGen/MachineLoopInfo.h"
30#include "llvm/CodeGen/MachineOperand.h"
31#include "llvm/CodeGen/MachineRegisterInfo.h"
32#include "llvm/CodeGen/Passes.h"
33#include "llvm/CodeGen/TargetInstrInfo.h"
34#include "llvm/CodeGen/TargetOpcodes.h"
35#include "llvm/CodeGen/TargetRegisterInfo.h"
36#include "llvm/CodeGen/TargetSubtargetInfo.h"
37#include "llvm/InitializePasses.h"
38#include "llvm/MC/MCRegister.h"
39#include "llvm/MC/MCRegisterInfo.h"
40#include "llvm/Pass.h"
41#include "llvm/Support/Allocator.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/RecyclingAllocator.h"
44#include "llvm/Support/raw_ostream.h"
45#include <cassert>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "machine-cse"
52
53STATISTIC(NumCoalesces, "Number of copies coalesced");
54STATISTIC(NumCSEs, "Number of common subexpression eliminated");
55STATISTIC(NumPREs, "Number of partial redundant expression"
56 " transformed to fully redundant");
57STATISTIC(NumPhysCSEs,
58 "Number of physreg referencing common subexpr eliminated");
59STATISTIC(NumCrossBBCSEs,
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
62
63// Threshold to avoid excessive cost to compute isProfitableToCSE.
64static cl::opt<int>
65 CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(Val: 1024),
66 cl::desc("Threshold for the size of CSUses"));
67
68static cl::opt<bool> AggressiveMachineCSE(
69 "aggressive-machine-cse", cl::Hidden, cl::init(Val: false),
70 cl::desc("Override the profitability heuristics for Machine CSE"));
71
72namespace {
73
74class MachineCSEImpl {
75 const TargetInstrInfo *TII = nullptr;
76 const TargetRegisterInfo *TRI = nullptr;
77 MachineDominatorTree *DT = nullptr;
78 MachineRegisterInfo *MRI = nullptr;
79 MachineBlockFrequencyInfo *MBFI = nullptr;
80
81public:
82 MachineCSEImpl(MachineDominatorTree *DT, MachineBlockFrequencyInfo *MBFI)
83 : DT(DT), MBFI(MBFI) {}
84 bool run(MachineFunction &MF);
85
86private:
87 using AllocatorTy =
88 RecyclingAllocator<BumpPtrAllocator,
89 ScopedHashTableVal<MachineInstr *, unsigned>>;
90 using ScopedHTType =
91 ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
92 AllocatorTy>;
93 using ScopeType = ScopedHTType::ScopeTy;
94 using PhysDefVector = SmallVector<std::pair<unsigned, Register>, 2>;
95
96 unsigned LookAheadLimit = 0;
97 DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
98 DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
99 PREMap;
100 ScopedHTType VNT;
101 SmallVector<MachineInstr *, 64> Exps;
102 unsigned CurrVN = 0;
103
104 bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB);
105 bool isPhysDefTriviallyDead(MCRegister Reg,
106 MachineBasicBlock::const_iterator I,
107 MachineBasicBlock::const_iterator E) const;
108 bool hasLivePhysRegDefUses(const MachineInstr *MI,
109 const MachineBasicBlock *MBB,
110 SmallSet<MCRegister, 8> &PhysRefs,
111 PhysDefVector &PhysDefs, bool &PhysUseDef) const;
112 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
113 const SmallSet<MCRegister, 8> &PhysRefs,
114 const PhysDefVector &PhysDefs, bool &NonLocal) const;
115 bool isCSECandidate(MachineInstr *MI);
116 bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB,
117 MachineInstr *MI);
118 void EnterScope(MachineBasicBlock *MBB);
119 void ExitScope(MachineBasicBlock *MBB);
120 bool ProcessBlockCSE(MachineBasicBlock *MBB);
121 void ExitScopeIfDone(MachineDomTreeNode *Node,
122 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren);
123 bool PerformCSE(MachineDomTreeNode *Node);
124
125 bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
126 bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
127 bool PerformSimplePRE(MachineDominatorTree *DT);
128 /// Heuristics to see if it's profitable to move common computations of MBB
129 /// and MBB1 to CandidateBB.
130 bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
131 MachineBasicBlock *MBB, MachineBasicBlock *MBB1);
132 void releaseMemory();
133};
134
135class MachineCSELegacy : public MachineFunctionPass {
136public:
137 static char ID; // Pass identification
138
139 MachineCSELegacy() : MachineFunctionPass(ID) {}
140
141 bool runOnMachineFunction(MachineFunction &MF) override;
142
143 void getAnalysisUsage(AnalysisUsage &AU) const override {
144 AU.setPreservesCFG();
145 MachineFunctionPass::getAnalysisUsage(AU);
146 AU.addPreservedID(ID&: MachineLoopInfoID);
147 AU.addRequired<MachineDominatorTreeWrapperPass>();
148 AU.addPreserved<MachineDominatorTreeWrapperPass>();
149 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
150 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
151 }
152
153 MachineFunctionProperties getRequiredProperties() const override {
154 return MachineFunctionProperties().setIsSSA();
155 }
156};
157} // end anonymous namespace
158
159char MachineCSELegacy::ID = 0;
160
161char &llvm::MachineCSELegacyID = MachineCSELegacy::ID;
162
163INITIALIZE_PASS_BEGIN(MachineCSELegacy, DEBUG_TYPE,
164 "Machine Common Subexpression Elimination", false, false)
165INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
166INITIALIZE_PASS_END(MachineCSELegacy, DEBUG_TYPE,
167 "Machine Common Subexpression Elimination", false, false)
168
169/// The source register of a COPY machine instruction can be propagated to all
170/// its users, and this propagation could increase the probability of finding
171/// common subexpressions. If the COPY has only one user, the COPY itself can
172/// be removed.
173bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI,
174 MachineBasicBlock *MBB) {
175 bool Changed = false;
176 for (MachineOperand &MO : MI->all_uses()) {
177 Register Reg = MO.getReg();
178 if (!Reg.isVirtual())
179 continue;
180 bool OnlyOneUse = MRI->hasOneNonDBGUse(RegNo: Reg);
181 MachineInstr *DefMI = MRI->getVRegDef(Reg);
182 if (!DefMI || !DefMI->isCopy())
183 continue;
184 Register SrcReg = DefMI->getOperand(i: 1).getReg();
185 if (!SrcReg.isVirtual())
186 continue;
187 // FIXME: We should trivially coalesce subregister copies to expose CSE
188 // opportunities on instructions with truncated operands (see
189 // cse-add-with-overflow.ll). This can be done here as follows:
190 // if (SrcSubReg)
191 // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
192 // SrcSubReg);
193 // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
194 //
195 // The 2-addr pass has been updated to handle coalesced subregs. However,
196 // some machine-specific code still can't handle it.
197 // To handle it properly we also need a way find a constrained subregister
198 // class given a super-reg class and subreg index.
199 if (DefMI->getOperand(i: 1).getSubReg())
200 continue;
201 if (!MRI->constrainRegAttrs(Reg: SrcReg, ConstrainingReg: Reg))
202 continue;
203 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
204 LLVM_DEBUG(dbgs() << "*** to: " << *MI);
205
206 // Propagate SrcReg of copies to MI.
207 MO.setReg(SrcReg);
208 MRI->clearKillFlags(Reg: SrcReg);
209 // Coalesce single use copies.
210 if (OnlyOneUse) {
211 // If (and only if) we've eliminated all uses of the copy, also
212 // copy-propagate to any debug-users of MI, or they'll be left using
213 // an undefined value.
214 DefMI->changeDebugValuesDefReg(Reg: SrcReg);
215
216 DefMI->eraseFromParent();
217 ++NumCoalesces;
218 }
219 Changed = true;
220 }
221
222 return Changed;
223}
224
225bool MachineCSEImpl::isPhysDefTriviallyDead(
226 MCRegister Reg, MachineBasicBlock::const_iterator I,
227 MachineBasicBlock::const_iterator E) const {
228 unsigned LookAheadLeft = LookAheadLimit;
229 while (LookAheadLeft) {
230 // Skip over dbg_value's.
231 I = skipDebugInstructionsForward(It: I, End: E);
232
233 if (I == E)
234 // Reached end of block, we don't know if register is dead or not.
235 return false;
236
237 bool SeenDef = false;
238 for (const MachineOperand &MO : I->operands()) {
239 if (MO.isRegMask() && MO.clobbersPhysReg(PhysReg: Reg))
240 SeenDef = true;
241 if (!MO.isReg() || !MO.getReg())
242 continue;
243 if (!TRI->regsOverlap(RegA: MO.getReg(), RegB: Reg))
244 continue;
245 if (MO.isUse())
246 // Found a use!
247 return false;
248 SeenDef = true;
249 }
250 if (SeenDef)
251 // See a def of Reg (or an alias) before encountering any use, it's
252 // trivially dead.
253 return true;
254
255 --LookAheadLeft;
256 ++I;
257 }
258 return false;
259}
260
261static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
262 const MachineOperand &MO,
263 const MachineFunction &MF,
264 const TargetRegisterInfo &TRI,
265 const TargetInstrInfo &TII) {
266 // MachineRegisterInfo::isConstantPhysReg directly called by
267 // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
268 // reserved registers to be frozen. That doesn't cause a problem post-ISel as
269 // most (if not all) targets freeze reserved registers right after ISel.
270 //
271 // It does cause issues mid-GlobalISel, however, hence the additional
272 // reservedRegsFrozen check.
273 const MachineRegisterInfo &MRI = MF.getRegInfo();
274 return TRI.isCallerPreservedPhysReg(PhysReg: Reg, MF) || TII.isIgnorableUse(MO) ||
275 (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(PhysReg: Reg));
276}
277
278/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
279/// physical registers (except for dead defs of physical registers). It also
280/// returns the physical register def by reference if it's the only one and the
281/// instruction does not uses a physical register.
282bool MachineCSEImpl::hasLivePhysRegDefUses(const MachineInstr *MI,
283 const MachineBasicBlock *MBB,
284 SmallSet<MCRegister, 8> &PhysRefs,
285 PhysDefVector &PhysDefs,
286 bool &PhysUseDef) const {
287 // First, add all uses to PhysRefs.
288 for (const MachineOperand &MO : MI->all_uses()) {
289 Register Reg = MO.getReg();
290 if (!Reg)
291 continue;
292 if (Reg.isVirtual())
293 continue;
294 // Reading either caller preserved or constant physregs is ok.
295 if (!isCallerPreservedOrConstPhysReg(Reg: Reg.asMCReg(), MO, MF: *MI->getMF(), TRI: *TRI,
296 TII: *TII))
297 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
298 PhysRefs.insert(V: *AI);
299 }
300
301 // Next, collect all defs into PhysDefs. If any is already in PhysRefs
302 // (which currently contains only uses), set the PhysUseDef flag.
303 PhysUseDef = false;
304 MachineBasicBlock::const_iterator I = MI; I = std::next(x: I);
305 for (const auto &MOP : llvm::enumerate(First: MI->operands())) {
306 const MachineOperand &MO = MOP.value();
307 if (!MO.isReg() || !MO.isDef())
308 continue;
309 Register Reg = MO.getReg();
310 if (!Reg)
311 continue;
312 if (Reg.isVirtual())
313 continue;
314 // Check against PhysRefs even if the def is "dead".
315 if (PhysRefs.count(V: Reg.asMCReg()))
316 PhysUseDef = true;
317 // If the def is dead, it's ok. But the def may not marked "dead". That's
318 // common since this pass is run before livevariables. We can scan
319 // forward a few instructions and check if it is obviously dead.
320 if (!MO.isDead() && !isPhysDefTriviallyDead(Reg: Reg.asMCReg(), I, E: MBB->end()))
321 PhysDefs.emplace_back(Args: MOP.index(), Args&: Reg);
322 }
323
324 // Finally, add all defs to PhysRefs as well.
325 for (const auto &Def : PhysDefs)
326 for (MCRegAliasIterator AI(Def.second, TRI, true); AI.isValid(); ++AI)
327 PhysRefs.insert(V: *AI);
328
329 return !PhysRefs.empty();
330}
331
332bool MachineCSEImpl::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
333 const SmallSet<MCRegister, 8> &PhysRefs,
334 const PhysDefVector &PhysDefs,
335 bool &NonLocal) const {
336 // For now conservatively returns false if the common subexpression is
337 // not in the same basic block as the given instruction. The only exception
338 // is if the common subexpression is in the sole predecessor block.
339 const MachineBasicBlock *MBB = MI->getParent();
340 const MachineBasicBlock *CSMBB = CSMI->getParent();
341
342 bool CrossMBB = false;
343 if (CSMBB != MBB) {
344 if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
345 return false;
346
347 for (const auto &PhysDef : PhysDefs) {
348 if (MRI->isAllocatable(PhysReg: PhysDef.second) || MRI->isReserved(PhysReg: PhysDef.second))
349 // Avoid extending live range of physical registers if they are
350 //allocatable or reserved.
351 return false;
352 }
353 CrossMBB = true;
354 }
355 MachineBasicBlock::const_iterator I = CSMI; I = std::next(x: I);
356 MachineBasicBlock::const_iterator E = MI;
357 MachineBasicBlock::const_iterator EE = CSMBB->end();
358 unsigned LookAheadLeft = LookAheadLimit;
359 while (LookAheadLeft) {
360 // Skip over dbg_value's.
361 while (I != E && I != EE && I->isDebugInstr())
362 ++I;
363
364 if (I == EE) {
365 assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
366 (void)CrossMBB;
367 CrossMBB = false;
368 NonLocal = true;
369 I = MBB->begin();
370 EE = MBB->end();
371 continue;
372 }
373
374 if (I == E)
375 return true;
376
377 for (const MachineOperand &MO : I->operands()) {
378 // RegMasks go on instructions like calls that clobber lots of physregs.
379 // Don't attempt to CSE across such an instruction.
380 if (MO.isRegMask())
381 return false;
382 if (!MO.isReg() || !MO.isDef())
383 continue;
384 Register MOReg = MO.getReg();
385 if (MOReg.isVirtual())
386 continue;
387 if (PhysRefs.count(V: MOReg.asMCReg()))
388 return false;
389 }
390
391 --LookAheadLeft;
392 ++I;
393 }
394
395 return false;
396}
397
398bool MachineCSEImpl::isCSECandidate(MachineInstr *MI) {
399 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
400 MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() ||
401 MI->isFakeUse())
402 return false;
403
404 // Ignore copies.
405 if (MI->isCopyLike())
406 return false;
407
408 // Ignore stuff that we obviously can't move.
409 if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
410 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
411 return false;
412
413 if (MI->mayLoad()) {
414 // Okay, this instruction does a load. As a refinement, we allow the target
415 // to decide whether the loaded value is actually a constant. If so, we can
416 // actually use it as a load.
417 if (!MI->isDereferenceableInvariantLoad())
418 // FIXME: we should be able to hoist loads with no other side effects if
419 // there are no other instructions which can change memory in this loop.
420 // This is a trivial form of alias analysis.
421 return false;
422 }
423
424 // Ignore stack guard loads, otherwise the register that holds CSEed value may
425 // be spilled and get loaded back with corrupted data.
426 if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
427 return false;
428
429 return true;
430}
431
432/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
433/// common expression that defines Reg. CSBB is basic block where CSReg is
434/// defined.
435bool MachineCSEImpl::isProfitableToCSE(Register CSReg, Register Reg,
436 MachineBasicBlock *CSBB,
437 MachineInstr *MI) {
438 if (AggressiveMachineCSE)
439 return true;
440
441 // FIXME: Heuristics that works around the lack the live range splitting.
442
443 // If CSReg is used at all uses of Reg, CSE should not increase register
444 // pressure of CSReg.
445 bool MayIncreasePressure = true;
446 if (CSReg.isVirtual() && Reg.isVirtual()) {
447 MayIncreasePressure = false;
448 SmallPtrSet<MachineInstr*, 8> CSUses;
449 int NumOfUses = 0;
450 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg: CSReg)) {
451 CSUses.insert(Ptr: &MI);
452 // Too costly to compute if NumOfUses is very large. Conservatively assume
453 // MayIncreasePressure to avoid spending too much time here.
454 if (++NumOfUses > CSUsesThreshold) {
455 MayIncreasePressure = true;
456 break;
457 }
458 }
459 if (!MayIncreasePressure)
460 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
461 if (!CSUses.count(Ptr: &MI)) {
462 MayIncreasePressure = true;
463 break;
464 }
465 }
466 }
467 if (!MayIncreasePressure) return true;
468
469 // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
470 // an immediate predecessor. We don't want to increase register pressure and
471 // end up causing other computation to be spilled.
472 if (TII->isAsCheapAsAMove(MI: *MI)) {
473 MachineBasicBlock *BB = MI->getParent();
474 if (CSBB != BB && !CSBB->isSuccessor(MBB: BB))
475 return false;
476 }
477
478 // Heuristics #2: If the expression doesn't not use a vr and the only use
479 // of the redundant computation are copies, do not cse.
480 bool HasVRegUse = false;
481 for (const MachineOperand &MO : MI->all_uses()) {
482 if (MO.getReg().isVirtual()) {
483 HasVRegUse = true;
484 break;
485 }
486 }
487 if (!HasVRegUse) {
488 bool HasNonCopyUse = false;
489 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
490 // Ignore copies.
491 if (!MI.isCopyLike()) {
492 HasNonCopyUse = true;
493 break;
494 }
495 }
496 if (!HasNonCopyUse)
497 return false;
498 }
499
500 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
501 // it unless the defined value is already used in the BB of the new use.
502 bool HasPHI = false;
503 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg: CSReg)) {
504 HasPHI |= UseMI.isPHI();
505 if (UseMI.getParent() == MI->getParent())
506 return true;
507 }
508
509 return !HasPHI;
510}
511
512void MachineCSEImpl::EnterScope(MachineBasicBlock *MBB) {
513 LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
514 ScopeType *Scope = new ScopeType(VNT);
515 ScopeMap[MBB] = Scope;
516}
517
518void MachineCSEImpl::ExitScope(MachineBasicBlock *MBB) {
519 LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
520 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(Val: MBB);
521 assert(SI != ScopeMap.end());
522 delete SI->second;
523 ScopeMap.erase(I: SI);
524}
525
526bool MachineCSEImpl::ProcessBlockCSE(MachineBasicBlock *MBB) {
527 bool Changed = false;
528
529 SmallVector<std::pair<Register, Register>, 8> CSEPairs;
530 SmallVector<unsigned, 2> ImplicitDefsToUpdate;
531 SmallVector<Register, 2> ImplicitDefs;
532 for (MachineInstr &MI : llvm::make_early_inc_range(Range&: *MBB)) {
533 if (!isCSECandidate(MI: &MI))
534 continue;
535
536 bool FoundCSE = VNT.count(Key: &MI);
537 if (!FoundCSE) {
538 // Using trivial copy propagation to find more CSE opportunities.
539 if (PerformTrivialCopyPropagation(MI: &MI, MBB)) {
540 Changed = true;
541
542 // After coalescing MI itself may become a copy.
543 if (MI.isCopyLike())
544 continue;
545
546 // Try again to see if CSE is possible.
547 FoundCSE = VNT.count(Key: &MI);
548 }
549 }
550
551 // Commute commutable instructions.
552 bool Commuted = false;
553 if (!FoundCSE && MI.isCommutable()) {
554 if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
555 Commuted = true;
556 FoundCSE = VNT.count(Key: NewMI);
557 if (NewMI != &MI) {
558 // New instruction. It doesn't need to be kept.
559 NewMI->eraseFromParent();
560 Changed = true;
561 } else if (!FoundCSE)
562 // MI was changed but it didn't help, commute it back!
563 (void)TII->commuteInstruction(MI);
564 }
565 }
566
567 // If the instruction defines physical registers and the values *may* be
568 // used, then it's not safe to replace it with a common subexpression.
569 // It's also not safe if the instruction uses physical registers.
570 bool CrossMBBPhysDef = false;
571 SmallSet<MCRegister, 8> PhysRefs;
572 PhysDefVector PhysDefs;
573 bool PhysUseDef = false;
574 if (FoundCSE &&
575 hasLivePhysRegDefUses(MI: &MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
576 FoundCSE = false;
577
578 // ... Unless the CS is local or is in the sole predecessor block
579 // and it also defines the physical register which is not clobbered
580 // in between and the physical register uses were not clobbered.
581 // This can never be the case if the instruction both uses and
582 // defines the same physical register, which was detected above.
583 if (!PhysUseDef) {
584 unsigned CSVN = VNT.lookup(Key: &MI);
585 MachineInstr *CSMI = Exps[CSVN];
586 if (PhysRegDefsReach(CSMI, MI: &MI, PhysRefs, PhysDefs, NonLocal&: CrossMBBPhysDef))
587 FoundCSE = true;
588 }
589 }
590
591 if (!FoundCSE) {
592 VNT.insert(Key: &MI, Val: CurrVN++);
593 Exps.push_back(Elt: &MI);
594 continue;
595 }
596
597 // Found a common subexpression, eliminate it.
598 unsigned CSVN = VNT.lookup(Key: &MI);
599 MachineInstr *CSMI = Exps[CSVN];
600 LLVM_DEBUG(dbgs() << "Examining: " << MI);
601 LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
602
603 // Prevent CSE-ing non-local convergent instructions.
604 // LLVM's current definition of `isConvergent` does not necessarily prove
605 // that non-local CSE is illegal. The following check extends the definition
606 // of `isConvergent` to assume a convergent instruction is dependent not
607 // only on additional conditions, but also on fewer conditions. LLVM does
608 // not have a MachineInstr attribute which expresses this extended
609 // definition, so it's necessary to use `isConvergent` to prevent illegally
610 // CSE-ing the subset of `isConvergent` instructions which do fall into this
611 // extended definition.
612 if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
613 LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
614 "different BBs, avoid CSE!\n");
615 VNT.insert(Key: &MI, Val: CurrVN++);
616 Exps.push_back(Elt: &MI);
617 continue;
618 }
619
620 // Check if it's profitable to perform this CSE.
621 bool DoCSE = true;
622 unsigned NumDefs = MI.getNumDefs();
623
624 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
625 MachineOperand &MO = MI.getOperand(i);
626 if (!MO.isReg() || !MO.isDef())
627 continue;
628 Register OldReg = MO.getReg();
629 Register NewReg = CSMI->getOperand(i).getReg();
630
631 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
632 // we should make sure it is not dead at CSMI.
633 if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
634 ImplicitDefsToUpdate.push_back(Elt: i);
635
636 // Keep track of implicit defs of CSMI and MI, to clear possibly
637 // made-redundant kill flags.
638 if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
639 ImplicitDefs.push_back(Elt: OldReg);
640
641 if (OldReg == NewReg) {
642 --NumDefs;
643 continue;
644 }
645
646 assert(OldReg.isVirtual() && NewReg.isVirtual() &&
647 "Do not CSE physical register defs!");
648
649 if (!isProfitableToCSE(CSReg: NewReg, Reg: OldReg, CSBB: CSMI->getParent(), MI: &MI)) {
650 LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
651 DoCSE = false;
652 break;
653 }
654
655 // Don't perform CSE if the result of the new instruction cannot exist
656 // within the constraints (register class, bank, or low-level type) of
657 // the old instruction.
658 if (!MRI->constrainRegAttrs(Reg: NewReg, ConstrainingReg: OldReg)) {
659 LLVM_DEBUG(
660 dbgs() << "*** Not the same register constraints, avoid CSE!\n");
661 DoCSE = false;
662 break;
663 }
664
665 CSEPairs.emplace_back(Args&: OldReg, Args&: NewReg);
666 --NumDefs;
667 }
668
669 // Actually perform the elimination.
670 if (DoCSE) {
671 for (const std::pair<Register, Register> &CSEPair : CSEPairs) {
672 Register OldReg = CSEPair.first;
673 Register NewReg = CSEPair.second;
674 // OldReg may have been unused but is used now, clear the Dead flag
675 MachineInstr *Def = MRI->getUniqueVRegDef(Reg: NewReg);
676 assert(Def != nullptr && "CSEd register has no unique definition?");
677 Def->clearRegisterDeads(Reg: NewReg);
678 // Replace with NewReg and clear kill flags which may be wrong now.
679 MRI->replaceRegWith(FromReg: OldReg, ToReg: NewReg);
680 MRI->clearKillFlags(Reg: NewReg);
681 }
682
683 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
684 // we should make sure it is not dead at CSMI.
685 for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
686 CSMI->getOperand(i: ImplicitDefToUpdate).setIsDead(false);
687 for (const auto &PhysDef : PhysDefs)
688 if (!MI.getOperand(i: PhysDef.first).isDead())
689 CSMI->getOperand(i: PhysDef.first).setIsDead(false);
690
691 // Go through implicit defs of CSMI and MI, and clear the kill flags on
692 // their uses in all the instructions between CSMI and MI.
693 // We might have made some of the kill flags redundant, consider:
694 // subs ... implicit-def %nzcv <- CSMI
695 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
696 // subs ... implicit-def %nzcv <- MI, to be eliminated
697 // csinc ... implicit killed %nzcv
698 // Since we eliminated MI, and reused a register imp-def'd by CSMI
699 // (here %nzcv), that register, if it was killed before MI, should have
700 // that kill flag removed, because it's lifetime was extended.
701 if (CSMI->getParent() == MI.getParent()) {
702 for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
703 for (auto ImplicitDef : ImplicitDefs)
704 if (MachineOperand *MO = II->findRegisterUseOperand(
705 Reg: ImplicitDef, TRI, /*isKill=*/true))
706 MO->setIsKill(false);
707 } else {
708 // If the instructions aren't in the same BB, bail out and clear the
709 // kill flag on all uses of the imp-def'd register.
710 for (auto ImplicitDef : ImplicitDefs)
711 MRI->clearKillFlags(Reg: ImplicitDef);
712 }
713
714 if (CrossMBBPhysDef) {
715 // Add physical register defs now coming in from a predecessor to MBB
716 // livein list.
717 while (!PhysDefs.empty()) {
718 auto LiveIn = PhysDefs.pop_back_val();
719 if (!MBB->isLiveIn(Reg: LiveIn.second))
720 MBB->addLiveIn(PhysReg: LiveIn.second);
721 }
722 ++NumCrossBBCSEs;
723 }
724
725 MI.eraseFromParent();
726 ++NumCSEs;
727 if (!PhysRefs.empty())
728 ++NumPhysCSEs;
729 if (Commuted)
730 ++NumCommutes;
731 Changed = true;
732 } else {
733 VNT.insert(Key: &MI, Val: CurrVN++);
734 Exps.push_back(Elt: &MI);
735 }
736 CSEPairs.clear();
737 ImplicitDefsToUpdate.clear();
738 ImplicitDefs.clear();
739 }
740
741 return Changed;
742}
743
744/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
745/// dominator tree node if its a leaf or all of its children are done. Walk
746/// up the dominator tree to destroy ancestors which are now done.
747void MachineCSEImpl::ExitScopeIfDone(
748 MachineDomTreeNode *Node,
749 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren) {
750 if (OpenChildren[Node])
751 return;
752
753 // Pop scope.
754 ExitScope(MBB: Node->getBlock());
755
756 // Now traverse upwards to pop ancestors whose offsprings are all done.
757 while (MachineDomTreeNode *Parent = Node->getIDom()) {
758 unsigned Left = --OpenChildren[Parent];
759 if (Left != 0)
760 break;
761 ExitScope(MBB: Parent->getBlock());
762 Node = Parent;
763 }
764}
765
766bool MachineCSEImpl::PerformCSE(MachineDomTreeNode *Node) {
767 SmallVector<MachineDomTreeNode*, 32> Scopes;
768 SmallVector<MachineDomTreeNode*, 8> WorkList;
769 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
770
771 CurrVN = 0;
772
773 // Perform a DFS walk to determine the order of visit.
774 WorkList.push_back(Elt: Node);
775 do {
776 Node = WorkList.pop_back_val();
777 Scopes.push_back(Elt: Node);
778 size_t WorkListSize = WorkList.size();
779 append_range(C&: WorkList, R: Node->children());
780 OpenChildren[Node] = WorkList.size() - WorkListSize; // Number of children.
781 } while (!WorkList.empty());
782
783 // Now perform CSE.
784 bool Changed = false;
785 for (MachineDomTreeNode *Node : Scopes) {
786 MachineBasicBlock *MBB = Node->getBlock();
787 EnterScope(MBB);
788 Changed |= ProcessBlockCSE(MBB);
789 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
790 ExitScopeIfDone(Node, OpenChildren);
791 }
792
793 return Changed;
794}
795
796// We use stronger checks for PRE candidate rather than for CSE ones to embrace
797// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
798// to exclude instrs created by PRE that won't be CSEed later.
799bool MachineCSEImpl::isPRECandidate(MachineInstr *MI,
800 SmallSet<MCRegister, 8> &PhysRefs) {
801 if (!isCSECandidate(MI) ||
802 MI->isNotDuplicable() ||
803 MI->mayLoad() ||
804 TII->isAsCheapAsAMove(MI: *MI) ||
805 MI->getNumDefs() != 1 ||
806 MI->getNumExplicitDefs() != 1)
807 return false;
808
809 for (const MachineOperand &MO : MI->operands()) {
810 if (MO.isReg() && !MO.getReg().isVirtual()) {
811 if (MO.isDef())
812 return false;
813 else
814 PhysRefs.insert(V: MO.getReg());
815 }
816 }
817
818 return true;
819}
820
821bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT,
822 MachineBasicBlock *MBB) {
823 bool Changed = false;
824 for (MachineInstr &MI : llvm::make_early_inc_range(Range&: *MBB)) {
825 SmallSet<MCRegister, 8> PhysRefs;
826 if (!isPRECandidate(MI: &MI, PhysRefs))
827 continue;
828
829 auto [It, Inserted] = PREMap.try_emplace(Key: &MI, Args&: MBB);
830 if (Inserted)
831 continue;
832
833 auto *MBB1 = It->second;
834 assert(
835 !DT->properlyDominates(MBB, MBB1) &&
836 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
837 auto CMBB = DT->findNearestCommonDominator(A: MBB, B: MBB1);
838 if (!CMBB->isLegalToHoistInto())
839 continue;
840
841 if (!isProfitableToHoistInto(CandidateBB: CMBB, MBB, MBB1))
842 continue;
843
844 // Two instrs are partial redundant if their basic blocks are reachable
845 // from one to another but one doesn't dominate another.
846 if (CMBB != MBB1) {
847 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
848 if (BB != nullptr && BB1 != nullptr &&
849 (isPotentiallyReachable(From: BB1, To: BB) ||
850 isPotentiallyReachable(From: BB, To: BB1))) {
851 // The following check extends the definition of `isConvergent` to
852 // assume a convergent instruction is dependent not only on additional
853 // conditions, but also on fewer conditions. LLVM does not have a
854 // MachineInstr attribute which expresses this extended definition, so
855 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
856 // subset of `isConvergent` instructions which do fall into this
857 // extended definition.
858 if (MI.isConvergent() && CMBB != MBB)
859 continue;
860
861 // If this instruction uses physical registers then we can only do PRE
862 // if it's using the value that is live at the place we're hoisting to.
863 bool NonLocal;
864 PhysDefVector PhysDefs;
865 if (!PhysRefs.empty() &&
866 !PhysRegDefsReach(CSMI: &*(CMBB->getFirstTerminator()), MI: &MI, PhysRefs,
867 PhysDefs, NonLocal))
868 continue;
869
870 assert(MI.getOperand(0).isDef() &&
871 "First operand of instr with one explicit def must be this def");
872 Register VReg = MI.getOperand(i: 0).getReg();
873 Register NewReg = MRI->cloneVirtualRegister(VReg);
874 if (!isProfitableToCSE(CSReg: NewReg, Reg: VReg, CSBB: CMBB, MI: &MI))
875 continue;
876 MachineInstr &NewMI =
877 TII->duplicate(MBB&: *CMBB, InsertBefore: CMBB->getFirstTerminator(), Orig: MI);
878
879 // When hoisting, make sure we don't carry the debug location of
880 // the original instruction, as that's not correct and can cause
881 // unexpected jumps when debugging optimized code.
882 auto EmptyDL = DebugLoc();
883 NewMI.setDebugLoc(EmptyDL);
884
885 NewMI.getOperand(i: 0).setReg(NewReg);
886
887 PREMap[&MI] = CMBB;
888 ++NumPREs;
889 Changed = true;
890 }
891 }
892 }
893 return Changed;
894}
895
896// This simple PRE (partial redundancy elimination) pass doesn't actually
897// eliminate partial redundancy but transforms it to full redundancy,
898// anticipating that the next CSE step will eliminate this created redundancy.
899// If CSE doesn't eliminate this, than created instruction will remain dead
900// and eliminated later by Remove Dead Machine Instructions pass.
901bool MachineCSEImpl::PerformSimplePRE(MachineDominatorTree *DT) {
902 SmallVector<MachineDomTreeNode *, 32> BBs;
903
904 PREMap.clear();
905 bool Changed = false;
906 BBs.push_back(Elt: DT->getRootNode());
907 do {
908 auto Node = BBs.pop_back_val();
909 append_range(C&: BBs, R: Node->children());
910
911 MachineBasicBlock *MBB = Node->getBlock();
912 Changed |= ProcessBlockPRE(DT, MBB);
913
914 } while (!BBs.empty());
915
916 return Changed;
917}
918
919bool MachineCSEImpl::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
920 MachineBasicBlock *MBB,
921 MachineBasicBlock *MBB1) {
922 if (CandidateBB->getParent()->getFunction().hasMinSize())
923 return true;
924 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
925 assert(DT->dominates(CandidateBB, MBB1) &&
926 "CandidateBB should dominate MBB1");
927 return MBFI->getBlockFreq(MBB: CandidateBB) <=
928 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB: MBB1);
929}
930
931void MachineCSEImpl::releaseMemory() {
932 ScopeMap.clear();
933 PREMap.clear();
934 Exps.clear();
935}
936
937bool MachineCSEImpl::run(MachineFunction &MF) {
938 TII = MF.getSubtarget().getInstrInfo();
939 TRI = MF.getSubtarget().getRegisterInfo();
940 MRI = &MF.getRegInfo();
941 LookAheadLimit = TII->getMachineCSELookAheadLimit();
942 bool ChangedPRE, ChangedCSE;
943 ChangedPRE = PerformSimplePRE(DT);
944 ChangedCSE = PerformCSE(Node: DT->getRootNode());
945 releaseMemory();
946 return ChangedPRE || ChangedCSE;
947}
948
949PreservedAnalyses MachineCSEPass::run(MachineFunction &MF,
950 MachineFunctionAnalysisManager &MFAM) {
951 MFPropsModifier _(*this, MF);
952
953 MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
954 MachineBlockFrequencyInfo &MBFI =
955 MFAM.getResult<MachineBlockFrequencyAnalysis>(IR&: MF);
956 MachineCSEImpl Impl(&MDT, &MBFI);
957 bool Changed = Impl.run(MF);
958 if (!Changed)
959 return PreservedAnalyses::all();
960
961 auto PA = getMachineFunctionPassPreservedAnalyses();
962 PA.preserve<MachineLoopAnalysis>();
963 PA.preserve<MachineDominatorTreeAnalysis>();
964 PA.preserve<MachineBlockFrequencyAnalysis>();
965 PA.preserveSet<CFGAnalyses>();
966 return PA;
967}
968
969bool MachineCSELegacy::runOnMachineFunction(MachineFunction &MF) {
970 if (skipFunction(F: MF.getFunction()))
971 return false;
972
973 MachineDominatorTree &MDT =
974 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
975 MachineBlockFrequencyInfo &MBFI =
976 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
977 MachineCSEImpl Impl(&MDT, &MBFI);
978 return Impl.run(MF);
979}
980