//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs global common subexpression elimination on machine
// instructions using a scoped hash table based value numbering scheme. It
// must be run while the machine function is still in SSA form.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineCSE.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <utility>

49using namespace llvm;
50
51#define DEBUG_TYPE "machine-cse"
52
53STATISTIC(NumCoalesces, "Number of copies coalesced");
54STATISTIC(NumCSEs, "Number of common subexpression eliminated");
55STATISTIC(NumPREs, "Number of partial redundant expression"
56 " transformed to fully redundant");
57STATISTIC(NumPhysCSEs,
58 "Number of physreg referencing common subexpr eliminated");
59STATISTIC(NumCrossBBCSEs,
60 "Number of cross-MBB physreg referencing CS eliminated");
61STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
62
63// Threshold to avoid excessive cost to compute isProfitableToCSE.
64static cl::opt<int>
65 CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(Val: 1024),
66 cl::desc("Threshold for the size of CSUses"));
67
68static cl::opt<bool> AggressiveMachineCSE(
69 "aggressive-machine-cse", cl::Hidden, cl::init(Val: false),
70 cl::desc("Override the profitability heuristics for Machine CSE"));
71
72namespace {
73
74class MachineCSEImpl {
75 const TargetInstrInfo *TII = nullptr;
76 const TargetRegisterInfo *TRI = nullptr;
77 MachineDominatorTree *DT = nullptr;
78 MachineRegisterInfo *MRI = nullptr;
79 MachineBlockFrequencyInfo *MBFI = nullptr;
80
81public:
82 MachineCSEImpl(MachineDominatorTree *DT, MachineBlockFrequencyInfo *MBFI)
83 : DT(DT), MBFI(MBFI) {}
84 bool run(MachineFunction &MF);
85
86private:
87 using AllocatorTy =
88 RecyclingAllocator<BumpPtrAllocator,
89 ScopedHashTableVal<MachineInstr *, unsigned>>;
90 using ScopedHTType =
91 ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
92 AllocatorTy>;
93 using ScopeType = ScopedHTType::ScopeTy;
94 using PhysDefVector = SmallVector<std::pair<unsigned, Register>, 2>;
95
96 unsigned LookAheadLimit = 0;
97 DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
98 DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
99 PREMap;
100 ScopedHTType VNT;
101 SmallVector<MachineInstr *, 64> Exps;
102 unsigned CurrVN = 0;
103
104 bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB);
105 bool isPhysDefTriviallyDead(MCRegister Reg,
106 MachineBasicBlock::const_iterator I,
107 MachineBasicBlock::const_iterator E) const;
108 bool hasLivePhysRegDefUses(const MachineInstr *MI,
109 const MachineBasicBlock *MBB,
110 SmallSet<MCRegister, 8> &PhysRefs,
111 PhysDefVector &PhysDefs, bool &PhysUseDef) const;
112 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
113 const SmallSet<MCRegister, 8> &PhysRefs,
114 const PhysDefVector &PhysDefs, bool &NonLocal) const;
115 bool isCSECandidate(MachineInstr *MI);
116 bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB,
117 MachineInstr *MI);
118 void EnterScope(MachineBasicBlock *MBB);
119 void ExitScope(MachineBasicBlock *MBB);
120 bool ProcessBlockCSE(MachineBasicBlock *MBB);
121 void ExitScopeIfDone(MachineDomTreeNode *Node,
122 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren);
123 bool PerformCSE(MachineDomTreeNode *Node);
124
125 bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
126 bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
127 bool PerformSimplePRE(MachineDominatorTree *DT);
128 /// Heuristics to see if it's profitable to move common computations of MBB
129 /// and MBB1 to CandidateBB.
130 bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
131 MachineBasicBlock *MBB, MachineBasicBlock *MBB1);
132 void releaseMemory();
133};
134
135class MachineCSELegacy : public MachineFunctionPass {
136public:
137 static char ID; // Pass identification
138
139 MachineCSELegacy() : MachineFunctionPass(ID) {
140 initializeMachineCSELegacyPass(*PassRegistry::getPassRegistry());
141 }
142
143 bool runOnMachineFunction(MachineFunction &MF) override;
144
145 void getAnalysisUsage(AnalysisUsage &AU) const override {
146 AU.setPreservesCFG();
147 MachineFunctionPass::getAnalysisUsage(AU);
148 AU.addPreservedID(ID&: MachineLoopInfoID);
149 AU.addRequired<MachineDominatorTreeWrapperPass>();
150 AU.addPreserved<MachineDominatorTreeWrapperPass>();
151 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
152 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
153 }
154
155 MachineFunctionProperties getRequiredProperties() const override {
156 return MachineFunctionProperties().setIsSSA();
157 }
158};
159} // end anonymous namespace
160
161char MachineCSELegacy::ID = 0;
162
163char &llvm::MachineCSELegacyID = MachineCSELegacy::ID;
164
165INITIALIZE_PASS_BEGIN(MachineCSELegacy, DEBUG_TYPE,
166 "Machine Common Subexpression Elimination", false, false)
167INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
168INITIALIZE_PASS_END(MachineCSELegacy, DEBUG_TYPE,
169 "Machine Common Subexpression Elimination", false, false)
170
171/// The source register of a COPY machine instruction can be propagated to all
172/// its users, and this propagation could increase the probability of finding
173/// common subexpressions. If the COPY has only one user, the COPY itself can
174/// be removed.
175bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI,
176 MachineBasicBlock *MBB) {
177 bool Changed = false;
178 for (MachineOperand &MO : MI->all_uses()) {
179 Register Reg = MO.getReg();
180 if (!Reg.isVirtual())
181 continue;
182 bool OnlyOneUse = MRI->hasOneNonDBGUse(RegNo: Reg);
183 MachineInstr *DefMI = MRI->getVRegDef(Reg);
184 if (!DefMI || !DefMI->isCopy())
185 continue;
186 Register SrcReg = DefMI->getOperand(i: 1).getReg();
187 if (!SrcReg.isVirtual())
188 continue;
189 // FIXME: We should trivially coalesce subregister copies to expose CSE
190 // opportunities on instructions with truncated operands (see
191 // cse-add-with-overflow.ll). This can be done here as follows:
192 // if (SrcSubReg)
193 // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
194 // SrcSubReg);
195 // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
196 //
197 // The 2-addr pass has been updated to handle coalesced subregs. However,
198 // some machine-specific code still can't handle it.
199 // To handle it properly we also need a way find a constrained subregister
200 // class given a super-reg class and subreg index.
201 if (DefMI->getOperand(i: 1).getSubReg())
202 continue;
203 if (!MRI->constrainRegAttrs(Reg: SrcReg, ConstrainingReg: Reg))
204 continue;
205 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
206 LLVM_DEBUG(dbgs() << "*** to: " << *MI);
207
208 // Propagate SrcReg of copies to MI.
209 MO.setReg(SrcReg);
210 MRI->clearKillFlags(Reg: SrcReg);
211 // Coalesce single use copies.
212 if (OnlyOneUse) {
213 // If (and only if) we've eliminated all uses of the copy, also
214 // copy-propagate to any debug-users of MI, or they'll be left using
215 // an undefined value.
216 DefMI->changeDebugValuesDefReg(Reg: SrcReg);
217
218 DefMI->eraseFromParent();
219 ++NumCoalesces;
220 }
221 Changed = true;
222 }
223
224 return Changed;
225}
226
227bool MachineCSEImpl::isPhysDefTriviallyDead(
228 MCRegister Reg, MachineBasicBlock::const_iterator I,
229 MachineBasicBlock::const_iterator E) const {
230 unsigned LookAheadLeft = LookAheadLimit;
231 while (LookAheadLeft) {
232 // Skip over dbg_value's.
233 I = skipDebugInstructionsForward(It: I, End: E);
234
235 if (I == E)
236 // Reached end of block, we don't know if register is dead or not.
237 return false;
238
239 bool SeenDef = false;
240 for (const MachineOperand &MO : I->operands()) {
241 if (MO.isRegMask() && MO.clobbersPhysReg(PhysReg: Reg))
242 SeenDef = true;
243 if (!MO.isReg() || !MO.getReg())
244 continue;
245 if (!TRI->regsOverlap(RegA: MO.getReg(), RegB: Reg))
246 continue;
247 if (MO.isUse())
248 // Found a use!
249 return false;
250 SeenDef = true;
251 }
252 if (SeenDef)
253 // See a def of Reg (or an alias) before encountering any use, it's
254 // trivially dead.
255 return true;
256
257 --LookAheadLeft;
258 ++I;
259 }
260 return false;
261}
262
263static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
264 const MachineOperand &MO,
265 const MachineFunction &MF,
266 const TargetRegisterInfo &TRI,
267 const TargetInstrInfo &TII) {
268 // MachineRegisterInfo::isConstantPhysReg directly called by
269 // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
270 // reserved registers to be frozen. That doesn't cause a problem post-ISel as
271 // most (if not all) targets freeze reserved registers right after ISel.
272 //
273 // It does cause issues mid-GlobalISel, however, hence the additional
274 // reservedRegsFrozen check.
275 const MachineRegisterInfo &MRI = MF.getRegInfo();
276 return TRI.isCallerPreservedPhysReg(PhysReg: Reg, MF) || TII.isIgnorableUse(MO) ||
277 (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(PhysReg: Reg));
278}
279
280/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
281/// physical registers (except for dead defs of physical registers). It also
282/// returns the physical register def by reference if it's the only one and the
283/// instruction does not uses a physical register.
284bool MachineCSEImpl::hasLivePhysRegDefUses(const MachineInstr *MI,
285 const MachineBasicBlock *MBB,
286 SmallSet<MCRegister, 8> &PhysRefs,
287 PhysDefVector &PhysDefs,
288 bool &PhysUseDef) const {
289 // First, add all uses to PhysRefs.
290 for (const MachineOperand &MO : MI->all_uses()) {
291 Register Reg = MO.getReg();
292 if (!Reg)
293 continue;
294 if (Reg.isVirtual())
295 continue;
296 // Reading either caller preserved or constant physregs is ok.
297 if (!isCallerPreservedOrConstPhysReg(Reg: Reg.asMCReg(), MO, MF: *MI->getMF(), TRI: *TRI,
298 TII: *TII))
299 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
300 PhysRefs.insert(V: *AI);
301 }
302
303 // Next, collect all defs into PhysDefs. If any is already in PhysRefs
304 // (which currently contains only uses), set the PhysUseDef flag.
305 PhysUseDef = false;
306 MachineBasicBlock::const_iterator I = MI; I = std::next(x: I);
307 for (const auto &MOP : llvm::enumerate(First: MI->operands())) {
308 const MachineOperand &MO = MOP.value();
309 if (!MO.isReg() || !MO.isDef())
310 continue;
311 Register Reg = MO.getReg();
312 if (!Reg)
313 continue;
314 if (Reg.isVirtual())
315 continue;
316 // Check against PhysRefs even if the def is "dead".
317 if (PhysRefs.count(V: Reg.asMCReg()))
318 PhysUseDef = true;
319 // If the def is dead, it's ok. But the def may not marked "dead". That's
320 // common since this pass is run before livevariables. We can scan
321 // forward a few instructions and check if it is obviously dead.
322 if (!MO.isDead() && !isPhysDefTriviallyDead(Reg: Reg.asMCReg(), I, E: MBB->end()))
323 PhysDefs.emplace_back(Args: MOP.index(), Args&: Reg);
324 }
325
326 // Finally, add all defs to PhysRefs as well.
327 for (const auto &Def : PhysDefs)
328 for (MCRegAliasIterator AI(Def.second, TRI, true); AI.isValid(); ++AI)
329 PhysRefs.insert(V: *AI);
330
331 return !PhysRefs.empty();
332}
333
334bool MachineCSEImpl::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
335 const SmallSet<MCRegister, 8> &PhysRefs,
336 const PhysDefVector &PhysDefs,
337 bool &NonLocal) const {
338 // For now conservatively returns false if the common subexpression is
339 // not in the same basic block as the given instruction. The only exception
340 // is if the common subexpression is in the sole predecessor block.
341 const MachineBasicBlock *MBB = MI->getParent();
342 const MachineBasicBlock *CSMBB = CSMI->getParent();
343
344 bool CrossMBB = false;
345 if (CSMBB != MBB) {
346 if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
347 return false;
348
349 for (const auto &PhysDef : PhysDefs) {
350 if (MRI->isAllocatable(PhysReg: PhysDef.second) || MRI->isReserved(PhysReg: PhysDef.second))
351 // Avoid extending live range of physical registers if they are
352 //allocatable or reserved.
353 return false;
354 }
355 CrossMBB = true;
356 }
357 MachineBasicBlock::const_iterator I = CSMI; I = std::next(x: I);
358 MachineBasicBlock::const_iterator E = MI;
359 MachineBasicBlock::const_iterator EE = CSMBB->end();
360 unsigned LookAheadLeft = LookAheadLimit;
361 while (LookAheadLeft) {
362 // Skip over dbg_value's.
363 while (I != E && I != EE && I->isDebugInstr())
364 ++I;
365
366 if (I == EE) {
367 assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
368 (void)CrossMBB;
369 CrossMBB = false;
370 NonLocal = true;
371 I = MBB->begin();
372 EE = MBB->end();
373 continue;
374 }
375
376 if (I == E)
377 return true;
378
379 for (const MachineOperand &MO : I->operands()) {
380 // RegMasks go on instructions like calls that clobber lots of physregs.
381 // Don't attempt to CSE across such an instruction.
382 if (MO.isRegMask())
383 return false;
384 if (!MO.isReg() || !MO.isDef())
385 continue;
386 Register MOReg = MO.getReg();
387 if (MOReg.isVirtual())
388 continue;
389 if (PhysRefs.count(V: MOReg.asMCReg()))
390 return false;
391 }
392
393 --LookAheadLeft;
394 ++I;
395 }
396
397 return false;
398}
399
400bool MachineCSEImpl::isCSECandidate(MachineInstr *MI) {
401 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
402 MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() ||
403 MI->isFakeUse())
404 return false;
405
406 // Ignore copies.
407 if (MI->isCopyLike())
408 return false;
409
410 // Ignore stuff that we obviously can't move.
411 if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
412 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
413 return false;
414
415 if (MI->mayLoad()) {
416 // Okay, this instruction does a load. As a refinement, we allow the target
417 // to decide whether the loaded value is actually a constant. If so, we can
418 // actually use it as a load.
419 if (!MI->isDereferenceableInvariantLoad())
420 // FIXME: we should be able to hoist loads with no other side effects if
421 // there are no other instructions which can change memory in this loop.
422 // This is a trivial form of alias analysis.
423 return false;
424 }
425
426 // Ignore stack guard loads, otherwise the register that holds CSEed value may
427 // be spilled and get loaded back with corrupted data.
428 if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
429 return false;
430
431 return true;
432}
433
434/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
435/// common expression that defines Reg. CSBB is basic block where CSReg is
436/// defined.
437bool MachineCSEImpl::isProfitableToCSE(Register CSReg, Register Reg,
438 MachineBasicBlock *CSBB,
439 MachineInstr *MI) {
440 if (AggressiveMachineCSE)
441 return true;
442
443 // FIXME: Heuristics that works around the lack the live range splitting.
444
445 // If CSReg is used at all uses of Reg, CSE should not increase register
446 // pressure of CSReg.
447 bool MayIncreasePressure = true;
448 if (CSReg.isVirtual() && Reg.isVirtual()) {
449 MayIncreasePressure = false;
450 SmallPtrSet<MachineInstr*, 8> CSUses;
451 int NumOfUses = 0;
452 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg: CSReg)) {
453 CSUses.insert(Ptr: &MI);
454 // Too costly to compute if NumOfUses is very large. Conservatively assume
455 // MayIncreasePressure to avoid spending too much time here.
456 if (++NumOfUses > CSUsesThreshold) {
457 MayIncreasePressure = true;
458 break;
459 }
460 }
461 if (!MayIncreasePressure)
462 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
463 if (!CSUses.count(Ptr: &MI)) {
464 MayIncreasePressure = true;
465 break;
466 }
467 }
468 }
469 if (!MayIncreasePressure) return true;
470
471 // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
472 // an immediate predecessor. We don't want to increase register pressure and
473 // end up causing other computation to be spilled.
474 if (TII->isAsCheapAsAMove(MI: *MI)) {
475 MachineBasicBlock *BB = MI->getParent();
476 if (CSBB != BB && !CSBB->isSuccessor(MBB: BB))
477 return false;
478 }
479
480 // Heuristics #2: If the expression doesn't not use a vr and the only use
481 // of the redundant computation are copies, do not cse.
482 bool HasVRegUse = false;
483 for (const MachineOperand &MO : MI->all_uses()) {
484 if (MO.getReg().isVirtual()) {
485 HasVRegUse = true;
486 break;
487 }
488 }
489 if (!HasVRegUse) {
490 bool HasNonCopyUse = false;
491 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
492 // Ignore copies.
493 if (!MI.isCopyLike()) {
494 HasNonCopyUse = true;
495 break;
496 }
497 }
498 if (!HasNonCopyUse)
499 return false;
500 }
501
502 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
503 // it unless the defined value is already used in the BB of the new use.
504 bool HasPHI = false;
505 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg: CSReg)) {
506 HasPHI |= UseMI.isPHI();
507 if (UseMI.getParent() == MI->getParent())
508 return true;
509 }
510
511 return !HasPHI;
512}
513
514void MachineCSEImpl::EnterScope(MachineBasicBlock *MBB) {
515 LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
516 ScopeType *Scope = new ScopeType(VNT);
517 ScopeMap[MBB] = Scope;
518}
519
520void MachineCSEImpl::ExitScope(MachineBasicBlock *MBB) {
521 LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
522 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(Val: MBB);
523 assert(SI != ScopeMap.end());
524 delete SI->second;
525 ScopeMap.erase(I: SI);
526}
527
528bool MachineCSEImpl::ProcessBlockCSE(MachineBasicBlock *MBB) {
529 bool Changed = false;
530
531 SmallVector<std::pair<Register, Register>, 8> CSEPairs;
532 SmallVector<unsigned, 2> ImplicitDefsToUpdate;
533 SmallVector<Register, 2> ImplicitDefs;
534 for (MachineInstr &MI : llvm::make_early_inc_range(Range&: *MBB)) {
535 if (!isCSECandidate(MI: &MI))
536 continue;
537
538 bool FoundCSE = VNT.count(Key: &MI);
539 if (!FoundCSE) {
540 // Using trivial copy propagation to find more CSE opportunities.
541 if (PerformTrivialCopyPropagation(MI: &MI, MBB)) {
542 Changed = true;
543
544 // After coalescing MI itself may become a copy.
545 if (MI.isCopyLike())
546 continue;
547
548 // Try again to see if CSE is possible.
549 FoundCSE = VNT.count(Key: &MI);
550 }
551 }
552
553 // Commute commutable instructions.
554 bool Commuted = false;
555 if (!FoundCSE && MI.isCommutable()) {
556 if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
557 Commuted = true;
558 FoundCSE = VNT.count(Key: NewMI);
559 if (NewMI != &MI) {
560 // New instruction. It doesn't need to be kept.
561 NewMI->eraseFromParent();
562 Changed = true;
563 } else if (!FoundCSE)
564 // MI was changed but it didn't help, commute it back!
565 (void)TII->commuteInstruction(MI);
566 }
567 }
568
569 // If the instruction defines physical registers and the values *may* be
570 // used, then it's not safe to replace it with a common subexpression.
571 // It's also not safe if the instruction uses physical registers.
572 bool CrossMBBPhysDef = false;
573 SmallSet<MCRegister, 8> PhysRefs;
574 PhysDefVector PhysDefs;
575 bool PhysUseDef = false;
576 if (FoundCSE &&
577 hasLivePhysRegDefUses(MI: &MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
578 FoundCSE = false;
579
580 // ... Unless the CS is local or is in the sole predecessor block
581 // and it also defines the physical register which is not clobbered
582 // in between and the physical register uses were not clobbered.
583 // This can never be the case if the instruction both uses and
584 // defines the same physical register, which was detected above.
585 if (!PhysUseDef) {
586 unsigned CSVN = VNT.lookup(Key: &MI);
587 MachineInstr *CSMI = Exps[CSVN];
588 if (PhysRegDefsReach(CSMI, MI: &MI, PhysRefs, PhysDefs, NonLocal&: CrossMBBPhysDef))
589 FoundCSE = true;
590 }
591 }
592
593 if (!FoundCSE) {
594 VNT.insert(Key: &MI, Val: CurrVN++);
595 Exps.push_back(Elt: &MI);
596 continue;
597 }
598
599 // Found a common subexpression, eliminate it.
600 unsigned CSVN = VNT.lookup(Key: &MI);
601 MachineInstr *CSMI = Exps[CSVN];
602 LLVM_DEBUG(dbgs() << "Examining: " << MI);
603 LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
604
605 // Prevent CSE-ing non-local convergent instructions.
606 // LLVM's current definition of `isConvergent` does not necessarily prove
607 // that non-local CSE is illegal. The following check extends the definition
608 // of `isConvergent` to assume a convergent instruction is dependent not
609 // only on additional conditions, but also on fewer conditions. LLVM does
610 // not have a MachineInstr attribute which expresses this extended
611 // definition, so it's necessary to use `isConvergent` to prevent illegally
612 // CSE-ing the subset of `isConvergent` instructions which do fall into this
613 // extended definition.
614 if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
615 LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
616 "different BBs, avoid CSE!\n");
617 VNT.insert(Key: &MI, Val: CurrVN++);
618 Exps.push_back(Elt: &MI);
619 continue;
620 }
621
622 // Check if it's profitable to perform this CSE.
623 bool DoCSE = true;
624 unsigned NumDefs = MI.getNumDefs();
625
626 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
627 MachineOperand &MO = MI.getOperand(i);
628 if (!MO.isReg() || !MO.isDef())
629 continue;
630 Register OldReg = MO.getReg();
631 Register NewReg = CSMI->getOperand(i).getReg();
632
633 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
634 // we should make sure it is not dead at CSMI.
635 if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
636 ImplicitDefsToUpdate.push_back(Elt: i);
637
638 // Keep track of implicit defs of CSMI and MI, to clear possibly
639 // made-redundant kill flags.
640 if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
641 ImplicitDefs.push_back(Elt: OldReg);
642
643 if (OldReg == NewReg) {
644 --NumDefs;
645 continue;
646 }
647
648 assert(OldReg.isVirtual() && NewReg.isVirtual() &&
649 "Do not CSE physical register defs!");
650
651 if (!isProfitableToCSE(CSReg: NewReg, Reg: OldReg, CSBB: CSMI->getParent(), MI: &MI)) {
652 LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
653 DoCSE = false;
654 break;
655 }
656
657 // Don't perform CSE if the result of the new instruction cannot exist
658 // within the constraints (register class, bank, or low-level type) of
659 // the old instruction.
660 if (!MRI->constrainRegAttrs(Reg: NewReg, ConstrainingReg: OldReg)) {
661 LLVM_DEBUG(
662 dbgs() << "*** Not the same register constraints, avoid CSE!\n");
663 DoCSE = false;
664 break;
665 }
666
667 CSEPairs.emplace_back(Args&: OldReg, Args&: NewReg);
668 --NumDefs;
669 }
670
671 // Actually perform the elimination.
672 if (DoCSE) {
673 for (const std::pair<Register, Register> &CSEPair : CSEPairs) {
674 Register OldReg = CSEPair.first;
675 Register NewReg = CSEPair.second;
676 // OldReg may have been unused but is used now, clear the Dead flag
677 MachineInstr *Def = MRI->getUniqueVRegDef(Reg: NewReg);
678 assert(Def != nullptr && "CSEd register has no unique definition?");
679 Def->clearRegisterDeads(Reg: NewReg);
680 // Replace with NewReg and clear kill flags which may be wrong now.
681 MRI->replaceRegWith(FromReg: OldReg, ToReg: NewReg);
682 MRI->clearKillFlags(Reg: NewReg);
683 }
684
685 // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
686 // we should make sure it is not dead at CSMI.
687 for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
688 CSMI->getOperand(i: ImplicitDefToUpdate).setIsDead(false);
689 for (const auto &PhysDef : PhysDefs)
690 if (!MI.getOperand(i: PhysDef.first).isDead())
691 CSMI->getOperand(i: PhysDef.first).setIsDead(false);
692
693 // Go through implicit defs of CSMI and MI, and clear the kill flags on
694 // their uses in all the instructions between CSMI and MI.
695 // We might have made some of the kill flags redundant, consider:
696 // subs ... implicit-def %nzcv <- CSMI
697 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
698 // subs ... implicit-def %nzcv <- MI, to be eliminated
699 // csinc ... implicit killed %nzcv
700 // Since we eliminated MI, and reused a register imp-def'd by CSMI
701 // (here %nzcv), that register, if it was killed before MI, should have
702 // that kill flag removed, because it's lifetime was extended.
703 if (CSMI->getParent() == MI.getParent()) {
704 for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
705 for (auto ImplicitDef : ImplicitDefs)
706 if (MachineOperand *MO = II->findRegisterUseOperand(
707 Reg: ImplicitDef, TRI, /*isKill=*/true))
708 MO->setIsKill(false);
709 } else {
710 // If the instructions aren't in the same BB, bail out and clear the
711 // kill flag on all uses of the imp-def'd register.
712 for (auto ImplicitDef : ImplicitDefs)
713 MRI->clearKillFlags(Reg: ImplicitDef);
714 }
715
716 if (CrossMBBPhysDef) {
717 // Add physical register defs now coming in from a predecessor to MBB
718 // livein list.
719 while (!PhysDefs.empty()) {
720 auto LiveIn = PhysDefs.pop_back_val();
721 if (!MBB->isLiveIn(Reg: LiveIn.second))
722 MBB->addLiveIn(PhysReg: LiveIn.second);
723 }
724 ++NumCrossBBCSEs;
725 }
726
727 MI.eraseFromParent();
728 ++NumCSEs;
729 if (!PhysRefs.empty())
730 ++NumPhysCSEs;
731 if (Commuted)
732 ++NumCommutes;
733 Changed = true;
734 } else {
735 VNT.insert(Key: &MI, Val: CurrVN++);
736 Exps.push_back(Elt: &MI);
737 }
738 CSEPairs.clear();
739 ImplicitDefsToUpdate.clear();
740 ImplicitDefs.clear();
741 }
742
743 return Changed;
744}
745
746/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
747/// dominator tree node if its a leaf or all of its children are done. Walk
748/// up the dominator tree to destroy ancestors which are now done.
749void MachineCSEImpl::ExitScopeIfDone(
750 MachineDomTreeNode *Node,
751 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren) {
752 if (OpenChildren[Node])
753 return;
754
755 // Pop scope.
756 ExitScope(MBB: Node->getBlock());
757
758 // Now traverse upwards to pop ancestors whose offsprings are all done.
759 while (MachineDomTreeNode *Parent = Node->getIDom()) {
760 unsigned Left = --OpenChildren[Parent];
761 if (Left != 0)
762 break;
763 ExitScope(MBB: Parent->getBlock());
764 Node = Parent;
765 }
766}
767
768bool MachineCSEImpl::PerformCSE(MachineDomTreeNode *Node) {
769 SmallVector<MachineDomTreeNode*, 32> Scopes;
770 SmallVector<MachineDomTreeNode*, 8> WorkList;
771 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
772
773 CurrVN = 0;
774
775 // Perform a DFS walk to determine the order of visit.
776 WorkList.push_back(Elt: Node);
777 do {
778 Node = WorkList.pop_back_val();
779 Scopes.push_back(Elt: Node);
780 OpenChildren[Node] = Node->getNumChildren();
781 append_range(C&: WorkList, R: Node->children());
782 } while (!WorkList.empty());
783
784 // Now perform CSE.
785 bool Changed = false;
786 for (MachineDomTreeNode *Node : Scopes) {
787 MachineBasicBlock *MBB = Node->getBlock();
788 EnterScope(MBB);
789 Changed |= ProcessBlockCSE(MBB);
790 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
791 ExitScopeIfDone(Node, OpenChildren);
792 }
793
794 return Changed;
795}
796
797// We use stronger checks for PRE candidate rather than for CSE ones to embrace
798// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
799// to exclude instrs created by PRE that won't be CSEed later.
800bool MachineCSEImpl::isPRECandidate(MachineInstr *MI,
801 SmallSet<MCRegister, 8> &PhysRefs) {
802 if (!isCSECandidate(MI) ||
803 MI->isNotDuplicable() ||
804 MI->mayLoad() ||
805 TII->isAsCheapAsAMove(MI: *MI) ||
806 MI->getNumDefs() != 1 ||
807 MI->getNumExplicitDefs() != 1)
808 return false;
809
810 for (const MachineOperand &MO : MI->operands()) {
811 if (MO.isReg() && !MO.getReg().isVirtual()) {
812 if (MO.isDef())
813 return false;
814 else
815 PhysRefs.insert(V: MO.getReg());
816 }
817 }
818
819 return true;
820}
821
822bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT,
823 MachineBasicBlock *MBB) {
824 bool Changed = false;
825 for (MachineInstr &MI : llvm::make_early_inc_range(Range&: *MBB)) {
826 SmallSet<MCRegister, 8> PhysRefs;
827 if (!isPRECandidate(MI: &MI, PhysRefs))
828 continue;
829
830 auto [It, Inserted] = PREMap.try_emplace(Key: &MI, Args&: MBB);
831 if (Inserted)
832 continue;
833
834 auto *MBB1 = It->second;
835 assert(
836 !DT->properlyDominates(MBB, MBB1) &&
837 "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
838 auto CMBB = DT->findNearestCommonDominator(A: MBB, B: MBB1);
839 if (!CMBB->isLegalToHoistInto())
840 continue;
841
842 if (!isProfitableToHoistInto(CandidateBB: CMBB, MBB, MBB1))
843 continue;
844
845 // Two instrs are partial redundant if their basic blocks are reachable
846 // from one to another but one doesn't dominate another.
847 if (CMBB != MBB1) {
848 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
849 if (BB != nullptr && BB1 != nullptr &&
850 (isPotentiallyReachable(From: BB1, To: BB) ||
851 isPotentiallyReachable(From: BB, To: BB1))) {
852 // The following check extends the definition of `isConvergent` to
853 // assume a convergent instruction is dependent not only on additional
854 // conditions, but also on fewer conditions. LLVM does not have a
855 // MachineInstr attribute which expresses this extended definition, so
856 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
857 // subset of `isConvergent` instructions which do fall into this
858 // extended definition.
859 if (MI.isConvergent() && CMBB != MBB)
860 continue;
861
862 // If this instruction uses physical registers then we can only do PRE
863 // if it's using the value that is live at the place we're hoisting to.
864 bool NonLocal;
865 PhysDefVector PhysDefs;
866 if (!PhysRefs.empty() &&
867 !PhysRegDefsReach(CSMI: &*(CMBB->getFirstTerminator()), MI: &MI, PhysRefs,
868 PhysDefs, NonLocal))
869 continue;
870
871 assert(MI.getOperand(0).isDef() &&
872 "First operand of instr with one explicit def must be this def");
873 Register VReg = MI.getOperand(i: 0).getReg();
874 Register NewReg = MRI->cloneVirtualRegister(VReg);
875 if (!isProfitableToCSE(CSReg: NewReg, Reg: VReg, CSBB: CMBB, MI: &MI))
876 continue;
877 MachineInstr &NewMI =
878 TII->duplicate(MBB&: *CMBB, InsertBefore: CMBB->getFirstTerminator(), Orig: MI);
879
880 // When hoisting, make sure we don't carry the debug location of
881 // the original instruction, as that's not correct and can cause
882 // unexpected jumps when debugging optimized code.
883 auto EmptyDL = DebugLoc();
884 NewMI.setDebugLoc(EmptyDL);
885
886 NewMI.getOperand(i: 0).setReg(NewReg);
887
888 PREMap[&MI] = CMBB;
889 ++NumPREs;
890 Changed = true;
891 }
892 }
893 }
894 return Changed;
895}
896
897// This simple PRE (partial redundancy elimination) pass doesn't actually
898// eliminate partial redundancy but transforms it to full redundancy,
899// anticipating that the next CSE step will eliminate this created redundancy.
900// If CSE doesn't eliminate this, than created instruction will remain dead
901// and eliminated later by Remove Dead Machine Instructions pass.
902bool MachineCSEImpl::PerformSimplePRE(MachineDominatorTree *DT) {
903 SmallVector<MachineDomTreeNode *, 32> BBs;
904
905 PREMap.clear();
906 bool Changed = false;
907 BBs.push_back(Elt: DT->getRootNode());
908 do {
909 auto Node = BBs.pop_back_val();
910 append_range(C&: BBs, R: Node->children());
911
912 MachineBasicBlock *MBB = Node->getBlock();
913 Changed |= ProcessBlockPRE(DT, MBB);
914
915 } while (!BBs.empty());
916
917 return Changed;
918}
919
920bool MachineCSEImpl::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
921 MachineBasicBlock *MBB,
922 MachineBasicBlock *MBB1) {
923 if (CandidateBB->getParent()->getFunction().hasMinSize())
924 return true;
925 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
926 assert(DT->dominates(CandidateBB, MBB1) &&
927 "CandidateBB should dominate MBB1");
928 return MBFI->getBlockFreq(MBB: CandidateBB) <=
929 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB: MBB1);
930}
931
932void MachineCSEImpl::releaseMemory() {
933 ScopeMap.clear();
934 PREMap.clear();
935 Exps.clear();
936}
937
938bool MachineCSEImpl::run(MachineFunction &MF) {
939 TII = MF.getSubtarget().getInstrInfo();
940 TRI = MF.getSubtarget().getRegisterInfo();
941 MRI = &MF.getRegInfo();
942 LookAheadLimit = TII->getMachineCSELookAheadLimit();
943 bool ChangedPRE, ChangedCSE;
944 ChangedPRE = PerformSimplePRE(DT);
945 ChangedCSE = PerformCSE(Node: DT->getRootNode());
946 releaseMemory();
947 return ChangedPRE || ChangedCSE;
948}
949
950PreservedAnalyses MachineCSEPass::run(MachineFunction &MF,
951 MachineFunctionAnalysisManager &MFAM) {
952 MFPropsModifier _(*this, MF);
953
954 MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
955 MachineBlockFrequencyInfo &MBFI =
956 MFAM.getResult<MachineBlockFrequencyAnalysis>(IR&: MF);
957 MachineCSEImpl Impl(&MDT, &MBFI);
958 bool Changed = Impl.run(MF);
959 if (!Changed)
960 return PreservedAnalyses::all();
961
962 auto PA = getMachineFunctionPassPreservedAnalyses();
963 PA.preserve<MachineLoopAnalysis>();
964 PA.preserve<MachineDominatorTreeAnalysis>();
965 PA.preserve<MachineBlockFrequencyAnalysis>();
966 PA.preserveSet<CFGAnalyses>();
967 return PA;
968}
969
970bool MachineCSELegacy::runOnMachineFunction(MachineFunction &MF) {
971 if (skipFunction(F: MF.getFunction()))
972 return false;
973
974 MachineDominatorTree &MDT =
975 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
976 MachineBlockFrequencyInfo &MBFI =
977 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
978 MachineCSEImpl Impl(&MDT, &MBFI);
979 return Impl.run(MF);
980}
981