//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instructions into successor blocks when possible, so that
// they aren't executed on paths where their results aren't needed.
//
// This pass is not intended to be a replacement or a complete alternative
// for an LLVM-IR-level sinking pass. It is only designed to sink simple
// constructs that are not exposed before lowering and instruction selection.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-sink"

static cl::opt<bool>
    SplitEdges("machine-sink-split",
               cl::desc("Split critical edges during machine sinking"),
               cl::init(true), cl::Hidden);

static cl::opt<bool>
    UseBlockFreqInfo("machine-sink-bfi",
                     cl::desc("Use block frequency info to find successors to sink"),
                     cl::init(true), cl::Hidden);

static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
    "machine-sink-split-probability-threshold",
    cl::desc(
        "Percentage threshold for splitting single-instruction critical edge. "
        "If the branch threshold is higher than this threshold, we allow "
        "speculative execution of up to 1 instruction to avoid branching to "
        "the split critical edge"),
    cl::init(40), cl::Hidden);

static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
    "machine-sink-load-instrs-threshold",
    cl::desc("Do not try to find an aliasing store for a load if there is an "
             "in-path block whose instruction count is higher than this "
             "threshold."),
    cl::init(2000), cl::Hidden);

static cl::opt<unsigned> SinkLoadBlocksThreshold(
    "machine-sink-load-blocks-threshold",
    cl::desc("Do not try to find an aliasing store for a load if the number "
             "of blocks in the straight line is higher than this threshold."),
    cl::init(20), cl::Hidden);

static cl::opt<bool>
    SinkInstsIntoCycle("sink-insts-to-avoid-spills",
                       cl::desc("Sink instructions into cycles to avoid "
                                "register spills"),
                       cl::init(false), cl::Hidden);

static cl::opt<unsigned> SinkIntoCycleLimit(
    "machine-sink-cycle-limit",
    cl::desc("The maximum number of instructions considered for cycle sinking."),
    cl::init(50), cl::Hidden);

STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");

namespace {

class MachineSinking : public MachineFunctionPass {
  const TargetSubtargetInfo *STI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineRegisterInfo *MRI = nullptr;      // Machine register information
  MachineDominatorTree *DT = nullptr;      // Machine dominator tree
  MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
  MachineCycleInfo *CI = nullptr;
  MachineBlockFrequencyInfo *MBFI = nullptr;
  const MachineBranchProbabilityInfo *MBPI = nullptr;
  AliasAnalysis *AA = nullptr;
  RegisterClassInfo RegClassInfo;

  // Remember which edges have been considered for breaking.
  SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
      CEBCandidates;
  // Memorize the register that also wanted to sink into the same block along
  // a different critical edge.
  // {register to sink, sink-to block} -> the first sink-from block.
  // We're recording the first sink-from block because that (critical) edge
  // was deferred until we see another register that's going to sink into the
  // same block.
  DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
      CEMergeCandidates;
  // Remember which edges we are about to split.
  // This is different from CEBCandidates since those edges
  // will be split.
  SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;

  DenseSet<Register> RegsToClearKillFlags;

  using AllSuccsCache =
      SmallDenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;

  /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
  /// post-dominated by another DBG_VALUE of the same variable location.
  /// This is necessary to detect sequences such as:
  ///     %0 = someinst
  ///     DBG_VALUE %0, !123, !DIExpression()
  ///     %1 = anotherinst
  ///     DBG_VALUE %1, !123, !DIExpression()
  /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as that
  /// would re-order assignments.
  using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;

  /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
  /// debug instructions to sink.
  SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;

  /// Record of debug variables that have had their locations set in the
  /// current block.
  DenseSet<DebugVariable> SeenDbgVars;

  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
      HasStoreCache;

  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
           SmallVector<MachineInstr *>>
      StoreInstrCache;

  /// Cached BB's register pressure.
  DenseMap<const MachineBasicBlock *, std::vector<unsigned>>
      CachedRegisterPressure;

  bool EnableSinkAndFold;

public:
  static char ID; // Pass identification

  MachineSinking() : MachineFunctionPass(ID) {
    initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    AU.addRequired<MachineCycleInfoWrapperPass>();
    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
    AU.addPreserved<MachineCycleInfoWrapperPass>();
    AU.addPreserved<MachineLoopInfoWrapperPass>();
    if (UseBlockFreqInfo)
      AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  void releaseMemory() override {
    CEBCandidates.clear();
    CEMergeCandidates.clear();
  }

private:
  bool ProcessBlock(MachineBasicBlock &MBB);
  void ProcessDbgInst(MachineInstr &MI);
  bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                  MachineBasicBlock *To, bool BreakPHIEdge);
  bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                   MachineBasicBlock *To,
                                   MachineBasicBlock *&DeferredFromBlock);

  bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
                       MachineInstr &MI);

  /// Postpone the splitting of the given critical
  /// edge (\p From, \p To).
  ///
  /// We do not split the edges on the fly. Indeed, this invalidates
  /// the dominance information and thus triggers a lot of updates
  /// of that information underneath.
  /// Instead, we postpone all the splits after each iteration of
  /// the main loop. That way, the information is at least valid
  /// for the lifetime of an iteration.
  ///
  /// \return True if the edge is marked as toSplit, false otherwise.
  /// False can be returned if, for instance, this is not profitable.
  bool PostponeSplitCriticalEdge(MachineInstr &MI,
                                 MachineBasicBlock *From,
                                 MachineBasicBlock *To,
                                 bool BreakPHIEdge);
  bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                       AllSuccsCache &AllSuccessors);

  /// If we sink a COPY inst, some debug users of its destination may no
  /// longer be dominated by the COPY, and will eventually be dropped.
  /// This is easily rectified by forwarding the non-dominated debug uses
  /// to the copy source.
  void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
                                     MachineBasicBlock *TargetBlock);
  bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
                               MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
                               bool &LocalUse) const;
  MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                      bool &BreakPHIEdge,
                                      AllSuccsCache &AllSuccessors);

  void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                               SmallVectorImpl<MachineInstr *> &Candidates);
  bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);

  bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                            MachineBasicBlock *MBB,
                            MachineBasicBlock *SuccToSinkTo,
                            AllSuccsCache &AllSuccessors);

  bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                       MachineBasicBlock *MBB);

  bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB);

  SmallVector<MachineBasicBlock *, 4> &
  GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                         AllSuccsCache &AllSuccessors) const;

  std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);

  bool registerPressureSetExceedsLimit(unsigned NRegs,
                                       const TargetRegisterClass *RC,
                                       const MachineBasicBlock &MBB);
};

} // end anonymous namespace

char MachineSinking::ID = 0;

char &llvm::MachineSinkingID = MachineSinking::ID;

INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
                      "Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
                    "Machine code sinking", false, false)

/// Return true if a target defined block prologue instruction interferes
/// with a sink candidate.
static bool blockPrologueInterferes(const MachineBasicBlock *BB,
                                    MachineBasicBlock::const_iterator End,
                                    const MachineInstr &MI,
                                    const TargetRegisterInfo *TRI,
                                    const TargetInstrInfo *TII,
                                    const MachineRegisterInfo *MRI) {
  for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End;
       ++PI) {
    // Only check target defined prologue instructions
    if (!TII->isBasicBlockPrologue(*PI))
      continue;
    for (auto &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      if (MO.isUse()) {
        if (Reg.isPhysical() &&
            (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
          continue;
        if (PI->modifiesRegister(Reg, TRI))
          return true;
      } else {
        if (PI->readsRegister(Reg, TRI))
          return true;
        // Check for interference with non-dead defs
        auto *DefOp = PI->findRegisterDefOperand(Reg, TRI, false, true);
        if (DefOp && !DefOp->isDead())
          return true;
      }
    }
  }

  return false;
}

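/// Coalesce a trivial COPY between virtual registers of the same class whose
/// source has a single non-debug use: replace the destination register with
/// the source everywhere and erase the COPY.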
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
                                                     MachineBasicBlock *MBB) {
  if (!MI.isCopy())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();
  if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
      !MRI->hasOneNonDBGUse(SrcReg))
    return false;

  const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
  const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
  if (SRC != DRC)
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
  if (DefMI->isCopyLike())
    return false;
  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
  LLVM_DEBUG(dbgs() << "*** to: " << MI);
  MRI->replaceRegWith(DstReg, SrcReg);
  MI.eraseFromParent();

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  MRI->clearKillFlags(SrcReg);

  ++NumCoalesces;
  return true;
}

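/// Try to fold MI into each of its uses. Chains of COPYs must terminate in
/// either a copy into a physical register (where a copy of MI is
/// rematerialized) or a load/store that uses the register as part of its
/// address (where MI is folded into the addressing mode). On success the
/// original MI and the intermediate COPYs are erased.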
bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
                                        MachineBasicBlock *MBB) {
  if (MI.isCopy() || MI.mayLoadOrStore() ||
      MI.getOpcode() == TargetOpcode::REG_SEQUENCE)
    return false;

  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  bool SawStore = true;
  if (!MI.isSafeToMove(AA, SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't sink defs/uses of hard registers or if the instruction defines more
  // than one register.
  // Don't sink more than two register uses - it'll cover most of the cases and
  // greatly simplifies the register pressure checks.
  Register DefReg;
  Register UsedRegA, UsedRegB;
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() ||
        MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() ||
        MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask())
      continue;
    if (!MO.isReg())
      return false;

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isVirtual()) {
      if (MO.isDef()) {
        if (DefReg)
          return false;
        DefReg = Reg;
        continue;
      }

      if (UsedRegA == 0)
        UsedRegA = Reg;
      else if (UsedRegB == 0)
        UsedRegB = Reg;
      else
        return false;
      continue;
    }

    if (Reg.isPhysical() && MO.isUse() &&
        (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
      continue;

    return false;
  }

  // Scan uses of the destination register. Every use, except the last, must be
  // a copy, with a chain of copies terminating with either a copy into a hard
  // register, or a load/store instruction where the use is part of the
  // address (*not* the stored value).
  using SinkInfo = std::pair<MachineInstr *, ExtAddrMode>;
  SmallVector<SinkInfo> SinkInto;
  SmallVector<Register> Worklist;

  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
  const TargetRegisterClass *RCA =
      UsedRegA == 0 ? nullptr : MRI->getRegClass(UsedRegA);
  const TargetRegisterClass *RCB =
      UsedRegB == 0 ? nullptr : MRI->getRegClass(UsedRegB);

  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();

    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
      ExtAddrMode MaybeAM;
      MachineInstr &UseInst = *MO.getParent();
      if (UseInst.isCopy()) {
        Register DstReg;
        if (const MachineOperand &O = UseInst.getOperand(0); O.isReg())
          DstReg = O.getReg();
        if (DstReg == 0)
          return false;
        if (DstReg.isVirtual()) {
          Worklist.push_back(DstReg);
          continue;
        }
        // If we are going to replace a copy, the original instruction must be
        // as cheap as a copy.
        if (!TII->isAsCheapAsAMove(MI))
          return false;
        // The hard register must be in the register class of the original
        // instruction's destination register.
        if (!RC->contains(DstReg))
          return false;
      } else if (UseInst.mayLoadOrStore()) {
        ExtAddrMode AM;
        if (!TII->canFoldIntoAddrMode(UseInst, Reg, MI, AM))
          return false;
        MaybeAM = AM;
      } else {
        return false;
      }

      if (UseInst.getParent() != MI.getParent()) {
        // If the register class of the register we are replacing is a superset
        // of any of the register classes of the operands of the materialized
        // instruction don't consider that live range extended.
        const TargetRegisterClass *RCS = MRI->getRegClass(Reg);
        if (RCA && RCA->hasSuperClassEq(RCS))
          RCA = nullptr;
        else if (RCB && RCB->hasSuperClassEq(RCS))
          RCB = nullptr;
        if (RCA || RCB) {
          if (RCA == nullptr) {
            RCA = RCB;
            RCB = nullptr;
          }

          unsigned NRegs = !!RCA + !!RCB;
          if (RCA == RCB)
            RCB = nullptr;

          // Check we don't exceed register pressure at the destination.
          const MachineBasicBlock &MBB = *UseInst.getParent();
          if (RCB == nullptr) {
            if (registerPressureSetExceedsLimit(NRegs, RCA, MBB))
              return false;
          } else if (registerPressureSetExceedsLimit(1, RCA, MBB) ||
                     registerPressureSetExceedsLimit(1, RCB, MBB)) {
            return false;
          }
        }
      }

      SinkInto.emplace_back(&UseInst, MaybeAM);
    }
  }

  if (SinkInto.empty())
    return false;

  // Now we know we can fold the instruction in all its users.
  for (auto &[SinkDst, MaybeAM] : SinkInto) {
    MachineInstr *New = nullptr;
    LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
               SinkDst->dump());
    if (SinkDst->isCopy()) {
      // TODO: After performing the sink-and-fold, the original instruction is
      // deleted. Its value is still available (in a hard register), so if
      // there are debug instructions which refer to the (now deleted) virtual
      // register they could be updated to refer to the hard register, in
      // principle. However, it's not clear how to do that, moreover in some
      // cases the debug instructions may need to be replicated proportionally
      // to the number of the COPY instructions replaced and in some extreme
      // cases we can end up with quadratic increase in the number of debug
      // instructions.

      // Sink a copy of the instruction, replacing a COPY instruction.
      MachineBasicBlock::iterator InsertPt = SinkDst->getIterator();
      Register DstReg = SinkDst->getOperand(0).getReg();
      TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI, *TRI);
      New = &*std::prev(InsertPt);
      if (!New->getDebugLoc())
        New->setDebugLoc(SinkDst->getDebugLoc());

      // The operand registers of the "sunk" instruction have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (UsedRegA)
        MRI->clearKillFlags(UsedRegA);
      if (UsedRegB)
        MRI->clearKillFlags(UsedRegB);
    } else {
      // Fold instruction into the addressing mode of a memory instruction.
      New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);

      // The registers of the addressing mode may have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
      if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
    }
    LLVM_DEBUG(dbgs() << "yielding"; New->dump());
    // Clear the StoreInstrCache, since we may invalidate it by erasing.
    if (SinkDst->mayStore() && !SinkDst->hasOrderedMemoryRef())
      StoreInstrCache.clear();
    SinkDst->eraseFromParent();
  }

  // Collect operands that need to be cleaned up because the registers no
  // longer exist (in COPYs and debug instructions). We cannot delete
  // instructions or clear operands while traversing register uses.
  SmallVector<MachineOperand *> Cleanup;
  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();
    for (MachineOperand &MO : MRI->use_operands(Reg)) {
      MachineInstr *U = MO.getParent();
      assert((U->isCopy() || U->isDebugInstr()) &&
             "Only debug uses and copies must remain");
      if (U->isCopy())
        Worklist.push_back(U->getOperand(0).getReg());
      Cleanup.push_back(&MO);
    }
  }

  // Delete the dead COPYs and clear operands in debug instructions
  for (MachineOperand *MO : Cleanup) {
    MachineInstr *I = MO->getParent();
    if (I->isCopy()) {
      I->eraseFromParent();
    } else {
      MO->setReg(0);
      MO->setSubReg(0);
    }
  }

  MI.eraseFromParent();
  return true;
}

/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move def
/// after uses.
bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
                                             MachineBasicBlock *MBB,
                                             MachineBasicBlock *DefMBB,
                                             bool &BreakPHIEdge,
                                             bool &LocalUse) const {
  assert(Reg.isVirtual() && "Only makes sense for vregs");

  // Ignore debug uses because debug info doesn't affect the code.
  if (MRI->use_nodbg_empty(Reg))
    return true;

  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
  // into and they are all PHI nodes. In this case, machine-sink must break
  // the critical edge first. e.g.
  //
  // %bb.1:
  //   Predecessors according to CFG: %bb.0
  //     ...
  //     %def = DEC64_32r %x, implicit-def dead %eflags
  //     ...
  //     JE_4 <%bb.37>, implicit %eflags
  //   Successors according to CFG: %bb.37 %bb.2
  //
  // %bb.2:
  //     %p = PHI %y, %bb.0, %def, %bb.1
  if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
        MachineInstr *UseInst = MO.getParent();
        unsigned OpNo = MO.getOperandNo();
        MachineBasicBlock *UseBlock = UseInst->getParent();
        return UseBlock == MBB && UseInst->isPHI() &&
               UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
      })) {
    BreakPHIEdge = true;
    return true;
  }

  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
    // Determine the block of the use.
    MachineInstr *UseInst = MO.getParent();
    unsigned OpNo = &MO - &UseInst->getOperand(0);
    MachineBasicBlock *UseBlock = UseInst->getParent();
    if (UseInst->isPHI()) {
      // PHI nodes use the operand in the predecessor block, not the block with
      // the PHI.
      UseBlock = UseInst->getOperand(OpNo + 1).getMBB();
    } else if (UseBlock == DefMBB) {
      LocalUse = true;
      return false;
    }

    // Check that it dominates.
    if (!DT->dominates(MBB, UseBlock))
      return false;
  }

  return true;
}

/// Return true if this machine instruction loads from global offset table or
/// constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert(MI.mayLoad() && "Expected MI that loads!");

  // If we lost memory operands, conservatively assume that the instruction
  // reads from everything.
  if (MI.memoperands_empty())
    return true;

  for (MachineMemOperand *MemOp : MI.memoperands())
    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
      if (PSV->isGOT() || PSV->isConstantPool())
        return true;

  return false;
}

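/// Scan the preheader \p BB of \p Cycle and collect candidate instructions
/// for cycle sinking: each must be accepted by the target, cycle-invariant,
/// safe to move, non-convergent, and the unique definition of its destination
/// register. Loads qualify only if they may load from the GOT or a constant
/// pool.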
void MachineSinking::FindCycleSinkCandidates(
    MachineCycle *Cycle, MachineBasicBlock *BB,
    SmallVectorImpl<MachineInstr *> &Candidates) {
  for (auto &MI : *BB) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
    if (!TII->shouldSink(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                           "target\n");
      continue;
    }
    if (!isCycleInvariant(Cycle, MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
      continue;
    }
    bool DontMoveAcrossStore = true;
    if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
      continue;
    }
    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Don't sink loads not from GOT or "
                           "constant pool\n");
      continue;
    }
    if (MI.isConvergent())
      continue;

    const MachineOperand &MO = MI.getOperand(0);
    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;

    LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
    Candidates.push_back(&MI);
  }
}

bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");

  STI = &MF.getSubtarget();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &MF.getRegInfo();
  DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
  MBFI = UseBlockFreqInfo
             ? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
             : nullptr;
  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  RegClassInfo.runOnMachineFunction(MF);
  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
  EnableSinkAndFold = PassConfig->getEnableSinkAndFold();

  bool EverMadeChange = false;

  while (true) {
    bool MadeChange = false;

    // Process all basic blocks.
    CEBCandidates.clear();
    CEMergeCandidates.clear();
    ToSplit.clear();
    for (auto &MBB : MF)
      MadeChange |= ProcessBlock(MBB);

    // If we have anything we marked as toSplit, split it now.
    for (const auto &Pair : ToSplit) {
      auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
      if (NewSucc != nullptr) {
        LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
                          << printMBBReference(*Pair.first) << " -- "
                          << printMBBReference(*NewSucc) << " -- "
                          << printMBBReference(*Pair.second) << '\n');
        if (MBFI)
          MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);

        MadeChange = true;
        ++NumSplit;
        CI->splitCriticalEdge(Pair.first, Pair.second, NewSucc);
      } else
        LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
    }
    // If this iteration over the code changed anything, keep iterating.
    if (!MadeChange) break;
    EverMadeChange = true;
  }

  if (SinkInstsIntoCycle) {
    SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_begin(),
                                          CI->toplevel_end());
    for (auto *Cycle : Cycles) {
      MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
      if (!Preheader) {
        LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
        continue;
      }
      SmallVector<MachineInstr *, 8> Candidates;
      FindCycleSinkCandidates(Cycle, Preheader, Candidates);

      // Walk the candidates in reverse order so that we start with the use
      // of a def-use chain, if there is any.
      // TODO: Sort the candidates using a cost-model.
      unsigned i = 0;
      for (MachineInstr *I : llvm::reverse(Candidates)) {
        if (i++ == SinkIntoCycleLimit) {
          LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
                               "be analysed.\n");
          break;
        }

        if (!SinkIntoCycle(Cycle, *I))
          break;
        EverMadeChange = true;
        ++NumCycleSunk;
      }
    }
  }

  HasStoreCache.clear();
  StoreInstrCache.clear();

  // Now clear any kill flags for recorded registers.
  for (auto I : RegsToClearKillFlags)
    MRI->clearKillFlags(I);
  RegsToClearKillFlags.clear();

  return EverMadeChange;
}

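/// Walk the given block bottom-up, trying to sink-and-fold, coalesce, or sink
/// each instruction into one of the block's successors. Returns true if
/// anything changed.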
bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
  if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty())
    return false;

  // Don't bother sinking code out of unreachable blocks. In addition to being
  // unprofitable, it can also lead to infinite looping, because in an
  // unreachable cycle there may be nowhere to stop.
  if (!DT->isReachableFromEntry(&MBB)) return false;

  bool MadeChange = false;

  // Cache all successors, sorted by frequency info and cycle depth.
  AllSuccsCache AllSuccessors;

  // Walk the basic block bottom-up. Remember if we saw a store.
  MachineBasicBlock::iterator I = MBB.end();
  --I;
  bool ProcessedBegin, SawStore = false;
  do {
    MachineInstr &MI = *I; // The instruction to sink.

    // Predecrement I (if it's not begin) so that it isn't invalidated by
    // sinking.
    ProcessedBegin = I == MBB.begin();
    if (!ProcessedBegin)
      --I;

    if (MI.isDebugOrPseudoInstr()) {
      if (MI.isDebugValue())
        ProcessDbgInst(MI);
      continue;
    }

    if (EnableSinkAndFold && PerformSinkAndFold(MI, &MBB)) {
      MadeChange = true;
      continue;
    }

    // Can't sink anything out of a block that has fewer than two successors.
    if (MBB.succ_size() <= 1)
      continue;

    if (PerformTrivialForwardCoalescing(MI, &MBB)) {
      MadeChange = true;
      continue;
    }

    if (SinkInstruction(MI, SawStore, AllSuccessors)) {
      ++NumSunk;
      MadeChange = true;
    }

    // If we just processed the first instruction in the block, we're done.
  } while (!ProcessedBegin);

  SeenDbgUsers.clear();
  SeenDbgVars.clear();
  // Recalculate the BB register pressure after sinking one BB.
  CachedRegisterPressure.clear();
  return MadeChange;
}

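/// Record a DBG_VALUE: remember its variable, and for every virtual register
/// it reads, remember the instruction so that a later sink of the register's
/// definition can decide whether to sink the DBG_VALUE along with it.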
void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
  // When we see DBG_VALUEs for registers, record any vreg it reads, so that
  // we know what to sink if the vreg def sinks.
  assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");

  DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
                    MI.getDebugLoc()->getInlinedAt());
  bool SeenBefore = SeenDbgVars.contains(Var);

  for (MachineOperand &MO : MI.debug_operands()) {
    if (MO.isReg() && MO.getReg().isVirtual())
      SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
  }

  // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
  SeenDbgVars.insert(Var);
}

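/// Heuristically decide whether it is worth splitting the critical edge
/// (\p From, \p To) in order to sink \p MI. If an earlier attempt to sink the
/// same register into \p To from a different block was deferred, that block
/// is returned in \p DeferredFromBlock.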
bool MachineSinking::isWorthBreakingCriticalEdge(
    MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
    MachineBasicBlock *&DeferredFromBlock) {
  // FIXME: Need much better heuristics.

  // If the pass has already considered breaking this edge (during this pass
  // through the function), then let's go ahead and break it. This means
  // sinking multiple "cheap" instructions into the same block.
  if (!CEBCandidates.insert(std::make_pair(From, To)).second)
    return true;

  if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
    return true;

  // Check and record the register and the destination block we want to sink
  // into. Note that we do this before the branch probability check below,
  // because we want to record the initial candidate even if it is on a hot
  // edge, so that other candidates that are not on hot edges can be sunk as
  // well.
  for (const auto &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
    auto Key = std::make_pair(SrcReg, To);
    auto Res = CEMergeCandidates.try_emplace(Key, From);
    // We wanted to sink the same register into the same block, consider it to
    // be profitable.
    if (!Res.second) {
      // Return the source block that was previously held off.
      DeferredFromBlock = Res.first->second;
      return true;
    }
  }

  if (From->isSuccessor(To) &&
      MBPI->getEdgeProbability(From, To) <=
          BranchProbability(SplitEdgeProbabilityThreshold, 100))
    return true;

  // MI is cheap, we probably don't want to break the critical edge for it.
  // However, if this would allow some definitions of its source operands
  // to be sunk then it's probably worth it.
  for (const MachineOperand &MO : MI.all_uses()) {
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    // We don't move live definitions of physical registers,
    // so sinking their uses won't enable any opportunities.
    if (Reg.isPhysical())
      continue;

    // If this instruction is the only user of a virtual register,
    // check if breaking the edge will enable sinking
    // both this instruction and the defining instruction.
    if (MRI->hasOneNonDBGUse(Reg)) {
      // If the definition resides in same MBB,
      // claim it's likely we can sink these together.
      // If definition resides elsewhere, we aren't
      // blocking it from being sunk so don't break the edge.
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (DefMI->getParent() == MI.getParent())
        return true;
    }
  }

  return false;
}

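/// Return true if the critical edge (\p FromBB, \p ToBB) may legally be split
/// in order to sink \p MI: the edge must not be a cycle back edge, and,
/// unless all uses are PHIs, no predecessor of \p ToBB other than \p FromBB
/// may be dominated by \p FromBB.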
bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
                                                MachineBasicBlock *FromBB,
                                                MachineBasicBlock *ToBB,
                                                bool BreakPHIEdge) {
  // Avoid breaking back edge. From == To means backedge for single BB cycle.
  if (!SplitEdges || FromBB == ToBB || !FromBB->isSuccessor(ToBB))
    return false;

  MachineCycle *FromCycle = CI->getCycle(FromBB);
  MachineCycle *ToCycle = CI->getCycle(ToBB);

  // Check for backedges of more "complex" cycles.
  if (FromCycle == ToCycle && FromCycle &&
      (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
    return false;

  // It's not always legal to break critical edges and sink the computation
  // to the edge.
  //
  // %bb.1:
  // v1024
  // Beq %bb.3
  // <fallthrough>
  // %bb.2:
  // ... no uses of v1024
  // <fallthrough>
  // %bb.3:
  // ...
  //    = v1024
  //
  // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
  //
  // %bb.1:
  // ...
  // Bne %bb.2
  // %bb.4:
  // v1024 =
  // B %bb.3
  // %bb.2:
  // ... no uses of v1024
  // <fallthrough>
  // %bb.3:
  // ...
  //    = v1024
  //
  // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
  // flow. We need to ensure the new basic block where the computation is
  // sunk to dominates all the uses.
  // It's only legal to break critical edge and sink the computation to the
  // new block if all the predecessors of "To", except for "From", are
  // not dominated by "From". Given SSA property, this means these
  // predecessors are dominated by "To".
  //
  // There is no need to do this check if all the uses are PHI nodes. PHI
  // sources are only defined on the specific predecessor edges.
  if (!BreakPHIEdge) {
    for (MachineBasicBlock *Pred : ToBB->predecessors())
      if (Pred != FromBB && !DT->dominates(ToBB, Pred))
        return false;
  }

  return true;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
                                               MachineBasicBlock *FromBB,
                                               MachineBasicBlock *ToBB,
                                               bool BreakPHIEdge) {
  bool Status = false;
  MachineBasicBlock *DeferredFromBB = nullptr;
  if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
    // If there is a DeferredFromBB, we consider FromBB only if _both_
    // of them are legal to split.
    if ((!DeferredFromBB ||
         ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
         isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
        isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
      ToSplit.insert(std::make_pair(FromBB, ToBB));
      if (DeferredFromBB)
        ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
      Status = true;
    }
  }

  return Status;
}

std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
  // To save compile time, an MBB's register pressure is cached and not
  // recomputed within one ProcessBlock iteration, even though sinking
  // instructions into the MBB does change its actual pressure.
  // FIXME: need an accurate and cheap register pressure estimation model here.
  auto RP = CachedRegisterPressure.find(&MBB);
  if (RP != CachedRegisterPressure.end())
    return RP->second;

  RegionPressure Pressure;
  RegPressureTracker RPTracker(Pressure);

  // Initialize the register pressure tracker.
  RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
                 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

  for (MachineBasicBlock::const_iterator MII = MBB.instr_end(),
                                         MIE = MBB.instr_begin();
       MII != MIE; --MII) {
    const MachineInstr &MI = *std::prev(MII);
    if (MI.isDebugInstr() || MI.isPseudoProbe())
      continue;
    RegisterOperands RegOpers;
    RegOpers.collect(MI, *TRI, *MRI, false, false);
    RPTracker.recedeSkipDebugValues();
    assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
    RPTracker.recede(RegOpers);
  }

  RPTracker.closeRegion();
  auto It = CachedRegisterPressure.insert(
      std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
  return It.first->second;
}

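/// Return true if adding \p NRegs registers of class \p RC would push any
/// register pressure set in \p MBB to or over its limit.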
bool MachineSinking::registerPressureSetExceedsLimit(
    unsigned NRegs, const TargetRegisterClass *RC,
    const MachineBasicBlock &MBB) {
  unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight;
  const int *PS = TRI->getRegClassPressureSets(RC);
  std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB);
  for (; *PS != -1; PS++)
    if (Weight + BBRegisterPressure[*PS] >=
        TRI->getRegPressureSetLimit(*MBB.getParent(), *PS))
      return true;
  return false;
}

/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          MachineBasicBlock *SuccToSinkTo,
                                          AllSuccsCache &AllSuccessors) {
  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB");

  if (MBB == SuccToSinkTo)
    return false;

  // It is profitable if SuccToSinkTo does not post dominate current block.
  if (!PDT->dominates(SuccToSinkTo, MBB))
    return true;

  // It is profitable to sink an instruction from a deeper cycle to a shallower
  // cycle, even if the latter post-dominates the former (PR21115).
  if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
    return true;

  // Check if only use in post dominated block is PHI instruction.
  bool NonPHIUse = false;
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
    MachineBasicBlock *UseBlock = UseInst.getParent();
    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
      NonPHIUse = true;
  }
  if (!NonPHIUse)
    return true;

  // Even if SuccToSinkTo post-dominates MBB, sinking may still be profitable
  // if MI can be further profitably sunk into another block in the next
  // round.
  bool BreakPHIEdge = false;
  // FIXME - If finding successor is compile time expensive then cache results.
  if (MachineBasicBlock *MBB2 =
          FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);

  MachineCycle *MCycle = CI->getCycle(MBB);

  // If the instruction is not inside a cycle, it is not profitable to sink MI
  // to a post-dominating block SuccToSinkTo.
  if (!MCycle)
    return false;

  // If this instruction is inside a cycle and sinking it shortens the live
  // ranges of more registers, it is still profitable.
  for (const MachineOperand &MO : MI.operands()) {
    // Ignore non-register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      // Don't handle non-constant and non-ignorable physical register uses.
      if (MO.isUse() && !MRI->isConstantPhysReg(Reg) &&
          !TII->isIgnorableUse(MO))
        return false;
      continue;
    }

    // Users for the defs are all dominated by SuccToSinkTo.
    if (MO.isDef()) {
      // This def register's live range is shortened after sinking.
      bool LocalUse = false;
      if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                   LocalUse))
        return false;
    } else {
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (!DefMI)
        continue;
      MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
      // DefMI is defined outside of the cycle. There should be no live range
      // impact for this operand. "Defined outside of the cycle" means either:
      // 1: the definition is outside of the cycle, or
      // 2: the definition is in this cycle, but it is a PHI in the cycle
      //    header.
      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
                              Cycle->getHeader() == DefMI->getParent()))
        continue;
      // The DefMI is defined inside the cycle.
      // If sinking this operand makes some register pressure set exceed limit,
      // it is not profitable.
      if (registerPressureSetExceedsLimit(1, MRI->getRegClass(Reg),
                                          *SuccToSinkTo)) {
        LLVM_DEBUG(dbgs() << "register pressure exceeds limit, not profitable.");
        return false;
      }
    }
  }

  // If MI is in a cycle and all its operands are alive across the whole cycle,
  // or if no operand sinking makes a register pressure set exceed its limit,
  // it is profitable to sink MI.
  return true;
}

/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached.
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                       AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in cache ?
  auto Succs = AllSuccessors.find(MBB);
  if (Succs != AllSuccessors.end())
    return Succs->second;

  SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());

  // Handle cases where sinking can happen but where the sink point isn't a
  // successor. For example:
  //
  //   x = computation
  //   if () {} else {}
  //   use x
  //
  for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
    // DomTree children of MBB that have MBB as immediate dominator are added.
    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
        // Skip MBBs already added to the AllSuccs vector above.
        !MBB->isSuccessor(DTChild->getBlock()))
      AllSuccs.push_back(DTChild->getBlock());
  }

  // Sort Successors according to their cycle depth or block frequency info.
  llvm::stable_sort(
      AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
        uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
        uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
        bool HasBlockFreq = LHSFreq != 0 || RHSFreq != 0;
        return HasBlockFreq ? LHSFreq < RHSFreq
                            : CI->getCycleDepth(L) < CI->getCycleDepth(R);
      });

  auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));

  return it.first->second;
}

/// FindSuccToSinkTo - Find a successor to sink this instruction to.
MachineBasicBlock *
MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                 bool &BreakPHIEdge,
                                 AllSuccsCache &AllSuccessors) {
  assert(MBB && "Invalid MachineBasicBlock!");

  // Loop over all the operands of the specified instruction. If there is
  // anything we can't handle, bail out.

  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  MachineBasicBlock *SuccToSinkTo = nullptr;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg()) continue; // Ignore non-register operands.

    Register Reg = MO.getReg();
    if (Reg == 0) continue;

    if (Reg.isPhysical()) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
          return nullptr;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return nullptr;
      }
    } else {
      // Virtual register uses are always safe to sink.
      if (MO.isUse()) continue;

      // If it's not safe to move defs of the register class, then abort.
      if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
        return nullptr;

      // Virtual register defs can only be sunk if all their uses are in blocks
      // dominated by one of the successors.
      if (SuccToSinkTo) {
        // If a previous operand picked a block to sink to, then this operand
        // must be sinkable to the same block.
        bool LocalUse = false;
        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
                                     BreakPHIEdge, LocalUse))
          return nullptr;

        continue;
      }

      // Otherwise, we should look at all the successors and decide which one
      // we should sink to. If we have reliable block frequency information
      // (frequency != 0) available, give successors with smaller frequencies
      // higher priority, otherwise prioritize smaller cycle depths.
      for (MachineBasicBlock *SuccBlock :
           GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
        bool LocalUse = false;
        if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
                                    BreakPHIEdge, LocalUse)) {
          SuccToSinkTo = SuccBlock;
          break;
        }
        if (LocalUse)
          // Def is used locally, it's never safe to move this def.
          return nullptr;
      }

      // If we couldn't find a block to sink to, ignore this instruction.
      if (!SuccToSinkTo)
        return nullptr;
      if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
        return nullptr;
    }
  }

  // It is not possible to sink an instruction into its own block. This can
  // happen with cycles.
  if (MBB == SuccToSinkTo)
    return nullptr;

  // It's not safe to sink instructions to EH landing pad. Control flow into
  // landing pad is implicitly defined.
  if (SuccToSinkTo && SuccToSinkTo->isEHPad())
    return nullptr;

  // It ought to be okay to sink instructions into an INLINEASM_BR target, but
  // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction
  // in the source block (which this code does not yet do). So for now, forbid
  // doing so.
  if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
    return nullptr;

  if (SuccToSinkTo && !TII->isSafeToSink(MI, SuccToSinkTo, CI))
    return nullptr;

  return SuccToSinkTo;
}

/// Return true if MI is likely to be usable as a memory operation by the
/// implicit null check optimization.
///
/// This is a "best effort" heuristic, and should not be relied upon for
/// correctness. This returning true does not guarantee that the implicit null
/// check optimization is legal over MI, and this returning false does not
/// guarantee MI cannot possibly be used to do a null check.
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
                                             const TargetInstrInfo *TII,
                                             const TargetRegisterInfo *TRI) {
  using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;

  auto *MBB = MI.getParent();
  if (MBB->pred_size() != 1)
    return false;

  auto *PredMBB = *MBB->pred_begin();
  auto *PredBB = PredMBB->getBasicBlock();

  // Frontends that don't use implicit null checks have no reason to emit
  // branches with make.implicit metadata, and this function should always
  // return false for them.
  if (!PredBB ||
      !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
    return false;

  const MachineOperand *BaseOp;
  int64_t Offset;
  bool OffsetIsScalable;
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
    return false;

  if (!BaseOp->isReg())
    return false;

  if (!(MI.mayLoad() && !MI.isPredicable()))
    return false;

  MachineBranchPredicate MBP;
  if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
    return false;

  return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
         (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
          MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
         MBP.LHS.getReg() == BaseOp->getReg();
}

1381 | /// If the sunk instruction is a copy, try to forward the copy instead of |
1382 | /// leaving an 'undef' DBG_VALUE in the original location. Don't do this if |
1383 | /// there's any subregister weirdness involved. Returns true if copy |
1384 | /// propagation occurred. |
1385 | static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI, |
1386 | Register Reg) { |
1387 | const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo(); |
1388 | const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo(); |
1389 | |
1390 | // Copy DBG_VALUE operand and set the original to undef. We then check to |
1391 | // see whether this is something that can be copy-forwarded. If it isn't, |
1392 | // continue around the loop. |
1393 | |
1394 | const MachineOperand *SrcMO = nullptr, *DstMO = nullptr; |
1395 | auto CopyOperands = TII.isCopyInstr(MI: SinkInst); |
1396 | if (!CopyOperands) |
1397 | return false; |
1398 | SrcMO = CopyOperands->Source; |
1399 | DstMO = CopyOperands->Destination; |
1400 | |
1401 | // Check validity of forwarding this copy. |
1402 | bool PostRA = MRI.getNumVirtRegs() == 0; |
1403 | |
1404 | // Trying to forward between physical and virtual registers is too hard. |
1405 | if (Reg.isVirtual() != SrcMO->getReg().isVirtual()) |
1406 | return false; |
1407 | |
1408 | // Only try virtual register copy-forwarding before regalloc, and physical |
1409 | // register copy-forwarding after regalloc. |
1410 | bool arePhysRegs = !Reg.isVirtual(); |
1411 | if (arePhysRegs != PostRA) |
1412 | return false; |
1413 | |
1414 | // Pre-regalloc, only forward if all subregisters agree (or there are no |
1415 | // subregs at all). More analysis might recover some forwardable copies. |
1416 | if (!PostRA) |
1417 | for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) |
1418 | if (DbgMO.getSubReg() != SrcMO->getSubReg() || |
1419 | DbgMO.getSubReg() != DstMO->getSubReg()) |
1420 | return false; |
1421 | |
1422 | // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register |
1423 | // of this copy. Only forward the copy if the DBG_VALUE operand exactly |
1424 | // matches the copy destination. |
1425 | if (PostRA && Reg != DstMO->getReg()) |
1426 | return false; |
1427 | |
1428 | for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) { |
1429 | DbgMO.setReg(SrcMO->getReg()); |
1430 | DbgMO.setSubReg(SrcMO->getSubReg()); |
1431 | } |
1432 | return true; |
1433 | } |
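// Illustrative only (hypothetical virtual-register MIR): for
//   %1 = COPY %0
//   DBG_VALUE %1, $noreg, !"x", !DIExpression()
// attemptDebugCopyProp rewrites the DBG_VALUE to refer to %0, so the variable
// location survives the COPY being sunk; without the rewrite the DBG_VALUE
// would have to be set to undef.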
1434 | |
1435 | using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>; |
1436 | /// Sink an instruction and its associated debug instructions. |
1437 | static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, |
1438 | MachineBasicBlock::iterator InsertPos, |
1439 | ArrayRef<MIRegs> DbgValuesToSink) { |
1440 | // If we cannot find a location to use (merge with), then we erase the debug |
1441 | // location to prevent debug-info driven tools from potentially reporting |
1442 | // wrong location information. |
1443 | if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end()) |
1444 | MI.setDebugLoc(DILocation::getMergedLocation(LocA: MI.getDebugLoc(), |
1445 | LocB: InsertPos->getDebugLoc())); |
1446 | else |
1447 | MI.setDebugLoc(DebugLoc()); |
1448 | |
1449 | // Move the instruction. |
1450 | MachineBasicBlock *ParentBlock = MI.getParent(); |
1451 | SuccToSinkTo.splice(Where: InsertPos, Other: ParentBlock, From: MI, |
1452 | To: ++MachineBasicBlock::iterator(MI)); |
1453 | |
1454 | // Sink a copy of debug users to the insert position. Mark the original |
1455 | // DBG_VALUE location as 'undef', indicating that any earlier variable |
1456 | // location should be terminated as we've optimised away the value at this |
1457 | // point. |
1458 | for (const auto &DbgValueToSink : DbgValuesToSink) { |
1459 | MachineInstr *DbgMI = DbgValueToSink.first; |
1460 | MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(Orig: DbgMI); |
1461 | SuccToSinkTo.insert(I: InsertPos, MI: NewDbgMI); |
1462 | |
1463 | bool PropagatedAllSunkOps = true; |
1464 | for (unsigned Reg : DbgValueToSink.second) { |
1465 | if (DbgMI->hasDebugOperandForReg(Reg)) { |
1466 | if (!attemptDebugCopyProp(SinkInst&: MI, DbgMI&: *DbgMI, Reg)) { |
1467 | PropagatedAllSunkOps = false; |
1468 | break; |
1469 | } |
1470 | } |
1471 | } |
1472 | if (!PropagatedAllSunkOps) |
1473 | DbgMI->setDebugValueUndef(); |
1474 | } |
1475 | } |
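// Illustrative only: given
//   %2 = ADDWrr %0, %1
//   DBG_VALUE %2, ...
// performSink splices the ADD to the insert point, places a clone of the
// DBG_VALUE next to it, and sets the original DBG_VALUE to undef whenever
// copy-propagation cannot rescue it, terminating any earlier location range
// for the variable.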
1476 | |
1477 | /// hasStoreBetween - check if there is a store between straight-line blocks |
1478 | /// From and To. |
1479 | bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, |
1480 | MachineBasicBlock *To, MachineInstr &MI) { |
1481 | // Make sure From and To are on a straight-line path, which means From |
1482 | // dominates To and To post-dominates From. |
1483 | if (!DT->dominates(A: From, B: To) || !PDT->dominates(A: To, B: From)) |
1484 | return true; |
1485 | |
1486 | auto BlockPair = std::make_pair(x&: From, y&: To); |
1487 | |
1488 | // Has this pair of blocks been queried before, with a definite cached |
1489 | // result? |
1490 | if (auto It = HasStoreCache.find(Val: BlockPair); It != HasStoreCache.end()) |
1491 | return It->second; |
1492 | |
1493 | if (auto It = StoreInstrCache.find(Val: BlockPair); It != StoreInstrCache.end()) |
1494 | return llvm::any_of(Range&: It->second, P: [&](MachineInstr *I) { |
1495 | return I->mayAlias(AA, Other: MI, UseTBAA: false); |
1496 | }); |
1497 | |
1498 | bool SawStore = false; |
1499 | bool HasAliasedStore = false; |
1500 | DenseSet<MachineBasicBlock *> HandledBlocks; |
1501 | DenseSet<MachineBasicBlock *> HandledDomBlocks; |
1502 | // Go through all blocks reachable from From. |
1503 | for (MachineBasicBlock *BB : depth_first(G: From)) { |
1504 | // We insert the instruction at the start of block To, so there is no need |
1505 | // to worry about stores inside To. |
1506 | // Stores in block From have already been considered on entry to |
1507 | // SinkInstruction. |
1508 | if (BB == To || BB == From) |
1509 | continue; |
1510 | |
1511 | // We already handled this BB in a previous iteration. |
1512 | if (HandledBlocks.count(V: BB)) |
1513 | continue; |
1514 | |
1515 | HandledBlocks.insert(V: BB); |
1516 | // If To post-dominates BB, then BB lies on a path from From to To. |
1517 | if (PDT->dominates(A: To, B: BB)) { |
1518 | if (!HandledDomBlocks.count(V: BB)) |
1519 | HandledDomBlocks.insert(V: BB); |
1520 | |
1521 | // If this BB is too big, or there are too many blocks on the straight-line |
1522 | // path between From and To, stop searching to save compile time. |
1523 | if (BB->sizeWithoutDebugLargerThan(Limit: SinkLoadInstsPerBlockThreshold) || |
1524 | HandledDomBlocks.size() > SinkLoadBlocksThreshold) { |
1525 | for (auto *DomBB : HandledDomBlocks) { |
1526 | if (DomBB != BB && DT->dominates(A: DomBB, B: BB)) |
1527 | HasStoreCache[std::make_pair(x&: DomBB, y&: To)] = true; |
1528 | else if (DomBB != BB && DT->dominates(A: BB, B: DomBB)) |
1529 | HasStoreCache[std::make_pair(x&: From, y&: DomBB)] = true; |
1530 | } |
1531 | HasStoreCache[BlockPair] = true; |
1532 | return true; |
1533 | } |
1534 | |
1535 | for (MachineInstr &I : *BB) { |
1536 | // Treat as alias conservatively for a call or an ordered memory |
1537 | // operation. |
1538 | if (I.isCall() || I.hasOrderedMemoryRef()) { |
1539 | for (auto *DomBB : HandledDomBlocks) { |
1540 | if (DomBB != BB && DT->dominates(A: DomBB, B: BB)) |
1541 | HasStoreCache[std::make_pair(x&: DomBB, y&: To)] = true; |
1542 | else if (DomBB != BB && DT->dominates(A: BB, B: DomBB)) |
1543 | HasStoreCache[std::make_pair(x&: From, y&: DomBB)] = true; |
1544 | } |
1545 | HasStoreCache[BlockPair] = true; |
1546 | return true; |
1547 | } |
1548 | |
1549 | if (I.mayStore()) { |
1550 | SawStore = true; |
1551 | // We still have a chance to sink MI if none of the stores in between |
1552 | // alias with MI. |
1553 | // Cache all store instructions, so that we don't need to go through |
1554 | // all blocks reachable from From for the next load instruction. |
1555 | if (I.mayAlias(AA, Other: MI, UseTBAA: false)) |
1556 | HasAliasedStore = true; |
1557 | StoreInstrCache[BlockPair].push_back(Elt: &I); |
1558 | } |
1559 | } |
1560 | } |
1561 | } |
1562 | // If there is no store at all, cache the result. |
1563 | if (!SawStore) |
1564 | HasStoreCache[BlockPair] = false; |
1565 | return HasAliasedStore; |
1566 | } |
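// Illustrative only: with From dominating To and To post-dominating From,
//          From
//          /  \
//        BB1  BB2
//          \  /
//           To
// the walk above scans BB1 and BB2 (From and To themselves are skipped),
// conservatively treats calls and ordered memory operations as aliasing
// stores, and caches either a boolean answer or the store list per
// (From, To) pair so later queries for other loads are cheap.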
1567 | |
1568 | /// Sink instructions into cycles if profitable. This especially tries to |
1569 | /// prevent register spills caused by register pressure if there is little to no |
1570 | /// overhead moving instructions into cycles. |
1571 | bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) { |
1572 | LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I); |
1573 | MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); |
1574 | assert(Preheader && "Cycle sink needs a preheader block" ); |
1575 | MachineBasicBlock *SinkBlock = nullptr; |
1576 | bool CanSink = true; |
1577 | const MachineOperand &MO = I.getOperand(i: 0); |
1578 | |
1579 | for (MachineInstr &MI : MRI->use_instructions(Reg: MO.getReg())) { |
1580 | LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); |
1581 | if (!Cycle->contains(Block: MI.getParent())) { |
1582 | LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n" ); |
1583 | CanSink = false; |
1584 | break; |
1585 | } |
1586 | |
1587 | // FIXME: Come up with a proper cost model that estimates whether sinking |
1588 | // the instruction (and thus possibly executing it on every cycle |
1589 | // iteration) is more expensive than a register. |
1590 | // For now, assume that copies are cheap and thus almost always worth it. |
1591 | if (!MI.isCopy()) { |
1592 | LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n" ); |
1593 | CanSink = false; |
1594 | break; |
1595 | } |
1596 | if (!SinkBlock) { |
1597 | SinkBlock = MI.getParent(); |
1598 | LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: " |
1599 | << printMBBReference(*SinkBlock) << "\n" ); |
1600 | continue; |
1601 | } |
1602 | SinkBlock = DT->findNearestCommonDominator(A: SinkBlock, B: MI.getParent()); |
1603 | if (!SinkBlock) { |
1604 | LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n" ); |
1605 | CanSink = false; |
1606 | break; |
1607 | } |
1608 | LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " << |
1609 | printMBBReference(*SinkBlock) << "\n" ); |
1610 | } |
1611 | |
1612 | if (!CanSink) { |
1613 | LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n" ); |
1614 | return false; |
1615 | } |
1616 | if (!SinkBlock) { |
1617 | LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n" ); |
1618 | return false; |
1619 | } |
1620 | if (SinkBlock == Preheader) { |
1621 | LLVM_DEBUG( |
1622 | dbgs() << "CycleSink: Not sinking, sink block is the preheader\n" ); |
1623 | return false; |
1624 | } |
1625 | if (SinkBlock->sizeWithoutDebugLargerThan(Limit: SinkLoadInstsPerBlockThreshold)) { |
1626 | LLVM_DEBUG( |
1627 | dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n" ); |
1628 | return false; |
1629 | } |
1630 | |
1631 | LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n" ); |
1632 | SinkBlock->splice(Where: SinkBlock->SkipPHIsAndLabels(I: SinkBlock->begin()), Other: Preheader, |
1633 | From: I); |
1634 | |
1635 | // Conservatively clear any kill flags on uses of the sunk instruction. |
1636 | for (MachineOperand &MO : I.operands()) { |
1637 | if (MO.isReg() && MO.readsReg()) |
1638 | RegsToClearKillFlags.insert(V: MO.getReg()); |
1639 | } |
1640 | |
1641 | // The instruction is moved from its basic block, so do not retain the |
1642 | // debug information. |
1643 | assert(!I.isDebugInstr() && "Should not sink debug inst" ); |
1644 | I.setDebugLoc(DebugLoc()); |
1645 | return true; |
1646 | } |
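// Illustrative only (hypothetical MIR): for a preheader definition whose only
// users inside the cycle are copies,
//   preheader:
//     %5 = COPY %4          ; candidate collected by the caller
//   loop.body:
//     %6 = COPY %5          ; every use must itself be a copy
// SinkIntoCycle moves the candidate into loop.body (the nearest common
// dominator of all users), trading re-execution per iteration for a shorter
// live range and, with luck, fewer spills.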
1647 | |
1648 | /// SinkInstruction - Determine whether it is safe to sink the specified machine |
1649 | /// instruction out of its current block into a successor. |
1650 | bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, |
1651 | AllSuccsCache &AllSuccessors) { |
1652 | // Don't sink instructions that the target prefers not to sink. |
1653 | if (!TII->shouldSink(MI)) |
1654 | return false; |
1655 | |
1656 | // Check if it's safe to move the instruction. |
1657 | if (!MI.isSafeToMove(AA, SawStore)) |
1658 | return false; |
1659 | |
1660 | // Convergent operations may not be made control-dependent on additional |
1661 | // values. |
1662 | if (MI.isConvergent()) |
1663 | return false; |
1664 | |
1665 | // Don't break implicit null checks. This is a performance heuristic, and not |
1666 | // required for correctness. |
1667 | if (SinkingPreventsImplicitNullCheck(MI, TII, TRI)) |
1668 | return false; |
1669 | |
1670 | // FIXME: This should include support for sinking instructions within the |
1671 | // block they are currently in to shorten the live ranges. We often get |
1672 | // instructions sunk into the top of a large block, but it would be better to |
1673 | // also sink them down before their first use in the block. This xform has to |
1674 | // be careful not to *increase* register pressure though, e.g. sinking |
1675 | // "x = y + z" down if it kills y and z would increase the live ranges of y |
1676 | // and z and only shrink the live range of x. |
1677 | |
1678 | bool BreakPHIEdge = false; |
1679 | MachineBasicBlock *ParentBlock = MI.getParent(); |
1680 | MachineBasicBlock *SuccToSinkTo = |
1681 | FindSuccToSinkTo(MI, MBB: ParentBlock, BreakPHIEdge, AllSuccessors); |
1682 | |
1683 | // If there are no outputs, it must have side-effects. |
1684 | if (!SuccToSinkTo) |
1685 | return false; |
1686 | |
1687 | // If the instruction to move defines a dead physical register which is live |
1688 | // when leaving the basic block, don't move it because it could turn into a |
1689 | // "zombie" define of that preg. E.g., EFLAGS. |
1690 | for (const MachineOperand &MO : MI.all_defs()) { |
1691 | Register Reg = MO.getReg(); |
1692 | if (Reg == 0 || !Reg.isPhysical()) |
1693 | continue; |
1694 | if (SuccToSinkTo->isLiveIn(Reg)) |
1695 | return false; |
1696 | } |
1697 | |
1698 | LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo); |
1699 | |
1700 | // If the block has multiple predecessors, this is a critical edge. |
1701 | // Decide if we can sink along it or need to break the edge. |
1702 | if (SuccToSinkTo->pred_size() > 1) { |
1703 | // We cannot sink a load across a critical edge - there may be stores in |
1704 | // other code paths. |
1705 | bool TryBreak = false; |
1706 | bool Store = |
1707 | MI.mayLoad() ? hasStoreBetween(From: ParentBlock, To: SuccToSinkTo, MI) : true; |
1708 | if (!MI.isSafeToMove(AA, SawStore&: Store)) { |
1709 | LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n" ); |
1710 | TryBreak = true; |
1711 | } |
1712 | |
1713 | // We don't want to sink across a critical edge if we don't dominate the |
1714 | // successor. We could be introducing calculations on new code paths. |
1715 | if (!TryBreak && !DT->dominates(A: ParentBlock, B: SuccToSinkTo)) { |
1716 | LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n" ); |
1717 | TryBreak = true; |
1718 | } |
1719 | |
1720 | // Don't sink instructions into a cycle. |
1721 | if (!TryBreak && CI->getCycle(Block: SuccToSinkTo) && |
1722 | (!CI->getCycle(Block: SuccToSinkTo)->isReducible() || |
1723 | CI->getCycle(Block: SuccToSinkTo)->getHeader() == SuccToSinkTo)) { |
1724 | LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n" ); |
1725 | TryBreak = true; |
1726 | } |
1727 | |
1728 | // Otherwise we are OK with sinking along a critical edge. |
1729 | if (!TryBreak) |
1730 | LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n" ); |
1731 | else { |
1732 | // Mark this edge as to be split. |
1733 | // If the edge can actually be split, the next iteration of the main loop |
1734 | // will sink MI in the newly created block. |
1735 | bool Status = |
1736 | PostponeSplitCriticalEdge(MI, FromBB: ParentBlock, ToBB: SuccToSinkTo, BreakPHIEdge); |
1737 | if (!Status) |
1738 | LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " |
1739 | "break critical edge\n" ); |
1740 | // The instruction will not be sunk this time. |
1741 | return false; |
1742 | } |
1743 | } |
1744 | |
1745 | if (BreakPHIEdge) { |
1746 | // BreakPHIEdge is true if all the uses are in the successor MBB being |
1747 | // sunk into and they are all PHI nodes. In this case, machine-sink must |
1748 | // break the critical edge first. |
1749 | bool Status = PostponeSplitCriticalEdge(MI, FromBB: ParentBlock, |
1750 | ToBB: SuccToSinkTo, BreakPHIEdge); |
1751 | if (!Status) |
1752 | LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " |
1753 | "break critical edge\n" ); |
1754 | // The instruction will not be sunk this time. |
1755 | return false; |
1756 | } |
1757 | |
1758 | // Determine where to insert the instruction. Skip PHI nodes. |
1759 | MachineBasicBlock::iterator InsertPos = |
1760 | SuccToSinkTo->SkipPHIsAndLabels(I: SuccToSinkTo->begin()); |
1761 | if (blockPrologueInterferes(BB: SuccToSinkTo, End: InsertPos, MI, TRI, TII, MRI)) { |
1762 | LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n" ); |
1763 | return false; |
1764 | } |
1765 | |
1766 | // Collect debug users of any vreg that this inst defines. |
1767 | SmallVector<MIRegs, 4> DbgUsersToSink; |
1768 | for (auto &MO : MI.all_defs()) { |
1769 | if (!MO.getReg().isVirtual()) |
1770 | continue; |
1771 | if (!SeenDbgUsers.count(Val: MO.getReg())) |
1772 | continue; |
1773 | |
1774 | // Sink any users that don't pass any other DBG_VALUEs for this variable. |
1775 | auto &Users = SeenDbgUsers[MO.getReg()]; |
1776 | for (auto &User : Users) { |
1777 | MachineInstr *DbgMI = User.getPointer(); |
1778 | if (User.getInt()) { |
1779 | // This DBG_VALUE would re-order assignments. If we can't copy-propagate |
1780 | // it, it can't be recovered. Set it undef. |
1781 | if (!attemptDebugCopyProp(SinkInst&: MI, DbgMI&: *DbgMI, Reg: MO.getReg())) |
1782 | DbgMI->setDebugValueUndef(); |
1783 | } else { |
1784 | DbgUsersToSink.push_back( |
1785 | Elt: {DbgMI, SmallVector<unsigned, 2>(1, MO.getReg())}); |
1786 | } |
1787 | } |
1788 | } |
1789 | |
1790 | // After sinking, some debug users may not be dominated any more. If possible, |
1791 | // copy-propagate their operands. As it's expensive, don't do this if there's |
1792 | // no debuginfo in the program. |
1793 | if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy()) |
1794 | SalvageUnsunkDebugUsersOfCopy(MI, TargetBlock: SuccToSinkTo); |
1795 | |
1796 | performSink(MI, SuccToSinkTo&: *SuccToSinkTo, InsertPos, DbgValuesToSink: DbgUsersToSink); |
1797 | |
1798 | // Conservatively, clear any kill flags, since it's possible that they are no |
1799 | // longer correct. |
1800 | // Note that we have to clear the kill flags for any register this instruction |
1801 | // uses as we may sink over another instruction which currently kills the |
1802 | // used registers. |
1803 | for (MachineOperand &MO : MI.all_uses()) |
1804 | RegsToClearKillFlags.insert(V: MO.getReg()); // Remember to clear kill flags. |
1805 | |
1806 | return true; |
1807 | } |
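// Illustrative only: if %bb.0 branches to %bb.1 and %bb.2, and %bb.2 has a
// second predecessor, the edge %bb.0 -> %bb.2 is critical. When sinking along
// it is unsafe, SinkInstruction records the edge via PostponeSplitCriticalEdge
// and returns false; once the edge is split, the next iteration of the main
// loop sinks MI into the newly created block.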
1808 | |
1809 | void MachineSinking::SalvageUnsunkDebugUsersOfCopy( |
1810 | MachineInstr &MI, MachineBasicBlock *TargetBlock) { |
1811 | assert(MI.isCopy()); |
1812 | assert(MI.getOperand(1).isReg()); |
1813 | |
1814 | // Enumerate all users of vreg operands that are def'd. Skip those that will |
1815 | // be sunk. For the rest, if they are not dominated by the block we will sink |
1816 | // MI into, propagate the copy source to them. |
1817 | SmallVector<MachineInstr *, 4> DbgDefUsers; |
1818 | SmallVector<Register, 4> DbgUseRegs; |
1819 | const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); |
1820 | for (auto &MO : MI.all_defs()) { |
1821 | if (!MO.getReg().isVirtual()) |
1822 | continue; |
1823 | DbgUseRegs.push_back(Elt: MO.getReg()); |
1824 | for (auto &User : MRI.use_instructions(Reg: MO.getReg())) { |
1825 | if (!User.isDebugValue() || DT->dominates(A: TargetBlock, B: User.getParent())) |
1826 | continue; |
1827 | |
1828 | // If it is in the same block, it will either sink or be use-before-def. |
1829 | if (User.getParent() == MI.getParent()) |
1830 | continue; |
1831 | |
1832 | assert(User.hasDebugOperandForReg(MO.getReg()) && |
1833 | "DBG_VALUE user of vreg, but has no operand for it?" ); |
1834 | DbgDefUsers.push_back(Elt: &User); |
1835 | } |
1836 | } |
1837 | |
1838 | // Point the users of this copy that are no longer dominated, at the source |
1839 | // of the copy. |
1840 | for (auto *User : DbgDefUsers) { |
1841 | for (auto &Reg : DbgUseRegs) { |
1842 | for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) { |
1843 | DbgOp.setReg(MI.getOperand(i: 1).getReg()); |
1844 | DbgOp.setSubReg(MI.getOperand(i: 1).getSubReg()); |
1845 | } |
1846 | } |
1847 | } |
1848 | } |
1849 | |
1850 | //===----------------------------------------------------------------------===// |
1851 | // This pass is not intended to be a replacement or a complete alternative |
1852 | // for the pre-ra machine sink pass. It is only designed to sink COPY |
1853 | // instructions which should be handled after RA. |
1854 | // |
1855 | // This pass sinks COPY instructions into a successor block, if the COPY is not |
1856 | // used in the current block and the COPY is live-in to a single successor |
1857 | // (i.e., doesn't require the COPY to be duplicated). This avoids executing the |
1858 | // copy on paths where its result isn't needed. This also exposes |
1859 | // additional opportunities for dead copy elimination and shrink wrapping. |
1860 | // |
1861 | // These copies were either not handled by or are inserted after the MachineSink |
1862 | // pass. As an example of the former case, the MachineSink pass cannot sink |
1863 | // COPY instructions with allocatable source registers; for AArch64 this type |
1864 | // of copy instruction is frequently used to move function parameters (PhysReg) |
1865 | // into virtual registers in the entry block. |
1866 | // |
1867 | // For the machine IR below, this pass will sink %w19 in the entry block into its |
1868 | // successor (%bb.1) because %w19 is only live-in in %bb.1. |
1869 | // %bb.0: |
1870 | // %wzr = SUBSWri %w1, 1 |
1871 | // %w19 = COPY %w0 |
1872 | // Bcc 11, %bb.2 |
1873 | // %bb.1: |
1874 | // Live Ins: %w19 |
1875 | // BL @fun |
1876 | // %w0 = ADDWrr %w0, %w19 |
1877 | // RET %w0 |
1878 | // %bb.2: |
1879 | // %w0 = COPY %wzr |
1880 | // RET %w0 |
1881 | // As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be |
1882 | // able to see %bb.0 as a candidate. |
1883 | //===----------------------------------------------------------------------===// |
1884 | namespace { |
1885 | |
1886 | class PostRAMachineSinking : public MachineFunctionPass { |
1887 | public: |
1888 | bool runOnMachineFunction(MachineFunction &MF) override; |
1889 | |
1890 | static char ID; |
1891 | PostRAMachineSinking() : MachineFunctionPass(ID) {} |
1892 | StringRef getPassName() const override { return "PostRA Machine Sink" ; } |
1893 | |
1894 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
1895 | AU.setPreservesCFG(); |
1896 | MachineFunctionPass::getAnalysisUsage(AU); |
1897 | } |
1898 | |
1899 | MachineFunctionProperties getRequiredProperties() const override { |
1900 | return MachineFunctionProperties().set( |
1901 | MachineFunctionProperties::Property::NoVRegs); |
1902 | } |
1903 | |
1904 | private: |
1905 | /// Track which register units have been modified and used. |
1906 | LiveRegUnits ModifiedRegUnits, UsedRegUnits; |
1907 | |
1908 | /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an |
1909 | /// entry in this map for each unit it touches. The DBG_VALUE's entry |
1910 | /// consists of a pointer to the instruction itself, and a vector of registers |
1911 | /// referred to by the instruction that overlap the key register unit. |
1912 | DenseMap<unsigned, SmallVector<MIRegs, 2>> SeenDbgInstrs; |
1913 | |
1914 | /// Sink COPY instructions that are unused in their own block close to their |
1915 | /// uses in successor blocks. |
1916 | bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF, |
1917 | const TargetRegisterInfo *TRI, const TargetInstrInfo *TII); |
1918 | }; |
1919 | } // namespace |
1920 | |
1921 | char PostRAMachineSinking::ID = 0; |
1922 | char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID; |
1923 | |
1924 | INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink" , |
1925 | "PostRA Machine Sink" , false, false) |
1926 | |
1927 | static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg, |
1928 | const TargetRegisterInfo *TRI) { |
1929 | LiveRegUnits LiveInRegUnits(*TRI); |
1930 | LiveInRegUnits.addLiveIns(MBB); |
1931 | return !LiveInRegUnits.available(Reg); |
1932 | } |
1933 | |
1934 | static MachineBasicBlock * |
1935 | getSingleLiveInSuccBB(MachineBasicBlock &CurBB, |
1936 | const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs, |
1937 | unsigned Reg, const TargetRegisterInfo *TRI) { |
1938 | // Try to find a single sinkable successor in which Reg is live-in. |
1939 | MachineBasicBlock *BB = nullptr; |
1940 | for (auto *SI : SinkableBBs) { |
1941 | if (aliasWithRegsInLiveIn(MBB&: *SI, Reg, TRI)) { |
1942 | // If BB is set here, Reg is live-in to at least two sinkable successors, |
1943 | // so quit. |
1944 | if (BB) |
1945 | return nullptr; |
1946 | BB = SI; |
1947 | } |
1948 | } |
1949 | // Reg is not live-in to any sinkable successors. |
1950 | if (!BB) |
1951 | return nullptr; |
1952 | |
1953 | // Check if any register aliased with Reg is live-in in other successors. |
1954 | for (auto *SI : CurBB.successors()) { |
1955 | if (!SinkableBBs.count(Ptr: SI) && aliasWithRegsInLiveIn(MBB&: *SI, Reg, TRI)) |
1956 | return nullptr; |
1957 | } |
1958 | return BB; |
1959 | } |
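// Illustrative only: if CurBB has sinkable successors S1 and S2 and the copied
// register is live-in only to S1, S1 is returned. If the register (or an
// aliasing one) is also live-in to S2, or to a non-sinkable successor, sinking
// would require duplicating the COPY, so nullptr is returned instead.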
1960 | |
1961 | static MachineBasicBlock * |
1962 | getSingleLiveInSuccBB(MachineBasicBlock &CurBB, |
1963 | const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs, |
1964 | ArrayRef<unsigned> DefedRegsInCopy, |
1965 | const TargetRegisterInfo *TRI) { |
1966 | MachineBasicBlock *SingleBB = nullptr; |
1967 | for (auto DefReg : DefedRegsInCopy) { |
1968 | MachineBasicBlock *BB = |
1969 | getSingleLiveInSuccBB(CurBB, SinkableBBs, Reg: DefReg, TRI); |
1970 | if (!BB || (SingleBB && SingleBB != BB)) |
1971 | return nullptr; |
1972 | SingleBB = BB; |
1973 | } |
1974 | return SingleBB; |
1975 | } |
1976 | |
1977 | static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, |
1978 | SmallVectorImpl<unsigned> &UsedOpsInCopy, |
1979 | LiveRegUnits &UsedRegUnits, |
1980 | const TargetRegisterInfo *TRI) { |
1981 | for (auto U : UsedOpsInCopy) { |
1982 | MachineOperand &MO = MI->getOperand(i: U); |
1983 | Register SrcReg = MO.getReg(); |
1984 | if (!UsedRegUnits.available(Reg: SrcReg)) { |
1985 | MachineBasicBlock::iterator NI = std::next(x: MI->getIterator()); |
1986 | for (MachineInstr &UI : make_range(x: NI, y: CurBB.end())) { |
1987 | if (UI.killsRegister(Reg: SrcReg, TRI)) { |
1988 | UI.clearRegisterKills(Reg: SrcReg, RegInfo: TRI); |
1989 | MO.setIsKill(true); |
1990 | break; |
1991 | } |
1992 | } |
1993 | } |
1994 | } |
1995 | } |
1996 | |
1997 | static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, |
1998 | SmallVectorImpl<unsigned> &UsedOpsInCopy, |
1999 | SmallVectorImpl<unsigned> &DefedRegsInCopy) { |
2000 | MachineFunction &MF = *SuccBB->getParent(); |
2001 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
2002 | for (unsigned DefReg : DefedRegsInCopy) |
2003 | for (MCPhysReg S : TRI->subregs_inclusive(Reg: DefReg)) |
2004 | SuccBB->removeLiveIn(Reg: S); |
2005 | for (auto U : UsedOpsInCopy) |
2006 | SuccBB->addLiveIn(PhysReg: MI->getOperand(i: U).getReg()); |
2007 | SuccBB->sortUniqueLiveIns(); |
2008 | } |
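// Illustrative only: after sinking "$w19 = COPY $w0" into SuccBB, updateLiveIn
// drops $w19 (and its subregisters) from SuccBB's live-in list and adds the
// source $w0, which must now be live across the block boundary to feed the
// sunk COPY.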
2009 | |
2010 | static bool hasRegisterDependency(MachineInstr *MI, |
2011 | SmallVectorImpl<unsigned> &UsedOpsInCopy, |
2012 | SmallVectorImpl<unsigned> &DefedRegsInCopy, |
2013 | LiveRegUnits &ModifiedRegUnits, |
2014 | LiveRegUnits &UsedRegUnits) { |
2015 | bool HasRegDependency = false; |
2016 | for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { |
2017 | MachineOperand &MO = MI->getOperand(i); |
2018 | if (!MO.isReg()) |
2019 | continue; |
2020 | Register Reg = MO.getReg(); |
2021 | if (!Reg) |
2022 | continue; |
2023 | if (MO.isDef()) { |
2024 | if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) { |
2025 | HasRegDependency = true; |
2026 | break; |
2027 | } |
2028 | DefedRegsInCopy.push_back(Elt: Reg); |
2029 | |
2030 | // FIXME: Instead of isUse(), readsReg() would be a better fix here. |
2031 | // For example, we can ignore modifications of a register with undef. |
2032 | // However, it's not perfectly clear whether skipping the internal read is |
2033 | // safe on all other targets. |
2034 | } else if (MO.isUse()) { |
2035 | if (!ModifiedRegUnits.available(Reg)) { |
2036 | HasRegDependency = true; |
2037 | break; |
2038 | } |
2039 | UsedOpsInCopy.push_back(Elt: i); |
2040 | } |
2041 | } |
2042 | return HasRegDependency; |
2043 | } |
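// Illustrative only (hypothetical MIR; the caller scans the block bottom-up):
//   $w19 = COPY $w0
//   $w1  = ADDWrr $w1, $w19
// By the time the COPY is examined, the ADD has already recorded $w19 in
// UsedRegUnits, so the COPY's def fails the available() check and the COPY is
// not sunk past its use.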
2044 | |
2045 | bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, |
2046 | MachineFunction &MF, |
2047 | const TargetRegisterInfo *TRI, |
2048 | const TargetInstrInfo *TII) { |
2049 | SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs; |
2050 | // FIXME: For now, we sink only to a successor which has a single predecessor |
2051 | // so that we can directly sink COPY instructions to the successor without |
2052 | // adding any new block or branch instruction. |
2053 | for (MachineBasicBlock *SI : CurBB.successors()) |
2054 | if (!SI->livein_empty() && SI->pred_size() == 1) |
2055 | SinkableBBs.insert(Ptr: SI); |
2056 | |
2057 | if (SinkableBBs.empty()) |
2058 | return false; |
2059 | |
2060 | bool Changed = false; |
2061 | |
2062 | // Track which registers have been modified and used between the end of the |
2063 | // block and the current instruction. |
2064 | ModifiedRegUnits.clear(); |
2065 | UsedRegUnits.clear(); |
2066 | SeenDbgInstrs.clear(); |
2067 | |
2068 | for (MachineInstr &MI : llvm::make_early_inc_range(Range: llvm::reverse(C&: CurBB))) { |
2069 | // Track the operand indices of the registers used by the COPY. |
2070 | SmallVector<unsigned, 2> UsedOpsInCopy; |
2071 | // Track the registers defined by the COPY. |
2072 | SmallVector<unsigned, 2> DefedRegsInCopy; |
2073 | |
2074 | // We must sink this DBG_VALUE if its operand is sunk. To avoid searching |
2075 | // for DBG_VALUEs later, record them when they're encountered. |
2076 | if (MI.isDebugValue() && !MI.isDebugRef()) { |
2077 | SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits; |
2078 | bool IsValid = true; |
2079 | for (MachineOperand &MO : MI.debug_operands()) { |
2080 | if (MO.isReg() && MO.getReg().isPhysical()) { |
2081 | // Bail if we can already tell the sink would be rejected, rather |
2082 | // than needlessly accumulating lots of DBG_VALUEs. |
2083 | if (hasRegisterDependency(MI: &MI, UsedOpsInCopy, DefedRegsInCopy, |
2084 | ModifiedRegUnits, UsedRegUnits)) { |
2085 | IsValid = false; |
2086 | break; |
2087 | } |
2088 | |
2089 | // Record debug use of each reg unit. |
2090 | for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg())) |
2091 | MIUnits[Unit].push_back(Elt: MO.getReg()); |
2092 | } |
2093 | } |
2094 | if (IsValid) { |
2095 | for (auto &RegOps : MIUnits) |
2096 | SeenDbgInstrs[RegOps.first].emplace_back(Args: &MI, |
2097 | Args: std::move(RegOps.second)); |
2098 | } |
2099 | continue; |
2100 | } |
2101 | |
2102 | if (MI.isDebugOrPseudoInstr()) |
2103 | continue; |
2104 | |
2105 | // Do not move any instruction across a function call. |
2106 | if (MI.isCall()) |
2107 | return false; |
2108 | |
2109 | if (!MI.isCopy() || !MI.getOperand(i: 0).isRenamable()) { |
2110 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
2111 | TRI); |
2112 | continue; |
2113 | } |
2114 | |
2115 | // Don't sink the COPY if it would violate a register dependency. |
2116 | if (hasRegisterDependency(MI: &MI, UsedOpsInCopy, DefedRegsInCopy, |
2117 | ModifiedRegUnits, UsedRegUnits)) { |
2118 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
2119 | TRI); |
2120 | continue; |
2121 | } |
2122 | assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) && |
2123 | "Unexpect SrcReg or DefReg" ); |
2124 | MachineBasicBlock *SuccBB = |
2125 | getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI); |
2126 | // Don't sink if we cannot find a single sinkable successor in which Reg |
2127 | // is live-in. |
2128 | if (!SuccBB) { |
2129 | LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, |
2130 | TRI); |
2131 | continue; |
2132 | } |
2133 | assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) && |
2134 | "Unexpected predecessor" ); |
2135 | |
2136 | // Collect DBG_VALUEs that must sink with this copy. We've previously |
2137 | // recorded which reg units DBG_VALUEs read; if this instruction |
2138 | // writes any of those units, then the corresponding DBG_VALUEs must sink. |
2139 | MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap; |
2140 | for (auto &MO : MI.all_defs()) { |
2141 | for (MCRegUnit Unit : TRI->regunits(Reg: MO.getReg())) { |
2142 | for (const auto &MIRegs : SeenDbgInstrs.lookup(Val: Unit)) { |
2143 | auto &Regs = DbgValsToSinkMap[MIRegs.first]; |
2144 | for (unsigned Reg : MIRegs.second) |
2145 | Regs.push_back(Elt: Reg); |
2146 | } |
2147 | } |
2148 | } |
2149 | auto DbgValsToSink = DbgValsToSinkMap.takeVector(); |
2150 | |
2151 | LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB); |
2152 | |
2153 | MachineBasicBlock::iterator InsertPos = |
2154 | SuccBB->SkipPHIsAndLabels(I: SuccBB->begin()); |
2155 | if (blockPrologueInterferes(BB: SuccBB, End: InsertPos, MI, TRI, TII, MRI: nullptr)) { |
2156 | LLVM_DEBUG( |
2157 | dbgs() << " *** Not sinking: prologue interference\n" ); |
2158 | continue; |
2159 | } |
2160 | |
2161 | // Clear the kill flag if SrcReg is killed between MI and the end of the |
2162 | // block. |
2163 | clearKillFlags(MI: &MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); |
2164 | performSink(MI, SuccToSinkTo&: *SuccBB, InsertPos, DbgValuesToSink: DbgValsToSink); |
2165 | updateLiveIn(MI: &MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); |
2166 | |
2167 | Changed = true; |
2168 | ++NumPostRACopySink; |
2169 | } |
2170 | return Changed; |
2171 | } |
2172 | |
2173 | bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) { |
2174 | if (skipFunction(F: MF.getFunction())) |
2175 | return false; |
2176 | |
2177 | bool Changed = false; |
2178 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
2179 | const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); |
2180 | |
2181 | ModifiedRegUnits.init(TRI: *TRI); |
2182 | UsedRegUnits.init(TRI: *TRI); |
2183 | for (auto &BB : MF) |
2184 | Changed |= tryToSinkCopy(CurBB&: BB, MF, TRI, TII); |
2185 | |
2186 | return Changed; |
2187 | } |
2188 | |