//===- InlineSpiller.cpp - Insert spills and restores inline --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The inline spiller modifies the machine function directly instead of
// inserting spills and restores in VirtRegMap.
//
//===----------------------------------------------------------------------===//

#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
STATISTIC(NumSnippets, "Number of spilled snippets");
STATISTIC(NumSpills, "Number of spills inserted");
STATISTIC(NumSpillsRemoved, "Number of spills removed");
STATISTIC(NumReloads, "Number of reloads inserted");
STATISTIC(NumReloadsRemoved, "Number of reloads removed");
STATISTIC(NumFolded, "Number of folded stack accesses");
STATISTIC(NumFoldedLoads, "Number of folded loads");
STATISTIC(NumRemats, "Number of rematerialized defs for spilling");

static cl::opt<bool>
RestrictStatepointRemat("restrict-statepoint-remat",
                        cl::init(false), cl::Hidden,
                        cl::desc("Restrict remat for statepoint operands"));

namespace {

class HoistSpillHelper : private LiveRangeEdit::Delegate {
  MachineFunction &MF;
  LiveIntervals &LIS;
  LiveStacks &LSS;
  MachineDominatorTree &MDT;
  VirtRegMap &VRM;
  MachineRegisterInfo &MRI;
  const TargetInstrInfo &TII;
  const TargetRegisterInfo &TRI;
  const MachineBlockFrequencyInfo &MBFI;

  InsertPointAnalysis IPA;

  // Map from StackSlot to the LiveInterval of the original register.
  // Note the LiveInterval of the original register may have been deleted
  // after it is spilled. We keep a copy here to track the range where
  // spills can be moved.
  DenseMap<int, std::unique_ptr<LiveInterval>> StackSlotToOrigLI;

  // Map from pair of (StackSlot and Original VNI) to a set of spills which
  // have the same stackslot and have equal values defined by Original VNI.
  // These spills are mergeable and are hoist candidates.
  using MergeableSpillsMap =
      MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>;
  MergeableSpillsMap MergeableSpills;

  /// This is the map from original register to a set containing all its
  /// siblings. To hoist a spill to another BB, we need to find a live
  /// sibling there and use it as the source of the new spill.
  DenseMap<Register, SmallSetVector<Register, 16>> Virt2SiblingsMap;

  bool isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
                     MachineBasicBlock &BB, Register &LiveReg);

  void rmRedundantSpills(
      SmallPtrSet<MachineInstr *, 16> &Spills,
      SmallVectorImpl<MachineInstr *> &SpillsToRm,
      DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);

  void getVisitOrders(
      MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
      SmallVectorImpl<MachineDomTreeNode *> &Orders,
      SmallVectorImpl<MachineInstr *> &SpillsToRm,
      DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
      DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);

  void runHoistSpills(LiveInterval &OrigLI, VNInfo &OrigVNI,
                      SmallPtrSet<MachineInstr *, 16> &Spills,
                      SmallVectorImpl<MachineInstr *> &SpillsToRm,
                      DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns);

public:
  HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
                   VirtRegMap &vrm)
      : MF(mf), LIS(pass.getAnalysis<LiveIntervalsWrapperPass>().getLIS()),
        LSS(pass.getAnalysis<LiveStacks>()),
        MDT(pass.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()),
        VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()),
        TRI(*mf.getSubtarget().getRegisterInfo()),
        MBFI(
            pass.getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()),
        IPA(LIS, mf.getNumBlockIDs()) {}

  void addToMergeableSpills(MachineInstr &Spill, int StackSlot,
                            unsigned Original);
  bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot);
  void hoistAllSpills();
  void LRE_DidCloneVirtReg(Register, Register) override;
};

class InlineSpiller : public Spiller {
  MachineFunction &MF;
  LiveIntervals &LIS;
  LiveStacks &LSS;
  MachineDominatorTree &MDT;
  VirtRegMap &VRM;
  MachineRegisterInfo &MRI;
  const TargetInstrInfo &TII;
  const TargetRegisterInfo &TRI;
  const MachineBlockFrequencyInfo &MBFI;

  // Variables that are valid during spill(), but used by multiple methods.
  LiveRangeEdit *Edit = nullptr;
  LiveInterval *StackInt = nullptr;
  int StackSlot;
  Register Original;

  // All registers to spill to StackSlot, including the main register.
  SmallVector<Register, 8> RegsToSpill;

  // All COPY instructions to/from snippets.
  // They are ignored since both operands refer to the same stack slot.
  // For bundled copies, this will only include the first header copy.
  SmallPtrSet<MachineInstr*, 8> SnippetCopies;

  // Values that failed to remat at some point.
  SmallPtrSet<VNInfo*, 8> UsedValues;

  // Dead defs generated during spilling.
  SmallVector<MachineInstr*, 8> DeadDefs;

  // Object that records spill information and performs the hoisting.
  HoistSpillHelper HSpiller;

  // Live range weight calculator.
  VirtRegAuxInfo &VRAI;

  ~InlineSpiller() override = default;

public:
  InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM,
                VirtRegAuxInfo &VRAI)
      : MF(MF), LIS(Pass.getAnalysis<LiveIntervalsWrapperPass>().getLIS()),
        LSS(Pass.getAnalysis<LiveStacks>()),
        MDT(Pass.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()),
        VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
        TRI(*MF.getSubtarget().getRegisterInfo()),
        MBFI(
            Pass.getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()),
        HSpiller(Pass, MF, VRM), VRAI(VRAI) {}

  void spill(LiveRangeEdit &) override;
  void postOptimization() override;

private:
  bool isSnippet(const LiveInterval &SnipLI);
  void collectRegsToSpill();

  bool isRegToSpill(Register Reg) { return is_contained(RegsToSpill, Reg); }

  bool isSibling(Register Reg);
  bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
  void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);

  void markValueUsed(LiveInterval*, VNInfo*);
  bool canGuaranteeAssignmentAfterRemat(Register VReg, MachineInstr &MI);
  bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
  void reMaterializeAll();

  bool coalesceStackAccess(MachineInstr *MI, Register Reg);
  bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>,
                         MachineInstr *LoadMI = nullptr);
  void insertReload(Register VReg, SlotIndex, MachineBasicBlock::iterator MI);
  void insertSpill(Register VReg, bool isKill, MachineBasicBlock::iterator MI);

  void spillAroundUses(Register Reg);
  void spillAll();
};

} // end anonymous namespace

Spiller::~Spiller() = default;

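// Out-of-line method to pin the Spiller vtable to this file.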
void Spiller::anchor() {}

Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass,
                                   MachineFunction &MF, VirtRegMap &VRM,
                                   VirtRegAuxInfo &VRAI) {
  return new InlineSpiller(Pass, MF, VRM, VRAI);
}

//===----------------------------------------------------------------------===//
// Snippets
//===----------------------------------------------------------------------===//

// When spilling a virtual register, we also spill any snippets it is connected
// to. The snippets are small live ranges that only have a single real use,
// leftovers from live range splitting. Spilling them enables memory operand
// folding or tightens the live range around the single use.
//
// This minimizes register pressure and maximizes the store-to-load distance
// for spill slots, which can be important in tight loops.

/// isCopyOf - If MI is a COPY to or from Reg, return the other register,
/// otherwise return an invalid register.
static Register isCopyOf(const MachineInstr &MI, Register Reg,
                         const TargetInstrInfo &TII) {
  if (!TII.isCopyInstr(MI))
    return Register();

  const MachineOperand &DstOp = MI.getOperand(0);
  const MachineOperand &SrcOp = MI.getOperand(1);

  // TODO: Probably only worth allowing subreg copies with undef dests.
  if (DstOp.getSubReg() != SrcOp.getSubReg())
    return Register();
  if (DstOp.getReg() == Reg)
    return SrcOp.getReg();
  if (SrcOp.getReg() == Reg)
    return DstOp.getReg();
  return Register();
}

/// Check for a copy bundle as formed by SplitKit.
static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg,
                               const TargetInstrInfo &TII) {
  if (!FirstMI.isBundled())
    return isCopyOf(FirstMI, Reg, TII);

  assert(!FirstMI.isBundledWithPred() && FirstMI.isBundledWithSucc() &&
         "expected to see first instruction in bundle");

  Register SnipReg;
  MachineBasicBlock::const_instr_iterator I = FirstMI.getIterator();
  while (I->isBundledWithSucc()) {
    const MachineInstr &MI = *I;
    auto CopyInst = TII.isCopyInstr(MI);
    if (!CopyInst)
      return Register();

    const MachineOperand &DstOp = *CopyInst->Destination;
    const MachineOperand &SrcOp = *CopyInst->Source;
    if (DstOp.getReg() == Reg) {
      if (!SnipReg)
        SnipReg = SrcOp.getReg();
      else if (SnipReg != SrcOp.getReg())
        return Register();
    } else if (SrcOp.getReg() == Reg) {
      if (!SnipReg)
        SnipReg = DstOp.getReg();
      else if (SnipReg != DstOp.getReg())
        return Register();
    }

    ++I;
  }

  return SnipReg;
}

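/// Ensure that LiveIntervals has computed an interval for every virtual
/// register defined by \p MI; querying an interval computes it on demand.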
static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) {
  for (const MachineOperand &MO : MI.all_defs())
    if (MO.getReg().isVirtual())
      LIS.getInterval(MO.getReg());
}

/// isSnippet - Identify if a live interval is a snippet that should be spilled.
/// It is assumed that SnipLI is a virtual register with the same original as
/// Edit->getReg().
bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
  Register Reg = Edit->getReg();

  // A snippet is a tiny live range with only a single instruction using it
  // besides copies to/from Reg or spills/fills.
  // An exception is made for statepoint instructions, which will fold fills
  // into their operands.
  // We accept:
  //
  //   %snip = COPY %Reg / FILL fi#
  //   %snip = USE %snip
  //   %snip = STATEPOINT %snip in var arg area
  //   %Reg = COPY %snip / SPILL %snip, fi#
  //
  if (!LIS.intervalIsInOneMBB(SnipLI))
    return false;

  // The number of defs should not exceed 2, not counting defs coming from
  // statepoint instructions.
  unsigned NumValNums = SnipLI.getNumValNums();
  for (auto *VNI : SnipLI.vnis()) {
    MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
    if (MI->getOpcode() == TargetOpcode::STATEPOINT)
      --NumValNums;
  }
  if (NumValNums > 2)
    return false;

  MachineInstr *UseMI = nullptr;

  // Check that all uses satisfy our criteria.
  for (MachineRegisterInfo::reg_bundle_nodbg_iterator
           RI = MRI.reg_bundle_nodbg_begin(SnipLI.reg()),
           E = MRI.reg_bundle_nodbg_end();
       RI != E;) {
    MachineInstr &MI = *RI++;

    // Allow copies to/from Reg.
    if (isCopyOfBundle(MI, Reg, TII))
      continue;

    // Allow stack slot loads.
    int FI;
    if (SnipLI.reg() == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
      continue;

    // Allow stack slot stores.
    if (SnipLI.reg() == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
      continue;

    if (StatepointOpers::isFoldableReg(&MI, SnipLI.reg()))
      continue;

    // Allow a single additional instruction.
    if (UseMI && &MI != UseMI)
      return false;
    UseMI = &MI;
  }
  return true;
}

/// collectRegsToSpill - Collect live range snippets that only have a single
/// real use.
void InlineSpiller::collectRegsToSpill() {
  Register Reg = Edit->getReg();

  // Main register always spills.
  RegsToSpill.assign(1, Reg);
  SnippetCopies.clear();

  // Snippets all have the same original register as the register being
  // spilled, so a register that is itself an original (it was never split)
  // can't have any snippets.
  if (Original == Reg)
    return;

  for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
    Register SnipReg = isCopyOfBundle(MI, Reg, TII);
    if (!isSibling(SnipReg))
      continue;
    LiveInterval &SnipLI = LIS.getInterval(SnipReg);
    if (!isSnippet(SnipLI))
      continue;
    SnippetCopies.insert(&MI);
    if (isRegToSpill(SnipReg))
      continue;
    RegsToSpill.push_back(SnipReg);
    LLVM_DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
    ++NumSnippets;
  }
}

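/// isSibling - Check if \p Reg is a sibling of the register being spilled,
/// i.e. a virtual register that was split from the same original register.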
bool InlineSpiller::isSibling(Register Reg) {
  return Reg.isVirtual() && VRM.getOriginal(Reg) == Original;
}

/// It can be beneficial to spill at an earlier place in the same BB in the
/// following case: there is an alternative def earlier in the same MBB.
/// Hoisting the spill as far as possible in SpillMBB can ease register
/// pressure:
///
///   x = def
///   y = use x
///   s = copy x
///
/// Hoisting the spill of s to immediately after the def removes the
/// interference between x and y:
///
///   x = def
///   spill x
///   y = use killed x
///
/// This hoist only helps when the copy kills its source.
///
bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
                                       MachineInstr &CopyMI) {
  SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
#ifndef NDEBUG
  VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
  assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
#endif

  Register SrcReg = CopyMI.getOperand(1).getReg();
  LiveInterval &SrcLI = LIS.getInterval(SrcReg);
  VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
  LiveQueryResult SrcQ = SrcLI.Query(Idx);
  MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def);
  if (DefMBB != CopyMI.getParent() || !SrcQ.isKill())
    return false;

  // Conservatively extend the stack slot range to the range of the original
  // value. We may be able to do better with stack slot coloring by being more
  // careful here.
  assert(StackInt && "No stack slot assigned yet.");
  LiveInterval &OrigLI = LIS.getInterval(Original);
  VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
  StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
  LLVM_DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
                    << *StackInt << '\n');

  // We are going to spill SrcVNI immediately after its def, so clear out
  // any later spills of the same value.
  eliminateRedundantSpills(SrcLI, SrcVNI);

  MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
  MachineBasicBlock::iterator MII;
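  // Pick the insertion point: right after the defining instruction, or past
  // the PHIs, labels and debug instructions at the top of the block for
  // PHI-defined values.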
  if (SrcVNI->isPHIDef())
    MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin(), SrcReg);
  else {
    MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
    assert(DefMI && "Defining instruction disappeared");
    MII = DefMI;
    ++MII;
  }
  MachineInstrSpan MIS(MII, MBB);
  // Insert spill without kill flag immediately after def.
  TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
                          MRI.getRegClass(SrcReg), &TRI, Register());
  LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
  for (const MachineInstr &MI : make_range(MIS.begin(), MII))
    getVDefInterval(MI, LIS);
  --MII; // Point to store instruction.
  LLVM_DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);

  // If only one store instruction is required for the spill, add it to the
  // mergeable list. On X86 AMX, two instructions are required to store, so
  // we disable merging for that case.
  if (MIS.begin() == MII)
    HSpiller.addToMergeableSpills(*MII, StackSlot, Original);
  ++NumSpills;
  return true;
}

/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any
/// redundant spills of this value in SLI.reg and sibling copies.
void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
  assert(VNI && "Missing value");
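  // Worklist of (sibling interval, value) pairs whose value is known to be
  // identical to the contents of the stack slot.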
  SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
  WorkList.push_back(std::make_pair(&SLI, VNI));
  assert(StackInt && "No stack slot assigned yet.");

  do {
    LiveInterval *LI;
    std::tie(LI, VNI) = WorkList.pop_back_val();
    Register Reg = LI->reg();
    LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@'
                      << VNI->def << " in " << *LI << '\n');

    // Regs to spill are taken care of.
    if (isRegToSpill(Reg))
      continue;

    // Add all of VNI's live range to StackInt.
    StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
    LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');

    // Find all spills and copies of VNI.
    for (MachineInstr &MI :
         llvm::make_early_inc_range(MRI.use_nodbg_bundles(Reg))) {
      if (!MI.mayStore() && !TII.isCopyInstr(MI))
        continue;
      SlotIndex Idx = LIS.getInstructionIndex(MI);
      if (LI->getVNInfoAt(Idx) != VNI)
        continue;

      // Follow sibling copies down the dominator tree.
      if (Register DstReg = isCopyOfBundle(MI, Reg, TII)) {
        if (isSibling(DstReg)) {
          LiveInterval &DstLI = LIS.getInterval(DstReg);
          VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
          assert(DstVNI && "Missing defined value");
          assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");

          WorkList.push_back(std::make_pair(&DstLI, DstVNI));
        }
        continue;
      }

      // Erase spills.
      int FI;
      if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
        LLVM_DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI);
        // eliminateDeadDefs won't normally remove stores, so switch opcode.
        MI.setDesc(TII.get(TargetOpcode::KILL));
        DeadDefs.push_back(&MI);
        ++NumSpillsRemoved;
        if (HSpiller.rmFromMergeableSpills(MI, StackSlot))
          --NumSpills;
      }
    }
  } while (!WorkList.empty());
}

//===----------------------------------------------------------------------===//
// Rematerialization
//===----------------------------------------------------------------------===//

/// markValueUsed - Remember that VNI failed to rematerialize, so its defining
/// instruction cannot be eliminated. Sees through snippet copies.
void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
  SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
  WorkList.push_back(std::make_pair(LI, VNI));
  do {
    std::tie(LI, VNI) = WorkList.pop_back_val();
    if (!UsedValues.insert(VNI).second)
      continue;

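    // A PHI-defined value is fed by its predecessors, so the incoming values
    // must be marked used as well.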
    if (VNI->isPHIDef()) {
      MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
      for (MachineBasicBlock *P : MBB->predecessors()) {
        VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P));
        if (PVNI)
          WorkList.push_back(std::make_pair(LI, PVNI));
      }
      continue;
    }

    // Follow snippet copies.
    MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
    if (!SnippetCopies.count(MI))
      continue;
    LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
    assert(isRegToSpill(SnipLI.reg()) && "Unexpected register in copy");
    VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
    assert(SnipVNI && "Snippet undefined before copy");
    WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
  } while (!WorkList.empty());
}

bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg,
                                                     MachineInstr &MI) {
  if (!RestrictStatepointRemat)
    return true;
  // Here's a quick explanation of the problem we're trying to handle here:
  // * There are some pseudo instructions with more vreg uses than there are
  //   physical registers on the machine.
  // * This is normally handled by spilling the vreg, and folding the reload
  //   into the user instruction. (Thus decreasing the number of used vregs
  //   until the remainder can be assigned to physregs.)
  // * However, since we may try to spill vregs in any order, we can end up
  //   trying to spill each operand to the instruction, and then rematting it
  //   instead. When that happens, the new live intervals (for the remats) are
  //   expected to be trivially assignable (i.e. RS_Done). However, since we
  //   may have more remats than physregs, we're guaranteed to fail to assign
  //   one.
  // At the moment, we only handle this for STATEPOINTs since they're the only
  // pseudo op where we've seen this. If we start seeing other instructions
  // with the same problem, we need to revisit this.
  if (MI.getOpcode() != TargetOpcode::STATEPOINT)
    return true;
  // For STATEPOINTs we allow re-materialization for fixed arguments only,
  // hoping that the number of physical registers is enough to cover all fixed
  // arguments. If that is not true, we need to revisit it.
  for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
                EndIdx = MI.getNumOperands();
       Idx < EndIdx; ++Idx) {
    MachineOperand &MO = MI.getOperand(Idx);
    if (MO.isReg() && MO.getReg() == VReg)
      return false;
  }
  return true;
}

/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
  // Analyze instruction.
  SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
  VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg(), &Ops);

  if (!RI.Reads)
    return false;

  SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
  VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());

  if (!ParentVNI) {
    LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
    for (MachineOperand &MO : MI.all_uses())
      if (MO.getReg() == VirtReg.reg())
        MO.setIsUndef();
    LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);
    return true;
  }

  if (SnippetCopies.count(&MI))
    return false;

  LiveInterval &OrigLI = LIS.getInterval(Original);
  VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
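  // Rematerialize from the instruction that defines the value in the original
  // live range; sibling copies may separate ParentVNI from the real def.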
  LiveRangeEdit::Remat RM(ParentVNI);
  RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);

  if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {
    markValueUsed(&VirtReg, ParentVNI);
    LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
    return false;
  }

  // If the instruction also writes VirtReg.reg, it had better not require the
  // same register for uses and defs.
  if (RI.Tied) {
    markValueUsed(&VirtReg, ParentVNI);
    LLVM_DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI);
    return false;
  }

  // Before rematerializing into a register for a single instruction, try to
  // fold a load into the instruction. That avoids allocating a new register.
  if (RM.OrigMI->canFoldAsLoad() &&
      foldMemoryOperand(Ops, RM.OrigMI)) {
    Edit->markRematerialized(RM.ParentVNI);
    ++NumFoldedLoads;
    return true;
  }

  // If we can't guarantee that we'll be able to actually assign the new vreg,
  // we can't remat.
  if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg(), MI)) {
    markValueUsed(&VirtReg, ParentVNI);
    LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
    return false;
  }

  // Allocate a new register for the remat.
  Register NewVReg = Edit->createFrom(Original);

  // Finally we can rematerialize OrigMI before MI.
  SlotIndex DefIdx =
      Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);

  // We take the DebugLoc from MI, since OrigMI may be attributed to a
  // different source location.
  auto *NewMI = LIS.getInstructionFromIndex(DefIdx);
  NewMI->setDebugLoc(MI.getDebugLoc());

  (void)DefIdx;
  LLVM_DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
                    << *LIS.getInstructionFromIndex(DefIdx));

  // Replace operands.
  for (const auto &OpPair : Ops) {
    MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
    if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) {
      MO.setReg(NewVReg);
      MO.setIsKill();
    }
  }
  LLVM_DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n');

  ++NumRemats;
  return true;
}

/// reMaterializeAll - Try to rematerialize as many uses as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
  if (!Edit->anyRematerializable())
    return;

  UsedValues.clear();

  // Try to remat before all uses of snippets.
  bool anyRemat = false;
  for (Register Reg : RegsToSpill) {
    LiveInterval &LI = LIS.getInterval(Reg);
    for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
      // Debug values are not allowed to affect codegen.
      if (MI.isDebugValue())
        continue;

      assert(!MI.isDebugInstr() && "Did not expect to find a use in debug "
             "instruction that isn't a DBG_VALUE");

      anyRemat |= reMaterializeFor(LI, MI);
    }
  }
  if (!anyRemat)
    return;

  // Remove any values that were completely rematted.
  for (Register Reg : RegsToSpill) {
    LiveInterval &LI = LIS.getInterval(Reg);
    for (VNInfo *VNI : LI.vnis()) {
      if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
        continue;
      MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
      MI->addRegisterDead(Reg, &TRI);
      if (!MI->allDefsAreDead())
        continue;
      LLVM_DEBUG(dbgs() << "All defs dead: " << *MI);
      DeadDefs.push_back(MI);
      // If MI is a bundle header, also try removing copies inside the bundle,
      // otherwise the verifier would complain "live range continues after dead
      // def flag".
      if (MI->isBundledWithSucc() && !MI->isBundledWithPred()) {
        MachineBasicBlock::instr_iterator BeginIt = MI->getIterator(),
                                          EndIt = MI->getParent()->instr_end();
        ++BeginIt; // Skip MI that was already handled.

        bool OnlyDeadCopies = true;
        for (MachineBasicBlock::instr_iterator It = BeginIt;
             It != EndIt && It->isBundledWithPred(); ++It) {

          auto DestSrc = TII.isCopyInstr(*It);
          bool IsCopyToDeadReg =
              DestSrc && DestSrc->Destination->getReg() == Reg;
          if (!IsCopyToDeadReg) {
            OnlyDeadCopies = false;
            break;
          }
        }
        if (OnlyDeadCopies) {
          for (MachineBasicBlock::instr_iterator It = BeginIt;
               It != EndIt && It->isBundledWithPred(); ++It) {
            It->addRegisterDead(Reg, &TRI);
            LLVM_DEBUG(dbgs() << "All defs dead: " << *It);
            DeadDefs.push_back(&*It);
          }
        }
      }
    }
  }

  // Eliminate dead code after remat. Note that some snippet copies may be
  // deleted here.
  if (DeadDefs.empty())
    return;
  LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
  Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);

  // LiveRangeEdit::eliminateDeadDef is used to remove dead define instructions
  // after rematerialization. To remove a VNI for a vreg from its LiveInterval,
  // LiveIntervals::removeVRegDefAt is used. However, after all non-PHI VNIs
  // have been removed, PHI VNIs are still left in the LiveInterval.
  // So to get rid of an unused register, we need to check whether it has any
  // non-dbg references instead of whether it has a non-empty interval.
  unsigned ResultPos = 0;
  for (Register Reg : RegsToSpill) {
    if (MRI.reg_nodbg_empty(Reg)) {
      Edit->eraseVirtReg(Reg);
      continue;
    }

    assert(LIS.hasInterval(Reg) &&
           (!LIS.getInterval(Reg).empty() || !MRI.reg_nodbg_empty(Reg)) &&
           "Empty and not used live-range?!");

    RegsToSpill[ResultPos++] = Reg;
  }
  RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
  LLVM_DEBUG(dbgs() << RegsToSpill.size()
                    << " registers to spill after remat.\n");
}

//===----------------------------------------------------------------------===//
// Spilling
//===----------------------------------------------------------------------===//

/// If MI is a load or store of StackSlot, it can be removed.
bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, Register Reg) {
  int FI = 0;
  Register InstrReg = TII.isLoadFromStackSlot(*MI, FI);
  bool IsLoad = InstrReg;
  if (!IsLoad)
    InstrReg = TII.isStoreToStackSlot(*MI, FI);

  // We have a stack access. Is it the right register and slot?
  if (InstrReg != Reg || FI != StackSlot)
    return false;

  if (!IsLoad)
    HSpiller.rmFromMergeableSpills(*MI, StackSlot);

  LLVM_DEBUG(dbgs() << "Coalescing stack access: " << *MI);
  LIS.RemoveMachineInstrFromMaps(*MI);
  MI->eraseFromParent();

  if (IsLoad) {
    ++NumReloadsRemoved;
    --NumReloads;
  } else {
    ++NumSpillsRemoved;
    --NumSpills;
  }

  return true;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
// Dump the range of instructions from B to E with their slot indexes.
static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
                                               MachineBasicBlock::iterator E,
                                               LiveIntervals const &LIS,
                                               const char *const header,
                                               Register VReg = Register()) {
  char NextLine = '\n';
  char SlotIndent = '\t';

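  // Print a single-instruction range on one line together with the header.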
  if (std::next(B) == E) {
    NextLine = ' ';
    SlotIndent = ' ';
  }

  dbgs() << '\t' << header << ": " << NextLine;

  for (MachineBasicBlock::iterator I = B; I != E; ++I) {
    SlotIndex Idx = LIS.getInstructionIndex(*I).getRegSlot();

    // If a register was passed in and this instruction has it as a
    // destination that is marked as an early clobber, print the
    // early-clobber slot index.
    if (VReg) {
      MachineOperand *MO = I->findRegisterDefOperand(VReg, /*TRI=*/nullptr);
      if (MO && MO->isEarlyClobber())
        Idx = Idx.getRegSlot(true);
    }

    dbgs() << SlotIndent << Idx << '\t' << *I;
  }
}
#endif

/// foldMemoryOperand - Try folding stack slot references in Ops into their
/// instructions.
///
/// @param Ops    Operand indices from AnalyzeVirtRegInBundle().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
/// @return       True on success.
bool InlineSpiller::
foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
                  MachineInstr *LoadMI) {
  if (Ops.empty())
    return false;
  // Don't attempt folding in bundles.
  MachineInstr *MI = Ops.front().first;
  if (Ops.back().first != MI || MI->isBundled())
    return false;

  bool WasCopy = TII.isCopyInstr(*MI).has_value();
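  // Remember a register used by an implicit operand so that any leftover
  // implicit operands referring to it can be stripped from the folded
  // instruction below.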
  Register ImpReg;

  // TII::foldMemoryOperand will do what we need here for statepoint
  // (fold load into use and remove corresponding def). We will replace
  // uses of removed def with loads (spillAroundUses).
  // For that to work we need to untie def and use to pass it through
  // foldMemoryOperand and signal foldPatchpoint that it is allowed to
  // fold them.
  bool UntieRegs = MI->getOpcode() == TargetOpcode::STATEPOINT;

  // Spill subregs if the target allows it.
  // We always want to spill subregs for stackmap/patchpoint pseudos.
  bool SpillSubRegs = TII.isSubregFoldable() ||
                      MI->getOpcode() == TargetOpcode::STATEPOINT ||
                      MI->getOpcode() == TargetOpcode::PATCHPOINT ||
                      MI->getOpcode() == TargetOpcode::STACKMAP;

  // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
  // operands.
  SmallVector<unsigned, 8> FoldOps;
  for (const auto &OpPair : Ops) {
    unsigned Idx = OpPair.second;
    assert(MI == OpPair.first && "Instruction conflict during operand folding");
    MachineOperand &MO = MI->getOperand(Idx);

    // No point restoring an undef read, and we'll produce an invalid live
    // interval.
    // TODO: Is this really the correct way to handle undef tied uses?
    if (MO.isUse() && !MO.readsReg() && !MO.isTied())
      continue;

    if (MO.isImplicit()) {
      ImpReg = MO.getReg();
      continue;
    }

    if (!SpillSubRegs && MO.getSubReg())
      return false;
    // We cannot fold a load instruction into a def.
    if (LoadMI && MO.isDef())
      return false;
    // Tied use operands should not be passed to foldMemoryOperand.
    if (UntieRegs || !MI->isRegTiedToDefOperand(Idx))
      FoldOps.push_back(Idx);
  }

  // If we only have implicit uses, we won't be able to fold that.
  // Moreover, TargetInstrInfo::foldMemoryOperand will assert if we try!
  if (FoldOps.empty())
    return false;

  MachineInstrSpan MIS(MI, MI->getParent());

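  // Pairs of (def index, use index) for the tied operands we untie, so they
  // can be re-tied if the fold fails.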
  SmallVector<std::pair<unsigned, unsigned>> TiedOps;
  if (UntieRegs)
    for (unsigned Idx : FoldOps) {
      MachineOperand &MO = MI->getOperand(Idx);
      if (!MO.isTied())
        continue;
      unsigned Tied = MI->findTiedOperandIdx(Idx);
      if (MO.isUse())
        TiedOps.emplace_back(Tied, Idx);
      else {
        assert(MO.isDef() && "Tied to not use and def?");
        TiedOps.emplace_back(Idx, Tied);
      }
      MI->untieRegOperand(Idx);
    }

  MachineInstr *FoldMI =
      LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
             : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
  if (!FoldMI) {
    // Re-tie operands.
    for (auto Tied : TiedOps)
      MI->tieOperands(Tied.first, Tied.second);
    return false;
  }

  // Remove LiveIntervals info for any dead physreg defs in the original MI
  // that are not present in FoldMI.
  for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) {
    if (!MO->isReg())
      continue;
    Register Reg = MO->getReg();
    if (!Reg || Reg.isVirtual() || MRI.isReserved(Reg)) {
      continue;
    }
    // Skip non-Defs, including undef uses and internal reads.
    if (MO->isUse())
      continue;
    PhysRegInfo RI = AnalyzePhysRegInBundle(*FoldMI, Reg, &TRI);
    if (RI.FullyDefined)
      continue;
    // FoldMI does not define this physreg. Remove the LI segment.
    assert(MO->isDead() && "Cannot fold physreg def");
    SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
    LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
  }

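  // If the original instruction was itself a spill that we were tracking as
  // mergeable, it is going away; update the bookkeeping.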
  int FI;
  if (TII.isStoreToStackSlot(*MI, FI) &&
      HSpiller.rmFromMergeableSpills(*MI, FI))
    --NumSpills;
  LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
  // Update the call site info.
  if (MI->isCandidateForCallSiteEntry())
    MI->getMF()->moveCallSiteInfo(MI, FoldMI);

  // If we've folded a store into an instruction labelled with debug-info,
  // record a substitution from the old operand to the memory operand. Handle
  // the simple common case where operand 0 is the one being folded, plus when
  // the destination operand is also a tied def. More values could be
  // substituted / preserved with more analysis.
  if (MI->peekDebugInstrNum() && Ops[0].second == 0) {
    // Helper lambda.
    auto MakeSubstitution = [this,FoldMI,MI,&Ops]() {
      // Substitute old operand zero to the new instruction's memory operand.
      unsigned OldOperandNum = Ops[0].second;
      unsigned NewNum = FoldMI->getDebugInstrNum();
      unsigned OldNum = MI->getDebugInstrNum();
      MF.makeDebugValueSubstitution({OldNum, OldOperandNum},
                                    {NewNum, MachineFunction::DebugOperandMemNumber});
    };

    const MachineOperand &Op0 = MI->getOperand(Ops[0].second);
    if (Ops.size() == 1 && Op0.isDef()) {
      MakeSubstitution();
    } else if (Ops.size() == 2 && Op0.isDef() && MI->getOperand(1).isTied() &&
               Op0.getReg() == MI->getOperand(1).getReg()) {
      MakeSubstitution();
    }
  } else if (MI->peekDebugInstrNum()) {
    // This is a debug-labelled instruction, but the operand being folded isn't
    // at operand zero. Most likely this means it's a load being folded in.
    // Substitute any register defs from operand zero up to the one being
    // folded -- past that point, we don't know what the new operand indexes
    // will be.
    MF.substituteDebugValuesForInst(*MI, *FoldMI, Ops[0].second);
  }

  MI->eraseFromParent();

  // Insert any new instructions other than FoldMI into the LIS maps.
  assert(!MIS.empty() && "Unexpected empty span of instructions!");
  for (MachineInstr &MI : MIS)
    if (&MI != FoldMI)
      LIS.InsertMachineInstrInMaps(MI);

  // TII.foldMemoryOperand may have left some implicit operands on the
  // instruction. Strip them.
  if (ImpReg)
    for (unsigned i = FoldMI->getNumOperands(); i; --i) {
      MachineOperand &MO = FoldMI->getOperand(i - 1);
      if (!MO.isReg() || !MO.isImplicit())
        break;
      if (MO.getReg() == ImpReg)
        FoldMI->removeOperand(i - 1);
    }

  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
                                                "folded"));

  if (!WasCopy)
    ++NumFolded;
  else if (Ops.front().second == 0) {
    ++NumSpills;
    // If only one store instruction is required for the spill, add it to the
    // mergeable list. On X86 AMX, two instructions are required to store, so
    // we disable merging for that case.
    if (std::distance(MIS.begin(), MIS.end()) <= 1)
      HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original);
  } else
    ++NumReloads;
  return true;
}

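/// insertReload - Insert a reload of NewVReg from StackSlot before MI.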
void InlineSpiller::insertReload(Register NewVReg,
                                 SlotIndex Idx,
                                 MachineBasicBlock::iterator MI) {
  MachineBasicBlock &MBB = *MI->getParent();

  MachineInstrSpan MIS(MI, &MBB);
  TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
                           MRI.getRegClass(NewVReg), &TRI, Register());

  LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);

  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload",
                                                NewVReg));
  ++NumReloads;
}

/// Check if \p Def fully defines a VReg with an undefined value.
/// If that's the case, that means the value of VReg is actually
/// not relevant.
static bool isRealSpill(const MachineInstr &Def) {
  if (!Def.isImplicitDef())
    return true;

  // We can say that the VReg defined by Def is undef, only if it is
  // fully defined by Def. Otherwise, some of the lanes may not be
  // undef and the value of the VReg matters.
  return Def.getOperand(0).getSubReg();
}

/// insertSpill - Insert a spill of NewVReg after MI.
void InlineSpiller::insertSpill(Register NewVReg, bool isKill,
                                MachineBasicBlock::iterator MI) {
  // Spills are not terminators, so inserting spills after terminators will
  // violate invariants in MachineVerifier.
  assert(!MI->isTerminator() && "Inserting a spill after a terminator");
  MachineBasicBlock &MBB = *MI->getParent();

  MachineInstrSpan MIS(MI, &MBB);
  MachineBasicBlock::iterator SpillBefore = std::next(MI);
  bool IsRealSpill = isRealSpill(*MI);

  if (IsRealSpill)
    TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot,
                            MRI.getRegClass(NewVReg), &TRI, Register());
  else
    // Don't spill undef value.
    // Anything works for undef, in particular keeping the memory
    // uninitialized is a viable option and it saves code size and
    // run time.
    BuildMI(MBB, SpillBefore, MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
        .addReg(NewVReg, getKillRegState(isKill));

  MachineBasicBlock::iterator Spill = std::next(MI);
  LIS.InsertMachineInstrRangeInMaps(Spill, MIS.end());
  for (const MachineInstr &MI : make_range(Spill, MIS.end()))
    getVDefInterval(MI, LIS);

  LLVM_DEBUG(
      dumpMachineInstrRangeWithSlotIndex(Spill, MIS.end(), LIS, "spill"));
  ++NumSpills;
  // If only one store instruction is required for the spill, add it to the
  // mergeable list. On X86 AMX, two instructions are required to store, so
  // we disable merging for that case.
  if (IsRealSpill && std::distance(Spill, MIS.end()) <= 1)
    HSpiller.addToMergeableSpills(*Spill, StackSlot, Original);
}

/// spillAroundUses - insert spill code around each use of Reg.
void InlineSpiller::spillAroundUses(Register Reg) {
  LLVM_DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n');
  LiveInterval &OldLI = LIS.getInterval(Reg);

  // Iterate over instructions using Reg.
  for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
    // Debug values are not allowed to affect codegen.
    if (MI.isDebugValue()) {
      // Modify DBG_VALUE now that the value is in a spill slot.
      MachineBasicBlock *MBB = MI.getParent();
      LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << MI);
      buildDbgValueForSpill(*MBB, &MI, MI, StackSlot, Reg);
      MBB->erase(MI);
      continue;
    }

    assert(!MI.isDebugInstr() && "Did not expect to find a use in debug "
           "instruction that isn't a DBG_VALUE");

    // Ignore copies to/from snippets. We'll delete them.
    if (SnippetCopies.count(&MI))
      continue;

    // Stack slot accesses may coalesce away.
    if (coalesceStackAccess(&MI, Reg))
      continue;

    // Analyze instruction.
    SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
    VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, Reg, &Ops);

    // Find the slot index where this instruction reads and writes OldLI.
    // This is usually the def slot, except for tied early clobbers.
    SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
    if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
      if (SlotIndex::isSameInstr(Idx, VNI->def))
        Idx = VNI->def;

    // Check for a sibling copy.
    Register SibReg = isCopyOfBundle(MI, Reg, TII);
    if (SibReg && isSibling(SibReg)) {
      // This may actually be a copy between snippets.
      if (isRegToSpill(SibReg)) {
        LLVM_DEBUG(dbgs() << "Found new snippet copy: " << MI);
        SnippetCopies.insert(&MI);
        continue;
      }
      if (RI.Writes) {
        if (hoistSpillInsideBB(OldLI, MI)) {
          // This COPY is now dead, the value is already in the stack slot.
          MI.getOperand(0).setIsDead();
          DeadDefs.push_back(&MI);
          continue;
        }
      } else {
        // This is a reload for a sib-reg copy. Drop spills downstream.
        LiveInterval &SibLI = LIS.getInterval(SibReg);
        eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
        // The COPY will fold to a reload below.
      }
    }

    // Attempt to fold memory ops.
    if (foldMemoryOperand(Ops))
      continue;

    // Create a new virtual register for spill/fill.
    // FIXME: Infer regclass from instruction alone.
    Register NewVReg = Edit->createFrom(Reg);

    if (RI.Reads)
      insertReload(NewVReg, Idx, &MI);

    // Rewrite instruction operands.
    bool hasLiveDef = false;
    for (const auto &OpPair : Ops) {
      MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
      MO.setReg(NewVReg);
      if (MO.isUse()) {
        if (!OpPair.first->isRegTiedToDefOperand(OpPair.second))
          MO.setIsKill();
      } else {
        if (!MO.isDead())
          hasLiveDef = true;
      }
    }
    LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << MI << '\n');

    // FIXME: Use a second vreg if instruction has no tied ops.
    if (RI.Writes)
      if (hasLiveDef)
        insertSpill(NewVReg, true, &MI);
  }
}

/// spillAll - Spill all registers remaining after rematerialization.
void InlineSpiller::spillAll() {
  // Update LiveStacks now that we are committed to spilling.
  if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
    StackSlot = VRM.assignVirt2StackSlot(Original);
    StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
    StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator());
  } else
    StackInt = &LSS.getInterval(StackSlot);

  if (Original != Edit->getReg())
    VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);

  assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
  for (Register Reg : RegsToSpill)
    StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg),
                                     StackInt->getValNumInfo(0));
  LLVM_DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');

  // Spill around uses of all RegsToSpill.
  for (Register Reg : RegsToSpill)
    spillAroundUses(Reg);

  // Hoisted spills may cause dead code.
  if (!DeadDefs.empty()) {
    LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
    Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
  }

  // Finally delete the SnippetCopies.
  for (Register Reg : RegsToSpill) {
    for (MachineInstr &MI :
         llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
      assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
      // FIXME: Do this with a LiveRangeEdit callback.
      LIS.getSlotIndexes()->removeSingleMachineInstrFromMaps(MI);
      MI.eraseFromBundle();
    }
  }

  // Delete all spilled registers.
  for (Register Reg : RegsToSpill)
    Edit->eraseVirtReg(Reg);
}

void InlineSpiller::spill(LiveRangeEdit &edit) {
  ++NumSpilledRanges;
  Edit = &edit;
  assert(!Register::isStackSlot(edit.getReg()) &&
         "Trying to spill a stack slot.");
  // Share a stack slot among all descendants of Original.
  Original = VRM.getOriginal(edit.getReg());
  StackSlot = VRM.getStackSlot(Original);
  StackInt = nullptr;

  LLVM_DEBUG(dbgs() << "Inline spilling "
                    << TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
                    << ':' << edit.getParent() << "\nFrom original "
                    << printReg(Original) << '\n');
  assert(edit.getParent().isSpillable() &&
         "Attempting to spill already spilled value.");
  assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");

  collectRegsToSpill();
  reMaterializeAll();

  // Remat may handle everything.
  if (!RegsToSpill.empty())
    spillAll();

  Edit->calculateRegClassAndHint(MF, VRAI);
}

/// Optimizations after all the reg selections and spills are done.
void InlineSpiller::postOptimization() { HSpiller.hoistAllSpills(); }

/// When a spill is inserted, add the spill to MergeableSpills map.
void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
                                            unsigned Original) {
  BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
  LiveInterval &OrigLI = LIS.getInterval(Original);
  // Save a copy of the LiveInterval in StackSlotToOrigLI because the original
  // LiveInterval may be cleared after all its references are spilled.
1321 | if (!StackSlotToOrigLI.contains(Val: StackSlot)) { |
1322 | auto LI = std::make_unique<LiveInterval>(args: OrigLI.reg(), args: OrigLI.weight()); |
1323 | LI->assign(Other: OrigLI, Allocator); |
1324 | StackSlotToOrigLI[StackSlot] = std::move(LI); |
1325 | } |
  SlotIndex Idx = LIS.getInstructionIndex(Spill);
  VNInfo *OrigVNI = StackSlotToOrigLI[StackSlot]->getVNInfoAt(Idx.getRegSlot());
  std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
  MergeableSpills[MIdx].insert(&Spill);
}

/// When a spill is removed, remove the spill from MergeableSpills map.
/// Return true if the spill is removed successfully.
bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
                                             int StackSlot) {
  auto It = StackSlotToOrigLI.find(StackSlot);
  if (It == StackSlotToOrigLI.end())
    return false;
  SlotIndex Idx = LIS.getInstructionIndex(Spill);
  VNInfo *OrigVNI = It->second->getVNInfoAt(Idx.getRegSlot());
  std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
  return MergeableSpills[MIdx].erase(&Spill);
}

/// Check BB to see if it is a possible target BB to place a hoisted spill,
/// i.e., there should be a live sibling of OrigReg at the insert point.
bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
                                     MachineBasicBlock &BB, Register &LiveReg) {
  SlotIndex Idx = IPA.getLastInsertPoint(OrigLI, BB);
  // If the original def is after the last insert point in the root block,
  // we can't hoist to here.
  if (Idx < OrigVNI.def) {
    // TODO: We could do better here. If the live interval is not live in the
    // landing pad, we could hoist the spill after the last insert point.
    LLVM_DEBUG(dbgs() << "can't spill in root block - def after LIP\n");
    return false;
  }
  Register OrigReg = OrigLI.reg();
  SmallSetVector<Register, 16> &Siblings = Virt2SiblingsMap[OrigReg];
  assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI");

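  // Any sibling whose live range covers the insert point can supply the value
  // and act as the source register of a spill hoisted into this block.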
  for (const Register &SibReg : Siblings) {
    LiveInterval &LI = LIS.getInterval(SibReg);
    VNInfo *VNI = LI.getVNInfoAt(Idx);
    if (VNI) {
      LiveReg = SibReg;
      return true;
    }
  }
  return false;
}

/// Remove redundant spills in the same BB. Save those redundant spills in
/// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map.
void HoistSpillHelper::rmRedundantSpills(
    SmallPtrSet<MachineInstr *, 16> &Spills,
    SmallVectorImpl<MachineInstr *> &SpillsToRm,
    DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
  // For each spill seen, check SpillBBToSpill[] to see if its BB already has
  // another spill inside. If a BB contains more than one spill, only keep the
  // earlier spill with the smaller SlotIndex.
  for (auto *const CurrentSpill : Spills) {
    MachineBasicBlock *Block = CurrentSpill->getParent();
    MachineDomTreeNode *Node = MDT.getNode(Block);
    MachineInstr *PrevSpill = SpillBBToSpill[Node];
    if (PrevSpill) {
      SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
      SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill);
      MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
      MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
      SpillsToRm.push_back(SpillToRm);
      SpillBBToSpill[MDT.getNode(Block)] = SpillToKeep;
    } else {
      SpillBBToSpill[MDT.getNode(Block)] = CurrentSpill;
    }
  }
  for (auto *const SpillToRm : SpillsToRm)
    Spills.erase(SpillToRm);
}

/// Starting from \p Root find a top-down traversal order of the dominator
/// tree to visit all basic blocks containing the elements of \p Spills.
/// Redundant spills will be found and put into \p SpillsToRm at the same
/// time. \p SpillBBToSpill will be populated as part of the process and
/// maps a basic block to the first store occurring in the basic block.
/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
void HoistSpillHelper::getVisitOrders(
    MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
    SmallVectorImpl<MachineDomTreeNode *> &Orders,
    SmallVectorImpl<MachineInstr *> &SpillsToRm,
    DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
    DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
  // The set contains all the possible BB nodes to which we may hoist
  // original spills.
  SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
  // Save the BB nodes on the path from the first BB node containing a
  // non-redundant spill to the Root node.
  SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
  // All the spills to be hoisted must originate from a single def instruction
  // of OrigReg, so that def instruction dominates all the spills to be
  // hoisted. We choose the BB where the def instruction is located as
  // the Root.
  MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
  // For every node on the dominator tree containing a spill, walk up the
  // dominator tree towards the Root node until it is reached. If another node
  // containing a spill is found in the middle of the path, the spill seen
  // earlier is redundant and the node containing it is removed. All the nodes
  // on the path starting from the first node with a non-redundant spill to the
  // Root node are added to the WorkSet, which will contain all the possible
  // locations where spills may be hoisted to after the loop below is done.
  for (auto *const Spill : Spills) {
    MachineBasicBlock *Block = Spill->getParent();
    MachineDomTreeNode *Node = MDT[Block];
    MachineInstr *SpillToRm = nullptr;
    while (Node != RootIDomNode) {
      // If Node dominates Block, and it already contains a spill, the spill in
      // Block will be redundant.
      if (Node != MDT[Block] && SpillBBToSpill[Node]) {
        SpillToRm = SpillBBToSpill[MDT[Block]];
        break;
      } else if (WorkSet.count(Node)) {
        // If Node is already in the WorkSet, the path from Node to the Root
        // node has been traversed by another spill; no need to repeat.
        break;
      } else {
        NodesOnPath.insert(Node);
      }
      Node = Node->getIDom();
    }
    if (SpillToRm) {
      SpillsToRm.push_back(SpillToRm);
    } else {
      // Add a BB containing the original spills to SpillsToKeep -- i.e.,
      // set the initial status before hoisting starts. The value for BBs
      // containing original spills is set to 0, to distinguish them from
      // BBs containing hoisted spills, which are inserted into SpillsToKeep
      // later during hoisting.
      SpillsToKeep[MDT[Block]] = 0;
      WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
    }
    NodesOnPath.clear();
  }

  // Sort the nodes in WorkSet in top-down order and save the nodes
  // in Orders. Orders will be used for hoisting in runHoistSpills.
  unsigned idx = 0;
  Orders.push_back(MDT.getNode(Root));
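  // A simple worklist walk over the dominator tree: Orders doubles as the
  // FIFO queue with idx as its head, so parents are visited before children.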
  do {
    MachineDomTreeNode *Node = Orders[idx++];
    for (MachineDomTreeNode *Child : Node->children()) {
      if (WorkSet.count(Child))
        Orders.push_back(Child);
    }
  } while (idx != Orders.size());
  assert(Orders.size() == WorkSet.size() &&
         "Orders and WorkSet have different sizes");

#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
  SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
  for (; RIt != Orders.rend(); ++RIt)
    LLVM_DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
  LLVM_DEBUG(dbgs() << "\n");
#endif
}

/// Try to hoist spills according to BB hotness. The spills to be removed will
/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
/// \p SpillsToIns.
void HoistSpillHelper::runHoistSpills(
    LiveInterval &OrigLI, VNInfo &OrigVNI,
    SmallPtrSet<MachineInstr *, 16> &Spills,
    SmallVectorImpl<MachineInstr *> &SpillsToRm,
    DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
  // Visit order of dominator tree nodes.
  SmallVector<MachineDomTreeNode *, 32> Orders;
  // SpillsToKeep contains all the nodes where spills are to be inserted
  // during hoisting. If the spill to be inserted is an original spill
  // (not a hoisted one), the value of the map entry is 0. If the spill
  // is a hoisted spill, the value of the map entry is the VReg to be used
  // as the source of the spill.
  DenseMap<MachineDomTreeNode *, unsigned> SpillsToKeep;
  // Map from BB to the first spill inside of it.
  DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;

  rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);

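  // The block containing the original def dominates every spill of this
  // value, so it serves as the root of the bottom-up hoisting walk.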
  MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
  getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
                 SpillBBToSpill);

  // SpillsInSubTreeMap keeps the map from a dom tree node to a pair of
  // a node set and the cost of all the spills inside those nodes.
  // The node set contains the locations where spills are to be inserted
  // in the subtree of the current node.
  using NodesCostPair =
      std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>;
  DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;

  // Iterate the Orders set in reverse order, which will be a bottom-up order
  // in the dominator tree. Once we visit a dom tree node, we know its
  // children have already been visited and the spill locations in the
  // subtrees of all the children have been determined.
  SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
  for (; RIt != Orders.rend(); ++RIt) {
    MachineBasicBlock *Block = (*RIt)->getBlock();

    // If Block contains an original spill, simply continue.
    if (SpillsToKeep.contains(*RIt) && !SpillsToKeep[*RIt]) {
      SpillsInSubTreeMap[*RIt].first.insert(*RIt);
      // SpillsInSubTreeMap[*RIt].second contains the cost of spill.
      SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
      continue;
    }

    // Collect spills in the subtree of the current node (*RIt) into
    // SpillsInSubTreeMap[*RIt].first.
    for (MachineDomTreeNode *Child : (*RIt)->children()) {
      if (!SpillsInSubTreeMap.contains(Child))
        continue;
      // The reference "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below
      // must be taken before getting the begin and end iterators of
      // SpillsInSubTreeMap[Child].first: the first access to
      // SpillsInSubTreeMap[*RIt] may insert a new entry, and if the map then
      // grows and rehashes, iterators into it are invalidated.
      SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
          SpillsInSubTreeMap[*RIt].first;
      BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
      SubTreeCost += SpillsInSubTreeMap[Child].second;
      auto BI = SpillsInSubTreeMap[Child].first.begin();
      auto EI = SpillsInSubTreeMap[Child].first.end();
      SpillsInSubTree.insert(BI, EI);
      SpillsInSubTreeMap.erase(Child);
    }

    SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
        SpillsInSubTreeMap[*RIt].first;
    BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
    // No spills in the subtree, simply continue.
    if (SpillsInSubTree.empty())
      continue;

    // Check whether Block is a possible candidate to insert a spill.
    Register LiveReg;
    if (!isSpillCandBB(OrigLI, OrigVNI, *Block, LiveReg))
      continue;

    // If there are multiple spills that could be merged, bias a little
    // toward hoisting the spill.
    BranchProbability MarginProb = (SpillsInSubTree.size() > 1)
                                       ? BranchProbability(9, 10)
                                       : BranchProbability(1, 1);
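    // Hoist only if a single spill in Block is cheaper than the combined
    // frequency of the subtree's spill blocks (scaled by the margin above).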
    if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
      // Hoist: Move spills to the current Block.
      for (auto *const SpillBB : SpillsInSubTree) {
        // When SpillBB contains an original spill, insert that spill
        // into SpillsToRm.
        if (SpillsToKeep.contains(SpillBB) && !SpillsToKeep[SpillBB]) {
          MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
          SpillsToRm.push_back(SpillToRm);
        }
        // SpillBB will not contain a spill anymore, remove it from
        // SpillsToKeep.
        SpillsToKeep.erase(SpillBB);
      }
      // The current Block is the BB containing the new hoisted spill. Add it
      // to SpillsToKeep. LiveReg is the source of the new spill.
      SpillsToKeep[*RIt] = LiveReg;
      LLVM_DEBUG({
        dbgs() << "spills in BB: ";
        for (const auto Rspill : SpillsInSubTree)
          dbgs() << Rspill->getBlock()->getNumber() << " ";
        dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
               << "\n";
      });
      SpillsInSubTree.clear();
      SpillsInSubTree.insert(*RIt);
      SubTreeCost = MBFI.getBlockFreq(Block);
    }
  }
  // For spills in SpillsToKeep with LiveReg set (i.e., not original spills),
  // save them to SpillsToIns.
  for (const auto &Ent : SpillsToKeep) {
    if (Ent.second)
      SpillsToIns[Ent.first->getBlock()] = Ent.second;
  }
}

/// For spills with equal values, remove redundant spills and hoist those left
/// to less hot spots.
///
/// Spills with equal values are collected into the same set in
/// MergeableSpills when each spill is inserted. These equal spills originate
/// from the same defining instruction and are dominated by it. Before hoisting
/// all the equal spills, redundant spills inside the same BB are first marked
/// to be deleted. Then, starting from the spills left, walk up the dominator
/// tree towards the Root node where the defining instruction is located, mark
/// the dominated spills to be deleted along the way, and collect the BB nodes
/// on the path from non-dominated spills to the defining instruction into a
/// WorkSet. The nodes in WorkSet are the candidate places where we consider
/// hoisting the spills. We iterate the WorkSet in bottom-up order, and for
/// each node, decide whether to hoist the spills inside its subtree to that
/// node. In this way, we can get a benefit locally even if hoisting all the
/// equal spills to one cold place is impossible.
void HoistSpillHelper::hoistAllSpills() {
  SmallVector<Register, 4> NewVRegs;
  LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);

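  // Group every virtual register with the pre-split register it descends
  // from; spills of sibling registers are the candidates for merging.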
  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
    Register Reg = Register::index2VirtReg(i);
    Register Original = VRM.getPreSplitReg(Reg);
    if (!MRI.def_empty(Reg))
      Virt2SiblingsMap[Original].insert(Reg);
  }

  // Each entry in MergeableSpills contains a spill set with equal values.
  for (auto &Ent : MergeableSpills) {
    int Slot = Ent.first.first;
    LiveInterval &OrigLI = *StackSlotToOrigLI[Slot];
    VNInfo *OrigVNI = Ent.first.second;
    SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
    if (Ent.second.empty())
      continue;

    LLVM_DEBUG({
      dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
             << "Equal spills in BB: ";
      for (const auto spill : EqValSpills)
        dbgs() << spill->getParent()->getNumber() << " ";
      dbgs() << "\n";
    });

    // SpillsToRm is the spill set to be removed from EqValSpills.
    SmallVector<MachineInstr *, 16> SpillsToRm;
    // SpillsToIns is the spill set to be newly inserted after hoisting.
    DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;

    runHoistSpills(OrigLI, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);

    LLVM_DEBUG({
      dbgs() << "Finally inserted spills in BB: ";
      for (const auto &Ispill : SpillsToIns)
        dbgs() << Ispill.first->getNumber() << " ";
      dbgs() << "\nFinally removed spills in BB: ";
      for (const auto Rspill : SpillsToRm)
        dbgs() << Rspill->getParent()->getNumber() << " ";
      dbgs() << "\n";
    });

    // Stack live range update: merge the spilled value into the stack slot's
    // single value so the slot stays live across the hoisted spills.
    LiveInterval &StackIntvl = LSS.getInterval(Slot);
    if (!SpillsToIns.empty() || !SpillsToRm.empty())
      StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
                                     StackIntvl.getValNumInfo(0));

    // Insert hoisted spills.
    for (auto const &Insert : SpillsToIns) {
      MachineBasicBlock *BB = Insert.first;
      Register LiveReg = Insert.second;
      MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB);
      MachineInstrSpan MIS(MII, BB);
      TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot,
                              MRI.getRegClass(LiveReg), &TRI, Register());
      LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
      for (const MachineInstr &MI : make_range(MIS.begin(), MII))
        getVDefInterval(MI, LIS);
      ++NumSpills;
    }

    // Remove redundant spills or change them to dead instructions.
    NumSpills -= SpillsToRm.size();
    for (auto *const RMEnt : SpillsToRm) {
      RMEnt->setDesc(TII.get(TargetOpcode::KILL));
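      // Drop any live implicit defs so the KILL no longer defines anything;
      // eliminateDeadDefs below can then erase it.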
      for (unsigned i = RMEnt->getNumOperands(); i; --i) {
        MachineOperand &MO = RMEnt->getOperand(i - 1);
        if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
          RMEnt->removeOperand(i - 1);
      }
    }
    Edit.eliminateDeadDefs(SpillsToRm, std::nullopt);
  }
}

/// For a cloned virtual register, the \p New register should get the same
/// physreg or stack slot assignment as the \p Old register.
void HoistSpillHelper::LRE_DidCloneVirtReg(Register New, Register Old) {
  if (VRM.hasPhys(Old))
    VRM.assignVirt2Phys(New, VRM.getPhys(Old));
  else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT)
    VRM.assignVirt2StackSlot(New, VRM.getStackSlot(Old));
  else
    llvm_unreachable("VReg should be assigned either physreg or stackslot");
  if (VRM.hasShape(Old))
    VRM.assignVirt2Shape(New, VRM.getShape(Old));
}