1 | //===- InlineSpiller.cpp - Insert spills and restores inline --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The inline spiller modifies the machine function directly instead of |
10 | // inserting spills and restores in VirtRegMap. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "AllocationOrder.h" |
15 | #include "SplitKit.h" |
16 | #include "llvm/ADT/ArrayRef.h" |
17 | #include "llvm/ADT/DenseMap.h" |
18 | #include "llvm/ADT/MapVector.h" |
19 | #include "llvm/ADT/STLExtras.h" |
20 | #include "llvm/ADT/SetVector.h" |
21 | #include "llvm/ADT/SmallPtrSet.h" |
22 | #include "llvm/ADT/SmallVector.h" |
23 | #include "llvm/ADT/Statistic.h" |
24 | #include "llvm/CodeGen/LiveInterval.h" |
25 | #include "llvm/CodeGen/LiveIntervals.h" |
26 | #include "llvm/CodeGen/LiveRangeEdit.h" |
27 | #include "llvm/CodeGen/LiveRegMatrix.h" |
28 | #include "llvm/CodeGen/LiveStacks.h" |
29 | #include "llvm/CodeGen/MachineBasicBlock.h" |
30 | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
31 | #include "llvm/CodeGen/MachineDominators.h" |
32 | #include "llvm/CodeGen/MachineFunction.h" |
33 | #include "llvm/CodeGen/MachineInstr.h" |
34 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
35 | #include "llvm/CodeGen/MachineInstrBundle.h" |
36 | #include "llvm/CodeGen/MachineOperand.h" |
37 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
38 | #include "llvm/CodeGen/SlotIndexes.h" |
39 | #include "llvm/CodeGen/Spiller.h" |
40 | #include "llvm/CodeGen/StackMaps.h" |
41 | #include "llvm/CodeGen/TargetInstrInfo.h" |
42 | #include "llvm/CodeGen/TargetOpcodes.h" |
43 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
44 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
45 | #include "llvm/CodeGen/VirtRegMap.h" |
46 | #include "llvm/Config/llvm-config.h" |
47 | #include "llvm/Support/BlockFrequency.h" |
48 | #include "llvm/Support/BranchProbability.h" |
49 | #include "llvm/Support/CommandLine.h" |
50 | #include "llvm/Support/Compiler.h" |
51 | #include "llvm/Support/Debug.h" |
52 | #include "llvm/Support/ErrorHandling.h" |
53 | #include "llvm/Support/raw_ostream.h" |
54 | #include <cassert> |
55 | #include <iterator> |
56 | #include <tuple> |
57 | #include <utility> |
58 | |
59 | using namespace llvm; |
60 | |
61 | #define DEBUG_TYPE "regalloc" |
62 | |
STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
STATISTIC(NumSnippets, "Number of spilled snippets");
STATISTIC(NumSpills, "Number of spills inserted");
STATISTIC(NumSpillsRemoved, "Number of spills removed");
STATISTIC(NumReloads, "Number of reloads inserted");
STATISTIC(NumReloadsRemoved, "Number of reloads removed");
STATISTIC(NumFolded, "Number of folded stack accesses");
STATISTIC(NumFoldedLoads, "Number of folded loads");
STATISTIC(NumRemats, "Number of rematerialized defs for spilling");

static cl::opt<bool>
    RestrictStatepointRemat("restrict-statepoint-remat", cl::init(false),
                            cl::Hidden,
                            cl::desc("Restrict remat for statepoint operands"));
77 | |
78 | namespace { |
79 | class HoistSpillHelper : private LiveRangeEdit::Delegate { |
80 | MachineFunction &MF; |
81 | LiveIntervals &LIS; |
82 | LiveStacks &LSS; |
83 | MachineDominatorTree &MDT; |
84 | VirtRegMap &VRM; |
85 | MachineRegisterInfo &MRI; |
86 | const TargetInstrInfo &TII; |
87 | const TargetRegisterInfo &TRI; |
88 | const MachineBlockFrequencyInfo &MBFI; |
89 | |
90 | InsertPointAnalysis IPA; |
91 | |
92 | // Map from StackSlot to the LiveInterval of the original register. |
93 | // Note the LiveInterval of the original register may have been deleted |
94 | // after it is spilled. We keep a copy here to track the range where |
95 | // spills can be moved. |
96 | DenseMap<int, std::unique_ptr<LiveInterval>> StackSlotToOrigLI; |
97 | |
98 | // Map from pair of (StackSlot and Original VNI) to a set of spills which |
99 | // have the same stackslot and have equal values defined by Original VNI. |
100 | // These spills are mergeable and are hoist candidates. |
101 | using MergeableSpillsMap = |
102 | MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>; |
103 | MergeableSpillsMap MergeableSpills; |
104 | |
105 | /// This is the map from original register to a set containing all its |
106 | /// siblings. To hoist a spill to another BB, we need to find out a live |
107 | /// sibling there and use it as the source of the new spill. |
108 | DenseMap<Register, SmallSetVector<Register, 16>> Virt2SiblingsMap; |
109 | |
110 | bool isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI, |
111 | MachineBasicBlock &BB, Register &LiveReg); |
112 | |
113 | void rmRedundantSpills( |
114 | SmallPtrSet<MachineInstr *, 16> &Spills, |
115 | SmallVectorImpl<MachineInstr *> &SpillsToRm, |
116 | DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill); |
117 | |
118 | void getVisitOrders( |
119 | MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills, |
120 | SmallVectorImpl<MachineDomTreeNode *> &Orders, |
121 | SmallVectorImpl<MachineInstr *> &SpillsToRm, |
122 | DenseMap<MachineDomTreeNode *, Register> &SpillsToKeep, |
123 | DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill); |
124 | |
125 | void runHoistSpills(LiveInterval &OrigLI, VNInfo &OrigVNI, |
126 | SmallPtrSet<MachineInstr *, 16> &Spills, |
127 | SmallVectorImpl<MachineInstr *> &SpillsToRm, |
128 | DenseMap<MachineBasicBlock *, Register> &SpillsToIns); |
129 | |
130 | public: |
131 | HoistSpillHelper(const Spiller::RequiredAnalyses &Analyses, |
132 | MachineFunction &mf, VirtRegMap &vrm) |
133 | : MF(mf), LIS(Analyses.LIS), LSS(Analyses.LSS), MDT(Analyses.MDT), |
134 | VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), |
135 | TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(Analyses.MBFI), |
136 | IPA(LIS, mf.getNumBlockIDs()) {} |
137 | |
138 | void addToMergeableSpills(MachineInstr &Spill, int StackSlot, |
139 | Register Original); |
140 | bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot); |
141 | void hoistAllSpills(); |
142 | void LRE_DidCloneVirtReg(Register, Register) override; |
143 | }; |
144 | |
145 | class InlineSpiller : public Spiller { |
146 | MachineFunction &MF; |
147 | LiveIntervals &LIS; |
148 | LiveStacks &LSS; |
149 | VirtRegMap &VRM; |
150 | MachineRegisterInfo &MRI; |
151 | const TargetInstrInfo &TII; |
152 | const TargetRegisterInfo &TRI; |
153 | LiveRegMatrix *Matrix = nullptr; |
154 | |
155 | // Variables that are valid during spill(), but used by multiple methods. |
156 | LiveRangeEdit *Edit = nullptr; |
157 | LiveInterval *StackInt = nullptr; |
158 | int StackSlot; |
159 | Register Original; |
160 | AllocationOrder *Order = nullptr; |
161 | |
162 | // All registers to spill to StackSlot, including the main register. |
163 | SmallVector<Register, 8> RegsToSpill; |
164 | |
165 | // All registers that were replaced by the spiller through some other method, |
166 | // e.g. rematerialization. |
167 | SmallVector<Register, 8> RegsReplaced; |
168 | |
169 | // All COPY instructions to/from snippets. |
170 | // They are ignored since both operands refer to the same stack slot. |
171 | // For bundled copies, this will only include the first header copy. |
172 | SmallPtrSet<MachineInstr*, 8> SnippetCopies; |
173 | |
174 | // Values that failed to remat at some point. |
175 | SmallPtrSet<VNInfo*, 8> UsedValues; |
176 | |
177 | // Dead defs generated during spilling. |
178 | SmallVector<MachineInstr*, 8> DeadDefs; |
179 | |
  // Object that records spill information and performs the hoisting.
181 | HoistSpillHelper HSpiller; |
182 | |
183 | // Live range weight calculator. |
184 | VirtRegAuxInfo &VRAI; |
185 | |
186 | ~InlineSpiller() override = default; |
187 | |
188 | public: |
189 | InlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF, |
190 | VirtRegMap &VRM, VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix) |
191 | : MF(MF), LIS(Analyses.LIS), LSS(Analyses.LSS), VRM(VRM), |
192 | MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()), |
193 | TRI(*MF.getSubtarget().getRegisterInfo()), Matrix(Matrix), |
194 | HSpiller(Analyses, MF, VRM), VRAI(VRAI) {} |
195 | |
196 | void spill(LiveRangeEdit &, AllocationOrder *Order = nullptr) override; |
197 | ArrayRef<Register> getSpilledRegs() override { return RegsToSpill; } |
198 | ArrayRef<Register> getReplacedRegs() override { return RegsReplaced; } |
199 | void postOptimization() override; |
200 | |
201 | private: |
202 | bool isSnippet(const LiveInterval &SnipLI); |
203 | void collectRegsToSpill(); |
204 | |
  bool isRegToSpill(Register Reg) { return is_contained(RegsToSpill, Reg); }
206 | |
207 | bool isSibling(Register Reg); |
208 | bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI); |
209 | void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); |
210 | |
211 | void markValueUsed(LiveInterval*, VNInfo*); |
212 | bool canGuaranteeAssignmentAfterRemat(Register VReg, MachineInstr &MI); |
213 | bool hasPhysRegAvailable(const MachineInstr &MI); |
214 | bool reMaterializeFor(LiveInterval &, MachineInstr &MI); |
215 | void reMaterializeAll(); |
216 | |
217 | bool coalesceStackAccess(MachineInstr *MI, Register Reg); |
218 | bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>>, |
219 | MachineInstr *LoadMI = nullptr); |
220 | void insertReload(Register VReg, SlotIndex, MachineBasicBlock::iterator MI); |
221 | void insertSpill(Register VReg, bool isKill, MachineBasicBlock::iterator MI); |
222 | |
223 | void spillAroundUses(Register Reg); |
224 | void spillAll(); |
225 | }; |
226 | |
227 | } // end anonymous namespace |
228 | |
229 | Spiller::~Spiller() = default; |
230 | |
231 | void Spiller::anchor() {} |
232 | |
233 | Spiller * |
234 | llvm::createInlineSpiller(const InlineSpiller::RequiredAnalyses &Analyses, |
235 | MachineFunction &MF, VirtRegMap &VRM, |
236 | VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix) { |
237 | return new InlineSpiller(Analyses, MF, VRM, VRAI, Matrix); |
238 | } |
239 | |
240 | //===----------------------------------------------------------------------===// |
241 | // Snippets |
242 | //===----------------------------------------------------------------------===// |
243 | |
244 | // When spilling a virtual register, we also spill any snippets it is connected |
245 | // to. The snippets are small live ranges that only have a single real use, |
246 | // leftovers from live range splitting. Spilling them enables memory operand |
247 | // folding or tightens the live range around the single use. |
248 | // |
249 | // This minimizes register pressure and maximizes the store-to-load distance for |
250 | // spill slots which can be important in tight loops. |
251 | |
/// isCopyOf - If MI is a COPY to or from Reg, return the other register,
/// otherwise return an invalid register.
254 | static Register isCopyOf(const MachineInstr &MI, Register Reg, |
255 | const TargetInstrInfo &TII) { |
256 | if (!TII.isCopyInstr(MI)) |
257 | return Register(); |
258 | |
  const MachineOperand &DstOp = MI.getOperand(0);
  const MachineOperand &SrcOp = MI.getOperand(1);
261 | |
262 | // TODO: Probably only worth allowing subreg copies with undef dests. |
263 | if (DstOp.getSubReg() != SrcOp.getSubReg()) |
264 | return Register(); |
265 | if (DstOp.getReg() == Reg) |
266 | return SrcOp.getReg(); |
267 | if (SrcOp.getReg() == Reg) |
268 | return DstOp.getReg(); |
269 | return Register(); |
270 | } |
271 | |
272 | /// Check for a copy bundle as formed by SplitKit. |
273 | static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg, |
274 | const TargetInstrInfo &TII) { |
275 | if (!FirstMI.isBundled()) |
    return isCopyOf(FirstMI, Reg, TII);
277 | |
278 | assert(!FirstMI.isBundledWithPred() && FirstMI.isBundledWithSucc() && |
279 | "expected to see first instruction in bundle" ); |
280 | |
281 | Register SnipReg; |
282 | MachineBasicBlock::const_instr_iterator I = FirstMI.getIterator(); |
283 | while (I->isBundledWithSucc()) { |
284 | const MachineInstr &MI = *I; |
285 | auto CopyInst = TII.isCopyInstr(MI); |
286 | if (!CopyInst) |
287 | return Register(); |
288 | |
289 | const MachineOperand &DstOp = *CopyInst->Destination; |
290 | const MachineOperand &SrcOp = *CopyInst->Source; |
291 | if (DstOp.getReg() == Reg) { |
292 | if (!SnipReg) |
293 | SnipReg = SrcOp.getReg(); |
294 | else if (SnipReg != SrcOp.getReg()) |
295 | return Register(); |
296 | } else if (SrcOp.getReg() == Reg) { |
297 | if (!SnipReg) |
298 | SnipReg = DstOp.getReg(); |
299 | else if (SnipReg != DstOp.getReg()) |
300 | return Register(); |
301 | } |
302 | |
303 | ++I; |
304 | } |
305 | |
  return SnipReg;
307 | } |
308 | |
309 | static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) { |
310 | for (const MachineOperand &MO : MI.all_defs()) |
311 | if (MO.getReg().isVirtual()) |
      LIS.getInterval(MO.getReg());
313 | } |
314 | |
315 | /// isSnippet - Identify if a live interval is a snippet that should be spilled. |
316 | /// It is assumed that SnipLI is a virtual register with the same original as |
317 | /// Edit->getReg(). |
318 | bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { |
319 | Register Reg = Edit->getReg(); |
320 | |
321 | // A snippet is a tiny live range with only a single instruction using it |
322 | // besides copies to/from Reg or spills/fills. |
  // An exception is made for statepoint instructions, which will fold fills
  // into their operands.
325 | // We accept: |
326 | // |
327 | // %snip = COPY %Reg / FILL fi# |
328 | // %snip = USE %snip |
329 | // %snip = STATEPOINT %snip in var arg area |
330 | // %Reg = COPY %snip / SPILL %snip, fi# |
331 | // |
  if (!LIS.intervalIsInOneMBB(SnipLI))
333 | return false; |
334 | |
  // The number of defs should not exceed two, not counting defs coming from
  // statepoint instructions.
337 | unsigned NumValNums = SnipLI.getNumValNums(); |
338 | for (auto *VNI : SnipLI.vnis()) { |
    MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
340 | if (MI->getOpcode() == TargetOpcode::STATEPOINT) |
341 | --NumValNums; |
342 | } |
343 | if (NumValNums > 2) |
344 | return false; |
345 | |
346 | MachineInstr *UseMI = nullptr; |
347 | |
348 | // Check that all uses satisfy our criteria. |
349 | for (MachineRegisterInfo::reg_bundle_nodbg_iterator |
           RI = MRI.reg_bundle_nodbg_begin(SnipLI.reg()),
351 | E = MRI.reg_bundle_nodbg_end(); |
352 | RI != E;) { |
353 | MachineInstr &MI = *RI++; |
354 | |
355 | // Allow copies to/from Reg. |
    if (isCopyOfBundle(MI, Reg, TII))
357 | continue; |
358 | |
359 | // Allow stack slot loads. |
360 | int FI; |
    if (SnipLI.reg() == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
362 | continue; |
363 | |
364 | // Allow stack slot stores. |
    if (SnipLI.reg() == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
366 | continue; |
367 | |
    if (StatepointOpers::isFoldableReg(&MI, SnipLI.reg()))
369 | continue; |
370 | |
371 | // Allow a single additional instruction. |
372 | if (UseMI && &MI != UseMI) |
373 | return false; |
374 | UseMI = &MI; |
375 | } |
376 | return true; |
377 | } |
378 | |
379 | /// collectRegsToSpill - Collect live range snippets that only have a single |
380 | /// real use. |
381 | void InlineSpiller::collectRegsToSpill() { |
382 | Register Reg = Edit->getReg(); |
383 | |
384 | // Main register always spills. |
  RegsToSpill.assign(1, Reg);
386 | SnippetCopies.clear(); |
387 | RegsReplaced.clear(); |
388 | |
389 | // Snippets all have the same original, so there can't be any for an original |
390 | // register. |
391 | if (Original == Reg) |
392 | return; |
393 | |
  for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
    Register SnipReg = isCopyOfBundle(MI, Reg, TII);
    if (!isSibling(SnipReg))
      continue;
    LiveInterval &SnipLI = LIS.getInterval(SnipReg);
    if (!isSnippet(SnipLI))
      continue;
    SnippetCopies.insert(&MI);
    if (isRegToSpill(SnipReg))
      continue;
    RegsToSpill.push_back(SnipReg);
405 | LLVM_DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); |
406 | ++NumSnippets; |
407 | } |
408 | } |
409 | |
410 | bool InlineSpiller::isSibling(Register Reg) { |
  return Reg.isVirtual() && VRM.getOriginal(Reg) == Original;
412 | } |
413 | |
/// It is beneficial to spill to an earlier place in the same BB when there
/// is an alternative def earlier in the same MBB. Hoist the spill as far as
/// possible in SpillMBB; this can ease register pressure:
419 | /// |
420 | /// x = def |
421 | /// y = use x |
422 | /// s = copy x |
423 | /// |
424 | /// Hoisting the spill of s to immediately after the def removes the |
425 | /// interference between x and y: |
426 | /// |
427 | /// x = def |
428 | /// spill x |
429 | /// y = use killed x |
430 | /// |
431 | /// This hoist only helps when the copy kills its source. |
432 | /// |
433 | bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, |
434 | MachineInstr &CopyMI) { |
  SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
436 | #ifndef NDEBUG |
437 | VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); |
  assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
439 | #endif |
440 | |
  Register SrcReg = CopyMI.getOperand(1).getReg();
  LiveInterval &SrcLI = LIS.getInterval(SrcReg);
  VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
  LiveQueryResult SrcQ = SrcLI.Query(Idx);
  MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def);
446 | if (DefMBB != CopyMI.getParent() || !SrcQ.isKill()) |
447 | return false; |
448 | |
449 | // Conservatively extend the stack slot range to the range of the original |
450 | // value. We may be able to do better with stack slot coloring by being more |
451 | // careful here. |
  assert(StackInt && "No stack slot assigned yet.");
  LiveInterval &OrigLI = LIS.getInterval(Original);
  VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
  StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
456 | LLVM_DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " |
457 | << *StackInt << '\n'); |
458 | |
459 | // We are going to spill SrcVNI immediately after its def, so clear out |
460 | // any later spills of the same value. |
  eliminateRedundantSpills(SrcLI, SrcVNI);
462 | |
  MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
464 | MachineBasicBlock::iterator MII; |
465 | if (SrcVNI->isPHIDef()) |
    MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin(), SrcReg);
467 | else { |
    MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
    assert(DefMI && "Defining instruction disappeared");
470 | MII = DefMI; |
471 | ++MII; |
472 | } |
473 | MachineInstrSpan MIS(MII, MBB); |
474 | // Insert spill without kill flag immediately after def. |
  TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
                          MRI.getRegClass(SrcReg), &TRI, Register());
  LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
  for (const MachineInstr &MI : make_range(MIS.begin(), MII))
    getVDefInterval(MI, LIS);
480 | --MII; // Point to store instruction. |
481 | LLVM_DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII); |
482 | |
  // If only one store instruction is required for the spill, add it to the
  // mergeable list. On X86 AMX, two instructions are required to store, so
  // we disable merging for that case.
  if (MIS.begin() == MII)
    HSpiller.addToMergeableSpills(*MII, StackSlot, Original);
488 | ++NumSpills; |
489 | return true; |
490 | } |
491 | |
492 | /// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any |
493 | /// redundant spills of this value in SLI.reg and sibling copies. |
494 | void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { |
  assert(VNI && "Missing value");
  SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
  WorkList.push_back(std::make_pair(&SLI, VNI));
  assert(StackInt && "No stack slot assigned yet.");
499 | |
500 | do { |
501 | LiveInterval *LI; |
    std::tie(LI, VNI) = WorkList.pop_back_val();
503 | Register Reg = LI->reg(); |
504 | LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@' |
505 | << VNI->def << " in " << *LI << '\n'); |
506 | |
507 | // Regs to spill are taken care of. |
508 | if (isRegToSpill(Reg)) |
509 | continue; |
510 | |
511 | // Add all of VNI's live range to StackInt. |
    StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
513 | LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); |
514 | |
515 | // Find all spills and copies of VNI. |
516 | for (MachineInstr &MI : |
         llvm::make_early_inc_range(MRI.use_nodbg_bundles(Reg))) {
518 | if (!MI.mayStore() && !TII.isCopyInstr(MI)) |
519 | continue; |
      SlotIndex Idx = LIS.getInstructionIndex(MI);
521 | if (LI->getVNInfoAt(Idx) != VNI) |
522 | continue; |
523 | |
524 | // Follow sibling copies down the dominator tree. |
      if (Register DstReg = isCopyOfBundle(MI, Reg, TII)) {
        if (isSibling(DstReg)) {
          LiveInterval &DstLI = LIS.getInterval(DstReg);
          VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
          assert(DstVNI && "Missing defined value");
          assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");

          WorkList.push_back(std::make_pair(&DstLI, DstVNI));
533 | } |
534 | continue; |
535 | } |
536 | |
537 | // Erase spills. |
538 | int FI; |
      if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
        LLVM_DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI);
        // eliminateDeadDefs won't normally remove stores, so switch opcode.
        MI.setDesc(TII.get(TargetOpcode::KILL));
        DeadDefs.push_back(&MI);
        ++NumSpillsRemoved;
        if (HSpiller.rmFromMergeableSpills(MI, StackSlot))
546 | --NumSpills; |
547 | } |
548 | } |
549 | } while (!WorkList.empty()); |
550 | } |
551 | |
552 | //===----------------------------------------------------------------------===// |
553 | // Rematerialization |
554 | //===----------------------------------------------------------------------===// |
555 | |
/// markValueUsed - Remember that VNI failed to rematerialize, so its defining
/// instruction cannot be eliminated. Sees through snippet copies.
558 | void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { |
559 | SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; |
  WorkList.push_back(std::make_pair(LI, VNI));
561 | do { |
    std::tie(LI, VNI) = WorkList.pop_back_val();
    if (!UsedValues.insert(VNI).second)
564 | continue; |
565 | |
566 | if (VNI->isPHIDef()) { |
      MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
      for (MachineBasicBlock *P : MBB->predecessors()) {
        VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P));
        if (PVNI)
          WorkList.push_back(std::make_pair(LI, PVNI));
572 | } |
573 | continue; |
574 | } |
575 | |
576 | // Follow snippet copies. |
    MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
    if (!SnippetCopies.count(MI))
      continue;
    LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
    assert(isRegToSpill(SnipLI.reg()) && "Unexpected register in copy");
    VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
    assert(SnipVNI && "Snippet undefined before copy");
    WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
585 | } while (!WorkList.empty()); |
586 | } |
587 | |
588 | bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg, |
589 | MachineInstr &MI) { |
590 | if (!RestrictStatepointRemat) |
591 | return true; |
  // Here's a quick explanation of the problem we're trying to handle:
593 | // * There are some pseudo instructions with more vreg uses than there are |
594 | // physical registers on the machine. |
595 | // * This is normally handled by spilling the vreg, and folding the reload |
596 | // into the user instruction. (Thus decreasing the number of used vregs |
597 | // until the remainder can be assigned to physregs.) |
598 | // * However, since we may try to spill vregs in any order, we can end up |
599 | // trying to spill each operand to the instruction, and then rematting it |
600 | // instead. When that happens, the new live intervals (for the remats) are |
601 | // expected to be trivially assignable (i.e. RS_Done). However, since we |
602 | // may have more remats than physregs, we're guaranteed to fail to assign |
603 | // one. |
604 | // At the moment, we only handle this for STATEPOINTs since they're the only |
605 | // pseudo op where we've seen this. If we start seeing other instructions |
606 | // with the same problem, we need to revisit this. |
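  // As a hypothetical illustration: a STATEPOINT carrying 40 gc-pointer vreg
  // operands on a target with 16 allocatable GPRs cannot have every operand
  // rematerialized into a fresh vreg, since all 40 of the resulting tiny
  // intervals would be live across the statepoint and could not all be
  // assigned physical registers.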
607 | if (MI.getOpcode() != TargetOpcode::STATEPOINT) |
608 | return true; |
  // For STATEPOINTs we allow rematerialization of fixed arguments only, hoping
  // that the number of physical registers is enough to cover all fixed
  // arguments. If that turns out not to be true, we need to revisit this.
612 | for (unsigned Idx = StatepointOpers(&MI).getVarIdx(), |
613 | EndIdx = MI.getNumOperands(); |
614 | Idx < EndIdx; ++Idx) { |
    MachineOperand &MO = MI.getOperand(Idx);
616 | if (MO.isReg() && MO.getReg() == VReg) |
617 | return false; |
618 | } |
619 | return true; |
620 | } |
621 | |
622 | /// hasPhysRegAvailable - Check if there is an available physical register for |
623 | /// rematerialization. |
624 | bool InlineSpiller::hasPhysRegAvailable(const MachineInstr &MI) { |
625 | if (!Order || !Matrix) |
626 | return false; |
627 | |
  SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
629 | SlotIndex PrevIdx = UseIdx.getPrevSlot(); |
630 | |
631 | for (MCPhysReg PhysReg : *Order) { |
    if (!Matrix->checkInterference(PrevIdx, UseIdx, PhysReg))
633 | return true; |
634 | } |
635 | |
636 | return false; |
637 | } |
638 | |
639 | /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. |
640 | bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { |
641 | // Analyze instruction |
642 | SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops; |
  VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg(), &Ops);
644 | |
645 | if (!RI.Reads) |
646 | return false; |
647 | |
  SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
  VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
650 | |
651 | if (!ParentVNI) { |
    LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
653 | for (MachineOperand &MO : MI.all_uses()) |
654 | if (MO.getReg() == VirtReg.reg()) |
655 | MO.setIsUndef(); |
656 | LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI); |
657 | return true; |
658 | } |
659 | |
  if (SnippetCopies.count(&MI))
661 | return false; |
662 | |
  LiveInterval &OrigLI = LIS.getInterval(Original);
  VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
  LiveRangeEdit::Remat RM(ParentVNI);
  RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
667 | |
668 | if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx)) { |
    markValueUsed(&VirtReg, ParentVNI);
670 | LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); |
671 | return false; |
672 | } |
673 | |
674 | // If the instruction also writes VirtReg.reg, it had better not require the |
675 | // same register for uses and defs. |
676 | if (RI.Tied) { |
    markValueUsed(&VirtReg, ParentVNI);
678 | LLVM_DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI); |
679 | return false; |
680 | } |
681 | |
682 | // Before rematerializing into a register for a single instruction, try to |
683 | // fold a load into the instruction. That avoids allocating a new register. |
684 | if (RM.OrigMI->canFoldAsLoad() && |
685 | (RM.OrigMI->mayLoad() || !hasPhysRegAvailable(MI)) && |
      foldMemoryOperand(Ops, RM.OrigMI)) {
    Edit->markRematerialized(RM.ParentVNI);
688 | ++NumFoldedLoads; |
689 | return true; |
690 | } |
691 | |
692 | // If we can't guarantee that we'll be able to actually assign the new vreg, |
693 | // we can't remat. |
  if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg(), MI)) {
    markValueUsed(&VirtReg, ParentVNI);
696 | LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); |
697 | return false; |
698 | } |
699 | |
700 | // Allocate a new register for the remat. |
  Register NewVReg = Edit->createFrom(Original);
702 | |
703 | // Finally we can rematerialize OrigMI before MI. |
  SlotIndex DefIdx =
      Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
706 | |
707 | // We take the DebugLoc from MI, since OrigMI may be attributed to a |
708 | // different source location. |
  auto *NewMI = LIS.getInstructionFromIndex(DefIdx);
710 | NewMI->setDebugLoc(MI.getDebugLoc()); |
711 | |
712 | (void)DefIdx; |
713 | LLVM_DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' |
714 | << *LIS.getInstructionFromIndex(DefIdx)); |
715 | |
716 | // Replace operands |
717 | for (const auto &OpPair : Ops) { |
    MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
719 | if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) { |
720 | MO.setReg(NewVReg); |
721 | MO.setIsKill(); |
722 | } |
723 | } |
724 | LLVM_DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n'); |
725 | |
726 | ++NumRemats; |
727 | return true; |
728 | } |
729 | |
730 | /// reMaterializeAll - Try to rematerialize as many uses as possible, |
731 | /// and trim the live ranges after. |
732 | void InlineSpiller::reMaterializeAll() { |
733 | if (!Edit->anyRematerializable()) |
734 | return; |
735 | |
736 | UsedValues.clear(); |
737 | |
738 | // Try to remat before all uses of snippets. |
739 | bool anyRemat = false; |
740 | for (Register Reg : RegsToSpill) { |
741 | LiveInterval &LI = LIS.getInterval(Reg); |
    for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
743 | // Debug values are not allowed to affect codegen. |
744 | if (MI.isDebugValue()) |
745 | continue; |
746 | |
747 | assert(!MI.isDebugInstr() && "Did not expect to find a use in debug " |
748 | "instruction that isn't a DBG_VALUE" ); |
749 | |
      anyRemat |= reMaterializeFor(LI, MI);
751 | } |
752 | } |
753 | if (!anyRemat) |
754 | return; |
755 | |
756 | // Remove any values that were completely rematted. |
757 | for (Register Reg : RegsToSpill) { |
758 | LiveInterval &LI = LIS.getInterval(Reg); |
759 | for (VNInfo *VNI : LI.vnis()) { |
      if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
        continue;
      MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
      MI->addRegisterDead(Reg, &TRI);
764 | if (!MI->allDefsAreDead()) |
765 | continue; |
766 | LLVM_DEBUG(dbgs() << "All defs dead: " << *MI); |
      DeadDefs.push_back(MI);
768 | // If MI is a bundle header, also try removing copies inside the bundle, |
769 | // otherwise the verifier would complain "live range continues after dead |
770 | // def flag". |
771 | if (MI->isBundledWithSucc() && !MI->isBundledWithPred()) { |
772 | MachineBasicBlock::instr_iterator BeginIt = MI->getIterator(), |
773 | EndIt = MI->getParent()->instr_end(); |
774 | ++BeginIt; // Skip MI that was already handled. |
775 | |
776 | bool OnlyDeadCopies = true; |
777 | for (MachineBasicBlock::instr_iterator It = BeginIt; |
778 | It != EndIt && It->isBundledWithPred(); ++It) { |
779 | |
          auto DestSrc = TII.isCopyInstr(*It);
781 | bool IsCopyToDeadReg = |
782 | DestSrc && DestSrc->Destination->getReg() == Reg; |
783 | if (!IsCopyToDeadReg) { |
784 | OnlyDeadCopies = false; |
785 | break; |
786 | } |
787 | } |
788 | if (OnlyDeadCopies) { |
789 | for (MachineBasicBlock::instr_iterator It = BeginIt; |
790 | It != EndIt && It->isBundledWithPred(); ++It) { |
            It->addRegisterDead(Reg, &TRI);
            LLVM_DEBUG(dbgs() << "All defs dead: " << *It);
            DeadDefs.push_back(&*It);
794 | } |
795 | } |
796 | } |
797 | } |
798 | } |
799 | |
800 | // Eliminate dead code after remat. Note that some snippet copies may be |
801 | // deleted here. |
802 | if (DeadDefs.empty()) |
803 | return; |
  LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
  Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
806 | |
  // LiveRangeEdit::eliminateDeadDef is used to remove dead defining
  // instructions after rematerialization. To remove a VNI for a vreg from its
  // LiveInterval, LiveIntervals::removeVRegDefAt is used. However, after all
  // non-PHI VNIs are removed, PHI VNIs are still left in the LiveInterval.
  // So to get rid of an unused reg, we need to check whether it has non-dbg
  // references instead of whether it has a non-empty interval.
813 | unsigned ResultPos = 0; |
814 | for (Register Reg : RegsToSpill) { |
    if (MRI.reg_nodbg_empty(Reg)) {
      Edit->eraseVirtReg(Reg);
      RegsReplaced.push_back(Reg);
818 | continue; |
819 | } |
820 | |
    assert(LIS.hasInterval(Reg) &&
           (!LIS.getInterval(Reg).empty() || !MRI.reg_nodbg_empty(Reg)) &&
           "Empty and not used live-range?!");
824 | |
825 | RegsToSpill[ResultPos++] = Reg; |
826 | } |
  RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
  LLVM_DEBUG(dbgs() << RegsToSpill.size()
                    << " registers to spill after remat.\n");
830 | } |
831 | |
832 | //===----------------------------------------------------------------------===// |
833 | // Spilling |
834 | //===----------------------------------------------------------------------===// |
835 | |
836 | /// If MI is a load or store of StackSlot, it can be removed. |
837 | bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, Register Reg) { |
838 | int FI = 0; |
  Register InstrReg = TII.isLoadFromStackSlot(*MI, FI);
  bool IsLoad = InstrReg.isValid();
  if (!IsLoad)
    InstrReg = TII.isStoreToStackSlot(*MI, FI);
843 | |
844 | // We have a stack access. Is it the right register and slot? |
845 | if (InstrReg != Reg || FI != StackSlot) |
846 | return false; |
847 | |
848 | if (!IsLoad) |
    HSpiller.rmFromMergeableSpills(*MI, StackSlot);
850 | |
851 | LLVM_DEBUG(dbgs() << "Coalescing stack access: " << *MI); |
  LIS.RemoveMachineInstrFromMaps(*MI);
853 | MI->eraseFromParent(); |
854 | |
855 | if (IsLoad) { |
856 | ++NumReloadsRemoved; |
857 | --NumReloads; |
858 | } else { |
859 | ++NumSpillsRemoved; |
860 | --NumSpills; |
861 | } |
862 | |
863 | return true; |
864 | } |
865 | |
866 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
867 | LLVM_DUMP_METHOD |
868 | // Dump the range of instructions from B to E with their slot indexes. |
869 | static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, |
870 | MachineBasicBlock::iterator E, |
871 | LiveIntervals const &LIS, |
872 | const char *const header, |
873 | Register VReg = Register()) { |
874 | char NextLine = '\n'; |
875 | char SlotIndent = '\t'; |
876 | |
877 | if (std::next(B) == E) { |
878 | NextLine = ' '; |
879 | SlotIndent = ' '; |
880 | } |
881 | |
882 | dbgs() << '\t' << header << ": " << NextLine; |
883 | |
884 | for (MachineBasicBlock::iterator I = B; I != E; ++I) { |
885 | SlotIndex Idx = LIS.getInstructionIndex(*I).getRegSlot(); |
886 | |
887 | // If a register was passed in and this instruction has it as a |
888 | // destination that is marked as an early clobber, print the |
889 | // early-clobber slot index. |
890 | if (VReg) { |
891 | MachineOperand *MO = I->findRegisterDefOperand(VReg, /*TRI=*/nullptr); |
892 | if (MO && MO->isEarlyClobber()) |
893 | Idx = Idx.getRegSlot(true); |
894 | } |
895 | |
896 | dbgs() << SlotIndent << Idx << '\t' << *I; |
897 | } |
898 | } |
899 | #endif |
900 | |
901 | /// foldMemoryOperand - Try folding stack slot references in Ops into their |
902 | /// instructions. |
903 | /// |
904 | /// @param Ops Operand indices from AnalyzeVirtRegInBundle(). |
905 | /// @param LoadMI Load instruction to use instead of stack slot when non-null. |
906 | /// @return True on success. |
907 | bool InlineSpiller:: |
908 | foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, |
909 | MachineInstr *LoadMI) { |
910 | if (Ops.empty()) |
911 | return false; |
912 | // Don't attempt folding in bundles. |
913 | MachineInstr *MI = Ops.front().first; |
914 | if (Ops.back().first != MI || MI->isBundled()) |
915 | return false; |
916 | |
  bool WasCopy = TII.isCopyInstr(*MI).has_value();
918 | Register ImpReg; |
919 | |
920 | // TII::foldMemoryOperand will do what we need here for statepoint |
921 | // (fold load into use and remove corresponding def). We will replace |
922 | // uses of removed def with loads (spillAroundUses). |
923 | // For that to work we need to untie def and use to pass it through |
924 | // foldMemoryOperand and signal foldPatchpoint that it is allowed to |
925 | // fold them. |
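  // As an illustrative sketch (operand layout simplified): a statepoint
  // operand pair
  //   %out = STATEPOINT ..., %in(tied-def 0)
  // is untied so that foldMemoryOperand can rewrite the use of %in into a
  // stack slot operand and drop the def of %out; spillAroundUses later
  // replaces the remaining uses of %out with reloads from the same slot.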
926 | bool UntieRegs = MI->getOpcode() == TargetOpcode::STATEPOINT; |
927 | |
928 | // Spill subregs if the target allows it. |
929 | // We always want to spill subregs for stackmap/patchpoint pseudos. |
930 | bool SpillSubRegs = TII.isSubregFoldable() || |
931 | MI->getOpcode() == TargetOpcode::STATEPOINT || |
932 | MI->getOpcode() == TargetOpcode::PATCHPOINT || |
933 | MI->getOpcode() == TargetOpcode::STACKMAP; |
934 | |
935 | // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied |
936 | // operands. |
937 | SmallVector<unsigned, 8> FoldOps; |
938 | for (const auto &OpPair : Ops) { |
939 | unsigned Idx = OpPair.second; |
    assert(MI == OpPair.first && "Instruction conflict during operand folding");
    MachineOperand &MO = MI->getOperand(Idx);
942 | |
943 | // No point restoring an undef read, and we'll produce an invalid live |
944 | // interval. |
945 | // TODO: Is this really the correct way to handle undef tied uses? |
946 | if (MO.isUse() && !MO.readsReg() && !MO.isTied()) |
947 | continue; |
948 | |
949 | if (MO.isImplicit()) { |
950 | ImpReg = MO.getReg(); |
951 | continue; |
952 | } |
953 | |
954 | if (!SpillSubRegs && MO.getSubReg()) |
955 | return false; |
956 | // We cannot fold a load instruction into a def. |
957 | if (LoadMI && MO.isDef()) |
958 | return false; |
959 | // Tied use operands should not be passed to foldMemoryOperand. |
    if (UntieRegs || !MI->isRegTiedToDefOperand(Idx))
      FoldOps.push_back(Idx);
962 | } |
963 | |
964 | // If we only have implicit uses, we won't be able to fold that. |
965 | // Moreover, TargetInstrInfo::foldMemoryOperand will assert if we try! |
966 | if (FoldOps.empty()) |
967 | return false; |
968 | |
969 | MachineInstrSpan MIS(MI, MI->getParent()); |
970 | |
  SmallVector<std::pair<unsigned, unsigned>> TiedOps;
972 | if (UntieRegs) |
973 | for (unsigned Idx : FoldOps) { |
      MachineOperand &MO = MI->getOperand(Idx);
      if (!MO.isTied())
        continue;
      unsigned Tied = MI->findTiedOperandIdx(Idx);
      if (MO.isUse())
        TiedOps.emplace_back(Tied, Idx);
      else {
        assert(MO.isDef() && "Tied to not use and def?");
        TiedOps.emplace_back(Idx, Tied);
      }
      MI->untieRegOperand(Idx);
985 | } |
986 | |
987 | MachineInstr *FoldMI = |
      LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
             : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
990 | if (!FoldMI) { |
991 | // Re-tie operands. |
992 | for (auto Tied : TiedOps) |
      MI->tieOperands(Tied.first, Tied.second);
994 | return false; |
995 | } |
996 | |
997 | // Remove LIS for any dead defs in the original MI not in FoldMI. |
998 | for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) { |
999 | if (!MO->isReg()) |
1000 | continue; |
1001 | Register Reg = MO->getReg(); |
    if (!Reg || Reg.isVirtual() || MRI.isReserved(Reg)) {
1003 | continue; |
1004 | } |
1005 | // Skip non-Defs, including undef uses and internal reads. |
1006 | if (MO->isUse()) |
1007 | continue; |
    PhysRegInfo RI = AnalyzePhysRegInBundle(*FoldMI, Reg, &TRI);
1009 | if (RI.FullyDefined) |
1010 | continue; |
1011 | // FoldMI does not define this physreg. Remove the LI segment. |
    assert(MO->isDead() && "Cannot fold physreg def");
    SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
    LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
1015 | } |
1016 | |
1017 | int FI; |
  if (TII.isStoreToStackSlot(*MI, FI) &&
      HSpiller.rmFromMergeableSpills(*MI, FI))
    --NumSpills;
  LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
  // Update the call info.
  if (MI->isCandidateForAdditionalCallInfo())
    MI->getMF()->moveAdditionalCallInfo(MI, FoldMI);
1025 | |
1026 | // If we've folded a store into an instruction labelled with debug-info, |
1027 | // record a substitution from the old operand to the memory operand. Handle |
1028 | // the simple common case where operand 0 is the one being folded, plus when |
1029 | // the destination operand is also a tied def. More values could be |
1030 | // substituted / preserved with more analysis. |
1031 | if (MI->peekDebugInstrNum() && Ops[0].second == 0) { |
1032 | // Helper lambda. |
1033 | auto MakeSubstitution = [this,FoldMI,MI,&Ops]() { |
1034 | // Substitute old operand zero to the new instructions memory operand. |
1035 | unsigned OldOperandNum = Ops[0].second; |
1036 | unsigned NewNum = FoldMI->getDebugInstrNum(); |
1037 | unsigned OldNum = MI->getDebugInstrNum(); |
1038 | MF.makeDebugValueSubstitution({OldNum, OldOperandNum}, |
1039 | {NewNum, MachineFunction::DebugOperandMemNumber}); |
1040 | }; |
1041 | |
    const MachineOperand &Op0 = MI->getOperand(Ops[0].second);
1043 | if (Ops.size() == 1 && Op0.isDef()) { |
1044 | MakeSubstitution(); |
    } else if (Ops.size() == 2 && Op0.isDef() && MI->getOperand(1).isTied() &&
               Op0.getReg() == MI->getOperand(1).getReg()) {
1047 | MakeSubstitution(); |
1048 | } |
1049 | } else if (MI->peekDebugInstrNum()) { |
1050 | // This is a debug-labelled instruction, but the operand being folded isn't |
1051 | // at operand zero. Most likely this means it's a load being folded in. |
1052 | // Substitute any register defs from operand zero up to the one being |
1053 | // folded -- past that point, we don't know what the new operand indexes |
1054 | // will be. |
    MF.substituteDebugValuesForInst(*MI, *FoldMI, Ops[0].second);
1056 | } |
1057 | |
1058 | MI->eraseFromParent(); |
1059 | |
1060 | // Insert any new instructions other than FoldMI into the LIS maps. |
  assert(!MIS.empty() && "Unexpected empty span of instructions!");
1062 | for (MachineInstr &MI : MIS) |
1063 | if (&MI != FoldMI) |
1064 | LIS.InsertMachineInstrInMaps(MI); |
1065 | |
1066 | // TII.foldMemoryOperand may have left some implicit operands on the |
1067 | // instruction. Strip them. |
1068 | if (ImpReg) |
1069 | for (unsigned i = FoldMI->getNumOperands(); i; --i) { |
      MachineOperand &MO = FoldMI->getOperand(i - 1);
1071 | if (!MO.isReg() || !MO.isImplicit()) |
1072 | break; |
1073 | if (MO.getReg() == ImpReg) |
        FoldMI->removeOperand(i - 1);
1075 | } |
1076 | |
1077 | LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, |
1078 | "folded" )); |
1079 | |
1080 | if (!WasCopy) |
1081 | ++NumFolded; |
  else if (Ops.front().second == 0) {
    ++NumSpills;
    // If only one store instruction is required for the spill, add it to the
    // mergeable list. On X86 AMX, two instructions are required to store, so
    // we disable merging for that case.
    if (std::distance(MIS.begin(), MIS.end()) <= 1)
      HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original);
1089 | } else |
1090 | ++NumReloads; |
1091 | return true; |
1092 | } |
1093 | |
1094 | void InlineSpiller::insertReload(Register NewVReg, |
1095 | SlotIndex Idx, |
1096 | MachineBasicBlock::iterator MI) { |
1097 | MachineBasicBlock &MBB = *MI->getParent(); |
1098 | |
1099 | MachineInstrSpan MIS(MI, &MBB); |
  TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
                           MRI.getRegClass(NewVReg), &TRI, Register());

  LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);

  LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload",
1106 | NewVReg)); |
1107 | ++NumReloads; |
1108 | } |
1109 | |
1110 | /// Check if \p Def fully defines a VReg with an undefined value. |
1111 | /// If that's the case, that means the value of VReg is actually |
1112 | /// not relevant. |
1113 | static bool isRealSpill(const MachineInstr &Def) { |
1114 | if (!Def.isImplicitDef()) |
1115 | return true; |
1116 | |
1117 | // We can say that the VReg defined by Def is undef, only if it is |
1118 | // fully defined by Def. Otherwise, some of the lanes may not be |
1119 | // undef and the value of the VReg matters. |
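  //
  // Illustrative MIR sketch (the subregister index name is hypothetical):
  //   %0 = IMPLICIT_DEF          ; full def, the whole value is undef, so
  //                              ; the spill store can be skipped
  //   %0.sub_32 = IMPLICIT_DEF   ; partial def, the untouched lanes may hold
  //                              ; a meaningful value, so this still counts
  //                              ; as a real spill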
  return Def.getOperand(0).getSubReg();
1121 | } |
1122 | |
1123 | /// insertSpill - Insert a spill of NewVReg after MI. |
1124 | void InlineSpiller::insertSpill(Register NewVReg, bool isKill, |
1125 | MachineBasicBlock::iterator MI) { |
  // Spills are not terminators, so inserting spills after terminators will
  // violate invariants in MachineVerifier.
  assert(!MI->isTerminator() && "Inserting a spill after a terminator");
1129 | MachineBasicBlock &MBB = *MI->getParent(); |
1130 | |
1131 | MachineInstrSpan MIS(MI, &MBB); |
  MachineBasicBlock::iterator SpillBefore = std::next(MI);
  bool IsRealSpill = isRealSpill(*MI);
1134 | |
1135 | if (IsRealSpill) |
    TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot,
                            MRI.getRegClass(NewVReg), &TRI, Register());
1138 | else |
1139 | // Don't spill undef value. |
1140 | // Anything works for undef, in particular keeping the memory |
1141 | // uninitialized is a viable option and it saves code size and |
1142 | // run time. |
    BuildMI(MBB, SpillBefore, MI->getDebugLoc(), TII.get(TargetOpcode::KILL))
        .addReg(NewVReg, getKillRegState(isKill));
1145 | |
  MachineBasicBlock::iterator Spill = std::next(MI);
  LIS.InsertMachineInstrRangeInMaps(Spill, MIS.end());
  for (const MachineInstr &MI : make_range(Spill, MIS.end()))
    getVDefInterval(MI, LIS);
1150 | |
1151 | LLVM_DEBUG( |
      dumpMachineInstrRangeWithSlotIndex(Spill, MIS.end(), LIS, "spill"));
1153 | ++NumSpills; |
  // If only one store instruction is required for the spill, add it to the
  // mergeable list. On X86 AMX, two instructions are required to store, so
  // we disable merging for that case.
  if (IsRealSpill && std::distance(Spill, MIS.end()) <= 1)
    HSpiller.addToMergeableSpills(*Spill, StackSlot, Original);
1159 | } |
1160 | |
1161 | /// spillAroundUses - insert spill code around each use of Reg. |
1162 | void InlineSpiller::spillAroundUses(Register Reg) { |
1163 | LLVM_DEBUG(dbgs() << "spillAroundUses " << printReg(Reg) << '\n'); |
  LiveInterval &OldLI = LIS.getInterval(Reg);
1165 | |
1166 | // Iterate over instructions using Reg. |
  for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
1168 | // Debug values are not allowed to affect codegen. |
1169 | if (MI.isDebugValue()) { |
1170 | // Modify DBG_VALUE now that the value is in a spill slot. |
1171 | MachineBasicBlock *MBB = MI.getParent(); |
1172 | LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << MI); |
      buildDbgValueForSpill(*MBB, &MI, MI, StackSlot, Reg);
      MBB->erase(MI);
1175 | continue; |
1176 | } |
1177 | |
1178 | assert(!MI.isDebugInstr() && "Did not expect to find a use in debug " |
1179 | "instruction that isn't a DBG_VALUE" ); |
1180 | |
1181 | // Ignore copies to/from snippets. We'll delete them. |
    if (SnippetCopies.count(&MI))
1183 | continue; |
1184 | |
1185 | // Stack slot accesses may coalesce away. |
    if (coalesceStackAccess(&MI, Reg))
1187 | continue; |
1188 | |
1189 | // Analyze instruction. |
1190 | SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; |
    VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, Reg, &Ops);
1192 | |
1193 | // Find the slot index where this instruction reads and writes OldLI. |
1194 | // This is usually the def slot, except for tied early clobbers. |
    SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
    if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
      if (SlotIndex::isSameInstr(Idx, VNI->def))
1198 | Idx = VNI->def; |
1199 | |
1200 | // Check for a sibling copy. |
    Register SibReg = isCopyOfBundle(MI, Reg, TII);
    if (SibReg && isSibling(SibReg)) {
      // This may actually be a copy between snippets.
      if (isRegToSpill(SibReg)) {
        LLVM_DEBUG(dbgs() << "Found new snippet copy: " << MI);
        SnippetCopies.insert(&MI);
1207 | continue; |
1208 | } |
1209 | if (RI.Writes) { |
        if (hoistSpillInsideBB(OldLI, MI)) {
          // This COPY is now dead, the value is already in the stack slot.
          MI.getOperand(0).setIsDead();
          DeadDefs.push_back(&MI);
1214 | continue; |
1215 | } |
1216 | } else { |
1217 | // This is a reload for a sib-reg copy. Drop spills downstream. |
        LiveInterval &SibLI = LIS.getInterval(SibReg);
        eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
1220 | // The COPY will fold to a reload below. |
1221 | } |
1222 | } |
1223 | |
1224 | // Attempt to fold memory ops. |
1225 | if (foldMemoryOperand(Ops)) |
1226 | continue; |
1227 | |
1228 | // Create a new virtual register for spill/fill. |
1229 | // FIXME: Infer regclass from instruction alone. |
    Register NewVReg = Edit->createFrom(Reg);
1231 | |
1232 | if (RI.Reads) |
      insertReload(NewVReg, Idx, &MI);
1234 | |
1235 | // Rewrite instruction operands. |
1236 | bool hasLiveDef = false; |
1237 | for (const auto &OpPair : Ops) { |
      MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
1239 | MO.setReg(NewVReg); |
1240 | if (MO.isUse()) { |
        if (!OpPair.first->isRegTiedToDefOperand(OpPair.second))
1242 | MO.setIsKill(); |
1243 | } else { |
1244 | if (!MO.isDead()) |
1245 | hasLiveDef = true; |
1246 | } |
1247 | } |
1248 | LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << MI << '\n'); |
1249 | |
1250 | // FIXME: Use a second vreg if instruction has no tied ops. |
1251 | if (RI.Writes) |
1252 | if (hasLiveDef) |
        insertSpill(NewVReg, true, &MI);
1254 | } |
1255 | } |
1256 | |
1257 | /// spillAll - Spill all registers remaining after rematerialization. |
1258 | void InlineSpiller::spillAll() { |
1259 | // Update LiveStacks now that we are committed to spilling. |
  if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
    StackSlot = VRM.assignVirt2StackSlot(Original);
    StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
    StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator());
  } else
    StackInt = &LSS.getInterval(StackSlot);

  if (Original != Edit->getReg())
    VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
1269 | |
  assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
  for (Register Reg : RegsToSpill)
    StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg),
                                     StackInt->getValNumInfo(0));
1274 | LLVM_DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); |
1275 | |
1276 | // Spill around uses of all RegsToSpill. |
1277 | for (Register Reg : RegsToSpill) { |
1278 | spillAroundUses(Reg); |
1279 | // Assign all of the spilled registers to the slot so that |
1280 | // LiveDebugVariables knows about these locations later on. |
    if (VRM.getStackSlot(Reg) == VirtRegMap::NO_STACK_SLOT)
      VRM.assignVirt2StackSlot(Reg, StackSlot);
1283 | } |
1284 | |
1285 | // Hoisted spills may cause dead code. |
1286 | if (!DeadDefs.empty()) { |
    LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
    Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
1289 | } |
1290 | |
1291 | // Finally delete the SnippetCopies. |
1292 | for (Register Reg : RegsToSpill) { |
1293 | for (MachineInstr &MI : |
         llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
      assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
1296 | // FIXME: Do this with a LiveRangeEdit callback. |
1297 | LIS.getSlotIndexes()->removeSingleMachineInstrFromMaps(MI); |
1298 | MI.eraseFromBundle(); |
1299 | } |
1300 | } |
1301 | |
1302 | // Delete all spilled registers. |
1303 | for (Register Reg : RegsToSpill) |
1304 | Edit->eraseVirtReg(Reg); |
1305 | } |
1306 | |
1307 | void InlineSpiller::spill(LiveRangeEdit &edit, AllocationOrder *order) { |
1308 | ++NumSpilledRanges; |
1309 | Edit = &edit; |
1310 | Order = order; |
  assert(!edit.getReg().isStack() && "Trying to spill a stack slot.");
  // Share a stack slot among all descendants of Original.
  Original = VRM.getOriginal(edit.getReg());
  StackSlot = VRM.getStackSlot(Original);
1315 | StackInt = nullptr; |
1316 | |
1317 | LLVM_DEBUG(dbgs() << "Inline spilling " |
1318 | << TRI.getRegClassName(MRI.getRegClass(edit.getReg())) |
1319 | << ':' << edit.getParent() << "\nFrom original " |
1320 | << printReg(Original) << '\n'); |
  assert(edit.getParent().isSpillable() &&
         "Attempting to spill already spilled value.");
  assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
1324 | |
1325 | collectRegsToSpill(); |
1326 | reMaterializeAll(); |
1327 | |
1328 | // Remat may handle everything. |
1329 | if (!RegsToSpill.empty()) |
1330 | spillAll(); |
1331 | |
1332 | Edit->calculateRegClassAndHint(MF, VRAI); |
1333 | } |
1334 | |
1335 | /// Optimizations after all the reg selections and spills are done. |
1336 | void InlineSpiller::postOptimization() { HSpiller.hoistAllSpills(); } |
1337 | |
1338 | /// When a spill is inserted, add the spill to MergeableSpills map. |
1339 | void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot, |
1340 | Register Original) { |
1341 | BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); |
  LiveInterval &OrigLI = LIS.getInterval(Original);
  // Save a copy of the LiveInterval in StackSlotToOrigLI because the original
  // LiveInterval may be cleared after all its references are spilled.
1345 | |
1346 | auto [Place, Inserted] = StackSlotToOrigLI.try_emplace(Key: StackSlot); |
1347 | if (Inserted) { |
1348 | auto LI = std::make_unique<LiveInterval>(args: OrigLI.reg(), args: OrigLI.weight()); |
1349 | LI->assign(Other: OrigLI, Allocator); |
1350 | Place->second = std::move(LI); |
1351 | } |
1352 | |
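  // Spills are mergeable only when they agree on both the stack slot and the
  // value number of the original interval at the spill index; that pair is
  // the key computed below.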
  SlotIndex Idx = LIS.getInstructionIndex(Spill);
  VNInfo *OrigVNI = Place->second->getVNInfoAt(Idx.getRegSlot());
  std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
  MergeableSpills[MIdx].insert(&Spill);
}

/// When a spill is removed, remove the spill from MergeableSpills map.
/// Return true if the spill is removed successfully.
bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
                                             int StackSlot) {
  auto It = StackSlotToOrigLI.find(StackSlot);
  if (It == StackSlotToOrigLI.end())
    return false;
  SlotIndex Idx = LIS.getInstructionIndex(Spill);
  VNInfo *OrigVNI = It->second->getVNInfoAt(Idx.getRegSlot());
  std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
  return MergeableSpills[MIdx].erase(&Spill);
}

/// Check BB to see if it is a possible target BB to place a hoisted spill,
/// i.e., there should be a live sibling of OrigReg at the insert point.
bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
                                     MachineBasicBlock &BB, Register &LiveReg) {
  SlotIndex Idx = IPA.getLastInsertPoint(OrigLI, BB);
  // The original def could be after the last insert point in the root block;
  // in that case we can't hoist here.
  if (Idx < OrigVNI.def) {
    // TODO: We could do better here. If LI is not live in the landing pad,
    // we could hoist the spill after the LIP.
    LLVM_DEBUG(dbgs() << "can't spill in root block - def after LIP\n");
    return false;
  }
  Register OrigReg = OrigLI.reg();
  SmallSetVector<Register, 16> &Siblings = Virt2SiblingsMap[OrigReg];
  assert(OrigLI.getVNInfoAt(Idx) == &OrigVNI && "Unexpected VNI");

  for (const Register &SibReg : Siblings) {
    LiveInterval &LI = LIS.getInterval(SibReg);
    VNInfo *VNI = LI.getVNInfoAt(Idx);
    if (VNI) {
      LiveReg = SibReg;
      return true;
    }
  }
  return false;
}

/// Remove redundant spills in the same BB. Save those redundant spills in
/// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map.
void HoistSpillHelper::rmRedundantSpills(
    SmallPtrSet<MachineInstr *, 16> &Spills,
    SmallVectorImpl<MachineInstr *> &SpillsToRm,
    DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
  // For each spill seen, check SpillBBToSpill[] to see if its BB already has
  // another spill inside. If a BB contains more than one spill, keep only the
  // earlier spill with the smaller SlotIndex.
  for (auto *const CurrentSpill : Spills) {
    MachineBasicBlock *Block = CurrentSpill->getParent();
    MachineDomTreeNode *Node = MDT.getNode(Block);
    MachineInstr *PrevSpill = SpillBBToSpill[Node];
    if (PrevSpill) {
      SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
      SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill);
      MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
      MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
      SpillsToRm.push_back(SpillToRm);
      SpillBBToSpill[MDT.getNode(Block)] = SpillToKeep;
    } else {
      SpillBBToSpill[MDT.getNode(Block)] = CurrentSpill;
    }
  }
  for (auto *const SpillToRm : SpillsToRm)
    Spills.erase(SpillToRm);
}

/// Starting from \p Root find a top-down traversal order of the dominator
/// tree to visit all basic blocks containing the elements of \p Spills.
/// Redundant spills will be found and put into \p SpillsToRm at the same
/// time. \p SpillBBToSpill will be populated as part of the process and
/// maps a basic block to the first store occurring in the basic block.
/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
void HoistSpillHelper::getVisitOrders(
    MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
    SmallVectorImpl<MachineDomTreeNode *> &Orders,
    SmallVectorImpl<MachineInstr *> &SpillsToRm,
    DenseMap<MachineDomTreeNode *, Register> &SpillsToKeep,
    DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
  // The set contains all the possible BB nodes to which we may hoist
  // original spills.
  SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
  // Save the BB nodes on the path from the first BB node containing
  // non-redundant spill to the Root node.
  SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
  // All the spills to be hoisted must originate from a single def instruction
  // of OrigReg, so the def instruction must dominate all the spills to be
  // hoisted. We choose the BB where the def instruction is located as the
  // Root.
  MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
  // For every node on the dominator tree with a spill, walk up the dominator
  // tree towards the Root node until it is reached. If another node containing
  // a spill is found in the middle of the path, the spill seen earlier is
  // redundant and the node containing it will be removed. All the nodes on the
  // path starting from the first node with a non-redundant spill to the Root
  // node are added to the WorkSet, which will contain all the possible
  // locations where spills may be hoisted to after the loop below is done.
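  // For example (hypothetical tree): with a dominator tree Root -> A -> {C,
  // D} and spills in C and D, walking up from C adds C, A, and Root to
  // WorkSet; walking up from D adds D and then stops at A, which is already
  // in WorkSet. All four nodes become hoisting candidates.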
  for (auto *const Spill : Spills) {
    MachineBasicBlock *Block = Spill->getParent();
    MachineDomTreeNode *Node = MDT[Block];
    MachineInstr *SpillToRm = nullptr;
    while (Node != RootIDomNode) {
      // If Node dominates Block, and it already contains a spill, the spill in
      // Block will be redundant.
      if (Node != MDT[Block] && SpillBBToSpill[Node]) {
        SpillToRm = SpillBBToSpill[MDT[Block]];
        break;
        // If we see the Node already in WorkSet, the path from the Node to
        // the Root node must already have been traversed by another spill.
        // Then there is no need to repeat.
      } else if (WorkSet.count(Node)) {
        break;
      } else {
        NodesOnPath.insert(Node);
      }
      Node = Node->getIDom();
    }
    if (SpillToRm) {
      SpillsToRm.push_back(SpillToRm);
    } else {
      // Add a BB containing the original spills to SpillsToKeep -- i.e.,
      // set the initial status before hoisting starts. The value of BBs
      // containing original spills is set to 0, in order to discriminate
      // them from BBs containing hoisted spills, which will be inserted into
      // SpillsToKeep later during hoisting.
      SpillsToKeep[MDT[Block]] = Register();
      WorkSet.insert_range(NodesOnPath);
    }
    NodesOnPath.clear();
  }

  // Sort the nodes in WorkSet in top-down order and save the nodes
  // in Orders. Orders will be used for hoisting in runHoistSpills.
  unsigned idx = 0;
  Orders.push_back(MDT.getNode(Root));
  do {
    MachineDomTreeNode *Node = Orders[idx++];
    for (MachineDomTreeNode *Child : Node->children()) {
      if (WorkSet.count(Child))
        Orders.push_back(Child);
    }
  } while (idx != Orders.size());
  assert(Orders.size() == WorkSet.size() &&
         "Orders and WorkSet have different sizes");

#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
  SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
  for (; RIt != Orders.rend(); RIt++)
    LLVM_DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
  LLVM_DEBUG(dbgs() << "\n");
#endif
}

/// Try to hoist spills according to BB hotness. The spills to be removed will
/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
/// \p SpillsToIns.
void HoistSpillHelper::runHoistSpills(
    LiveInterval &OrigLI, VNInfo &OrigVNI,
    SmallPtrSet<MachineInstr *, 16> &Spills,
    SmallVectorImpl<MachineInstr *> &SpillsToRm,
    DenseMap<MachineBasicBlock *, Register> &SpillsToIns) {
  // Visit order of dominator tree nodes.
  SmallVector<MachineDomTreeNode *, 32> Orders;
  // SpillsToKeep contains all the nodes where spills are to be inserted
  // during hoisting. If the spill to be inserted is an original spill
  // (not a hoisted one), the value of the map entry is 0. If the spill
  // is a hoisted spill, the value of the map entry is the VReg to be used
  // as the source of the spill.
  DenseMap<MachineDomTreeNode *, Register> SpillsToKeep;
  // Map from BB to the first spill inside of it.
  DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;

  rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);

  MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
  getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
                 SpillBBToSpill);

  // SpillsInSubTreeMap keeps the map from a dom tree node to a pair of a
  // node set and the cost of all the spills inside those nodes. The node
  // set contains the locations where spills are to be inserted in the
  // subtree of the current node.
  using NodesCostPair =
      std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>;
  DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;

  // Iterate Orders set in reverse order, which will be a bottom-up order
  // in the dominator tree. Once we visit a dom tree node, we know its
  // children have already been visited and the spill locations in the
  // subtrees of all the children have been determined.
  SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
  for (; RIt != Orders.rend(); RIt++) {
    MachineBasicBlock *Block = (*RIt)->getBlock();

    // If Block contains an original spill, simply continue.
    if (auto It = SpillsToKeep.find(*RIt);
        It != SpillsToKeep.end() && !It->second) {
      auto &SIt = SpillsInSubTreeMap[*RIt];
      SIt.first.insert(*RIt);
      // SIt.second contains the cost of the spill.
      SIt.second = MBFI.getBlockFreq(Block);
      continue;
    }

    // Collect spills in subtree of current node (*RIt) to
    // SpillsInSubTreeMap[*RIt].first.
    for (MachineDomTreeNode *Child : (*RIt)->children()) {
      if (!SpillsInSubTreeMap.contains(Child))
        continue;
      // The stmt:
      // "auto &[SpillsInSubTree, SubTreeCost] = SpillsInSubTreeMap[*RIt]"
      // below should be placed before getting the begin and end iterators of
      // SpillsInSubTreeMap[Child].first, or else the iterators may be
      // invalidated when SpillsInSubTreeMap[*RIt] is created for the first
      // time, the map grows, and the original buckets in the map are moved.
      auto &[SpillsInSubTree, SubTreeCost] = SpillsInSubTreeMap[*RIt];
      auto ChildIt = SpillsInSubTreeMap.find(Child);
      SubTreeCost += ChildIt->second.second;
      auto BI = ChildIt->second.first.begin();
      auto EI = ChildIt->second.first.end();
      SpillsInSubTree.insert(BI, EI);
      SpillsInSubTreeMap.erase(ChildIt);
    }

    auto &[SpillsInSubTree, SubTreeCost] = SpillsInSubTreeMap[*RIt];
    // No spills in subtree, simply continue.
    if (SpillsInSubTree.empty())
      continue;

    // Check whether Block is a possible candidate to insert a spill.
    Register LiveReg;
    if (!isSpillCandBB(OrigLI, OrigVNI, *Block, LiveReg))
      continue;

    // If there are multiple spills that could be merged, bias a little
    // to hoist the spill.
    BranchProbability MarginProb = (SpillsInSubTree.size() > 1)
                                       ? BranchProbability(9, 10)
                                       : BranchProbability(1, 1);
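    // Hoist when the combined cost of the subtree spills exceeds the
    // (biased) cost of a single spill in Block. Illustrative numbers: two
    // spills with combined frequency 100 beat a candidate block of
    // frequency 105, since 100 > 105 * 9/10.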
    if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
      // Hoist: Move spills to current Block.
      for (auto *const SpillBB : SpillsInSubTree) {
        // When SpillBB is a BB containing an original spill, insert that
        // spill into SpillsToRm.
        if (auto It = SpillsToKeep.find(SpillBB);
            It != SpillsToKeep.end() && !It->second) {
          MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
          SpillsToRm.push_back(SpillToRm);
        }
        // SpillBB will no longer contain a spill; remove it from
        // SpillsToKeep.
        SpillsToKeep.erase(SpillBB);
      }
      // Current Block is the BB containing the new hoisted spill. Add it to
      // SpillsToKeep. LiveReg is the source of the new spill.
      SpillsToKeep[*RIt] = LiveReg;
      LLVM_DEBUG({
        dbgs() << "spills in BB: ";
        for (const auto Rspill : SpillsInSubTree)
          dbgs() << Rspill->getBlock()->getNumber() << " ";
        dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
               << "\n";
      });
      SpillsInSubTree.clear();
      SpillsInSubTree.insert(*RIt);
      SubTreeCost = MBFI.getBlockFreq(Block);
    }
  }
  // For spills in SpillsToKeep with LiveReg set (i.e., not original spill),
  // save them to SpillsToIns.
  for (const auto &Ent : SpillsToKeep) {
    if (Ent.second)
      SpillsToIns[Ent.first->getBlock()] = Ent.second;
  }
}

/// For spills with equal values, remove redundant spills and hoist those left
/// to less hot spots.
///
/// Spills with equal values will be collected into the same set in
/// MergeableSpills when a spill is inserted. These equal spills originate
/// from the same defining instruction and are dominated by it. Before
/// hoisting all the equal spills, redundant spills inside the same BB are
/// first marked to be deleted. Then, starting from the spills left, walk up
/// the dominator tree towards the Root node where the defining instruction
/// is located, mark the dominated spills to be deleted along the way, and
/// collect the BB nodes on the path from non-dominated spills to the
/// defining instruction into a WorkSet. The nodes in WorkSet are the
/// candidate places where we consider hoisting the spills. We iterate the
/// WorkSet in bottom-up order, and for each node we decide whether to hoist
/// the spills inside its subtree to that node. In this way, we can get a
/// benefit locally even if hoisting all the equal spills to one cold place
/// is impossible.
void HoistSpillHelper::hoistAllSpills() {
  SmallVector<Register, 4> NewVRegs;
  LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);

  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
    Register Reg = Register::index2VirtReg(i);
    Register Original = VRM.getPreSplitReg(Reg);
    if (!MRI.def_empty(Reg))
      Virt2SiblingsMap[Original].insert(Reg);
  }
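  // Virt2SiblingsMap now groups every virtual register with a definition
  // under the pre-split register it descends from; isSpillCandBB consults it
  // to find a sibling that is live at a candidate insert point.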

  // Each entry in MergeableSpills contains a spill set with equal values.
  for (auto &Ent : MergeableSpills) {
    int Slot = Ent.first.first;
    LiveInterval &OrigLI = *StackSlotToOrigLI[Slot];
    VNInfo *OrigVNI = Ent.first.second;
    SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
    if (Ent.second.empty())
      continue;

    LLVM_DEBUG({
      dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
             << "Equal spills in BB: ";
      for (const auto spill : EqValSpills)
        dbgs() << spill->getParent()->getNumber() << " ";
      dbgs() << "\n";
    });

    // SpillsToRm is the spill set to be removed from EqValSpills.
    SmallVector<MachineInstr *, 16> SpillsToRm;
    // SpillsToIns is the spill set to be newly inserted after hoisting.
    DenseMap<MachineBasicBlock *, Register> SpillsToIns;

    runHoistSpills(OrigLI, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);

    LLVM_DEBUG({
      dbgs() << "Finally inserted spills in BB: ";
      for (const auto &Ispill : SpillsToIns)
        dbgs() << Ispill.first->getNumber() << " ";
      dbgs() << "\nFinally removed spills in BB: ";
      for (const auto Rspill : SpillsToRm)
        dbgs() << Rspill->getParent()->getNumber() << " ";
      dbgs() << "\n";
    });

    // Stack live range update.
    LiveInterval &StackIntvl = LSS.getInterval(Slot);
    if (!SpillsToIns.empty() || !SpillsToRm.empty())
      StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
                                     StackIntvl.getValNumInfo(0));

    // Insert hoisted spills.
    for (auto const &Insert : SpillsToIns) {
      MachineBasicBlock *BB = Insert.first;
      Register LiveReg = Insert.second;
      MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB);
      MachineInstrSpan MIS(MII, BB);
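      // MIS brackets whatever instruction sequence storeRegToStackSlot
      // emits, so every new instruction can be registered with
      // LiveIntervals below.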
      TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot,
                              MRI.getRegClass(LiveReg), &TRI, Register());
      LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
      for (const MachineInstr &MI : make_range(MIS.begin(), MII))
        getVDefInterval(MI, LIS);
      ++NumSpills;
    }

    // Remove redundant spills or change them to dead instructions.
    NumSpills -= SpillsToRm.size();
    for (auto *const RMEnt : SpillsToRm) {
      RMEnt->setDesc(TII.get(TargetOpcode::KILL));
      for (unsigned i = RMEnt->getNumOperands(); i; --i) {
        MachineOperand &MO = RMEnt->getOperand(i - 1);
        if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
          RMEnt->removeOperand(i - 1);
      }
    }
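    // The KILLs created above are now trivially dead; let LiveRangeEdit
    // erase them and update the affected live ranges.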
    Edit.eliminateDeadDefs(SpillsToRm, {});
  }
}

/// For a VirtReg clone, the \p New register should have the same physreg or
/// stackslot as the \p Old register.
void HoistSpillHelper::LRE_DidCloneVirtReg(Register New, Register Old) {
  if (VRM.hasPhys(Old))
    VRM.assignVirt2Phys(New, VRM.getPhys(Old));
  else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT)
    VRM.assignVirt2StackSlot(New, VRM.getStackSlot(Old));
  else
    llvm_unreachable("VReg should be assigned either physreg or stackslot");
  if (VRM.hasShape(Old))
    VRM.assignVirt2Shape(New, VRM.getShape(Old));
}
