1 | //===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass assigns local frame indices to stack slots relative to one another |
10 | // and allocates additional base registers to access them when the target |
11 | // estimates they are likely to be out of range of stack pointer and frame |
12 | // pointer relative addressing. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include "llvm/CodeGen/LocalStackSlotAllocation.h" |
17 | #include "llvm/ADT/SetVector.h" |
18 | #include "llvm/ADT/SmallSet.h" |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/ADT/Statistic.h" |
21 | #include "llvm/CodeGen/MachineBasicBlock.h" |
22 | #include "llvm/CodeGen/MachineFrameInfo.h" |
23 | #include "llvm/CodeGen/MachineFunction.h" |
24 | #include "llvm/CodeGen/MachineFunctionPass.h" |
25 | #include "llvm/CodeGen/MachineInstr.h" |
26 | #include "llvm/CodeGen/MachineOperand.h" |
27 | #include "llvm/CodeGen/TargetFrameLowering.h" |
28 | #include "llvm/CodeGen/TargetOpcodes.h" |
29 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
30 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
31 | #include "llvm/InitializePasses.h" |
32 | #include "llvm/Pass.h" |
33 | #include "llvm/Support/Debug.h" |
34 | #include "llvm/Support/ErrorHandling.h" |
35 | #include "llvm/Support/raw_ostream.h" |
36 | #include <algorithm> |
37 | #include <cassert> |
38 | #include <cstdint> |
39 | #include <tuple> |
40 | |
41 | using namespace llvm; |
42 | |
// Tag used by the LLVM_DEBUG output in this file and passed to
// INITIALIZE_PASS below.
#define DEBUG_TYPE "localstackalloc"

// Counters maintained by this pass:
//  - NumAllocations is bumped once per AdjustStackOffset call.
//  - NumBaseRegisters is bumped each time a base register is materialized.
//  - NumReplacements is bumped each time an instruction is rewritten through
//    resolveFrameIndex.
STATISTIC(NumAllocations, "Number of frame indices allocated into local block" );
STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated" );
STATISTIC(NumReplacements, "Number of frame indices references replaced" );
48 | |
49 | namespace { |
50 | |
51 | class FrameRef { |
52 | MachineBasicBlock::iterator MI; // Instr referencing the frame |
53 | int64_t LocalOffset; // Local offset of the frame idx referenced |
54 | int FrameIdx; // The frame index |
55 | |
56 | // Order reference instruction appears in program. Used to ensure |
57 | // deterministic order when multiple instructions may reference the same |
58 | // location. |
59 | unsigned Order; |
60 | |
61 | public: |
62 | FrameRef(MachineInstr *I, int64_t Offset, int Idx, unsigned Ord) : |
63 | MI(I), LocalOffset(Offset), FrameIdx(Idx), Order(Ord) {} |
64 | |
65 | bool operator<(const FrameRef &RHS) const { |
66 | return std::tie(args: LocalOffset, args: FrameIdx, args: Order) < |
67 | std::tie(args: RHS.LocalOffset, args: RHS.FrameIdx, args: RHS.Order); |
68 | } |
69 | |
70 | MachineBasicBlock::iterator getMachineInstr() const { return MI; } |
71 | int64_t getLocalOffset() const { return LocalOffset; } |
72 | int getFrameIndex() const { return FrameIdx; } |
73 | }; |
74 | |
75 | class LocalStackSlotImpl { |
76 | SmallVector<int64_t, 16> LocalOffsets; |
77 | |
78 | /// StackObjSet - A set of stack object indexes |
79 | using StackObjSet = SmallSetVector<int, 8>; |
80 | |
81 | void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset, |
82 | bool StackGrowsDown, Align &MaxAlign); |
83 | void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, |
84 | SmallSet<int, 16> &ProtectedObjs, |
85 | MachineFrameInfo &MFI, bool StackGrowsDown, |
86 | int64_t &Offset, Align &MaxAlign); |
87 | void calculateFrameObjectOffsets(MachineFunction &Fn); |
88 | bool insertFrameReferenceRegisters(MachineFunction &Fn); |
89 | |
90 | public: |
91 | bool runOnMachineFunction(MachineFunction &MF); |
92 | }; |
93 | |
94 | class LocalStackSlotPass : public MachineFunctionPass { |
95 | public: |
96 | static char ID; // Pass identification, replacement for typeid |
97 | |
98 | explicit LocalStackSlotPass() : MachineFunctionPass(ID) { |
99 | initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry()); |
100 | } |
101 | |
102 | bool runOnMachineFunction(MachineFunction &MF) override { |
103 | return LocalStackSlotImpl().runOnMachineFunction(MF); |
104 | } |
105 | |
106 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
107 | AU.setPreservesCFG(); |
108 | MachineFunctionPass::getAnalysisUsage(AU); |
109 | } |
110 | }; |
111 | |
112 | } // end anonymous namespace |
113 | |
114 | PreservedAnalyses |
115 | LocalStackSlotAllocationPass::run(MachineFunction &MF, |
116 | MachineFunctionAnalysisManager &) { |
117 | bool Changed = LocalStackSlotImpl().runOnMachineFunction(MF); |
118 | if (!Changed) |
119 | return PreservedAnalyses::all(); |
120 | auto PA = getMachineFunctionPassPreservedAnalyses(); |
121 | PA.preserveSet<CFGAnalyses>(); |
122 | return PA; |
123 | } |
124 | |
// Definition of the static pass ID declared inside LocalStackSlotPass.
char LocalStackSlotPass::ID = 0;

// llvm::LocalStackSlotAllocationID is bound to that same ID object.
char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
// Pass initialization boilerplate: the pass class, DEBUG_TYPE as its name
// argument, and a human-readable description.
INITIALIZE_PASS(LocalStackSlotPass, DEBUG_TYPE,
                "Local Stack Slot Allocation" , false, false)
130 | |
131 | bool LocalStackSlotImpl::runOnMachineFunction(MachineFunction &MF) { |
132 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
133 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
134 | unsigned LocalObjectCount = MFI.getObjectIndexEnd(); |
135 | |
136 | // If the target doesn't want/need this pass, or if there are no locals |
137 | // to consider, early exit. |
138 | if (LocalObjectCount == 0 || !TRI->requiresVirtualBaseRegisters(MF)) |
139 | return false; |
140 | |
141 | // Make sure we have enough space to store the local offsets. |
142 | LocalOffsets.resize(N: MFI.getObjectIndexEnd()); |
143 | |
144 | // Lay out the local blob. |
145 | calculateFrameObjectOffsets(Fn&: MF); |
146 | |
147 | // Insert virtual base registers to resolve frame index references. |
148 | bool UsedBaseRegs = insertFrameReferenceRegisters(Fn&: MF); |
149 | |
150 | // Tell MFI whether any base registers were allocated. PEI will only |
151 | // want to use the local block allocations from this pass if there were any. |
152 | // Otherwise, PEI can do a bit better job of getting the alignment right |
153 | // without a hole at the start since it knows the alignment of the stack |
154 | // at the start of local allocation, and this pass doesn't. |
155 | MFI.setUseLocalStackAllocationBlock(UsedBaseRegs); |
156 | |
157 | return true; |
158 | } |
159 | |
160 | /// AdjustStackOffset - Helper function used to adjust the stack frame offset. |
161 | void LocalStackSlotImpl::AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, |
162 | int64_t &Offset, bool StackGrowsDown, |
163 | Align &MaxAlign) { |
164 | // If the stack grows down, add the object size to find the lowest address. |
165 | if (StackGrowsDown) |
166 | Offset += MFI.getObjectSize(ObjectIdx: FrameIdx); |
167 | |
168 | Align Alignment = MFI.getObjectAlign(ObjectIdx: FrameIdx); |
169 | |
170 | // If the alignment of this object is greater than that of the stack, then |
171 | // increase the stack alignment to match. |
172 | MaxAlign = std::max(a: MaxAlign, b: Alignment); |
173 | |
174 | // Adjust to alignment boundary. |
175 | Offset = alignTo(Size: Offset, A: Alignment); |
176 | |
177 | int64_t LocalOffset = StackGrowsDown ? -Offset : Offset; |
178 | LLVM_DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " |
179 | << LocalOffset << "\n" ); |
180 | // Keep the offset available for base register allocation |
181 | LocalOffsets[FrameIdx] = LocalOffset; |
182 | // And tell MFI about it for PEI to use later |
183 | MFI.mapLocalFrameObject(ObjectIndex: FrameIdx, Offset: LocalOffset); |
184 | |
185 | if (!StackGrowsDown) |
186 | Offset += MFI.getObjectSize(ObjectIdx: FrameIdx); |
187 | |
188 | ++NumAllocations; |
189 | } |
190 | |
191 | /// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., |
192 | /// those required to be close to the Stack Protector) to stack offsets. |
193 | void LocalStackSlotImpl::AssignProtectedObjSet( |
194 | const StackObjSet &UnassignedObjs, SmallSet<int, 16> &ProtectedObjs, |
195 | MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, |
196 | Align &MaxAlign) { |
197 | for (int i : UnassignedObjs) { |
198 | AdjustStackOffset(MFI, FrameIdx: i, Offset, StackGrowsDown, MaxAlign); |
199 | ProtectedObjs.insert(V: i); |
200 | } |
201 | } |
202 | |
203 | /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the |
204 | /// abstract stack objects. |
205 | void LocalStackSlotImpl::calculateFrameObjectOffsets(MachineFunction &Fn) { |
206 | // Loop over all of the stack objects, assigning sequential addresses... |
207 | MachineFrameInfo &MFI = Fn.getFrameInfo(); |
208 | const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); |
209 | bool StackGrowsDown = |
210 | TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; |
211 | int64_t Offset = 0; |
212 | Align MaxAlign; |
213 | |
214 | // Make sure that the stack protector comes before the local variables on the |
215 | // stack. |
216 | SmallSet<int, 16> ProtectedObjs; |
217 | if (MFI.hasStackProtectorIndex()) { |
218 | int StackProtectorFI = MFI.getStackProtectorIndex(); |
219 | |
220 | // We need to make sure we didn't pre-allocate the stack protector when |
221 | // doing this. |
222 | // If we already have a stack protector, this will re-assign it to a slot |
223 | // that is **not** covering the protected objects. |
224 | assert(!MFI.isObjectPreAllocated(StackProtectorFI) && |
225 | "Stack protector pre-allocated in LocalStackSlotAllocation" ); |
226 | |
227 | StackObjSet LargeArrayObjs; |
228 | StackObjSet SmallArrayObjs; |
229 | StackObjSet AddrOfObjs; |
230 | |
231 | // Only place the stack protector in the local stack area if the target |
232 | // allows it. |
233 | if (TFI.isStackIdSafeForLocalArea(StackId: MFI.getStackID(ObjectIdx: StackProtectorFI))) |
234 | AdjustStackOffset(MFI, FrameIdx: StackProtectorFI, Offset, StackGrowsDown, |
235 | MaxAlign); |
236 | |
237 | // Assign large stack objects first. |
238 | for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { |
239 | if (MFI.isDeadObjectIndex(ObjectIdx: i)) |
240 | continue; |
241 | if (StackProtectorFI == (int)i) |
242 | continue; |
243 | if (!TFI.isStackIdSafeForLocalArea(StackId: MFI.getStackID(ObjectIdx: i))) |
244 | continue; |
245 | |
246 | switch (MFI.getObjectSSPLayout(ObjectIdx: i)) { |
247 | case MachineFrameInfo::SSPLK_None: |
248 | continue; |
249 | case MachineFrameInfo::SSPLK_SmallArray: |
250 | SmallArrayObjs.insert(X: i); |
251 | continue; |
252 | case MachineFrameInfo::SSPLK_AddrOf: |
253 | AddrOfObjs.insert(X: i); |
254 | continue; |
255 | case MachineFrameInfo::SSPLK_LargeArray: |
256 | LargeArrayObjs.insert(X: i); |
257 | continue; |
258 | } |
259 | llvm_unreachable("Unexpected SSPLayoutKind." ); |
260 | } |
261 | |
262 | AssignProtectedObjSet(UnassignedObjs: LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, |
263 | Offset, MaxAlign); |
264 | AssignProtectedObjSet(UnassignedObjs: SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, |
265 | Offset, MaxAlign); |
266 | AssignProtectedObjSet(UnassignedObjs: AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, |
267 | Offset, MaxAlign); |
268 | } |
269 | |
270 | // Then assign frame offsets to stack objects that are not used to spill |
271 | // callee saved registers. |
272 | for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { |
273 | if (MFI.isDeadObjectIndex(ObjectIdx: i)) |
274 | continue; |
275 | if (MFI.getStackProtectorIndex() == (int)i) |
276 | continue; |
277 | if (ProtectedObjs.count(V: i)) |
278 | continue; |
279 | if (!TFI.isStackIdSafeForLocalArea(StackId: MFI.getStackID(ObjectIdx: i))) |
280 | continue; |
281 | |
282 | AdjustStackOffset(MFI, FrameIdx: i, Offset, StackGrowsDown, MaxAlign); |
283 | } |
284 | |
285 | // Remember how big this blob of stack space is |
286 | MFI.setLocalFrameSize(Offset); |
287 | MFI.setLocalFrameMaxAlign(MaxAlign); |
288 | } |
289 | |
290 | static inline bool |
291 | lookupCandidateBaseReg(unsigned BaseReg, |
292 | int64_t BaseOffset, |
293 | int64_t FrameSizeAdjust, |
294 | int64_t LocalFrameOffset, |
295 | const MachineInstr &MI, |
296 | const TargetRegisterInfo *TRI) { |
297 | // Check if the relative offset from the where the base register references |
298 | // to the target address is in range for the instruction. |
299 | int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset; |
300 | return TRI->isFrameOffsetLegal(MI: &MI, BaseReg, Offset); |
301 | } |
302 | |
303 | bool LocalStackSlotImpl::insertFrameReferenceRegisters(MachineFunction &Fn) { |
304 | // Scan the function's instructions looking for frame index references. |
305 | // For each, ask the target if it wants a virtual base register for it |
306 | // based on what we can tell it about where the local will end up in the |
307 | // stack frame. If it wants one, re-use a suitable one we've previously |
308 | // allocated, or if there isn't one that fits the bill, allocate a new one |
309 | // and ask the target to create a defining instruction for it. |
310 | |
311 | MachineFrameInfo &MFI = Fn.getFrameInfo(); |
312 | const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); |
313 | const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); |
314 | bool StackGrowsDown = |
315 | TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; |
316 | |
317 | // Collect all of the instructions in the block that reference |
318 | // a frame index. Also store the frame index referenced to ease later |
319 | // lookup. (For any insn that has more than one FI reference, we arbitrarily |
320 | // choose the first one). |
321 | SmallVector<FrameRef, 64> FrameReferenceInsns; |
322 | |
323 | unsigned Order = 0; |
324 | |
325 | for (MachineBasicBlock &BB : Fn) { |
326 | for (MachineInstr &MI : BB) { |
327 | // Debug value, stackmap and patchpoint instructions can't be out of |
328 | // range, so they don't need any updates. |
329 | if (MI.isDebugInstr() || MI.getOpcode() == TargetOpcode::STATEPOINT || |
330 | MI.getOpcode() == TargetOpcode::STACKMAP || |
331 | MI.getOpcode() == TargetOpcode::PATCHPOINT) |
332 | continue; |
333 | |
334 | // For now, allocate the base register(s) within the basic block |
335 | // where they're used, and don't try to keep them around outside |
336 | // of that. It may be beneficial to try sharing them more broadly |
337 | // than that, but the increased register pressure makes that a |
338 | // tricky thing to balance. Investigate if re-materializing these |
339 | // becomes an issue. |
340 | for (const MachineOperand &MO : MI.operands()) { |
341 | // Consider replacing all frame index operands that reference |
342 | // an object allocated in the local block. |
343 | if (MO.isFI()) { |
344 | // Don't try this with values not in the local block. |
345 | if (!MFI.isObjectPreAllocated(ObjectIdx: MO.getIndex())) |
346 | break; |
347 | int Idx = MO.getIndex(); |
348 | int64_t LocalOffset = LocalOffsets[Idx]; |
349 | if (!TRI->needsFrameBaseReg(MI: &MI, Offset: LocalOffset)) |
350 | break; |
351 | FrameReferenceInsns.push_back(Elt: FrameRef(&MI, LocalOffset, Idx, Order++)); |
352 | break; |
353 | } |
354 | } |
355 | } |
356 | } |
357 | |
358 | // Sort the frame references by local offset. |
359 | // Use frame index as a tie-breaker in case MI's have the same offset. |
360 | llvm::sort(C&: FrameReferenceInsns); |
361 | |
362 | MachineBasicBlock *Entry = &Fn.front(); |
363 | |
364 | Register BaseReg; |
365 | int64_t BaseOffset = 0; |
366 | |
367 | // Loop through the frame references and allocate for them as necessary. |
368 | for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) { |
369 | FrameRef &FR = FrameReferenceInsns[ref]; |
370 | MachineInstr &MI = *FR.getMachineInstr(); |
371 | int64_t LocalOffset = FR.getLocalOffset(); |
372 | int FrameIdx = FR.getFrameIndex(); |
373 | assert(MFI.isObjectPreAllocated(FrameIdx) && |
374 | "Only pre-allocated locals expected!" ); |
375 | |
376 | // We need to keep the references to the stack protector slot through frame |
377 | // index operands so that it gets resolved by PEI rather than this pass. |
378 | // This avoids accesses to the stack protector though virtual base |
379 | // registers, and forces PEI to address it using fp/sp/bp. |
380 | if (MFI.hasStackProtectorIndex() && |
381 | FrameIdx == MFI.getStackProtectorIndex()) |
382 | continue; |
383 | |
384 | LLVM_DEBUG(dbgs() << "Considering: " << MI); |
385 | |
386 | unsigned idx = 0; |
387 | for (unsigned f = MI.getNumOperands(); idx != f; ++idx) { |
388 | if (!MI.getOperand(i: idx).isFI()) |
389 | continue; |
390 | |
391 | if (FrameIdx == MI.getOperand(i: idx).getIndex()) |
392 | break; |
393 | } |
394 | |
395 | assert(idx < MI.getNumOperands() && "Cannot find FI operand" ); |
396 | |
397 | int64_t Offset = 0; |
398 | int64_t FrameSizeAdjust = StackGrowsDown ? MFI.getLocalFrameSize() : 0; |
399 | |
400 | LLVM_DEBUG(dbgs() << " Replacing FI in: " << MI); |
401 | |
402 | // If we have a suitable base register available, use it; otherwise |
403 | // create a new one. Note that any offset encoded in the |
404 | // instruction itself will be taken into account by the target, |
405 | // so we don't have to adjust for it here when reusing a base |
406 | // register. |
407 | if (BaseReg.isValid() && |
408 | lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust, |
409 | LocalFrameOffset: LocalOffset, MI, TRI)) { |
410 | LLVM_DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n" ); |
411 | // We found a register to reuse. |
412 | Offset = FrameSizeAdjust + LocalOffset - BaseOffset; |
413 | } else { |
414 | // No previously defined register was in range, so create a new one. |
415 | int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI: &MI, Idx: idx); |
416 | |
417 | int64_t CandBaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset; |
418 | |
419 | // We'd like to avoid creating single-use virtual base registers. |
420 | // Because the FrameRefs are in sorted order, and we've already |
421 | // processed all FrameRefs before this one, just check whether or not |
422 | // the next FrameRef will be able to reuse this new register. If not, |
423 | // then don't bother creating it. |
424 | if (ref + 1 >= e || |
425 | !lookupCandidateBaseReg( |
426 | BaseReg, BaseOffset: CandBaseOffset, FrameSizeAdjust, |
427 | LocalFrameOffset: FrameReferenceInsns[ref + 1].getLocalOffset(), |
428 | MI: *FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) |
429 | continue; |
430 | |
431 | // Save the base offset. |
432 | BaseOffset = CandBaseOffset; |
433 | |
434 | // Tell the target to insert the instruction to initialize |
435 | // the base register. |
436 | // MachineBasicBlock::iterator InsertionPt = Entry->begin(); |
437 | BaseReg = TRI->materializeFrameBaseRegister(MBB: Entry, FrameIdx, Offset: InstrOffset); |
438 | |
439 | LLVM_DEBUG(dbgs() << " Materialized base register at frame local offset " |
440 | << LocalOffset + InstrOffset |
441 | << " into " << printReg(BaseReg, TRI) << '\n'); |
442 | |
443 | // The base register already includes any offset specified |
444 | // by the instruction, so account for that so it doesn't get |
445 | // applied twice. |
446 | Offset = -InstrOffset; |
447 | |
448 | ++NumBaseRegisters; |
449 | } |
450 | assert(BaseReg && "Unable to allocate virtual base register!" ); |
451 | |
452 | // Modify the instruction to use the new base register rather |
453 | // than the frame index operand. |
454 | TRI->resolveFrameIndex(MI, BaseReg, Offset); |
455 | LLVM_DEBUG(dbgs() << "Resolved: " << MI); |
456 | |
457 | ++NumReplacements; |
458 | } |
459 | |
460 | return BaseReg.isValid(); |
461 | } |
462 | |