1 | //===----- X86DynAllocaExpander.cpp - Expand DynAlloca pseudo instruction -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a pass that expands DynAlloca pseudo-instructions. |
10 | // |
11 | // It performs a conservative analysis to determine whether each allocation |
12 | // falls within a region of the stack that is safe to use, or whether stack |
13 | // probes must be emitted. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "X86.h" |
18 | #include "X86InstrInfo.h" |
19 | #include "X86MachineFunctionInfo.h" |
20 | #include "X86Subtarget.h" |
21 | #include "llvm/ADT/MapVector.h" |
22 | #include "llvm/ADT/PostOrderIterator.h" |
23 | #include "llvm/CodeGen/MachineFunctionPass.h" |
24 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
25 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
26 | #include "llvm/CodeGen/Passes.h" |
27 | #include "llvm/CodeGen/TargetInstrInfo.h" |
28 | #include "llvm/IR/Function.h" |
29 | |
30 | using namespace llvm; |
31 | |
32 | namespace { |
33 | |
34 | class X86DynAllocaExpander : public MachineFunctionPass { |
35 | public: |
36 | X86DynAllocaExpander() : MachineFunctionPass(ID) {} |
37 | |
38 | bool runOnMachineFunction(MachineFunction &MF) override; |
39 | |
40 | private: |
41 | /// Strategies for lowering a DynAlloca. |
42 | enum Lowering { TouchAndSub, Sub, Probe }; |
43 | |
44 | /// Deterministic-order map from DynAlloca instruction to desired lowering. |
45 | typedef MapVector<MachineInstr*, Lowering> LoweringMap; |
46 | |
47 | /// Compute which lowering to use for each DynAlloca instruction. |
48 | void computeLowerings(MachineFunction &MF, LoweringMap& Lowerings); |
49 | |
50 | /// Get the appropriate lowering based on current offset and amount. |
51 | Lowering getLowering(int64_t CurrentOffset, int64_t AllocaAmount); |
52 | |
53 | /// Lower a DynAlloca instruction. |
54 | void lower(MachineInstr* MI, Lowering L); |
55 | |
56 | MachineRegisterInfo *MRI = nullptr; |
57 | const X86Subtarget *STI = nullptr; |
58 | const TargetInstrInfo *TII = nullptr; |
59 | const X86RegisterInfo *TRI = nullptr; |
60 | Register StackPtr; |
61 | unsigned SlotSize = 0; |
62 | int64_t StackProbeSize = 0; |
63 | bool NoStackArgProbe = false; |
64 | |
65 | StringRef getPassName() const override { return "X86 DynAlloca Expander" ; } |
66 | |
67 | public: |
68 | static char ID; |
69 | }; |
70 | |
71 | char X86DynAllocaExpander::ID = 0; |
72 | |
73 | } // end anonymous namespace |
74 | |
75 | INITIALIZE_PASS(X86DynAllocaExpander, "x86-dyn-alloca-expander" , |
76 | "X86 DynAlloca Expander" , false, false) |
77 | |
78 | FunctionPass *llvm::createX86DynAllocaExpander() { |
79 | return new X86DynAllocaExpander(); |
80 | } |
81 | |
82 | /// Return the allocation amount for a DynAlloca instruction, or -1 if unknown. |
83 | static int64_t getDynAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) { |
84 | assert(MI->getOpcode() == X86::DYN_ALLOCA_32 || |
85 | MI->getOpcode() == X86::DYN_ALLOCA_64); |
86 | assert(MI->getOperand(0).isReg()); |
87 | |
88 | Register AmountReg = MI->getOperand(i: 0).getReg(); |
89 | MachineInstr *Def = MRI->getUniqueVRegDef(Reg: AmountReg); |
90 | |
91 | if (!Def || |
92 | (Def->getOpcode() != X86::MOV32ri && Def->getOpcode() != X86::MOV64ri) || |
93 | !Def->getOperand(i: 1).isImm()) |
94 | return -1; |
95 | |
96 | return Def->getOperand(i: 1).getImm(); |
97 | } |
98 | |
99 | X86DynAllocaExpander::Lowering |
100 | X86DynAllocaExpander::getLowering(int64_t CurrentOffset, |
101 | int64_t AllocaAmount) { |
102 | // For a non-constant amount or a large amount, we have to probe. |
103 | if (AllocaAmount < 0 || AllocaAmount > StackProbeSize) |
104 | return Probe; |
105 | |
106 | // If it fits within the safe region of the stack, just subtract. |
107 | if (CurrentOffset + AllocaAmount <= StackProbeSize) |
108 | return Sub; |
109 | |
110 | // Otherwise, touch the current tip of the stack, then subtract. |
111 | return TouchAndSub; |
112 | } |
113 | |
114 | static bool isPushPop(const MachineInstr &MI) { |
115 | switch (MI.getOpcode()) { |
116 | case X86::PUSH32r: |
117 | case X86::PUSH32rmm: |
118 | case X86::PUSH32rmr: |
119 | case X86::PUSH32i: |
120 | case X86::PUSH64r: |
121 | case X86::PUSH64rmm: |
122 | case X86::PUSH64rmr: |
123 | case X86::PUSH64i32: |
124 | case X86::POP32r: |
125 | case X86::POP64r: |
126 | return true; |
127 | default: |
128 | return false; |
129 | } |
130 | } |
131 | |
132 | void X86DynAllocaExpander::computeLowerings(MachineFunction &MF, |
133 | LoweringMap &Lowerings) { |
134 | // Do a one-pass reverse post-order walk of the CFG to conservatively estimate |
135 | // the offset between the stack pointer and the lowest touched part of the |
136 | // stack, and use that to decide how to lower each DynAlloca instruction. |
137 | |
138 | // Initialize OutOffset[B], the stack offset at exit from B, to something big. |
139 | DenseMap<MachineBasicBlock *, int64_t> OutOffset; |
140 | for (MachineBasicBlock &MBB : MF) |
141 | OutOffset[&MBB] = INT32_MAX; |
142 | |
143 | // Note: we don't know the offset at the start of the entry block since the |
144 | // prologue hasn't been inserted yet, and how much that will adjust the stack |
145 | // pointer depends on register spills, which have not been computed yet. |
146 | |
147 | // Compute the reverse post-order. |
148 | ReversePostOrderTraversal<MachineFunction*> RPO(&MF); |
149 | |
150 | for (MachineBasicBlock *MBB : RPO) { |
151 | int64_t Offset = -1; |
152 | for (MachineBasicBlock *Pred : MBB->predecessors()) |
153 | Offset = std::max(a: Offset, b: OutOffset[Pred]); |
154 | if (Offset == -1) Offset = INT32_MAX; |
155 | |
156 | for (MachineInstr &MI : *MBB) { |
157 | if (MI.getOpcode() == X86::DYN_ALLOCA_32 || |
158 | MI.getOpcode() == X86::DYN_ALLOCA_64) { |
159 | // A DynAlloca moves StackPtr, and potentially touches it. |
160 | int64_t Amount = getDynAllocaAmount(MI: &MI, MRI); |
161 | Lowering L = getLowering(CurrentOffset: Offset, AllocaAmount: Amount); |
162 | Lowerings[&MI] = L; |
163 | switch (L) { |
164 | case Sub: |
165 | Offset += Amount; |
166 | break; |
167 | case TouchAndSub: |
168 | Offset = Amount; |
169 | break; |
170 | case Probe: |
171 | Offset = 0; |
172 | break; |
173 | } |
174 | } else if (MI.isCall() || isPushPop(MI)) { |
175 | // Calls, pushes and pops touch the tip of the stack. |
176 | Offset = 0; |
177 | } else if (MI.getOpcode() == X86::ADJCALLSTACKUP32 || |
178 | MI.getOpcode() == X86::ADJCALLSTACKUP64) { |
179 | Offset -= MI.getOperand(i: 0).getImm(); |
180 | } else if (MI.getOpcode() == X86::ADJCALLSTACKDOWN32 || |
181 | MI.getOpcode() == X86::ADJCALLSTACKDOWN64) { |
182 | Offset += MI.getOperand(i: 0).getImm(); |
183 | } else if (MI.modifiesRegister(Reg: StackPtr, TRI)) { |
184 | // Any other modification of SP means we've lost track of it. |
185 | Offset = INT32_MAX; |
186 | } |
187 | } |
188 | |
189 | OutOffset[MBB] = Offset; |
190 | } |
191 | } |
192 | |
193 | static unsigned getSubOpcode(bool Is64Bit) { |
194 | if (Is64Bit) |
195 | return X86::SUB64ri32; |
196 | return X86::SUB32ri; |
197 | } |
198 | |
199 | void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) { |
200 | const DebugLoc &DL = MI->getDebugLoc(); |
201 | MachineBasicBlock *MBB = MI->getParent(); |
202 | MachineBasicBlock::iterator I = *MI; |
203 | |
204 | int64_t Amount = getDynAllocaAmount(MI, MRI); |
205 | if (Amount == 0) { |
206 | MI->eraseFromParent(); |
207 | return; |
208 | } |
209 | |
210 | // These two variables differ on x32, which is a 64-bit target with a |
211 | // 32-bit alloca. |
212 | bool Is64Bit = STI->is64Bit(); |
213 | bool Is64BitAlloca = MI->getOpcode() == X86::DYN_ALLOCA_64; |
214 | assert(SlotSize == 4 || SlotSize == 8); |
215 | |
216 | std::optional<MachineFunction::DebugInstrOperandPair> InstrNum; |
217 | if (unsigned Num = MI->peekDebugInstrNum()) { |
218 | // Operand 2 of DYN_ALLOCAs contains the stack def. |
219 | InstrNum = {Num, 2}; |
220 | } |
221 | |
222 | switch (L) { |
223 | case TouchAndSub: { |
224 | assert(Amount >= SlotSize); |
225 | |
226 | // Use a push to touch the top of the stack. |
227 | unsigned RegA = Is64Bit ? X86::RAX : X86::EAX; |
228 | BuildMI(BB&: *MBB, I, MIMD: DL, MCID: TII->get(Opcode: Is64Bit ? X86::PUSH64r : X86::PUSH32r)) |
229 | .addReg(RegNo: RegA, flags: RegState::Undef); |
230 | Amount -= SlotSize; |
231 | if (!Amount) |
232 | break; |
233 | |
234 | // Fall through to make any remaining adjustment. |
235 | [[fallthrough]]; |
236 | } |
237 | case Sub: |
238 | assert(Amount > 0); |
239 | if (Amount == SlotSize) { |
240 | // Use push to save size. |
241 | unsigned RegA = Is64Bit ? X86::RAX : X86::EAX; |
242 | BuildMI(BB&: *MBB, I, MIMD: DL, MCID: TII->get(Opcode: Is64Bit ? X86::PUSH64r : X86::PUSH32r)) |
243 | .addReg(RegNo: RegA, flags: RegState::Undef); |
244 | } else { |
245 | // Sub. |
246 | BuildMI(BB&: *MBB, I, MIMD: DL, MCID: TII->get(Opcode: getSubOpcode(Is64Bit: Is64BitAlloca)), DestReg: StackPtr) |
247 | .addReg(RegNo: StackPtr) |
248 | .addImm(Val: Amount); |
249 | } |
250 | break; |
251 | case Probe: |
252 | if (!NoStackArgProbe) { |
253 | // The probe lowering expects the amount in RAX/EAX. |
254 | unsigned RegA = Is64BitAlloca ? X86::RAX : X86::EAX; |
255 | BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: RegA) |
256 | .addReg(RegNo: MI->getOperand(i: 0).getReg()); |
257 | |
258 | // Do the probe. |
259 | STI->getFrameLowering()->emitStackProbe(MF&: *MBB->getParent(), MBB&: *MBB, MBBI: MI, DL, |
260 | /*InProlog=*/false, InstrNum); |
261 | } else { |
262 | // Sub |
263 | BuildMI(BB&: *MBB, I, MIMD: DL, |
264 | MCID: TII->get(Opcode: Is64BitAlloca ? X86::SUB64rr : X86::SUB32rr), DestReg: StackPtr) |
265 | .addReg(RegNo: StackPtr) |
266 | .addReg(RegNo: MI->getOperand(i: 0).getReg()); |
267 | } |
268 | break; |
269 | } |
270 | |
271 | Register AmountReg = MI->getOperand(i: 0).getReg(); |
272 | MI->eraseFromParent(); |
273 | |
274 | // Delete the definition of AmountReg. |
275 | if (MRI->use_empty(RegNo: AmountReg)) |
276 | if (MachineInstr *AmountDef = MRI->getUniqueVRegDef(Reg: AmountReg)) |
277 | AmountDef->eraseFromParent(); |
278 | } |
279 | |
280 | bool X86DynAllocaExpander::runOnMachineFunction(MachineFunction &MF) { |
281 | if (!MF.getInfo<X86MachineFunctionInfo>()->hasDynAlloca()) |
282 | return false; |
283 | |
284 | MRI = &MF.getRegInfo(); |
285 | STI = &MF.getSubtarget<X86Subtarget>(); |
286 | TII = STI->getInstrInfo(); |
287 | TRI = STI->getRegisterInfo(); |
288 | StackPtr = TRI->getStackRegister(); |
289 | SlotSize = TRI->getSlotSize(); |
290 | StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF); |
291 | NoStackArgProbe = MF.getFunction().hasFnAttribute(Kind: "no-stack-arg-probe" ); |
292 | if (NoStackArgProbe) |
293 | StackProbeSize = INT64_MAX; |
294 | |
295 | LoweringMap Lowerings; |
296 | computeLowerings(MF, Lowerings); |
297 | for (auto &P : Lowerings) |
298 | lower(MI: P.first, L: P.second); |
299 | |
300 | return true; |
301 | } |
302 | |