1 | //===----- X86DynAllocaExpander.cpp - Expand DynAlloca pseudo instruction -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a pass that expands DynAlloca pseudo-instructions. |
10 | // |
11 | // It performs a conservative analysis to determine whether each allocation |
12 | // falls within a region of the stack that is safe to use, or whether stack |
13 | // probes must be emitted. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "X86.h" |
18 | #include "X86InstrBuilder.h" |
19 | #include "X86InstrInfo.h" |
20 | #include "X86MachineFunctionInfo.h" |
21 | #include "X86Subtarget.h" |
22 | #include "llvm/ADT/MapVector.h" |
23 | #include "llvm/ADT/PostOrderIterator.h" |
24 | #include "llvm/CodeGen/MachineFunctionPass.h" |
25 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
26 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
27 | #include "llvm/CodeGen/Passes.h" |
28 | #include "llvm/CodeGen/TargetInstrInfo.h" |
29 | #include "llvm/IR/Function.h" |
30 | #include "llvm/Support/raw_ostream.h" |
31 | |
32 | using namespace llvm; |
33 | |
34 | namespace { |
35 | |
36 | class X86DynAllocaExpander : public MachineFunctionPass { |
37 | public: |
38 | X86DynAllocaExpander() : MachineFunctionPass(ID) {} |
39 | |
40 | bool runOnMachineFunction(MachineFunction &MF) override; |
41 | |
42 | private: |
43 | /// Strategies for lowering a DynAlloca. |
44 | enum Lowering { TouchAndSub, Sub, Probe }; |
45 | |
46 | /// Deterministic-order map from DynAlloca instruction to desired lowering. |
47 | typedef MapVector<MachineInstr*, Lowering> LoweringMap; |
48 | |
49 | /// Compute which lowering to use for each DynAlloca instruction. |
50 | void computeLowerings(MachineFunction &MF, LoweringMap& Lowerings); |
51 | |
52 | /// Get the appropriate lowering based on current offset and amount. |
53 | Lowering getLowering(int64_t CurrentOffset, int64_t AllocaAmount); |
54 | |
55 | /// Lower a DynAlloca instruction. |
56 | void lower(MachineInstr* MI, Lowering L); |
57 | |
58 | MachineRegisterInfo *MRI = nullptr; |
59 | const X86Subtarget *STI = nullptr; |
60 | const TargetInstrInfo *TII = nullptr; |
61 | const X86RegisterInfo *TRI = nullptr; |
62 | unsigned StackPtr = 0; |
63 | unsigned SlotSize = 0; |
64 | int64_t StackProbeSize = 0; |
65 | bool NoStackArgProbe = false; |
66 | |
67 | StringRef getPassName() const override { return "X86 DynAlloca Expander" ; } |
68 | static char ID; |
69 | }; |
70 | |
71 | char X86DynAllocaExpander::ID = 0; |
72 | |
73 | } // end anonymous namespace |
74 | |
75 | FunctionPass *llvm::createX86DynAllocaExpander() { |
76 | return new X86DynAllocaExpander(); |
77 | } |
78 | |
79 | /// Return the allocation amount for a DynAlloca instruction, or -1 if unknown. |
80 | static int64_t getDynAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) { |
81 | assert(MI->getOpcode() == X86::DYN_ALLOCA_32 || |
82 | MI->getOpcode() == X86::DYN_ALLOCA_64); |
83 | assert(MI->getOperand(0).isReg()); |
84 | |
85 | Register AmountReg = MI->getOperand(i: 0).getReg(); |
86 | MachineInstr *Def = MRI->getUniqueVRegDef(Reg: AmountReg); |
87 | |
88 | if (!Def || |
89 | (Def->getOpcode() != X86::MOV32ri && Def->getOpcode() != X86::MOV64ri) || |
90 | !Def->getOperand(i: 1).isImm()) |
91 | return -1; |
92 | |
93 | return Def->getOperand(i: 1).getImm(); |
94 | } |
95 | |
96 | X86DynAllocaExpander::Lowering |
97 | X86DynAllocaExpander::getLowering(int64_t CurrentOffset, |
98 | int64_t AllocaAmount) { |
99 | // For a non-constant amount or a large amount, we have to probe. |
100 | if (AllocaAmount < 0 || AllocaAmount > StackProbeSize) |
101 | return Probe; |
102 | |
103 | // If it fits within the safe region of the stack, just subtract. |
104 | if (CurrentOffset + AllocaAmount <= StackProbeSize) |
105 | return Sub; |
106 | |
107 | // Otherwise, touch the current tip of the stack, then subtract. |
108 | return TouchAndSub; |
109 | } |
110 | |
111 | static bool isPushPop(const MachineInstr &MI) { |
112 | switch (MI.getOpcode()) { |
113 | case X86::PUSH32r: |
114 | case X86::PUSH32rmm: |
115 | case X86::PUSH32rmr: |
116 | case X86::PUSH32i: |
117 | case X86::PUSH64r: |
118 | case X86::PUSH64rmm: |
119 | case X86::PUSH64rmr: |
120 | case X86::PUSH64i32: |
121 | case X86::POP32r: |
122 | case X86::POP64r: |
123 | return true; |
124 | default: |
125 | return false; |
126 | } |
127 | } |
128 | |
129 | void X86DynAllocaExpander::computeLowerings(MachineFunction &MF, |
130 | LoweringMap &Lowerings) { |
131 | // Do a one-pass reverse post-order walk of the CFG to conservatively estimate |
132 | // the offset between the stack pointer and the lowest touched part of the |
133 | // stack, and use that to decide how to lower each DynAlloca instruction. |
134 | |
135 | // Initialize OutOffset[B], the stack offset at exit from B, to something big. |
136 | DenseMap<MachineBasicBlock *, int64_t> OutOffset; |
137 | for (MachineBasicBlock &MBB : MF) |
138 | OutOffset[&MBB] = INT32_MAX; |
139 | |
140 | // Note: we don't know the offset at the start of the entry block since the |
141 | // prologue hasn't been inserted yet, and how much that will adjust the stack |
142 | // pointer depends on register spills, which have not been computed yet. |
143 | |
144 | // Compute the reverse post-order. |
145 | ReversePostOrderTraversal<MachineFunction*> RPO(&MF); |
146 | |
147 | for (MachineBasicBlock *MBB : RPO) { |
148 | int64_t Offset = -1; |
149 | for (MachineBasicBlock *Pred : MBB->predecessors()) |
150 | Offset = std::max(a: Offset, b: OutOffset[Pred]); |
151 | if (Offset == -1) Offset = INT32_MAX; |
152 | |
153 | for (MachineInstr &MI : *MBB) { |
154 | if (MI.getOpcode() == X86::DYN_ALLOCA_32 || |
155 | MI.getOpcode() == X86::DYN_ALLOCA_64) { |
156 | // A DynAlloca moves StackPtr, and potentially touches it. |
157 | int64_t Amount = getDynAllocaAmount(MI: &MI, MRI); |
158 | Lowering L = getLowering(CurrentOffset: Offset, AllocaAmount: Amount); |
159 | Lowerings[&MI] = L; |
160 | switch (L) { |
161 | case Sub: |
162 | Offset += Amount; |
163 | break; |
164 | case TouchAndSub: |
165 | Offset = Amount; |
166 | break; |
167 | case Probe: |
168 | Offset = 0; |
169 | break; |
170 | } |
171 | } else if (MI.isCall() || isPushPop(MI)) { |
172 | // Calls, pushes and pops touch the tip of the stack. |
173 | Offset = 0; |
174 | } else if (MI.getOpcode() == X86::ADJCALLSTACKUP32 || |
175 | MI.getOpcode() == X86::ADJCALLSTACKUP64) { |
176 | Offset -= MI.getOperand(i: 0).getImm(); |
177 | } else if (MI.getOpcode() == X86::ADJCALLSTACKDOWN32 || |
178 | MI.getOpcode() == X86::ADJCALLSTACKDOWN64) { |
179 | Offset += MI.getOperand(i: 0).getImm(); |
180 | } else if (MI.modifiesRegister(Reg: StackPtr, TRI)) { |
181 | // Any other modification of SP means we've lost track of it. |
182 | Offset = INT32_MAX; |
183 | } |
184 | } |
185 | |
186 | OutOffset[MBB] = Offset; |
187 | } |
188 | } |
189 | |
190 | static unsigned getSubOpcode(bool Is64Bit) { |
191 | if (Is64Bit) |
192 | return X86::SUB64ri32; |
193 | return X86::SUB32ri; |
194 | } |
195 | |
196 | void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) { |
197 | const DebugLoc &DL = MI->getDebugLoc(); |
198 | MachineBasicBlock *MBB = MI->getParent(); |
199 | MachineBasicBlock::iterator I = *MI; |
200 | |
201 | int64_t Amount = getDynAllocaAmount(MI, MRI); |
202 | if (Amount == 0) { |
203 | MI->eraseFromParent(); |
204 | return; |
205 | } |
206 | |
207 | // These two variables differ on x32, which is a 64-bit target with a |
208 | // 32-bit alloca. |
209 | bool Is64Bit = STI->is64Bit(); |
210 | bool Is64BitAlloca = MI->getOpcode() == X86::DYN_ALLOCA_64; |
211 | assert(SlotSize == 4 || SlotSize == 8); |
212 | |
213 | std::optional<MachineFunction::DebugInstrOperandPair> InstrNum; |
214 | if (unsigned Num = MI->peekDebugInstrNum()) { |
215 | // Operand 2 of DYN_ALLOCAs contains the stack def. |
216 | InstrNum = {Num, 2}; |
217 | } |
218 | |
219 | switch (L) { |
220 | case TouchAndSub: { |
221 | assert(Amount >= SlotSize); |
222 | |
223 | // Use a push to touch the top of the stack. |
224 | unsigned RegA = Is64Bit ? X86::RAX : X86::EAX; |
225 | BuildMI(BB&: *MBB, I, MIMD: DL, MCID: TII->get(Opcode: Is64Bit ? X86::PUSH64r : X86::PUSH32r)) |
226 | .addReg(RegNo: RegA, flags: RegState::Undef); |
227 | Amount -= SlotSize; |
228 | if (!Amount) |
229 | break; |
230 | |
231 | // Fall through to make any remaining adjustment. |
232 | [[fallthrough]]; |
233 | } |
234 | case Sub: |
235 | assert(Amount > 0); |
236 | if (Amount == SlotSize) { |
237 | // Use push to save size. |
238 | unsigned RegA = Is64Bit ? X86::RAX : X86::EAX; |
239 | BuildMI(BB&: *MBB, I, MIMD: DL, MCID: TII->get(Opcode: Is64Bit ? X86::PUSH64r : X86::PUSH32r)) |
240 | .addReg(RegNo: RegA, flags: RegState::Undef); |
241 | } else { |
242 | // Sub. |
243 | BuildMI(BB&: *MBB, I, MIMD: DL, MCID: TII->get(Opcode: getSubOpcode(Is64Bit: Is64BitAlloca)), DestReg: StackPtr) |
244 | .addReg(RegNo: StackPtr) |
245 | .addImm(Val: Amount); |
246 | } |
247 | break; |
248 | case Probe: |
249 | if (!NoStackArgProbe) { |
250 | // The probe lowering expects the amount in RAX/EAX. |
251 | unsigned RegA = Is64BitAlloca ? X86::RAX : X86::EAX; |
252 | BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: RegA) |
253 | .addReg(RegNo: MI->getOperand(i: 0).getReg()); |
254 | |
255 | // Do the probe. |
256 | STI->getFrameLowering()->emitStackProbe(MF&: *MBB->getParent(), MBB&: *MBB, MBBI: MI, DL, |
257 | /*InProlog=*/false, InstrNum); |
258 | } else { |
259 | // Sub |
260 | BuildMI(BB&: *MBB, I, MIMD: DL, |
261 | MCID: TII->get(Opcode: Is64BitAlloca ? X86::SUB64rr : X86::SUB32rr), DestReg: StackPtr) |
262 | .addReg(RegNo: StackPtr) |
263 | .addReg(RegNo: MI->getOperand(i: 0).getReg()); |
264 | } |
265 | break; |
266 | } |
267 | |
268 | Register AmountReg = MI->getOperand(i: 0).getReg(); |
269 | MI->eraseFromParent(); |
270 | |
271 | // Delete the definition of AmountReg. |
272 | if (MRI->use_empty(RegNo: AmountReg)) |
273 | if (MachineInstr *AmountDef = MRI->getUniqueVRegDef(Reg: AmountReg)) |
274 | AmountDef->eraseFromParent(); |
275 | } |
276 | |
277 | bool X86DynAllocaExpander::runOnMachineFunction(MachineFunction &MF) { |
278 | if (!MF.getInfo<X86MachineFunctionInfo>()->hasDynAlloca()) |
279 | return false; |
280 | |
281 | MRI = &MF.getRegInfo(); |
282 | STI = &MF.getSubtarget<X86Subtarget>(); |
283 | TII = STI->getInstrInfo(); |
284 | TRI = STI->getRegisterInfo(); |
285 | StackPtr = TRI->getStackRegister(); |
286 | SlotSize = TRI->getSlotSize(); |
287 | StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF); |
288 | NoStackArgProbe = MF.getFunction().hasFnAttribute(Kind: "no-stack-arg-probe" ); |
289 | if (NoStackArgProbe) |
290 | StackProbeSize = INT64_MAX; |
291 | |
292 | LoweringMap Lowerings; |
293 | computeLowerings(MF, Lowerings); |
294 | for (auto &P : Lowerings) |
295 | lower(MI: P.first, L: P.second); |
296 | |
297 | return true; |
298 | } |
299 | |