1 | //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the Thumb1 implementation of TargetFrameLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "Thumb1FrameLowering.h" |
14 | #include "ARMBaseInstrInfo.h" |
15 | #include "ARMBaseRegisterInfo.h" |
16 | #include "ARMMachineFunctionInfo.h" |
17 | #include "ARMSubtarget.h" |
18 | #include "Thumb1InstrInfo.h" |
19 | #include "ThumbRegisterInfo.h" |
20 | #include "Utils/ARMBaseInfo.h" |
21 | #include "llvm/ADT/BitVector.h" |
22 | #include "llvm/ADT/STLExtras.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/CodeGen/CFIInstBuilder.h" |
25 | #include "llvm/CodeGen/LivePhysRegs.h" |
26 | #include "llvm/CodeGen/MachineBasicBlock.h" |
27 | #include "llvm/CodeGen/MachineFrameInfo.h" |
28 | #include "llvm/CodeGen/MachineFunction.h" |
29 | #include "llvm/CodeGen/MachineInstr.h" |
30 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
31 | #include "llvm/CodeGen/MachineModuleInfo.h" |
32 | #include "llvm/CodeGen/MachineOperand.h" |
33 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
34 | #include "llvm/CodeGen/TargetInstrInfo.h" |
35 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
36 | #include "llvm/IR/DebugLoc.h" |
37 | #include "llvm/Support/Compiler.h" |
38 | #include "llvm/Support/ErrorHandling.h" |
39 | #include <cassert> |
40 | #include <iterator> |
41 | #include <vector> |
42 | |
43 | #define DEBUG_TYPE "arm-frame-lowering" |
44 | |
45 | using namespace llvm; |
46 | |
47 | Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) |
48 | : ARMFrameLowering(sti) {} |
49 | |
50 | bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ |
51 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
52 | unsigned CFSize = MFI.getMaxCallFrameSize(); |
53 | // It's not always a good idea to include the call frame as part of the |
54 | // stack frame. ARM (especially Thumb) has small immediate offset to |
55 | // address the stack frame. So a large call frame can cause poor codegen |
56 | // and may even makes it impossible to scavenge a register. |
57 | if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 |
58 | return false; |
59 | |
60 | return !MFI.hasVarSizedObjects(); |
61 | } |
62 | |
63 | static void |
64 | emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB, |
65 | MachineBasicBlock::iterator &MBBI, |
66 | const TargetInstrInfo &TII, const DebugLoc &dl, |
67 | const ThumbRegisterInfo &MRI, int NumBytes, |
68 | unsigned ScratchReg, unsigned MIFlags) { |
69 | // If it would take more than three instructions to adjust the stack pointer |
70 | // using tADDspi/tSUBspi, load an immediate instead. |
71 | if (std::abs(x: NumBytes) > 508 * 3) { |
72 | // We use a different codepath here from the normal |
73 | // emitThumbRegPlusImmediate so we don't have to deal with register |
74 | // scavenging. (Scavenging could try to use the emergency spill slot |
75 | // before we've actually finished setting up the stack.) |
76 | if (ScratchReg == ARM::NoRegister) |
77 | report_fatal_error(reason: "Failed to emit Thumb1 stack adjustment" ); |
78 | MachineFunction &MF = *MBB.getParent(); |
79 | const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); |
80 | if (ST.genExecuteOnly()) { |
81 | unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm; |
82 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: XOInstr), DestReg: ScratchReg) |
83 | .addImm(Val: NumBytes).setMIFlags(MIFlags); |
84 | } else { |
85 | MRI.emitLoadConstPool(MBB, MBBI, dl, DestReg: ScratchReg, SubIdx: 0, Val: NumBytes, Pred: ARMCC::AL, |
86 | PredReg: 0, MIFlags); |
87 | } |
88 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tADDhirr), DestReg: ARM::SP) |
89 | .addReg(RegNo: ARM::SP) |
90 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
91 | .add(MOs: predOps(Pred: ARMCC::AL)) |
92 | .setMIFlags(MIFlags); |
93 | return; |
94 | } |
95 | // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate |
96 | // won't change. |
97 | emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::SP, BaseReg: ARM::SP, NumBytes, TII, |
98 | MRI, MIFlags); |
99 | |
100 | } |
101 | |
102 | static void emitCallSPUpdate(MachineBasicBlock &MBB, |
103 | MachineBasicBlock::iterator &MBBI, |
104 | const TargetInstrInfo &TII, const DebugLoc &dl, |
105 | const ThumbRegisterInfo &MRI, int NumBytes, |
106 | unsigned MIFlags = MachineInstr::NoFlags) { |
107 | emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::SP, BaseReg: ARM::SP, NumBytes, TII, |
108 | MRI, MIFlags); |
109 | } |
110 | |
111 | |
112 | MachineBasicBlock::iterator Thumb1FrameLowering:: |
113 | eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, |
114 | MachineBasicBlock::iterator I) const { |
115 | const Thumb1InstrInfo &TII = |
116 | *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); |
117 | const ThumbRegisterInfo *RegInfo = |
118 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
119 | if (!hasReservedCallFrame(MF)) { |
120 | // If we have alloca, convert as follows: |
121 | // ADJCALLSTACKDOWN -> sub, sp, sp, amount |
122 | // ADJCALLSTACKUP -> add, sp, sp, amount |
123 | MachineInstr &Old = *I; |
124 | DebugLoc dl = Old.getDebugLoc(); |
125 | unsigned Amount = TII.getFrameSize(I: Old); |
126 | if (Amount != 0) { |
127 | // We need to keep the stack aligned properly. To do this, we round the |
128 | // amount of space needed for the outgoing arguments up to the next |
129 | // alignment boundary. |
130 | Amount = alignTo(Size: Amount, A: getStackAlign()); |
131 | |
132 | // Replace the pseudo instruction with a new instruction... |
133 | unsigned Opc = Old.getOpcode(); |
134 | if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { |
135 | emitCallSPUpdate(MBB, MBBI&: I, TII, dl, MRI: *RegInfo, NumBytes: -Amount); |
136 | } else { |
137 | assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); |
138 | emitCallSPUpdate(MBB, MBBI&: I, TII, dl, MRI: *RegInfo, NumBytes: Amount); |
139 | } |
140 | } |
141 | } |
142 | return MBB.erase(I); |
143 | } |
144 | |
// Emit the Thumb1 prologue for MF into MBB.
//
// In order, this function:
//  - rounds the stack size up to a multiple of 4 and stores it back into the
//    frame info;
//  - allocates the argument-register save area, if any;
//  - if there is no stack frame, allocates the remaining bytes and returns;
//  - sorts every callee-saved register into the frame-record, GPRCS1, GPRCS2
//    or DPRCS area and sums the area sizes;
//  - walks over the push/mov instructions already present at the start of MBB
//    to find where each area was pushed;
//  - records the area offsets and sizes in ARMFunctionInfo (emitEpilogue reads
//    the sizes back);
//  - sets up the frame pointer and emits CFI for the pushes;
//  - allocates the remaining local area, optionally realigns SP, and sets up
//    the base pointer.
145 | void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, |
146 | MachineBasicBlock &MBB) const { |
147 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
148 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
149 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
150 | const ThumbRegisterInfo *RegInfo = |
151 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
152 | const Thumb1InstrInfo &TII = |
153 | *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); |
154 | |
155 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); |
156 | unsigned NumBytes = MFI.getStackSize(); |
157 | assert(NumBytes >= ArgRegsSaveSize && |
158 | "ArgRegsSaveSize is included in NumBytes" ); |
159 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
160 | assert(STI.getPushPopSplitVariation(MF) == ARMSubtarget::SplitR7 && |
161 | "Must use R7 spilt for Thumb1" ); |
162 | |
163 | // Debug location must be unknown since the first debug location is used |
164 | // to determine the end of the prologue. |
165 | DebugLoc dl; |
166 | |
167 | Register FramePtr = RegInfo->getFrameRegister(MF); |
168 | Register BasePtr = RegInfo->getBaseRegister(); |
// Running CFA offset; it is increased every time the prologue moves SP down.
169 | int CFAOffset = 0; |
170 | |
171 | // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. |
172 | NumBytes = (NumBytes + 3) & ~3; |
173 | MFI.setStackSize(NumBytes); |
174 | |
175 | // Determine the sizes of each callee-save spill areas and record which frame |
176 | // belongs to which callee-save spill areas. |
177 | unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; |
178 | int FramePtrSpillFI = 0; |
179 | CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); |
180 | |
// Allocate the argument-register save area first. No scratch register is
// offered here, so a large adjustment is a fatal error in the helper.
181 | if (ArgRegsSaveSize) { |
182 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, NumBytes: -ArgRegsSaveSize, |
183 | ScratchReg: ARM::NoRegister, MIFlags: MachineInstr::FrameSetup); |
184 | CFAOffset += ArgRegsSaveSize; |
185 | CFIBuilder.buildDefCFAOffset(Offset: CFAOffset); |
186 | } |
187 | |
// Without a stack frame there are no callee-save areas to account for: just
// allocate whatever remains beyond the argument save area and finish.
188 | if (!AFI->hasStackFrame()) { |
189 | if (NumBytes - ArgRegsSaveSize != 0) { |
190 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, |
191 | NumBytes: -(NumBytes - ArgRegsSaveSize), |
192 | ScratchReg: ARM::NoRegister, MIFlags: MachineInstr::FrameSetup); |
193 | CFAOffset += NumBytes - ArgRegsSaveSize; |
194 | CFIBuilder.buildDefCFAOffset(Offset: CFAOffset); |
195 | } |
196 | return; |
197 | } |
198 | |
// A separate frame-record area is used when the function has a frame pointer
// and that frame pointer is a register of the hGPR class.
199 | bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(Reg: FramePtr); |
200 | |
// Classify each callee-saved register and accumulate the area sizes. R11 and
// LR are counted in the frame record when one exists; otherwise they fall
// through to the GPRCS2 and GPRCS1 areas respectively. Any register not listed
// is counted as 8 bytes in the DPRCS area.
201 | for (const CalleeSavedInfo &I : CSI) { |
202 | MCRegister Reg = I.getReg(); |
203 | int FI = I.getFrameIdx(); |
204 | if (Reg == FramePtr.asMCReg()) |
205 | FramePtrSpillFI = FI; |
206 | switch (Reg) { |
207 | case ARM::R11: |
208 | if (HasFrameRecordArea) { |
209 | FRSize += 4; |
210 | break; |
211 | } |
212 | [[fallthrough]]; |
213 | case ARM::R8: |
214 | case ARM::R9: |
215 | case ARM::R10: |
216 | GPRCS2Size += 4; |
217 | break; |
218 | case ARM::LR: |
219 | if (HasFrameRecordArea) { |
220 | FRSize += 4; |
221 | break; |
222 | } |
223 | [[fallthrough]]; |
224 | case ARM::R4: |
225 | case ARM::R5: |
226 | case ARM::R6: |
227 | case ARM::R7: |
228 | GPRCS1Size += 4; |
229 | break; |
230 | default: |
231 | DPRCSSize += 8; |
232 | } |
233 | } |
234 | |
// NOTE(review): the three frame-record instructions listed below are skipped
// by position only; their opcodes are not checked here -- confirm that the
// spill code always emits exactly this sequence first.
235 | MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; |
236 | if (HasFrameRecordArea) { |
237 | // Skip Frame Record setup: |
238 | // push {lr} |
239 | // mov lr, r11 |
240 | // push {lr} |
241 | std::advance(i&: MBBI, n: 2); |
242 | FRPush = MBBI++; |
243 | } |
244 | |
// The next tPUSH, if present, is taken to be the GPRCS1 (low register) push.
245 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { |
246 | GPRCS1Push = MBBI; |
247 | ++MBBI; |
248 | } |
249 | |
250 | // Find last push instruction for GPRCS2 - spilling of high registers |
251 | // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. |
252 | while (true) { |
253 | MachineBasicBlock::iterator OldMBBI = MBBI; |
254 | // Skip a run of tMOVr instructions |
255 | while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && |
256 | MBBI->getFlag(Flag: MachineInstr::FrameSetup)) |
257 | MBBI++; |
258 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && |
259 | MBBI->getFlag(Flag: MachineInstr::FrameSetup)) { |
260 | GPRCS2Push = MBBI; |
261 | MBBI++; |
262 | } else { |
263 | // We have reached an instruction which is not a push, so the previous |
264 | // run of tMOVr instructions (which may have been empty) was not part of |
265 | // the prologue. Reset MBBI back to the last PUSH of the prologue. |
266 | MBBI = OldMBBI; |
267 | break; |
268 | } |
269 | } |
270 | |
271 | // Skip past this code sequence, which is emitted to restore the LR if it is |
272 | // live-in and clobbered by the frame record setup code: |
273 | // ldr rX, [sp, #Y] |
274 | // mov lr, rX |
275 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tLDRspi && |
276 | MBBI->getFlag(Flag: MachineInstr::FrameSetup)) { |
277 | ++MBBI; |
278 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && |
279 | MBBI->getOperand(i: 0).getReg() == ARM::LR && |
280 | MBBI->getFlag(Flag: MachineInstr::FrameSetup)) { |
281 | ++MBBI; |
282 | } |
283 | } |
284 | |
285 | // Determine starting offsets of spill areas. |
// DPRCSOffset is what is left of the frame once the argument save area and all
// callee-save areas are subtracted; the other areas are stacked on top of it.
286 | unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - |
287 | (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); |
288 | unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; |
289 | unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; |
290 | bool HasFP = hasFP(MF); |
291 | if (HasFP) |
292 | AFI->setFramePtrSpillOffset(MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) + |
293 | NumBytes); |
294 | if (HasFrameRecordArea) |
295 | AFI->setFrameRecordSavedAreaSize(FRSize); |
296 | AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); |
297 | AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); |
298 | AFI->setDPRCalleeSavedArea1Offset(DPRCSOffset); |
// From here on NumBytes is the number of bytes still to be allocated below the
// callee-save areas.
299 | NumBytes = DPRCSOffset; |
300 | |
// With low-register spills only, try to fold the remaining allocation into the
// GPRCS1 push. On success the push accounts for the extra NumBytes and no
// separate SP update is left to emit.
301 | int FramePtrOffsetInBlock = 0; |
302 | unsigned adjustedGPRCS1Size = GPRCS1Size; |
303 | if (GPRCS1Size > 0 && GPRCS2Size == 0 && |
304 | tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*(GPRCS1Push), NumBytes)) { |
305 | FramePtrOffsetInBlock = NumBytes; |
306 | adjustedGPRCS1Size += NumBytes; |
307 | NumBytes = 0; |
308 | } |
309 | CFAOffset += adjustedGPRCS1Size; |
310 | |
311 | // Adjust FP so it points to the stack slot that contains the previous FP. |
312 | if (HasFP) { |
313 | MachineBasicBlock::iterator AfterPush = |
314 | HasFrameRecordArea ? std::next(x: FRPush) : std::next(x: GPRCS1Push); |
315 | if (HasFrameRecordArea) { |
316 | // We have just finished pushing the previous FP into the stack, |
317 | // so simply capture the SP value as the new Frame Pointer. |
318 | BuildMI(BB&: MBB, I: AfterPush, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: FramePtr) |
319 | .addReg(RegNo: ARM::SP) |
320 | .setMIFlags(MachineInstr::FrameSetup) |
321 | .add(MOs: predOps(Pred: ARMCC::AL)); |
322 | } else { |
// FP = SP + offset of the FP spill slot within the pushed block; the immediate
// of tADDrSPi is given in words, hence the division by 4.
323 | FramePtrOffsetInBlock += |
324 | MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; |
325 | BuildMI(BB&: MBB, I: AfterPush, MIMD: dl, MCID: TII.get(Opcode: ARM::tADDrSPi), DestReg: FramePtr) |
326 | .addReg(RegNo: ARM::SP) |
327 | .addImm(Val: FramePtrOffsetInBlock / 4) |
328 | .setMIFlags(MachineInstr::FrameSetup) |
329 | .add(MOs: predOps(Pred: ARMCC::AL)); |
330 | } |
331 | |
332 | CFIBuilder.setInsertPoint(AfterPush); |
333 | if (FramePtrOffsetInBlock) |
334 | CFIBuilder.buildDefCFA(Reg: FramePtr, Offset: CFAOffset - FramePtrOffsetInBlock); |
335 | else |
336 | CFIBuilder.buildDefCFARegister(Reg: FramePtr); |
337 | if (NumBytes > 508) |
338 | // If offset is > 508 then sp cannot be adjusted in a single instruction, |
339 | // try restoring from fp instead. |
340 | AFI->setShouldRestoreSPFromFP(true); |
341 | } |
342 | |
343 | // Emit call frame information for the callee-saved low registers. |
344 | if (GPRCS1Size > 0) { |
345 | CFIBuilder.setInsertPoint(std::next(x: GPRCS1Push)); |
346 | if (adjustedGPRCS1Size) |
347 | CFIBuilder.buildDefCFAOffset(Offset: CFAOffset); |
348 | for (const CalleeSavedInfo &I : CSI) { |
349 | switch (I.getReg()) { |
350 | case ARM::R8: |
351 | case ARM::R9: |
352 | case ARM::R10: |
353 | case ARM::R11: |
354 | case ARM::R12: |
355 | break; |
356 | case ARM::R0: |
357 | case ARM::R1: |
358 | case ARM::R2: |
359 | case ARM::R3: |
360 | case ARM::R4: |
361 | case ARM::R5: |
362 | case ARM::R6: |
363 | case ARM::R7: |
364 | case ARM::LR: |
365 | CFIBuilder.buildOffset(Reg: I.getReg(), |
366 | Offset: MFI.getObjectOffset(ObjectIdx: I.getFrameIdx())); |
367 | break; |
368 | } |
369 | } |
370 | } |
371 | |
372 | // Emit call frame information for the callee-saved high registers. |
373 | if (GPRCS2Size > 0) { |
374 | CFIBuilder.setInsertPoint(std::next(x: GPRCS2Push)); |
375 | for (auto &I : CSI) { |
376 | switch (I.getReg()) { |
377 | case ARM::R8: |
378 | case ARM::R9: |
379 | case ARM::R10: |
380 | case ARM::R11: |
381 | case ARM::R12: |
382 | CFIBuilder.buildOffset(Reg: I.getReg(), |
383 | Offset: MFI.getObjectOffset(ObjectIdx: I.getFrameIdx())); |
384 | break; |
385 | default: |
386 | break; |
387 | } |
388 | } |
389 | } |
390 | |
391 | if (NumBytes) { |
392 | // Insert it after all the callee-save spills. |
393 | // |
394 | // For a large stack frame, we might need a scratch register to store |
395 | // the size of the frame. We know all callee-save registers are free |
396 | // at this point in the prologue, so pick one. |
// Only a low register that is not serving as the frame pointer qualifies; the
// first such callee-saved register is used.
397 | unsigned ScratchRegister = ARM::NoRegister; |
398 | for (auto &I : CSI) { |
399 | MCRegister Reg = I.getReg(); |
400 | if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr.asMCReg())) { |
401 | ScratchRegister = Reg; |
402 | break; |
403 | } |
404 | } |
405 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, NumBytes: -NumBytes, |
406 | ScratchReg: ScratchRegister, MIFlags: MachineInstr::FrameSetup); |
// With a frame pointer the CFA has already been defined relative to FP above,
// so only the FP-less case needs its CFA offset updated.
407 | if (!HasFP) { |
408 | CFAOffset += NumBytes; |
409 | CFIBuilder.buildDefCFAOffset(Offset: CFAOffset); |
410 | } |
411 | } |
412 | |
413 | if (STI.isTargetELF() && HasFP) |
414 | MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - |
415 | AFI->getFramePtrSpillOffset()); |
416 | |
// Publish the area sizes; emitEpilogue subtracts them from the stack size.
417 | AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); |
418 | AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); |
419 | AFI->setDPRCalleeSavedArea1Size(DPRCSSize); |
420 | |
421 | if (RegInfo->hasStackRealignment(MF)) { |
422 | const unsigned NrBitsToZero = Log2(A: MFI.getMaxAlign()); |
423 | // Emit the following sequence, using R4 as a temporary, since we cannot use |
424 | // SP as a source or destination register for the shifts: |
425 | // mov r4, sp |
426 | // lsrs r4, r4, #NrBitsToZero |
427 | // lsls r4, r4, #NrBitsToZero |
428 | // mov sp, r4 |
429 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::R4) |
430 | .addReg(RegNo: ARM::SP, flags: RegState::Kill) |
431 | .add(MOs: predOps(Pred: ARMCC::AL)); |
432 | |
433 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tLSRri), DestReg: ARM::R4) |
434 | .addDef(RegNo: ARM::CPSR) |
435 | .addReg(RegNo: ARM::R4, flags: RegState::Kill) |
436 | .addImm(Val: NrBitsToZero) |
437 | .add(MOs: predOps(Pred: ARMCC::AL)); |
438 | |
439 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tLSLri), DestReg: ARM::R4) |
440 | .addDef(RegNo: ARM::CPSR) |
441 | .addReg(RegNo: ARM::R4, flags: RegState::Kill) |
442 | .addImm(Val: NrBitsToZero) |
443 | .add(MOs: predOps(Pred: ARMCC::AL)); |
444 | |
445 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP) |
446 | .addReg(RegNo: ARM::R4, flags: RegState::Kill) |
447 | .add(MOs: predOps(Pred: ARMCC::AL)); |
448 | |
// After realignment the SP adjustment is no longer a compile-time constant, so
// the epilogue is told to recover SP from the frame pointer.
449 | AFI->setShouldRestoreSPFromFP(true); |
450 | } |
451 | |
452 | // If we need a base pointer, set it up here. It's whatever the value |
453 | // of the stack pointer is at this point. Any variable size objects |
454 | // will be allocated after this, so we can still use the base pointer |
455 | // to reference locals. |
456 | if (RegInfo->hasBasePointer(MF)) |
457 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: BasePtr) |
458 | .addReg(RegNo: ARM::SP) |
459 | .add(MOs: predOps(Pred: ARMCC::AL)); |
460 | |
461 | // If the frame has variable sized objects then the epilogue must restore |
462 | // the sp from fp. We can assume there's an FP here since hasFP already |
463 | // checks for hasVarSizedObjects. |
464 | if (MFI.hasVarSizedObjects()) |
465 | AFI->setShouldRestoreSPFromFP(true); |
466 | |
467 | // In some cases, virtual registers have been introduced, e.g. by uses of |
468 | // emitThumbRegPlusImmInReg. |
469 | MF.getProperties().resetNoVRegs(); |
470 | } |
471 | |
// Emit the Thumb1 epilogue for MF in front of the first terminator of MBB.
//
// Without a stack frame this only releases the bytes allocated beyond the
// argument-register save area. Otherwise it steps back over the
// FrameDestroy-flagged instructions already present, releases the local area
// (either by recomputing SP from the frame pointer or by adding to SP,
// folding the update into a pop when tryFoldSPUpdateIntoPushPop succeeds),
// and finally runs emitPopSpecialFixUp when needPopSpecialFixUp says so.
472 | void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, |
473 | MachineBasicBlock &MBB) const { |
474 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
475 | DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); |
476 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
477 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
478 | const ThumbRegisterInfo *RegInfo = |
479 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
480 | const Thumb1InstrInfo &TII = |
481 | *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); |
482 | |
483 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); |
484 | int NumBytes = (int)MFI.getStackSize(); |
485 | assert((unsigned)NumBytes >= ArgRegsSaveSize && |
486 | "ArgRegsSaveSize is included in NumBytes" ); |
487 | Register FramePtr = RegInfo->getFrameRegister(MF); |
488 | |
// No stack frame: release everything except the argument save area, which is
// left for the pop fix-up at the end of this function.
489 | if (!AFI->hasStackFrame()) { |
490 | if (NumBytes - ArgRegsSaveSize != 0) |
491 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, |
492 | NumBytes: NumBytes - ArgRegsSaveSize, ScratchReg: ARM::NoRegister, |
493 | MIFlags: MachineInstr::FrameDestroy); |
494 | } else { |
495 | // Unwind MBBI to point to first LDR / VLDRD. |
// That is: walk backwards over the FrameDestroy-flagged instructions before
// the terminator and leave MBBI on the first of them.
496 | if (MBBI != MBB.begin()) { |
497 | do |
498 | --MBBI; |
499 | while (MBBI != MBB.begin() && MBBI->getFlag(Flag: MachineInstr::FrameDestroy)); |
500 | if (!MBBI->getFlag(Flag: MachineInstr::FrameDestroy)) |
501 | ++MBBI; |
502 | } |
503 | |
504 | // Move SP to start of FP callee save spill area. |
// After this subtraction NumBytes is the size of the area below the
// callee-save areas.
505 | NumBytes -= |
506 | (AFI->getFrameRecordSavedAreaSize() + |
507 | AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + |
508 | AFI->getDPRCalleeSavedArea1Size() + ArgRegsSaveSize); |
509 | |
510 | // We are likely to need a scratch register and we know all callee-save |
511 | // registers are free at this point in the epilogue, so pick one. |
// As in the prologue, only a low register other than the frame pointer is
// accepted.
512 | unsigned ScratchRegister = ARM::NoRegister; |
513 | bool HasFP = hasFP(MF); |
514 | for (auto &I : MFI.getCalleeSavedInfo()) { |
515 | MCRegister Reg = I.getReg(); |
516 | if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr.asMCReg())) { |
517 | ScratchRegister = Reg; |
518 | break; |
519 | } |
520 | } |
521 | |
522 | if (AFI->shouldRestoreSPFromFP()) { |
// NumBytes becomes the distance to subtract from FP to obtain the new SP.
523 | NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; |
524 | // Reset SP based on frame pointer only if the stack frame extends beyond |
525 | // frame pointer stack slot, the target is ELF and the function has FP, or |
526 | // the target uses var sized objects. |
527 | if (NumBytes) { |
528 | assert(ScratchRegister != ARM::NoRegister && |
529 | "No scratch register to restore SP from FP!" ); |
// The sum is computed in the scratch register and then moved into SP.
530 | emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg: ScratchRegister, BaseReg: FramePtr, NumBytes: -NumBytes, |
531 | TII, MRI: *RegInfo, MIFlags: MachineInstr::FrameDestroy); |
532 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP) |
533 | .addReg(RegNo: ScratchRegister) |
534 | .add(MOs: predOps(Pred: ARMCC::AL)) |
535 | .setMIFlag(MachineInstr::FrameDestroy); |
536 | } else |
537 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP) |
538 | .addReg(RegNo: FramePtr) |
539 | .add(MOs: predOps(Pred: ARMCC::AL)) |
540 | .setMIFlag(MachineInstr::FrameDestroy); |
541 | } else { |
// When MBBI is a tBX_RET directly preceded by a tPOP, the fold (or the explicit
// SP update) is applied at that tPOP; otherwise it is applied at MBBI itself.
// NOTE(review): the second branch dereferences MBBI without checking it
// against MBB.end() -- confirm callers guarantee an instruction is present.
542 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && |
543 | &MBB.front() != &*MBBI && std::prev(x: MBBI)->getOpcode() == ARM::tPOP) { |
544 | MachineBasicBlock::iterator PMBBI = std::prev(x: MBBI); |
545 | if (!tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*PMBBI, NumBytes)) |
546 | emitPrologueEpilogueSPUpdate(MBB, MBBI&: PMBBI, TII, dl, MRI: *RegInfo, NumBytes, |
547 | ScratchReg: ScratchRegister, MIFlags: MachineInstr::FrameDestroy); |
548 | } else if (!tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*MBBI, NumBytes)) |
549 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, NumBytes, |
550 | ScratchReg: ScratchRegister, MIFlags: MachineInstr::FrameDestroy); |
551 | } |
552 | } |
553 | |
// Restore LR and release the argument save area when that needs special
// handling; in this mode (DoIt == true) the fix-up is expected to succeed.
554 | if (needPopSpecialFixUp(MF)) { |
555 | bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); |
556 | (void)Done; |
557 | assert(Done && "Emission of the special fixup failed!?" ); |
558 | } |
559 | } |
560 | |
561 | bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
562 | if (!needPopSpecialFixUp(MF: *MBB.getParent())) |
563 | return true; |
564 | |
565 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
566 | return emitPopSpecialFixUp(MBB&: *TmpMBB, /* DoIt */ false); |
567 | } |
568 | |
569 | bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { |
570 | ARMFunctionInfo *AFI = |
571 | const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); |
572 | if (AFI->getArgRegsSaveSize()) |
573 | return true; |
574 | |
575 | // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. |
576 | for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) |
577 | if (CSI.getReg() == ARM::LR) |
578 | return true; |
579 | |
580 | return false; |
581 | } |
582 | |
583 | static void findTemporariesForLR(const BitVector &GPRsNoLRSP, |
584 | const BitVector &PopFriendly, |
585 | const LiveRegUnits &UsedRegs, unsigned &PopReg, |
586 | unsigned &TmpReg, MachineRegisterInfo &MRI) { |
587 | PopReg = TmpReg = 0; |
588 | for (auto Reg : GPRsNoLRSP.set_bits()) { |
589 | if (UsedRegs.available(Reg)) { |
590 | // Remember the first pop-friendly register and exit. |
591 | if (PopFriendly.test(Idx: Reg)) { |
592 | PopReg = Reg; |
593 | TmpReg = 0; |
594 | break; |
595 | } |
596 | // Otherwise, remember that the register will be available to |
597 | // save a pop-friendly register. |
598 | TmpReg = Reg; |
599 | } |
600 | } |
601 | } |
602 | |
603 | bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, |
604 | bool DoIt) const { |
605 | MachineFunction &MF = *MBB.getParent(); |
606 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
607 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); |
608 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
609 | const ThumbRegisterInfo *RegInfo = |
610 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
611 | |
612 | // If MBBI is a return instruction, or is a tPOP followed by a return |
613 | // instruction in the successor BB, we may be able to directly restore |
614 | // LR in the PC. |
615 | // This is only possible with v5T ops (v4T can't change the Thumb bit via |
616 | // a POP PC instruction), and only if we do not need to emit any SP update. |
617 | // Otherwise, we need a temporary register to pop the value |
618 | // and copy that value into LR. |
619 | auto MBBI = MBB.getFirstTerminator(); |
620 | bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; |
621 | if (CanRestoreDirectly) { |
622 | if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) |
623 | CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || |
624 | MBBI->getOpcode() == ARM::tPOP_RET); |
625 | else { |
626 | auto MBBI_prev = MBBI; |
627 | MBBI_prev--; |
628 | assert(MBBI_prev->getOpcode() == ARM::tPOP); |
629 | assert(MBB.succ_size() == 1); |
630 | if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) |
631 | MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. |
632 | else |
633 | CanRestoreDirectly = false; |
634 | } |
635 | } |
636 | |
637 | if (CanRestoreDirectly) { |
638 | if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) |
639 | return true; |
640 | MachineInstrBuilder MIB = |
641 | BuildMI(BB&: MBB, I: MBBI, MIMD: MBBI->getDebugLoc(), MCID: TII.get(Opcode: ARM::tPOP_RET)) |
642 | .add(MOs: predOps(Pred: ARMCC::AL)) |
643 | .setMIFlag(MachineInstr::FrameDestroy); |
644 | // Copy implicit ops and popped registers, if any. |
645 | for (auto MO: MBBI->operands()) |
646 | if (MO.isReg() && (MO.isImplicit() || MO.isDef())) |
647 | MIB.add(MO); |
648 | MIB.addReg(RegNo: ARM::PC, flags: RegState::Define); |
649 | // Erase the old instruction (tBX_RET or tPOP). |
650 | MBB.erase(I: MBBI); |
651 | return true; |
652 | } |
653 | |
654 | // Look for a temporary register to use. |
655 | // First, compute the liveness information. |
656 | const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); |
657 | LiveRegUnits UsedRegs(TRI); |
658 | UsedRegs.addLiveOuts(MBB); |
659 | // The semantic of pristines changed recently and now, |
660 | // the callee-saved registers that are touched in the function |
661 | // are not part of the pristines set anymore. |
662 | // Add those callee-saved now. |
663 | const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF: &MF); |
664 | for (unsigned i = 0; CSRegs[i]; ++i) |
665 | UsedRegs.addReg(Reg: CSRegs[i]); |
666 | |
667 | DebugLoc dl = DebugLoc(); |
668 | if (MBBI != MBB.end()) { |
669 | dl = MBBI->getDebugLoc(); |
670 | auto InstUpToMBBI = MBB.end(); |
671 | while (InstUpToMBBI != MBBI) |
672 | // The pre-decrement is on purpose here. |
673 | // We want to have the liveness right before MBBI. |
674 | UsedRegs.stepBackward(MI: *--InstUpToMBBI); |
675 | } |
676 | |
677 | // Look for a register that can be directly use in the POP. |
678 | unsigned PopReg = 0; |
679 | // And some temporary register, just in case. |
680 | unsigned TemporaryReg = 0; |
681 | BitVector PopFriendly = |
682 | TRI.getAllocatableSet(MF, RC: TRI.getRegClass(i: ARM::tGPRRegClassID)); |
683 | |
684 | assert(PopFriendly.any() && "No allocatable pop-friendly register?!" ); |
685 | // Rebuild the GPRs from the high registers because they are removed |
686 | // form the GPR reg class for thumb1. |
687 | BitVector GPRsNoLRSP = |
688 | TRI.getAllocatableSet(MF, RC: TRI.getRegClass(i: ARM::hGPRRegClassID)); |
689 | GPRsNoLRSP |= PopFriendly; |
690 | GPRsNoLRSP.reset(Idx: ARM::LR); |
691 | GPRsNoLRSP.reset(Idx: ARM::SP); |
692 | GPRsNoLRSP.reset(Idx: ARM::PC); |
693 | findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TmpReg&: TemporaryReg, |
694 | MRI&: MF.getRegInfo()); |
695 | |
696 | // If we couldn't find a pop-friendly register, try restoring LR before |
697 | // popping the other callee-saved registers, so we could use one of them as a |
698 | // temporary. |
699 | bool UseLDRSP = false; |
700 | if (!PopReg && MBBI != MBB.begin()) { |
701 | auto PrevMBBI = MBBI; |
702 | PrevMBBI--; |
703 | if (PrevMBBI->getOpcode() == ARM::tPOP) { |
704 | UsedRegs.stepBackward(MI: *PrevMBBI); |
705 | findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, |
706 | TmpReg&: TemporaryReg, MRI&: MF.getRegInfo()); |
707 | if (PopReg) { |
708 | MBBI = PrevMBBI; |
709 | UseLDRSP = true; |
710 | } |
711 | } |
712 | } |
713 | |
714 | if (!DoIt && !PopReg && !TemporaryReg) |
715 | return false; |
716 | |
717 | assert((PopReg || TemporaryReg) && "Cannot get LR" ); |
718 | |
719 | if (UseLDRSP) { |
720 | assert(PopReg && "Do not know how to get LR" ); |
721 | // Load the LR via LDR tmp, [SP, #off] |
722 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tLDRspi)) |
723 | .addReg(RegNo: PopReg, flags: RegState::Define) |
724 | .addReg(RegNo: ARM::SP) |
725 | .addImm(Val: MBBI->getNumExplicitOperands() - 2) |
726 | .add(MOs: predOps(Pred: ARMCC::AL)) |
727 | .setMIFlag(MachineInstr::FrameDestroy); |
728 | // Move from the temporary register to the LR. |
729 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr)) |
730 | .addReg(RegNo: ARM::LR, flags: RegState::Define) |
731 | .addReg(RegNo: PopReg, flags: RegState::Kill) |
732 | .add(MOs: predOps(Pred: ARMCC::AL)) |
733 | .setMIFlag(MachineInstr::FrameDestroy); |
734 | // Advance past the pop instruction. |
735 | MBBI++; |
736 | // Increment the SP. |
737 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, |
738 | NumBytes: ArgRegsSaveSize + 4, ScratchReg: ARM::NoRegister, |
739 | MIFlags: MachineInstr::FrameDestroy); |
740 | return true; |
741 | } |
742 | |
743 | if (TemporaryReg) { |
744 | assert(!PopReg && "Unnecessary MOV is about to be inserted" ); |
745 | PopReg = PopFriendly.find_first(); |
746 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr)) |
747 | .addReg(RegNo: TemporaryReg, flags: RegState::Define) |
748 | .addReg(RegNo: PopReg, flags: RegState::Kill) |
749 | .add(MOs: predOps(Pred: ARMCC::AL)) |
750 | .setMIFlag(MachineInstr::FrameDestroy); |
751 | } |
752 | |
753 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { |
754 | // We couldn't use the direct restoration above, so |
755 | // perform the opposite conversion: tPOP_RET to tPOP. |
756 | MachineInstrBuilder MIB = |
757 | BuildMI(BB&: MBB, I: MBBI, MIMD: MBBI->getDebugLoc(), MCID: TII.get(Opcode: ARM::tPOP)) |
758 | .add(MOs: predOps(Pred: ARMCC::AL)) |
759 | .setMIFlag(MachineInstr::FrameDestroy); |
760 | bool Popped = false; |
761 | for (auto MO: MBBI->operands()) |
762 | if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && |
763 | MO.getReg() != ARM::PC) { |
764 | MIB.add(MO); |
765 | if (!MO.isImplicit()) |
766 | Popped = true; |
767 | } |
768 | // Is there anything left to pop? |
769 | if (!Popped) |
770 | MBB.erase(I: MIB.getInstr()); |
771 | // Erase the old instruction. |
772 | MBB.erase(I: MBBI); |
773 | MBBI = BuildMI(BB&: MBB, I: MBB.end(), MIMD: dl, MCID: TII.get(Opcode: ARM::tBX_RET)) |
774 | .add(MOs: predOps(Pred: ARMCC::AL)) |
775 | .setMIFlag(MachineInstr::FrameDestroy); |
776 | } |
777 | |
778 | assert(PopReg && "Do not know how to get LR" ); |
779 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tPOP)) |
780 | .add(MOs: predOps(Pred: ARMCC::AL)) |
781 | .addReg(RegNo: PopReg, flags: RegState::Define) |
782 | .setMIFlag(MachineInstr::FrameDestroy); |
783 | |
784 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, NumBytes: ArgRegsSaveSize, |
785 | ScratchReg: ARM::NoRegister, MIFlags: MachineInstr::FrameDestroy); |
786 | |
787 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr)) |
788 | .addReg(RegNo: ARM::LR, flags: RegState::Define) |
789 | .addReg(RegNo: PopReg, flags: RegState::Kill) |
790 | .add(MOs: predOps(Pred: ARMCC::AL)) |
791 | .setMIFlag(MachineInstr::FrameDestroy); |
792 | |
793 | if (TemporaryReg) |
794 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr)) |
795 | .addReg(RegNo: PopReg, flags: RegState::Define) |
796 | .addReg(RegNo: TemporaryReg, flags: RegState::Kill) |
797 | .add(MOs: predOps(Pred: ARMCC::AL)) |
798 | .setMIFlag(MachineInstr::FrameDestroy); |
799 | |
800 | return true; |
801 | } |
802 | |
803 | static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, |
804 | ARM::R7, ARM::LR}; |
805 | static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9, |
806 | ARM::R10, ARM::R11}; |
807 | static const SmallVector<Register> OrderedCopyRegs = { |
808 | ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, |
809 | ARM::R5, ARM::R6, ARM::R7, ARM::LR}; |
810 | |
811 | static void splitLowAndHighRegs(const std::set<Register> &Regs, |
812 | std::set<Register> &LowRegs, |
813 | std::set<Register> &HighRegs) { |
814 | for (Register Reg : Regs) { |
815 | if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { |
816 | LowRegs.insert(x: Reg); |
817 | } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { |
818 | HighRegs.insert(x: Reg); |
819 | } else { |
820 | llvm_unreachable("callee-saved register of unexpected class" ); |
821 | } |
822 | } |
823 | } |
824 | |
825 | template <typename It> |
826 | It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, |
827 | const std::set<Register> &RegSet) { |
828 | return std::find_if(OrderedStartIt, OrderedEndIt, |
829 | [&](Register Reg) { return RegSet.count(x: Reg); }); |
830 | } |
831 | |
832 | static void pushRegsToStack(MachineBasicBlock &MBB, |
833 | MachineBasicBlock::iterator MI, |
834 | const TargetInstrInfo &TII, |
835 | const std::set<Register> &RegsToSave, |
836 | const std::set<Register> &CopyRegs, |
837 | bool &UsedLRAsTemp) { |
838 | MachineFunction &MF = *MBB.getParent(); |
839 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
840 | DebugLoc DL; |
841 | |
842 | std::set<Register> LowRegs, HighRegs; |
843 | splitLowAndHighRegs(Regs: RegsToSave, LowRegs, HighRegs); |
844 | |
845 | // Push low regs first |
846 | if (!LowRegs.empty()) { |
847 | MachineInstrBuilder MIB = |
848 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH)).add(MOs: predOps(Pred: ARMCC::AL)); |
849 | for (unsigned Reg : OrderedLowRegs) { |
850 | if (LowRegs.count(x: Reg)) { |
851 | bool isKill = !MRI.isLiveIn(Reg); |
852 | if (isKill && !MRI.isReserved(PhysReg: Reg)) |
853 | MBB.addLiveIn(PhysReg: Reg); |
854 | |
855 | MIB.addReg(RegNo: Reg, flags: getKillRegState(B: isKill)); |
856 | } |
857 | } |
858 | MIB.setMIFlags(MachineInstr::FrameSetup); |
859 | } |
860 | |
861 | // Now push the high registers |
862 | // There are no store instructions that can access high registers directly, |
863 | // so we have to move them to low registers, and push them. |
864 | // This might take multiple pushes, as it is possible for there to |
865 | // be fewer low registers available than high registers which need saving. |
866 | |
867 | // Find the first register to save. |
868 | // Registers must be processed in reverse order so that in case we need to use |
869 | // multiple PUSH instructions, the order of the registers on the stack still |
870 | // matches the unwind info. They need to be swicthed back to ascending order |
871 | // before adding to the PUSH instruction. |
872 | auto HiRegToSave = getNextOrderedReg(OrderedStartIt: OrderedHighRegs.rbegin(), |
873 | OrderedEndIt: OrderedHighRegs.rend(), |
874 | RegSet: HighRegs); |
875 | |
876 | while (HiRegToSave != OrderedHighRegs.rend()) { |
877 | // Find the first low register to use. |
878 | auto CopyRegIt = getNextOrderedReg(OrderedStartIt: OrderedCopyRegs.rbegin(), |
879 | OrderedEndIt: OrderedCopyRegs.rend(), |
880 | RegSet: CopyRegs); |
881 | |
882 | // Create the PUSH, but don't insert it yet (the MOVs need to come first). |
883 | MachineInstrBuilder PushMIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH)) |
884 | .add(MOs: predOps(Pred: ARMCC::AL)) |
885 | .setMIFlags(MachineInstr::FrameSetup); |
886 | |
887 | SmallVector<unsigned, 4> RegsToPush; |
888 | while (HiRegToSave != OrderedHighRegs.rend() && |
889 | CopyRegIt != OrderedCopyRegs.rend()) { |
890 | if (HighRegs.count(x: *HiRegToSave)) { |
891 | bool isKill = !MRI.isLiveIn(Reg: *HiRegToSave); |
892 | if (isKill && !MRI.isReserved(PhysReg: *HiRegToSave)) |
893 | MBB.addLiveIn(PhysReg: *HiRegToSave); |
894 | if (*CopyRegIt == ARM::LR) |
895 | UsedLRAsTemp = true; |
896 | |
897 | // Emit a MOV from the high reg to the low reg. |
898 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr)) |
899 | .addReg(RegNo: *CopyRegIt, flags: RegState::Define) |
900 | .addReg(RegNo: *HiRegToSave, flags: getKillRegState(B: isKill)) |
901 | .add(MOs: predOps(Pred: ARMCC::AL)) |
902 | .setMIFlags(MachineInstr::FrameSetup); |
903 | |
904 | // Record the register that must be added to the PUSH. |
905 | RegsToPush.push_back(Elt: *CopyRegIt); |
906 | |
907 | CopyRegIt = getNextOrderedReg(OrderedStartIt: std::next(x: CopyRegIt), |
908 | OrderedEndIt: OrderedCopyRegs.rend(), |
909 | RegSet: CopyRegs); |
910 | HiRegToSave = getNextOrderedReg(OrderedStartIt: std::next(x: HiRegToSave), |
911 | OrderedEndIt: OrderedHighRegs.rend(), |
912 | RegSet: HighRegs); |
913 | } |
914 | } |
915 | |
916 | // Add the low registers to the PUSH, in ascending order. |
917 | for (unsigned Reg : llvm::reverse(C&: RegsToPush)) |
918 | PushMIB.addReg(RegNo: Reg, flags: RegState::Kill); |
919 | |
920 | // Insert the PUSH instruction after the MOVs. |
921 | MBB.insert(I: MI, MI: PushMIB); |
922 | } |
923 | } |
924 | |
925 | static void popRegsFromStack(MachineBasicBlock &MBB, |
926 | MachineBasicBlock::iterator &MI, |
927 | const TargetInstrInfo &TII, |
928 | const std::set<Register> &RegsToRestore, |
929 | const std::set<Register> &AvailableCopyRegs, |
930 | bool IsVarArg, bool HasV5Ops) { |
931 | if (RegsToRestore.empty()) |
932 | return; |
933 | |
934 | MachineFunction &MF = *MBB.getParent(); |
935 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
936 | DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); |
937 | |
938 | std::set<Register> LowRegs, HighRegs; |
939 | splitLowAndHighRegs(Regs: RegsToRestore, LowRegs, HighRegs); |
940 | |
941 | // Pop the high registers first |
942 | // There are no store instructions that can access high registers directly, |
943 | // so we have to pop into low registers and them move to the high registers. |
944 | // This might take multiple pops, as it is possible for there to |
945 | // be fewer low registers available than high registers which need restoring. |
946 | |
947 | // Find the first register to restore. |
948 | auto HiRegToRestore = getNextOrderedReg(OrderedStartIt: OrderedHighRegs.begin(), |
949 | OrderedEndIt: OrderedHighRegs.end(), |
950 | RegSet: HighRegs); |
951 | |
952 | std::set<Register> CopyRegs = AvailableCopyRegs; |
953 | Register LowScratchReg; |
954 | if (!HighRegs.empty() && CopyRegs.empty()) { |
955 | // No copy regs are available to pop high regs. Let's make use of a return |
956 | // register and the scratch register (IP/R12) to copy things around. |
957 | LowScratchReg = ARM::R0; |
958 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr)) |
959 | .addReg(RegNo: ARM::R12, flags: RegState::Define) |
960 | .addReg(RegNo: LowScratchReg, flags: RegState::Kill) |
961 | .add(MOs: predOps(Pred: ARMCC::AL)) |
962 | .setMIFlag(MachineInstr::FrameDestroy); |
963 | CopyRegs.insert(x: LowScratchReg); |
964 | } |
965 | |
966 | while (HiRegToRestore != OrderedHighRegs.end()) { |
967 | assert(!CopyRegs.empty()); |
968 | // Find the first low register to use. |
969 | auto CopyReg = getNextOrderedReg(OrderedStartIt: OrderedCopyRegs.begin(), |
970 | OrderedEndIt: OrderedCopyRegs.end(), |
971 | RegSet: CopyRegs); |
972 | |
973 | // Create the POP instruction. |
974 | MachineInstrBuilder PopMIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP)) |
975 | .add(MOs: predOps(Pred: ARMCC::AL)) |
976 | .setMIFlag(MachineInstr::FrameDestroy); |
977 | |
978 | while (HiRegToRestore != OrderedHighRegs.end() && |
979 | CopyReg != OrderedCopyRegs.end()) { |
980 | // Add the low register to the POP. |
981 | PopMIB.addReg(RegNo: *CopyReg, flags: RegState::Define); |
982 | |
983 | // Create the MOV from low to high register. |
984 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr)) |
985 | .addReg(RegNo: *HiRegToRestore, flags: RegState::Define) |
986 | .addReg(RegNo: *CopyReg, flags: RegState::Kill) |
987 | .add(MOs: predOps(Pred: ARMCC::AL)) |
988 | .setMIFlag(MachineInstr::FrameDestroy); |
989 | |
990 | CopyReg = getNextOrderedReg(OrderedStartIt: std::next(x: CopyReg), |
991 | OrderedEndIt: OrderedCopyRegs.end(), |
992 | RegSet: CopyRegs); |
993 | HiRegToRestore = getNextOrderedReg(OrderedStartIt: std::next(x: HiRegToRestore), |
994 | OrderedEndIt: OrderedHighRegs.end(), |
995 | RegSet: HighRegs); |
996 | } |
997 | } |
998 | |
999 | // Restore low register used as scratch if necessary |
1000 | if (LowScratchReg.isValid()) { |
1001 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr)) |
1002 | .addReg(RegNo: LowScratchReg, flags: RegState::Define) |
1003 | .addReg(RegNo: ARM::R12, flags: RegState::Kill) |
1004 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1005 | .setMIFlag(MachineInstr::FrameDestroy); |
1006 | } |
1007 | |
1008 | // Now pop the low registers |
1009 | if (!LowRegs.empty()) { |
1010 | MachineInstrBuilder MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP)) |
1011 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1012 | .setMIFlag(MachineInstr::FrameDestroy); |
1013 | |
1014 | bool NeedsPop = false; |
1015 | for (Register Reg : OrderedLowRegs) { |
1016 | if (!LowRegs.count(x: Reg)) |
1017 | continue; |
1018 | |
1019 | if (Reg == ARM::LR) { |
1020 | if (!MBB.succ_empty() || MI->getOpcode() == ARM::TCRETURNdi || |
1021 | MI->getOpcode() == ARM::TCRETURNri || |
1022 | MI->getOpcode() == ARM::TCRETURNrinotr12) |
1023 | // LR may only be popped into PC, as part of return sequence. |
1024 | // If this isn't the return sequence, we'll need emitPopSpecialFixUp |
1025 | // to restore LR the hard way. |
1026 | // FIXME: if we don't pass any stack arguments it would be actually |
1027 | // advantageous *and* correct to do the conversion to an ordinary call |
1028 | // instruction here. |
1029 | continue; |
1030 | // Special epilogue for vararg functions. See emitEpilogue |
1031 | if (IsVarArg) |
1032 | continue; |
1033 | // ARMv4T requires BX, see emitEpilogue |
1034 | if (!HasV5Ops) |
1035 | continue; |
1036 | |
1037 | // CMSE entry functions must return via BXNS, see emitEpilogue. |
1038 | if (AFI->isCmseNSEntryFunction()) |
1039 | continue; |
1040 | |
1041 | // Pop LR into PC. |
1042 | Reg = ARM::PC; |
1043 | (*MIB).setDesc(TII.get(Opcode: ARM::tPOP_RET)); |
1044 | if (MI != MBB.end()) |
1045 | MIB.copyImplicitOps(OtherMI: *MI); |
1046 | MI = MBB.erase(I: MI); |
1047 | } |
1048 | MIB.addReg(RegNo: Reg, flags: getDefRegState(B: true)); |
1049 | NeedsPop = true; |
1050 | } |
1051 | |
1052 | // It's illegal to emit pop instruction without operands. |
1053 | if (NeedsPop) |
1054 | MBB.insert(I: MI, MI: &*MIB); |
1055 | else |
1056 | MF.deleteMachineInstr(MI: MIB); |
1057 | } |
1058 | } |
1059 | |
1060 | bool Thumb1FrameLowering::spillCalleeSavedRegisters( |
1061 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
1062 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
1063 | if (CSI.empty()) |
1064 | return false; |
1065 | |
1066 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
1067 | MachineFunction &MF = *MBB.getParent(); |
1068 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( |
1069 | MF.getSubtarget().getRegisterInfo()); |
1070 | Register FPReg = RegInfo->getFrameRegister(MF); |
1071 | |
1072 | // In case FP is a high reg, we need a separate push sequence to generate |
1073 | // a correct Frame Record |
1074 | bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(Reg: FPReg); |
1075 | bool LRLiveIn = MF.getRegInfo().isLiveIn(Reg: ARM::LR); |
1076 | bool UsedLRAsTemp = false; |
1077 | |
1078 | std::set<Register> FrameRecord; |
1079 | std::set<Register> SpilledGPRs; |
1080 | for (const CalleeSavedInfo &I : CSI) { |
1081 | MCRegister Reg = I.getReg(); |
1082 | if (NeedsFrameRecordPush && (Reg == FPReg.asMCReg() || Reg == ARM::LR)) |
1083 | FrameRecord.insert(x: Reg); |
1084 | else |
1085 | SpilledGPRs.insert(x: Reg); |
1086 | } |
1087 | |
1088 | // Determine intermediate registers which can be used for pushing the frame |
1089 | // record: |
1090 | // - Unused argument registers |
1091 | // - LR: This is possible because the first PUSH will save it on the stack, |
1092 | // so it is free to be used as a temporary for the second. However, it |
1093 | // is possible for LR to be live-in to the function, in which case we |
1094 | // will need to restore it later in the prologue, so we only use this |
1095 | // if there are no free argument registers. |
1096 | std::set<Register> FrameRecordCopyRegs; |
1097 | for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) |
1098 | if (!MF.getRegInfo().isLiveIn(Reg: ArgReg)) |
1099 | FrameRecordCopyRegs.insert(x: ArgReg); |
1100 | if (FrameRecordCopyRegs.empty()) |
1101 | FrameRecordCopyRegs.insert(x: ARM::LR); |
1102 | |
1103 | pushRegsToStack(MBB, MI, TII, RegsToSave: FrameRecord, CopyRegs: FrameRecordCopyRegs, UsedLRAsTemp); |
1104 | |
1105 | // Determine intermediate registers which can be used for pushing high regs: |
1106 | // - Spilled low regs |
1107 | // - Unused argument registers |
1108 | std::set<Register> CopyRegs; |
1109 | for (Register Reg : SpilledGPRs) |
1110 | if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && |
1111 | !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) |
1112 | CopyRegs.insert(x: Reg); |
1113 | for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) |
1114 | if (!MF.getRegInfo().isLiveIn(Reg: ArgReg)) |
1115 | CopyRegs.insert(x: ArgReg); |
1116 | |
1117 | pushRegsToStack(MBB, MI, TII, RegsToSave: SpilledGPRs, CopyRegs, UsedLRAsTemp); |
1118 | |
1119 | // If the push sequence used LR as a temporary, and LR is live-in (for |
1120 | // example because it is used by the llvm.returnaddress intrinsic), then we |
1121 | // need to reload it from the stack. Thumb1 does not have a load instruction |
1122 | // which can use LR, so we need to load into a temporary low register and |
1123 | // copy to LR. |
1124 | if (LRLiveIn && UsedLRAsTemp) { |
1125 | auto CopyRegIt = getNextOrderedReg(OrderedStartIt: OrderedCopyRegs.rbegin(), |
1126 | OrderedEndIt: OrderedCopyRegs.rend(), RegSet: CopyRegs); |
1127 | assert(CopyRegIt != OrderedCopyRegs.rend()); |
1128 | unsigned NumRegsPushed = FrameRecord.size() + SpilledGPRs.size(); |
1129 | LLVM_DEBUG( |
1130 | dbgs() << "LR is live-in but clobbered in prologue, restoring via " |
1131 | << RegInfo->getName(*CopyRegIt) << "\n" ); |
1132 | |
1133 | BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::tLDRspi), DestReg: *CopyRegIt) |
1134 | .addReg(RegNo: ARM::SP) |
1135 | .addImm(Val: NumRegsPushed - 1) |
1136 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1137 | .setMIFlags(MachineInstr::FrameSetup); |
1138 | |
1139 | BuildMI(BB&: MBB, I: MI, MIMD: DebugLoc(), MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::LR) |
1140 | .addReg(RegNo: *CopyRegIt) |
1141 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1142 | .setMIFlags(MachineInstr::FrameSetup); |
1143 | } |
1144 | |
1145 | return true; |
1146 | } |
1147 | |
1148 | bool Thumb1FrameLowering::restoreCalleeSavedRegisters( |
1149 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
1150 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
1151 | if (CSI.empty()) |
1152 | return false; |
1153 | |
1154 | MachineFunction &MF = *MBB.getParent(); |
1155 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
1156 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
1157 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( |
1158 | MF.getSubtarget().getRegisterInfo()); |
1159 | bool IsVarArg = AFI->getArgRegsSaveSize() > 0; |
1160 | Register FPReg = RegInfo->getFrameRegister(MF); |
1161 | |
1162 | // In case FP is a high reg, we need a separate pop sequence to generate |
1163 | // a correct Frame Record |
1164 | bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(Reg: FPReg); |
1165 | |
1166 | std::set<Register> FrameRecord; |
1167 | std::set<Register> SpilledGPRs; |
1168 | for (CalleeSavedInfo &I : CSI) { |
1169 | MCRegister Reg = I.getReg(); |
1170 | if (NeedsFrameRecordPop && (Reg == FPReg.asMCReg() || Reg == ARM::LR)) |
1171 | FrameRecord.insert(x: Reg); |
1172 | else |
1173 | SpilledGPRs.insert(x: Reg); |
1174 | |
1175 | if (Reg == ARM::LR) |
1176 | I.setRestored(false); |
1177 | } |
1178 | |
1179 | // Determine intermidiate registers which can be used for popping high regs: |
1180 | // - Spilled low regs |
1181 | // - Unused return registers |
1182 | std::set<Register> CopyRegs; |
1183 | std::set<Register> UnusedReturnRegs; |
1184 | for (Register Reg : SpilledGPRs) |
1185 | if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) |
1186 | CopyRegs.insert(x: Reg); |
1187 | auto Terminator = MBB.getFirstTerminator(); |
1188 | if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { |
1189 | UnusedReturnRegs.insert(x: ARM::R0); |
1190 | UnusedReturnRegs.insert(x: ARM::R1); |
1191 | UnusedReturnRegs.insert(x: ARM::R2); |
1192 | UnusedReturnRegs.insert(x: ARM::R3); |
1193 | for (auto Op : Terminator->implicit_operands()) { |
1194 | if (Op.isReg()) |
1195 | UnusedReturnRegs.erase(x: Op.getReg()); |
1196 | } |
1197 | } |
1198 | CopyRegs.insert(first: UnusedReturnRegs.begin(), last: UnusedReturnRegs.end()); |
1199 | |
1200 | // First pop regular spilled regs. |
1201 | popRegsFromStack(MBB, MI, TII, RegsToRestore: SpilledGPRs, AvailableCopyRegs: CopyRegs, IsVarArg, |
1202 | HasV5Ops: STI.hasV5TOps()); |
1203 | |
1204 | // LR may only be popped into pc, as part of a return sequence. |
1205 | // Check that no other pop instructions are inserted after that. |
1206 | assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) && |
1207 | "Can't insert pop after return sequence" ); |
1208 | |
1209 | // Now pop Frame Record regs. |
1210 | // Only unused return registers can be used as copy regs at this point. |
1211 | popRegsFromStack(MBB, MI, TII, RegsToRestore: FrameRecord, AvailableCopyRegs: UnusedReturnRegs, IsVarArg, |
1212 | HasV5Ops: STI.hasV5TOps()); |
1213 | |
1214 | return true; |
1215 | } |
1216 | |