1 | //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the Thumb1 implementation of TargetFrameLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "Thumb1FrameLowering.h" |
14 | #include "ARMBaseInstrInfo.h" |
15 | #include "ARMBaseRegisterInfo.h" |
16 | #include "ARMMachineFunctionInfo.h" |
17 | #include "ARMSubtarget.h" |
18 | #include "Thumb1InstrInfo.h" |
19 | #include "ThumbRegisterInfo.h" |
20 | #include "Utils/ARMBaseInfo.h" |
21 | #include "llvm/ADT/BitVector.h" |
22 | #include "llvm/ADT/STLExtras.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/CodeGen/LivePhysRegs.h" |
25 | #include "llvm/CodeGen/MachineBasicBlock.h" |
26 | #include "llvm/CodeGen/MachineFrameInfo.h" |
27 | #include "llvm/CodeGen/MachineFunction.h" |
28 | #include "llvm/CodeGen/MachineInstr.h" |
29 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
30 | #include "llvm/CodeGen/MachineModuleInfo.h" |
31 | #include "llvm/CodeGen/MachineOperand.h" |
32 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
33 | #include "llvm/CodeGen/TargetInstrInfo.h" |
34 | #include "llvm/CodeGen/TargetOpcodes.h" |
35 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
36 | #include "llvm/IR/DebugLoc.h" |
37 | #include "llvm/MC/MCContext.h" |
38 | #include "llvm/MC/MCDwarf.h" |
39 | #include "llvm/MC/MCRegisterInfo.h" |
40 | #include "llvm/Support/Compiler.h" |
41 | #include "llvm/Support/ErrorHandling.h" |
42 | #include "llvm/Support/MathExtras.h" |
43 | #include <cassert> |
44 | #include <iterator> |
45 | #include <vector> |
46 | |
47 | using namespace llvm; |
48 | |
49 | Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) |
50 | : ARMFrameLowering(sti) {} |
51 | |
52 | bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ |
53 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
54 | unsigned CFSize = MFI.getMaxCallFrameSize(); |
55 | // It's not always a good idea to include the call frame as part of the |
56 | // stack frame. ARM (especially Thumb) has small immediate offset to |
57 | // address the stack frame. So a large call frame can cause poor codegen |
58 | // and may even makes it impossible to scavenge a register. |
59 | if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 |
60 | return false; |
61 | |
62 | return !MFI.hasVarSizedObjects(); |
63 | } |
64 | |
65 | static void |
66 | emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB, |
67 | MachineBasicBlock::iterator &MBBI, |
68 | const TargetInstrInfo &TII, const DebugLoc &dl, |
69 | const ThumbRegisterInfo &MRI, int NumBytes, |
70 | unsigned ScratchReg, unsigned MIFlags) { |
71 | // If it would take more than three instructions to adjust the stack pointer |
72 | // using tADDspi/tSUBspi, load an immediate instead. |
73 | if (std::abs(x: NumBytes) > 508 * 3) { |
74 | // We use a different codepath here from the normal |
75 | // emitThumbRegPlusImmediate so we don't have to deal with register |
76 | // scavenging. (Scavenging could try to use the emergency spill slot |
77 | // before we've actually finished setting up the stack.) |
78 | if (ScratchReg == ARM::NoRegister) |
79 | report_fatal_error(reason: "Failed to emit Thumb1 stack adjustment" ); |
80 | MachineFunction &MF = *MBB.getParent(); |
81 | const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); |
82 | if (ST.genExecuteOnly()) { |
83 | unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm; |
84 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: XOInstr), DestReg: ScratchReg) |
85 | .addImm(Val: NumBytes).setMIFlags(MIFlags); |
86 | } else { |
87 | MRI.emitLoadConstPool(MBB, MBBI, dl, DestReg: ScratchReg, SubIdx: 0, Val: NumBytes, Pred: ARMCC::AL, |
88 | PredReg: 0, MIFlags); |
89 | } |
90 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tADDhirr), DestReg: ARM::SP) |
91 | .addReg(RegNo: ARM::SP) |
92 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
93 | .add(MOs: predOps(Pred: ARMCC::AL)) |
94 | .setMIFlags(MIFlags); |
95 | return; |
96 | } |
97 | // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate |
98 | // won't change. |
99 | emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::SP, BaseReg: ARM::SP, NumBytes, TII, |
100 | MRI, MIFlags); |
101 | |
102 | } |
103 | |
104 | static void emitCallSPUpdate(MachineBasicBlock &MBB, |
105 | MachineBasicBlock::iterator &MBBI, |
106 | const TargetInstrInfo &TII, const DebugLoc &dl, |
107 | const ThumbRegisterInfo &MRI, int NumBytes, |
108 | unsigned MIFlags = MachineInstr::NoFlags) { |
109 | emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg: ARM::SP, BaseReg: ARM::SP, NumBytes, TII, |
110 | MRI, MIFlags); |
111 | } |
112 | |
113 | |
114 | MachineBasicBlock::iterator Thumb1FrameLowering:: |
115 | eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, |
116 | MachineBasicBlock::iterator I) const { |
117 | const Thumb1InstrInfo &TII = |
118 | *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); |
119 | const ThumbRegisterInfo *RegInfo = |
120 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
121 | if (!hasReservedCallFrame(MF)) { |
122 | // If we have alloca, convert as follows: |
123 | // ADJCALLSTACKDOWN -> sub, sp, sp, amount |
124 | // ADJCALLSTACKUP -> add, sp, sp, amount |
125 | MachineInstr &Old = *I; |
126 | DebugLoc dl = Old.getDebugLoc(); |
127 | unsigned Amount = TII.getFrameSize(I: Old); |
128 | if (Amount != 0) { |
129 | // We need to keep the stack aligned properly. To do this, we round the |
130 | // amount of space needed for the outgoing arguments up to the next |
131 | // alignment boundary. |
132 | Amount = alignTo(Size: Amount, A: getStackAlign()); |
133 | |
134 | // Replace the pseudo instruction with a new instruction... |
135 | unsigned Opc = Old.getOpcode(); |
136 | if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { |
137 | emitCallSPUpdate(MBB, MBBI&: I, TII, dl, MRI: *RegInfo, NumBytes: -Amount); |
138 | } else { |
139 | assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); |
140 | emitCallSPUpdate(MBB, MBBI&: I, TII, dl, MRI: *RegInfo, NumBytes: Amount); |
141 | } |
142 | } |
143 | } |
144 | return MBB.erase(I); |
145 | } |
146 | |
/// Emit the function prologue into \p MBB (insertion starts at MBB.begin()).
///
/// In order, this: rounds the stack size up to a multiple of 4; allocates the
/// argument-register save area; returns early with a single SP adjustment when
/// the function has no stack frame; otherwise sizes the callee-save areas from
/// the CalleeSavedInfo list, walks over the push/mov instructions already at
/// the top of the block, records the area offsets and sizes in
/// ARMFunctionInfo, sets up the frame pointer, emits CFI for the saved
/// registers, allocates the remaining bytes, and finally handles stack
/// realignment and the base pointer.
147 | void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, |
148 | MachineBasicBlock &MBB) const { |
149 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
150 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
151 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
152 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
153 | const ThumbRegisterInfo *RegInfo = |
154 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
155 | const Thumb1InstrInfo &TII = |
156 | *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); |
157 | |
158 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); |
159 | unsigned NumBytes = MFI.getStackSize(); |
160 | assert(NumBytes >= ArgRegsSaveSize && |
161 | "ArgRegsSaveSize is included in NumBytes" ); |
162 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
163 | |
164 | // Debug location must be unknown since the first debug location is used |
165 | // to determine the end of the prologue. |
166 | DebugLoc dl; |
167 | |
168 | Register FramePtr = RegInfo->getFrameRegister(MF); |
169 | Register BasePtr = RegInfo->getBaseRegister(); |
// Running CFA offset from SP; it is bumped after each SP adjustment that is
// described to the unwinder below.
170 | int CFAOffset = 0; |
171 | |
172 | // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. |
173 | NumBytes = (NumBytes + 3) & ~3; |
174 | MFI.setStackSize(NumBytes); |
175 | |
176 | // Determine the size of each callee-save spill area and record the frame |
177 | // index of the slot in which the frame pointer is spilled. |
178 | unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; |
179 | int FramePtrSpillFI = 0; |
180 | |
// Allocate the argument-register save area first and describe the new CFA
// offset.
181 | if (ArgRegsSaveSize) { |
182 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, NumBytes: -ArgRegsSaveSize, |
183 | ScratchReg: ARM::NoRegister, MIFlags: MachineInstr::FrameSetup); |
184 | CFAOffset += ArgRegsSaveSize; |
185 | unsigned CFIIndex = |
186 | MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: CFAOffset)); |
187 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
188 | .addCFIIndex(CFIIndex) |
189 | .setMIFlags(MachineInstr::FrameSetup); |
190 | } |
191 | |
// Without a stack frame, one SP adjustment for the remaining bytes (if any)
// completes the prologue.
192 | if (!AFI->hasStackFrame()) { |
193 | if (NumBytes - ArgRegsSaveSize != 0) { |
194 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, |
195 | NumBytes: -(NumBytes - ArgRegsSaveSize), |
196 | ScratchReg: ARM::NoRegister, MIFlags: MachineInstr::FrameSetup); |
197 | CFAOffset += NumBytes - ArgRegsSaveSize; |
198 | unsigned CFIIndex = MF.addFrameInst( |
199 | Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: CFAOffset)); |
200 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
201 | .addCFIIndex(CFIIndex) |
202 | .setMIFlags(MachineInstr::FrameSetup); |
203 | } |
204 | return; |
205 | } |
206 | |
// A separate frame-record area is used only when there is a frame pointer and
// it lives in a high register (hGPR class).
207 | bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(Reg: FramePtr); |
208 | |
// Attribute each callee-saved register to one of the areas. The case labels
// deliberately fall through: a register lands in the first area whose
// condition holds, and anything not listed is counted as an 8-byte DPR slot.
209 | for (const CalleeSavedInfo &I : CSI) { |
210 | Register Reg = I.getReg(); |
211 | int FI = I.getFrameIdx(); |
212 | if (Reg == FramePtr) |
213 | FramePtrSpillFI = FI; |
214 | switch (Reg) { |
215 | case ARM::R11: |
216 | if (HasFrameRecordArea) { |
217 | FRSize += 4; |
218 | break; |
219 | } |
220 | [[fallthrough]]; |
221 | case ARM::R8: |
222 | case ARM::R9: |
223 | case ARM::R10: |
224 | if (STI.splitFramePushPop(MF)) { |
225 | GPRCS2Size += 4; |
226 | break; |
227 | } |
228 | [[fallthrough]]; |
229 | case ARM::LR: |
230 | if (HasFrameRecordArea) { |
231 | FRSize += 4; |
232 | break; |
233 | } |
234 | [[fallthrough]]; |
235 | case ARM::R4: |
236 | case ARM::R5: |
237 | case ARM::R6: |
238 | case ARM::R7: |
239 | GPRCS1Size += 4; |
240 | break; |
241 | default: |
242 | DPRCSSize += 8; |
243 | } |
244 | } |
245 | |
246 | MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; |
247 | if (HasFrameRecordArea) { |
248 | // Skip Frame Record setup: |
249 | // push {lr} |
250 | // mov lr, r11 |
251 | // push {lr} |
252 | std::advance(i&: MBBI, n: 2); |
253 | FRPush = MBBI++; |
254 | } |
255 | |
// Remember the push of the low-register area, if the next instruction is one.
256 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { |
257 | GPRCS1Push = MBBI; |
258 | ++MBBI; |
259 | } |
260 | |
261 | // Find last push instruction for GPRCS2 - spilling of high registers |
262 | // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. |
263 | while (true) { |
264 | MachineBasicBlock::iterator OldMBBI = MBBI; |
265 | // Skip a run of tMOVr instructions |
266 | while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && |
267 | MBBI->getFlag(Flag: MachineInstr::FrameSetup)) |
268 | MBBI++; |
269 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && |
270 | MBBI->getFlag(Flag: MachineInstr::FrameSetup)) { |
271 | GPRCS2Push = MBBI; |
272 | MBBI++; |
273 | } else { |
274 | // We have reached an instruction which is not a push, so the previous |
275 | // run of tMOVr instructions (which may have been empty) was not part of |
276 | // the prologue. Reset MBBI back to the last PUSH of the prologue. |
277 | MBBI = OldMBBI; |
278 | break; |
279 | } |
280 | } |
281 | |
282 | // Determine starting offsets of spill areas. |
283 | unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - |
284 | (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); |
285 | unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; |
286 | unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; |
287 | bool HasFP = hasFP(MF); |
288 | if (HasFP) |
289 | AFI->setFramePtrSpillOffset(MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) + |
290 | NumBytes); |
291 | if (HasFrameRecordArea) |
292 | AFI->setFrameRecordSavedAreaSize(FRSize); |
293 | AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); |
294 | AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); |
295 | AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); |
// From here on NumBytes is what remains to be allocated below the callee-save
// areas.
296 | NumBytes = DPRCSOffset; |
297 | |
// When only the low-register area exists, try to fold the remaining
// allocation into its push. On success those bytes are accounted to the push
// and nothing is left to allocate.
298 | int FramePtrOffsetInBlock = 0; |
299 | unsigned adjustedGPRCS1Size = GPRCS1Size; |
300 | if (GPRCS1Size > 0 && GPRCS2Size == 0 && |
301 | tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*(GPRCS1Push), NumBytes)) { |
302 | FramePtrOffsetInBlock = NumBytes; |
303 | adjustedGPRCS1Size += NumBytes; |
304 | NumBytes = 0; |
305 | } |
306 | CFAOffset += adjustedGPRCS1Size; |
307 | |
308 | // Adjust FP so it points to the stack slot that contains the previous FP. |
309 | if (HasFP) { |
310 | MachineBasicBlock::iterator AfterPush = |
311 | HasFrameRecordArea ? std::next(x: FRPush) : std::next(x: GPRCS1Push); |
312 | if (HasFrameRecordArea) { |
313 | // We have just finished pushing the previous FP into the stack, |
314 | // so simply capture the SP value as the new Frame Pointer. |
315 | BuildMI(BB&: MBB, I: AfterPush, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: FramePtr) |
316 | .addReg(RegNo: ARM::SP) |
317 | .setMIFlags(MachineInstr::FrameSetup) |
318 | .add(MOs: predOps(Pred: ARMCC::AL)); |
319 | } else { |
320 | FramePtrOffsetInBlock += |
321 | MFI.getObjectOffset(ObjectIdx: FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; |
// The immediate is the byte offset divided by 4.
322 | BuildMI(BB&: MBB, I: AfterPush, MIMD: dl, MCID: TII.get(Opcode: ARM::tADDrSPi), DestReg: FramePtr) |
323 | .addReg(RegNo: ARM::SP) |
324 | .addImm(Val: FramePtrOffsetInBlock / 4) |
325 | .setMIFlags(MachineInstr::FrameSetup) |
326 | .add(MOs: predOps(Pred: ARMCC::AL)); |
327 | } |
328 | |
// Describe the CFA in terms of the frame pointer: with an explicit offset
// when FP does not coincide with SP at this point, otherwise by switching
// only the CFA register.
329 | if(FramePtrOffsetInBlock) { |
330 | unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfa( |
331 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: FramePtr, isEH: true), Offset: (CFAOffset - FramePtrOffsetInBlock))); |
332 | BuildMI(BB&: MBB, I: AfterPush, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
333 | .addCFIIndex(CFIIndex) |
334 | .setMIFlags(MachineInstr::FrameSetup); |
335 | } else { |
336 | unsigned CFIIndex = |
337 | MF.addFrameInst(Inst: MCCFIInstruction::createDefCfaRegister( |
338 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: FramePtr, isEH: true))); |
339 | BuildMI(BB&: MBB, I: AfterPush, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
340 | .addCFIIndex(CFIIndex) |
341 | .setMIFlags(MachineInstr::FrameSetup); |
342 | } |
343 | if (NumBytes > 508) |
344 | // If offset is > 508 then sp cannot be adjusted in a single instruction, |
345 | // try restoring from fp instead. |
346 | AFI->setShouldRestoreSPFromFP(true); |
347 | } |
348 | |
349 | // Emit call frame information for the callee-saved low registers. |
350 | if (GPRCS1Size > 0) { |
351 | MachineBasicBlock::iterator Pos = std::next(x: GPRCS1Push); |
352 | if (adjustedGPRCS1Size) { |
353 | unsigned CFIIndex = |
354 | MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: CFAOffset)); |
355 | BuildMI(BB&: MBB, I: Pos, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
356 | .addCFIIndex(CFIIndex) |
357 | .setMIFlags(MachineInstr::FrameSetup); |
358 | } |
359 | for (const CalleeSavedInfo &I : CSI) { |
360 | Register Reg = I.getReg(); |
361 | int FI = I.getFrameIdx(); |
362 | switch (Reg) { |
363 | case ARM::R8: |
364 | case ARM::R9: |
365 | case ARM::R10: |
366 | case ARM::R11: |
367 | case ARM::R12: |
// With split push/pop the high registers are described by the GPRCS2 loop
// further down instead.
368 | if (STI.splitFramePushPop(MF)) |
369 | break; |
370 | [[fallthrough]]; |
371 | case ARM::R0: |
372 | case ARM::R1: |
373 | case ARM::R2: |
374 | case ARM::R3: |
375 | case ARM::R4: |
376 | case ARM::R5: |
377 | case ARM::R6: |
378 | case ARM::R7: |
379 | case ARM::LR: |
380 | unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset( |
381 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: Reg, isEH: true), Offset: MFI.getObjectOffset(ObjectIdx: FI))); |
382 | BuildMI(BB&: MBB, I: Pos, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
383 | .addCFIIndex(CFIIndex) |
384 | .setMIFlags(MachineInstr::FrameSetup); |
385 | break; |
386 | } |
387 | } |
388 | } |
389 | |
390 | // Emit call frame information for the callee-saved high registers. |
391 | if (GPRCS2Size > 0) { |
392 | MachineBasicBlock::iterator Pos = std::next(x: GPRCS2Push); |
393 | for (auto &I : CSI) { |
394 | Register Reg = I.getReg(); |
395 | int FI = I.getFrameIdx(); |
396 | switch (Reg) { |
397 | case ARM::R8: |
398 | case ARM::R9: |
399 | case ARM::R10: |
400 | case ARM::R11: |
401 | case ARM::R12: { |
402 | unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset( |
403 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: Reg, isEH: true), Offset: MFI.getObjectOffset(ObjectIdx: FI))); |
404 | BuildMI(BB&: MBB, I: Pos, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
405 | .addCFIIndex(CFIIndex) |
406 | .setMIFlags(MachineInstr::FrameSetup); |
407 | break; |
408 | } |
409 | default: |
410 | break; |
411 | } |
412 | } |
413 | } |
414 | |
415 | if (NumBytes) { |
416 | // Insert it after all the callee-save spills. |
417 | // |
418 | // For a large stack frame, we might need a scratch register to store |
419 | // the size of the frame. We know all callee-save registers are free |
420 | // at this point in the prologue, so pick one. |
421 | unsigned ScratchRegister = ARM::NoRegister; |
422 | for (auto &I : CSI) { |
423 | Register Reg = I.getReg(); |
424 | if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) { |
425 | ScratchRegister = Reg; |
426 | break; |
427 | } |
428 | } |
429 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, NumBytes: -NumBytes, |
430 | ScratchReg: ScratchRegister, MIFlags: MachineInstr::FrameSetup); |
// With a frame pointer the CFA was already defined relative to FP above, so
// only the FP-less case needs a new CFA offset here.
431 | if (!HasFP) { |
432 | CFAOffset += NumBytes; |
433 | unsigned CFIIndex = MF.addFrameInst( |
434 | Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: CFAOffset)); |
435 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
436 | .addCFIIndex(CFIIndex) |
437 | .setMIFlags(MachineInstr::FrameSetup); |
438 | } |
439 | } |
440 | |
441 | if (STI.isTargetELF() && HasFP) |
442 | MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - |
443 | AFI->getFramePtrSpillOffset()); |
444 | |
445 | AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); |
446 | AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); |
447 | AFI->setDPRCalleeSavedAreaSize(DPRCSSize); |
448 | |
449 | if (RegInfo->hasStackRealignment(MF)) { |
450 | const unsigned NrBitsToZero = Log2(A: MFI.getMaxAlign()); |
451 | // Emit the following sequence, using R4 as a temporary, since we cannot use |
452 | // SP as a source or destination register for the shifts: |
453 | // mov r4, sp |
454 | // lsrs r4, r4, #NrBitsToZero |
455 | // lsls r4, r4, #NrBitsToZero |
456 | // mov sp, r4 |
457 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::R4) |
458 | .addReg(RegNo: ARM::SP, flags: RegState::Kill) |
459 | .add(MOs: predOps(Pred: ARMCC::AL)); |
460 | |
461 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tLSRri), DestReg: ARM::R4) |
462 | .addDef(RegNo: ARM::CPSR) |
463 | .addReg(RegNo: ARM::R4, flags: RegState::Kill) |
464 | .addImm(Val: NrBitsToZero) |
465 | .add(MOs: predOps(Pred: ARMCC::AL)); |
466 | |
467 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tLSLri), DestReg: ARM::R4) |
468 | .addDef(RegNo: ARM::CPSR) |
469 | .addReg(RegNo: ARM::R4, flags: RegState::Kill) |
470 | .addImm(Val: NrBitsToZero) |
471 | .add(MOs: predOps(Pred: ARMCC::AL)); |
472 | |
473 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP) |
474 | .addReg(RegNo: ARM::R4, flags: RegState::Kill) |
475 | .add(MOs: predOps(Pred: ARMCC::AL)); |
476 | |
// After realignment the epilogue is told to restore SP from the frame
// pointer.
477 | AFI->setShouldRestoreSPFromFP(true); |
478 | } |
479 | |
480 | // If we need a base pointer, set it up here. It's whatever the value |
481 | // of the stack pointer is at this point. Any variable size objects |
482 | // will be allocated after this, so we can still use the base pointer |
483 | // to reference locals. |
484 | if (RegInfo->hasBasePointer(MF)) |
485 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: BasePtr) |
486 | .addReg(RegNo: ARM::SP) |
487 | .add(MOs: predOps(Pred: ARMCC::AL)); |
488 | |
489 | // If the frame has variable sized objects then the epilogue must restore |
490 | // the sp from fp. We can assume there's an FP here since hasFP already |
491 | // checks for hasVarSizedObjects. |
492 | if (MFI.hasVarSizedObjects()) |
493 | AFI->setShouldRestoreSPFromFP(true); |
494 | |
495 | // In some cases, virtual registers have been introduced, e.g. by uses of |
496 | // emitThumbRegPlusImmInReg. |
497 | MF.getProperties().reset(P: MachineFunctionProperties::Property::NoVRegs); |
498 | } |
499 | |
500 | void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, |
501 | MachineBasicBlock &MBB) const { |
502 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
503 | DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); |
504 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
505 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
506 | const ThumbRegisterInfo *RegInfo = |
507 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
508 | const Thumb1InstrInfo &TII = |
509 | *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); |
510 | |
511 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); |
512 | int NumBytes = (int)MFI.getStackSize(); |
513 | assert((unsigned)NumBytes >= ArgRegsSaveSize && |
514 | "ArgRegsSaveSize is included in NumBytes" ); |
515 | Register FramePtr = RegInfo->getFrameRegister(MF); |
516 | |
517 | if (!AFI->hasStackFrame()) { |
518 | if (NumBytes - ArgRegsSaveSize != 0) |
519 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, |
520 | NumBytes: NumBytes - ArgRegsSaveSize, ScratchReg: ARM::NoRegister, |
521 | MIFlags: MachineInstr::FrameDestroy); |
522 | } else { |
523 | // Unwind MBBI to point to first LDR / VLDRD. |
524 | if (MBBI != MBB.begin()) { |
525 | do |
526 | --MBBI; |
527 | while (MBBI != MBB.begin() && MBBI->getFlag(Flag: MachineInstr::FrameDestroy)); |
528 | if (!MBBI->getFlag(Flag: MachineInstr::FrameDestroy)) |
529 | ++MBBI; |
530 | } |
531 | |
532 | // Move SP to start of FP callee save spill area. |
533 | NumBytes -= (AFI->getFrameRecordSavedAreaSize() + |
534 | AFI->getGPRCalleeSavedArea1Size() + |
535 | AFI->getGPRCalleeSavedArea2Size() + |
536 | AFI->getDPRCalleeSavedAreaSize() + |
537 | ArgRegsSaveSize); |
538 | |
539 | // We are likely to need a scratch register and we know all callee-save |
540 | // registers are free at this point in the epilogue, so pick one. |
541 | unsigned ScratchRegister = ARM::NoRegister; |
542 | bool HasFP = hasFP(MF); |
543 | for (auto &I : MFI.getCalleeSavedInfo()) { |
544 | Register Reg = I.getReg(); |
545 | if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) { |
546 | ScratchRegister = Reg; |
547 | break; |
548 | } |
549 | } |
550 | |
551 | if (AFI->shouldRestoreSPFromFP()) { |
552 | NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; |
553 | // Reset SP based on frame pointer only if the stack frame extends beyond |
554 | // frame pointer stack slot, the target is ELF and the function has FP, or |
555 | // the target uses var sized objects. |
556 | if (NumBytes) { |
557 | assert(ScratchRegister != ARM::NoRegister && |
558 | "No scratch register to restore SP from FP!" ); |
559 | emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg: ScratchRegister, BaseReg: FramePtr, NumBytes: -NumBytes, |
560 | TII, MRI: *RegInfo, MIFlags: MachineInstr::FrameDestroy); |
561 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP) |
562 | .addReg(RegNo: ScratchRegister) |
563 | .add(MOs: predOps(Pred: ARMCC::AL)) |
564 | .setMIFlag(MachineInstr::FrameDestroy); |
565 | } else |
566 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg: ARM::SP) |
567 | .addReg(RegNo: FramePtr) |
568 | .add(MOs: predOps(Pred: ARMCC::AL)) |
569 | .setMIFlag(MachineInstr::FrameDestroy); |
570 | } else { |
571 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && |
572 | &MBB.front() != &*MBBI && std::prev(x: MBBI)->getOpcode() == ARM::tPOP) { |
573 | MachineBasicBlock::iterator PMBBI = std::prev(x: MBBI); |
574 | if (!tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*PMBBI, NumBytes)) |
575 | emitPrologueEpilogueSPUpdate(MBB, MBBI&: PMBBI, TII, dl, MRI: *RegInfo, NumBytes, |
576 | ScratchReg: ScratchRegister, MIFlags: MachineInstr::FrameDestroy); |
577 | } else if (!tryFoldSPUpdateIntoPushPop(Subtarget: STI, MF, MI: &*MBBI, NumBytes)) |
578 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, NumBytes, |
579 | ScratchReg: ScratchRegister, MIFlags: MachineInstr::FrameDestroy); |
580 | } |
581 | } |
582 | |
583 | if (needPopSpecialFixUp(MF)) { |
584 | bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); |
585 | (void)Done; |
586 | assert(Done && "Emission of the special fixup failed!?" ); |
587 | } |
588 | } |
589 | |
590 | bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
591 | if (!needPopSpecialFixUp(MF: *MBB.getParent())) |
592 | return true; |
593 | |
594 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
595 | return emitPopSpecialFixUp(MBB&: *TmpMBB, /* DoIt */ false); |
596 | } |
597 | |
598 | bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { |
599 | ARMFunctionInfo *AFI = |
600 | const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); |
601 | if (AFI->getArgRegsSaveSize()) |
602 | return true; |
603 | |
604 | // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. |
605 | for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) |
606 | if (CSI.getReg() == ARM::LR) |
607 | return true; |
608 | |
609 | return false; |
610 | } |
611 | |
612 | static void findTemporariesForLR(const BitVector &GPRsNoLRSP, |
613 | const BitVector &PopFriendly, |
614 | const LiveRegUnits &UsedRegs, unsigned &PopReg, |
615 | unsigned &TmpReg, MachineRegisterInfo &MRI) { |
616 | PopReg = TmpReg = 0; |
617 | for (auto Reg : GPRsNoLRSP.set_bits()) { |
618 | if (UsedRegs.available(Reg)) { |
619 | // Remember the first pop-friendly register and exit. |
620 | if (PopFriendly.test(Idx: Reg)) { |
621 | PopReg = Reg; |
622 | TmpReg = 0; |
623 | break; |
624 | } |
625 | // Otherwise, remember that the register will be available to |
626 | // save a pop-friendly register. |
627 | TmpReg = Reg; |
628 | } |
629 | } |
630 | } |
631 | |
632 | bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, |
633 | bool DoIt) const { |
634 | MachineFunction &MF = *MBB.getParent(); |
635 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
636 | unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); |
637 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
638 | const ThumbRegisterInfo *RegInfo = |
639 | static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); |
640 | |
641 | // If MBBI is a return instruction, or is a tPOP followed by a return |
642 | // instruction in the successor BB, we may be able to directly restore |
643 | // LR in the PC. |
644 | // This is only possible with v5T ops (v4T can't change the Thumb bit via |
645 | // a POP PC instruction), and only if we do not need to emit any SP update. |
646 | // Otherwise, we need a temporary register to pop the value |
647 | // and copy that value into LR. |
648 | auto MBBI = MBB.getFirstTerminator(); |
649 | bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; |
650 | if (CanRestoreDirectly) { |
651 | if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) |
652 | CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || |
653 | MBBI->getOpcode() == ARM::tPOP_RET); |
654 | else { |
655 | auto MBBI_prev = MBBI; |
656 | MBBI_prev--; |
657 | assert(MBBI_prev->getOpcode() == ARM::tPOP); |
658 | assert(MBB.succ_size() == 1); |
659 | if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) |
660 | MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. |
661 | else |
662 | CanRestoreDirectly = false; |
663 | } |
664 | } |
665 | |
666 | if (CanRestoreDirectly) { |
667 | if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) |
668 | return true; |
669 | MachineInstrBuilder MIB = |
670 | BuildMI(BB&: MBB, I: MBBI, MIMD: MBBI->getDebugLoc(), MCID: TII.get(Opcode: ARM::tPOP_RET)) |
671 | .add(MOs: predOps(Pred: ARMCC::AL)) |
672 | .setMIFlag(MachineInstr::FrameDestroy); |
673 | // Copy implicit ops and popped registers, if any. |
674 | for (auto MO: MBBI->operands()) |
675 | if (MO.isReg() && (MO.isImplicit() || MO.isDef())) |
676 | MIB.add(MO); |
677 | MIB.addReg(RegNo: ARM::PC, flags: RegState::Define); |
678 | // Erase the old instruction (tBX_RET or tPOP). |
679 | MBB.erase(I: MBBI); |
680 | return true; |
681 | } |
682 | |
683 | // Look for a temporary register to use. |
684 | // First, compute the liveness information. |
685 | const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); |
686 | LiveRegUnits UsedRegs(TRI); |
687 | UsedRegs.addLiveOuts(MBB); |
688 | // The semantic of pristines changed recently and now, |
689 | // the callee-saved registers that are touched in the function |
690 | // are not part of the pristines set anymore. |
691 | // Add those callee-saved now. |
692 | const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF: &MF); |
693 | for (unsigned i = 0; CSRegs[i]; ++i) |
694 | UsedRegs.addReg(Reg: CSRegs[i]); |
695 | |
696 | DebugLoc dl = DebugLoc(); |
697 | if (MBBI != MBB.end()) { |
698 | dl = MBBI->getDebugLoc(); |
699 | auto InstUpToMBBI = MBB.end(); |
700 | while (InstUpToMBBI != MBBI) |
701 | // The pre-decrement is on purpose here. |
702 | // We want to have the liveness right before MBBI. |
703 | UsedRegs.stepBackward(MI: *--InstUpToMBBI); |
704 | } |
705 | |
706 | // Look for a register that can be directly use in the POP. |
707 | unsigned PopReg = 0; |
708 | // And some temporary register, just in case. |
709 | unsigned TemporaryReg = 0; |
710 | BitVector PopFriendly = |
711 | TRI.getAllocatableSet(MF, RC: TRI.getRegClass(i: ARM::tGPRRegClassID)); |
712 | |
713 | assert(PopFriendly.any() && "No allocatable pop-friendly register?!" ); |
714 | // Rebuild the GPRs from the high registers because they are removed |
715 | // form the GPR reg class for thumb1. |
716 | BitVector GPRsNoLRSP = |
717 | TRI.getAllocatableSet(MF, RC: TRI.getRegClass(i: ARM::hGPRRegClassID)); |
718 | GPRsNoLRSP |= PopFriendly; |
719 | GPRsNoLRSP.reset(Idx: ARM::LR); |
720 | GPRsNoLRSP.reset(Idx: ARM::SP); |
721 | GPRsNoLRSP.reset(Idx: ARM::PC); |
722 | findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TmpReg&: TemporaryReg, |
723 | MRI&: MF.getRegInfo()); |
724 | |
725 | // If we couldn't find a pop-friendly register, try restoring LR before |
726 | // popping the other callee-saved registers, so we could use one of them as a |
727 | // temporary. |
728 | bool UseLDRSP = false; |
729 | if (!PopReg && MBBI != MBB.begin()) { |
730 | auto PrevMBBI = MBBI; |
731 | PrevMBBI--; |
732 | if (PrevMBBI->getOpcode() == ARM::tPOP) { |
733 | UsedRegs.stepBackward(MI: *PrevMBBI); |
734 | findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, |
735 | TmpReg&: TemporaryReg, MRI&: MF.getRegInfo()); |
736 | if (PopReg) { |
737 | MBBI = PrevMBBI; |
738 | UseLDRSP = true; |
739 | } |
740 | } |
741 | } |
742 | |
743 | if (!DoIt && !PopReg && !TemporaryReg) |
744 | return false; |
745 | |
746 | assert((PopReg || TemporaryReg) && "Cannot get LR" ); |
747 | |
748 | if (UseLDRSP) { |
749 | assert(PopReg && "Do not know how to get LR" ); |
750 | // Load the LR via LDR tmp, [SP, #off] |
751 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tLDRspi)) |
752 | .addReg(RegNo: PopReg, flags: RegState::Define) |
753 | .addReg(RegNo: ARM::SP) |
754 | .addImm(Val: MBBI->getNumExplicitOperands() - 2) |
755 | .add(MOs: predOps(Pred: ARMCC::AL)) |
756 | .setMIFlag(MachineInstr::FrameDestroy); |
757 | // Move from the temporary register to the LR. |
758 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr)) |
759 | .addReg(RegNo: ARM::LR, flags: RegState::Define) |
760 | .addReg(RegNo: PopReg, flags: RegState::Kill) |
761 | .add(MOs: predOps(Pred: ARMCC::AL)) |
762 | .setMIFlag(MachineInstr::FrameDestroy); |
763 | // Advance past the pop instruction. |
764 | MBBI++; |
765 | // Increment the SP. |
766 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, |
767 | NumBytes: ArgRegsSaveSize + 4, ScratchReg: ARM::NoRegister, |
768 | MIFlags: MachineInstr::FrameDestroy); |
769 | return true; |
770 | } |
771 | |
772 | if (TemporaryReg) { |
773 | assert(!PopReg && "Unnecessary MOV is about to be inserted" ); |
774 | PopReg = PopFriendly.find_first(); |
775 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr)) |
776 | .addReg(RegNo: TemporaryReg, flags: RegState::Define) |
777 | .addReg(RegNo: PopReg, flags: RegState::Kill) |
778 | .add(MOs: predOps(Pred: ARMCC::AL)) |
779 | .setMIFlag(MachineInstr::FrameDestroy); |
780 | } |
781 | |
782 | if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { |
783 | // We couldn't use the direct restoration above, so |
784 | // perform the opposite conversion: tPOP_RET to tPOP. |
785 | MachineInstrBuilder MIB = |
786 | BuildMI(BB&: MBB, I: MBBI, MIMD: MBBI->getDebugLoc(), MCID: TII.get(Opcode: ARM::tPOP)) |
787 | .add(MOs: predOps(Pred: ARMCC::AL)) |
788 | .setMIFlag(MachineInstr::FrameDestroy); |
789 | bool Popped = false; |
790 | for (auto MO: MBBI->operands()) |
791 | if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && |
792 | MO.getReg() != ARM::PC) { |
793 | MIB.add(MO); |
794 | if (!MO.isImplicit()) |
795 | Popped = true; |
796 | } |
797 | // Is there anything left to pop? |
798 | if (!Popped) |
799 | MBB.erase(I: MIB.getInstr()); |
800 | // Erase the old instruction. |
801 | MBB.erase(I: MBBI); |
802 | MBBI = BuildMI(BB&: MBB, I: MBB.end(), MIMD: dl, MCID: TII.get(Opcode: ARM::tBX_RET)) |
803 | .add(MOs: predOps(Pred: ARMCC::AL)) |
804 | .setMIFlag(MachineInstr::FrameDestroy); |
805 | } |
806 | |
807 | assert(PopReg && "Do not know how to get LR" ); |
808 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tPOP)) |
809 | .add(MOs: predOps(Pred: ARMCC::AL)) |
810 | .addReg(RegNo: PopReg, flags: RegState::Define) |
811 | .setMIFlag(MachineInstr::FrameDestroy); |
812 | |
813 | emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, MRI: *RegInfo, NumBytes: ArgRegsSaveSize, |
814 | ScratchReg: ARM::NoRegister, MIFlags: MachineInstr::FrameDestroy); |
815 | |
816 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr)) |
817 | .addReg(RegNo: ARM::LR, flags: RegState::Define) |
818 | .addReg(RegNo: PopReg, flags: RegState::Kill) |
819 | .add(MOs: predOps(Pred: ARMCC::AL)) |
820 | .setMIFlag(MachineInstr::FrameDestroy); |
821 | |
822 | if (TemporaryReg) |
823 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr)) |
824 | .addReg(RegNo: PopReg, flags: RegState::Define) |
825 | .addReg(RegNo: TemporaryReg, flags: RegState::Kill) |
826 | .add(MOs: predOps(Pred: ARMCC::AL)) |
827 | .setMIFlag(MachineInstr::FrameDestroy); |
828 | |
829 | return true; |
830 | } |
831 | |
832 | static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, |
833 | ARM::R7, ARM::LR}; |
834 | static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9, |
835 | ARM::R10, ARM::R11}; |
836 | static const SmallVector<Register> OrderedCopyRegs = { |
837 | ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, |
838 | ARM::R5, ARM::R6, ARM::R7, ARM::LR}; |
839 | |
840 | static void splitLowAndHighRegs(const std::set<Register> &Regs, |
841 | std::set<Register> &LowRegs, |
842 | std::set<Register> &HighRegs) { |
843 | for (Register Reg : Regs) { |
844 | if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { |
845 | LowRegs.insert(x: Reg); |
846 | } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { |
847 | HighRegs.insert(x: Reg); |
848 | } else { |
849 | llvm_unreachable("callee-saved register of unexpected class" ); |
850 | } |
851 | } |
852 | } |
853 | |
854 | template <typename It> |
855 | It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, |
856 | const std::set<Register> &RegSet) { |
857 | return std::find_if(OrderedStartIt, OrderedEndIt, |
858 | [&](Register Reg) { return RegSet.count(x: Reg); }); |
859 | } |
860 | |
861 | static void pushRegsToStack(MachineBasicBlock &MBB, |
862 | MachineBasicBlock::iterator MI, |
863 | const TargetInstrInfo &TII, |
864 | const std::set<Register> &RegsToSave, |
865 | const std::set<Register> &CopyRegs) { |
866 | MachineFunction &MF = *MBB.getParent(); |
867 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
868 | DebugLoc DL; |
869 | |
870 | std::set<Register> LowRegs, HighRegs; |
871 | splitLowAndHighRegs(Regs: RegsToSave, LowRegs, HighRegs); |
872 | |
873 | // Push low regs first |
874 | if (!LowRegs.empty()) { |
875 | MachineInstrBuilder MIB = |
876 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH)).add(MOs: predOps(Pred: ARMCC::AL)); |
877 | for (unsigned Reg : OrderedLowRegs) { |
878 | if (LowRegs.count(x: Reg)) { |
879 | bool isKill = !MRI.isLiveIn(Reg); |
880 | if (isKill && !MRI.isReserved(PhysReg: Reg)) |
881 | MBB.addLiveIn(PhysReg: Reg); |
882 | |
883 | MIB.addReg(RegNo: Reg, flags: getKillRegState(B: isKill)); |
884 | } |
885 | } |
886 | MIB.setMIFlags(MachineInstr::FrameSetup); |
887 | } |
888 | |
889 | // Now push the high registers |
890 | // There are no store instructions that can access high registers directly, |
891 | // so we have to move them to low registers, and push them. |
892 | // This might take multiple pushes, as it is possible for there to |
893 | // be fewer low registers available than high registers which need saving. |
894 | |
895 | // Find the first register to save. |
896 | // Registers must be processed in reverse order so that in case we need to use |
897 | // multiple PUSH instructions, the order of the registers on the stack still |
898 | // matches the unwind info. They need to be swicthed back to ascending order |
899 | // before adding to the PUSH instruction. |
900 | auto HiRegToSave = getNextOrderedReg(OrderedStartIt: OrderedHighRegs.rbegin(), |
901 | OrderedEndIt: OrderedHighRegs.rend(), |
902 | RegSet: HighRegs); |
903 | |
904 | while (HiRegToSave != OrderedHighRegs.rend()) { |
905 | // Find the first low register to use. |
906 | auto CopyRegIt = getNextOrderedReg(OrderedStartIt: OrderedCopyRegs.rbegin(), |
907 | OrderedEndIt: OrderedCopyRegs.rend(), |
908 | RegSet: CopyRegs); |
909 | |
910 | // Create the PUSH, but don't insert it yet (the MOVs need to come first). |
911 | MachineInstrBuilder PushMIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::tPUSH)) |
912 | .add(MOs: predOps(Pred: ARMCC::AL)) |
913 | .setMIFlags(MachineInstr::FrameSetup); |
914 | |
915 | SmallVector<unsigned, 4> RegsToPush; |
916 | while (HiRegToSave != OrderedHighRegs.rend() && |
917 | CopyRegIt != OrderedCopyRegs.rend()) { |
918 | if (HighRegs.count(x: *HiRegToSave)) { |
919 | bool isKill = !MRI.isLiveIn(Reg: *HiRegToSave); |
920 | if (isKill && !MRI.isReserved(PhysReg: *HiRegToSave)) |
921 | MBB.addLiveIn(PhysReg: *HiRegToSave); |
922 | |
923 | // Emit a MOV from the high reg to the low reg. |
924 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr)) |
925 | .addReg(RegNo: *CopyRegIt, flags: RegState::Define) |
926 | .addReg(RegNo: *HiRegToSave, flags: getKillRegState(B: isKill)) |
927 | .add(MOs: predOps(Pred: ARMCC::AL)) |
928 | .setMIFlags(MachineInstr::FrameSetup); |
929 | |
930 | // Record the register that must be added to the PUSH. |
931 | RegsToPush.push_back(Elt: *CopyRegIt); |
932 | |
933 | CopyRegIt = getNextOrderedReg(OrderedStartIt: std::next(x: CopyRegIt), |
934 | OrderedEndIt: OrderedCopyRegs.rend(), |
935 | RegSet: CopyRegs); |
936 | HiRegToSave = getNextOrderedReg(OrderedStartIt: std::next(x: HiRegToSave), |
937 | OrderedEndIt: OrderedHighRegs.rend(), |
938 | RegSet: HighRegs); |
939 | } |
940 | } |
941 | |
942 | // Add the low registers to the PUSH, in ascending order. |
943 | for (unsigned Reg : llvm::reverse(C&: RegsToPush)) |
944 | PushMIB.addReg(RegNo: Reg, flags: RegState::Kill); |
945 | |
946 | // Insert the PUSH instruction after the MOVs. |
947 | MBB.insert(I: MI, MI: PushMIB); |
948 | } |
949 | } |
950 | |
951 | static void popRegsFromStack(MachineBasicBlock &MBB, |
952 | MachineBasicBlock::iterator &MI, |
953 | const TargetInstrInfo &TII, |
954 | const std::set<Register> &RegsToRestore, |
955 | const std::set<Register> &AvailableCopyRegs, |
956 | bool IsVarArg, bool HasV5Ops) { |
957 | if (RegsToRestore.empty()) |
958 | return; |
959 | |
960 | MachineFunction &MF = *MBB.getParent(); |
961 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
962 | DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); |
963 | |
964 | std::set<Register> LowRegs, HighRegs; |
965 | splitLowAndHighRegs(Regs: RegsToRestore, LowRegs, HighRegs); |
966 | |
967 | // Pop the high registers first |
968 | // There are no store instructions that can access high registers directly, |
969 | // so we have to pop into low registers and them move to the high registers. |
970 | // This might take multiple pops, as it is possible for there to |
971 | // be fewer low registers available than high registers which need restoring. |
972 | |
973 | // Find the first register to restore. |
974 | auto HiRegToRestore = getNextOrderedReg(OrderedStartIt: OrderedHighRegs.begin(), |
975 | OrderedEndIt: OrderedHighRegs.end(), |
976 | RegSet: HighRegs); |
977 | |
978 | std::set<Register> CopyRegs = AvailableCopyRegs; |
979 | Register LowScratchReg; |
980 | if (!HighRegs.empty() && CopyRegs.empty()) { |
981 | // No copy regs are available to pop high regs. Let's make use of a return |
982 | // register and the scratch register (IP/R12) to copy things around. |
983 | LowScratchReg = ARM::R0; |
984 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr)) |
985 | .addReg(RegNo: ARM::R12, flags: RegState::Define) |
986 | .addReg(RegNo: LowScratchReg, flags: RegState::Kill) |
987 | .add(MOs: predOps(Pred: ARMCC::AL)) |
988 | .setMIFlag(MachineInstr::FrameDestroy); |
989 | CopyRegs.insert(x: LowScratchReg); |
990 | } |
991 | |
992 | while (HiRegToRestore != OrderedHighRegs.end()) { |
993 | assert(!CopyRegs.empty()); |
994 | // Find the first low register to use. |
995 | auto CopyReg = getNextOrderedReg(OrderedStartIt: OrderedCopyRegs.begin(), |
996 | OrderedEndIt: OrderedCopyRegs.end(), |
997 | RegSet: CopyRegs); |
998 | |
999 | // Create the POP instruction. |
1000 | MachineInstrBuilder PopMIB = BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP)) |
1001 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1002 | .setMIFlag(MachineInstr::FrameDestroy); |
1003 | |
1004 | while (HiRegToRestore != OrderedHighRegs.end() && |
1005 | CopyReg != OrderedCopyRegs.end()) { |
1006 | // Add the low register to the POP. |
1007 | PopMIB.addReg(RegNo: *CopyReg, flags: RegState::Define); |
1008 | |
1009 | // Create the MOV from low to high register. |
1010 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr)) |
1011 | .addReg(RegNo: *HiRegToRestore, flags: RegState::Define) |
1012 | .addReg(RegNo: *CopyReg, flags: RegState::Kill) |
1013 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1014 | .setMIFlag(MachineInstr::FrameDestroy); |
1015 | |
1016 | CopyReg = getNextOrderedReg(OrderedStartIt: std::next(x: CopyReg), |
1017 | OrderedEndIt: OrderedCopyRegs.end(), |
1018 | RegSet: CopyRegs); |
1019 | HiRegToRestore = getNextOrderedReg(OrderedStartIt: std::next(x: HiRegToRestore), |
1020 | OrderedEndIt: OrderedHighRegs.end(), |
1021 | RegSet: HighRegs); |
1022 | } |
1023 | } |
1024 | |
1025 | // Restore low register used as scratch if necessary |
1026 | if (LowScratchReg.isValid()) { |
1027 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: ARM::tMOVr)) |
1028 | .addReg(RegNo: LowScratchReg, flags: RegState::Define) |
1029 | .addReg(RegNo: ARM::R12, flags: RegState::Kill) |
1030 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1031 | .setMIFlag(MachineInstr::FrameDestroy); |
1032 | } |
1033 | |
1034 | // Now pop the low registers |
1035 | if (!LowRegs.empty()) { |
1036 | MachineInstrBuilder MIB = BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: ARM::tPOP)) |
1037 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1038 | .setMIFlag(MachineInstr::FrameDestroy); |
1039 | |
1040 | bool NeedsPop = false; |
1041 | for (Register Reg : OrderedLowRegs) { |
1042 | if (!LowRegs.count(x: Reg)) |
1043 | continue; |
1044 | |
1045 | if (Reg == ARM::LR) { |
1046 | if (!MBB.succ_empty() || MI->getOpcode() == ARM::TCRETURNdi || |
1047 | MI->getOpcode() == ARM::TCRETURNri || |
1048 | MI->getOpcode() == ARM::TCRETURNrinotr12) |
1049 | // LR may only be popped into PC, as part of return sequence. |
1050 | // If this isn't the return sequence, we'll need emitPopSpecialFixUp |
1051 | // to restore LR the hard way. |
1052 | // FIXME: if we don't pass any stack arguments it would be actually |
1053 | // advantageous *and* correct to do the conversion to an ordinary call |
1054 | // instruction here. |
1055 | continue; |
1056 | // Special epilogue for vararg functions. See emitEpilogue |
1057 | if (IsVarArg) |
1058 | continue; |
1059 | // ARMv4T requires BX, see emitEpilogue |
1060 | if (!HasV5Ops) |
1061 | continue; |
1062 | |
1063 | // CMSE entry functions must return via BXNS, see emitEpilogue. |
1064 | if (AFI->isCmseNSEntryFunction()) |
1065 | continue; |
1066 | |
1067 | // Pop LR into PC. |
1068 | Reg = ARM::PC; |
1069 | (*MIB).setDesc(TII.get(Opcode: ARM::tPOP_RET)); |
1070 | if (MI != MBB.end()) |
1071 | MIB.copyImplicitOps(OtherMI: *MI); |
1072 | MI = MBB.erase(I: MI); |
1073 | } |
1074 | MIB.addReg(RegNo: Reg, flags: getDefRegState(B: true)); |
1075 | NeedsPop = true; |
1076 | } |
1077 | |
1078 | // It's illegal to emit pop instruction without operands. |
1079 | if (NeedsPop) |
1080 | MBB.insert(I: MI, MI: &*MIB); |
1081 | else |
1082 | MF.deleteMachineInstr(MI: MIB); |
1083 | } |
1084 | } |
1085 | |
1086 | bool Thumb1FrameLowering::spillCalleeSavedRegisters( |
1087 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
1088 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
1089 | if (CSI.empty()) |
1090 | return false; |
1091 | |
1092 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
1093 | MachineFunction &MF = *MBB.getParent(); |
1094 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( |
1095 | MF.getSubtarget().getRegisterInfo()); |
1096 | Register FPReg = RegInfo->getFrameRegister(MF); |
1097 | |
1098 | // In case FP is a high reg, we need a separate push sequence to generate |
1099 | // a correct Frame Record |
1100 | bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(Reg: FPReg); |
1101 | |
1102 | std::set<Register> FrameRecord; |
1103 | std::set<Register> SpilledGPRs; |
1104 | for (const CalleeSavedInfo &I : CSI) { |
1105 | Register Reg = I.getReg(); |
1106 | if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR)) |
1107 | FrameRecord.insert(x: Reg); |
1108 | else |
1109 | SpilledGPRs.insert(x: Reg); |
1110 | } |
1111 | |
1112 | pushRegsToStack(MBB, MI, TII, RegsToSave: FrameRecord, CopyRegs: {ARM::LR}); |
1113 | |
1114 | // Determine intermediate registers which can be used for pushing high regs: |
1115 | // - Spilled low regs |
1116 | // - Unused argument registers |
1117 | std::set<Register> CopyRegs; |
1118 | for (Register Reg : SpilledGPRs) |
1119 | if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && |
1120 | !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) |
1121 | CopyRegs.insert(x: Reg); |
1122 | for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) |
1123 | if (!MF.getRegInfo().isLiveIn(Reg: ArgReg)) |
1124 | CopyRegs.insert(x: ArgReg); |
1125 | |
1126 | pushRegsToStack(MBB, MI, TII, RegsToSave: SpilledGPRs, CopyRegs); |
1127 | |
1128 | return true; |
1129 | } |
1130 | |
1131 | bool Thumb1FrameLowering::restoreCalleeSavedRegisters( |
1132 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
1133 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
1134 | if (CSI.empty()) |
1135 | return false; |
1136 | |
1137 | MachineFunction &MF = *MBB.getParent(); |
1138 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
1139 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
1140 | const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( |
1141 | MF.getSubtarget().getRegisterInfo()); |
1142 | bool IsVarArg = AFI->getArgRegsSaveSize() > 0; |
1143 | Register FPReg = RegInfo->getFrameRegister(MF); |
1144 | |
1145 | // In case FP is a high reg, we need a separate pop sequence to generate |
1146 | // a correct Frame Record |
1147 | bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(Reg: FPReg); |
1148 | |
1149 | std::set<Register> FrameRecord; |
1150 | std::set<Register> SpilledGPRs; |
1151 | for (CalleeSavedInfo &I : CSI) { |
1152 | Register Reg = I.getReg(); |
1153 | if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR)) |
1154 | FrameRecord.insert(x: Reg); |
1155 | else |
1156 | SpilledGPRs.insert(x: Reg); |
1157 | |
1158 | if (Reg == ARM::LR) |
1159 | I.setRestored(false); |
1160 | } |
1161 | |
1162 | // Determine intermidiate registers which can be used for popping high regs: |
1163 | // - Spilled low regs |
1164 | // - Unused return registers |
1165 | std::set<Register> CopyRegs; |
1166 | std::set<Register> UnusedReturnRegs; |
1167 | for (Register Reg : SpilledGPRs) |
1168 | if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) |
1169 | CopyRegs.insert(x: Reg); |
1170 | auto Terminator = MBB.getFirstTerminator(); |
1171 | if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { |
1172 | UnusedReturnRegs.insert(x: ARM::R0); |
1173 | UnusedReturnRegs.insert(x: ARM::R1); |
1174 | UnusedReturnRegs.insert(x: ARM::R2); |
1175 | UnusedReturnRegs.insert(x: ARM::R3); |
1176 | for (auto Op : Terminator->implicit_operands()) { |
1177 | if (Op.isReg()) |
1178 | UnusedReturnRegs.erase(x: Op.getReg()); |
1179 | } |
1180 | } |
1181 | CopyRegs.insert(first: UnusedReturnRegs.begin(), last: UnusedReturnRegs.end()); |
1182 | |
1183 | // First pop regular spilled regs. |
1184 | popRegsFromStack(MBB, MI, TII, RegsToRestore: SpilledGPRs, AvailableCopyRegs: CopyRegs, IsVarArg, |
1185 | HasV5Ops: STI.hasV5TOps()); |
1186 | |
1187 | // LR may only be popped into pc, as part of a return sequence. |
1188 | // Check that no other pop instructions are inserted after that. |
1189 | assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) && |
1190 | "Can't insert pop after return sequence" ); |
1191 | |
1192 | // Now pop Frame Record regs. |
1193 | // Only unused return registers can be used as copy regs at this point. |
1194 | popRegsFromStack(MBB, MI, TII, RegsToRestore: FrameRecord, AvailableCopyRegs: UnusedReturnRegs, IsVarArg, |
1195 | HasV5Ops: STI.hasV5TOps()); |
1196 | |
1197 | return true; |
1198 | } |
1199 | |