1 | //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the PPC implementation of TargetFrameLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "PPCFrameLowering.h" |
14 | #include "MCTargetDesc/PPCPredicates.h" |
15 | #include "PPCInstrBuilder.h" |
16 | #include "PPCInstrInfo.h" |
17 | #include "PPCMachineFunctionInfo.h" |
18 | #include "PPCSubtarget.h" |
19 | #include "PPCTargetMachine.h" |
20 | #include "llvm/ADT/Statistic.h" |
21 | #include "llvm/CodeGen/LivePhysRegs.h" |
22 | #include "llvm/CodeGen/MachineFrameInfo.h" |
23 | #include "llvm/CodeGen/MachineFunction.h" |
24 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
25 | #include "llvm/CodeGen/MachineModuleInfo.h" |
26 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
27 | #include "llvm/CodeGen/RegisterScavenging.h" |
28 | #include "llvm/IR/Function.h" |
29 | #include "llvm/Target/TargetOptions.h" |
30 | |
31 | using namespace llvm; |
32 | |
#define DEBUG_TYPE "framelowering"
// Codegen statistics, reported when the compiler is run with -stats.
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue" );
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue" );
STATISTIC(NumPrologProbed, "Number of prologues probed" );

// Hidden knob: when set, callee saves in the prologue may go to vector
// registers rather than stack slots (see the NumPESpillVSR statistic above).
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills" ,
                     cl::desc("Enable spills in prologue to vector registers." ),
                     cl::init(Val: false), cl::Hidden);
42 | |
43 | static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { |
44 | if (STI.isAIXABI()) |
45 | return STI.isPPC64() ? 16 : 8; |
46 | // SVR4 ABI: |
47 | return STI.isPPC64() ? 16 : 4; |
48 | } |
49 | |
50 | static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { |
51 | if (STI.isAIXABI()) |
52 | return STI.isPPC64() ? 40 : 20; |
53 | return STI.isELFv2ABI() ? 24 : 40; |
54 | } |
55 | |
56 | static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { |
57 | // First slot in the general register save area. |
58 | return STI.isPPC64() ? -8U : -4U; |
59 | } |
60 | |
61 | static unsigned computeLinkageSize(const PPCSubtarget &STI) { |
62 | if (STI.isAIXABI() || STI.isPPC64()) |
63 | return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); |
64 | |
65 | // 32-bit SVR4 ABI: |
66 | return 8; |
67 | } |
68 | |
69 | static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { |
70 | // Third slot in the general purpose register save area. |
71 | if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent()) |
72 | return -12U; |
73 | |
74 | // Second slot in the general purpose register save area. |
75 | return STI.isPPC64() ? -16U : -8U; |
76 | } |
77 | |
78 | static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { |
79 | return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8; |
80 | } |
81 | |
// The PPC stack grows downward with the platform's required alignment and
// no local-area offset.  All ABI-dependent save-slot offsets and the
// linkage-area size are computed once here and cached in data members.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(STI: Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(STI: Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(STI: Subtarget)),
      LinkageSize(computeLinkageSize(STI: Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(STI: Subtarget)),
      CRSaveOffset(computeCRSaveOffset(STI: Subtarget)) {}
91 | |
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
// Returns the ABI-specific table of (register, offset) spill slots and
// reports its length through \p NumEntries.  Offsets are negative,
// relative to the incoming stack pointer.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

  // Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
  {PPC::F31, -8},         \
  {PPC::F30, -16},        \
  {PPC::F29, -24},        \
  {PPC::F28, -32},        \
  {PPC::F27, -40},        \
  {PPC::F26, -48},        \
  {PPC::F25, -56},        \
  {PPC::F24, -64},        \
  {PPC::F23, -72},        \
  {PPC::F22, -80},        \
  {PPC::F21, -88},        \
  {PPC::F20, -96},        \
  {PPC::F19, -104},       \
  {PPC::F18, -112},       \
  {PPC::F17, -120},       \
  {PPC::F16, -128},       \
  {PPC::F15, -136},       \
  {PPC::F14, -144}

  // 32-bit general purpose register save area offsets shared by ELF and
  // AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
  {PPC::R31, -4},           \
  {PPC::R30, -8},           \
  {PPC::R29, -12},          \
  {PPC::R28, -16},          \
  {PPC::R27, -20},          \
  {PPC::R26, -24},          \
  {PPC::R25, -28},          \
  {PPC::R24, -32},          \
  {PPC::R23, -36},          \
  {PPC::R22, -40},          \
  {PPC::R21, -44},          \
  {PPC::R20, -48},          \
  {PPC::R19, -52},          \
  {PPC::R18, -56},          \
  {PPC::R17, -60},          \
  {PPC::R16, -64},          \
  {PPC::R15, -68},          \
  {PPC::R14, -72}

  // 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
  {PPC::X31, -8},           \
  {PPC::X30, -16},          \
  {PPC::X29, -24},          \
  {PPC::X28, -32},          \
  {PPC::X27, -40},          \
  {PPC::X26, -48},          \
  {PPC::X25, -56},          \
  {PPC::X24, -64},          \
  {PPC::X23, -72},          \
  {PPC::X22, -80},          \
  {PPC::X21, -88},          \
  {PPC::X20, -96},          \
  {PPC::X19, -104},         \
  {PPC::X18, -112},         \
  {PPC::X17, -120},         \
  {PPC::X16, -128},         \
  {PPC::X15, -136},         \
  {PPC::X14, -144}

  // Vector register save area offsets (16-byte slots).
#define CALLEE_SAVED_VRS \
  {PPC::V31, -16},       \
  {PPC::V30, -32},       \
  {PPC::V29, -48},       \
  {PPC::V28, -64},       \
  {PPC::V27, -80},       \
  {PPC::V26, -96},       \
  {PPC::V25, -112},      \
  {PPC::V24, -128},      \
  {PPC::V23, -144},      \
  {PPC::V22, -160},      \
  {PPC::V21, -176},      \
  {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset. We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {.Reg: PPC::CR2, .Offset: -4},

      // VRSAVE save area offset.
      {.Reg: PPC::VRSAVE, .Offset: -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {.Reg: PPC::S31, .Offset: -8},
      {.Reg: PPC::S30, .Offset: -16},
      {.Reg: PPC::S29, .Offset: -24},
      {.Reg: PPC::S28, .Offset: -32},
      {.Reg: PPC::S27, .Offset: -40},
      {.Reg: PPC::S26, .Offset: -48},
      {.Reg: PPC::S25, .Offset: -56},
      {.Reg: PPC::S24, .Offset: -64},
      {.Reg: PPC::S23, .Offset: -72},
      {.Reg: PPC::S22, .Offset: -80},
      {.Reg: PPC::S21, .Offset: -88},
      {.Reg: PPC::S20, .Offset: -96},
      {.Reg: PPC::S19, .Offset: -104},
      {.Reg: PPC::S18, .Offset: -112},
      {.Reg: PPC::S17, .Offset: -120},
      {.Reg: PPC::S16, .Offset: -128},
      {.Reg: PPC::S15, .Offset: -136},
      {.Reg: PPC::S14, .Offset: -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {.Reg: PPC::VRSAVE, .Offset: -4},
      CALLEE_SAVED_VRS
  };

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {.Reg: PPC::R13, .Offset: -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  // Select the table matching the current ABI / word size.
  if (Subtarget.is64BitELFABI()) {
    NumEntries = std::size(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = std::size(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI." );

  if (Subtarget.isPPC64()) {
    NumEntries = std::size(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = std::size(AIXOffsets32);
  return AIXOffsets32;
}
251 | |
252 | static bool spillsCR(const MachineFunction &MF) { |
253 | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
254 | return FuncInfo->isCRSpilled(); |
255 | } |
256 | |
257 | static bool hasSpills(const MachineFunction &MF) { |
258 | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
259 | return FuncInfo->hasSpills(); |
260 | } |
261 | |
262 | static bool hasNonRISpills(const MachineFunction &MF) { |
263 | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
264 | return FuncInfo->hasNonRISpills(); |
265 | } |
266 | |
267 | /// MustSaveLR - Return true if this function requires that we save the LR |
268 | /// register onto the stack in the prolog and restore it in the epilog of the |
269 | /// function. |
270 | static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { |
271 | const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); |
272 | |
273 | // We need a save/restore of LR if there is any def of LR (which is |
274 | // defined by calls, including the PIC setup sequence), or if there is |
275 | // some use of the LR stack slot (e.g. for builtin_return_address). |
276 | // (LR comes in 32 and 64 bit versions.) |
277 | MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(RegNo: LR); |
278 | return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); |
279 | } |
280 | |
281 | /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum |
282 | /// call frame size. Update the MachineFunction object with the stack size. |
283 | uint64_t |
284 | PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, |
285 | bool UseEstimate) const { |
286 | unsigned NewMaxCallFrameSize = 0; |
287 | uint64_t FrameSize = determineFrameLayout(MF, UseEstimate, |
288 | NewMaxCallFrameSize: &NewMaxCallFrameSize); |
289 | MF.getFrameInfo().setStackSize(FrameSize); |
290 | MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); |
291 | return FrameSize; |
292 | } |
293 | |
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
/// \param MF the function being laid out.
/// \param UseEstimate use MachineFrameInfo's stack-size estimate instead of
///        the finalized size (for queries made before layout is final).
/// \param NewMaxCallFrameSize if non-null, receives the (possibly aligned)
///        maximum call frame size.
/// \return the total aligned frame size, or 0 if the red zone suffices.
uint64_t
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
                                       bool UseEstimate,
                                       unsigned *NewMaxCallFrameSize) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Get the number of bytes to allocate from the FrameInfo
  uint64_t FrameSize =
      UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();

  // Get stack alignments. The frame must be aligned to the greatest of these:
  Align TargetAlign = getStackAlign(); // alignment required per the ABI
  Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
  Align Alignment = std::max(a: TargetAlign, b: MaxAlign);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  unsigned LR = RegInfo->getRARegister();
  bool DisableRedZone = MF.getFunction().hasFnAttribute(Kind: Attribute::NoRedZone);
  // The red zone may only be used when nothing below forces a real frame.
  bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
                       !MFI.adjustsStack() &&       // No calls.
                       !MustSaveLR(MF, LR) &&       // No need to save LR.
                       !FI->mustSaveTOC() &&        // No need to save TOC.
                       !RegInfo->hasBasePointer(MF) && // No special alignment.
                       !MFI.isFrameAddressTaken();

  // Note: for PPC32 SVR4ABI, we can still generate stackless
  // code if all local vars are reg-allocated.
  bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();

  // Check whether we can skip adjusting the stack pointer (by using red zone)
  if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
    // No need for frame
    return 0;
  }

  // Get the maximum call frame size of all the calls.
  unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();

  // Maximum call frame needs to be at least big enough for linkage area.
  unsigned minCallFrameSize = getLinkageSize();
  maxCallFrameSize = std::max(a: maxCallFrameSize, b: minCallFrameSize);

  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
  // that allocations will be aligned.
  if (MFI.hasVarSizedObjects())
    maxCallFrameSize = alignTo(Size: maxCallFrameSize, A: Alignment);

  // Update the new max call frame size if the caller passes in a valid pointer.
  if (NewMaxCallFrameSize)
    *NewMaxCallFrameSize = maxCallFrameSize;

  // Include call frame size in total.
  FrameSize += maxCallFrameSize;

  // Make sure the frame is aligned.
  FrameSize = alignTo(Size: FrameSize, A: Alignment);

  return FrameSize;
}
357 | |
358 | // hasFP - Return true if the specified function actually has a dedicated frame |
359 | // pointer register. |
360 | bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { |
361 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
362 | // FIXME: This is pretty much broken by design: hasFP() might be called really |
363 | // early, before the stack layout was calculated and thus hasFP() might return |
364 | // true or false here depending on the time of call. |
365 | return (MFI.getStackSize()) && needsFP(MF); |
366 | } |
367 | |
368 | // needsFP - Return true if the specified function should have a dedicated frame |
369 | // pointer register. This is true if the function has variable sized allocas or |
370 | // if frame pointer elimination is disabled. |
371 | bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { |
372 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
373 | |
374 | // Naked functions have no stack frame pushed, so we don't have a frame |
375 | // pointer. |
376 | if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked)) |
377 | return false; |
378 | |
379 | return MF.getTarget().Options.DisableFramePointerElim(MF) || |
380 | MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || |
381 | MF.exposesReturnsTwice() || |
382 | (MF.getTarget().Options.GuaranteedTailCallOpt && |
383 | MF.getInfo<PPCFunctionInfo>()->hasFastCall()); |
384 | } |
385 | |
386 | void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { |
387 | // When there is dynamic alloca in this function, we can not use the frame |
388 | // pointer X31/R31 for the frameaddress lowering. In this case, only X1/R1 |
389 | // always points to the backchain. |
390 | bool is31 = needsFP(MF) && !MF.getFrameInfo().hasVarSizedObjects(); |
391 | unsigned FPReg = is31 ? PPC::R31 : PPC::R1; |
392 | unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; |
393 | |
394 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
395 | bool HasBP = RegInfo->hasBasePointer(MF); |
396 | unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; |
397 | unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; |
398 | |
399 | for (MachineBasicBlock &MBB : MF) |
400 | for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) { |
401 | --MBBI; |
402 | for (MachineOperand &MO : MBBI->operands()) { |
403 | if (!MO.isReg()) |
404 | continue; |
405 | |
406 | switch (MO.getReg()) { |
407 | case PPC::FP: |
408 | MO.setReg(FPReg); |
409 | break; |
410 | case PPC::FP8: |
411 | MO.setReg(FP8Reg); |
412 | break; |
413 | case PPC::BP: |
414 | MO.setReg(BPReg); |
415 | break; |
416 | case PPC::BP8: |
417 | MO.setReg(BP8Reg); |
418 | break; |
419 | |
420 | } |
421 | } |
422 | } |
423 | } |
424 | |
/* This function will do the following:
   - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
     respectively (defaults recommended by the ABI) and return true
   - If MBB is not an entry block, initialize the register scavenger and look
     for available registers.
   - If the defaults (R0/R12) are available, return true
   - If TwoUniqueRegsRequired is set to true, it looks for two unique
     registers. Otherwise, look for a single available register.
   - If the required registers are found, set SR1 and SR2 and return true.
   - If the required registers are not found, set SR2 or both SR1 and SR2 to
     PPC::NoRegister and return false.

   Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
   is not set, this function will attempt to find two different registers, but
   still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  // R0/R12 (or X0/X12 in 64-bit mode) are the ABI-recommended scratch regs.
  Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?" );
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  if (UseAtEnd) {
    // The scratch register will be used before the first terminator (or at the
    // end of the block if there are no terminators).
    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    if (MBBI == MBB->begin()) {
      RS.enterBasicBlock(MBB&: *MBB);
    } else {
      // Scan backwards from the end so liveness reflects the insertion point.
      RS.enterBasicBlockEnd(MBB&: *MBB);
      RS.backward(I: MBBI);
    }
  } else {
    // The scratch register will be used at the start of the block.
    RS.enterBasicBlock(MBB&: *MBB);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(Reg: R0) && !RS.isRegUsed(Reg: R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF: MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(RC: Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(Idx: CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(Prev: *SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}
526 | |
527 | // We need a scratch register for spilling LR and for spilling CR. By default, |
528 | // we use two scratch registers to hide latency. However, if only one scratch |
529 | // register is available, we can adjust for that by not overlapping the spill |
530 | // code. However, if we need to realign the stack (i.e. have a base pointer) |
531 | // and the stack frame is large, we need two scratch registers. |
532 | // Also, stack probe requires two scratch registers, one for old sp, one for |
533 | // large frame and large probe size. |
534 | bool |
535 | PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { |
536 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
537 | MachineFunction &MF = *(MBB->getParent()); |
538 | bool HasBP = RegInfo->hasBasePointer(MF); |
539 | unsigned FrameSize = determineFrameLayout(MF); |
540 | int NegFrameSize = -FrameSize; |
541 | bool IsLargeFrame = !isInt<16>(x: NegFrameSize); |
542 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
543 | Align MaxAlign = MFI.getMaxAlign(); |
544 | bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); |
545 | const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); |
546 | |
547 | return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) || |
548 | TLI.hasInlineStackProbe(MF); |
549 | } |
550 | |
551 | bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { |
552 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
553 | |
554 | return findScratchRegister(MBB: TmpMBB, UseAtEnd: false, |
555 | TwoUniqueRegsRequired: twoUniqueScratchRegsRequired(MBB: TmpMBB)); |
556 | } |
557 | |
558 | bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
559 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
560 | |
561 | return findScratchRegister(MBB: TmpMBB, UseAtEnd: true); |
562 | } |
563 | |
564 | bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { |
565 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
566 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
567 | |
568 | // Abort if there is no register info or function info. |
569 | if (!RegInfo || !FI) |
570 | return false; |
571 | |
572 | // Only move the stack update on ELFv2 ABI and PPC64. |
573 | if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) |
574 | return false; |
575 | |
576 | // Check the frame size first and return false if it does not fit the |
577 | // requirements. |
578 | // We need a non-zero frame size as well as a frame that will fit in the red |
579 | // zone. This is because by moving the stack pointer update we are now storing |
580 | // to the red zone until the stack pointer is updated. If we get an interrupt |
581 | // inside the prologue but before the stack update we now have a number of |
582 | // stores to the red zone and those stores must all fit. |
583 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
584 | unsigned FrameSize = MFI.getStackSize(); |
585 | if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) |
586 | return false; |
587 | |
588 | // Frame pointers and base pointers complicate matters so don't do anything |
589 | // if we have them. For example having a frame pointer will sometimes require |
590 | // a copy of r1 into r31 and that makes keeping track of updates to r1 more |
591 | // difficult. Similar situation exists with setjmp. |
592 | if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice()) |
593 | return false; |
594 | |
595 | // Calls to fast_cc functions use different rules for passing parameters on |
596 | // the stack from the ABI and using PIC base in the function imposes |
597 | // similar restrictions to using the base pointer. It is not generally safe |
598 | // to move the stack pointer update in these situations. |
599 | if (FI->hasFastCall() || FI->usesPICBase()) |
600 | return false; |
601 | |
602 | // Finally we can move the stack update if we do not require register |
603 | // scavenging. Register scavenging can introduce more spills and so |
604 | // may make the frame size larger than we have computed. |
605 | return !RegInfo->requiresFrameIndexScavenging(MF); |
606 | } |
607 | |
608 | void PPCFrameLowering::emitPrologue(MachineFunction &MF, |
609 | MachineBasicBlock &MBB) const { |
610 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
611 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
612 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
613 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
614 | const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); |
615 | |
616 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
617 | DebugLoc dl; |
618 | // AIX assembler does not support cfi directives. |
619 | const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); |
620 | |
621 | const bool HasFastMFLR = Subtarget.hasFastMFLR(); |
622 | |
623 | // Get processor type. |
624 | bool isPPC64 = Subtarget.isPPC64(); |
625 | // Get the ABI. |
626 | bool isSVR4ABI = Subtarget.isSVR4ABI(); |
627 | bool isELFv2ABI = Subtarget.isELFv2ABI(); |
628 | assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI." ); |
629 | |
630 | // Work out frame sizes. |
631 | uint64_t FrameSize = determineFrameLayoutAndUpdate(MF); |
632 | int64_t NegFrameSize = -FrameSize; |
633 | if (!isPPC64 && (!isInt<32>(x: FrameSize) || !isInt<32>(x: NegFrameSize))) |
634 | llvm_unreachable("Unhandled stack size!" ); |
635 | |
636 | if (MFI.isFrameAddressTaken()) |
637 | replaceFPWithRealFP(MF); |
638 | |
639 | // Check if the link register (LR) must be saved. |
640 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
641 | bool MustSaveLR = FI->mustSaveLR(); |
642 | bool MustSaveTOC = FI->mustSaveTOC(); |
643 | const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); |
644 | bool MustSaveCR = !MustSaveCRs.empty(); |
645 | // Do we have a frame pointer and/or base pointer for this function? |
646 | bool HasFP = hasFP(MF); |
647 | bool HasBP = RegInfo->hasBasePointer(MF); |
648 | bool HasRedZone = isPPC64 || !isSVR4ABI; |
649 | bool HasROPProtect = Subtarget.hasROPProtect(); |
650 | bool HasPrivileged = Subtarget.hasPrivileged(); |
651 | |
652 | Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; |
653 | Register BPReg = RegInfo->getBaseRegister(MF); |
654 | Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; |
655 | Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; |
656 | Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; |
657 | Register ScratchReg; |
658 | Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg |
659 | // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) |
660 | const MCInstrDesc& MFLRInst = TII.get(Opcode: isPPC64 ? PPC::MFLR8 |
661 | : PPC::MFLR ); |
662 | const MCInstrDesc& StoreInst = TII.get(Opcode: isPPC64 ? PPC::STD |
663 | : PPC::STW ); |
664 | const MCInstrDesc& StoreUpdtInst = TII.get(Opcode: isPPC64 ? PPC::STDU |
665 | : PPC::STWU ); |
666 | const MCInstrDesc& StoreUpdtIdxInst = TII.get(Opcode: isPPC64 ? PPC::STDUX |
667 | : PPC::STWUX); |
668 | const MCInstrDesc& OrInst = TII.get(Opcode: isPPC64 ? PPC::OR8 |
669 | : PPC::OR ); |
670 | const MCInstrDesc& SubtractCarryingInst = TII.get(Opcode: isPPC64 ? PPC::SUBFC8 |
671 | : PPC::SUBFC); |
672 | const MCInstrDesc& SubtractImmCarryingInst = TII.get(Opcode: isPPC64 ? PPC::SUBFIC8 |
673 | : PPC::SUBFIC); |
674 | const MCInstrDesc &MoveFromCondRegInst = TII.get(Opcode: isPPC64 ? PPC::MFCR8 |
675 | : PPC::MFCR); |
676 | const MCInstrDesc &StoreWordInst = TII.get(Opcode: isPPC64 ? PPC::STW8 : PPC::STW); |
677 | const MCInstrDesc &HashST = |
678 | TII.get(Opcode: isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8) |
679 | : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST)); |
680 | |
681 | // Regarding this assert: Even though LR is saved in the caller's frame (i.e., |
682 | // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no |
683 | // Red Zone, an asynchronous event (a form of "callee") could claim a frame & |
684 | // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. |
685 | assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && |
686 | "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4." ); |
687 | |
688 | // Using the same bool variable as below to suppress compiler warnings. |
689 | bool SingleScratchReg = findScratchRegister( |
690 | MBB: &MBB, UseAtEnd: false, TwoUniqueRegsRequired: twoUniqueScratchRegsRequired(MBB: &MBB), SR1: &ScratchReg, SR2: &TempReg); |
691 | assert(SingleScratchReg && |
692 | "Required number of registers not available in this block" ); |
693 | |
694 | SingleScratchReg = ScratchReg == TempReg; |
695 | |
696 | int64_t LROffset = getReturnSaveOffset(); |
697 | |
698 | int64_t FPOffset = 0; |
699 | if (HasFP) { |
700 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
701 | int FPIndex = FI->getFramePointerSaveIndex(); |
702 | assert(FPIndex && "No Frame Pointer Save Slot!" ); |
703 | FPOffset = MFI.getObjectOffset(ObjectIdx: FPIndex); |
704 | } |
705 | |
706 | int64_t BPOffset = 0; |
707 | if (HasBP) { |
708 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
709 | int BPIndex = FI->getBasePointerSaveIndex(); |
710 | assert(BPIndex && "No Base Pointer Save Slot!" ); |
711 | BPOffset = MFI.getObjectOffset(ObjectIdx: BPIndex); |
712 | } |
713 | |
714 | int64_t PBPOffset = 0; |
715 | if (FI->usesPICBase()) { |
716 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
717 | int PBPIndex = FI->getPICBasePointerSaveIndex(); |
718 | assert(PBPIndex && "No PIC Base Pointer Save Slot!" ); |
719 | PBPOffset = MFI.getObjectOffset(ObjectIdx: PBPIndex); |
720 | } |
721 | |
722 | // Get stack alignments. |
723 | Align MaxAlign = MFI.getMaxAlign(); |
724 | if (HasBP && MaxAlign > 1) |
725 | assert(Log2(MaxAlign) < 16 && "Invalid alignment!" ); |
726 | |
727 | // Frames of 32KB & larger require special handling because they cannot be |
728 | // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. |
729 | bool isLargeFrame = !isInt<16>(x: NegFrameSize); |
730 | |
731 | // Check if we can move the stack update instruction (stdu) down the prologue |
732 | // past the callee saves. Hopefully this will avoid the situation where the |
733 | // saves are waiting for the update on the store with update to complete. |
734 | MachineBasicBlock::iterator StackUpdateLoc = MBBI; |
735 | bool MovingStackUpdateDown = false; |
736 | |
737 | // Check if we can move the stack update. |
738 | if (stackUpdateCanBeMoved(MF)) { |
739 | const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); |
740 | for (CalleeSavedInfo CSI : Info) { |
741 | // If the callee saved register is spilled to a register instead of the |
742 | // stack then the spill no longer uses the stack pointer. |
743 | // This can lead to two consequences: |
744 | // 1) We no longer need to update the stack because the function does not |
745 | // spill any callee saved registers to stack. |
746 | // 2) We have a situation where we still have to update the stack pointer |
747 | // even though some registers are spilled to other registers. In |
748 | // this case the current code moves the stack update to an incorrect |
749 | // position. |
750 | // In either case we should abort moving the stack update operation. |
751 | if (CSI.isSpilledToReg()) { |
752 | StackUpdateLoc = MBBI; |
753 | MovingStackUpdateDown = false; |
754 | break; |
755 | } |
756 | |
757 | int FrIdx = CSI.getFrameIdx(); |
758 | // If the frame index is not negative the callee saved info belongs to a |
759 | // stack object that is not a fixed stack object. We ignore non-fixed |
760 | // stack objects because we won't move the stack update pointer past them. |
761 | if (FrIdx >= 0) |
762 | continue; |
763 | |
764 | if (MFI.isFixedObjectIndex(ObjectIdx: FrIdx) && MFI.getObjectOffset(ObjectIdx: FrIdx) < 0) { |
765 | StackUpdateLoc++; |
766 | MovingStackUpdateDown = true; |
767 | } else { |
768 | // We need all of the Frame Indices to meet these conditions. |
769 | // If they do not, abort the whole operation. |
770 | StackUpdateLoc = MBBI; |
771 | MovingStackUpdateDown = false; |
772 | break; |
773 | } |
774 | } |
775 | |
776 | // If the operation was not aborted then update the object offset. |
777 | if (MovingStackUpdateDown) { |
778 | for (CalleeSavedInfo CSI : Info) { |
779 | int FrIdx = CSI.getFrameIdx(); |
780 | if (FrIdx < 0) |
781 | MFI.setObjectOffset(ObjectIdx: FrIdx, SPOffset: MFI.getObjectOffset(ObjectIdx: FrIdx) + NegFrameSize); |
782 | } |
783 | } |
784 | } |
785 | |
786 | // Where in the prologue we move the CR fields depends on how many scratch |
787 | // registers we have, and if we need to save the link register or not. This |
788 | // lambda is to avoid duplicating the logic in 2 places. |
789 | auto BuildMoveFromCR = [&]() { |
790 | if (isELFv2ABI && MustSaveCRs.size() == 1) { |
791 | // In the ELFv2 ABI, we are not required to save all CR fields. |
792 | // If only one CR field is clobbered, it is more efficient to use |
793 | // mfocrf to selectively save just that field, because mfocrf has short |
      // latency compared to mfcr.
795 | assert(isPPC64 && "V2 ABI is 64-bit only." ); |
796 | MachineInstrBuilder MIB = |
797 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::MFOCRF8), DestReg: TempReg); |
798 | MIB.addReg(RegNo: MustSaveCRs[0], flags: RegState::Kill); |
799 | } else { |
800 | MachineInstrBuilder MIB = |
801 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: MoveFromCondRegInst, DestReg: TempReg); |
802 | for (unsigned CRfield : MustSaveCRs) |
803 | MIB.addReg(RegNo: CRfield, flags: RegState::ImplicitKill); |
804 | } |
805 | }; |
806 | |
807 | // If we need to spill the CR and the LR but we don't have two separate |
808 | // registers available, we must spill them one at a time |
809 | if (MustSaveCR && SingleScratchReg && MustSaveLR) { |
810 | BuildMoveFromCR(); |
811 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreWordInst) |
812 | .addReg(RegNo: TempReg, flags: getKillRegState(B: true)) |
813 | .addImm(Val: CRSaveOffset) |
814 | .addReg(RegNo: SPReg); |
815 | } |
816 | |
817 | if (MustSaveLR) |
818 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: MFLRInst, DestReg: ScratchReg); |
819 | |
820 | if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) |
821 | BuildMoveFromCR(); |
822 | |
823 | if (HasRedZone) { |
824 | if (HasFP) |
825 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
826 | .addReg(RegNo: FPReg) |
827 | .addImm(Val: FPOffset) |
828 | .addReg(RegNo: SPReg); |
829 | if (FI->usesPICBase()) |
830 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
831 | .addReg(RegNo: PPC::R30) |
832 | .addImm(Val: PBPOffset) |
833 | .addReg(RegNo: SPReg); |
834 | if (HasBP) |
835 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
836 | .addReg(RegNo: BPReg) |
837 | .addImm(Val: BPOffset) |
838 | .addReg(RegNo: SPReg); |
839 | } |
840 | |
841 | // Generate the instruction to store the LR. In the case where ROP protection |
842 | // is required the register holding the LR should not be killed as it will be |
843 | // used by the hash store instruction. |
844 | auto SaveLR = [&](int64_t Offset) { |
845 | assert(MustSaveLR && "LR is not required to be saved!" ); |
846 | BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: StoreInst) |
847 | .addReg(RegNo: ScratchReg, flags: getKillRegState(B: !HasROPProtect)) |
848 | .addImm(Val: Offset) |
849 | .addReg(RegNo: SPReg); |
850 | |
851 | // Add the ROP protection Hash Store instruction. |
852 | // NOTE: This is technically a violation of the ABI. The hash can be saved |
853 | // up to 512 bytes into the Protected Zone. This can be outside of the |
854 | // initial 288 byte volatile program storage region in the Protected Zone. |
855 | // However, this restriction will be removed in an upcoming revision of the |
856 | // ABI. |
857 | if (HasROPProtect) { |
858 | const int SaveIndex = FI->getROPProtectionHashSaveIndex(); |
859 | const int64_t ImmOffset = MFI.getObjectOffset(ObjectIdx: SaveIndex); |
860 | assert((ImmOffset <= -8 && ImmOffset >= -512) && |
861 | "ROP hash save offset out of range." ); |
862 | assert(((ImmOffset & 0x7) == 0) && |
863 | "ROP hash save offset must be 8 byte aligned." ); |
864 | BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: HashST) |
865 | .addReg(RegNo: ScratchReg, flags: getKillRegState(B: true)) |
866 | .addImm(Val: ImmOffset) |
867 | .addReg(RegNo: SPReg); |
868 | } |
869 | }; |
870 | |
871 | if (MustSaveLR && HasFastMFLR) |
872 | SaveLR(LROffset); |
873 | |
874 | if (MustSaveCR && |
875 | !(SingleScratchReg && MustSaveLR)) { |
876 | assert(HasRedZone && "A red zone is always available on PPC64" ); |
877 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreWordInst) |
878 | .addReg(RegNo: TempReg, flags: getKillRegState(B: true)) |
879 | .addImm(Val: CRSaveOffset) |
880 | .addReg(RegNo: SPReg); |
881 | } |
882 | |
883 | // Skip the rest if this is a leaf function & all spills fit in the Red Zone. |
884 | if (!FrameSize) { |
885 | if (MustSaveLR && !HasFastMFLR) |
886 | SaveLR(LROffset); |
887 | return; |
888 | } |
889 | |
890 | // Adjust stack pointer: r1 += NegFrameSize. |
891 | // If there is a preferred stack alignment, align R1 now |
892 | |
893 | if (HasBP && HasRedZone) { |
894 | // Save a copy of r1 as the base pointer. |
895 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: BPReg) |
896 | .addReg(RegNo: SPReg) |
897 | .addReg(RegNo: SPReg); |
898 | } |
899 | |
900 | // Have we generated a STUX instruction to claim stack frame? If so, |
901 | // the negated frame size will be placed in ScratchReg. |
902 | bool HasSTUX = |
903 | (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) || |
904 | (HasBP && MaxAlign > 1) || isLargeFrame; |
905 | |
906 | // If we use STUX to update the stack pointer, we need the two scratch |
907 | // registers TempReg and ScratchReg, we have to save LR here which is stored |
908 | // in ScratchReg. |
909 | // If the offset can not be encoded into the store instruction, we also have |
910 | // to save LR here. |
911 | if (MustSaveLR && !HasFastMFLR && |
912 | (HasSTUX || !isInt<16>(x: FrameSize + LROffset))) |
913 | SaveLR(LROffset); |
914 | |
915 | // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain |
916 | // pointer is always stored at SP, we will get a free probe due to an essential |
917 | // STU(X) instruction. |
918 | if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) { |
919 | // To be consistent with other targets, a pseudo instruction is emitted and |
920 | // will be later expanded in `inlineStackProbe`. |
921 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, |
922 | MCID: TII.get(Opcode: isPPC64 ? PPC::PROBED_STACKALLOC_64 |
923 | : PPC::PROBED_STACKALLOC_32)) |
924 | .addDef(RegNo: TempReg) |
925 | .addDef(RegNo: ScratchReg) // ScratchReg stores the old sp. |
926 | .addImm(Val: NegFrameSize); |
927 | // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we |
928 | // update the ScratchReg to meet the assumption that ScratchReg contains |
929 | // the NegFrameSize. This solution is rather tricky. |
930 | if (!HasRedZone) { |
931 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::SUBF), DestReg: ScratchReg) |
932 | .addReg(RegNo: ScratchReg) |
933 | .addReg(RegNo: SPReg); |
934 | } |
935 | } else { |
936 | // This condition must be kept in sync with canUseAsPrologue. |
937 | if (HasBP && MaxAlign > 1) { |
938 | if (isPPC64) |
939 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::RLDICL), DestReg: ScratchReg) |
940 | .addReg(RegNo: SPReg) |
941 | .addImm(Val: 0) |
942 | .addImm(Val: 64 - Log2(A: MaxAlign)); |
943 | else // PPC32... |
944 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::RLWINM), DestReg: ScratchReg) |
945 | .addReg(RegNo: SPReg) |
946 | .addImm(Val: 0) |
947 | .addImm(Val: 32 - Log2(A: MaxAlign)) |
948 | .addImm(Val: 31); |
949 | if (!isLargeFrame) { |
950 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: SubtractImmCarryingInst, DestReg: ScratchReg) |
951 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
952 | .addImm(Val: NegFrameSize); |
953 | } else { |
954 | assert(!SingleScratchReg && "Only a single scratch reg available" ); |
955 | TII.materializeImmPostRA(MBB, MBBI, DL: dl, Reg: TempReg, Imm: NegFrameSize); |
956 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: SubtractCarryingInst, DestReg: ScratchReg) |
957 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
958 | .addReg(RegNo: TempReg, flags: RegState::Kill); |
959 | } |
960 | |
961 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreUpdtIdxInst, DestReg: SPReg) |
962 | .addReg(RegNo: SPReg, flags: RegState::Kill) |
963 | .addReg(RegNo: SPReg) |
964 | .addReg(RegNo: ScratchReg); |
965 | } else if (!isLargeFrame) { |
966 | BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: StoreUpdtInst, DestReg: SPReg) |
967 | .addReg(RegNo: SPReg) |
968 | .addImm(Val: NegFrameSize) |
969 | .addReg(RegNo: SPReg); |
970 | } else { |
971 | TII.materializeImmPostRA(MBB, MBBI, DL: dl, Reg: ScratchReg, Imm: NegFrameSize); |
972 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreUpdtIdxInst, DestReg: SPReg) |
973 | .addReg(RegNo: SPReg, flags: RegState::Kill) |
974 | .addReg(RegNo: SPReg) |
975 | .addReg(RegNo: ScratchReg); |
976 | } |
977 | } |
978 | |
979 | // Save the TOC register after the stack pointer update if a prologue TOC |
980 | // save is required for the function. |
981 | if (MustSaveTOC) { |
982 | assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2" ); |
983 | BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: TII.get(Opcode: PPC::STD)) |
984 | .addReg(RegNo: TOCReg, flags: getKillRegState(B: true)) |
985 | .addImm(Val: TOCSaveOffset) |
986 | .addReg(RegNo: SPReg); |
987 | } |
988 | |
989 | if (!HasRedZone) { |
990 | assert(!isPPC64 && "A red zone is always available on PPC64" ); |
991 | if (HasSTUX) { |
992 | // The negated frame size is in ScratchReg, and the SPReg has been |
993 | // decremented by the frame size: SPReg = old SPReg + ScratchReg. |
994 | // Since FPOffset, PBPOffset, etc. are relative to the beginning of |
995 | // the stack frame (i.e. the old SP), ideally, we would put the old |
996 | // SP into a register and use it as the base for the stores. The |
997 | // problem is that the only available register may be ScratchReg, |
998 | // which could be R0, and R0 cannot be used as a base address. |
999 | |
1000 | // First, set ScratchReg to the old SP. This may need to be modified |
1001 | // later. |
1002 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::SUBF), DestReg: ScratchReg) |
1003 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
1004 | .addReg(RegNo: SPReg); |
1005 | |
1006 | if (ScratchReg == PPC::R0) { |
1007 | // R0 cannot be used as a base register, but it can be used as an |
1008 | // index in a store-indexed. |
1009 | int LastOffset = 0; |
1010 | if (HasFP) { |
1011 | // R0 += (FPOffset-LastOffset). |
1012 | // Need addic, since addi treats R0 as 0. |
1013 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::ADDIC), DestReg: ScratchReg) |
1014 | .addReg(RegNo: ScratchReg) |
1015 | .addImm(Val: FPOffset-LastOffset); |
1016 | LastOffset = FPOffset; |
1017 | // Store FP into *R0. |
1018 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::STWX)) |
1019 | .addReg(RegNo: FPReg, flags: RegState::Kill) // Save FP. |
1020 | .addReg(RegNo: PPC::ZERO) |
1021 | .addReg(RegNo: ScratchReg); // This will be the index (R0 is ok here). |
1022 | } |
1023 | if (FI->usesPICBase()) { |
1024 | // R0 += (PBPOffset-LastOffset). |
1025 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::ADDIC), DestReg: ScratchReg) |
1026 | .addReg(RegNo: ScratchReg) |
1027 | .addImm(Val: PBPOffset-LastOffset); |
1028 | LastOffset = PBPOffset; |
1029 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::STWX)) |
1030 | .addReg(RegNo: PPC::R30, flags: RegState::Kill) // Save PIC base pointer. |
1031 | .addReg(RegNo: PPC::ZERO) |
1032 | .addReg(RegNo: ScratchReg); // This will be the index (R0 is ok here). |
1033 | } |
1034 | if (HasBP) { |
1035 | // R0 += (BPOffset-LastOffset). |
1036 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::ADDIC), DestReg: ScratchReg) |
1037 | .addReg(RegNo: ScratchReg) |
1038 | .addImm(Val: BPOffset-LastOffset); |
1039 | LastOffset = BPOffset; |
1040 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::STWX)) |
1041 | .addReg(RegNo: BPReg, flags: RegState::Kill) // Save BP. |
1042 | .addReg(RegNo: PPC::ZERO) |
1043 | .addReg(RegNo: ScratchReg); // This will be the index (R0 is ok here). |
1044 | // BP = R0-LastOffset |
1045 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::ADDIC), DestReg: BPReg) |
1046 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
1047 | .addImm(Val: -LastOffset); |
1048 | } |
1049 | } else { |
1050 | // ScratchReg is not R0, so use it as the base register. It is |
1051 | // already set to the old SP, so we can use the offsets directly. |
1052 | |
1053 | // Now that the stack frame has been allocated, save all the necessary |
1054 | // registers using ScratchReg as the base address. |
1055 | if (HasFP) |
1056 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
1057 | .addReg(RegNo: FPReg) |
1058 | .addImm(Val: FPOffset) |
1059 | .addReg(RegNo: ScratchReg); |
1060 | if (FI->usesPICBase()) |
1061 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
1062 | .addReg(RegNo: PPC::R30) |
1063 | .addImm(Val: PBPOffset) |
1064 | .addReg(RegNo: ScratchReg); |
1065 | if (HasBP) { |
1066 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
1067 | .addReg(RegNo: BPReg) |
1068 | .addImm(Val: BPOffset) |
1069 | .addReg(RegNo: ScratchReg); |
1070 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: BPReg) |
1071 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
1072 | .addReg(RegNo: ScratchReg); |
1073 | } |
1074 | } |
1075 | } else { |
1076 | // The frame size is a known 16-bit constant (fitting in the immediate |
1077 | // field of STWU). To be here we have to be compiling for PPC32. |
1078 | // Since the SPReg has been decreased by FrameSize, add it back to each |
1079 | // offset. |
1080 | if (HasFP) |
1081 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
1082 | .addReg(RegNo: FPReg) |
1083 | .addImm(Val: FrameSize + FPOffset) |
1084 | .addReg(RegNo: SPReg); |
1085 | if (FI->usesPICBase()) |
1086 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
1087 | .addReg(RegNo: PPC::R30) |
1088 | .addImm(Val: FrameSize + PBPOffset) |
1089 | .addReg(RegNo: SPReg); |
1090 | if (HasBP) { |
1091 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
1092 | .addReg(RegNo: BPReg) |
1093 | .addImm(Val: FrameSize + BPOffset) |
1094 | .addReg(RegNo: SPReg); |
1095 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::ADDI), DestReg: BPReg) |
1096 | .addReg(RegNo: SPReg) |
1097 | .addImm(Val: FrameSize); |
1098 | } |
1099 | } |
1100 | } |
1101 | |
1102 | // Save the LR now. |
1103 | if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(x: FrameSize + LROffset)) |
1104 | SaveLR(LROffset + FrameSize); |
1105 | |
1106 | // Add Call Frame Information for the instructions we generated above. |
1107 | if (needsCFI) { |
1108 | unsigned CFIIndex; |
1109 | |
1110 | if (HasBP) { |
1111 | // Define CFA in terms of BP. Do this in preference to using FP/SP, |
1112 | // because if the stack needed aligning then CFA won't be at a fixed |
1113 | // offset from FP/SP. |
1114 | unsigned Reg = MRI->getDwarfRegNum(RegNum: BPReg, isEH: true); |
1115 | CFIIndex = MF.addFrameInst( |
1116 | Inst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: Reg)); |
1117 | } else { |
1118 | // Adjust the definition of CFA to account for the change in SP. |
1119 | assert(NegFrameSize); |
1120 | CFIIndex = MF.addFrameInst( |
1121 | Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -NegFrameSize)); |
1122 | } |
1123 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1124 | .addCFIIndex(CFIIndex); |
1125 | |
1126 | if (HasFP) { |
1127 | // Describe where FP was saved, at a fixed offset from CFA. |
1128 | unsigned Reg = MRI->getDwarfRegNum(RegNum: FPReg, isEH: true); |
1129 | CFIIndex = MF.addFrameInst( |
1130 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Reg, Offset: FPOffset)); |
1131 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1132 | .addCFIIndex(CFIIndex); |
1133 | } |
1134 | |
1135 | if (FI->usesPICBase()) { |
      // Describe where the PIC base pointer (R30) was saved, at a fixed
      // offset from CFA.
1137 | unsigned Reg = MRI->getDwarfRegNum(RegNum: PPC::R30, isEH: true); |
1138 | CFIIndex = MF.addFrameInst( |
1139 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Reg, Offset: PBPOffset)); |
1140 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1141 | .addCFIIndex(CFIIndex); |
1142 | } |
1143 | |
1144 | if (HasBP) { |
1145 | // Describe where BP was saved, at a fixed offset from CFA. |
1146 | unsigned Reg = MRI->getDwarfRegNum(RegNum: BPReg, isEH: true); |
1147 | CFIIndex = MF.addFrameInst( |
1148 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Reg, Offset: BPOffset)); |
1149 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1150 | .addCFIIndex(CFIIndex); |
1151 | } |
1152 | |
1153 | if (MustSaveLR) { |
1154 | // Describe where LR was saved, at a fixed offset from CFA. |
1155 | unsigned Reg = MRI->getDwarfRegNum(RegNum: LRReg, isEH: true); |
1156 | CFIIndex = MF.addFrameInst( |
1157 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Reg, Offset: LROffset)); |
1158 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1159 | .addCFIIndex(CFIIndex); |
1160 | } |
1161 | } |
1162 | |
1163 | // If there is a frame pointer, copy R1 into R31 |
1164 | if (HasFP) { |
1165 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: FPReg) |
1166 | .addReg(RegNo: SPReg) |
1167 | .addReg(RegNo: SPReg); |
1168 | |
1169 | if (!HasBP && needsCFI) { |
1170 | // Change the definition of CFA from SP+offset to FP+offset, because SP |
1171 | // will change at every alloca. |
1172 | unsigned Reg = MRI->getDwarfRegNum(RegNum: FPReg, isEH: true); |
1173 | unsigned CFIIndex = MF.addFrameInst( |
1174 | Inst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: Reg)); |
1175 | |
1176 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1177 | .addCFIIndex(CFIIndex); |
1178 | } |
1179 | } |
1180 | |
1181 | if (needsCFI) { |
1182 | // Describe where callee saved registers were saved, at fixed offsets from |
1183 | // CFA. |
1184 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
1185 | for (const CalleeSavedInfo &I : CSI) { |
1186 | Register Reg = I.getReg(); |
1187 | if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; |
1188 | |
1189 | // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just |
1190 | // subregisters of CR2. We just need to emit a move of CR2. |
1191 | if (PPC::CRBITRCRegClass.contains(Reg)) |
1192 | continue; |
1193 | |
1194 | if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) |
1195 | continue; |
1196 | |
1197 | // For 64-bit SVR4 when we have spilled CRs, the spill location |
1198 | // is SP+8, not a frame-relative slot. |
1199 | if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { |
1200 | // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for |
1201 | // the whole CR word. In the ELFv2 ABI, every CR that was |
1202 | // actually saved gets its own CFI record. |
1203 | Register CRReg = isELFv2ABI? Reg : PPC::CR2; |
1204 | unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset( |
1205 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: CRReg, isEH: true), Offset: CRSaveOffset)); |
1206 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1207 | .addCFIIndex(CFIIndex); |
1208 | continue; |
1209 | } |
1210 | |
1211 | if (I.isSpilledToReg()) { |
1212 | unsigned SpilledReg = I.getDstReg(); |
1213 | unsigned CFIRegister = MF.addFrameInst(Inst: MCCFIInstruction::createRegister( |
1214 | L: nullptr, Register1: MRI->getDwarfRegNum(RegNum: Reg, isEH: true), |
1215 | Register2: MRI->getDwarfRegNum(RegNum: SpilledReg, isEH: true))); |
1216 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1217 | .addCFIIndex(CFIIndex: CFIRegister); |
1218 | } else { |
1219 | int64_t Offset = MFI.getObjectOffset(ObjectIdx: I.getFrameIdx()); |
1220 | // We have changed the object offset above but we do not want to change |
1221 | // the actual offsets in the CFI instruction so we have to undo the |
1222 | // offset change here. |
1223 | if (MovingStackUpdateDown) |
1224 | Offset -= NegFrameSize; |
1225 | |
1226 | unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset( |
1227 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: Reg, isEH: true), Offset)); |
1228 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1229 | .addCFIIndex(CFIIndex); |
1230 | } |
1231 | } |
1232 | } |
1233 | } |
1234 | |
1235 | void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, |
1236 | MachineBasicBlock &PrologMBB) const { |
1237 | bool isPPC64 = Subtarget.isPPC64(); |
1238 | const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); |
1239 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
1240 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1241 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
1242 | // AIX assembler does not support cfi directives. |
1243 | const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); |
1244 | auto StackAllocMIPos = llvm::find_if(Range&: PrologMBB, P: [](MachineInstr &MI) { |
1245 | int Opc = MI.getOpcode(); |
1246 | return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32; |
1247 | }); |
1248 | if (StackAllocMIPos == PrologMBB.end()) |
1249 | return; |
1250 | const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); |
1251 | MachineBasicBlock *CurrentMBB = &PrologMBB; |
1252 | DebugLoc DL = PrologMBB.findDebugLoc(MBBI: StackAllocMIPos); |
1253 | MachineInstr &MI = *StackAllocMIPos; |
1254 | int64_t NegFrameSize = MI.getOperand(i: 2).getImm(); |
1255 | unsigned ProbeSize = TLI.getStackProbeSize(MF); |
1256 | int64_t NegProbeSize = -(int64_t)ProbeSize; |
1257 | assert(isInt<32>(NegProbeSize) && "Unhandled probe size" ); |
1258 | int64_t NumBlocks = NegFrameSize / NegProbeSize; |
1259 | int64_t NegResidualSize = NegFrameSize % NegProbeSize; |
1260 | Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; |
1261 | Register ScratchReg = MI.getOperand(i: 0).getReg(); |
1262 | Register FPReg = MI.getOperand(i: 1).getReg(); |
1263 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
1264 | bool HasBP = RegInfo->hasBasePointer(MF); |
1265 | Register BPReg = RegInfo->getBaseRegister(MF); |
1266 | Align MaxAlign = MFI.getMaxAlign(); |
1267 | bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); |
1268 | const MCInstrDesc &CopyInst = TII.get(Opcode: isPPC64 ? PPC::OR8 : PPC::OR); |
1269 | // Subroutines to generate .cfi_* directives. |
1270 | auto buildDefCFAReg = [&](MachineBasicBlock &MBB, |
1271 | MachineBasicBlock::iterator MBBI, Register Reg) { |
1272 | unsigned RegNum = MRI->getDwarfRegNum(RegNum: Reg, isEH: true); |
1273 | unsigned CFIIndex = MF.addFrameInst( |
1274 | Inst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: RegNum)); |
1275 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1276 | .addCFIIndex(CFIIndex); |
1277 | }; |
1278 | auto buildDefCFA = [&](MachineBasicBlock &MBB, |
1279 | MachineBasicBlock::iterator MBBI, Register Reg, |
1280 | int Offset) { |
1281 | unsigned RegNum = MRI->getDwarfRegNum(RegNum: Reg, isEH: true); |
1282 | unsigned CFIIndex = MBB.getParent()->addFrameInst( |
1283 | Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: RegNum, Offset)); |
1284 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
1285 | .addCFIIndex(CFIIndex); |
1286 | }; |
1287 | // Subroutine to determine if we can use the Imm as part of d-form. |
1288 | auto CanUseDForm = [](int64_t Imm) { return isInt<16>(x: Imm) && Imm % 4 == 0; }; |
1289 | // Subroutine to materialize the Imm into TempReg. |
1290 | auto MaterializeImm = [&](MachineBasicBlock &MBB, |
1291 | MachineBasicBlock::iterator MBBI, int64_t Imm, |
1292 | Register &TempReg) { |
1293 | assert(isInt<32>(Imm) && "Unhandled imm" ); |
1294 | if (isInt<16>(x: Imm)) |
1295 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::LI8 : PPC::LI), DestReg: TempReg) |
1296 | .addImm(Val: Imm); |
1297 | else { |
1298 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::LIS8 : PPC::LIS), DestReg: TempReg) |
1299 | .addImm(Val: Imm >> 16); |
1300 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::ORI8 : PPC::ORI), DestReg: TempReg) |
1301 | .addReg(RegNo: TempReg) |
1302 | .addImm(Val: Imm & 0xFFFF); |
1303 | } |
1304 | }; |
1305 | // Subroutine to store frame pointer and decrease stack pointer by probe size. |
1306 | auto allocateAndProbe = [&](MachineBasicBlock &MBB, |
1307 | MachineBasicBlock::iterator MBBI, int64_t NegSize, |
1308 | Register NegSizeReg, bool UseDForm, |
1309 | Register StoreReg) { |
1310 | if (UseDForm) |
1311 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::STDU : PPC::STWU), DestReg: SPReg) |
1312 | .addReg(RegNo: StoreReg) |
1313 | .addImm(Val: NegSize) |
1314 | .addReg(RegNo: SPReg); |
1315 | else |
1316 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg) |
1317 | .addReg(RegNo: StoreReg) |
1318 | .addReg(RegNo: SPReg) |
1319 | .addReg(RegNo: NegSizeReg); |
1320 | }; |
1321 | // Used to probe stack when realignment is required. |
  // Note that, according to the ABI's requirement, *sp must always equal the
  // value of the back-chain pointer; only st(w|d)u(x) can be used to update
  // sp.
1324 | // Following is pseudo code: |
1325 | // final_sp = (sp & align) + negframesize; |
1326 | // neg_gap = final_sp - sp; |
1327 | // while (neg_gap < negprobesize) { |
1328 | // stdu fp, negprobesize(sp); |
1329 | // neg_gap -= negprobesize; |
1330 | // } |
1331 | // stdux fp, sp, neg_gap |
1332 | // |
1333 | // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg |
1334 | // before probe code, we don't need to save it, so we get one additional reg |
  // that can be used to materialize the probe size if needed to use xform.
  // Otherwise, we can NOT materialize the probe size, so we can only use
  // Dform for now.
1338 | // |
1339 | // The allocations are: |
1340 | // if (HasBP && HasRedzone) { |
1341 | // r0: materialize the probesize if needed so that we can use xform. |
1342 | // r12: `neg_gap` |
1343 | // } else { |
1344 | // r0: back-chain pointer |
1345 | // r12: `neg_gap`. |
1346 | // } |
1347 | auto probeRealignedStack = [&](MachineBasicBlock &MBB, |
1348 | MachineBasicBlock::iterator MBBI, |
1349 | Register ScratchReg, Register TempReg) { |
1350 | assert(HasBP && "The function is supposed to have base pointer when its " |
1351 | "stack is realigned." ); |
1352 | assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2" ); |
1353 | |
    // FIXME: We can eliminate this limitation if we get more information
    // about which parts of the redzone are already used. A used redzone can
    // be treated as probed. But there might be `holes' in the probed redzone,
    // which could complicate the implementation.
1358 | assert(ProbeSize >= Subtarget.getRedZoneSize() && |
1359 | "Probe size should be larger or equal to the size of red-zone so " |
1360 | "that red-zone is not clobbered by probing." ); |
1361 | |
1362 | Register &FinalStackPtr = TempReg; |
1363 | // FIXME: We only support NegProbeSize materializable by DForm currently. |
1364 | // When HasBP && HasRedzone, we can use xform if we have an additional idle |
1365 | // register. |
1366 | NegProbeSize = std::max(a: NegProbeSize, b: -((int64_t)1 << 15)); |
1367 | assert(isInt<16>(NegProbeSize) && |
1368 | "NegProbeSize should be materializable by DForm" ); |
1369 | Register CRReg = PPC::CR0; |
1370 | // Layout of output assembly kinda like: |
1371 | // bb.0: |
1372 | // ... |
1373 | // sub $scratchreg, $finalsp, r1 |
1374 | // cmpdi $scratchreg, <negprobesize> |
1375 | // bge bb.2 |
1376 | // bb.1: |
1377 | // stdu <backchain>, <negprobesize>(r1) |
1378 | // sub $scratchreg, $scratchreg, negprobesize |
1379 | // cmpdi $scratchreg, <negprobesize> |
1380 | // blt bb.1 |
1381 | // bb.2: |
1382 | // stdux <backchain>, r1, $scratchreg |
1383 | MachineFunction::iterator MBBInsertPoint = std::next(x: MBB.getIterator()); |
1384 | MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(BB: ProbedBB); |
1385 | MF.insert(MBBI: MBBInsertPoint, MBB: ProbeLoopBodyMBB); |
1386 | MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(BB: ProbedBB); |
1387 | MF.insert(MBBI: MBBInsertPoint, MBB: ProbeExitMBB); |
1388 | // bb.2 |
1389 | { |
1390 | Register BackChainPointer = HasRedZone ? BPReg : TempReg; |
1391 | allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, |
1392 | BackChainPointer); |
1393 | if (HasRedZone) |
1394 | // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg |
1395 | // to TempReg to satisfy it. |
1396 | BuildMI(BB&: *ProbeExitMBB, I: ProbeExitMBB->end(), MIMD: DL, MCID: CopyInst, DestReg: TempReg) |
1397 | .addReg(RegNo: BPReg) |
1398 | .addReg(RegNo: BPReg); |
1399 | ProbeExitMBB->splice(Where: ProbeExitMBB->end(), Other: &MBB, From: MBBI, To: MBB.end()); |
1400 | ProbeExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
1401 | } |
1402 | // bb.0 |
1403 | { |
1404 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::SUBF8 : PPC::SUBF), DestReg: ScratchReg) |
1405 | .addReg(RegNo: SPReg) |
1406 | .addReg(RegNo: FinalStackPtr); |
1407 | if (!HasRedZone) |
1408 | BuildMI(BB: &MBB, MIMD: DL, MCID: CopyInst, DestReg: TempReg).addReg(RegNo: SPReg).addReg(RegNo: SPReg); |
1409 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::CMPDI : PPC::CMPWI), DestReg: CRReg) |
1410 | .addReg(RegNo: ScratchReg) |
1411 | .addImm(Val: NegProbeSize); |
1412 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: PPC::BCC)) |
1413 | .addImm(Val: PPC::PRED_GE) |
1414 | .addReg(RegNo: CRReg) |
1415 | .addMBB(MBB: ProbeExitMBB); |
1416 | MBB.addSuccessor(Succ: ProbeLoopBodyMBB); |
1417 | MBB.addSuccessor(Succ: ProbeExitMBB); |
1418 | } |
1419 | // bb.1 |
1420 | { |
1421 | Register BackChainPointer = HasRedZone ? BPReg : TempReg; |
1422 | allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, |
1423 | 0, true /*UseDForm*/, BackChainPointer); |
1424 | BuildMI(BB: ProbeLoopBodyMBB, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::ADDI8 : PPC::ADDI), |
1425 | DestReg: ScratchReg) |
1426 | .addReg(RegNo: ScratchReg) |
1427 | .addImm(Val: -NegProbeSize); |
1428 | BuildMI(BB: ProbeLoopBodyMBB, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::CMPDI : PPC::CMPWI), |
1429 | DestReg: CRReg) |
1430 | .addReg(RegNo: ScratchReg) |
1431 | .addImm(Val: NegProbeSize); |
1432 | BuildMI(BB: ProbeLoopBodyMBB, MIMD: DL, MCID: TII.get(Opcode: PPC::BCC)) |
1433 | .addImm(Val: PPC::PRED_LT) |
1434 | .addReg(RegNo: CRReg) |
1435 | .addMBB(MBB: ProbeLoopBodyMBB); |
1436 | ProbeLoopBodyMBB->addSuccessor(Succ: ProbeExitMBB); |
1437 | ProbeLoopBodyMBB->addSuccessor(Succ: ProbeLoopBodyMBB); |
1438 | } |
1439 | // Update liveins. |
1440 | fullyRecomputeLiveIns(MBBs: {ProbeExitMBB, ProbeLoopBodyMBB}); |
1441 | return ProbeExitMBB; |
1442 | }; |
1443 | // For case HasBP && MaxAlign > 1, we have to realign the SP by performing |
1444 | // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since |
1445 | // the offset subtracted from SP is determined by SP's runtime value. |
1446 | if (HasBP && MaxAlign > 1) { |
1447 | // Calculate final stack pointer. |
1448 | if (isPPC64) |
1449 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: TII.get(Opcode: PPC::RLDICL), DestReg: ScratchReg) |
1450 | .addReg(RegNo: SPReg) |
1451 | .addImm(Val: 0) |
1452 | .addImm(Val: 64 - Log2(A: MaxAlign)); |
1453 | else |
1454 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: TII.get(Opcode: PPC::RLWINM), DestReg: ScratchReg) |
1455 | .addReg(RegNo: SPReg) |
1456 | .addImm(Val: 0) |
1457 | .addImm(Val: 32 - Log2(A: MaxAlign)) |
1458 | .addImm(Val: 31); |
1459 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::SUBF8 : PPC::SUBF), |
1460 | DestReg: FPReg) |
1461 | .addReg(RegNo: ScratchReg) |
1462 | .addReg(RegNo: SPReg); |
1463 | MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); |
1464 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4), |
1465 | DestReg: FPReg) |
1466 | .addReg(RegNo: ScratchReg) |
1467 | .addReg(RegNo: FPReg); |
1468 | CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); |
1469 | if (needsCFI) |
1470 | buildDefCFAReg(*CurrentMBB, {MI}, FPReg); |
1471 | } else { |
1472 | // Initialize current frame pointer. |
1473 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: CopyInst, DestReg: FPReg).addReg(RegNo: SPReg).addReg(RegNo: SPReg); |
1474 | // Use FPReg to calculate CFA. |
1475 | if (needsCFI) |
1476 | buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); |
1477 | // Probe residual part. |
1478 | if (NegResidualSize) { |
1479 | bool ResidualUseDForm = CanUseDForm(NegResidualSize); |
1480 | if (!ResidualUseDForm) |
1481 | MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); |
1482 | allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, |
1483 | ResidualUseDForm, FPReg); |
1484 | } |
1485 | bool UseDForm = CanUseDForm(NegProbeSize); |
1486 | // If number of blocks is small, just probe them directly. |
1487 | if (NumBlocks < 3) { |
1488 | if (!UseDForm) |
1489 | MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); |
1490 | for (int i = 0; i < NumBlocks; ++i) |
1491 | allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, |
1492 | FPReg); |
1493 | if (needsCFI) { |
1494 | // Restore using SPReg to calculate CFA. |
1495 | buildDefCFAReg(*CurrentMBB, {MI}, SPReg); |
1496 | } |
1497 | } else { |
1498 | // Since CTR is a volatile register and current shrinkwrap implementation |
1499 | // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a |
1500 | // CTR loop to probe. |
1501 | // Calculate trip count and stores it in CTRReg. |
1502 | MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); |
1503 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) |
1504 | .addReg(RegNo: ScratchReg, flags: RegState::Kill); |
1505 | if (!UseDForm) |
1506 | MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); |
1507 | // Create MBBs of the loop. |
1508 | MachineFunction::iterator MBBInsertPoint = |
1509 | std::next(x: CurrentMBB->getIterator()); |
1510 | MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: ProbedBB); |
1511 | MF.insert(MBBI: MBBInsertPoint, MBB: LoopMBB); |
1512 | MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(BB: ProbedBB); |
1513 | MF.insert(MBBI: MBBInsertPoint, MBB: ExitMBB); |
1514 | // Synthesize the loop body. |
1515 | allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, |
1516 | UseDForm, FPReg); |
1517 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) |
1518 | .addMBB(MBB: LoopMBB); |
1519 | LoopMBB->addSuccessor(Succ: ExitMBB); |
1520 | LoopMBB->addSuccessor(Succ: LoopMBB); |
1521 | // Synthesize the exit MBB. |
1522 | ExitMBB->splice(Where: ExitMBB->end(), Other: CurrentMBB, |
1523 | From: std::next(x: MachineBasicBlock::iterator(MI)), |
1524 | To: CurrentMBB->end()); |
1525 | ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: CurrentMBB); |
1526 | CurrentMBB->addSuccessor(Succ: LoopMBB); |
1527 | if (needsCFI) { |
1528 | // Restore using SPReg to calculate CFA. |
1529 | buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); |
1530 | } |
1531 | // Update liveins. |
1532 | fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopMBB}); |
1533 | } |
1534 | } |
1535 | ++NumPrologProbed; |
1536 | MI.eraseFromParent(); |
1537 | } |
1538 | |
/// Emit the function epilogue: reload the callee-saved CR fields, LR, FP,
/// base pointer and PIC base register from their save slots, restore the
/// stack pointer, and handle the callee-pop / tail-call return sequences.
///
/// The ordering here is delicate on targets without a red zone (32-bit
/// SVR4): the SP must be restored last so that all live stack contents
/// remain above it, which is why the code routes restores through RBReg
/// (the restore base register) and defers the final SP update.
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl;

  // Anchor debug info on the terminator if one exists (a non-return block
  // chosen by shrink-wrapping may have none).
  if (MBBI != MBB.end())
    dl = MBBI->getDebugLoc();

  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Get alignment info so we know how to restore the SP.
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Get the number of bytes allocated from the FrameInfo.
  int64_t FrameSize = MFI.getStackSize();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();

  // Check if the link register (LR) has been saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  // Every ABI except 32-bit SVR4 provides a red zone below the SP.
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  bool HasROPProtect = Subtarget.hasROPProtect();
  bool HasPrivileged = Subtarget.hasPrivileged();

  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  Register ScratchReg;
  Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  // Select the 32- or 64-bit form of each instruction used below.
  const MCInstrDesc& MTLRInst = TII.get( Opcode: isPPC64 ? PPC::MTLR8
                                                         : PPC::MTLR );
  const MCInstrDesc& LoadInst = TII.get( Opcode: isPPC64 ? PPC::LD
                                                         : PPC::LWZ );
  const MCInstrDesc& LoadImmShiftedInst = TII.get( Opcode: isPPC64 ? PPC::LIS8
                                                                   : PPC::LIS );
  const MCInstrDesc& OrInst = TII.get(Opcode: isPPC64 ? PPC::OR8
                                                     : PPC::OR );
  const MCInstrDesc& OrImmInst = TII.get( Opcode: isPPC64 ? PPC::ORI8
                                                          : PPC::ORI );
  const MCInstrDesc& AddImmInst = TII.get( Opcode: isPPC64 ? PPC::ADDI8
                                                           : PPC::ADDI );
  const MCInstrDesc& AddInst = TII.get( Opcode: isPPC64 ? PPC::ADD8
                                                        : PPC::ADD4 );
  // The CR save slot is a 32-bit word even on PPC64.
  const MCInstrDesc& LoadWordInst = TII.get( Opcode: isPPC64 ? PPC::LWZ8
                                                             : PPC::LWZ);
  const MCInstrDesc& MoveToCRInst = TII.get( Opcode: isPPC64 ? PPC::MTOCRF8
                                                             : PPC::MTOCRF);
  // ROP-protection hash check; privileged variant if available.
  const MCInstrDesc &HashChk =
      TII.get(Opcode: isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
                             : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
  int64_t LROffset = getReturnSaveOffset();

  int64_t FPOffset = 0;

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(MBB: &MBB, UseAtEnd: true, TwoUniqueRegsRequired: false, SR1: &ScratchReg,
                                              SR2: &TempReg);
  assert(SingleScratchReg &&
         "Could not find an available scratch register" );

  SingleScratchReg = ScratchReg == TempReg;

  if (HasFP) {
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!" );
    FPOffset = MFI.getObjectOffset(ObjectIdx: FPIndex);
  }

  int64_t BPOffset = 0;
  if (HasBP) {
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!" );
    BPOffset = MFI.getObjectOffset(ObjectIdx: BPIndex);
  }

  int64_t PBPOffset = 0;
  if (FI->usesPICBase()) {
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!" );
    PBPOffset = MFI.getObjectOffset(ObjectIdx: PBPIndex);
  }

  bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());

  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
                     RetOpcode == PPC::TCRETURNdi ||
                     RetOpcode == PPC::TCRETURNai ||
                     RetOpcode == PPC::TCRETURNri8 ||
                     RetOpcode == PPC::TCRETURNdi8 ||
                     RetOpcode == PPC::TCRETURNai8;

    if (UsesTCRet) {
      int MaxTCRetDelta = FI->getTailCallSPDelta();
      MachineOperand &StackAdjust = MBBI->getOperand(i: 1);
      assert(StackAdjust.isImm() && "Expecting immediate value." );
      // Adjust stack pointer.
      int StackAdj = StackAdjust.getImm();
      int Delta = StackAdj - MaxTCRetDelta;
      assert((Delta >= 0) && "Delta must be positive" );
      if (MaxTCRetDelta>0)
        FrameSize += (StackAdj +Delta);
      else
        FrameSize += StackAdj;
    }
  }

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple LD/LWZ immediate offset operand.
  bool isLargeFrame = !isInt<16>(x: FrameSize);

  // On targets without red zone, the SP needs to be restored last, so that
  // all live contents of the stack frame are upwards of the SP. This means
  // that we cannot restore SP just now, since there may be more registers
  // to restore from the stack frame (e.g. R31). If the frame size is not
  // a simple immediate value, we will need a spare register to hold the
  // restored SP. If the frame size is known and small, we can simply adjust
  // the offsets of the registers to be restored, and still use SP to restore
  // them. In such case, the final update of SP will be to add the frame
  // size to it.
  // To simplify the code, set RBReg to the base register used to restore
  // values from the stack, and set SPAdd to the value that needs to be added
  // to the SP at the end. The default values are as if red zone was present.
  unsigned RBReg = SPReg;
  uint64_t SPAdd = 0;

  // Check if we can move the stack update instruction up the epilogue
  // past the callee saves. This will allow the move to LR instruction
  // to be executed before the restores of the callee saves which means
  // that the callee saves can hide the latency from the MTLR instruction.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      // If the callee saved register is spilled to another register abort the
      // stack update movement.
      if (CSI.isSpilledToReg()) {
        StackUpdateLoc = MBBI;
        break;
      }
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the update of the stack pointer
      // past them.
      if (FrIdx >= 0)
        continue;

      // Step past each CSR restore so the stack update lands above them all.
      if (MFI.isFixedObjectIndex(ObjectIdx: FrIdx) && MFI.getObjectOffset(ObjectIdx: FrIdx) < 0)
        StackUpdateLoc--;
      else {
        // Abort the operation as we can't update all CSR restores.
        StackUpdateLoc = MBBI;
        break;
      }
    }
  }

  if (FrameSize) {
    // In the prologue, the loaded (or persistent) stack pointer value is
    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
    // zone add this offset back now.

    // If the function has a base pointer, the stack pointer has been copied
    // to it so we can restore it by copying in the other direction.
    if (HasRedZone && HasBP) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: RBReg).
        addReg(RegNo: BPReg).
        addReg(RegNo: BPReg);
    }
    // If this function contained a fastcc call and GuaranteedTailCallOpt is
    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
    // call which invalidates the stack pointer value in SP(0). So we use the
    // value of R31 in this case. Similar situation exists with setjmp.
    else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
      assert(HasFP && "Expecting a valid frame pointer." );
      if (!HasRedZone)
        RBReg = FPReg;
      if (!isLargeFrame) {
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: AddImmInst, DestReg: RBReg)
          .addReg(RegNo: FPReg).addImm(Val: FrameSize);
      } else {
        // FrameSize doesn't fit in a 16-bit immediate: materialize it first.
        TII.materializeImmPostRA(MBB, MBBI, DL: dl, Reg: ScratchReg, Imm: FrameSize);
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: AddInst)
          .addReg(RegNo: RBReg)
          .addReg(RegNo: FPReg)
          .addReg(RegNo: ScratchReg);
      }
    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
      if (HasRedZone) {
        BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: AddImmInst, DestReg: SPReg)
          .addReg(RegNo: SPReg)
          .addImm(Val: FrameSize);
      } else {
        // Make sure that adding FrameSize will not overflow the max offset
        // size.
        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
               "Local offsets should be negative" );
        // No red zone: keep SP in place for now; restore CSRs relative to
        // the old SP by biasing their offsets, and bump SP at the very end.
        SPAdd = FrameSize;
        FPOffset += FrameSize;
        BPOffset += FrameSize;
        PBPOffset += FrameSize;
      }
    } else {
      // We don't want to use ScratchReg as a base register, because it
      // could happen to be R0. Use FP instead, but make sure to preserve it.
      if (!HasRedZone) {
        // If FP is not saved, copy it to ScratchReg.
        if (!HasFP)
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: ScratchReg)
            .addReg(RegNo: FPReg)
            .addReg(RegNo: FPReg);
        RBReg = FPReg;
      }
      // Reload the caller's SP from the back-chain word at SP(0).
      BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: LoadInst, DestReg: RBReg)
        .addImm(Val: 0)
        .addReg(RegNo: SPReg);
    }
  }
  assert(RBReg != ScratchReg && "Should have avoided ScratchReg" );
  // If there is no red zone, ScratchReg may be needed for holding a useful
  // value (although not the base register). Make sure it is not overwritten
  // too early.

  // If we need to restore both the LR and the CR and we only have one
  // available scratch register, we must do them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
    // is live here.
    assert(HasRedZone && "Expecting red zone" );
    BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadWordInst, DestReg: TempReg)
      .addImm(Val: CRSaveOffset)
      .addReg(RegNo: SPReg);
    // Kill TempReg on the last MTOCRF so its live range ends there.
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: MoveToCRInst, DestReg: MustSaveCRs[i])
        .addReg(RegNo: TempReg, flags: getKillRegState(B: i == e-1));
  }

  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
  // LR is stored in the caller's stack frame. ScratchReg will be needed
  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
  // a base register anyway, because it may happen to be R0.
  bool LoadedLR = false;
  if (MustSaveLR && RBReg == SPReg && isInt<16>(x: LROffset+SPAdd)) {
    BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: LoadInst, DestReg: ScratchReg)
      .addImm(Val: LROffset+SPAdd)
      .addReg(RegNo: RBReg);
    LoadedLR = true;
  }

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
    assert(RBReg == SPReg && "Should be using SP as a base register" );
    BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadWordInst, DestReg: TempReg)
      .addImm(Val: CRSaveOffset)
      .addReg(RegNo: RBReg);
  }

  if (HasFP) {
    // If there is red zone, restore FP directly, since SP has already been
    // restored. Otherwise, restore the value of FP into ScratchReg.
    if (HasRedZone || RBReg == SPReg)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadInst, DestReg: FPReg)
        .addImm(Val: FPOffset)
        .addReg(RegNo: SPReg);
    else
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadInst, DestReg: ScratchReg)
        .addImm(Val: FPOffset)
        .addReg(RegNo: RBReg);
  }

  // Restore the PIC base register (R30) if it was saved.
  if (FI->usesPICBase())
    BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadInst, DestReg: PPC::R30)
      .addImm(Val: PBPOffset)
      .addReg(RegNo: RBReg);

  if (HasBP)
    BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadInst, DestReg: BPReg)
      .addImm(Val: BPOffset)
      .addReg(RegNo: RBReg);

  // There is nothing more to be loaded from the stack, so now we can
  // restore SP: SP = RBReg + SPAdd.
  if (RBReg != SPReg || SPAdd != 0) {
    assert(!HasRedZone && "This should not happen with red zone" );
    // If SPAdd is 0, generate a copy.
    if (SPAdd == 0)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: SPReg)
        .addReg(RegNo: RBReg)
        .addReg(RegNo: RBReg);
    else
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: AddImmInst, DestReg: SPReg)
        .addReg(RegNo: RBReg)
        .addImm(Val: SPAdd);

    assert(RBReg != ScratchReg && "Should be using FP or SP as base register" );
    // FP served as the restore base; move its saved value (held in
    // ScratchReg, loaded above) back into FPReg now that we're done with it.
    if (RBReg == FPReg)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: FPReg)
        .addReg(RegNo: ScratchReg)
        .addReg(RegNo: ScratchReg);

    // Now load the LR from the caller's stack frame.
    if (MustSaveLR && !LoadedLR)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadInst, DestReg: ScratchReg)
        .addImm(Val: LROffset)
        .addReg(RegNo: SPReg);
  }

  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR))
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: MoveToCRInst, DestReg: MustSaveCRs[i])
        .addReg(RegNo: TempReg, flags: getKillRegState(B: i == e-1));

  if (MustSaveLR) {
    // If ROP protection is required, an extra instruction is added to compute a
    // hash and then compare it to the hash stored in the prologue.
    if (HasROPProtect) {
      const int SaveIndex = FI->getROPProtectionHashSaveIndex();
      const int64_t ImmOffset = MFI.getObjectOffset(ObjectIdx: SaveIndex);
      assert((ImmOffset <= -8 && ImmOffset >= -512) &&
             "ROP hash check location offset out of range." );
      assert(((ImmOffset & 0x7) == 0) &&
             "ROP hash check location offset must be 8 byte aligned." );
      BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: HashChk)
        .addReg(RegNo: ScratchReg)
        .addImm(Val: ImmOffset)
        .addReg(RegNo: SPReg);
    }
    BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: MTLRInst).addReg(RegNo: ScratchReg);
  }

  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
  // call optimization
  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
        MF.getFunction().getCallingConv() == CallingConv::Fast) {
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      unsigned CallerAllocatedAmt = FI->getMinReservedArea();

      if (CallerAllocatedAmt && isInt<16>(x: CallerAllocatedAmt)) {
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: AddImmInst, DestReg: SPReg)
          .addReg(RegNo: SPReg).addImm(Val: CallerAllocatedAmt);
      } else {
        // Amount doesn't fit in 16 bits: build it with LIS + ORI, then add.
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadImmShiftedInst, DestReg: ScratchReg)
          .addImm(Val: CallerAllocatedAmt >> 16);
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrImmInst, DestReg: ScratchReg)
          .addReg(RegNo: ScratchReg, flags: RegState::Kill)
          .addImm(Val: CallerAllocatedAmt & 0xFFFF);
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: AddInst)
          .addReg(RegNo: SPReg)
          .addReg(RegNo: FPReg)
          .addReg(RegNo: ScratchReg);
      }
    } else {
      // Not a callee-pop BLR: lower any pseudo tail-call return instead.
      createTailCallBranchInstr(MBB);
    }
  }
}
1908 | |
1909 | void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { |
1910 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
1911 | |
1912 | // If we got this far a first terminator should exist. |
1913 | assert(MBBI != MBB.end() && "Failed to find the first terminator." ); |
1914 | |
1915 | DebugLoc dl = MBBI->getDebugLoc(); |
1916 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
1917 | |
1918 | // Create branch instruction for pseudo tail call return instruction. |
1919 | // The TCRETURNdi variants are direct calls. Valid targets for those are |
1920 | // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel |
1921 | // since we can tail call external functions with PC-Rel (i.e. we don't need |
1922 | // to worry about different TOC pointers). Some of the external functions will |
1923 | // be MO_GlobalAddress while others like memcpy for example, are going to |
1924 | // be MO_ExternalSymbol. |
1925 | unsigned RetOpcode = MBBI->getOpcode(); |
1926 | if (RetOpcode == PPC::TCRETURNdi) { |
1927 | MBBI = MBB.getLastNonDebugInstr(); |
1928 | MachineOperand &JumpTarget = MBBI->getOperand(i: 0); |
1929 | if (JumpTarget.isGlobal()) |
1930 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILB)). |
1931 | addGlobalAddress(GV: JumpTarget.getGlobal(), Offset: JumpTarget.getOffset()); |
1932 | else if (JumpTarget.isSymbol()) |
1933 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILB)). |
1934 | addExternalSymbol(FnName: JumpTarget.getSymbolName()); |
1935 | else |
1936 | llvm_unreachable("Expecting Global or External Symbol" ); |
1937 | } else if (RetOpcode == PPC::TCRETURNri) { |
1938 | MBBI = MBB.getLastNonDebugInstr(); |
1939 | assert(MBBI->getOperand(0).isReg() && "Expecting register operand." ); |
1940 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILBCTR)); |
1941 | } else if (RetOpcode == PPC::TCRETURNai) { |
1942 | MBBI = MBB.getLastNonDebugInstr(); |
1943 | MachineOperand &JumpTarget = MBBI->getOperand(i: 0); |
1944 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILBA)).addImm(Val: JumpTarget.getImm()); |
1945 | } else if (RetOpcode == PPC::TCRETURNdi8) { |
1946 | MBBI = MBB.getLastNonDebugInstr(); |
1947 | MachineOperand &JumpTarget = MBBI->getOperand(i: 0); |
1948 | if (JumpTarget.isGlobal()) |
1949 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILB8)). |
1950 | addGlobalAddress(GV: JumpTarget.getGlobal(), Offset: JumpTarget.getOffset()); |
1951 | else if (JumpTarget.isSymbol()) |
1952 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILB8)). |
1953 | addExternalSymbol(FnName: JumpTarget.getSymbolName()); |
1954 | else |
1955 | llvm_unreachable("Expecting Global or External Symbol" ); |
1956 | } else if (RetOpcode == PPC::TCRETURNri8) { |
1957 | MBBI = MBB.getLastNonDebugInstr(); |
1958 | assert(MBBI->getOperand(0).isReg() && "Expecting register operand." ); |
1959 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILBCTR8)); |
1960 | } else if (RetOpcode == PPC::TCRETURNai8) { |
1961 | MBBI = MBB.getLastNonDebugInstr(); |
1962 | MachineOperand &JumpTarget = MBBI->getOperand(i: 0); |
1963 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILBA8)).addImm(Val: JumpTarget.getImm()); |
1964 | } |
1965 | } |
1966 | |
/// Decide which callee-saved registers must be spilled and pre-allocate the
/// fixed stack slots the prologue/epilogue code relies on: the FP, BP and
/// PIC-base save slots, the tail-call linkage-area slot, and the CR save
/// slot. Also excludes registers (LR, R31/FP, BP, R30, VSRp pairs) whose
/// save/restore is handled explicitly by the prologue/epilogue emitters.
void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  if (Subtarget.isAIXABI())
    updateCalleeSaves(MF, SavedRegs);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Do not explicitly save the callee saved VSRp registers.
  // The individual VSR subregisters will be saved instead.
  SavedRegs.reset(Idx: PPC::VSRp26);
  SavedRegs.reset(Idx: PPC::VSRp27);
  SavedRegs.reset(Idx: PPC::VSRp28);
  SavedRegs.reset(Idx: PPC::VSRp29);
  SavedRegs.reset(Idx: PPC::VSRp30);
  SavedRegs.reset(Idx: PPC::VSRp31);

  // Save and clear the LR state.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(Idx: LR);

  // Save R31 if necessary
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI && needsFP(MF)) {
    // Find out what the fix offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for frame pointer save area.
    FPSI = MFI.CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: FPOffset, IsImmutable: true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  // Likewise for the base pointer save slot.
  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: BPOffset, IsImmutable: true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(Size: 4, SPOffset: -8, IsImmutable: true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(Idx: isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF)) {
    SavedRegs.reset(Idx: RegInfo->getBaseRegister(MF));
    // On AIX, when BaseRegister(R30) is used, need to spill r31 too to match
    // AIX trackback table requirement.
    if (!needsFP(MF) && !SavedRegs.test(Idx: isPPC64 ? PPC::X31 : PPC::R31) &&
        Subtarget.isAIXABI()) {
      assert(
          (RegInfo->getBaseRegister(MF) == (isPPC64 ? PPC::X30 : PPC::R30)) &&
          "Invalid base register on AIX!" );
      SavedRegs.set(isPPC64 ? PPC::X31 : PPC::R31);
    }
  }
  if (FI->usesPICBase())
    SavedRegs.reset(Idx: PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI.CreateFixedObject(Size: -1 * TCSPDelta, SPOffset: TCSPDelta, IsImmutable: true);
  }

  // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
  // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
  // object at the offset of the CR-save slot in the linkage area. The actual
  // save and restore of the condition register will be created as part of the
  // prologue and epilogue insertion, but the FixedStack object is needed to
  // keep the CalleeSavedInfo valid.
  if ((SavedRegs.test(Idx: PPC::CR2) || SavedRegs.test(Idx: PPC::CR3) ||
       SavedRegs.test(Idx: PPC::CR4))) {
    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
    // CR-save slot location differs by ABI: linkage-area offset on 64-bit
    // and AIX, below the frame (-4) on 32-bit SVR4.
    const int64_t SpillOffset =
        Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
    int FrameIdx =
        MFI.CreateFixedObject(Size: SpillSize, SPOffset: SpillOffset,
                              /* IsImmutable */ true, /* IsAliased */ isAliased: false);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}
2067 | |
// Finalize the offsets of all callee-saved register spill slots. On entry,
// each CSR's frame object carries an offset relative to the start of its own
// save area; this routine walks the save areas downward from the low end of
// the caller's frame (adjusted for any reserved tail-call area) in the ABI
// order -- FPRs, then GPRs/G8Rs, then the 32-bit-ELF CR word, then the
// 16-byte-aligned Altivec/SPE area -- rebasing every slot to its final
// position. It also materializes tail-call branches in shrink-wrapped return
// blocks and reserves register-scavenger emergency slots.
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                           RegScavenger *RS) const {
  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped, and if the function has a tail call, the
  // tail call might not be in the new RestoreBlock, so real branch instruction
  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
  // RestoreBlock. So we handle this case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Track the lowest-numbered saved register in each class; each save area
  // spans from that register up to register 31, which determines its size.
  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasVRSaveArea = false;

  // Bucket the callee-saved registers by class so each save area can be
  // rebased independently below.
  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();
    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
            (Reg != PPC::X2 && Reg != PPC::R2)) &&
           "Not expecting to try to spill R2 in a function that must save TOC" );
    if (PPC::GPRCRegClass.contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(Elt: I);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegClass.contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(Elt: I);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegClass.contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(Elt: I);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)) {
      ; // do nothing, as we already know whether CRs are spilled
    } else if (PPC::VRRCRegClass.contains(Reg) ||
               PPC::SPERCRegClass.contains(Reg)) {
      // Altivec and SPE are mutually exclusive, but have the same stack
      // alignment requirements, so overload the save area for both cases.
      HasVRSaveArea = true;

      VRegs.push_back(Elt: I);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!" );
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // Running (non-positive) offset at which the next save area is placed;
  // every placed area moves it further down the stack.
  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The Floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI));
    }

    // Each FPR slot is 8 bytes; the area spans F<MinFPR>..F31.
    LowerBound -= (31 - TRI->getEncodingValue(RegNo: MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure it
  // is spilled to the correct offset.
  if (needsFP(MF)) {
    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!" );
    MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI));
    // FP is R31/X31, so no need to update MinGPR/MinG8R.
    HasGPSaveArea = true;
  }

  if (PFI->usesPICBase()) {
    int FI = PFI->getPICBasePointerSaveIndex();
    assert(FI && "No PIC Base Pointer Save Slot!" );
    MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI));

    // The PIC base is R30, so widen the GPR save area to include it.
    MinGPR = std::min<unsigned>(a: MinGPR, b: PPC::R30);
    HasGPSaveArea = true;
  }

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    int FI = PFI->getBasePointerSaveIndex();
    assert(FI && "No Base Pointer Save Slot!" );
    MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI));

    // Widen the appropriate (32- or 64-bit) GPR save area to cover the base
    // pointer register as well.
    Register BP = RegInfo->getBaseRegister(MF);
    if (PPC::G8RCRegClass.contains(Reg: BP)) {
      MinG8R = std::min<unsigned>(a: MinG8R, b: BP);
      HasG8SaveArea = true;
    } else if (PPC::GPRCRegClass.contains(Reg: BP)) {
      MinGPR = std::min<unsigned>(a: MinGPR, b: BP);
      HasGPSaveArea = true;
    }
  }

  // General register save area starts right below the Floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      if (!GPRegs[i].isSpilledToReg()) {
        int FI = GPRegs[i].getFrameIdx();
        MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI));
      }
    }

    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      if (!G8Regs[i].isSpilledToReg()) {
        int FI = G8Regs[i].getFrameIdx();
        MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI));
      }
    }

    // GPRs and G8Rs share one save area; size it by the lower of the two
    // minimum encodings.
    unsigned MinReg =
        std::min<unsigned>(a: TRI->getEncodingValue(RegNo: MinGPR),
                           b: TRI->getEncodingValue(RegNo: MinG8R));

    const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
    LowerBound -= (31 - MinReg + 1) * GPRegSize;
  }

  // For 32-bit only, the CR save area is below the general register
  // save area. For 64-bit SVR4, the CR save area is addressed relative
  // to the stack pointer and hence does not need an adjustment here.
  // Only CR2 (the first nonvolatile spilled) has an associated frame
  // index so that we have a single uniform save area.
  if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
    // Adjust the frame index of the CR spill slot.
    for (const auto &CSInfo : CSI) {
      if (CSInfo.getReg() == PPC::CR2) {
        int FI = CSInfo.getFrameIdx();
        MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI));
        break;
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  // Both Altivec and SPE have the same alignment and padding requirements
  // within the stack frame.
  if (HasVRSaveArea) {
    // Insert alignment padding, we need 16-byte alignment. Note: for positive
    // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
    // we are using negative number here (the stack grows downward). We should
    // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
    // is the alignment size ( n = 16 here) and y is the size after aligning.
    assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!" );
    LowerBound &= ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI));
    }
  }

  addScavengingSpillSlot(MF, RS);
}
2281 | |
2282 | void |
2283 | PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, |
2284 | RegScavenger *RS) const { |
2285 | // Reserve a slot closest to SP or frame pointer if we have a dynalloc or |
2286 | // a large stack, which will require scavenging a register to materialize a |
2287 | // large offset. |
2288 | |
2289 | // We need to have a scavenger spill slot for spills if the frame size is |
2290 | // large. In case there is no free register for large-offset addressing, |
2291 | // this slot is used for the necessary emergency spill. Also, we need the |
2292 | // slot for dynamic stack allocations. |
2293 | |
2294 | // The scavenger might be invoked if the frame offset does not fit into |
2295 | // the 16-bit immediate in case of not SPE and 8-bit in case of SPE. |
2296 | // We don't know the complete frame size here because we've not yet computed |
2297 | // callee-saved register spills or the needed alignment padding. |
2298 | unsigned StackSize = determineFrameLayout(MF, UseEstimate: true); |
2299 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2300 | bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(x: StackSize) : !isInt<16>(x: StackSize); |
2301 | |
2302 | if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || |
2303 | (hasSpills(MF) && NeedSpills)) { |
2304 | const TargetRegisterClass &GPRC = PPC::GPRCRegClass; |
2305 | const TargetRegisterClass &G8RC = PPC::G8RCRegClass; |
2306 | const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; |
2307 | const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); |
2308 | unsigned Size = TRI.getSpillSize(RC); |
2309 | Align Alignment = TRI.getSpillAlign(RC); |
2310 | RS->addScavengingFrameIndex(FI: MFI.CreateStackObject(Size, Alignment, isSpillSlot: false)); |
2311 | |
2312 | // Might we have over-aligned allocas? |
2313 | bool HasAlVars = |
2314 | MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); |
2315 | |
2316 | // These kinds of spills might need two registers. |
2317 | if (spillsCR(MF) || HasAlVars) |
2318 | RS->addScavengingFrameIndex( |
2319 | FI: MFI.CreateStackObject(Size, Alignment, isSpillSlot: false)); |
2320 | } |
2321 | } |
2322 | |
// This function checks if a callee saved gpr can be spilled to a volatile
// vector register. This occurs for leaf functions when the option
// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
// which were not spilled to vectors, return false so the target independent
// code can handle them by assigning a FrameIdx to a stack slot.
//
// As a side effect, on SPE targets this also narrows each 64-bit SPE
// super-register in CSI down to its lower 32-bit GPR when only the low half
// was modified, so that only the live half is spilled.
bool PPCFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {

  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF: &MF);
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Subtarget.hasSPE()) {
    // In case of SPE we only have SuperRegs and CRs
    // in our CalleSaveInfo vector.

    for (auto &CalleeSaveReg : CSI) {
      MCPhysReg Reg = CalleeSaveReg.getReg();
      // Sub-register index 1 is the low 32 bits, index 2 the high 32 bits.
      MCPhysReg Lower = RegInfo->getSubReg(Reg, Idx: 1);
      MCPhysReg Higher = RegInfo->getSubReg(Reg, Idx: 2);

      if ( // Check only for SuperRegs.
          Lower &&
          // Replace Reg if only lower-32 bits modified
          !MRI.isPhysRegModified(PhysReg: Higher))
        CalleeSaveReg = CalleeSavedInfo(Lower);
    }
  }

  // Early exit if cannot spill gprs to volatile vector registers.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
    return false;

  // Build a BitVector of VSRs that can be used for spilling GPRs.
  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
  BitVector BVCalleeSaved(TRI->getNumRegs());
  for (unsigned i = 0; CSRegs[i]; ++i)
    BVCalleeSaved.set(CSRegs[i]);

  for (unsigned Reg : BVAllocatable.set_bits()) {
    // Set to 0 if the register is not a volatile VSX register, or if it is
    // used in the function.
    if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
        MRI.isPhysRegUsed(PhysReg: Reg))
      BVAllocatable.reset(Idx: Reg);
  }

  bool AllSpilledToReg = true;
  // Nonzero while the previously chosen VSR still has room for a second GPR.
  unsigned LastVSRUsedForSpill = 0;
  for (auto &CS : CSI) {
    if (BVAllocatable.none())
      return false;

    Register Reg = CS.getReg();

    // Only 64-bit GPRs are eligible for vector spilling; anything else falls
    // back to a stack slot.
    if (!PPC::G8RCRegClass.contains(Reg)) {
      AllSpilledToReg = false;
      continue;
    }

    // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
    // into one VSR using the mtvsrdd instruction.
    if (LastVSRUsedForSpill != 0) {
      CS.setDstReg(LastVSRUsedForSpill);
      BVAllocatable.reset(Idx: LastVSRUsedForSpill);
      LastVSRUsedForSpill = 0;
      continue;
    }

    // find_first() returns -1 when empty; the unsigned wrap makes the bound
    // check below fail safely in that case.
    unsigned VolatileVFReg = BVAllocatable.find_first();
    if (VolatileVFReg < BVAllocatable.size()) {
      CS.setDstReg(VolatileVFReg);
      LastVSRUsedForSpill = VolatileVFReg;
    } else {
      AllSpilledToReg = false;
    }
  }
  return AllSpilledToReg;
}
2407 | |
// Emit the callee-saved register spills before the given insertion point.
// CR2-CR4 are gathered into a single MFCR + STW on 32-bit ELF (elsewhere the
// CR save is deferred to the prologue); GPRs selected by
// assignCalleeSavedSpillSlots are parked in volatile VSRs (two per VSR via
// mtvsrdd where possible); everything else is stored to its stack slot.
// Returns true to signal the spills were fully handled here.
bool PPCFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;
  // Tracks VSR destinations already written, so a VSR holding two GPRs is
  // only stored once.
  BitVector Spilled(TRI->getNumRegs());

  VSRContainingGPRs.clear();

  // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one
  // or two GPRs, so we need table to record information for later save/restore.
  for (const CalleeSavedInfo &Info : CSI) {
    if (Info.isSpilledToReg()) {
      auto &SpilledVSR =
          VSRContainingGPRs.FindAndConstruct(Key: Info.getDstReg()).second;
      assert(SpilledVSR.second == 0 &&
             "Can't spill more than two GPRs into VSR!" );
      if (SpilledVSR.first == 0)
        SpilledVSR.first = Info.getReg();
      else
        SpilledVSR.second = Info.getReg();
    }
  }

  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
      MBB.addLiveIn(PhysReg: Reg);

    // A previously emitted MFCR already captures this CR field; just record
    // the additional implicit kill on that instruction.
    if (CRSpilled && IsCRField) {
      CRMIB.addReg(RegNo: Reg, flags: RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (!Subtarget.is32BitELFABI()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: PPC::MFCR), DestReg: PPC::R12)
                  .addReg(RegNo: Reg, flags: RegState::ImplicitKill);

        MBB.insert(I: MI, MI: CRMIB);
        MBB.insert(I: MI, MI: addFrameReference(MIB: BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: PPC::STW))
                                  .addReg(RegNo: PPC::R12,
                                          flags: getKillRegState(B: true)),
                                  FI: I.getFrameIdx()));
      }
    } else {
      if (I.isSpilledToReg()) {
        unsigned Dst = I.getDstReg();

        // The VSR was already filled when its first GPR was processed.
        if (Spilled[Dst])
          continue;

        if (VSRContainingGPRs[Dst].second != 0) {
          assert(Subtarget.hasP9Vector() &&
                 "mtvsrdd is unavailable on pre-P9 targets." );

          // Two GPRs go into one VSR with a single mtvsrdd.
          NumPESpillVSR += 2;
          BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: PPC::MTVSRDD), DestReg: Dst)
              .addReg(RegNo: VSRContainingGPRs[Dst].first, flags: getKillRegState(B: true))
              .addReg(RegNo: VSRContainingGPRs[Dst].second, flags: getKillRegState(B: true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          assert(Subtarget.hasP8Vector() &&
                 "Can't move GPR to VSR on pre-P8 targets." );

          // Single GPR: mtvsrd into the doubleword half of the VSR.
          ++NumPESpillVSR;
          BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: PPC::MTVSRD),
                  DestReg: TRI->getSubReg(Reg: Dst, Idx: PPC::sub_64))
              .addReg(RegNo: VSRContainingGPRs[Dst].first, flags: getKillRegState(B: true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!" );
        }
        Spilled.set(Dst);
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Kind: Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MBBI: MI, SrcReg: Reg, isKill: !IsLiveIn,
                                       FrameIndex: I.getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MBBI: MI, SrcReg: Reg, isKill: !IsLiveIn, FrameIndex: I.getFrameIdx(), RC,
                                  TRI, VReg: Register());
      }
    }
  }
  return true;
}
2530 | |
2531 | static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, |
2532 | bool CR4Spilled, MachineBasicBlock &MBB, |
2533 | MachineBasicBlock::iterator MI, |
2534 | ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { |
2535 | |
2536 | MachineFunction *MF = MBB.getParent(); |
2537 | const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); |
2538 | DebugLoc DL; |
2539 | unsigned MoveReg = PPC::R12; |
2540 | |
2541 | // 32-bit: FP-relative |
2542 | MBB.insert(I: MI, |
2543 | MI: addFrameReference(MIB: BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: PPC::LWZ), DestReg: MoveReg), |
2544 | FI: CSI[CSIIndex].getFrameIdx())); |
2545 | |
2546 | unsigned RestoreOp = PPC::MTOCRF; |
2547 | if (CR2Spilled) |
2548 | MBB.insert(I: MI, MI: BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: RestoreOp), DestReg: PPC::CR2) |
2549 | .addReg(RegNo: MoveReg, flags: getKillRegState(B: !CR3Spilled && !CR4Spilled))); |
2550 | |
2551 | if (CR3Spilled) |
2552 | MBB.insert(I: MI, MI: BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: RestoreOp), DestReg: PPC::CR3) |
2553 | .addReg(RegNo: MoveReg, flags: getKillRegState(B: !CR4Spilled))); |
2554 | |
2555 | if (CR4Spilled) |
2556 | MBB.insert(I: MI, MI: BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: RestoreOp), DestReg: PPC::CR4) |
2557 | .addReg(RegNo: MoveReg, flags: getKillRegState(B: true))); |
2558 | } |
2559 | |
// Lower the ADJCALLSTACKDOWN / ADJCALLSTACKUP pseudo-instructions. Under
// GuaranteedTailCallOpt the callee pops its own argument area on return
// (operand 1 of ADJCALLSTACKUP), so SP must be re-extended here by that
// amount to keep the frame consistent for the remainder of the function.
// Both pseudos are erased; returns an iterator past the removed instruction.
MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      I->getOpcode() == PPC::ADJCALLSTACKUP) {
    // Add (actually subtract) back the amount the callee popped on return.
    if (int CalleeAmt = I->getOperand(i: 1).getImm()) {
      bool is64Bit = Subtarget.isPPC64();
      // The stack grows down, so re-extending means adding a negative amount.
      CalleeAmt *= -1;
      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
      const DebugLoc &dl = I->getDebugLoc();

      if (isInt<16>(x: CalleeAmt)) {
        // The adjustment fits a signed 16-bit immediate: one addi suffices.
        BuildMI(BB&: MBB, I, MIMD: dl, MCID: TII.get(Opcode: ADDIInstr), DestReg: StackReg)
            .addReg(RegNo: StackReg, flags: RegState::Kill)
            .addImm(Val: CalleeAmt);
      } else {
        // Materialize the full 32-bit amount in a scratch register with
        // lis (high half) + ori (low half), then add it to SP.
        MachineBasicBlock::iterator MBBI = I;
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: LISInstr), DestReg: TmpReg)
            .addImm(Val: CalleeAmt >> 16);
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ORIInstr), DestReg: TmpReg)
            .addReg(RegNo: TmpReg, flags: RegState::Kill)
            .addImm(Val: CalleeAmt & 0xFFFF);
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ADDInstr), DestReg: StackReg)
            .addReg(RegNo: StackReg, flags: RegState::Kill)
            .addReg(RegNo: TmpReg);
      }
    }
  }
  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
  return MBB.erase(I);
}
2598 | |
2599 | static bool isCalleeSavedCR(unsigned Reg) { |
2600 | return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; |
2601 | } |
2602 | |
// Restore the callee-saved registers before the given instruction, mirroring
// spillCalleeSavedRegisters: CR2-CR4 are batched and reloaded with a single
// LWZ + MTOCRF sequence (32-bit ELF only; elsewhere the epilogue handles CRs),
// GPR pairs parked in VSRs are moved back with mfvsrld/mfvsrd, and the rest
// are reloaded from their stack slots. The insertion point is rewound after
// each restore so instructions end up in reverse order of spilling.
bool PPCFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;
  // Tracks VSRs already unpacked so a VSR holding two GPRs is read only once.
  BitVector Restored(TRI->getNumRegs());

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    Register Reg = CSI[i].getReg();

    // The TOC register is restored by the prologue/epilogue machinery, not
    // here.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Restore of callee saved condition register field is handled during
    // epilogue insertion.
    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled. Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // On 32-bit ELF when we first encounter a non-CR register after seeing at
      // least one CR register, restore all spilled CRs together.
      if (CR2Spilled || CR3Spilled || CR4Spilled) {
        bool is31 = needsFP(MF: *MF);
        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, MI: I, CSI,
                   CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        DebugLoc DL;
        unsigned Dst = CSI[i].getDstReg();

        // Both GPRs in this VSR were already recovered.
        if (Restored[Dst])
          continue;

        if (VSRContainingGPRs[Dst].second != 0) {
          // Two GPRs in one VSR: mfvsrld reads the low doubleword, mfvsrd
          // the high one.
          assert(Subtarget.hasP9Vector());
          NumPEReloadVSR += 2;
          BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII.get(Opcode: PPC::MFVSRLD),
                  DestReg: VSRContainingGPRs[Dst].second)
              .addReg(RegNo: Dst);
          BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII.get(Opcode: PPC::MFVSRD),
                  DestReg: VSRContainingGPRs[Dst].first)
              .addReg(RegNo: TRI->getSubReg(Reg: Dst, Idx: PPC::sub_64), flags: getKillRegState(B: true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          assert(Subtarget.hasP8Vector());
          ++NumPEReloadVSR;
          BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII.get(Opcode: PPC::MFVSRD),
                  DestReg: VSRContainingGPRs[Dst].first)
              .addReg(RegNo: TRI->getSubReg(Reg: Dst, Idx: PPC::sub_64), flags: getKillRegState(B: true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!" );
        }

        Restored.set(Dst);

      } else {
        // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Kind: Attribute::NoUnwind))
          TII.loadRegFromStackSlotNoUpd(MBB, MBBI: I, DestReg: Reg, FrameIndex: CSI[i].getFrameIdx(), RC,
                                        TRI);
        else
          TII.loadRegFromStackSlot(MBB, MBBI: I, DestReg: Reg, FrameIndex: CSI[i].getFrameIdx(), RC, TRI,
                                   VReg: Register());

        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!" );
      }
    }

    // Insert in reverse order.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If we haven't yet restored the CRs, do so now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    assert(Subtarget.is32BitELFABI() &&
           "Only set CR[2|3|4]Spilled on 32-bit SVR4." );
    bool is31 = needsFP(MF: *MF);
    restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, MI: I, CSI, CSIIndex);
  }

  return true;
}
2723 | |
/// Return the linkage-area offset of the TOC save slot, as precomputed for
/// the current subtarget/ABI.
uint64_t PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}
2727 | |
/// Return the precomputed offset of the frame pointer save slot.
uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
  return FramePointerSaveOffset;
}
2731 | |
/// Return the precomputed offset of the base pointer save slot.
uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
  return BasePointerSaveOffset;
}
2735 | |
2736 | bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { |
2737 | if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) |
2738 | return false; |
2739 | return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI(); |
2740 | } |
2741 | |
// Widen the callee-saved set so each register class is saved as a contiguous
// run ending at register 31, as required by AIX traceback tables: if register
// N of a class is saved, registers N..31 of that class must all be saved.
void PPCFrameLowering::updateCalleeSaves(const MachineFunction &MF,
                                         BitVector &SavedRegs) const {
  // The AIX ABI uses traceback tables for EH which require that if callee-saved
  // register N is used, all registers N-31 must be saved/restored.
  // NOTE: The check for AIX is not actually what is relevant. Traceback tables
  // on Linux have the same requirements. It is just that AIX is the only ABI
  // for which we actually use traceback tables. If another ABI needs to be
  // supported that also uses them, we can add a check such as
  // Subtarget.usesTraceBackTables().
  assert(Subtarget.isAIXABI() &&
         "Function updateCalleeSaves should only be called for AIX." );

  // If there are no callee saves then there is nothing to do.
  if (SavedRegs.none())
    return;

  const MCPhysReg *CSRegs =
      Subtarget.getRegisterInfo()->getCalleeSavedRegs(MF: &MF);
  // Start each minimum at register 31 (the highest CSR of its class) so an
  // empty class widens nothing in the second pass.
  MCPhysReg LowestGPR = PPC::R31;
  MCPhysReg LowestG8R = PPC::X31;
  MCPhysReg LowestFPR = PPC::F31;
  MCPhysReg LowestVR = PPC::V31;

  // Traverse the CSRs twice so as not to rely on ascending ordering of
  // registers in the array. The first pass finds the lowest numbered
  // register and the second pass marks all higher numbered registers
  // for spilling.
  for (int i = 0; CSRegs[i]; i++) {
    // Get the lowest numbered register for each class that actually needs
    // to be saved.
    MCPhysReg Cand = CSRegs[i];
    if (!SavedRegs.test(Idx: Cand))
      continue;
    if (PPC::GPRCRegClass.contains(Reg: Cand) && Cand < LowestGPR)
      LowestGPR = Cand;
    else if (PPC::G8RCRegClass.contains(Reg: Cand) && Cand < LowestG8R)
      LowestG8R = Cand;
    else if ((PPC::F4RCRegClass.contains(Reg: Cand) ||
              PPC::F8RCRegClass.contains(Reg: Cand)) &&
             Cand < LowestFPR)
      LowestFPR = Cand;
    else if (PPC::VRRCRegClass.contains(Reg: Cand) && Cand < LowestVR)
      LowestVR = Cand;
  }

  // Second pass: mark every CSR numbered above its class's minimum.
  for (int i = 0; CSRegs[i]; i++) {
    MCPhysReg Cand = CSRegs[i];
    if ((PPC::GPRCRegClass.contains(Reg: Cand) && Cand > LowestGPR) ||
        (PPC::G8RCRegClass.contains(Reg: Cand) && Cand > LowestG8R) ||
        ((PPC::F4RCRegClass.contains(Reg: Cand) ||
          PPC::F8RCRegClass.contains(Reg: Cand)) &&
         Cand > LowestFPR) ||
        (PPC::VRRCRegClass.contains(Reg: Cand) && Cand > LowestVR))
      SavedRegs.set(Cand);
  }
}
2798 | |
2799 | uint64_t PPCFrameLowering::getStackThreshold() const { |
2800 | // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack; |
2801 | // use `add r1, r1, <scratch_reg>` to release the stack frame. |
2802 | // Scratch register contains a signed 64-bit number, which is negative |
2803 | // when extending the stack and is positive when releasing the stack frame. |
2804 | // To make `stux` and `add` paired, the absolute value of the number contained |
2805 | // in the scratch register should be the same. Thus the maximum stack size |
2806 | // is (2^63)-1, i.e., LONG_MAX. |
2807 | if (Subtarget.isPPC64()) |
2808 | return LONG_MAX; |
2809 | |
2810 | return TargetFrameLowering::getStackThreshold(); |
2811 | } |
2812 | |