| 1 | //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains the PPC implementation of TargetFrameLowering class. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "PPCFrameLowering.h" |
| 14 | #include "MCTargetDesc/PPCPredicates.h" |
| 15 | #include "PPCInstrBuilder.h" |
| 16 | #include "PPCInstrInfo.h" |
| 17 | #include "PPCMachineFunctionInfo.h" |
| 18 | #include "PPCSubtarget.h" |
| 19 | #include "PPCTargetMachine.h" |
| 20 | #include "llvm/ADT/Statistic.h" |
| 21 | #include "llvm/CodeGen/LivePhysRegs.h" |
| 22 | #include "llvm/CodeGen/MachineFrameInfo.h" |
| 23 | #include "llvm/CodeGen/MachineFunction.h" |
| 24 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 25 | #include "llvm/CodeGen/MachineModuleInfo.h" |
| 26 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 27 | #include "llvm/CodeGen/RegisterScavenging.h" |
| 28 | #include "llvm/IR/Function.h" |
| 29 | #include "llvm/Target/TargetOptions.h" |
| 30 | |
using namespace llvm;

#define DEBUG_TYPE "framelowering"
// Counters for the prologue/epilogue vector-spill optimization and for
// stack-probed prologues.
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

// Experimental: spill callee-saved GPRs to volatile vector registers in the
// prologue (and reload them in the epilogue) instead of using stack slots.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
| 42 | |
| 43 | static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { |
| 44 | if (STI.isAIXABI()) |
| 45 | return STI.isPPC64() ? 16 : 8; |
| 46 | // SVR4 ABI: |
| 47 | return STI.isPPC64() ? 16 : 4; |
| 48 | } |
| 49 | |
| 50 | static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { |
| 51 | if (STI.isAIXABI()) |
| 52 | return STI.isPPC64() ? 40 : 20; |
| 53 | return STI.isELFv2ABI() ? 24 : 40; |
| 54 | } |
| 55 | |
| 56 | static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { |
| 57 | // First slot in the general register save area. |
| 58 | return STI.isPPC64() ? -8U : -4U; |
| 59 | } |
| 60 | |
| 61 | static unsigned computeLinkageSize(const PPCSubtarget &STI) { |
| 62 | if (STI.isAIXABI() || STI.isPPC64()) |
| 63 | return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); |
| 64 | |
| 65 | // 32-bit SVR4 ABI: |
| 66 | return 8; |
| 67 | } |
| 68 | |
| 69 | static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { |
| 70 | // Third slot in the general purpose register save area. |
| 71 | if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent()) |
| 72 | return -12U; |
| 73 | |
| 74 | // Second slot in the general purpose register save area. |
| 75 | return STI.isPPC64() ? -16U : -8U; |
| 76 | } |
| 77 | |
| 78 | static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { |
| 79 | return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8; |
| 80 | } |
| 81 | |
// Cache all ABI-dependent save-area offsets and the linkage size at
// construction time so later queries during frame lowering are cheap.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
| 91 | |
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
// Returns the fixed spill-slot table for the current ABI and reports its
// length through \p NumEntries.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

  // Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8},     \
      {PPC::F30, -16},    \
      {PPC::F29, -24},    \
      {PPC::F28, -32},    \
      {PPC::F27, -40},    \
      {PPC::F26, -48},    \
      {PPC::F25, -56},    \
      {PPC::F24, -64},    \
      {PPC::F23, -72},    \
      {PPC::F22, -80},    \
      {PPC::F21, -88},    \
      {PPC::F20, -96},    \
      {PPC::F19, -104},   \
      {PPC::F18, -112},   \
      {PPC::F17, -120},   \
      {PPC::F16, -128},   \
      {PPC::F15, -136},   \
      {PPC::F14, -144}

  // 32-bit general purpose register save area offsets shared by ELF and
  // AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4},       \
      {PPC::R30, -8},       \
      {PPC::R29, -12},      \
      {PPC::R28, -16},      \
      {PPC::R27, -20},      \
      {PPC::R26, -24},      \
      {PPC::R25, -28},      \
      {PPC::R24, -32},      \
      {PPC::R23, -36},      \
      {PPC::R22, -40},      \
      {PPC::R21, -44},      \
      {PPC::R20, -48},      \
      {PPC::R19, -52},      \
      {PPC::R18, -56},      \
      {PPC::R17, -60},      \
      {PPC::R16, -64},      \
      {PPC::R15, -68},      \
      {PPC::R14, -72}

  // 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8},       \
      {PPC::X30, -16},      \
      {PPC::X29, -24},      \
      {PPC::X28, -32},      \
      {PPC::X27, -40},      \
      {PPC::X26, -48},      \
      {PPC::X25, -56},      \
      {PPC::X24, -64},      \
      {PPC::X23, -72},      \
      {PPC::X22, -80},      \
      {PPC::X21, -88},      \
      {PPC::X20, -96},      \
      {PPC::X19, -104},     \
      {PPC::X18, -112},     \
      {PPC::X17, -120},     \
      {PPC::X16, -128},     \
      {PPC::X15, -136},     \
      {PPC::X14, -144}

  // Vector register save area offsets (16 bytes per vector register).
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16},   \
      {PPC::V30, -32},   \
      {PPC::V29, -48},   \
      {PPC::V28, -64},   \
      {PPC::V27, -80},   \
      {PPC::V26, -96},   \
      {PPC::V25, -112},  \
      {PPC::V24, -128},  \
      {PPC::V23, -144},  \
      {PPC::V22, -160},  \
      {PPC::V21, -176},  \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset. We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  // Select the table matching the target ABI.
  if (Subtarget.is64BitELFABI()) {
    NumEntries = std::size(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = std::size(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = std::size(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = std::size(AIXOffsets32);
  return AIXOffsets32;
}
| 251 | |
| 252 | static bool spillsCR(const MachineFunction &MF) { |
| 253 | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
| 254 | return FuncInfo->isCRSpilled(); |
| 255 | } |
| 256 | |
| 257 | static bool hasSpills(const MachineFunction &MF) { |
| 258 | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
| 259 | return FuncInfo->hasSpills(); |
| 260 | } |
| 261 | |
| 262 | static bool hasNonRISpills(const MachineFunction &MF) { |
| 263 | const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
| 264 | return FuncInfo->hasNonRISpills(); |
| 265 | } |
| 266 | |
| 267 | /// MustSaveLR - Return true if this function requires that we save the LR |
| 268 | /// register onto the stack in the prolog and restore it in the epilog of the |
| 269 | /// function. |
| 270 | static bool MustSaveLR(const MachineFunction &MF, MCRegister LR) { |
| 271 | const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); |
| 272 | |
| 273 | // We need a save/restore of LR if there is any def of LR (which is |
| 274 | // defined by calls, including the PIC setup sequence), or if there is |
| 275 | // some use of the LR stack slot (e.g. for builtin_return_address). |
| 276 | // (LR comes in 32 and 64 bit versions.) |
| 277 | MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(RegNo: LR); |
| 278 | return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); |
| 279 | } |
| 280 | |
| 281 | /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum |
| 282 | /// call frame size. Update the MachineFunction object with the stack size. |
| 283 | uint64_t |
| 284 | PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, |
| 285 | bool UseEstimate) const { |
| 286 | unsigned NewMaxCallFrameSize = 0; |
| 287 | uint64_t FrameSize = determineFrameLayout(MF, UseEstimate, |
| 288 | NewMaxCallFrameSize: &NewMaxCallFrameSize); |
| 289 | MF.getFrameInfo().setStackSize(FrameSize); |
| 290 | MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); |
| 291 | return FrameSize; |
| 292 | } |
| 293 | |
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size. Does not modify the function; the (possibly grown) max call
/// frame size is reported through \p NewMaxCallFrameSize if non-null.
/// Returns 0 when the frame can be elided entirely via the red zone.
uint64_t
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
                                       bool UseEstimate,
                                       unsigned *NewMaxCallFrameSize) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Get the number of bytes to allocate from the FrameInfo. Before frame
  // finalization only an estimate is available.
  uint64_t FrameSize =
      UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();

  // Get stack alignments. The frame must be aligned to the greatest of these:
  Align TargetAlign = getStackAlign(); // alignment required per the ABI
  Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
  Align Alignment = std::max(TargetAlign, MaxAlign);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  MCRegister LR = RegInfo->getRARegister();
  bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
  bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
                       !MFI.adjustsStack() &&       // No calls.
                       !MustSaveLR(MF, LR) &&       // No need to save LR.
                       !FI->mustSaveTOC() &&        // No need to save TOC.
                       !RegInfo->hasBasePointer(MF) && // No special alignment.
                       !MFI.isFrameAddressTaken();

  // Note: for PPC32 SVR4ABI, we can still generate stackless
  // code if all local vars are reg-allocated.
  bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();

  // Check whether we can skip adjusting the stack pointer (by using red zone)
  if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
    // No need for frame
    return 0;
  }

  // Get the maximum call frame size of all the calls.
  unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();

  // Maximum call frame needs to be at least big enough for linkage area.
  unsigned minCallFrameSize = getLinkageSize();
  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);

  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
  // that allocations will be aligned.
  if (MFI.hasVarSizedObjects())
    maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);

  // Update the new max call frame size if the caller passes in a valid pointer.
  if (NewMaxCallFrameSize)
    *NewMaxCallFrameSize = maxCallFrameSize;

  // Include call frame size in total.
  FrameSize += maxCallFrameSize;

  // Make sure the frame is aligned.
  FrameSize = alignTo(FrameSize, Alignment);

  return FrameSize;
}
| 357 | |
| 358 | // hasFPImpl - Return true if the specified function actually has a dedicated |
| 359 | // frame pointer register. |
| 360 | bool PPCFrameLowering::hasFPImpl(const MachineFunction &MF) const { |
| 361 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 362 | // FIXME: This is pretty much broken by design: hasFP() might be called really |
| 363 | // early, before the stack layout was calculated and thus hasFP() might return |
| 364 | // true or false here depending on the time of call. |
| 365 | return (MFI.getStackSize()) && needsFP(MF); |
| 366 | } |
| 367 | |
| 368 | // needsFP - Return true if the specified function should have a dedicated frame |
| 369 | // pointer register. This is true if the function has variable sized allocas or |
| 370 | // if frame pointer elimination is disabled. |
| 371 | bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { |
| 372 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 373 | |
| 374 | // Naked functions have no stack frame pushed, so we don't have a frame |
| 375 | // pointer. |
| 376 | if (MF.getFunction().hasFnAttribute(Kind: Attribute::Naked)) |
| 377 | return false; |
| 378 | |
| 379 | return MF.getTarget().Options.DisableFramePointerElim(MF) || |
| 380 | MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || |
| 381 | MF.exposesReturnsTwice() || |
| 382 | (MF.getTarget().Options.GuaranteedTailCallOpt && |
| 383 | MF.getInfo<PPCFunctionInfo>()->hasFastCall()); |
| 384 | } |
| 385 | |
| 386 | void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { |
| 387 | // When there is dynamic alloca in this function, we can not use the frame |
| 388 | // pointer X31/R31 for the frameaddress lowering. In this case, only X1/R1 |
| 389 | // always points to the backchain. |
| 390 | bool is31 = needsFP(MF) && !MF.getFrameInfo().hasVarSizedObjects(); |
| 391 | unsigned FPReg = is31 ? PPC::R31 : PPC::R1; |
| 392 | unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; |
| 393 | |
| 394 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 395 | bool HasBP = RegInfo->hasBasePointer(MF); |
| 396 | unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; |
| 397 | unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; |
| 398 | |
| 399 | for (MachineBasicBlock &MBB : MF) |
| 400 | for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) { |
| 401 | --MBBI; |
| 402 | for (MachineOperand &MO : MBBI->operands()) { |
| 403 | if (!MO.isReg()) |
| 404 | continue; |
| 405 | |
| 406 | switch (MO.getReg()) { |
| 407 | case PPC::FP: |
| 408 | MO.setReg(FPReg); |
| 409 | break; |
| 410 | case PPC::FP8: |
| 411 | MO.setReg(FP8Reg); |
| 412 | break; |
| 413 | case PPC::BP: |
| 414 | MO.setReg(BPReg); |
| 415 | break; |
| 416 | case PPC::BP8: |
| 417 | MO.setReg(BP8Reg); |
| 418 | break; |
| 419 | |
| 420 | } |
| 421 | } |
| 422 | } |
| 423 | } |
| 424 | |
/* This function will do the following:
   - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
     respectively (defaults recommended by the ABI) and return true
   - If MBB is not an entry block, initialize the register scavenger and look
     for available registers.
   - If the defaults (R0/R12) are available, return true
   - If TwoUniqueRegsRequired is set to true, it looks for two unique
     registers. Otherwise, look for a single available register.
   - If the required registers are found, set SR1 and SR2 and return true.
   - If the required registers are not found, set SR2 or both SR1 and SR2 to
     PPC::NoRegister and return false.

   Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
   is not set, this function will attempt to find two different registers, but
   still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  // ABI-recommended scratch registers for prologue/epilogue code.
  Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert(SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  // Scavenge liveness up to the point where the scratch register will
  // actually be used.
  if (UseAtEnd) {
    // The scratch register will be used before the first terminator (or at the
    // end of the block if there are no terminators).
    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    if (MBBI == MBB->begin()) {
      RS.enterBasicBlock(*MBB);
    } else {
      RS.enterBasicBlockEnd(*MBB);
      RS.backward(MBBI);
    }
  } else {
    // The scratch register will be used at the start of the block.
    RS.enterBasicBlock(*MBB);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      // Register() is PPC::NoRegister.
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}
| 526 | |
| 527 | // We need a scratch register for spilling LR and for spilling CR. By default, |
| 528 | // we use two scratch registers to hide latency. However, if only one scratch |
| 529 | // register is available, we can adjust for that by not overlapping the spill |
| 530 | // code. However, if we need to realign the stack (i.e. have a base pointer) |
| 531 | // and the stack frame is large, we need two scratch registers. |
| 532 | // Also, stack probe requires two scratch registers, one for old sp, one for |
| 533 | // large frame and large probe size. |
| 534 | bool |
| 535 | PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { |
| 536 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 537 | MachineFunction &MF = *(MBB->getParent()); |
| 538 | bool HasBP = RegInfo->hasBasePointer(MF); |
| 539 | unsigned FrameSize = determineFrameLayout(MF); |
| 540 | int NegFrameSize = -FrameSize; |
| 541 | bool IsLargeFrame = !isInt<16>(x: NegFrameSize); |
| 542 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 543 | Align MaxAlign = MFI.getMaxAlign(); |
| 544 | bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); |
| 545 | const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); |
| 546 | |
| 547 | return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) || |
| 548 | TLI.hasInlineStackProbe(MF); |
| 549 | } |
| 550 | |
| 551 | bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { |
| 552 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
| 553 | |
| 554 | return findScratchRegister(MBB: TmpMBB, UseAtEnd: false, |
| 555 | TwoUniqueRegsRequired: twoUniqueScratchRegsRequired(MBB: TmpMBB)); |
| 556 | } |
| 557 | |
| 558 | bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { |
| 559 | MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); |
| 560 | |
| 561 | return findScratchRegister(MBB: TmpMBB, UseAtEnd: true); |
| 562 | } |
| 563 | |
// Returns true when it is safe to move the stack-pointer update (stdu) past
// the callee-saved register saves in the prologue (and symmetrically in the
// epilogue).
bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult. Similar situation exists with setjmp.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}
| 607 | |
| 608 | void PPCFrameLowering::emitPrologue(MachineFunction &MF, |
| 609 | MachineBasicBlock &MBB) const { |
| 610 | MachineBasicBlock::iterator MBBI = MBB.begin(); |
| 611 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 612 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
| 613 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 614 | const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); |
| 615 | |
| 616 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
| 617 | DebugLoc dl; |
| 618 | // AIX assembler does not support cfi directives. |
| 619 | const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); |
| 620 | |
| 621 | const bool HasFastMFLR = Subtarget.hasFastMFLR(); |
| 622 | |
| 623 | // Get processor type. |
| 624 | bool isPPC64 = Subtarget.isPPC64(); |
| 625 | // Get the ABI. |
| 626 | bool isSVR4ABI = Subtarget.isSVR4ABI(); |
| 627 | bool isELFv2ABI = Subtarget.isELFv2ABI(); |
| 628 | assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI." ); |
| 629 | |
| 630 | // Work out frame sizes. |
| 631 | uint64_t FrameSize = determineFrameLayoutAndUpdate(MF); |
| 632 | int64_t NegFrameSize = -FrameSize; |
| 633 | if (!isPPC64 && (!isInt<32>(x: FrameSize) || !isInt<32>(x: NegFrameSize))) |
| 634 | llvm_unreachable("Unhandled stack size!" ); |
| 635 | |
| 636 | if (MFI.isFrameAddressTaken()) |
| 637 | replaceFPWithRealFP(MF); |
| 638 | |
| 639 | // Check if the link register (LR) must be saved. |
| 640 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
| 641 | bool MustSaveLR = FI->mustSaveLR(); |
| 642 | bool MustSaveTOC = FI->mustSaveTOC(); |
| 643 | const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); |
| 644 | bool MustSaveCR = !MustSaveCRs.empty(); |
| 645 | // Do we have a frame pointer and/or base pointer for this function? |
| 646 | bool HasFP = hasFP(MF); |
| 647 | bool HasBP = RegInfo->hasBasePointer(MF); |
| 648 | bool HasRedZone = isPPC64 || !isSVR4ABI; |
| 649 | const bool HasROPProtect = Subtarget.hasROPProtect(); |
| 650 | bool HasPrivileged = Subtarget.hasPrivileged(); |
| 651 | |
| 652 | Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; |
| 653 | Register BPReg = RegInfo->getBaseRegister(MF); |
| 654 | Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; |
| 655 | Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; |
| 656 | Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; |
| 657 | Register ScratchReg; |
| 658 | Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg |
| 659 | // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) |
| 660 | const MCInstrDesc& MFLRInst = TII.get(Opcode: isPPC64 ? PPC::MFLR8 |
| 661 | : PPC::MFLR ); |
| 662 | const MCInstrDesc& StoreInst = TII.get(Opcode: isPPC64 ? PPC::STD |
| 663 | : PPC::STW ); |
| 664 | const MCInstrDesc& StoreUpdtInst = TII.get(Opcode: isPPC64 ? PPC::STDU |
| 665 | : PPC::STWU ); |
| 666 | const MCInstrDesc& StoreUpdtIdxInst = TII.get(Opcode: isPPC64 ? PPC::STDUX |
| 667 | : PPC::STWUX); |
| 668 | const MCInstrDesc& OrInst = TII.get(Opcode: isPPC64 ? PPC::OR8 |
| 669 | : PPC::OR ); |
| 670 | const MCInstrDesc& SubtractCarryingInst = TII.get(Opcode: isPPC64 ? PPC::SUBFC8 |
| 671 | : PPC::SUBFC); |
| 672 | const MCInstrDesc& SubtractImmCarryingInst = TII.get(Opcode: isPPC64 ? PPC::SUBFIC8 |
| 673 | : PPC::SUBFIC); |
| 674 | const MCInstrDesc &MoveFromCondRegInst = TII.get(Opcode: isPPC64 ? PPC::MFCR8 |
| 675 | : PPC::MFCR); |
| 676 | const MCInstrDesc &StoreWordInst = TII.get(Opcode: isPPC64 ? PPC::STW8 : PPC::STW); |
| 677 | const MCInstrDesc &HashST = |
| 678 | TII.get(Opcode: isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8) |
| 679 | : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST)); |
| 680 | |
| 681 | // Regarding this assert: Even though LR is saved in the caller's frame (i.e., |
| 682 | // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no |
| 683 | // Red Zone, an asynchronous event (a form of "callee") could claim a frame & |
| 684 | // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. |
| 685 | assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && |
| 686 | "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4." ); |
| 687 | |
| 688 | // Using the same bool variable as below to suppress compiler warnings. |
| 689 | bool SingleScratchReg = findScratchRegister( |
| 690 | MBB: &MBB, UseAtEnd: false, TwoUniqueRegsRequired: twoUniqueScratchRegsRequired(MBB: &MBB), SR1: &ScratchReg, SR2: &TempReg); |
| 691 | assert(SingleScratchReg && |
| 692 | "Required number of registers not available in this block" ); |
| 693 | |
| 694 | SingleScratchReg = ScratchReg == TempReg; |
| 695 | |
| 696 | int64_t LROffset = getReturnSaveOffset(); |
| 697 | |
| 698 | int64_t FPOffset = 0; |
| 699 | if (HasFP) { |
| 700 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 701 | int FPIndex = FI->getFramePointerSaveIndex(); |
| 702 | assert(FPIndex && "No Frame Pointer Save Slot!" ); |
| 703 | FPOffset = MFI.getObjectOffset(ObjectIdx: FPIndex); |
| 704 | } |
| 705 | |
| 706 | int64_t BPOffset = 0; |
| 707 | if (HasBP) { |
| 708 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 709 | int BPIndex = FI->getBasePointerSaveIndex(); |
| 710 | assert(BPIndex && "No Base Pointer Save Slot!" ); |
| 711 | BPOffset = MFI.getObjectOffset(ObjectIdx: BPIndex); |
| 712 | } |
| 713 | |
| 714 | int64_t PBPOffset = 0; |
| 715 | if (FI->usesPICBase()) { |
| 716 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 717 | int PBPIndex = FI->getPICBasePointerSaveIndex(); |
| 718 | assert(PBPIndex && "No PIC Base Pointer Save Slot!" ); |
| 719 | PBPOffset = MFI.getObjectOffset(ObjectIdx: PBPIndex); |
| 720 | } |
| 721 | |
| 722 | // Get stack alignments. |
| 723 | Align MaxAlign = MFI.getMaxAlign(); |
| 724 | if (HasBP && MaxAlign > 1) |
| 725 | assert(Log2(MaxAlign) < 16 && "Invalid alignment!" ); |
| 726 | |
| 727 | // Frames of 32KB & larger require special handling because they cannot be |
| 728 | // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. |
| 729 | bool isLargeFrame = !isInt<16>(x: NegFrameSize); |
| 730 | |
| 731 | // Check if we can move the stack update instruction (stdu) down the prologue |
| 732 | // past the callee saves. Hopefully this will avoid the situation where the |
| 733 | // saves are waiting for the update on the store with update to complete. |
| 734 | MachineBasicBlock::iterator StackUpdateLoc = MBBI; |
| 735 | bool MovingStackUpdateDown = false; |
| 736 | |
| 737 | // Check if we can move the stack update. |
| 738 | if (stackUpdateCanBeMoved(MF)) { |
| 739 | const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); |
| 740 | for (CalleeSavedInfo CSI : Info) { |
| 741 | // If the callee saved register is spilled to a register instead of the |
| 742 | // stack then the spill no longer uses the stack pointer. |
| 743 | // This can lead to two consequences: |
| 744 | // 1) We no longer need to update the stack because the function does not |
| 745 | // spill any callee saved registers to stack. |
| 746 | // 2) We have a situation where we still have to update the stack pointer |
| 747 | // even though some registers are spilled to other registers. In |
| 748 | // this case the current code moves the stack update to an incorrect |
| 749 | // position. |
| 750 | // In either case we should abort moving the stack update operation. |
| 751 | if (CSI.isSpilledToReg()) { |
| 752 | StackUpdateLoc = MBBI; |
| 753 | MovingStackUpdateDown = false; |
| 754 | break; |
| 755 | } |
| 756 | |
| 757 | int FrIdx = CSI.getFrameIdx(); |
| 758 | // If the frame index is not negative the callee saved info belongs to a |
| 759 | // stack object that is not a fixed stack object. We ignore non-fixed |
| 760 | // stack objects because we won't move the stack update pointer past them. |
| 761 | if (FrIdx >= 0) |
| 762 | continue; |
| 763 | |
| 764 | if (MFI.isFixedObjectIndex(ObjectIdx: FrIdx) && MFI.getObjectOffset(ObjectIdx: FrIdx) < 0) { |
| 765 | StackUpdateLoc++; |
| 766 | MovingStackUpdateDown = true; |
| 767 | } else { |
| 768 | // We need all of the Frame Indices to meet these conditions. |
| 769 | // If they do not, abort the whole operation. |
| 770 | StackUpdateLoc = MBBI; |
| 771 | MovingStackUpdateDown = false; |
| 772 | break; |
| 773 | } |
| 774 | } |
| 775 | |
| 776 | // If the operation was not aborted then update the object offset. |
| 777 | if (MovingStackUpdateDown) { |
| 778 | for (CalleeSavedInfo CSI : Info) { |
| 779 | int FrIdx = CSI.getFrameIdx(); |
| 780 | if (FrIdx < 0) |
| 781 | MFI.setObjectOffset(ObjectIdx: FrIdx, SPOffset: MFI.getObjectOffset(ObjectIdx: FrIdx) + NegFrameSize); |
| 782 | } |
| 783 | } |
| 784 | } |
| 785 | |
| 786 | // Where in the prologue we move the CR fields depends on how many scratch |
| 787 | // registers we have, and if we need to save the link register or not. This |
| 788 | // lambda is to avoid duplicating the logic in 2 places. |
| 789 | auto BuildMoveFromCR = [&]() { |
| 790 | if (isELFv2ABI && MustSaveCRs.size() == 1) { |
| 791 | // In the ELFv2 ABI, we are not required to save all CR fields. |
| 792 | // If only one CR field is clobbered, it is more efficient to use |
| 793 | // mfocrf to selectively save just that field, because mfocrf has short |
| 794 | // latency compares to mfcr. |
| 795 | assert(isPPC64 && "V2 ABI is 64-bit only." ); |
| 796 | MachineInstrBuilder MIB = |
| 797 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::MFOCRF8), DestReg: TempReg); |
| 798 | MIB.addReg(RegNo: MustSaveCRs[0], flags: RegState::Kill); |
| 799 | } else { |
| 800 | MachineInstrBuilder MIB = |
| 801 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: MoveFromCondRegInst, DestReg: TempReg); |
| 802 | for (unsigned CRfield : MustSaveCRs) |
| 803 | MIB.addReg(RegNo: CRfield, flags: RegState::ImplicitKill); |
| 804 | } |
| 805 | }; |
| 806 | |
| 807 | // If we need to spill the CR and the LR but we don't have two separate |
| 808 | // registers available, we must spill them one at a time |
| 809 | if (MustSaveCR && SingleScratchReg && MustSaveLR) { |
| 810 | BuildMoveFromCR(); |
| 811 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreWordInst) |
| 812 | .addReg(RegNo: TempReg, flags: getKillRegState(B: true)) |
| 813 | .addImm(Val: CRSaveOffset) |
| 814 | .addReg(RegNo: SPReg); |
| 815 | } |
| 816 | |
| 817 | if (MustSaveLR) |
| 818 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: MFLRInst, DestReg: ScratchReg); |
| 819 | |
| 820 | if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) |
| 821 | BuildMoveFromCR(); |
| 822 | |
| 823 | if (HasRedZone) { |
| 824 | if (HasFP) |
| 825 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
| 826 | .addReg(RegNo: FPReg) |
| 827 | .addImm(Val: FPOffset) |
| 828 | .addReg(RegNo: SPReg); |
| 829 | if (FI->usesPICBase()) |
| 830 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
| 831 | .addReg(RegNo: PPC::R30) |
| 832 | .addImm(Val: PBPOffset) |
| 833 | .addReg(RegNo: SPReg); |
| 834 | if (HasBP) |
| 835 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
| 836 | .addReg(RegNo: BPReg) |
| 837 | .addImm(Val: BPOffset) |
| 838 | .addReg(RegNo: SPReg); |
| 839 | } |
| 840 | |
| 841 | // Generate the instruction to store the LR. In the case where ROP protection |
| 842 | // is required the register holding the LR should not be killed as it will be |
| 843 | // used by the hash store instruction. |
| 844 | auto SaveLR = [&](int64_t Offset) { |
| 845 | assert(MustSaveLR && "LR is not required to be saved!" ); |
| 846 | BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: StoreInst) |
| 847 | .addReg(RegNo: ScratchReg, flags: getKillRegState(B: !HasROPProtect)) |
| 848 | .addImm(Val: Offset) |
| 849 | .addReg(RegNo: SPReg); |
| 850 | |
| 851 | // Add the ROP protection Hash Store instruction. |
| 852 | // NOTE: This is technically a violation of the ABI. The hash can be saved |
| 853 | // up to 512 bytes into the Protected Zone. This can be outside of the |
| 854 | // initial 288 byte volatile program storage region in the Protected Zone. |
| 855 | // However, this restriction will be removed in an upcoming revision of the |
| 856 | // ABI. |
| 857 | if (HasROPProtect) { |
| 858 | const int SaveIndex = FI->getROPProtectionHashSaveIndex(); |
| 859 | const int64_t ImmOffset = MFI.getObjectOffset(ObjectIdx: SaveIndex); |
| 860 | assert((ImmOffset <= -8 && ImmOffset >= -512) && |
| 861 | "ROP hash save offset out of range." ); |
| 862 | assert(((ImmOffset & 0x7) == 0) && |
| 863 | "ROP hash save offset must be 8 byte aligned." ); |
| 864 | BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: HashST) |
| 865 | .addReg(RegNo: ScratchReg, flags: getKillRegState(B: true)) |
| 866 | .addImm(Val: ImmOffset) |
| 867 | .addReg(RegNo: SPReg); |
| 868 | } |
| 869 | }; |
| 870 | |
| 871 | if (MustSaveLR && HasFastMFLR) |
| 872 | SaveLR(LROffset); |
| 873 | |
| 874 | if (MustSaveCR && |
| 875 | !(SingleScratchReg && MustSaveLR)) { |
| 876 | assert(HasRedZone && "A red zone is always available on PPC64" ); |
| 877 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreWordInst) |
| 878 | .addReg(RegNo: TempReg, flags: getKillRegState(B: true)) |
| 879 | .addImm(Val: CRSaveOffset) |
| 880 | .addReg(RegNo: SPReg); |
| 881 | } |
| 882 | |
| 883 | // Skip the rest if this is a leaf function & all spills fit in the Red Zone. |
| 884 | if (!FrameSize) { |
| 885 | if (MustSaveLR && !HasFastMFLR) |
| 886 | SaveLR(LROffset); |
| 887 | return; |
| 888 | } |
| 889 | |
| 890 | // Adjust stack pointer: r1 += NegFrameSize. |
| 891 | // If there is a preferred stack alignment, align R1 now |
| 892 | |
| 893 | if (HasBP && HasRedZone) { |
| 894 | // Save a copy of r1 as the base pointer. |
| 895 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: BPReg) |
| 896 | .addReg(RegNo: SPReg) |
| 897 | .addReg(RegNo: SPReg); |
| 898 | } |
| 899 | |
| 900 | // Have we generated a STUX instruction to claim stack frame? If so, |
| 901 | // the negated frame size will be placed in ScratchReg. |
| 902 | bool HasSTUX = |
| 903 | (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) || |
| 904 | (HasBP && MaxAlign > 1) || isLargeFrame; |
| 905 | |
| 906 | // If we use STUX to update the stack pointer, we need the two scratch |
| 907 | // registers TempReg and ScratchReg, we have to save LR here which is stored |
| 908 | // in ScratchReg. |
| 909 | // If the offset can not be encoded into the store instruction, we also have |
| 910 | // to save LR here. |
| 911 | // If we are using ROP Protection we need to save the LR here as we cannot |
| 912 | // move the hashst instruction past the point where we get the stack frame. |
| 913 | if (MustSaveLR && !HasFastMFLR && |
| 914 | (HasSTUX || !isInt<16>(x: FrameSize + LROffset) || HasROPProtect)) |
| 915 | SaveLR(LROffset); |
| 916 | |
| 917 | // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain |
| 918 | // pointer is always stored at SP, we will get a free probe due to an essential |
| 919 | // STU(X) instruction. |
| 920 | if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) { |
| 921 | // To be consistent with other targets, a pseudo instruction is emitted and |
| 922 | // will be later expanded in `inlineStackProbe`. |
| 923 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, |
| 924 | MCID: TII.get(Opcode: isPPC64 ? PPC::PROBED_STACKALLOC_64 |
| 925 | : PPC::PROBED_STACKALLOC_32)) |
| 926 | .addDef(RegNo: TempReg) |
| 927 | .addDef(RegNo: ScratchReg) // ScratchReg stores the old sp. |
| 928 | .addImm(Val: NegFrameSize); |
| 929 | // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we |
| 930 | // update the ScratchReg to meet the assumption that ScratchReg contains |
| 931 | // the NegFrameSize. This solution is rather tricky. |
| 932 | if (!HasRedZone) { |
| 933 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::SUBF), DestReg: ScratchReg) |
| 934 | .addReg(RegNo: ScratchReg) |
| 935 | .addReg(RegNo: SPReg); |
| 936 | } |
| 937 | } else { |
| 938 | // This condition must be kept in sync with canUseAsPrologue. |
| 939 | if (HasBP && MaxAlign > 1) { |
| 940 | if (isPPC64) |
| 941 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::RLDICL), DestReg: ScratchReg) |
| 942 | .addReg(RegNo: SPReg) |
| 943 | .addImm(Val: 0) |
| 944 | .addImm(Val: 64 - Log2(A: MaxAlign)); |
| 945 | else // PPC32... |
| 946 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::RLWINM), DestReg: ScratchReg) |
| 947 | .addReg(RegNo: SPReg) |
| 948 | .addImm(Val: 0) |
| 949 | .addImm(Val: 32 - Log2(A: MaxAlign)) |
| 950 | .addImm(Val: 31); |
| 951 | if (!isLargeFrame) { |
| 952 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: SubtractImmCarryingInst, DestReg: ScratchReg) |
| 953 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
| 954 | .addImm(Val: NegFrameSize); |
| 955 | } else { |
| 956 | assert(!SingleScratchReg && "Only a single scratch reg available" ); |
| 957 | TII.materializeImmPostRA(MBB, MBBI, DL: dl, Reg: TempReg, Imm: NegFrameSize); |
| 958 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: SubtractCarryingInst, DestReg: ScratchReg) |
| 959 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
| 960 | .addReg(RegNo: TempReg, flags: RegState::Kill); |
| 961 | } |
| 962 | |
| 963 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreUpdtIdxInst, DestReg: SPReg) |
| 964 | .addReg(RegNo: SPReg, flags: RegState::Kill) |
| 965 | .addReg(RegNo: SPReg) |
| 966 | .addReg(RegNo: ScratchReg); |
| 967 | } else if (!isLargeFrame) { |
| 968 | BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: StoreUpdtInst, DestReg: SPReg) |
| 969 | .addReg(RegNo: SPReg) |
| 970 | .addImm(Val: NegFrameSize) |
| 971 | .addReg(RegNo: SPReg); |
| 972 | } else { |
| 973 | TII.materializeImmPostRA(MBB, MBBI, DL: dl, Reg: ScratchReg, Imm: NegFrameSize); |
| 974 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreUpdtIdxInst, DestReg: SPReg) |
| 975 | .addReg(RegNo: SPReg, flags: RegState::Kill) |
| 976 | .addReg(RegNo: SPReg) |
| 977 | .addReg(RegNo: ScratchReg); |
| 978 | } |
| 979 | } |
| 980 | |
| 981 | // Save the TOC register after the stack pointer update if a prologue TOC |
| 982 | // save is required for the function. |
| 983 | if (MustSaveTOC) { |
| 984 | assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2" ); |
| 985 | BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: TII.get(Opcode: PPC::STD)) |
| 986 | .addReg(RegNo: TOCReg, flags: getKillRegState(B: true)) |
| 987 | .addImm(Val: TOCSaveOffset) |
| 988 | .addReg(RegNo: SPReg); |
| 989 | } |
| 990 | |
| 991 | if (!HasRedZone) { |
| 992 | assert(!isPPC64 && "A red zone is always available on PPC64" ); |
| 993 | if (HasSTUX) { |
| 994 | // The negated frame size is in ScratchReg, and the SPReg has been |
| 995 | // decremented by the frame size: SPReg = old SPReg + ScratchReg. |
| 996 | // Since FPOffset, PBPOffset, etc. are relative to the beginning of |
| 997 | // the stack frame (i.e. the old SP), ideally, we would put the old |
| 998 | // SP into a register and use it as the base for the stores. The |
| 999 | // problem is that the only available register may be ScratchReg, |
| 1000 | // which could be R0, and R0 cannot be used as a base address. |
| 1001 | |
| 1002 | // First, set ScratchReg to the old SP. This may need to be modified |
| 1003 | // later. |
| 1004 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::SUBF), DestReg: ScratchReg) |
| 1005 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
| 1006 | .addReg(RegNo: SPReg); |
| 1007 | |
| 1008 | if (ScratchReg == PPC::R0) { |
| 1009 | // R0 cannot be used as a base register, but it can be used as an |
| 1010 | // index in a store-indexed. |
| 1011 | int LastOffset = 0; |
| 1012 | if (HasFP) { |
| 1013 | // R0 += (FPOffset-LastOffset). |
| 1014 | // Need addic, since addi treats R0 as 0. |
| 1015 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::ADDIC), DestReg: ScratchReg) |
| 1016 | .addReg(RegNo: ScratchReg) |
| 1017 | .addImm(Val: FPOffset-LastOffset); |
| 1018 | LastOffset = FPOffset; |
| 1019 | // Store FP into *R0. |
| 1020 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::STWX)) |
| 1021 | .addReg(RegNo: FPReg, flags: RegState::Kill) // Save FP. |
| 1022 | .addReg(RegNo: PPC::ZERO) |
| 1023 | .addReg(RegNo: ScratchReg); // This will be the index (R0 is ok here). |
| 1024 | } |
| 1025 | if (FI->usesPICBase()) { |
| 1026 | // R0 += (PBPOffset-LastOffset). |
| 1027 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::ADDIC), DestReg: ScratchReg) |
| 1028 | .addReg(RegNo: ScratchReg) |
| 1029 | .addImm(Val: PBPOffset-LastOffset); |
| 1030 | LastOffset = PBPOffset; |
| 1031 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::STWX)) |
| 1032 | .addReg(RegNo: PPC::R30, flags: RegState::Kill) // Save PIC base pointer. |
| 1033 | .addReg(RegNo: PPC::ZERO) |
| 1034 | .addReg(RegNo: ScratchReg); // This will be the index (R0 is ok here). |
| 1035 | } |
| 1036 | if (HasBP) { |
| 1037 | // R0 += (BPOffset-LastOffset). |
| 1038 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::ADDIC), DestReg: ScratchReg) |
| 1039 | .addReg(RegNo: ScratchReg) |
| 1040 | .addImm(Val: BPOffset-LastOffset); |
| 1041 | LastOffset = BPOffset; |
| 1042 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::STWX)) |
| 1043 | .addReg(RegNo: BPReg, flags: RegState::Kill) // Save BP. |
| 1044 | .addReg(RegNo: PPC::ZERO) |
| 1045 | .addReg(RegNo: ScratchReg); // This will be the index (R0 is ok here). |
| 1046 | // BP = R0-LastOffset |
| 1047 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::ADDIC), DestReg: BPReg) |
| 1048 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
| 1049 | .addImm(Val: -LastOffset); |
| 1050 | } |
| 1051 | } else { |
| 1052 | // ScratchReg is not R0, so use it as the base register. It is |
| 1053 | // already set to the old SP, so we can use the offsets directly. |
| 1054 | |
| 1055 | // Now that the stack frame has been allocated, save all the necessary |
| 1056 | // registers using ScratchReg as the base address. |
| 1057 | if (HasFP) |
| 1058 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
| 1059 | .addReg(RegNo: FPReg) |
| 1060 | .addImm(Val: FPOffset) |
| 1061 | .addReg(RegNo: ScratchReg); |
| 1062 | if (FI->usesPICBase()) |
| 1063 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
| 1064 | .addReg(RegNo: PPC::R30) |
| 1065 | .addImm(Val: PBPOffset) |
| 1066 | .addReg(RegNo: ScratchReg); |
| 1067 | if (HasBP) { |
| 1068 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
| 1069 | .addReg(RegNo: BPReg) |
| 1070 | .addImm(Val: BPOffset) |
| 1071 | .addReg(RegNo: ScratchReg); |
| 1072 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: BPReg) |
| 1073 | .addReg(RegNo: ScratchReg, flags: RegState::Kill) |
| 1074 | .addReg(RegNo: ScratchReg); |
| 1075 | } |
| 1076 | } |
| 1077 | } else { |
| 1078 | // The frame size is a known 16-bit constant (fitting in the immediate |
| 1079 | // field of STWU). To be here we have to be compiling for PPC32. |
| 1080 | // Since the SPReg has been decreased by FrameSize, add it back to each |
| 1081 | // offset. |
| 1082 | if (HasFP) |
| 1083 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
| 1084 | .addReg(RegNo: FPReg) |
| 1085 | .addImm(Val: FrameSize + FPOffset) |
| 1086 | .addReg(RegNo: SPReg); |
| 1087 | if (FI->usesPICBase()) |
| 1088 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
| 1089 | .addReg(RegNo: PPC::R30) |
| 1090 | .addImm(Val: FrameSize + PBPOffset) |
| 1091 | .addReg(RegNo: SPReg); |
| 1092 | if (HasBP) { |
| 1093 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: StoreInst) |
| 1094 | .addReg(RegNo: BPReg) |
| 1095 | .addImm(Val: FrameSize + BPOffset) |
| 1096 | .addReg(RegNo: SPReg); |
| 1097 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::ADDI), DestReg: BPReg) |
| 1098 | .addReg(RegNo: SPReg) |
| 1099 | .addImm(Val: FrameSize); |
| 1100 | } |
| 1101 | } |
| 1102 | } |
| 1103 | |
| 1104 | // Save the LR now. |
| 1105 | if (!HasSTUX && MustSaveLR && !HasFastMFLR && |
| 1106 | isInt<16>(x: FrameSize + LROffset) && !HasROPProtect) |
| 1107 | SaveLR(LROffset + FrameSize); |
| 1108 | |
| 1109 | // Add Call Frame Information for the instructions we generated above. |
| 1110 | if (needsCFI) { |
| 1111 | unsigned CFIIndex; |
| 1112 | |
| 1113 | if (HasBP) { |
| 1114 | // Define CFA in terms of BP. Do this in preference to using FP/SP, |
| 1115 | // because if the stack needed aligning then CFA won't be at a fixed |
| 1116 | // offset from FP/SP. |
| 1117 | unsigned Reg = MRI->getDwarfRegNum(RegNum: BPReg, isEH: true); |
| 1118 | CFIIndex = MF.addFrameInst( |
| 1119 | Inst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: Reg)); |
| 1120 | } else { |
| 1121 | // Adjust the definition of CFA to account for the change in SP. |
| 1122 | assert(NegFrameSize); |
| 1123 | CFIIndex = MF.addFrameInst( |
| 1124 | Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: -NegFrameSize)); |
| 1125 | } |
| 1126 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1127 | .addCFIIndex(CFIIndex); |
| 1128 | |
| 1129 | if (HasFP) { |
| 1130 | // Describe where FP was saved, at a fixed offset from CFA. |
| 1131 | unsigned Reg = MRI->getDwarfRegNum(RegNum: FPReg, isEH: true); |
| 1132 | CFIIndex = MF.addFrameInst( |
| 1133 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Reg, Offset: FPOffset)); |
| 1134 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1135 | .addCFIIndex(CFIIndex); |
| 1136 | } |
| 1137 | |
| 1138 | if (FI->usesPICBase()) { |
| 1139 | // Describe where FP was saved, at a fixed offset from CFA. |
| 1140 | unsigned Reg = MRI->getDwarfRegNum(RegNum: PPC::R30, isEH: true); |
| 1141 | CFIIndex = MF.addFrameInst( |
| 1142 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Reg, Offset: PBPOffset)); |
| 1143 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1144 | .addCFIIndex(CFIIndex); |
| 1145 | } |
| 1146 | |
| 1147 | if (HasBP) { |
| 1148 | // Describe where BP was saved, at a fixed offset from CFA. |
| 1149 | unsigned Reg = MRI->getDwarfRegNum(RegNum: BPReg, isEH: true); |
| 1150 | CFIIndex = MF.addFrameInst( |
| 1151 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Reg, Offset: BPOffset)); |
| 1152 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1153 | .addCFIIndex(CFIIndex); |
| 1154 | } |
| 1155 | |
| 1156 | if (MustSaveLR) { |
| 1157 | // Describe where LR was saved, at a fixed offset from CFA. |
| 1158 | unsigned Reg = MRI->getDwarfRegNum(RegNum: LRReg, isEH: true); |
| 1159 | CFIIndex = MF.addFrameInst( |
| 1160 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: Reg, Offset: LROffset)); |
| 1161 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1162 | .addCFIIndex(CFIIndex); |
| 1163 | } |
| 1164 | } |
| 1165 | |
| 1166 | // If there is a frame pointer, copy R1 into R31 |
| 1167 | if (HasFP) { |
| 1168 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: FPReg) |
| 1169 | .addReg(RegNo: SPReg) |
| 1170 | .addReg(RegNo: SPReg); |
| 1171 | |
| 1172 | if (!HasBP && needsCFI) { |
| 1173 | // Change the definition of CFA from SP+offset to FP+offset, because SP |
| 1174 | // will change at every alloca. |
| 1175 | unsigned Reg = MRI->getDwarfRegNum(RegNum: FPReg, isEH: true); |
| 1176 | unsigned CFIIndex = MF.addFrameInst( |
| 1177 | Inst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: Reg)); |
| 1178 | |
| 1179 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1180 | .addCFIIndex(CFIIndex); |
| 1181 | } |
| 1182 | } |
| 1183 | |
| 1184 | if (needsCFI) { |
| 1185 | // Describe where callee saved registers were saved, at fixed offsets from |
| 1186 | // CFA. |
| 1187 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
| 1188 | for (const CalleeSavedInfo &I : CSI) { |
| 1189 | MCRegister Reg = I.getReg(); |
| 1190 | if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; |
| 1191 | |
| 1192 | // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just |
| 1193 | // subregisters of CR2. We just need to emit a move of CR2. |
| 1194 | if (PPC::CRBITRCRegClass.contains(Reg)) |
| 1195 | continue; |
| 1196 | |
| 1197 | if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) |
| 1198 | continue; |
| 1199 | |
| 1200 | // For 64-bit SVR4 when we have spilled CRs, the spill location |
| 1201 | // is SP+8, not a frame-relative slot. |
| 1202 | if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { |
| 1203 | // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for |
| 1204 | // the whole CR word. In the ELFv2 ABI, every CR that was |
| 1205 | // actually saved gets its own CFI record. |
| 1206 | Register CRReg = isELFv2ABI? Reg : PPC::CR2; |
| 1207 | unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset( |
| 1208 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: CRReg, isEH: true), Offset: CRSaveOffset)); |
| 1209 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1210 | .addCFIIndex(CFIIndex); |
| 1211 | continue; |
| 1212 | } |
| 1213 | |
| 1214 | if (I.isSpilledToReg()) { |
| 1215 | unsigned SpilledReg = I.getDstReg(); |
| 1216 | unsigned CFIRegister = MF.addFrameInst(Inst: MCCFIInstruction::createRegister( |
| 1217 | L: nullptr, Register1: MRI->getDwarfRegNum(RegNum: Reg, isEH: true), |
| 1218 | Register2: MRI->getDwarfRegNum(RegNum: SpilledReg, isEH: true))); |
| 1219 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1220 | .addCFIIndex(CFIIndex: CFIRegister); |
| 1221 | } else { |
| 1222 | int64_t Offset = MFI.getObjectOffset(ObjectIdx: I.getFrameIdx()); |
| 1223 | // We have changed the object offset above but we do not want to change |
| 1224 | // the actual offsets in the CFI instruction so we have to undo the |
| 1225 | // offset change here. |
| 1226 | if (MovingStackUpdateDown) |
| 1227 | Offset -= NegFrameSize; |
| 1228 | |
| 1229 | unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset( |
| 1230 | L: nullptr, Register: MRI->getDwarfRegNum(RegNum: Reg, isEH: true), Offset)); |
| 1231 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1232 | .addCFIIndex(CFIIndex); |
| 1233 | } |
| 1234 | } |
| 1235 | } |
| 1236 | } |
| 1237 | |
| 1238 | void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, |
| 1239 | MachineBasicBlock &PrologMBB) const { |
| 1240 | bool isPPC64 = Subtarget.isPPC64(); |
| 1241 | const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); |
| 1242 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
| 1243 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 1244 | const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); |
| 1245 | // AIX assembler does not support cfi directives. |
| 1246 | const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); |
| 1247 | auto StackAllocMIPos = llvm::find_if(Range&: PrologMBB, P: [](MachineInstr &MI) { |
| 1248 | int Opc = MI.getOpcode(); |
| 1249 | return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32; |
| 1250 | }); |
| 1251 | if (StackAllocMIPos == PrologMBB.end()) |
| 1252 | return; |
| 1253 | const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); |
| 1254 | MachineBasicBlock *CurrentMBB = &PrologMBB; |
| 1255 | DebugLoc DL = PrologMBB.findDebugLoc(MBBI: StackAllocMIPos); |
| 1256 | MachineInstr &MI = *StackAllocMIPos; |
| 1257 | int64_t NegFrameSize = MI.getOperand(i: 2).getImm(); |
| 1258 | unsigned ProbeSize = TLI.getStackProbeSize(MF); |
| 1259 | int64_t NegProbeSize = -(int64_t)ProbeSize; |
| 1260 | assert(isInt<32>(NegProbeSize) && "Unhandled probe size" ); |
| 1261 | int64_t NumBlocks = NegFrameSize / NegProbeSize; |
| 1262 | int64_t NegResidualSize = NegFrameSize % NegProbeSize; |
| 1263 | Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; |
| 1264 | Register ScratchReg = MI.getOperand(i: 0).getReg(); |
| 1265 | Register FPReg = MI.getOperand(i: 1).getReg(); |
| 1266 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 1267 | bool HasBP = RegInfo->hasBasePointer(MF); |
| 1268 | Register BPReg = RegInfo->getBaseRegister(MF); |
| 1269 | Align MaxAlign = MFI.getMaxAlign(); |
| 1270 | bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); |
| 1271 | const MCInstrDesc &CopyInst = TII.get(Opcode: isPPC64 ? PPC::OR8 : PPC::OR); |
| 1272 | // Subroutines to generate .cfi_* directives. |
| 1273 | auto buildDefCFAReg = [&](MachineBasicBlock &MBB, |
| 1274 | MachineBasicBlock::iterator MBBI, Register Reg) { |
| 1275 | unsigned RegNum = MRI->getDwarfRegNum(RegNum: Reg, isEH: true); |
| 1276 | unsigned CFIIndex = MF.addFrameInst( |
| 1277 | Inst: MCCFIInstruction::createDefCfaRegister(L: nullptr, Register: RegNum)); |
| 1278 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1279 | .addCFIIndex(CFIIndex); |
| 1280 | }; |
| 1281 | auto buildDefCFA = [&](MachineBasicBlock &MBB, |
| 1282 | MachineBasicBlock::iterator MBBI, Register Reg, |
| 1283 | int Offset) { |
| 1284 | unsigned RegNum = MRI->getDwarfRegNum(RegNum: Reg, isEH: true); |
| 1285 | unsigned CFIIndex = MBB.getParent()->addFrameInst( |
| 1286 | Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: RegNum, Offset)); |
| 1287 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
| 1288 | .addCFIIndex(CFIIndex); |
| 1289 | }; |
| 1290 | // Subroutine to determine if we can use the Imm as part of d-form. |
| 1291 | auto CanUseDForm = [](int64_t Imm) { return isInt<16>(x: Imm) && Imm % 4 == 0; }; |
| 1292 | // Subroutine to materialize the Imm into TempReg. |
| 1293 | auto MaterializeImm = [&](MachineBasicBlock &MBB, |
| 1294 | MachineBasicBlock::iterator MBBI, int64_t Imm, |
| 1295 | Register &TempReg) { |
| 1296 | assert(isInt<32>(Imm) && "Unhandled imm" ); |
| 1297 | if (isInt<16>(x: Imm)) |
| 1298 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::LI8 : PPC::LI), DestReg: TempReg) |
| 1299 | .addImm(Val: Imm); |
| 1300 | else { |
| 1301 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::LIS8 : PPC::LIS), DestReg: TempReg) |
| 1302 | .addImm(Val: Imm >> 16); |
| 1303 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::ORI8 : PPC::ORI), DestReg: TempReg) |
| 1304 | .addReg(RegNo: TempReg) |
| 1305 | .addImm(Val: Imm & 0xFFFF); |
| 1306 | } |
| 1307 | }; |
| 1308 | // Subroutine to store frame pointer and decrease stack pointer by probe size. |
| 1309 | auto allocateAndProbe = [&](MachineBasicBlock &MBB, |
| 1310 | MachineBasicBlock::iterator MBBI, int64_t NegSize, |
| 1311 | Register NegSizeReg, bool UseDForm, |
| 1312 | Register StoreReg) { |
| 1313 | if (UseDForm) |
| 1314 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::STDU : PPC::STWU), DestReg: SPReg) |
| 1315 | .addReg(RegNo: StoreReg) |
| 1316 | .addImm(Val: NegSize) |
| 1317 | .addReg(RegNo: SPReg); |
| 1318 | else |
| 1319 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::STDUX : PPC::STWUX), DestReg: SPReg) |
| 1320 | .addReg(RegNo: StoreReg) |
| 1321 | .addReg(RegNo: SPReg) |
| 1322 | .addReg(RegNo: NegSizeReg); |
| 1323 | }; |
| 1324 | // Used to probe stack when realignment is required. |
| 1325 | // Note that, according to ABI's requirement, *sp must always equals the |
| 1326 | // value of back-chain pointer, only st(w|d)u(x) can be used to update sp. |
| 1327 | // Following is pseudo code: |
| 1328 | // final_sp = (sp & align) + negframesize; |
| 1329 | // neg_gap = final_sp - sp; |
| 1330 | // while (neg_gap < negprobesize) { |
| 1331 | // stdu fp, negprobesize(sp); |
| 1332 | // neg_gap -= negprobesize; |
| 1333 | // } |
| 1334 | // stdux fp, sp, neg_gap |
| 1335 | // |
| 1336 | // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg |
| 1337 | // before probe code, we don't need to save it, so we get one additional reg |
| 1338 | // that can be used to materialize the probeside if needed to use xform. |
| 1339 | // Otherwise, we can NOT materialize probeside, so we can only use Dform for |
| 1340 | // now. |
| 1341 | // |
| 1342 | // The allocations are: |
| 1343 | // if (HasBP && HasRedzone) { |
| 1344 | // r0: materialize the probesize if needed so that we can use xform. |
| 1345 | // r12: `neg_gap` |
| 1346 | // } else { |
| 1347 | // r0: back-chain pointer |
| 1348 | // r12: `neg_gap`. |
| 1349 | // } |
| 1350 | auto probeRealignedStack = [&](MachineBasicBlock &MBB, |
| 1351 | MachineBasicBlock::iterator MBBI, |
| 1352 | Register ScratchReg, Register TempReg) { |
| 1353 | assert(HasBP && "The function is supposed to have base pointer when its " |
| 1354 | "stack is realigned." ); |
| 1355 | assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2" ); |
| 1356 | |
// FIXME: We can eliminate this limitation if we get more information about
// which part of the redzone is already used. A used redzone can be treated
// as probed. But there might be `holes' in the probed redzone, which could
// complicate the implementation.
| 1361 | assert(ProbeSize >= Subtarget.getRedZoneSize() && |
| 1362 | "Probe size should be larger or equal to the size of red-zone so " |
| 1363 | "that red-zone is not clobbered by probing." ); |
| 1364 | |
| 1365 | Register &FinalStackPtr = TempReg; |
| 1366 | // FIXME: We only support NegProbeSize materializable by DForm currently. |
| 1367 | // When HasBP && HasRedzone, we can use xform if we have an additional idle |
| 1368 | // register. |
| 1369 | NegProbeSize = std::max(a: NegProbeSize, b: -((int64_t)1 << 15)); |
| 1370 | assert(isInt<16>(NegProbeSize) && |
| 1371 | "NegProbeSize should be materializable by DForm" ); |
| 1372 | Register CRReg = PPC::CR0; |
| 1373 | // Layout of output assembly kinda like: |
| 1374 | // bb.0: |
| 1375 | // ... |
| 1376 | // sub $scratchreg, $finalsp, r1 |
| 1377 | // cmpdi $scratchreg, <negprobesize> |
| 1378 | // bge bb.2 |
| 1379 | // bb.1: |
| 1380 | // stdu <backchain>, <negprobesize>(r1) |
| 1381 | // sub $scratchreg, $scratchreg, negprobesize |
| 1382 | // cmpdi $scratchreg, <negprobesize> |
| 1383 | // blt bb.1 |
| 1384 | // bb.2: |
| 1385 | // stdux <backchain>, r1, $scratchreg |
| 1386 | MachineFunction::iterator MBBInsertPoint = std::next(x: MBB.getIterator()); |
| 1387 | MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(BB: ProbedBB); |
| 1388 | MF.insert(MBBI: MBBInsertPoint, MBB: ProbeLoopBodyMBB); |
| 1389 | MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(BB: ProbedBB); |
| 1390 | MF.insert(MBBI: MBBInsertPoint, MBB: ProbeExitMBB); |
| 1391 | // bb.2 |
| 1392 | { |
| 1393 | Register BackChainPointer = HasRedZone ? BPReg : TempReg; |
| 1394 | allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, |
| 1395 | BackChainPointer); |
| 1396 | if (HasRedZone) |
| 1397 | // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg |
| 1398 | // to TempReg to satisfy it. |
| 1399 | BuildMI(BB&: *ProbeExitMBB, I: ProbeExitMBB->end(), MIMD: DL, MCID: CopyInst, DestReg: TempReg) |
| 1400 | .addReg(RegNo: BPReg) |
| 1401 | .addReg(RegNo: BPReg); |
| 1402 | ProbeExitMBB->splice(Where: ProbeExitMBB->end(), Other: &MBB, From: MBBI, To: MBB.end()); |
| 1403 | ProbeExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB); |
| 1404 | } |
| 1405 | // bb.0 |
| 1406 | { |
| 1407 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::SUBF8 : PPC::SUBF), DestReg: ScratchReg) |
| 1408 | .addReg(RegNo: SPReg) |
| 1409 | .addReg(RegNo: FinalStackPtr); |
| 1410 | if (!HasRedZone) |
| 1411 | BuildMI(BB: &MBB, MIMD: DL, MCID: CopyInst, DestReg: TempReg).addReg(RegNo: SPReg).addReg(RegNo: SPReg); |
| 1412 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::CMPDI : PPC::CMPWI), DestReg: CRReg) |
| 1413 | .addReg(RegNo: ScratchReg) |
| 1414 | .addImm(Val: NegProbeSize); |
| 1415 | BuildMI(BB: &MBB, MIMD: DL, MCID: TII.get(Opcode: PPC::BCC)) |
| 1416 | .addImm(Val: PPC::PRED_GE) |
| 1417 | .addReg(RegNo: CRReg) |
| 1418 | .addMBB(MBB: ProbeExitMBB); |
| 1419 | MBB.addSuccessor(Succ: ProbeLoopBodyMBB); |
| 1420 | MBB.addSuccessor(Succ: ProbeExitMBB); |
| 1421 | } |
| 1422 | // bb.1 |
| 1423 | { |
| 1424 | Register BackChainPointer = HasRedZone ? BPReg : TempReg; |
| 1425 | allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, |
| 1426 | 0, true /*UseDForm*/, BackChainPointer); |
| 1427 | BuildMI(BB: ProbeLoopBodyMBB, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::ADDI8 : PPC::ADDI), |
| 1428 | DestReg: ScratchReg) |
| 1429 | .addReg(RegNo: ScratchReg) |
| 1430 | .addImm(Val: -NegProbeSize); |
| 1431 | BuildMI(BB: ProbeLoopBodyMBB, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::CMPDI : PPC::CMPWI), |
| 1432 | DestReg: CRReg) |
| 1433 | .addReg(RegNo: ScratchReg) |
| 1434 | .addImm(Val: NegProbeSize); |
| 1435 | BuildMI(BB: ProbeLoopBodyMBB, MIMD: DL, MCID: TII.get(Opcode: PPC::BCC)) |
| 1436 | .addImm(Val: PPC::PRED_LT) |
| 1437 | .addReg(RegNo: CRReg) |
| 1438 | .addMBB(MBB: ProbeLoopBodyMBB); |
| 1439 | ProbeLoopBodyMBB->addSuccessor(Succ: ProbeExitMBB); |
| 1440 | ProbeLoopBodyMBB->addSuccessor(Succ: ProbeLoopBodyMBB); |
| 1441 | } |
| 1442 | // Update liveins. |
| 1443 | fullyRecomputeLiveIns(MBBs: {ProbeExitMBB, ProbeLoopBodyMBB}); |
| 1444 | return ProbeExitMBB; |
| 1445 | }; |
| 1446 | // For case HasBP && MaxAlign > 1, we have to realign the SP by performing |
| 1447 | // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since |
| 1448 | // the offset subtracted from SP is determined by SP's runtime value. |
| 1449 | if (HasBP && MaxAlign > 1) { |
| 1450 | // Calculate final stack pointer. |
| 1451 | if (isPPC64) |
| 1452 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: TII.get(Opcode: PPC::RLDICL), DestReg: ScratchReg) |
| 1453 | .addReg(RegNo: SPReg) |
| 1454 | .addImm(Val: 0) |
| 1455 | .addImm(Val: 64 - Log2(A: MaxAlign)); |
| 1456 | else |
| 1457 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: TII.get(Opcode: PPC::RLWINM), DestReg: ScratchReg) |
| 1458 | .addReg(RegNo: SPReg) |
| 1459 | .addImm(Val: 0) |
| 1460 | .addImm(Val: 32 - Log2(A: MaxAlign)) |
| 1461 | .addImm(Val: 31); |
| 1462 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::SUBF8 : PPC::SUBF), |
| 1463 | DestReg: FPReg) |
| 1464 | .addReg(RegNo: ScratchReg) |
| 1465 | .addReg(RegNo: SPReg); |
| 1466 | MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); |
| 1467 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::ADD8 : PPC::ADD4), |
| 1468 | DestReg: FPReg) |
| 1469 | .addReg(RegNo: ScratchReg) |
| 1470 | .addReg(RegNo: FPReg); |
| 1471 | CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); |
| 1472 | if (needsCFI) |
| 1473 | buildDefCFAReg(*CurrentMBB, {MI}, FPReg); |
| 1474 | } else { |
| 1475 | // Initialize current frame pointer. |
| 1476 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: CopyInst, DestReg: FPReg).addReg(RegNo: SPReg).addReg(RegNo: SPReg); |
| 1477 | // Use FPReg to calculate CFA. |
| 1478 | if (needsCFI) |
| 1479 | buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); |
| 1480 | // Probe residual part. |
| 1481 | if (NegResidualSize) { |
| 1482 | bool ResidualUseDForm = CanUseDForm(NegResidualSize); |
| 1483 | if (!ResidualUseDForm) |
| 1484 | MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); |
| 1485 | allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, |
| 1486 | ResidualUseDForm, FPReg); |
| 1487 | } |
| 1488 | bool UseDForm = CanUseDForm(NegProbeSize); |
| 1489 | // If number of blocks is small, just probe them directly. |
| 1490 | if (NumBlocks < 3) { |
| 1491 | if (!UseDForm) |
| 1492 | MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); |
| 1493 | for (int i = 0; i < NumBlocks; ++i) |
| 1494 | allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, |
| 1495 | FPReg); |
| 1496 | if (needsCFI) { |
| 1497 | // Restore using SPReg to calculate CFA. |
| 1498 | buildDefCFAReg(*CurrentMBB, {MI}, SPReg); |
| 1499 | } |
| 1500 | } else { |
| 1501 | // Since CTR is a volatile register and current shrinkwrap implementation |
| 1502 | // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a |
| 1503 | // CTR loop to probe. |
| 1504 | // Calculate trip count and stores it in CTRReg. |
| 1505 | MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); |
| 1506 | BuildMI(BB&: *CurrentMBB, I&: {MI}, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) |
| 1507 | .addReg(RegNo: ScratchReg, flags: RegState::Kill); |
| 1508 | if (!UseDForm) |
| 1509 | MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); |
| 1510 | // Create MBBs of the loop. |
| 1511 | MachineFunction::iterator MBBInsertPoint = |
| 1512 | std::next(x: CurrentMBB->getIterator()); |
| 1513 | MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(BB: ProbedBB); |
| 1514 | MF.insert(MBBI: MBBInsertPoint, MBB: LoopMBB); |
| 1515 | MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(BB: ProbedBB); |
| 1516 | MF.insert(MBBI: MBBInsertPoint, MBB: ExitMBB); |
| 1517 | // Synthesize the loop body. |
| 1518 | allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, |
| 1519 | UseDForm, FPReg); |
| 1520 | BuildMI(BB: LoopMBB, MIMD: DL, MCID: TII.get(Opcode: isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) |
| 1521 | .addMBB(MBB: LoopMBB); |
| 1522 | LoopMBB->addSuccessor(Succ: ExitMBB); |
| 1523 | LoopMBB->addSuccessor(Succ: LoopMBB); |
| 1524 | // Synthesize the exit MBB. |
| 1525 | ExitMBB->splice(Where: ExitMBB->end(), Other: CurrentMBB, |
| 1526 | From: std::next(x: MachineBasicBlock::iterator(MI)), |
| 1527 | To: CurrentMBB->end()); |
| 1528 | ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: CurrentMBB); |
| 1529 | CurrentMBB->addSuccessor(Succ: LoopMBB); |
| 1530 | if (needsCFI) { |
| 1531 | // Restore using SPReg to calculate CFA. |
| 1532 | buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); |
| 1533 | } |
| 1534 | // Update liveins. |
| 1535 | fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopMBB}); |
| 1536 | } |
| 1537 | } |
| 1538 | ++NumPrologProbed; |
| 1539 | MI.eraseFromParent(); |
| 1540 | } |
| 1541 | |
// Emit the function epilogue into MBB: undo the stack allocation performed by
// the prologue, restore the callee-saved FP/BP/PIC-base registers and the
// CR/LR values spilled by the prologue, verify the ROP-protection hash when
// enabled, and pop the caller-allocated area for the callee-pop (fastcc +
// GuaranteedTailCallOpt) convention or lower a tail-call return pseudo.
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl;

  if (MBBI != MBB.end())
    dl = MBBI->getDebugLoc();

  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Get alignment info so we know how to restore the SP.
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Get the number of bytes allocated from the FrameInfo.
  int64_t FrameSize = MFI.getStackSize();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();

  // Check if the link register (LR) has been saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  // A red zone (stack space below SP that survives calls) exists on 64-bit
  // targets and on every non-SVR4 ABI; it lets us restore SP before the
  // callee-saved loads.
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  bool HasROPProtect = Subtarget.hasROPProtect();
  bool HasPrivileged = Subtarget.hasPrivileged();

  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  Register ScratchReg;
  Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  const MCInstrDesc &MTLRInst = TII.get(Opcode: isPPC64 ? PPC::MTLR8
                                                       : PPC::MTLR);
  const MCInstrDesc &LoadInst = TII.get(Opcode: isPPC64 ? PPC::LD
                                                       : PPC::LWZ);
  const MCInstrDesc &LoadImmShiftedInst = TII.get(Opcode: isPPC64 ? PPC::LIS8
                                                                 : PPC::LIS);
  const MCInstrDesc &OrInst = TII.get(Opcode: isPPC64 ? PPC::OR8
                                                     : PPC::OR);
  const MCInstrDesc &OrImmInst = TII.get(Opcode: isPPC64 ? PPC::ORI8
                                                        : PPC::ORI);
  const MCInstrDesc &AddImmInst = TII.get(Opcode: isPPC64 ? PPC::ADDI8
                                                         : PPC::ADDI);
  const MCInstrDesc &AddInst = TII.get(Opcode: isPPC64 ? PPC::ADD8
                                                      : PPC::ADD4);
  const MCInstrDesc &LoadWordInst = TII.get(Opcode: isPPC64 ? PPC::LWZ8
                                                           : PPC::LWZ);
  const MCInstrDesc &MoveToCRInst = TII.get(Opcode: isPPC64 ? PPC::MTOCRF8
                                                            : PPC::MTOCRF);
  // ROP-protection hash check; the privileged variants are used when the
  // subtarget has privileged instructions available.
  const MCInstrDesc &HashChk =
      TII.get(Opcode: isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
                             : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
  int64_t LROffset = getReturnSaveOffset();

  int64_t FPOffset = 0;

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(MBB: &MBB, UseAtEnd: true,
                                              TwoUniqueRegsRequired: false,
                                              SR1: &ScratchReg, SR2: &TempReg);
  assert(SingleScratchReg &&
         "Could not find an available scratch register");

  // True when only one scratch register could be found (Scratch == Temp).
  SingleScratchReg = ScratchReg == TempReg;

  if (HasFP) {
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!");
    FPOffset = MFI.getObjectOffset(ObjectIdx: FPIndex);
  }

  int64_t BPOffset = 0;
  if (HasBP) {
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!");
    BPOffset = MFI.getObjectOffset(ObjectIdx: BPIndex);
  }

  int64_t PBPOffset = 0;
  if (FI->usesPICBase()) {
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(ObjectIdx: PBPIndex);
  }

  bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());

  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
                     RetOpcode == PPC::TCRETURNdi ||
                     RetOpcode == PPC::TCRETURNai ||
                     RetOpcode == PPC::TCRETURNri8 ||
                     RetOpcode == PPC::TCRETURNdi8 ||
                     RetOpcode == PPC::TCRETURNai8;

    if (UsesTCRet) {
      int MaxTCRetDelta = FI->getTailCallSPDelta();
      // Operand 1 of a TCRETURN* pseudo is the stack adjustment immediate.
      MachineOperand &StackAdjust = MBBI->getOperand(i: 1);
      assert(StackAdjust.isImm() && "Expecting immediate value.");
      // Adjust stack pointer.
      int StackAdj = StackAdjust.getImm();
      int Delta = StackAdj - MaxTCRetDelta;
      assert((Delta >= 0) && "Delta must be positive");
      if (MaxTCRetDelta > 0)
        FrameSize += (StackAdj + Delta);
      else
        FrameSize += StackAdj;
    }
  }

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple LD/LWZ immediate offset operand.
  bool isLargeFrame = !isInt<16>(x: FrameSize);

  // On targets without red zone, the SP needs to be restored last, so that
  // all live contents of the stack frame are upwards of the SP. This means
  // that we cannot restore SP just now, since there may be more registers
  // to restore from the stack frame (e.g. R31). If the frame size is not
  // a simple immediate value, we will need a spare register to hold the
  // restored SP. If the frame size is known and small, we can simply adjust
  // the offsets of the registers to be restored, and still use SP to restore
  // them. In such case, the final update of SP will be to add the frame
  // size to it.
  // To simplify the code, set RBReg to the base register used to restore
  // values from the stack, and set SPAdd to the value that needs to be added
  // to the SP at the end. The default values are as if red zone was present.
  unsigned RBReg = SPReg;
  uint64_t SPAdd = 0;

  // Check if we can move the stack update instruction up the epilogue
  // past the callee saves. This will allow the move to LR instruction
  // to be executed before the restores of the callee saves which means
  // that the callee saves can hide the latency from the MTLR instruction.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      // If the callee saved register is spilled to another register abort the
      // stack update movement.
      if (CSI.isSpilledToReg()) {
        StackUpdateLoc = MBBI;
        break;
      }
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the update of the stack pointer
      // past them.
      if (FrIdx >= 0)
        continue;

      if (MFI.isFixedObjectIndex(ObjectIdx: FrIdx) &&
          MFI.getObjectOffset(ObjectIdx: FrIdx) < 0)
        StackUpdateLoc--;
      else {
        // Abort the operation as we can't update all CSR restores.
        StackUpdateLoc = MBBI;
        break;
      }
    }
  }

  if (FrameSize) {
    // In the prologue, the loaded (or persistent) stack pointer value is
    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
    // zone add this offset back now.

    // If the function has a base pointer, the stack pointer has been copied
    // to it so we can restore it by copying in the other direction.
    if (HasRedZone && HasBP) {
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: RBReg).
        addReg(RegNo: BPReg).
        addReg(RegNo: BPReg);
    }
    // If this function contained a fastcc call and GuaranteedTailCallOpt is
    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
    // call which invalidates the stack pointer value in SP(0). So we use the
    // value of R31 in this case. Similar situation exists with setjmp.
    else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
      assert(HasFP && "Expecting a valid frame pointer.");
      if (!HasRedZone)
        RBReg = FPReg;
      if (!isLargeFrame) {
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: AddImmInst, DestReg: RBReg)
          .addReg(RegNo: FPReg).addImm(Val: FrameSize);
      } else {
        // FrameSize does not fit a 16-bit immediate; materialize it first.
        TII.materializeImmPostRA(MBB, MBBI, DL: dl, Reg: ScratchReg,
                                 Imm: FrameSize);
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: AddInst)
          .addReg(RegNo: RBReg)
          .addReg(RegNo: FPReg)
          .addReg(RegNo: ScratchReg);
      }
    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
      if (HasRedZone) {
        BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: AddImmInst, DestReg: SPReg)
          .addReg(RegNo: SPReg)
          .addImm(Val: FrameSize);
      } else {
        // Make sure that adding FrameSize will not overflow the max offset
        // size.
        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
               "Local offsets should be negative");
        // No red zone: keep restoring through SP and defer the final SP bump;
        // shift the restore offsets by the frame size instead.
        SPAdd = FrameSize;
        FPOffset += FrameSize;
        BPOffset += FrameSize;
        PBPOffset += FrameSize;
      }
    } else {
      // We don't want to use ScratchReg as a base register, because it
      // could happen to be R0. Use FP instead, but make sure to preserve it.
      if (!HasRedZone) {
        // If FP is not saved, copy it to ScratchReg.
        if (!HasFP)
          BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: ScratchReg)
            .addReg(RegNo: FPReg)
            .addReg(RegNo: FPReg);
        RBReg = FPReg;
      }
      // Reload the caller's SP from the back chain at offset 0.
      BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: LoadInst, DestReg: RBReg)
        .addImm(Val: 0)
        .addReg(RegNo: SPReg);
    }
  }
  assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
  // If there is no red zone, ScratchReg may be needed for holding a useful
  // value (although not the base register). Make sure it is not overwritten
  // too early.

  // If we need to restore both the LR and the CR and we only have one
  // available scratch register, we must do them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
    // is live here.
    assert(HasRedZone && "Expecting red zone");
    BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadWordInst, DestReg: TempReg)
      .addImm(Val: CRSaveOffset)
      .addReg(RegNo: SPReg);
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: MoveToCRInst, DestReg: MustSaveCRs[i])
        .addReg(RegNo: TempReg, flags: getKillRegState(B: i == e - 1));
  }

  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
  // LR is stored in the caller's stack frame. ScratchReg will be needed
  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
  // a base register anyway, because it may happen to be R0.
  bool LoadedLR = false;
  if (MustSaveLR && RBReg == SPReg && isInt<16>(x: LROffset + SPAdd)) {
    BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: LoadInst, DestReg: ScratchReg)
      .addImm(Val: LROffset + SPAdd)
      .addReg(RegNo: RBReg);
    LoadedLR = true;
  }

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
    assert(RBReg == SPReg && "Should be using SP as a base register");
    BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadWordInst, DestReg: TempReg)
      .addImm(Val: CRSaveOffset)
      .addReg(RegNo: RBReg);
  }

  if (HasFP) {
    // If there is red zone, restore FP directly, since SP has already been
    // restored. Otherwise, restore the value of FP into ScratchReg.
    if (HasRedZone || RBReg == SPReg)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadInst, DestReg: FPReg)
        .addImm(Val: FPOffset)
        .addReg(RegNo: SPReg);
    else
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadInst, DestReg: ScratchReg)
        .addImm(Val: FPOffset)
        .addReg(RegNo: RBReg);
  }

  if (FI->usesPICBase())
    BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadInst, DestReg: PPC::R30)
      .addImm(Val: PBPOffset)
      .addReg(RegNo: RBReg);

  if (HasBP)
    BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadInst, DestReg: BPReg)
      .addImm(Val: BPOffset)
      .addReg(RegNo: RBReg);

  // There is nothing more to be loaded from the stack, so now we can
  // restore SP: SP = RBReg + SPAdd.
  if (RBReg != SPReg || SPAdd != 0) {
    assert(!HasRedZone && "This should not happen with red zone");
    // If SPAdd is 0, generate a copy.
    if (SPAdd == 0)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: SPReg)
        .addReg(RegNo: RBReg)
        .addReg(RegNo: RBReg);
    else
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: AddImmInst, DestReg: SPReg)
        .addReg(RegNo: RBReg)
        .addImm(Val: SPAdd);

    assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
    // If FP served as the restore base, its saved value was parked in
    // ScratchReg above; move it back into FP now.
    if (RBReg == FPReg)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrInst, DestReg: FPReg)
        .addReg(RegNo: ScratchReg)
        .addReg(RegNo: ScratchReg);

    // Now load the LR from the caller's stack frame.
    if (MustSaveLR && !LoadedLR)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadInst, DestReg: ScratchReg)
        .addImm(Val: LROffset)
        .addReg(RegNo: SPReg);
  }

  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR))
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: MoveToCRInst, DestReg: MustSaveCRs[i])
        .addReg(RegNo: TempReg, flags: getKillRegState(B: i == e - 1));

  if (MustSaveLR) {
    // If ROP protection is required, an extra instruction is added to compute a
    // hash and then compare it to the hash stored in the prologue.
    if (HasROPProtect) {
      const int SaveIndex = FI->getROPProtectionHashSaveIndex();
      const int64_t ImmOffset = MFI.getObjectOffset(ObjectIdx: SaveIndex);
      assert((ImmOffset <= -8 && ImmOffset >= -512) &&
             "ROP hash check location offset out of range.");
      assert(((ImmOffset & 0x7) == 0) &&
             "ROP hash check location offset must be 8 byte aligned.");
      BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: HashChk)
        .addReg(RegNo: ScratchReg)
        .addImm(Val: ImmOffset)
        .addReg(RegNo: SPReg);
    }
    BuildMI(BB&: MBB, I: StackUpdateLoc, MIMD: dl, MCID: MTLRInst).addReg(RegNo: ScratchReg);
  }

  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
  // call optimization
  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
        MF.getFunction().getCallingConv() == CallingConv::Fast) {
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      unsigned CallerAllocatedAmt = FI->getMinReservedArea();

      if (CallerAllocatedAmt && isInt<16>(x: CallerAllocatedAmt)) {
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: AddImmInst, DestReg: SPReg)
          .addReg(RegNo: SPReg).addImm(Val: CallerAllocatedAmt);
      } else {
        // Amount does not fit a 16-bit immediate: build it in ScratchReg
        // (LIS + ORI) and add it to the frame pointer.
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: LoadImmShiftedInst, DestReg: ScratchReg)
          .addImm(Val: CallerAllocatedAmt >> 16);
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: OrImmInst, DestReg: ScratchReg)
          .addReg(RegNo: ScratchReg, flags: RegState::Kill)
          .addImm(Val: CallerAllocatedAmt & 0xFFFF);
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: AddInst)
          .addReg(RegNo: SPReg)
          .addReg(RegNo: FPReg)
          .addReg(RegNo: ScratchReg);
      }
    } else {
      createTailCallBranchInstr(MBB);
    }
  }
}
| 1911 | |
| 1912 | void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { |
| 1913 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
| 1914 | |
| 1915 | // If we got this far a first terminator should exist. |
| 1916 | assert(MBBI != MBB.end() && "Failed to find the first terminator." ); |
| 1917 | |
| 1918 | DebugLoc dl = MBBI->getDebugLoc(); |
| 1919 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
| 1920 | |
| 1921 | // Create branch instruction for pseudo tail call return instruction. |
| 1922 | // The TCRETURNdi variants are direct calls. Valid targets for those are |
| 1923 | // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel |
| 1924 | // since we can tail call external functions with PC-Rel (i.e. we don't need |
| 1925 | // to worry about different TOC pointers). Some of the external functions will |
| 1926 | // be MO_GlobalAddress while others like memcpy for example, are going to |
| 1927 | // be MO_ExternalSymbol. |
| 1928 | unsigned RetOpcode = MBBI->getOpcode(); |
| 1929 | if (RetOpcode == PPC::TCRETURNdi) { |
| 1930 | MBBI = MBB.getLastNonDebugInstr(); |
| 1931 | MachineOperand &JumpTarget = MBBI->getOperand(i: 0); |
| 1932 | if (JumpTarget.isGlobal()) |
| 1933 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILB)). |
| 1934 | addGlobalAddress(GV: JumpTarget.getGlobal(), Offset: JumpTarget.getOffset()); |
| 1935 | else if (JumpTarget.isSymbol()) |
| 1936 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILB)). |
| 1937 | addExternalSymbol(FnName: JumpTarget.getSymbolName()); |
| 1938 | else |
| 1939 | llvm_unreachable("Expecting Global or External Symbol" ); |
| 1940 | } else if (RetOpcode == PPC::TCRETURNri) { |
| 1941 | MBBI = MBB.getLastNonDebugInstr(); |
| 1942 | assert(MBBI->getOperand(0).isReg() && "Expecting register operand." ); |
| 1943 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILBCTR)); |
| 1944 | } else if (RetOpcode == PPC::TCRETURNai) { |
| 1945 | MBBI = MBB.getLastNonDebugInstr(); |
| 1946 | MachineOperand &JumpTarget = MBBI->getOperand(i: 0); |
| 1947 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILBA)).addImm(Val: JumpTarget.getImm()); |
| 1948 | } else if (RetOpcode == PPC::TCRETURNdi8) { |
| 1949 | MBBI = MBB.getLastNonDebugInstr(); |
| 1950 | MachineOperand &JumpTarget = MBBI->getOperand(i: 0); |
| 1951 | if (JumpTarget.isGlobal()) |
| 1952 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILB8)). |
| 1953 | addGlobalAddress(GV: JumpTarget.getGlobal(), Offset: JumpTarget.getOffset()); |
| 1954 | else if (JumpTarget.isSymbol()) |
| 1955 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILB8)). |
| 1956 | addExternalSymbol(FnName: JumpTarget.getSymbolName()); |
| 1957 | else |
| 1958 | llvm_unreachable("Expecting Global or External Symbol" ); |
| 1959 | } else if (RetOpcode == PPC::TCRETURNri8) { |
| 1960 | MBBI = MBB.getLastNonDebugInstr(); |
| 1961 | assert(MBBI->getOperand(0).isReg() && "Expecting register operand." ); |
| 1962 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILBCTR8)); |
| 1963 | } else if (RetOpcode == PPC::TCRETURNai8) { |
| 1964 | MBBI = MBB.getLastNonDebugInstr(); |
| 1965 | MachineOperand &JumpTarget = MBBI->getOperand(i: 0); |
| 1966 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: PPC::TAILBA8)).addImm(Val: JumpTarget.getImm()); |
| 1967 | } |
| 1968 | } |
| 1969 | |
| 1970 | void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, |
| 1971 | BitVector &SavedRegs, |
| 1972 | RegScavenger *RS) const { |
| 1973 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); |
| 1974 | if (Subtarget.isAIXABI()) |
| 1975 | updateCalleeSaves(MF, SavedRegs); |
| 1976 | |
| 1977 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 1978 | |
| 1979 | // Do not explicitly save the callee saved VSRp registers. |
| 1980 | // The individual VSR subregisters will be saved instead. |
| 1981 | SavedRegs.reset(Idx: PPC::VSRp26); |
| 1982 | SavedRegs.reset(Idx: PPC::VSRp27); |
| 1983 | SavedRegs.reset(Idx: PPC::VSRp28); |
| 1984 | SavedRegs.reset(Idx: PPC::VSRp29); |
| 1985 | SavedRegs.reset(Idx: PPC::VSRp30); |
| 1986 | SavedRegs.reset(Idx: PPC::VSRp31); |
| 1987 | |
| 1988 | // Save and clear the LR state. |
| 1989 | PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); |
| 1990 | MCRegister LR = RegInfo->getRARegister(); |
| 1991 | FI->setMustSaveLR(MustSaveLR(MF, LR)); |
| 1992 | SavedRegs.reset(Idx: LR); |
| 1993 | |
| 1994 | // Save R31 if necessary |
| 1995 | int FPSI = FI->getFramePointerSaveIndex(); |
| 1996 | const bool isPPC64 = Subtarget.isPPC64(); |
| 1997 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 1998 | |
| 1999 | // If the frame pointer save index hasn't been defined yet. |
| 2000 | if (!FPSI && needsFP(MF)) { |
| 2001 | // Find out what the fix offset of the frame pointer save area. |
| 2002 | int FPOffset = getFramePointerSaveOffset(); |
| 2003 | // Allocate the frame index for frame pointer save area. |
| 2004 | FPSI = MFI.CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: FPOffset, IsImmutable: true); |
| 2005 | // Save the result. |
| 2006 | FI->setFramePointerSaveIndex(FPSI); |
| 2007 | } |
| 2008 | |
| 2009 | int BPSI = FI->getBasePointerSaveIndex(); |
| 2010 | if (!BPSI && RegInfo->hasBasePointer(MF)) { |
| 2011 | int BPOffset = getBasePointerSaveOffset(); |
| 2012 | // Allocate the frame index for the base pointer save area. |
| 2013 | BPSI = MFI.CreateFixedObject(Size: isPPC64? 8 : 4, SPOffset: BPOffset, IsImmutable: true); |
| 2014 | // Save the result. |
| 2015 | FI->setBasePointerSaveIndex(BPSI); |
| 2016 | } |
| 2017 | |
| 2018 | // Reserve stack space for the PIC Base register (R30). |
| 2019 | // Only used in SVR4 32-bit. |
| 2020 | if (FI->usesPICBase()) { |
| 2021 | int PBPSI = MFI.CreateFixedObject(Size: 4, SPOffset: -8, IsImmutable: true); |
| 2022 | FI->setPICBasePointerSaveIndex(PBPSI); |
| 2023 | } |
| 2024 | |
| 2025 | // Make sure we don't explicitly spill r31, because, for example, we have |
| 2026 | // some inline asm which explicitly clobbers it, when we otherwise have a |
| 2027 | // frame pointer and are using r31's spill slot for the prologue/epilogue |
| 2028 | // code. Same goes for the base pointer and the PIC base register. |
| 2029 | if (needsFP(MF)) |
| 2030 | SavedRegs.reset(Idx: isPPC64 ? PPC::X31 : PPC::R31); |
| 2031 | if (RegInfo->hasBasePointer(MF)) { |
| 2032 | SavedRegs.reset(Idx: RegInfo->getBaseRegister(MF)); |
| 2033 | // On AIX, when BaseRegister(R30) is used, need to spill r31 too to match |
| 2034 | // AIX trackback table requirement. |
| 2035 | if (!needsFP(MF) && !SavedRegs.test(Idx: isPPC64 ? PPC::X31 : PPC::R31) && |
| 2036 | Subtarget.isAIXABI()) { |
| 2037 | assert( |
| 2038 | (RegInfo->getBaseRegister(MF) == (isPPC64 ? PPC::X30 : PPC::R30)) && |
| 2039 | "Invalid base register on AIX!" ); |
| 2040 | SavedRegs.set(isPPC64 ? PPC::X31 : PPC::R31); |
| 2041 | } |
| 2042 | } |
| 2043 | if (FI->usesPICBase()) |
| 2044 | SavedRegs.reset(Idx: PPC::R30); |
| 2045 | |
| 2046 | // Reserve stack space to move the linkage area to in case of a tail call. |
| 2047 | int TCSPDelta = 0; |
| 2048 | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
| 2049 | (TCSPDelta = FI->getTailCallSPDelta()) < 0) { |
| 2050 | MFI.CreateFixedObject(Size: -1 * TCSPDelta, SPOffset: TCSPDelta, IsImmutable: true); |
| 2051 | } |
| 2052 | |
| 2053 | // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4. |
| 2054 | // For 64-bit SVR4, and all flavors of AIX we create a FixedStack |
| 2055 | // object at the offset of the CR-save slot in the linkage area. The actual |
| 2056 | // save and restore of the condition register will be created as part of the |
| 2057 | // prologue and epilogue insertion, but the FixedStack object is needed to |
| 2058 | // keep the CalleSavedInfo valid. |
| 2059 | if ((SavedRegs.test(Idx: PPC::CR2) || SavedRegs.test(Idx: PPC::CR3) || |
| 2060 | SavedRegs.test(Idx: PPC::CR4))) { |
| 2061 | const uint64_t SpillSize = 4; // Condition register is always 4 bytes. |
| 2062 | const int64_t SpillOffset = |
| 2063 | Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4; |
| 2064 | int FrameIdx = |
| 2065 | MFI.CreateFixedObject(Size: SpillSize, SPOffset: SpillOffset, |
| 2066 | /* IsImmutable */ true, /* IsAliased */ isAliased: false); |
| 2067 | FI->setCRSpillFrameIndex(FrameIdx); |
| 2068 | } |
| 2069 | } |
| 2070 | |
| 2071 | void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, |
| 2072 | RegScavenger *RS) const { |
| 2073 | // Get callee saved register information. |
| 2074 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 2075 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); |
| 2076 | |
| 2077 | // If the function is shrink-wrapped, and if the function has a tail call, the |
| 2078 | // tail call might not be in the new RestoreBlock, so real branch instruction |
| 2079 | // won't be generated by emitEpilogue(), because shrink-wrap has chosen new |
| 2080 | // RestoreBlock. So we handle this case here. |
| 2081 | if (MFI.getSavePoint() && MFI.hasTailCall()) { |
| 2082 | MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); |
| 2083 | for (MachineBasicBlock &MBB : MF) { |
| 2084 | if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) |
| 2085 | createTailCallBranchInstr(MBB); |
| 2086 | } |
| 2087 | } |
| 2088 | |
| 2089 | // Early exit if no callee saved registers are modified! |
| 2090 | if (CSI.empty() && !needsFP(MF)) { |
| 2091 | addScavengingSpillSlot(MF, RS); |
| 2092 | return; |
| 2093 | } |
| 2094 | |
| 2095 | unsigned MinGPR = PPC::R31; |
| 2096 | unsigned MinG8R = PPC::X31; |
| 2097 | unsigned MinFPR = PPC::F31; |
| 2098 | unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31; |
| 2099 | |
| 2100 | bool HasGPSaveArea = false; |
| 2101 | bool HasG8SaveArea = false; |
| 2102 | bool HasFPSaveArea = false; |
| 2103 | bool HasVRSaveArea = false; |
| 2104 | |
| 2105 | SmallVector<CalleeSavedInfo, 18> GPRegs; |
| 2106 | SmallVector<CalleeSavedInfo, 18> G8Regs; |
| 2107 | SmallVector<CalleeSavedInfo, 18> FPRegs; |
| 2108 | SmallVector<CalleeSavedInfo, 18> VRegs; |
| 2109 | |
| 2110 | for (const CalleeSavedInfo &I : CSI) { |
| 2111 | MCRegister Reg = I.getReg(); |
| 2112 | assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || |
| 2113 | (Reg != PPC::X2 && Reg != PPC::R2)) && |
| 2114 | "Not expecting to try to spill R2 in a function that must save TOC" ); |
| 2115 | if (PPC::GPRCRegClass.contains(Reg)) { |
| 2116 | HasGPSaveArea = true; |
| 2117 | |
| 2118 | GPRegs.push_back(Elt: I); |
| 2119 | |
| 2120 | if (Reg < MinGPR) { |
| 2121 | MinGPR = Reg; |
| 2122 | } |
| 2123 | } else if (PPC::G8RCRegClass.contains(Reg)) { |
| 2124 | HasG8SaveArea = true; |
| 2125 | |
| 2126 | G8Regs.push_back(Elt: I); |
| 2127 | |
| 2128 | if (Reg < MinG8R) { |
| 2129 | MinG8R = Reg; |
| 2130 | } |
| 2131 | } else if (PPC::F8RCRegClass.contains(Reg)) { |
| 2132 | HasFPSaveArea = true; |
| 2133 | |
| 2134 | FPRegs.push_back(Elt: I); |
| 2135 | |
| 2136 | if (Reg < MinFPR) { |
| 2137 | MinFPR = Reg; |
| 2138 | } |
| 2139 | } else if (PPC::CRBITRCRegClass.contains(Reg) || |
| 2140 | PPC::CRRCRegClass.contains(Reg)) { |
| 2141 | ; // do nothing, as we already know whether CRs are spilled |
| 2142 | } else if (PPC::VRRCRegClass.contains(Reg) || |
| 2143 | PPC::SPERCRegClass.contains(Reg)) { |
| 2144 | // Altivec and SPE are mutually exclusive, but have the same stack |
| 2145 | // alignment requirements, so overload the save area for both cases. |
| 2146 | HasVRSaveArea = true; |
| 2147 | |
| 2148 | VRegs.push_back(Elt: I); |
| 2149 | |
| 2150 | if (Reg < MinVR) { |
| 2151 | MinVR = Reg; |
| 2152 | } |
| 2153 | } else { |
| 2154 | llvm_unreachable("Unknown RegisterClass!" ); |
| 2155 | } |
| 2156 | } |
| 2157 | |
| 2158 | PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); |
| 2159 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
| 2160 | |
| 2161 | int64_t LowerBound = 0; |
| 2162 | |
| 2163 | // Take into account stack space reserved for tail calls. |
| 2164 | int TCSPDelta = 0; |
| 2165 | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
| 2166 | (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { |
| 2167 | LowerBound = TCSPDelta; |
| 2168 | } |
| 2169 | |
| 2170 | // The Floating-point register save area is right below the back chain word |
| 2171 | // of the previous stack frame. |
| 2172 | if (HasFPSaveArea) { |
| 2173 | for (const CalleeSavedInfo &FPReg : FPRegs) { |
| 2174 | int FI = FPReg.getFrameIdx(); |
| 2175 | |
| 2176 | MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI)); |
| 2177 | } |
| 2178 | |
| 2179 | LowerBound -= (31 - TRI->getEncodingValue(Reg: MinFPR) + 1) * 8; |
| 2180 | } |
| 2181 | |
| 2182 | // Check whether the frame pointer register is allocated. If so, make sure it |
| 2183 | // is spilled to the correct offset. |
| 2184 | if (needsFP(MF)) { |
| 2185 | int FI = PFI->getFramePointerSaveIndex(); |
| 2186 | assert(FI && "No Frame Pointer Save Slot!" ); |
| 2187 | MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI)); |
| 2188 | // FP is R31/X31, so no need to update MinGPR/MinG8R. |
| 2189 | HasGPSaveArea = true; |
| 2190 | } |
| 2191 | |
| 2192 | if (PFI->usesPICBase()) { |
| 2193 | int FI = PFI->getPICBasePointerSaveIndex(); |
| 2194 | assert(FI && "No PIC Base Pointer Save Slot!" ); |
| 2195 | MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI)); |
| 2196 | |
| 2197 | MinGPR = std::min<unsigned>(a: MinGPR, b: PPC::R30); |
| 2198 | HasGPSaveArea = true; |
| 2199 | } |
| 2200 | |
| 2201 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 2202 | if (RegInfo->hasBasePointer(MF)) { |
| 2203 | int FI = PFI->getBasePointerSaveIndex(); |
| 2204 | assert(FI && "No Base Pointer Save Slot!" ); |
| 2205 | MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI)); |
| 2206 | |
| 2207 | Register BP = RegInfo->getBaseRegister(MF); |
| 2208 | if (PPC::G8RCRegClass.contains(Reg: BP)) { |
| 2209 | MinG8R = std::min<unsigned>(a: MinG8R, b: BP); |
| 2210 | HasG8SaveArea = true; |
| 2211 | } else if (PPC::GPRCRegClass.contains(Reg: BP)) { |
| 2212 | MinGPR = std::min<unsigned>(a: MinGPR, b: BP); |
| 2213 | HasGPSaveArea = true; |
| 2214 | } |
| 2215 | } |
| 2216 | |
| 2217 | // General register save area starts right below the Floating-point |
| 2218 | // register save area. |
| 2219 | if (HasGPSaveArea || HasG8SaveArea) { |
| 2220 | // Move general register save area spill slots down, taking into account |
| 2221 | // the size of the Floating-point register save area. |
| 2222 | for (const CalleeSavedInfo &GPReg : GPRegs) { |
| 2223 | if (!GPReg.isSpilledToReg()) { |
| 2224 | int FI = GPReg.getFrameIdx(); |
| 2225 | MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI)); |
| 2226 | } |
| 2227 | } |
| 2228 | |
| 2229 | // Move general register save area spill slots down, taking into account |
| 2230 | // the size of the Floating-point register save area. |
| 2231 | for (const CalleeSavedInfo &G8Reg : G8Regs) { |
| 2232 | if (!G8Reg.isSpilledToReg()) { |
| 2233 | int FI = G8Reg.getFrameIdx(); |
| 2234 | MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI)); |
| 2235 | } |
| 2236 | } |
| 2237 | |
| 2238 | unsigned MinReg = |
| 2239 | std::min<unsigned>(a: TRI->getEncodingValue(Reg: MinGPR), |
| 2240 | b: TRI->getEncodingValue(Reg: MinG8R)); |
| 2241 | |
| 2242 | const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4; |
| 2243 | LowerBound -= (31 - MinReg + 1) * GPRegSize; |
| 2244 | } |
| 2245 | |
| 2246 | // For 32-bit only, the CR save area is below the general register |
| 2247 | // save area. For 64-bit SVR4, the CR save area is addressed relative |
| 2248 | // to the stack pointer and hence does not need an adjustment here. |
| 2249 | // Only CR2 (the first nonvolatile spilled) has an associated frame |
| 2250 | // index so that we have a single uniform save area. |
| 2251 | if (spillsCR(MF) && Subtarget.is32BitELFABI()) { |
| 2252 | // Adjust the frame index of the CR spill slot. |
| 2253 | for (const auto &CSInfo : CSI) { |
| 2254 | if (CSInfo.getReg() == PPC::CR2) { |
| 2255 | int FI = CSInfo.getFrameIdx(); |
| 2256 | MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI)); |
| 2257 | break; |
| 2258 | } |
| 2259 | } |
| 2260 | |
| 2261 | LowerBound -= 4; // The CR save area is always 4 bytes long. |
| 2262 | } |
| 2263 | |
| 2264 | // Both Altivec and SPE have the same alignment and padding requirements |
| 2265 | // within the stack frame. |
| 2266 | if (HasVRSaveArea) { |
| 2267 | // Insert alignment padding, we need 16-byte alignment. Note: for positive |
| 2268 | // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since |
| 2269 | // we are using negative number here (the stack grows downward). We should |
| 2270 | // use formula : y = x & (~(n-1)). Where x is the size before aligning, n |
| 2271 | // is the alignment size ( n = 16 here) and y is the size after aligning. |
| 2272 | assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!" ); |
| 2273 | LowerBound &= ~(15); |
| 2274 | |
| 2275 | for (const CalleeSavedInfo &VReg : VRegs) { |
| 2276 | int FI = VReg.getFrameIdx(); |
| 2277 | |
| 2278 | MFI.setObjectOffset(ObjectIdx: FI, SPOffset: LowerBound + MFI.getObjectOffset(ObjectIdx: FI)); |
| 2279 | } |
| 2280 | } |
| 2281 | |
| 2282 | addScavengingSpillSlot(MF, RS); |
| 2283 | } |
| 2284 | |
| 2285 | void |
| 2286 | PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, |
| 2287 | RegScavenger *RS) const { |
| 2288 | // Reserve a slot closest to SP or frame pointer if we have a dynalloc or |
| 2289 | // a large stack, which will require scavenging a register to materialize a |
| 2290 | // large offset. |
| 2291 | |
| 2292 | // We need to have a scavenger spill slot for spills if the frame size is |
| 2293 | // large. In case there is no free register for large-offset addressing, |
| 2294 | // this slot is used for the necessary emergency spill. Also, we need the |
| 2295 | // slot for dynamic stack allocations. |
| 2296 | |
| 2297 | // The scavenger might be invoked if the frame offset does not fit into |
| 2298 | // the 16-bit immediate in case of not SPE and 8-bit in case of SPE. |
| 2299 | // We don't know the complete frame size here because we've not yet computed |
| 2300 | // callee-saved register spills or the needed alignment padding. |
| 2301 | unsigned StackSize = determineFrameLayout(MF, UseEstimate: true); |
| 2302 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 2303 | bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(x: StackSize) : !isInt<16>(x: StackSize); |
| 2304 | |
| 2305 | if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || |
| 2306 | (hasSpills(MF) && NeedSpills)) { |
| 2307 | const TargetRegisterClass &GPRC = PPC::GPRCRegClass; |
| 2308 | const TargetRegisterClass &G8RC = PPC::G8RCRegClass; |
| 2309 | const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; |
| 2310 | const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); |
| 2311 | unsigned Size = TRI.getSpillSize(RC); |
| 2312 | Align Alignment = TRI.getSpillAlign(RC); |
| 2313 | RS->addScavengingFrameIndex(FI: MFI.CreateSpillStackObject(Size, Alignment)); |
| 2314 | |
| 2315 | // Might we have over-aligned allocas? |
| 2316 | bool HasAlVars = |
| 2317 | MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); |
| 2318 | |
| 2319 | // These kinds of spills might need two registers. |
| 2320 | if (spillsCR(MF) || HasAlVars) |
| 2321 | RS->addScavengingFrameIndex(FI: MFI.CreateSpillStackObject(Size, Alignment)); |
| 2322 | } |
| 2323 | } |
| 2324 | |
| 2325 | // This function checks if a callee saved gpr can be spilled to a volatile |
| 2326 | // vector register. This occurs for leaf functions when the option |
| 2327 | // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers |
| 2328 | // which were not spilled to vectors, return false so the target independent |
| 2329 | // code can handle them by assigning a FrameIdx to a stack slot. |
| 2330 | bool PPCFrameLowering::assignCalleeSavedSpillSlots( |
| 2331 | MachineFunction &MF, const TargetRegisterInfo *TRI, |
| 2332 | std::vector<CalleeSavedInfo> &CSI) const { |
| 2333 | |
| 2334 | if (CSI.empty()) |
| 2335 | return true; // Early exit if no callee saved registers are modified! |
| 2336 | |
| 2337 | const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 2338 | const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF: &MF); |
| 2339 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 2340 | |
| 2341 | if (Subtarget.hasSPE()) { |
| 2342 | // In case of SPE we only have SuperRegs and CRs |
| 2343 | // in our CalleSaveInfo vector. |
| 2344 | |
| 2345 | for (auto &CalleeSaveReg : CSI) { |
| 2346 | MCRegister Reg = CalleeSaveReg.getReg(); |
| 2347 | MCRegister Lower = RegInfo->getSubReg(Reg, Idx: PPC::sub_32); |
| 2348 | MCRegister Higher = RegInfo->getSubReg(Reg, Idx: PPC::sub_32_hi_phony); |
| 2349 | |
| 2350 | if ( // Check only for SuperRegs. |
| 2351 | Lower && |
| 2352 | // Replace Reg if only lower-32 bits modified |
| 2353 | !MRI.isPhysRegModified(PhysReg: Higher)) |
| 2354 | CalleeSaveReg = CalleeSavedInfo(Lower); |
| 2355 | } |
| 2356 | } |
| 2357 | |
| 2358 | // Early exit if cannot spill gprs to volatile vector registers. |
| 2359 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 2360 | if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) |
| 2361 | return false; |
| 2362 | |
| 2363 | // Build a BitVector of VSRs that can be used for spilling GPRs. |
| 2364 | BitVector BVAllocatable = TRI->getAllocatableSet(MF); |
| 2365 | BitVector BVCalleeSaved(TRI->getNumRegs()); |
| 2366 | for (unsigned i = 0; CSRegs[i]; ++i) |
| 2367 | BVCalleeSaved.set(CSRegs[i]); |
| 2368 | |
| 2369 | for (unsigned Reg : BVAllocatable.set_bits()) { |
| 2370 | // Set to 0 if the register is not a volatile VSX register, or if it is |
| 2371 | // used in the function. |
| 2372 | if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) || |
| 2373 | MRI.isPhysRegUsed(PhysReg: Reg)) |
| 2374 | BVAllocatable.reset(Idx: Reg); |
| 2375 | } |
| 2376 | |
| 2377 | bool AllSpilledToReg = true; |
| 2378 | unsigned LastVSRUsedForSpill = 0; |
| 2379 | for (auto &CS : CSI) { |
| 2380 | if (BVAllocatable.none()) |
| 2381 | return false; |
| 2382 | |
| 2383 | MCRegister Reg = CS.getReg(); |
| 2384 | |
| 2385 | if (!PPC::G8RCRegClass.contains(Reg)) { |
| 2386 | AllSpilledToReg = false; |
| 2387 | continue; |
| 2388 | } |
| 2389 | |
| 2390 | // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs |
| 2391 | // into one VSR using the mtvsrdd instruction. |
| 2392 | if (LastVSRUsedForSpill != 0) { |
| 2393 | CS.setDstReg(LastVSRUsedForSpill); |
| 2394 | BVAllocatable.reset(Idx: LastVSRUsedForSpill); |
| 2395 | LastVSRUsedForSpill = 0; |
| 2396 | continue; |
| 2397 | } |
| 2398 | |
| 2399 | unsigned VolatileVFReg = BVAllocatable.find_first(); |
| 2400 | if (VolatileVFReg < BVAllocatable.size()) { |
| 2401 | CS.setDstReg(VolatileVFReg); |
| 2402 | LastVSRUsedForSpill = VolatileVFReg; |
| 2403 | } else { |
| 2404 | AllSpilledToReg = false; |
| 2405 | } |
| 2406 | } |
| 2407 | return AllSpilledToReg; |
| 2408 | } |
| 2409 | |
| 2410 | bool PPCFrameLowering::spillCalleeSavedRegisters( |
| 2411 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
| 2412 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
| 2413 | |
| 2414 | MachineFunction *MF = MBB.getParent(); |
| 2415 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
| 2416 | PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); |
| 2417 | bool MustSaveTOC = FI->mustSaveTOC(); |
| 2418 | DebugLoc DL; |
| 2419 | bool CRSpilled = false; |
| 2420 | MachineInstrBuilder CRMIB; |
| 2421 | BitVector Spilled(TRI->getNumRegs()); |
| 2422 | |
| 2423 | VSRContainingGPRs.clear(); |
| 2424 | |
| 2425 | // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one |
| 2426 | // or two GPRs, so we need table to record information for later save/restore. |
| 2427 | for (const CalleeSavedInfo &Info : CSI) { |
| 2428 | if (Info.isSpilledToReg()) { |
| 2429 | auto &SpilledVSR = VSRContainingGPRs[Info.getDstReg()]; |
| 2430 | assert(SpilledVSR.second == 0 && |
| 2431 | "Can't spill more than two GPRs into VSR!" ); |
| 2432 | if (SpilledVSR.first == 0) |
| 2433 | SpilledVSR.first = Info.getReg(); |
| 2434 | else |
| 2435 | SpilledVSR.second = Info.getReg(); |
| 2436 | } |
| 2437 | } |
| 2438 | |
| 2439 | for (const CalleeSavedInfo &I : CSI) { |
| 2440 | MCRegister Reg = I.getReg(); |
| 2441 | |
| 2442 | // CR2 through CR4 are the nonvolatile CR fields. |
| 2443 | bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; |
| 2444 | |
| 2445 | // Add the callee-saved register as live-in; it's killed at the spill. |
| 2446 | // Do not do this for callee-saved registers that are live-in to the |
| 2447 | // function because they will already be marked live-in and this will be |
| 2448 | // adding it for a second time. It is an error to add the same register |
| 2449 | // to the set more than once. |
| 2450 | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
| 2451 | bool IsLiveIn = MRI.isLiveIn(Reg); |
| 2452 | if (!IsLiveIn) |
| 2453 | MBB.addLiveIn(PhysReg: Reg); |
| 2454 | |
| 2455 | if (CRSpilled && IsCRField) { |
| 2456 | CRMIB.addReg(RegNo: Reg, flags: RegState::ImplicitKill); |
| 2457 | continue; |
| 2458 | } |
| 2459 | |
| 2460 | // The actual spill will happen in the prologue. |
| 2461 | if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) |
| 2462 | continue; |
| 2463 | |
| 2464 | // Insert the spill to the stack frame. |
| 2465 | if (IsCRField) { |
| 2466 | PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); |
| 2467 | if (!Subtarget.is32BitELFABI()) { |
| 2468 | // The actual spill will happen at the start of the prologue. |
| 2469 | FuncInfo->addMustSaveCR(Reg); |
| 2470 | } else { |
| 2471 | CRSpilled = true; |
| 2472 | FuncInfo->setSpillsCR(); |
| 2473 | |
| 2474 | // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have |
| 2475 | // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. |
| 2476 | CRMIB = BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: PPC::MFCR), DestReg: PPC::R12) |
| 2477 | .addReg(RegNo: Reg, flags: RegState::ImplicitKill); |
| 2478 | |
| 2479 | MBB.insert(I: MI, MI: CRMIB); |
| 2480 | MBB.insert(I: MI, MI: addFrameReference(MIB: BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: PPC::STW)) |
| 2481 | .addReg(RegNo: PPC::R12, |
| 2482 | flags: getKillRegState(B: true)), |
| 2483 | FI: I.getFrameIdx())); |
| 2484 | } |
| 2485 | } else { |
| 2486 | if (I.isSpilledToReg()) { |
| 2487 | unsigned Dst = I.getDstReg(); |
| 2488 | |
| 2489 | if (Spilled[Dst]) |
| 2490 | continue; |
| 2491 | |
| 2492 | const auto &VSR = VSRContainingGPRs[Dst]; |
| 2493 | if (VSR.second != 0) { |
| 2494 | assert(Subtarget.hasP9Vector() && |
| 2495 | "mtvsrdd is unavailable on pre-P9 targets." ); |
| 2496 | |
| 2497 | NumPESpillVSR += 2; |
| 2498 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: PPC::MTVSRDD), DestReg: Dst) |
| 2499 | .addReg(RegNo: VSR.first, flags: getKillRegState(B: true)) |
| 2500 | .addReg(RegNo: VSR.second, flags: getKillRegState(B: true)); |
| 2501 | } else if (VSR.second == 0) { |
| 2502 | assert(Subtarget.hasP8Vector() && |
| 2503 | "Can't move GPR to VSR on pre-P8 targets." ); |
| 2504 | |
| 2505 | ++NumPESpillVSR; |
| 2506 | BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: PPC::MTVSRD), |
| 2507 | DestReg: TRI->getSubReg(Reg: Dst, Idx: PPC::sub_64)) |
| 2508 | .addReg(RegNo: VSR.first, flags: getKillRegState(B: true)); |
| 2509 | } else { |
| 2510 | llvm_unreachable("More than two GPRs spilled to a VSR!" ); |
| 2511 | } |
| 2512 | Spilled.set(Dst); |
| 2513 | } else { |
| 2514 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
| 2515 | // Use !IsLiveIn for the kill flag. |
| 2516 | // We do not want to kill registers that are live in this function |
| 2517 | // before their use because they will become undefined registers. |
| 2518 | // Functions without NoUnwind need to preserve the order of elements in |
| 2519 | // saved vector registers. |
| 2520 | if (Subtarget.needsSwapsForVSXMemOps() && |
| 2521 | !MF->getFunction().hasFnAttribute(Kind: Attribute::NoUnwind)) |
| 2522 | TII.storeRegToStackSlotNoUpd(MBB, MBBI: MI, SrcReg: Reg, isKill: !IsLiveIn, |
| 2523 | FrameIndex: I.getFrameIdx(), RC, TRI); |
| 2524 | else |
| 2525 | TII.storeRegToStackSlot(MBB, MBBI: MI, SrcReg: Reg, isKill: !IsLiveIn, FrameIndex: I.getFrameIdx(), RC, |
| 2526 | TRI, VReg: Register()); |
| 2527 | } |
| 2528 | } |
| 2529 | } |
| 2530 | return true; |
| 2531 | } |
| 2532 | |
| 2533 | static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, |
| 2534 | bool CR4Spilled, MachineBasicBlock &MBB, |
| 2535 | MachineBasicBlock::iterator MI, |
| 2536 | ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { |
| 2537 | |
| 2538 | MachineFunction *MF = MBB.getParent(); |
| 2539 | const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); |
| 2540 | DebugLoc DL; |
| 2541 | unsigned MoveReg = PPC::R12; |
| 2542 | |
| 2543 | // 32-bit: FP-relative |
| 2544 | MBB.insert(I: MI, |
| 2545 | MI: addFrameReference(MIB: BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: PPC::LWZ), DestReg: MoveReg), |
| 2546 | FI: CSI[CSIIndex].getFrameIdx())); |
| 2547 | |
| 2548 | unsigned RestoreOp = PPC::MTOCRF; |
| 2549 | if (CR2Spilled) |
| 2550 | MBB.insert(I: MI, MI: BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: RestoreOp), DestReg: PPC::CR2) |
| 2551 | .addReg(RegNo: MoveReg, flags: getKillRegState(B: !CR3Spilled && !CR4Spilled))); |
| 2552 | |
| 2553 | if (CR3Spilled) |
| 2554 | MBB.insert(I: MI, MI: BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: RestoreOp), DestReg: PPC::CR3) |
| 2555 | .addReg(RegNo: MoveReg, flags: getKillRegState(B: !CR4Spilled))); |
| 2556 | |
| 2557 | if (CR4Spilled) |
| 2558 | MBB.insert(I: MI, MI: BuildMI(MF&: *MF, MIMD: DL, MCID: TII.get(Opcode: RestoreOp), DestReg: PPC::CR4) |
| 2559 | .addReg(RegNo: MoveReg, flags: getKillRegState(B: true))); |
| 2560 | } |
| 2561 | |
| 2562 | MachineBasicBlock::iterator PPCFrameLowering:: |
| 2563 | eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, |
| 2564 | MachineBasicBlock::iterator I) const { |
| 2565 | const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); |
| 2566 | if (MF.getTarget().Options.GuaranteedTailCallOpt && |
| 2567 | I->getOpcode() == PPC::ADJCALLSTACKUP) { |
| 2568 | // Add (actually subtract) back the amount the callee popped on return. |
| 2569 | if (int CalleeAmt = I->getOperand(i: 1).getImm()) { |
| 2570 | bool is64Bit = Subtarget.isPPC64(); |
| 2571 | CalleeAmt *= -1; |
| 2572 | unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; |
| 2573 | unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; |
| 2574 | unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; |
| 2575 | unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; |
| 2576 | unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; |
| 2577 | unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; |
| 2578 | const DebugLoc &dl = I->getDebugLoc(); |
| 2579 | |
| 2580 | if (isInt<16>(x: CalleeAmt)) { |
| 2581 | BuildMI(BB&: MBB, I, MIMD: dl, MCID: TII.get(Opcode: ADDIInstr), DestReg: StackReg) |
| 2582 | .addReg(RegNo: StackReg, flags: RegState::Kill) |
| 2583 | .addImm(Val: CalleeAmt); |
| 2584 | } else { |
| 2585 | MachineBasicBlock::iterator MBBI = I; |
| 2586 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: LISInstr), DestReg: TmpReg) |
| 2587 | .addImm(Val: CalleeAmt >> 16); |
| 2588 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ORIInstr), DestReg: TmpReg) |
| 2589 | .addReg(RegNo: TmpReg, flags: RegState::Kill) |
| 2590 | .addImm(Val: CalleeAmt & 0xFFFF); |
| 2591 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ADDInstr), DestReg: StackReg) |
| 2592 | .addReg(RegNo: StackReg, flags: RegState::Kill) |
| 2593 | .addReg(RegNo: TmpReg); |
| 2594 | } |
| 2595 | } |
| 2596 | } |
| 2597 | // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. |
| 2598 | return MBB.erase(I); |
| 2599 | } |
| 2600 | |
| 2601 | static bool isCalleeSavedCR(unsigned Reg) { |
| 2602 | return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; |
| 2603 | } |
| 2604 | |
| 2605 | bool PPCFrameLowering::restoreCalleeSavedRegisters( |
| 2606 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
| 2607 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { |
| 2608 | MachineFunction *MF = MBB.getParent(); |
| 2609 | const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); |
| 2610 | PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); |
| 2611 | bool MustSaveTOC = FI->mustSaveTOC(); |
| 2612 | bool CR2Spilled = false; |
| 2613 | bool CR3Spilled = false; |
| 2614 | bool CR4Spilled = false; |
| 2615 | unsigned CSIIndex = 0; |
| 2616 | BitVector Restored(TRI->getNumRegs()); |
| 2617 | |
| 2618 | // Initialize insertion-point logic; we will be restoring in reverse |
| 2619 | // order of spill. |
| 2620 | MachineBasicBlock::iterator I = MI, BeforeI = I; |
| 2621 | bool AtStart = I == MBB.begin(); |
| 2622 | |
| 2623 | if (!AtStart) |
| 2624 | --BeforeI; |
| 2625 | |
| 2626 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { |
| 2627 | MCRegister Reg = CSI[i].getReg(); |
| 2628 | |
| 2629 | if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) |
| 2630 | continue; |
| 2631 | |
| 2632 | // Restore of callee saved condition register field is handled during |
| 2633 | // epilogue insertion. |
| 2634 | if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) |
| 2635 | continue; |
| 2636 | |
| 2637 | if (Reg == PPC::CR2) { |
| 2638 | CR2Spilled = true; |
| 2639 | // The spill slot is associated only with CR2, which is the |
| 2640 | // first nonvolatile spilled. Save it here. |
| 2641 | CSIIndex = i; |
| 2642 | continue; |
| 2643 | } else if (Reg == PPC::CR3) { |
| 2644 | CR3Spilled = true; |
| 2645 | continue; |
| 2646 | } else if (Reg == PPC::CR4) { |
| 2647 | CR4Spilled = true; |
| 2648 | continue; |
| 2649 | } else { |
| 2650 | // On 32-bit ELF when we first encounter a non-CR register after seeing at |
| 2651 | // least one CR register, restore all spilled CRs together. |
| 2652 | if (CR2Spilled || CR3Spilled || CR4Spilled) { |
| 2653 | bool is31 = needsFP(MF: *MF); |
| 2654 | restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, MI: I, CSI, |
| 2655 | CSIIndex); |
| 2656 | CR2Spilled = CR3Spilled = CR4Spilled = false; |
| 2657 | } |
| 2658 | |
| 2659 | if (CSI[i].isSpilledToReg()) { |
| 2660 | DebugLoc DL; |
| 2661 | unsigned Dst = CSI[i].getDstReg(); |
| 2662 | |
| 2663 | if (Restored[Dst]) |
| 2664 | continue; |
| 2665 | |
| 2666 | const auto &VSR = VSRContainingGPRs[Dst]; |
| 2667 | if (VSR.second != 0) { |
| 2668 | assert(Subtarget.hasP9Vector()); |
| 2669 | NumPEReloadVSR += 2; |
| 2670 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII.get(Opcode: PPC::MFVSRLD), DestReg: VSR.second).addReg(RegNo: Dst); |
| 2671 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII.get(Opcode: PPC::MFVSRD), DestReg: VSR.first) |
| 2672 | .addReg(RegNo: TRI->getSubReg(Reg: Dst, Idx: PPC::sub_64), flags: getKillRegState(B: true)); |
| 2673 | } else if (VSR.second == 0) { |
| 2674 | assert(Subtarget.hasP8Vector()); |
| 2675 | ++NumPEReloadVSR; |
| 2676 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII.get(Opcode: PPC::MFVSRD), DestReg: VSR.first) |
| 2677 | .addReg(RegNo: TRI->getSubReg(Reg: Dst, Idx: PPC::sub_64), flags: getKillRegState(B: true)); |
| 2678 | } else { |
| 2679 | llvm_unreachable("More than two GPRs spilled to a VSR!" ); |
| 2680 | } |
| 2681 | |
| 2682 | Restored.set(Dst); |
| 2683 | |
| 2684 | } else { |
| 2685 | // Default behavior for non-CR saves. |
| 2686 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); |
| 2687 | |
| 2688 | // Functions without NoUnwind need to preserve the order of elements in |
| 2689 | // saved vector registers. |
| 2690 | if (Subtarget.needsSwapsForVSXMemOps() && |
| 2691 | !MF->getFunction().hasFnAttribute(Kind: Attribute::NoUnwind)) |
| 2692 | TII.loadRegFromStackSlotNoUpd(MBB, MBBI: I, DestReg: Reg, FrameIndex: CSI[i].getFrameIdx(), RC, |
| 2693 | TRI); |
| 2694 | else |
| 2695 | TII.loadRegFromStackSlot(MBB, MBBI: I, DestReg: Reg, FrameIndex: CSI[i].getFrameIdx(), RC, TRI, |
| 2696 | VReg: Register()); |
| 2697 | |
| 2698 | assert(I != MBB.begin() && |
| 2699 | "loadRegFromStackSlot didn't insert any code!" ); |
| 2700 | } |
| 2701 | } |
| 2702 | |
| 2703 | // Insert in reverse order. |
| 2704 | if (AtStart) |
| 2705 | I = MBB.begin(); |
| 2706 | else { |
| 2707 | I = BeforeI; |
| 2708 | ++I; |
| 2709 | } |
| 2710 | } |
| 2711 | |
| 2712 | // If we haven't yet spilled the CRs, do so now. |
| 2713 | if (CR2Spilled || CR3Spilled || CR4Spilled) { |
| 2714 | assert(Subtarget.is32BitELFABI() && |
| 2715 | "Only set CR[2|3|4]Spilled on 32-bit SVR4." ); |
| 2716 | bool is31 = needsFP(MF: *MF); |
| 2717 | restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, MI: I, CSI, CSIIndex); |
| 2718 | } |
| 2719 | |
| 2720 | return true; |
| 2721 | } |
| 2722 | |
| 2723 | uint64_t PPCFrameLowering::getTOCSaveOffset() const { |
| 2724 | return TOCSaveOffset; |
| 2725 | } |
| 2726 | |
| 2727 | uint64_t PPCFrameLowering::getFramePointerSaveOffset() const { |
| 2728 | return FramePointerSaveOffset; |
| 2729 | } |
| 2730 | |
| 2731 | uint64_t PPCFrameLowering::getBasePointerSaveOffset() const { |
| 2732 | return BasePointerSaveOffset; |
| 2733 | } |
| 2734 | |
| 2735 | bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { |
| 2736 | if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) |
| 2737 | return false; |
| 2738 | return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI(); |
| 2739 | } |
| 2740 | |
| 2741 | void PPCFrameLowering::updateCalleeSaves(const MachineFunction &MF, |
| 2742 | BitVector &SavedRegs) const { |
| 2743 | // The AIX ABI uses traceback tables for EH which require that if callee-saved |
| 2744 | // register N is used, all registers N-31 must be saved/restored. |
| 2745 | // NOTE: The check for AIX is not actually what is relevant. Traceback tables |
| 2746 | // on Linux have the same requirements. It is just that AIX is the only ABI |
| 2747 | // for which we actually use traceback tables. If another ABI needs to be |
| 2748 | // supported that also uses them, we can add a check such as |
| 2749 | // Subtarget.usesTraceBackTables(). |
| 2750 | assert(Subtarget.isAIXABI() && |
| 2751 | "Function updateCalleeSaves should only be called for AIX." ); |
| 2752 | |
| 2753 | // If there are no callee saves then there is nothing to do. |
| 2754 | if (SavedRegs.none()) |
| 2755 | return; |
| 2756 | |
| 2757 | const MCPhysReg *CSRegs = |
| 2758 | Subtarget.getRegisterInfo()->getCalleeSavedRegs(MF: &MF); |
| 2759 | MCPhysReg LowestGPR = PPC::R31; |
| 2760 | MCPhysReg LowestG8R = PPC::X31; |
| 2761 | MCPhysReg LowestFPR = PPC::F31; |
| 2762 | MCPhysReg LowestVR = PPC::V31; |
| 2763 | |
| 2764 | // Traverse the CSRs twice so as not to rely on ascending ordering of |
| 2765 | // registers in the array. The first pass finds the lowest numbered |
| 2766 | // register and the second pass marks all higher numbered registers |
| 2767 | // for spilling. |
| 2768 | for (int i = 0; CSRegs[i]; i++) { |
| 2769 | // Get the lowest numbered register for each class that actually needs |
| 2770 | // to be saved. |
| 2771 | MCPhysReg Cand = CSRegs[i]; |
| 2772 | if (!SavedRegs.test(Idx: Cand)) |
| 2773 | continue; |
| 2774 | // When R2/X2 is a CSR and not used for passing arguments, it is allocated |
| 2775 | // earlier than other volatile registers. R2/X2 is not contiguous with |
| 2776 | // R13/X13 to R31/X31. |
| 2777 | if (Cand == PPC::X2 || Cand == PPC::R2) { |
| 2778 | SavedRegs.set(Cand); |
| 2779 | continue; |
| 2780 | } |
| 2781 | |
| 2782 | if (PPC::GPRCRegClass.contains(Reg: Cand) && Cand < LowestGPR) |
| 2783 | LowestGPR = Cand; |
| 2784 | else if (PPC::G8RCRegClass.contains(Reg: Cand) && Cand < LowestG8R) |
| 2785 | LowestG8R = Cand; |
| 2786 | else if ((PPC::F4RCRegClass.contains(Reg: Cand) || |
| 2787 | PPC::F8RCRegClass.contains(Reg: Cand)) && |
| 2788 | Cand < LowestFPR) |
| 2789 | LowestFPR = Cand; |
| 2790 | else if (PPC::VRRCRegClass.contains(Reg: Cand) && Cand < LowestVR) |
| 2791 | LowestVR = Cand; |
| 2792 | } |
| 2793 | |
| 2794 | for (int i = 0; CSRegs[i]; i++) { |
| 2795 | MCPhysReg Cand = CSRegs[i]; |
| 2796 | if ((PPC::GPRCRegClass.contains(Reg: Cand) && Cand > LowestGPR) || |
| 2797 | (PPC::G8RCRegClass.contains(Reg: Cand) && Cand > LowestG8R) || |
| 2798 | ((PPC::F4RCRegClass.contains(Reg: Cand) || |
| 2799 | PPC::F8RCRegClass.contains(Reg: Cand)) && |
| 2800 | Cand > LowestFPR) || |
| 2801 | (PPC::VRRCRegClass.contains(Reg: Cand) && Cand > LowestVR)) |
| 2802 | SavedRegs.set(Cand); |
| 2803 | } |
| 2804 | } |
| 2805 | |
| 2806 | uint64_t PPCFrameLowering::getStackThreshold() const { |
| 2807 | // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack; |
| 2808 | // use `add r1, r1, <scratch_reg>` to release the stack frame. |
| 2809 | // Scratch register contains a signed 64-bit number, which is negative |
| 2810 | // when extending the stack and is positive when releasing the stack frame. |
| 2811 | // To make `stux` and `add` paired, the absolute value of the number contained |
| 2812 | // in the scratch register should be the same. Thus the maximum stack size |
| 2813 | // is (2^63)-1, i.e., LONG_MAX. |
| 2814 | if (Subtarget.isPPC64()) |
| 2815 | return LONG_MAX; |
| 2816 | |
| 2817 | return TargetFrameLowering::getStackThreshold(); |
| 2818 | } |
| 2819 | |