//===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass is responsible for finalizing the function's frame layout, saving
// callee saved registers, and for emitting prolog & epilog code for the
// function.
//
// This pass must be run after register allocation. After this pass is
// executed, it is illegal to construct MO_FrameIndex operands.
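//
// After the frame layout is finalized, every abstract frame index is
// rewritten as a reference to a physical base register (typically the stack
// or frame pointer) plus a concrete offset.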
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PEI.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "prologepilog"

using MBBVector = SmallVector<MachineBasicBlock *, 4>;

STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
STATISTIC(NumFuncSeen, "Number of functions seen in PEI");

namespace {

class PEIImpl {
  RegScavenger *RS = nullptr;

  // Save and Restore blocks of the current function. Typically there is a
  // single save block, unless Windows EH funclets are involved.
  MBBVector SaveBlocks;
  MBBVector RestoreBlocks;

  // Flag to control whether to use the register scavenger to resolve
  // frame index materialization registers. Set according to
  // TRI->requiresFrameIndexScavenging() for the current function.
  bool FrameIndexVirtualScavenging = false;

  // Flag to control whether the scavenger should be passed even though
  // FrameIndexVirtualScavenging is used.
  bool FrameIndexEliminationScavenging = false;

  // Emit remarks.
  MachineOptimizationRemarkEmitter *ORE = nullptr;

  void calculateCallFrameInfo(MachineFunction &MF);
  void calculateSaveRestoreBlocks(MachineFunction &MF);
  void spillCalleeSavedRegs(MachineFunction &MF);

  void calculateFrameObjectOffsets(MachineFunction &MF);
  void replaceFrameIndices(MachineFunction &MF);
  void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
                           int &SPAdj);
  // Frame indices in debug values are encoded in a target independent
  // way with simply the frame index and offset rather than any
  // target-specific addressing mode.
  bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
                                   unsigned OpIdx, int SPAdj = 0);
  // Does the same as replaceFrameIndices, but uses a backward MIR walk and
  // a backward register scavenger walk.
  void replaceFrameIndicesBackward(MachineFunction &MF);
  void replaceFrameIndicesBackward(MachineBasicBlock *BB, MachineFunction &MF,
                                   int &SPAdj);

  void insertPrologEpilogCode(MachineFunction &MF);
  void insertZeroCallUsedRegs(MachineFunction &MF);

public:
  PEIImpl(MachineOptimizationRemarkEmitter *ORE) : ORE(ORE) {}
  bool run(MachineFunction &MF);
};

class PEILegacy : public MachineFunctionPass {
public:
  static char ID;

  PEILegacy() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
  /// frame indexes with appropriate references.
  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

char PEILegacy::ID = 0;

char &llvm::PrologEpilogCodeInserterID = PEILegacy::ID;

INITIALIZE_PASS_BEGIN(PEILegacy, DEBUG_TYPE,
                      "Prologue/Epilogue Insertion & Frame Finalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(PEILegacy, DEBUG_TYPE,
                    "Prologue/Epilogue Insertion & Frame Finalization", false,
                    false)

MachineFunctionPass *llvm::createPrologEpilogInserterPass() {
  return new PEILegacy();
}

STATISTIC(NumBytesStackSpace,
          "Number of bytes used for stack in all functions");

void PEILegacy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addPreserved<MachineLoopInfoWrapperPass>();
  AU.addPreserved<MachineDominatorTreeWrapperPass>();
  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

/// StackObjSet - A set of stack object indexes
using StackObjSet = SmallSetVector<int, 8>;

using SavedDbgValuesMap =
    SmallDenseMap<MachineBasicBlock *, SmallVector<MachineInstr *, 4>, 4>;

/// Stash DBG_VALUEs that describe parameters and which are placed at the start
/// of the block. Later on, after the prologue code has been emitted, the
/// stashed DBG_VALUEs will be reinserted at the start of the block.
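/// This keeps the prologue instructions from ending up ahead of the
/// entry-block parameter locations, so parameters remain described from the
/// first instruction of the function.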
static void stashEntryDbgValues(MachineBasicBlock &MBB,
                                SavedDbgValuesMap &EntryDbgValues) {
  SmallVector<const MachineInstr *, 4> FrameIndexValues;

  for (auto &MI : MBB) {
    if (!MI.isDebugInstr())
      break;
    if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter())
      continue;
    if (any_of(MI.debug_operands(),
               [](const MachineOperand &MO) { return MO.isFI(); })) {
      // We can only emit valid locations for frame indices after the frame
      // setup, so do not stash them away.
      FrameIndexValues.push_back(&MI);
      continue;
    }
    const DILocalVariable *Var = MI.getDebugVariable();
    const DIExpression *Expr = MI.getDebugExpression();
    auto Overlaps = [Var, Expr](const MachineInstr *DV) {
      return Var == DV->getDebugVariable() &&
             Expr->fragmentsOverlap(DV->getDebugExpression());
    };
    // See if the debug value overlaps with any preceding debug value that will
    // not be stashed. If that is the case, then we can't stash this value, as
    // we would then reorder the values at reinsertion.
    if (llvm::none_of(FrameIndexValues, Overlaps))
      EntryDbgValues[&MBB].push_back(&MI);
  }

  // Remove stashed debug values from the block.
  if (auto It = EntryDbgValues.find(&MBB); It != EntryDbgValues.end())
    for (auto *MI : It->second)
      MI->removeFromParent();
}

bool PEIImpl::run(MachineFunction &MF) {
  NumFuncSeen++;
  const Function &F = MF.getFunction();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

  RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr;
  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);

  // Spill frame pointer and/or base pointer registers if they are clobbered.
  // It is placed before call frame instruction elimination so it will not mess
  // with stack arguments.
  TFI->spillFPBP(MF);

  // Calculate the MaxCallFrameSize value for the function's frame
  // information. Also eliminates call frame pseudo instructions.
  calculateCallFrameInfo(MF);

  // Determine placement of CSR spill/restore code and prolog/epilog code:
  // place all spills in the entry block, all restores in return blocks.
  calculateSaveRestoreBlocks(MF);

  // Stash away DBG_VALUEs that should not be moved by insertion of prolog code.
  SavedDbgValuesMap EntryDbgValues;
  for (MachineBasicBlock *SaveBlock : SaveBlocks)
    stashEntryDbgValues(*SaveBlock, EntryDbgValues);

  // Handle CSR spilling and restoring, for targets that need it.
  if (MF.getTarget().usesPhysRegsForValues())
    spillCalleeSavedRegs(MF);

  // Allow the target machine to make final modifications to the function
  // before the frame layout is finalized.
  TFI->processFunctionBeforeFrameFinalized(MF, RS);

  // Calculate actual frame offsets for all abstract stack objects...
  calculateFrameObjectOffsets(MF);

  // Add prolog and epilog code to the function. This function is required
  // to align the stack frame as necessary for any stack variables or
  // called functions. Because of this, calculateCalleeSavedRegisters()
  // must be called before this function in order to set the AdjustsStack
  // and MaxCallFrameSize variables.
  if (!F.hasFnAttribute(Attribute::Naked))
    insertPrologEpilogCode(MF);

  // Reinsert stashed debug values at the start of the entry blocks.
  for (auto &I : EntryDbgValues)
    I.first->insert(I.first->begin(), I.second.begin(), I.second.end());

  // Allow the target machine to make final modifications to the function
  // before the frame indices are replaced.
  TFI->processFunctionBeforeFrameIndicesReplaced(MF, RS);

  // Replace all MO_FrameIndex operands with physical register references
  // and actual offsets.
  if (TFI->needsFrameIndexResolution(MF)) {
    // Allow the target to determine this after knowing the frame size.
    FrameIndexEliminationScavenging =
        (RS && !FrameIndexVirtualScavenging) ||
        TRI->requiresFrameIndexReplacementScavenging(MF);

    if (TRI->eliminateFrameIndicesBackwards())
      replaceFrameIndicesBackward(MF);
    else
      replaceFrameIndices(MF);
  }

  // If register scavenging is needed, as we've enabled doing it as a
  // post-pass, scavenge the virtual registers that frame index elimination
  // inserted.
  if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging)
    scavengeFrameVirtualRegs(MF, *RS);

  // Warn on stack size when it exceeds the given limit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t StackSize = MFI.getStackSize();

  uint64_t Threshold = TFI->getStackThreshold();
  if (MF.getFunction().hasFnAttribute("warn-stack-size")) {
    bool Failed = MF.getFunction()
                      .getFnAttribute("warn-stack-size")
                      .getValueAsString()
                      .getAsInteger(10, Threshold);
    // Verifier should have caught this.
    assert(!Failed && "Invalid warn-stack-size fn attr value");
    (void)Failed;
  }
  uint64_t UnsafeStackSize = MFI.getUnsafeStackSize();
  if (MF.getFunction().hasFnAttribute(Attribute::SafeStack))
    StackSize += UnsafeStackSize;

  if (StackSize > Threshold) {
    DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning);
    F.getContext().diagnose(DiagStackSize);
    int64_t SpillSize = 0;
    for (int Idx = MFI.getObjectIndexBegin(), End = MFI.getObjectIndexEnd();
         Idx != End; ++Idx) {
      if (MFI.isSpillSlotObjectIndex(Idx))
        SpillSize += MFI.getObjectSize(Idx);
    }

    [[maybe_unused]] float SpillPct =
        static_cast<float>(SpillSize) / static_cast<float>(StackSize);
    LLVM_DEBUG(
        dbgs() << formatv("{0}/{1} ({3:P}) spills, {2}/{1} ({4:P}) variables",
                          SpillSize, StackSize, StackSize - SpillSize, SpillPct,
                          1.0f - SpillPct));
    if (UnsafeStackSize != 0) {
      LLVM_DEBUG(dbgs() << formatv(", {0}/{2} ({1:P}) unsafe stack",
                                   UnsafeStackSize,
                                   static_cast<float>(UnsafeStackSize) /
                                       static_cast<float>(StackSize),
                                   StackSize));
    }
    LLVM_DEBUG(dbgs() << "\n");
  }

  ORE->emit([&]() {
    return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize",
                                             MF.getFunction().getSubprogram(),
                                             &MF.front())
           << ore::NV("NumStackBytes", StackSize)
           << " stack bytes in function '"
           << ore::NV("Function", MF.getFunction().getName()) << "'";
  });

  // Emit any remarks implemented for the target, based on final frame layout.
  TFI->emitRemarks(MF, ORE);

  delete RS;
  SaveBlocks.clear();
  RestoreBlocks.clear();
  MFI.clearSavePoints();
  MFI.clearRestorePoints();
  return true;
}

/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
bool PEILegacy::runOnMachineFunction(MachineFunction &MF) {
  MachineOptimizationRemarkEmitter *ORE =
      &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
  return PEIImpl(ORE).run(MF);
}

PreservedAnalyses
PrologEpilogInserterPass::run(MachineFunction &MF,
                              MachineFunctionAnalysisManager &MFAM) {
  MachineOptimizationRemarkEmitter &ORE =
      MFAM.getResult<MachineOptimizationRemarkEmitterAnalysis>(MF);
  if (!PEIImpl(&ORE).run(MF))
    return PreservedAnalyses::all();

  return getMachineFunctionPassPreservedAnalyses()
      .preserveSet<CFGAnalyses>()
      .preserve<MachineDominatorTreeAnalysis>()
      .preserve<MachineLoopAnalysis>();
}

/// Calculate the MaxCallFrameSize variable for the function's frame
/// information and eliminate call frame pseudo instructions.
void PEIImpl::calculateCallFrameInfo(MachineFunction &MF) {
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Get the function call frame set-up and tear-down instruction opcode
  unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();

  // Early exit for targets which have no call frame setup/destroy pseudo
  // instructions.
  if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
    return;

  // (Re-)Compute the MaxCallFrameSize.
  [[maybe_unused]] uint64_t MaxCFSIn =
      MFI.isMaxCallFrameSizeComputed() ? MFI.getMaxCallFrameSize() : UINT64_MAX;
  std::vector<MachineBasicBlock::iterator> FrameSDOps;
  MFI.computeMaxCallFrameSize(MF, &FrameSDOps);
  assert(MFI.getMaxCallFrameSize() <= MaxCFSIn &&
         "Recomputing MaxCFS gave a larger value.");
  assert((FrameSDOps.empty() || MF.getFrameInfo().adjustsStack()) &&
         "AdjustsStack not set in presence of a frame pseudo instruction.");

  if (TFI->canSimplifyCallFramePseudos(MF)) {
    // If call frames are not being included as part of the stack frame, and
    // the target doesn't indicate otherwise, remove the call frame pseudos
    // here. The sub/add sp instruction pairs are still inserted, but we don't
    // need to track the SP adjustment for frame index elimination.
    for (MachineBasicBlock::iterator I : FrameSDOps)
      TFI->eliminateCallFramePseudoInstr(MF, *I->getParent(), I);

    // We can't track the call frame size after call frame pseudos have been
    // eliminated. Set it to zero everywhere to keep MachineVerifier happy.
    for (MachineBasicBlock &MBB : MF)
      MBB.setCallFrameSize(0);
  }
}

/// Compute the sets of entry and return blocks for saving and restoring
/// callee-saved registers, and placing prolog and epilog code.
void PEIImpl::calculateSaveRestoreBlocks(MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // Even when we do not change any CSR, we still want to insert the
  // prologue and epilogue of the function.
  // So set the save points for those.

  // Use the points found by shrink-wrapping, if any.
  if (!MFI.getSavePoints().empty()) {
    assert(MFI.getSavePoints().size() == 1 &&
           "Multiple save points are not yet supported!");
    const auto &SavePoint = *MFI.getSavePoints().begin();
    SaveBlocks.push_back(SavePoint.first);
    assert(MFI.getRestorePoints().size() == 1 &&
           "Multiple restore points are not yet supported!");
    const auto &RestorePoint = *MFI.getRestorePoints().begin();
    MachineBasicBlock *RestoreBlock = RestorePoint.first;
    // If RestoreBlock does not have any successor and is not a return block
    // then the end point is unreachable and we do not need to insert any
    // epilogue.
    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
      RestoreBlocks.push_back(RestoreBlock);
    return;
  }

  // Save refs to entry and return blocks.
  SaveBlocks.push_back(&MF.front());
  for (MachineBasicBlock &MBB : MF) {
    if (MBB.isEHFuncletEntry())
      SaveBlocks.push_back(&MBB);
    if (MBB.isReturnBlock())
      RestoreBlocks.push_back(&MBB);
  }
}

static void assignCalleeSavedSpillSlots(MachineFunction &F,
                                        const BitVector &SavedRegs) {
  if (SavedRegs.empty())
    return;

  const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
  const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs();
  BitVector CSMask(SavedRegs.size());

  for (unsigned i = 0; CSRegs[i]; ++i)
    CSMask.set(CSRegs[i]);

  std::vector<CalleeSavedInfo> CSI;
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    if (SavedRegs.test(Reg)) {
      bool SavedSuper = false;
      for (const MCPhysReg &SuperReg : RegInfo->superregs(Reg)) {
        // Some backends set all aliases for some registers as saved, such as
        // Mips's $fp, so they appear in SavedRegs but not CSRegs.
        if (SavedRegs.test(SuperReg) && CSMask.test(SuperReg)) {
          SavedSuper = true;
          break;
        }
      }

      if (!SavedSuper)
        CSI.push_back(CalleeSavedInfo(Reg));
    }
  }

  const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
  MachineFrameInfo &MFI = F.getFrameInfo();
  if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
    // If target doesn't implement this, use generic code.

    if (CSI.empty())
      return; // Early exit if no callee saved registers are modified!

    unsigned NumFixedSpillSlots;
    const TargetFrameLowering::SpillSlot *FixedSpillSlots =
        TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);

    // Now that we know which registers need to be saved and restored, allocate
    // stack slots for them.
    for (auto &CS : CSI) {
      // If the target has spilled this register to another register or already
      // handled it, we don't need to allocate a stack slot.
      if (CS.isSpilledToReg())
        continue;

      MCRegister Reg = CS.getReg();
      const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);

      int FrameIdx;
      if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
        CS.setFrameIdx(FrameIdx);
        continue;
      }

      // Check to see if this physreg must be spilled to a particular stack slot
      // on this target.
      const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
      while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
             FixedSlot->Reg != Reg)
        ++FixedSlot;

      unsigned Size = RegInfo->getSpillSize(*RC);
      if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
        // Nope, just spill it anywhere convenient.
        Align Alignment = RegInfo->getSpillAlign(*RC);
        // We may not be able to satisfy the desired alignment specification of
        // the TargetRegisterClass if the stack alignment is smaller. Use the
        // min.
        Alignment = std::min(Alignment, TFI->getStackAlign());
        FrameIdx = MFI.CreateStackObject(Size, Alignment, true);
        MFI.setIsCalleeSavedObjectIndex(FrameIdx, true);
      } else {
        // Spill it to the stack where we must.
        FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset);
      }

      CS.setFrameIdx(FrameIdx);
    }
  }

  MFI.setCalleeSavedInfo(CSI);
}

/// Helper function to update the liveness information for the callee-saved
/// registers.
static void updateLiveness(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Visited will contain all the basic blocks that are in the region
  // where the callee saved registers are alive:
  // - Anything that is not Save or Restore -> LiveThrough.
  // - Save -> LiveIn.
  // - Restore -> LiveOut.
  // The live-out is not attached to the block, so no need to keep
  // Restore in this set.
  SmallPtrSet<MachineBasicBlock *, 8> Visited;
  SmallVector<MachineBasicBlock *, 8> WorkList;
  MachineBasicBlock *Entry = &MF.front();

  assert(MFI.getSavePoints().size() < 2 &&
         "Multiple save points not yet supported!");
  MachineBasicBlock *Save = MFI.getSavePoints().empty()
                                ? nullptr
                                : (*MFI.getSavePoints().begin()).first;

  if (!Save)
    Save = Entry;

  if (Entry != Save) {
    WorkList.push_back(Entry);
    Visited.insert(Entry);
  }
  Visited.insert(Save);

  assert(MFI.getRestorePoints().size() < 2 &&
         "Multiple restore points not yet supported!");
  MachineBasicBlock *Restore = MFI.getRestorePoints().empty()
                                   ? nullptr
                                   : (*MFI.getRestorePoints().begin()).first;
  if (Restore)
    // By construction Restore cannot be visited, otherwise it
    // means there exists a path to Restore that does not go
    // through Save.
    WorkList.push_back(Restore);

  while (!WorkList.empty()) {
    const MachineBasicBlock *CurBB = WorkList.pop_back_val();
    // By construction, the region that is after the save point is
    // dominated by the Save and post-dominated by the Restore.
    if (CurBB == Save && Save != Restore)
      continue;
    // Enqueue all the successors not already visited.
    // Those are by construction either before Save or after Restore.
    for (MachineBasicBlock *SuccBB : CurBB->successors())
      if (Visited.insert(SuccBB).second)
        WorkList.push_back(SuccBB);
  }

  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  MachineRegisterInfo &MRI = MF.getRegInfo();
  for (const CalleeSavedInfo &I : CSI) {
    for (MachineBasicBlock *MBB : Visited) {
      MCRegister Reg = I.getReg();
      // Add the callee-saved register as live-in.
      // It's killed at the spill.
      if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
        MBB->addLiveIn(Reg);
    }
    // If callee-saved register is spilled to another register rather than
    // spilling to stack, the destination register has to be marked as live for
    // each MBB between the prologue and epilogue so that it is not clobbered
    // before it is reloaded in the epilogue. The Visited set contains all
    // blocks outside of the region delimited by prologue/epilogue.
    if (I.isSpilledToReg()) {
      for (MachineBasicBlock &MBB : MF) {
        if (Visited.count(&MBB))
          continue;
        MCRegister DstReg = I.getDstReg();
        if (!MBB.isLiveIn(DstReg))
          MBB.addLiveIn(DstReg);
      }
    }
  }
}

/// Insert spill code for the callee-saved registers used in the function.
static void insertCSRSaves(MachineBasicBlock &SaveBlock,
                           ArrayRef<CalleeSavedInfo> CSI) {
  MachineFunction &MF = *SaveBlock.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

  MachineBasicBlock::iterator I = SaveBlock.begin();
  if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
    for (const CalleeSavedInfo &CS : CSI) {
      TFI->spillCalleeSavedRegister(SaveBlock, I, CS, TII, TRI);
    }
  }
}

/// Insert restore code for the callee-saved registers used in the function.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
                              std::vector<CalleeSavedInfo> &CSI) {
  MachineFunction &MF = *RestoreBlock.getParent();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

  // Restore all registers immediately before the return and any
  // terminators that precede it.
  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();

  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
    for (const CalleeSavedInfo &CI : reverse(CSI)) {
      TFI->restoreCalleeSavedRegister(RestoreBlock, I, CI, TII, TRI);
    }
  }
}

void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) {
  // We can't list this requirement in getRequiredProperties because some
  // targets (WebAssembly) use virtual registers past this point, and the pass
  // pipeline is set up without giving the passes a chance to look at the
  // TargetMachine.
  // FIXME: Find a way to express this in getRequiredProperties.
  assert(MF.getProperties().hasNoVRegs());

  const Function &F = MF.getFunction();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Determine which of the registers in the callee save list should be saved.
  BitVector SavedRegs;
  TFI->determineCalleeSaves(MF, SavedRegs, RS);

  // Assign stack slots for any callee-saved registers that must be spilled.
  assignCalleeSavedSpillSlots(MF, SavedRegs);

  // Add the code to save and restore the callee saved registers.
  if (!F.hasFnAttribute(Attribute::Naked)) {
    MFI.setCalleeSavedInfoValid(true);

    std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

    // Fill SavePoints and RestorePoints with CalleeSavedRegisters
    if (!MFI.getSavePoints().empty()) {
      SaveRestorePoints SaveRestorePts;
      for (const auto &SavePoint : MFI.getSavePoints())
        SaveRestorePts.insert({SavePoint.first, CSI});
      MFI.setSavePoints(std::move(SaveRestorePts));

      SaveRestorePts.clear();
      for (const auto &RestorePoint : MFI.getRestorePoints())
        SaveRestorePts.insert({RestorePoint.first, CSI});
      MFI.setRestorePoints(std::move(SaveRestorePts));
    }

    if (!CSI.empty()) {
      if (!MFI.hasCalls())
        NumLeafFuncWithSpills++;

      for (MachineBasicBlock *SaveBlock : SaveBlocks)
        insertCSRSaves(*SaveBlock, CSI);

      // Update the live-in information of all the blocks up to the save point.
      updateLiveness(MF);

      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
        insertCSRRestores(*RestoreBlock, CSI);
    }
  }
}

/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
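/// For example, on a downward-growing stack, an 8-byte object with 16-byte
/// alignment arriving with Offset == 4 advances Offset to 12, rounds it up to
/// 16, and records the object at SP[-16].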
static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
                                     bool StackGrowsDown, int64_t &Offset,
                                     Align &MaxAlign) {
  // If the stack grows down, add the object size to find the lowest address.
  if (StackGrowsDown)
    Offset += MFI.getObjectSize(FrameIdx);

  Align Alignment = MFI.getObjectAlign(FrameIdx);

  // If the alignment of this object is greater than that of the stack, then
  // increase the stack alignment to match.
  MaxAlign = std::max(MaxAlign, Alignment);

  // Adjust to alignment boundary.
  Offset = alignTo(Offset, Alignment);

  if (StackGrowsDown) {
    LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset
                      << "]\n");
    MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset
  } else {
    LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset
                      << "]\n");
    MFI.setObjectOffset(FrameIdx, Offset);
    Offset += MFI.getObjectSize(FrameIdx);
  }
}

/// Compute which bytes of fixed and callee-save stack area are unused and keep
/// track of them in StackBytesFree.
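/// For example, on a downward-growing stack an 8-byte object at offset -24
/// occupies bytes [16, 24) of the fixed/callee-save area, so those bits are
/// cleared in StackBytesFree.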
static inline void computeFreeStackSlots(MachineFrameInfo &MFI,
                                         bool StackGrowsDown,
                                         int64_t FixedCSEnd,
                                         BitVector &StackBytesFree) {
  // Avoid undefined int64_t -> int conversion below in extreme case.
  if (FixedCSEnd > std::numeric_limits<int>::max())
    return;

  StackBytesFree.resize(FixedCSEnd, true);

  SmallVector<int, 16> AllocatedFrameSlots;
  // Add fixed objects.
  for (int i = MFI.getObjectIndexBegin(); i != 0; ++i)
    // StackSlot scavenging is only implemented for the default stack.
    if (MFI.getStackID(i) == TargetStackID::Default)
      AllocatedFrameSlots.push_back(i);
  // Add callee-save objects if there are any.
  for (int i = MFI.getObjectIndexBegin(); i < MFI.getObjectIndexEnd(); i++)
    if (MFI.isCalleeSavedObjectIndex(i) &&
        MFI.getStackID(i) == TargetStackID::Default)
      AllocatedFrameSlots.push_back(i);

  for (int i : AllocatedFrameSlots) {
    // These are converted from int64_t, but they should always fit in int
    // because of the FixedCSEnd check above.
    int ObjOffset = MFI.getObjectOffset(i);
    int ObjSize = MFI.getObjectSize(i);
    int ObjStart, ObjEnd;
    if (StackGrowsDown) {
      // ObjOffset is negative when StackGrowsDown is true.
      ObjStart = -ObjOffset - ObjSize;
      ObjEnd = -ObjOffset;
    } else {
      ObjStart = ObjOffset;
      ObjEnd = ObjOffset + ObjSize;
    }
    // Ignore fixed holes that are in the previous stack frame.
    if (ObjEnd > 0)
      StackBytesFree.reset(ObjStart, ObjEnd);
  }
}

/// Assign frame object to an unused portion of the stack in the fixed stack
/// object range. Return true if the allocation was successful.
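/// For example, an 8-byte spill slot can be placed into a suitably aligned
/// 8-byte hole between two fixed objects instead of growing the frame.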
static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
                                     bool StackGrowsDown, Align MaxAlign,
                                     BitVector &StackBytesFree) {
  if (MFI.isVariableSizedObjectIndex(FrameIdx))
    return false;

  if (StackBytesFree.none()) {
    // clear it to speed up later scavengeStackSlot calls to
    // StackBytesFree.none()
    StackBytesFree.clear();
    return false;
  }

  Align ObjAlign = MFI.getObjectAlign(FrameIdx);
  if (ObjAlign > MaxAlign)
    return false;

  int64_t ObjSize = MFI.getObjectSize(FrameIdx);
  int FreeStart;
  for (FreeStart = StackBytesFree.find_first(); FreeStart != -1;
       FreeStart = StackBytesFree.find_next(FreeStart)) {

    // Check that free space has suitable alignment.
    unsigned ObjStart = StackGrowsDown ? FreeStart + ObjSize : FreeStart;
    if (alignTo(ObjStart, ObjAlign) != ObjStart)
      continue;

    if (FreeStart + ObjSize > StackBytesFree.size())
      return false;

    bool AllBytesFree = true;
    for (unsigned Byte = 0; Byte < ObjSize; ++Byte)
      if (!StackBytesFree.test(FreeStart + Byte)) {
        AllBytesFree = false;
        break;
      }
    if (AllBytesFree)
      break;
  }

  if (FreeStart == -1)
    return false;

  if (StackGrowsDown) {
    int ObjStart = -(FreeStart + ObjSize);
    LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP["
                      << ObjStart << "]\n");
    MFI.setObjectOffset(FrameIdx, ObjStart);
  } else {
    LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP["
                      << FreeStart << "]\n");
    MFI.setObjectOffset(FrameIdx, FreeStart);
  }

  StackBytesFree.reset(FreeStart, FreeStart + ObjSize);
  return true;
}

/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
/// those required to be close to the Stack Protector) to stack offsets.
static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
                                  SmallSet<int, 16> &ProtectedObjs,
                                  MachineFrameInfo &MFI, bool StackGrowsDown,
                                  int64_t &Offset, Align &MaxAlign) {

  for (int i : UnassignedObjs) {
    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
    ProtectedObjs.insert(i);
  }
}

/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
/// abstract stack objects.
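/// Layout proceeds in the direction of stack growth: past any fixed objects,
/// then callee-save spill slots, then (optionally) the local allocation block
/// and stack-protected objects, and finally the remaining objects.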
void PEIImpl::calculateFrameObjectOffsets(MachineFunction &MF) {
  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();

  bool StackGrowsDown =
      TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;

  // Loop over all of the stack objects, assigning sequential addresses...
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Start at the beginning of the local area.
  // The Offset is the distance from the stack top in the direction
  // of stack growth -- so it's always nonnegative.
  int LocalAreaOffset = TFI.getOffsetOfLocalArea();
  if (StackGrowsDown)
    LocalAreaOffset = -LocalAreaOffset;
  assert(LocalAreaOffset >= 0
         && "Local area offset should be in direction of stack growth");
  int64_t Offset = LocalAreaOffset;

#ifdef EXPENSIVE_CHECKS
  for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i)
    if (!MFI.isDeadObjectIndex(i) &&
        MFI.getStackID(i) == TargetStackID::Default)
      assert(MFI.getObjectAlign(i) <= MFI.getMaxAlign() &&
             "MaxAlignment is invalid");
#endif

  // If there are fixed sized objects that are preallocated in the local area,
  // non-fixed objects can't be allocated right at the start of local area.
  // Adjust 'Offset' to point to the end of last fixed sized preallocated
  // object.
  for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
    // Only allocate objects on the default stack.
    if (MFI.getStackID(i) != TargetStackID::Default)
      continue;

    int64_t FixedOff;
    if (StackGrowsDown) {
      // The maximum distance from the stack pointer is at lower address of
      // the object -- which is given by offset. For down growing stack
      // the offset is negative, so we negate the offset to get the distance.
      FixedOff = -MFI.getObjectOffset(i);
    } else {
      // The maximum distance from the start pointer is at the upper
      // address of the object.
      FixedOff = MFI.getObjectOffset(i) + MFI.getObjectSize(i);
    }
    if (FixedOff > Offset) Offset = FixedOff;
  }

  Align MaxAlign = MFI.getMaxAlign();
  // First assign frame offsets to stack objects that are used to spill
  // callee saved registers.
  auto AllFIs = seq(MFI.getObjectIndexBegin(), MFI.getObjectIndexEnd());
  for (int FI : reverse_conditionally(AllFIs, /*Reverse=*/!StackGrowsDown)) {
    // Only allocate objects on the default stack.
    if (!MFI.isCalleeSavedObjectIndex(FI) ||
        MFI.getStackID(FI) != TargetStackID::Default)
      continue;

    // TODO: should this just be if (MFI.isDeadObjectIndex(FI))
    if (!StackGrowsDown && MFI.isDeadObjectIndex(FI))
      continue;

    AdjustStackOffset(MFI, FI, StackGrowsDown, Offset, MaxAlign);
  }

  assert(MaxAlign == MFI.getMaxAlign() &&
         "MFI.getMaxAlign should already account for all callee-saved "
         "registers without a fixed stack slot");

  // FixedCSEnd is the stack offset to the end of the fixed and callee-save
  // stack area.
  int64_t FixedCSEnd = Offset;

  // Make sure the special register scavenging spill slot is closest to the
  // incoming stack pointer if a frame pointer is required and is closer
  // to the incoming rather than the final stack pointer.
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  bool EarlyScavengingSlots =
      TFI.allocateScavengingFrameIndexesNearIncomingSP(MF);
  if (RS && EarlyScavengingSlots) {
    SmallVector<int, 2> SFIs;
    RS->getScavengingFrameIndices(SFIs);
    for (int SFI : SFIs)
      AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
  }

  // FIXME: Once this is working, then enable flag will change to a target
  // check for whether the frame is large enough to want to use virtual
  // frame index registers. Functions which don't want/need this optimization
  // will continue to use the existing code path.
  if (MFI.getUseLocalStackAllocationBlock()) {
    Align Alignment = MFI.getLocalFrameMaxAlign();

    // Adjust to alignment boundary.
    Offset = alignTo(Offset, Alignment);

    LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");

    // Resolve offsets for objects in the local block.
    for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) {
      std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i);
      int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
      LLVM_DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset
                        << "]\n");
      MFI.setObjectOffset(Entry.first, FIOffset);
    }
    // Allocate the local block
    Offset += MFI.getLocalFrameSize();

    MaxAlign = std::max(Alignment, MaxAlign);
  }

  // Retrieve the Exception Handler registration node.
  int EHRegNodeFrameIndex = std::numeric_limits<int>::max();
  if (const WinEHFuncInfo *FuncInfo = MF.getWinEHFuncInfo())
    EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex;

  // Make sure that the stack protector comes before the local variables on the
  // stack.
  SmallSet<int, 16> ProtectedObjs;
  if (MFI.hasStackProtectorIndex()) {
    int StackProtectorFI = MFI.getStackProtectorIndex();
    StackObjSet LargeArrayObjs;
    StackObjSet SmallArrayObjs;
    StackObjSet AddrOfObjs;

    // If we need a stack protector, we need to make sure that
    // LocalStackSlotPass didn't already allocate a slot for it.
    // If we are told to use the LocalStackAllocationBlock, the stack protector
    // is expected to be already pre-allocated.
    if (MFI.getStackID(StackProtectorFI) != TargetStackID::Default) {
      // If the stack protector isn't on the default stack then it's up to the
      // target to set the stack offset.
      assert(MFI.getObjectOffset(StackProtectorFI) != 0 &&
             "Offset of stack protector on non-default stack expected to be "
             "already set.");
      assert(!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()) &&
             "Stack protector on non-default stack expected to not be "
             "pre-allocated by LocalStackSlotPass.");
    } else if (!MFI.getUseLocalStackAllocationBlock()) {
      AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset,
                        MaxAlign);
    } else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) {
      llvm_unreachable(
          "Stack protector not pre-allocated by LocalStackSlotPass.");
    }

    // Assign large stack objects first.
    for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
      if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
        continue;
      if (MFI.isCalleeSavedObjectIndex(i))
        continue;
      if (RS && RS->isScavengingFrameIndex((int)i))
        continue;
      if (MFI.isDeadObjectIndex(i))
        continue;
      if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i)
        continue;
      // Only allocate objects on the default stack.
      if (MFI.getStackID(i) != TargetStackID::Default)
        continue;

      switch (MFI.getObjectSSPLayout(i)) {
      case MachineFrameInfo::SSPLK_None:
        continue;
      case MachineFrameInfo::SSPLK_SmallArray:
        SmallArrayObjs.insert(i);
        continue;
      case MachineFrameInfo::SSPLK_AddrOf:
        AddrOfObjs.insert(i);
        continue;
      case MachineFrameInfo::SSPLK_LargeArray:
        LargeArrayObjs.insert(i);
        continue;
      }
      llvm_unreachable("Unexpected SSPLayoutKind.");
    }

    // We expect **all** the protected stack objects to be pre-allocated by
    // LocalStackSlotPass. If it turns out that PEI still has to allocate some
    // of them, we may end up messing up the expected order of the objects.
    if (MFI.getUseLocalStackAllocationBlock() &&
        !(LargeArrayObjs.empty() && SmallArrayObjs.empty() &&
          AddrOfObjs.empty()))
      llvm_unreachable("Found protected stack objects not pre-allocated by "
                       "LocalStackSlotPass.");

    AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign);
    AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign);
    AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
                          Offset, MaxAlign);
  }

  SmallVector<int, 8> ObjectsToAllocate;

  // Then prepare to assign frame offsets to stack objects that are not used to
  // spill callee saved registers.
  for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
    if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
      continue;
    if (MFI.isCalleeSavedObjectIndex(i))
      continue;
    if (RS && RS->isScavengingFrameIndex((int)i))
      continue;
    if (MFI.isDeadObjectIndex(i))
      continue;
    if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i)
      continue;
    if (ProtectedObjs.count(i))
      continue;
    // Only allocate objects on the default stack.
    if (MFI.getStackID(i) != TargetStackID::Default)
      continue;

    // Add the objects that we need to allocate to our working set.
    ObjectsToAllocate.push_back(i);
  }

  // Allocate the EH registration node first if one is present.
  if (EHRegNodeFrameIndex != std::numeric_limits<int>::max())
    AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset,
                      MaxAlign);

  // Give the targets a chance to order the objects the way they like it.
  if (MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
      MF.getTarget().Options.StackSymbolOrdering)
    TFI.orderFrameObjects(MF, ObjectsToAllocate);

  // Keep track of which bytes in the fixed and callee-save range are used so we
  // can use the holes when allocating later stack objects. Only do this if
  // stack protector isn't being used and the target requests it and we're
  // optimizing.
  BitVector StackBytesFree;
  if (!ObjectsToAllocate.empty() &&
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
      MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(MF))
    computeFreeStackSlots(MFI, StackGrowsDown, FixedCSEnd, StackBytesFree);

  // Now walk the objects and actually assign base offsets to them.
  for (auto &Object : ObjectsToAllocate)
    if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
                           StackBytesFree))
      AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign);

  // Make sure the special register scavenging spill slot is closest to the
  // stack pointer.
  if (RS && !EarlyScavengingSlots) {
    SmallVector<int, 2> SFIs;
    RS->getScavengingFrameIndices(SFIs);
    for (int SFI : SFIs)
      AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
  }

  if (!TFI.targetHandlesStackFrameRounding()) {
    // If we have reserved argument space for call sites in the function
    // immediately on entry to the current function, count it as part of the
    // overall stack size.
    if (MFI.adjustsStack() && TFI.hasReservedCallFrame(MF))
      Offset += MFI.getMaxCallFrameSize();

    // Round up the size to a multiple of the alignment. If the function has
    // any calls or alloca's, align to the target's StackAlignment value to
    // ensure that the callee's frame or the alloca data is suitably aligned;
    // otherwise, for leaf functions, align to the TransientStackAlignment
    // value.
    Align StackAlign;
    if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
        (RegInfo->hasStackRealignment(MF) && MFI.getObjectIndexEnd() != 0))
      StackAlign = TFI.getStackAlign();
    else
      StackAlign = TFI.getTransientStackAlign();

    // If the frame pointer is eliminated, all frame offsets will be relative to
    // SP not FP. Align to MaxAlign so this works.
    StackAlign = std::max(StackAlign, MaxAlign);
    int64_t OffsetBeforeAlignment = Offset;
    Offset = alignTo(Offset, StackAlign);

    // If we have increased the offset to fulfill the alignment constraints,
    // then the scavenging spill slots may become harder to reach from the
    // stack pointer, float them so they stay close.
    if (StackGrowsDown && OffsetBeforeAlignment != Offset && RS &&
        !EarlyScavengingSlots) {
      SmallVector<int, 2> SFIs;
      RS->getScavengingFrameIndices(SFIs);
      LLVM_DEBUG(if (!SFIs.empty()) llvm::dbgs()
                 << "Adjusting emergency spill slots!\n";);
      int64_t Delta = Offset - OffsetBeforeAlignment;
      for (int SFI : SFIs) {
        LLVM_DEBUG(llvm::dbgs()
                   << "Adjusting offset of emergency spill slot #" << SFI
                   << " from " << MFI.getObjectOffset(SFI););
        MFI.setObjectOffset(SFI, MFI.getObjectOffset(SFI) - Delta);
        LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(SFI) << "\n";);
      }
    }
  }

  // Update frame info to pretend that this is part of the stack...
  int64_t StackSize = Offset - LocalAreaOffset;
  MFI.setStackSize(StackSize);
  NumBytesStackSpace += StackSize;
}

/// insertPrologEpilogCode - Scan the function for modified callee saved
/// registers, insert spill code for these callee saved registers, then add
/// prolog and epilog code to the function.
void PEIImpl::insertPrologEpilogCode(MachineFunction &MF) {
  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();

  // Add prologue to the function...
  for (MachineBasicBlock *SaveBlock : SaveBlocks)
    TFI.emitPrologue(MF, *SaveBlock);

  // Add epilogue to restore the callee-save registers in each exiting block.
  for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
    TFI.emitEpilogue(MF, *RestoreBlock);

  // Zero call used registers before restoring callee-saved registers.
  insertZeroCallUsedRegs(MF);

  for (MachineBasicBlock *SaveBlock : SaveBlocks)
    TFI.inlineStackProbe(MF, *SaveBlock);

  // Emit additional code that is required to support segmented stacks, if
  // we've been asked for it. This, when linked with a runtime with support
  // for segmented stacks (libgcc is one), will result in allocating stack
  // space in small chunks instead of one large contiguous block.
  if (MF.shouldSplitStack()) {
    for (MachineBasicBlock *SaveBlock : SaveBlocks)
      TFI.adjustForSegmentedStacks(MF, *SaveBlock);
  }

  // Emit additional code that is required to explicitly handle the stack in
  // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
  // approach is rather similar to that of Segmented Stacks, but it uses a
  // different conditional check and another BIF for allocating more stack
  // space.
  if (MF.getFunction().getCallingConv() == CallingConv::HiPE)
    for (MachineBasicBlock *SaveBlock : SaveBlocks)
      TFI.adjustForHiPEPrologue(MF, *SaveBlock);
}

/// insertZeroCallUsedRegs - Zero out call used registers.
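/// The register set is selected by the "zero-call-used-regs" function
/// attribute, e.g. __attribute__((zero_call_used_regs("used-gpr"))) in C
/// source zeroes only the used general-purpose registers on return.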
| 1207 | void PEIImpl::insertZeroCallUsedRegs(MachineFunction &MF) { |
| 1208 | const Function &F = MF.getFunction(); |
| 1209 | |
| 1210 | if (!F.hasFnAttribute(Kind: "zero-call-used-regs" )) |
| 1211 | return; |
| 1212 | |
| 1213 | using namespace ZeroCallUsedRegs; |
| 1214 | |
| 1215 | ZeroCallUsedRegsKind ZeroRegsKind = |
| 1216 | StringSwitch<ZeroCallUsedRegsKind>( |
| 1217 | F.getFnAttribute(Kind: "zero-call-used-regs" ).getValueAsString()) |
| 1218 | .Case(S: "skip" , Value: ZeroCallUsedRegsKind::Skip) |
| 1219 | .Case(S: "used-gpr-arg" , Value: ZeroCallUsedRegsKind::UsedGPRArg) |
| 1220 | .Case(S: "used-gpr" , Value: ZeroCallUsedRegsKind::UsedGPR) |
| 1221 | .Case(S: "used-arg" , Value: ZeroCallUsedRegsKind::UsedArg) |
| 1222 | .Case(S: "used" , Value: ZeroCallUsedRegsKind::Used) |
| 1223 | .Case(S: "all-gpr-arg" , Value: ZeroCallUsedRegsKind::AllGPRArg) |
| 1224 | .Case(S: "all-gpr" , Value: ZeroCallUsedRegsKind::AllGPR) |
| 1225 | .Case(S: "all-arg" , Value: ZeroCallUsedRegsKind::AllArg) |
| 1226 | .Case(S: "all" , Value: ZeroCallUsedRegsKind::All); |
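// For illustration: Clang's -fzero-call-used-regs=<value> attaches the
// matching "zero-call-used-regs" string attribute, so compiling with
// -fzero-call-used-regs=used-gpr selects ZeroCallUsedRegsKind::UsedGPR
// here.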

if (ZeroRegsKind == ZeroCallUsedRegsKind::Skip)
return;

const bool OnlyGPR = static_cast<unsigned>(ZeroRegsKind) & ONLY_GPR;
const bool OnlyUsed = static_cast<unsigned>(ZeroRegsKind) & ONLY_USED;
const bool OnlyArg = static_cast<unsigned>(ZeroRegsKind) & ONLY_ARG;
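// The kind enumerators are assumed to be bit-encoded from these masks;
// e.g. UsedGPRArg is ONLY_USED | ONLY_GPR | ONLY_ARG, so all three of the
// booleans above are true for it.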

const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const BitVector AllocatableSet(TRI.getAllocatableSet(MF));

// Mark all used registers.
BitVector UsedRegs(TRI.getNumRegs());
if (OnlyUsed)
for (const MachineBasicBlock &MBB : MF)
for (const MachineInstr &MI : MBB) {
// Skip debug instructions.
if (MI.isDebugInstr())
continue;

for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;

MCRegister Reg = MO.getReg();
if (AllocatableSet[Reg.id()] && !MO.isImplicit() &&
(MO.isDef() || MO.isUse()))
UsedRegs.set(Reg.id());
}
}

// Collect the registers that are live into the function, i.e. the
// registers that carry its arguments.
BitVector LiveIns(TRI.getNumRegs());
for (const MachineBasicBlock::RegisterMaskPair &LI : MF.front().liveins())
LiveIns.set(LI.PhysReg);
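// E.g. for an x86-64 SysV function taking two integer arguments, the entry
// block's live-ins would typically be $rdi and $rsi (illustrative; the
// precise set depends on target and calling convention).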

BitVector RegsToZero(TRI.getNumRegs());
for (MCRegister Reg : AllocatableSet.set_bits()) {
// Skip over fixed registers.
if (TRI.isFixedRegister(MF, Reg))
continue;

// Want only general purpose registers.
if (OnlyGPR && !TRI.isGeneralPurposeRegister(MF, Reg))
continue;

// Want only used registers.
if (OnlyUsed && !UsedRegs[Reg.id()])
continue;

// Want only registers used for arguments.
if (OnlyArg) {
if (OnlyUsed) {
if (!LiveIns[Reg.id()])
continue;
} else if (!TRI.isArgumentRegister(MF, Reg)) {
continue;
}
}

RegsToZero.set(Reg.id());
}

// Don't clear registers that are live when leaving the function.
for (const MachineBasicBlock &MBB : MF)
for (const MachineInstr &MI : MBB.terminators()) {
if (!MI.isReturn())
continue;

for (const auto &MO : MI.operands()) {
if (!MO.isReg())
continue;

MCRegister Reg = MO.getReg();
if (!Reg)
continue;

// This picks up sibling registers (e.g. %al -> %ah).
// FIXME: Mixing physical registers and register units is likely a bug.
for (MCRegUnit Unit : TRI.regunits(Reg))
RegsToZero.reset(static_cast<unsigned>(Unit));

for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg))
RegsToZero.reset(SReg);
}
}
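// E.g. (x86-64, illustrative) a value returned in $eax keeps $rax, $eax,
// $ax, $al, and $ah out of RegsToZero, since they are all sub- or
// super-registers of $eax.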

// Don't need to clear registers that are used/clobbered by terminating
// instructions.
for (const MachineBasicBlock &MBB : MF) {
if (!MBB.isReturnBlock())
continue;

MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
for (MachineBasicBlock::const_iterator I = MBBI, E = MBB.end(); I != E;
++I) {
for (const MachineOperand &MO : I->operands()) {
if (!MO.isReg())
continue;

MCRegister Reg = MO.getReg();
if (!Reg)
continue;

for (const MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg))
RegsToZero.reset(SReg);
}
}
}

// Don't clear registers that must be preserved.
for (const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
MCPhysReg CSReg = *CSRegs; ++CSRegs)
for (MCRegister Reg : TRI.sub_and_superregs_inclusive(CSReg))
RegsToZero.reset(Reg.id());
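// E.g. (SysV x86-64, illustrative) $rbx is callee-saved, so $rbx and all
// of its sub-registers stay out of RegsToZero; the authoritative list is
// whatever getCalleeSavedRegs returns for this target.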

const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
for (MachineBasicBlock &MBB : MF)
if (MBB.isReturnBlock())
TFI.emitZeroCallUsedRegs(RegsToZero, MBB);
}

/// Replace all FrameIndex operands with physical register references and actual
/// offsets.
void PEIImpl::replaceFrameIndicesBackward(MachineFunction &MF) {
const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();

for (auto &MBB : MF) {
int SPAdj = 0;
if (!MBB.succ_empty()) {
// Get the SP adjustment for the end of MBB from the start of any of its
// successors. They should all be the same.
assert(all_of(MBB.successors(), [&MBB](const MachineBasicBlock *Succ) {
return Succ->getCallFrameSize() ==
(*MBB.succ_begin())->getCallFrameSize();
}));
const MachineBasicBlock &FirstSucc = **MBB.succ_begin();
SPAdj = TFI.alignSPAdjust(FirstSucc.getCallFrameSize());
if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
SPAdj = -SPAdj;
}
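// Worked example (hypothetical numbers): if this block ends in the middle
// of a call sequence that has pushed 16 bytes of outgoing arguments, every
// successor records a call frame size of 16, so SPAdj starts at 16 here
// (negated just above when the stack grows up).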

replaceFrameIndicesBackward(&MBB, MF, SPAdj);

// We can't track the call frame size after call frame pseudos have been
// eliminated. Set it to zero everywhere to keep MachineVerifier happy.
MBB.setCallFrameSize(0);
}
}

/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
void PEIImpl::replaceFrameIndices(MachineFunction &MF) {
const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();

for (auto &MBB : MF) {
int SPAdj = TFI.alignSPAdjust(MBB.getCallFrameSize());
if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
SPAdj = -SPAdj;

replaceFrameIndices(&MBB, MF, SPAdj);

// We can't track the call frame size after call frame pseudos have been
// eliminated. Set it to zero everywhere to keep MachineVerifier happy.
MBB.setCallFrameSize(0);
}
}

bool PEIImpl::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
unsigned OpIdx, int SPAdj) {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
if (MI.isDebugValue()) {
MachineOperand &Op = MI.getOperand(OpIdx);
assert(MI.isDebugOperand(&Op) &&
"Frame indices can only appear as a debug operand in a DBG_VALUE*"
" machine instruction");
Register Reg;
unsigned FrameIdx = Op.getIndex();
unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);

StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg);
Op.ChangeToRegister(Reg, false /*isDef*/);

const DIExpression *DIExpr = MI.getDebugExpression();

// If we have a direct DBG_VALUE, and its location expression isn't
// currently complex, then adding an offset will morph it into a
// complex location that is interpreted as being a memory address.
// This changes a pointer-valued variable to dereference that pointer,
// which is incorrect. Fix by adding DW_OP_stack_value.
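// For instance (a sketch in MIR-like syntax, register and offset
// hypothetical), a direct
//   DBG_VALUE %stack.0, $noreg, !"x", !DIExpression()
// becomes
//   DBG_VALUE $rbp, $noreg, !"x",
//             !DIExpression(DW_OP_plus_uconst, 8, DW_OP_stack_value)
// so that the computed address is the variable's value rather than a
// location to load from.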

if (MI.isNonListDebugValue()) {
unsigned PrependFlags = DIExpression::ApplyOffset;
if (!MI.isIndirectDebugValue() && !DIExpr->isComplex())
PrependFlags |= DIExpression::StackValue;

// If we have a DBG_VALUE that is indirect and has an implicit location
// expression, we need to insert a deref before prepending a memory
// location expression. After doing this, we change the DBG_VALUE to be
// direct.
if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) {
SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
bool WithStackValue = true;
DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
// Make the DBG_VALUE direct.
MI.getDebugOffset().ChangeToRegister(0, false);
}
DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset);
} else {
// The debug operand at DebugOpIndex was a frame index at offset
// `Offset`; now that the operand has been replaced with the frame
// register, we must append the offset so the expression reads
// `register x, plus Offset`.
unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
SmallVector<uint64_t, 3> Ops;
TRI.getOffsetOpcodes(Offset, Ops);
DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
}
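// E.g. for a variadic DBG_VALUE_LIST, the offset opcodes are appended only
// to the DW_OP_LLVM_arg operand that referred to this frame index; the
// expressions for the other debug operands are left untouched (a sketch of
// the intent).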
MI.getDebugExpressionOp().setMetadata(DIExpr);
return true;
}

if (MI.isDebugPHI()) {
// Allow stack ref to continue onwards.
return true;
}

// TODO: This code should be commoned with the code for
// PATCHPOINT. There's no good reason for the difference in
// implementation other than historical accident. The only
// remaining difference is the unconditional use of the stack
// pointer as the base register.
if (MI.getOpcode() == TargetOpcode::STATEPOINT) {
assert((!MI.isDebugValue() || OpIdx == 0) &&
"Frame indices can only appear as the first operand of a "
"DBG_VALUE machine instruction");
Register Reg;
MachineOperand &Offset = MI.getOperand(OpIdx + 1);
StackOffset refOffset = TFI->getFrameIndexReferencePreferSP(
MF, MI.getOperand(OpIdx).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
assert(!refOffset.getScalable() &&
"Frame offsets with a scalable component are not supported");
Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj);
MI.getOperand(OpIdx).ChangeToRegister(Reg, false /*isDef*/);
return true;
}
return false;
}

void PEIImpl::replaceFrameIndicesBackward(MachineBasicBlock *BB,
MachineFunction &MF, int &SPAdj) {
assert(MF.getSubtarget().getRegisterInfo() &&
"getRegisterInfo() must be implemented!");

const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();

RegScavenger *LocalRS = FrameIndexEliminationScavenging ? RS : nullptr;
if (LocalRS)
LocalRS->enterBasicBlockEnd(*BB);

for (MachineBasicBlock::iterator I = BB->end(); I != BB->begin();) {
MachineInstr &MI = *std::prev(I);

if (TII.isFrameInstr(MI)) {
SPAdj -= TII.getSPAdjust(MI);
TFI.eliminateCallFramePseudoInstr(MF, *BB, &MI);
continue;
}

// Step backwards to get the liveness state at (immediately after) MI.
if (LocalRS)
LocalRS->backward(I);

bool RemovedMI = false;
for (const auto &[Idx, Op] : enumerate(MI.operands())) {
if (!Op.isFI())
continue;

if (replaceFrameIndexDebugInstr(MF, MI, Idx, SPAdj))
continue;

// Eliminate this FrameIndex operand.
RemovedMI = TRI.eliminateFrameIndex(MI, SPAdj, Idx, LocalRS);
if (RemovedMI)
break;
}

if (!RemovedMI)
--I;
}
}

void PEIImpl::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
int &SPAdj) {
assert(MF.getSubtarget().getRegisterInfo() &&
"getRegisterInfo() must be implemented!");
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

bool InsideCallSequence = false;

for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end();) {
if (TII.isFrameInstr(*I)) {
InsideCallSequence = TII.isFrameSetup(*I);
SPAdj += TII.getSPAdjust(*I);
I = TFI->eliminateCallFramePseudoInstr(MF, *BB, I);
continue;
}

MachineInstr &MI = *I;
bool DoIncr = true;
bool DidFinishLoop = true;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (!MI.getOperand(i).isFI())
continue;

if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
continue;

// Some instructions (e.g. inline asm instructions) can have
// multiple frame indices and/or cause eliminateFrameIndex
// to insert more than one instruction. We need the register
// scavenger to go through all of these instructions so that
// it can update its register information. We keep the
// iterator at the point before insertion so that we can
// revisit them in full.
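// E.g. (hypothetical) an INLINEASM instruction with two frame-index
// operands: after the first one is eliminated, the iterator is backed up
// so the rescan and the register scavenger also see any reload
// instructions eliminateFrameIndex inserted.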
bool AtBeginning = (I == BB->begin());
if (!AtBeginning) --I;

// If this instruction has a FrameIndex operand, we need to
// use that target machine register info object to eliminate
// it.
TRI.eliminateFrameIndex(MI, SPAdj, i, RS);

// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
I = BB->begin();
DoIncr = false;
}

DidFinishLoop = false;
break;
}

// If we are looking at a call sequence, we need to keep track of
// the SP adjustment made by each instruction in the sequence.
// This includes both the frame setup/destroy pseudos (handled above),
// as well as other instructions that have side effects w.r.t the SP.
// Note that this must come after eliminateFrameIndex, because
// if I itself referred to a frame index, we shouldn't count its own
// adjustment.
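// E.g. (hypothetical, x86-like) a PUSH of an outgoing argument between the
// frame-setup and frame-destroy pseudos moves SP by the push size;
// getSPAdjust reports that adjustment so later frame-index references in
// the same call sequence use the correct offset.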
if (DidFinishLoop && InsideCallSequence)
SPAdj += TII.getSPAdjust(MI);

if (DoIncr && I != BB->end())
++I;
}
}
