1//===-- LoongArchFrameLowering.cpp - LoongArch Frame Information -*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the LoongArch implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "LoongArchFrameLowering.h"
14#include "LoongArchMachineFunctionInfo.h"
15#include "LoongArchSubtarget.h"
16#include "MCTargetDesc/LoongArchBaseInfo.h"
17#include "MCTargetDesc/LoongArchMCTargetDesc.h"
18#include "llvm/CodeGen/CFIInstBuilder.h"
19#include "llvm/CodeGen/LivePhysRegs.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/RegisterScavenging.h"
25#include "llvm/IR/DiagnosticInfo.h"
26#include "llvm/MC/MCDwarf.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "loongarch-frame-lowering"
31
32// Return true if the specified function should have a dedicated frame
33// pointer register. This is true if frame pointer elimination is
34// disabled, if it needs dynamic stack realignment, if the function has
35// variable sized allocas, or if the frame address is taken.
36bool LoongArchFrameLowering::hasFPImpl(const MachineFunction &MF) const {
37 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
38
39 const MachineFrameInfo &MFI = MF.getFrameInfo();
40 return MF.getTarget().Options.DisableFramePointerElim(MF) ||
41 RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
42 MFI.isFrameAddressTaken();
43}
44
45bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const {
46 const MachineFrameInfo &MFI = MF.getFrameInfo();
47 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
48
49 return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF);
50}
51
52void LoongArchFrameLowering::adjustReg(MachineBasicBlock &MBB,
53 MachineBasicBlock::iterator MBBI,
54 const DebugLoc &DL, Register DestReg,
55 Register SrcReg, int64_t Val,
56 MachineInstr::MIFlag Flag) const {
57 const LoongArchInstrInfo *TII = STI.getInstrInfo();
58 bool IsLA64 = STI.is64Bit();
59 unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;
60
61 if (DestReg == SrcReg && Val == 0)
62 return;
63
64 if (isInt<12>(x: Val)) {
65 // addi.w/d $DstReg, $SrcReg, Val
66 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Addi), DestReg)
67 .addReg(RegNo: SrcReg)
68 .addImm(Val)
69 .setMIFlag(Flag);
70 return;
71 }
72
73 // Try to split the offset across two ADDIs. We need to keep the stack pointer
74 // aligned after each ADDI. We need to determine the maximum value we can put
75 // in each ADDI. In the negative direction, we can use -2048 which is always
76 // sufficiently aligned. In the positive direction, we need to find the
77 // largest 12-bit immediate that is aligned. Exclude -4096 since it can be
78 // created with LU12I.W.
79 assert(getStackAlign().value() < 2048 && "Stack alignment too large");
80 int64_t MaxPosAdjStep = 2048 - getStackAlign().value();
81 if (Val > -4096 && Val <= (2 * MaxPosAdjStep)) {
82 int64_t FirstAdj = Val < 0 ? -2048 : MaxPosAdjStep;
83 Val -= FirstAdj;
84 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Addi), DestReg)
85 .addReg(RegNo: SrcReg)
86 .addImm(Val: FirstAdj)
87 .setMIFlag(Flag);
88 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Addi), DestReg)
89 .addReg(RegNo: DestReg, Flags: RegState::Kill)
90 .addImm(Val)
91 .setMIFlag(Flag);
92 return;
93 }
94
95 unsigned Opc = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W;
96 if (Val < 0) {
97 Val = -Val;
98 Opc = IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W;
99 }
100
101 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
102 Register ScratchReg = MRI.createVirtualRegister(RegClass: &LoongArch::GPRRegClass);
103 TII->movImm(MBB, MBBI, DL, DstReg: ScratchReg, Val, Flag);
104 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: Opc), DestReg)
105 .addReg(RegNo: SrcReg)
106 .addReg(RegNo: ScratchReg, Flags: RegState::Kill)
107 .setMIFlag(Flag);
108}
109
110// Determine the size of the frame and maximum call frame size.
111void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const {
112 MachineFrameInfo &MFI = MF.getFrameInfo();
113
114 // Get the number of bytes to allocate from the FrameInfo.
115 uint64_t FrameSize = MFI.getStackSize();
116
117 // Make sure the frame is aligned.
118 FrameSize = alignTo(Size: FrameSize, A: getStackAlign());
119
120 // Update frame info.
121 MFI.setStackSize(FrameSize);
122}
123
124static uint64_t estimateFunctionSizeInBytes(const LoongArchInstrInfo *TII,
125 const MachineFunction &MF) {
126 uint64_t FuncSize = 0;
127 for (auto &MBB : MF)
128 for (auto &MI : MBB)
129 FuncSize += TII->getInstSizeInBytes(MI);
130 return FuncSize;
131}
132
133static bool needScavSlotForCFR(MachineFunction &MF) {
134 if (!MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
135 return false;
136 for (auto &MBB : MF)
137 for (auto &MI : MBB)
138 if (MI.getOpcode() == LoongArch::PseudoST_CFR)
139 return true;
140 return false;
141}
142
143void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
144 MachineFunction &MF, RegScavenger *RS) const {
145 const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
146 const TargetRegisterClass &RC = LoongArch::GPRRegClass;
147 const LoongArchInstrInfo *TII = STI.getInstrInfo();
148 LoongArchMachineFunctionInfo *LAFI =
149 MF.getInfo<LoongArchMachineFunctionInfo>();
150 MachineFrameInfo &MFI = MF.getFrameInfo();
151
152 unsigned ScavSlotsNum = 0;
153
154 // Far branches beyond 27-bit offset require a spill slot for scratch
155 // register.
156 bool IsLargeFunction = !isInt<27>(x: estimateFunctionSizeInBytes(TII, MF));
157 if (IsLargeFunction)
158 ScavSlotsNum = 1;
159
160 // estimateStackSize has been observed to under-estimate the final stack
161 // size, so give ourselves wiggle-room by checking for stack size
162 // representable an 11-bit signed field rather than 12-bits.
163 // For [x]vstelm.{b/h/w/d} memory instructions with 8 imm offset, 7-bit
164 // signed field is fine.
165 unsigned EstimateStackSize = MFI.estimateStackSize(MF);
166 if (!isInt<11>(x: EstimateStackSize) ||
167 (MF.getSubtarget<LoongArchSubtarget>().hasExtLSX() &&
168 !isInt<7>(x: EstimateStackSize)))
169 ScavSlotsNum = std::max(a: ScavSlotsNum, b: 1u);
170
171 // For CFR spill.
172 if (needScavSlotForCFR(MF))
173 ++ScavSlotsNum;
174
175 // Create emergency spill slots.
176 for (unsigned i = 0; i < ScavSlotsNum; ++i) {
177 int FI =
178 MFI.CreateSpillStackObject(Size: RI->getSpillSize(RC), Alignment: RI->getSpillAlign(RC));
179 RS->addScavengingFrameIndex(FI);
180 if (IsLargeFunction && LAFI->getBranchRelaxationSpillFrameIndex() == -1)
181 LAFI->setBranchRelaxationSpillFrameIndex(FI);
182 LLVM_DEBUG(dbgs() << "Allocated FI(" << FI
183 << ") as the emergency spill slot.\n");
184 }
185}
186
187// Allocate stack space and probe it if necessary.
188void LoongArchFrameLowering::allocateStack(MachineBasicBlock &MBB,
189 MachineBasicBlock::iterator MBBI,
190 MachineFunction &MF, uint64_t Offset,
191 uint64_t RealStackSize, bool EmitCFI,
192 bool NeedProbe, uint64_t ProbeSize,
193 bool DynAllocation,
194 MachineInstr::MIFlag Flag) const {
195 DebugLoc DL;
196 const LoongArchInstrInfo *TII = STI.getInstrInfo();
197 const bool IsLA64 = STI.is64Bit();
198 const Register SPReg = LoongArch::R3;
199 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
200
201 // Simply allocate the stack if it's not big enough to require a probe.
202 if (!NeedProbe || Offset <= ProbeSize) {
203 adjustReg(MBB, MBBI, DL, DestReg: SPReg, SrcReg: SPReg, Val: -Offset, Flag);
204 if (EmitCFI)
205 CFIBuilder.buildDefCFAOffset(Offset: RealStackSize);
206
207 if (NeedProbe && DynAllocation) {
208 // st.{w/d} $zero, $sp, 0
209 BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
210 MCID: TII->get(Opcode: IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
211 .addReg(RegNo: LoongArch::R0)
212 .addReg(RegNo: SPReg)
213 .addImm(Val: 0)
214 .setMIFlag(Flag);
215 }
216
217 return;
218 }
219
220 // Unroll the probe loop depending on the number of iterations.
221 if (Offset < ProbeSize * 5) {
222 const uint64_t CFAAdjust = RealStackSize - Offset;
223
224 uint64_t CurrentOffset = 0;
225 while (CurrentOffset + ProbeSize <= Offset) {
226 adjustReg(MBB, MBBI, DL, DestReg: SPReg, SrcReg: SPReg, Val: -ProbeSize, Flag);
227 // st.{w/d} $zero, $sp, 0
228 BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
229 MCID: TII->get(Opcode: IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
230 .addReg(RegNo: LoongArch::R0)
231 .addReg(RegNo: SPReg)
232 .addImm(Val: 0)
233 .setMIFlag(Flag);
234
235 CurrentOffset += ProbeSize;
236 if (EmitCFI)
237 CFIBuilder.buildDefCFAOffset(Offset: CurrentOffset + CFAAdjust);
238 }
239
240 const uint64_t Residual = Offset - CurrentOffset;
241 if (Residual) {
242 adjustReg(MBB, MBBI, DL, DestReg: SPReg, SrcReg: SPReg, Val: -Residual, Flag);
243 if (EmitCFI)
244 CFIBuilder.buildDefCFAOffset(Offset: RealStackSize);
245
246 if (DynAllocation) {
247 // st.{w/d} $zero, $sp, 0
248 BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
249 MCID: TII->get(Opcode: IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
250 .addReg(RegNo: LoongArch::R0)
251 .addReg(RegNo: SPReg)
252 .addImm(Val: 0)
253 .setMIFlag(Flag);
254 }
255 }
256 return;
257 }
258
259 // Emit a variable-length allocation probing loop.
260 const uint64_t RoundedSize = alignDown(Value: Offset, Align: ProbeSize);
261 const uint64_t Residual = Offset - RoundedSize;
262 const uint64_t CFAAdjust = RealStackSize - Offset;
263
264 const Register TargetReg = LoongArch::R13;
265 // SUB TargetReg, $sp, RoundedSize
266 adjustReg(MBB, MBBI, DL, DestReg: TargetReg, SrcReg: SPReg, Val: -RoundedSize, Flag);
267
268 if (EmitCFI) {
269 // Set the CFA register to TargetReg.
270 CFIBuilder.buildDefCFA(Reg: TargetReg, Offset: RoundedSize + CFAAdjust);
271 }
272
273 // It will be expanded to a probe loop in inlineStackProbe().
274 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::PROBED_STACKALLOC))
275 .addReg(RegNo: TargetReg);
276
277 if (EmitCFI) {
278 // Set the CFA register back to SP.
279 CFIBuilder.buildDefCFARegister(Reg: SPReg);
280 }
281
282 if (Residual) {
283 adjustReg(MBB, MBBI, DL, DestReg: SPReg, SrcReg: SPReg, Val: -Residual, Flag);
284 if (DynAllocation) {
285 // st.{w/d} $zero, $sp, 0
286 BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
287 MCID: TII->get(Opcode: IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
288 .addReg(RegNo: LoongArch::R0)
289 .addReg(RegNo: SPReg)
290 .addImm(Val: 0)
291 .setMIFlag(Flag);
292 }
293 }
294
295 if (EmitCFI)
296 CFIBuilder.buildDefCFAOffset(Offset: RealStackSize);
297}
298
299void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
300 MachineBasicBlock &MBB) const {
301 MachineFrameInfo &MFI = MF.getFrameInfo();
302 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
303 const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
304 const LoongArchInstrInfo *TII = STI.getInstrInfo();
305 MachineBasicBlock::iterator MBBI = MBB.begin();
306 bool IsLA64 = STI.is64Bit();
307
308 Register SPReg = LoongArch::R3;
309 Register FPReg = LoongArch::R22;
310
311 // Debug location must be unknown since the first debug location is used
312 // to determine the end of the prologue.
313 DebugLoc DL;
314 // All calls are tail calls in GHC calling conv, and functions have no
315 // prologue/epilogue.
316 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
317 return;
318 // Determine the correct frame layout
319 determineFrameLayout(MF);
320
321 // First, compute final stack size.
322 uint64_t StackSize = MFI.getStackSize();
323 uint64_t RealStackSize = StackSize;
324
325 // Early exit if there is no need to allocate space in the stack.
326 if (StackSize == 0 && !MFI.adjustsStack())
327 return;
328
329 uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
330 // Split the SP adjustment to reduce the offsets of callee saved spill.
331 if (FirstSPAdjustAmount)
332 StackSize = FirstSPAdjustAmount;
333
334 // Adjust stack.
335 const LoongArchTargetLowering *TLI = STI.getTargetLowering();
336 const bool NeedProbe = TLI->hasInlineStackProbe(MF);
337 const uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign: getStackAlign());
338 const bool DynAllocation =
339 MF.getInfo<LoongArchMachineFunctionInfo>()->hasDynamicAllocation();
340 if (StackSize != 0)
341 allocateStack(MBB, MBBI, MF, Offset: StackSize, RealStackSize: StackSize,
342 /*EmitCFI=*/true, NeedProbe, ProbeSize, DynAllocation,
343 Flag: MachineInstr::FrameSetup);
344
345 const auto &CSI = MFI.getCalleeSavedInfo();
346
347 // The frame pointer is callee-saved, and code has been generated for us to
348 // save it to the stack. We need to skip over the storing of callee-saved
349 // registers as the frame pointer must be modified after it has been saved
350 // to the stack, not before.
351 std::advance(i&: MBBI, n: CSI.size());
352
353 // Iterate over list of callee-saved registers and emit .cfi_offset
354 // directives.
355 for (const auto &Entry : CSI) {
356 int64_t Offset = MFI.getObjectOffset(ObjectIdx: Entry.getFrameIdx());
357 unsigned CFIIndex = MF.addFrameInst(Inst: MCCFIInstruction::createOffset(
358 L: nullptr, Register: RI->getDwarfRegNum(Reg: Entry.getReg(), isEH: true), Offset));
359 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::CFI_INSTRUCTION))
360 .addCFIIndex(CFIIndex)
361 .setMIFlag(MachineInstr::FrameSetup);
362 }
363
364 // Generate new FP.
365 if (hasFP(MF)) {
366 adjustReg(MBB, MBBI, DL, DestReg: FPReg, SrcReg: SPReg,
367 Val: StackSize - LoongArchFI->getVarArgsSaveSize(),
368 Flag: MachineInstr::FrameSetup);
369
370 // Emit ".cfi_def_cfa $fp, LoongArchFI->getVarArgsSaveSize()"
371 unsigned CFIIndex = MF.addFrameInst(
372 Inst: MCCFIInstruction::cfiDefCfa(L: nullptr, Register: RI->getDwarfRegNum(Reg: FPReg, isEH: true),
373 Offset: LoongArchFI->getVarArgsSaveSize()));
374 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::CFI_INSTRUCTION))
375 .addCFIIndex(CFIIndex)
376 .setMIFlag(MachineInstr::FrameSetup);
377 }
378
379 // Emit the second SP adjustment after saving callee saved registers.
380 if (FirstSPAdjustAmount) {
381 uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount;
382 assert(SecondSPAdjustAmount > 0 &&
383 "SecondSPAdjustAmount should be greater than zero");
384 allocateStack(MBB, MBBI, MF, Offset: SecondSPAdjustAmount, RealStackSize,
385 EmitCFI: !hasFP(MF), NeedProbe, ProbeSize, DynAllocation,
386 Flag: MachineInstr::FrameSetup);
387 }
388
389 if (hasFP(MF)) {
390 // Realign stack.
391 if (RI->hasStackRealignment(MF)) {
392 unsigned Align = Log2(A: MFI.getMaxAlign());
393 assert(Align > 0 && "The stack realignment size is invalid!");
394 BuildMI(BB&: MBB, I: MBBI, MIMD: DL,
395 MCID: TII->get(Opcode: IsLA64 ? LoongArch::BSTRINS_D : LoongArch::BSTRINS_W),
396 DestReg: SPReg)
397 .addReg(RegNo: SPReg)
398 .addReg(RegNo: LoongArch::R0)
399 .addImm(Val: Align - 1)
400 .addImm(Val: 0)
401 .setMIFlag(MachineInstr::FrameSetup);
402 // FP will be used to restore the frame in the epilogue, so we need
403 // another base register BP to record SP after re-alignment. SP will
404 // track the current stack after allocating variable sized objects.
405 if (hasBP(MF)) {
406 // move BP, $sp
407 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: LoongArch::OR),
408 DestReg: LoongArchABI::getBPReg())
409 .addReg(RegNo: SPReg)
410 .addReg(RegNo: LoongArch::R0)
411 .setMIFlag(MachineInstr::FrameSetup);
412 }
413 }
414 }
415}
416
417void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
418 MachineBasicBlock &MBB) const {
419 const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
420 MachineFrameInfo &MFI = MF.getFrameInfo();
421 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
422 Register SPReg = LoongArch::R3;
423 // All calls are tail calls in GHC calling conv, and functions have no
424 // prologue/epilogue.
425 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
426 return;
427 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
428 DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
429
430 const auto &CSI = MFI.getCalleeSavedInfo();
431 // Skip to before the restores of callee-saved registers.
432 auto LastFrameDestroy = MBBI;
433 if (!CSI.empty())
434 LastFrameDestroy = std::prev(x: MBBI, n: CSI.size());
435
436 // Get the number of bytes from FrameInfo.
437 uint64_t StackSize = MFI.getStackSize();
438
439 // Restore the stack pointer.
440 if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) {
441 assert(hasFP(MF) && "frame pointer should not have been eliminated");
442 adjustReg(MBB, MBBI: LastFrameDestroy, DL, DestReg: SPReg, SrcReg: LoongArch::R22,
443 Val: -StackSize + LoongArchFI->getVarArgsSaveSize(),
444 Flag: MachineInstr::FrameDestroy);
445 }
446
447 uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
448 if (FirstSPAdjustAmount) {
449 uint64_t SecondSPAdjustAmount = StackSize - FirstSPAdjustAmount;
450 assert(SecondSPAdjustAmount > 0 &&
451 "SecondSPAdjustAmount should be greater than zero");
452
453 adjustReg(MBB, MBBI: LastFrameDestroy, DL, DestReg: SPReg, SrcReg: SPReg, Val: SecondSPAdjustAmount,
454 Flag: MachineInstr::FrameDestroy);
455 StackSize = FirstSPAdjustAmount;
456 }
457
458 // Deallocate stack
459 adjustReg(MBB, MBBI, DL, DestReg: SPReg, SrcReg: SPReg, Val: StackSize, Flag: MachineInstr::FrameDestroy);
460}
461
462// Synthesize the probe loop.
463static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL,
464 Register TargetReg) {
465 assert(TargetReg != LoongArch::R3 &&
466 "New top of stack cannot already be in $sp");
467
468 MachineBasicBlock &MBB = *MBBI->getParent();
469 MachineFunction &MF = *MBB.getParent();
470
471 const LoongArchSubtarget &STI = MF.getSubtarget<LoongArchSubtarget>();
472 const LoongArchInstrInfo *TII = STI.getInstrInfo();
473 const bool IsLA64 = STI.is64Bit();
474 const Align StackAlign = STI.getFrameLowering()->getStackAlign();
475 const LoongArchTargetLowering *TLI = STI.getTargetLowering();
476 const uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
477
478 MachineFunction::iterator MBBInsertPoint = std::next(x: MBB.getIterator());
479 MachineBasicBlock *LoopTestMBB =
480 MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
481 MF.insert(MBBI: MBBInsertPoint, MBB: LoopTestMBB);
482 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
483 MF.insert(MBBI: MBBInsertPoint, MBB: ExitMBB);
484 const Register SPReg = LoongArch::R3;
485 const Register ScratchReg = LoongArch::R14;
486 const MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
487
488 // ScratchReg = ProbeSize
489 TII->movImm(MBB, MBBI, DL, DstReg: ScratchReg, Val: ProbeSize, Flag: Flags);
490
491 // LoopTest:
492 // sub.{w/d} $sp, $sp, ScratchReg
493 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL,
494 MCID: TII->get(Opcode: IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W), DestReg: SPReg)
495 .addReg(RegNo: SPReg)
496 .addReg(RegNo: ScratchReg)
497 .setMIFlag(Flags);
498
499 // st.{w/d} $zero, $sp, 0
500 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL,
501 MCID: TII->get(Opcode: IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
502 .addReg(RegNo: LoongArch::R0)
503 .addReg(RegNo: SPReg)
504 .addImm(Val: 0)
505 .setMIFlag(Flags);
506
507 // bne $sp, TargetReg, LoopTest
508 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: LoongArch::BNE))
509 .addReg(RegNo: SPReg)
510 .addReg(RegNo: TargetReg)
511 .addMBB(MBB: LoopTestMBB)
512 .setMIFlag(Flags);
513
514 ExitMBB->splice(Where: ExitMBB->end(), Other: &MBB, From: std::next(x: MBBI), To: MBB.end());
515 ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB);
516
517 LoopTestMBB->addSuccessor(Succ: ExitMBB);
518 LoopTestMBB->addSuccessor(Succ: LoopTestMBB);
519 MBB.addSuccessor(Succ: LoopTestMBB);
520 // Update liveins.
521 fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopTestMBB});
522}
523
524void LoongArchFrameLowering::inlineStackProbe(MachineFunction &MF,
525 MachineBasicBlock &MBB) const {
526 // Get the instructions that need to be replaced. We emit at most two of
527 // these. Remember them in order to avoid complications coming from the need
528 // to traverse the block while potentially creating more blocks.
529 SmallVector<MachineInstr *, 2> ToReplace;
530 for (MachineInstr &MI : MBB) {
531 if (MI.getOpcode() == LoongArch::PROBED_STACKALLOC) {
532 ToReplace.push_back(Elt: &MI);
533 }
534 }
535
536 for (MachineInstr *MI : ToReplace) {
537 MachineBasicBlock::iterator MBBI = MI->getIterator();
538 DebugLoc DL = MBB.findDebugLoc(MBBI);
539 Register TargetReg = MI->getOperand(i: 0).getReg();
540 emitStackProbeInline(MBBI, DL, TargetReg);
541 MBBI->eraseFromParent();
542 }
543}
544
545// We would like to split the SP adjustment to reduce prologue/epilogue
546// as following instructions. In this way, the offset of the callee saved
547// register could fit in a single store.
548// e.g.
549// addi.d $sp, $sp, -2032
550// st.d $ra, $sp, 2024
551// st.d $fp, $sp, 2016
552// addi.d $sp, $sp, -16
553uint64_t LoongArchFrameLowering::getFirstSPAdjustAmount(
554 const MachineFunction &MF) const {
555 const MachineFrameInfo &MFI = MF.getFrameInfo();
556 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
557
558 // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
559 // 12-bit and there exists a callee-saved register needing to be pushed.
560 if (!isInt<12>(x: MFI.getStackSize()) && (CSI.size() > 0)) {
561 // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will
562 // cause sp = sp + 2048 in the epilogue to be split into multiple
563 // instructions. Offsets smaller than 2048 can fit in a single load/store
564 // instruction, and we have to stick with the stack alignment.
565 // So (2048 - StackAlign) will satisfy the stack alignment.
566 return 2048 - getStackAlign().value();
567 }
568 return 0;
569}
570
571void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF,
572 BitVector &SavedRegs,
573 RegScavenger *RS) const {
574 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
575 // Unconditionally spill RA and FP only if the function uses a frame
576 // pointer.
577 if (hasFP(MF)) {
578 SavedRegs.set(LoongArch::R1);
579 SavedRegs.set(LoongArch::R22);
580 }
581 // Mark BP as used if function has dedicated base pointer.
582 if (hasBP(MF))
583 SavedRegs.set(LoongArchABI::getBPReg());
584}
585
586// Do not preserve stack space within prologue for outgoing variables if the
587// function contains variable size objects.
588// Let eliminateCallFramePseudoInstr preserve stack space for it.
589bool LoongArchFrameLowering::hasReservedCallFrame(
590 const MachineFunction &MF) const {
591 return !MF.getFrameInfo().hasVarSizedObjects();
592}
593
594// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
595MachineBasicBlock::iterator
596LoongArchFrameLowering::eliminateCallFramePseudoInstr(
597 MachineFunction &MF, MachineBasicBlock &MBB,
598 MachineBasicBlock::iterator MI) const {
599 Register SPReg = LoongArch::R3;
600 DebugLoc DL = MI->getDebugLoc();
601
602 if (!hasReservedCallFrame(MF)) {
603 // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and
604 // ADJCALLSTACKUP must be converted to instructions manipulating the stack
605 // pointer. This is necessary when there is a variable length stack
606 // allocation (e.g. alloca), which means it's not possible to allocate
607 // space for outgoing arguments from within the function prologue.
608 int64_t Amount = MI->getOperand(i: 0).getImm();
609
610 if (Amount != 0) {
611 // Ensure the stack remains aligned after adjustment.
612 Amount = alignSPAdjust(SPAdj: Amount);
613
614 if (MI->getOpcode() == LoongArch::ADJCALLSTACKDOWN)
615 Amount = -Amount;
616
617 const LoongArchTargetLowering *TLI =
618 MF.getSubtarget<LoongArchSubtarget>().getTargetLowering();
619 const int64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign: getStackAlign());
620 if (TLI->hasInlineStackProbe(MF) && -Amount >= ProbeSize) {
621 // When stack probing is enabled, the decrement of SP may need to be
622 // probed. We can handle both the decrement and the probing in
623 // allocateStack.
624 const bool DynAllocation =
625 MF.getInfo<LoongArchMachineFunctionInfo>()->hasDynamicAllocation();
626 allocateStack(MBB, MBBI: MI, MF, Offset: -Amount, RealStackSize: -Amount,
627 EmitCFI: MF.needsFrameMoves() && !hasFP(MF),
628 /*NeedProbe=*/true, ProbeSize, DynAllocation,
629 Flag: MachineInstr::NoFlags);
630 inlineStackProbe(MF, MBB);
631 } else {
632 adjustReg(MBB, MBBI: MI, DL, DestReg: SPReg, SrcReg: SPReg, Val: Amount, Flag: MachineInstr::NoFlags);
633 }
634 }
635 }
636
637 return MBB.erase(I: MI);
638}
639
640bool LoongArchFrameLowering::spillCalleeSavedRegisters(
641 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
642 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
643 if (CSI.empty())
644 return true;
645
646 MachineFunction *MF = MBB.getParent();
647 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
648
649 // Insert the spill to the stack frame.
650 for (auto &CS : CSI) {
651 MCRegister Reg = CS.getReg();
652 // If the register is RA and the return address is taken by method
653 // LoongArchTargetLowering::lowerRETURNADDR, don't set kill flag.
654 bool IsKill =
655 !(Reg == LoongArch::R1 && MF->getFrameInfo().isReturnAddressTaken());
656 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
657 TII.storeRegToStackSlot(MBB, MI, SrcReg: Reg, isKill: IsKill, FrameIndex: CS.getFrameIdx(), RC,
658 VReg: Register());
659 }
660
661 return true;
662}
663
664StackOffset LoongArchFrameLowering::getFrameIndexReference(
665 const MachineFunction &MF, int FI, Register &FrameReg) const {
666 const MachineFrameInfo &MFI = MF.getFrameInfo();
667 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
668 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
669 uint64_t StackSize = MFI.getStackSize();
670 uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
671
672 // Callee-saved registers should be referenced relative to the stack
673 // pointer (positive offset), otherwise use the frame pointer (negative
674 // offset).
675 const auto &CSI = MFI.getCalleeSavedInfo();
676 int MinCSFI = 0;
677 int MaxCSFI = -1;
678 StackOffset Offset =
679 StackOffset::getFixed(Fixed: MFI.getObjectOffset(ObjectIdx: FI) - getOffsetOfLocalArea() +
680 MFI.getOffsetAdjustment());
681
682 if (CSI.size()) {
683 MinCSFI = CSI[0].getFrameIdx();
684 MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
685 }
686
687 if (FI >= MinCSFI && FI <= MaxCSFI) {
688 FrameReg = LoongArch::R3;
689 if (FirstSPAdjustAmount)
690 Offset += StackOffset::getFixed(Fixed: FirstSPAdjustAmount);
691 else
692 Offset += StackOffset::getFixed(Fixed: StackSize);
693 } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(ObjectIdx: FI)) {
694 // If the stack was realigned, the frame pointer is set in order to allow
695 // SP to be restored, so we need another base register to record the stack
696 // after realignment.
697 FrameReg = hasBP(MF) ? LoongArchABI::getBPReg() : LoongArch::R3;
698 Offset += StackOffset::getFixed(Fixed: StackSize);
699 } else {
700 FrameReg = RI->getFrameRegister(MF);
701 if (hasFP(MF))
702 Offset += StackOffset::getFixed(Fixed: LoongArchFI->getVarArgsSaveSize());
703 else
704 Offset += StackOffset::getFixed(Fixed: StackSize);
705 }
706
707 return Offset;
708}
709
710bool LoongArchFrameLowering::enableShrinkWrapping(
711 const MachineFunction &MF) const {
712 // Keep the conventional code flow when not optimizing.
713 if (MF.getFunction().hasOptNone())
714 return false;
715
716 return true;
717}
718