1//===-- RISCVFrameLowering.cpp - RISC-V Frame Information -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the RISC-V implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVFrameLowering.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "RISCVMachineFunctionInfo.h"
16#include "RISCVSubtarget.h"
17#include "llvm/BinaryFormat/Dwarf.h"
18#include "llvm/CodeGen/CFIInstBuilder.h"
19#include "llvm/CodeGen/LivePhysRegs.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/RegisterScavenging.h"
25#include "llvm/IR/DiagnosticInfo.h"
26#include "llvm/MC/MCDwarf.h"
27#include "llvm/Support/LEB128.h"
28
29#include <algorithm>
30
31#define DEBUG_TYPE "riscv-frame"
32
33using namespace llvm;
34
35static Align getABIStackAlignment(RISCVABI::ABI ABI) {
36 if (ABI == RISCVABI::ABI_ILP32E)
37 return Align(4);
38 if (ABI == RISCVABI::ABI_LP64E)
39 return Align(8);
40 return Align(16);
41}
42
// The stack grows downwards; both the main and the transient stack alignment
// come from the target ABI, and locals start at offset 0 from the incoming SP.
RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI)
    : TargetFrameLowering(
          StackGrowsDown, getABIStackAlignment(ABI: STI.getTargetABI()),
          /*LocalAreaOffset=*/0,
          /*TransientStackAlignment=*/getABIStackAlignment(ABI: STI.getTargetABI())),
      STI(STI) {}
49
// The register used to hold the frame pointer.
static constexpr MCPhysReg FPReg = RISCV::X8;

// The register used to hold the stack pointer.
static constexpr MCPhysReg SPReg = RISCV::X2;

// The register used to hold the return address.
static constexpr MCPhysReg RAReg = RISCV::X1;

// List of CSRs that are given a fixed location by save/restore libcalls or
// Zcmp/Xqccmp Push/Pop. The order in this table indicates the order the
// registers are saved on the stack. Zcmp and Xqccmp use the reverse order of
// save/restore on the stack, but this is handled when offsets are calculated.
static const MCPhysReg FixedCSRFIMap[] = {
    /*ra*/ RAReg, /*s0*/ FPReg, /*s1*/ RISCV::X9,
    /*s2*/ RISCV::X18, /*s3*/ RISCV::X19, /*s4*/ RISCV::X20,
    /*s5*/ RISCV::X21, /*s6*/ RISCV::X22, /*s7*/ RISCV::X23,
    /*s8*/ RISCV::X24, /*s9*/ RISCV::X25, /*s10*/ RISCV::X26,
    /*s11*/ RISCV::X27};

// The number of stack bytes allocated by `QC.C.MIENTER(.NEST)` and popped by
// `QC.C.MILEAVERET`.
static constexpr uint64_t QCIInterruptPushAmount = 96;

// Registers stored at fixed (negative) positions by `QC.C.MIENTER(.NEST)`.
// The gap/reserved entries hold CSR values saved by the hardware.
static const std::pair<MCPhysReg, int8_t> FixedCSRFIQCIInterruptMap[] = {
    /* -1 is a gap for mepc/mnepc */
    {/*fp*/ FPReg, -2},
    /* -3 is a gap for qc.mcause */
    {/*ra*/ RAReg, -4},
    /* -5 is reserved */
    {/*t0*/ RISCV::X5, -6},
    {/*t1*/ RISCV::X6, -7},
    {/*t2*/ RISCV::X7, -8},
    {/*a0*/ RISCV::X10, -9},
    {/*a1*/ RISCV::X11, -10},
    {/*a2*/ RISCV::X12, -11},
    {/*a3*/ RISCV::X13, -12},
    {/*a4*/ RISCV::X14, -13},
    {/*a5*/ RISCV::X15, -14},
    {/*a6*/ RISCV::X16, -15},
    {/*a7*/ RISCV::X17, -16},
    {/*t3*/ RISCV::X28, -17},
    {/*t4*/ RISCV::X29, -18},
    {/*t5*/ RISCV::X30, -19},
    {/*t6*/ RISCV::X31, -20},
    /* -21, -22, -23, -24 are reserved */
};
97
/// Returns true if DWARF CFI instructions ("frame moves") should be emitted
/// for this function (per MachineFunction::needsFrameMoves).
static bool needsDwarfCFI(const MachineFunction &MF) {
  return MF.needsFrameMoves();
}
102
// For now we use x3, a.k.a gp, as pointer to shadow call stack.
// User should not use x3 in their asm.
//
// Emit the shadow-call-stack prologue: push the return address onto either
// the hardware shadow stack (Zimop sspush / Zcmop c.sspush) or the software
// shadow stack addressed through gp, plus the CFI needed for unwinding the
// software variant.
static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            const DebugLoc &DL) {
  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
  // We check Zimop instead of (Zimop || Zcmop) to determine whether HW shadow
  // stack is available despite the fact that sspush/sspopchk both have a
  // compressed form, because if only Zcmop is available, we would need to
  // reserve X5 due to c.sspopchk only takes X5 and we currently do not support
  // using X5 as the return address register.
  // However, we can still aggressively use c.sspush x1 if zcmop is available.
  bool HasHWShadowStack = MF.getFunction().hasFnAttribute(Kind: "hw-shadow-stack") &&
                          STI.hasStdExtZimop();
  bool HasSWShadowStack =
      MF.getFunction().hasFnAttribute(Kind: Attribute::ShadowCallStack);
  if (!HasHWShadowStack && !HasSWShadowStack)
    return;

  const llvm::RISCVRegisterInfo *TRI = STI.getRegisterInfo();

  // Do not save RA to the SCS if it's not saved to the regular stack,
  // i.e. RA is not at risk of being overwritten.
  std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
  if (llvm::none_of(
          Range&: CSI, P: [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; }))
    return;

  const RISCVInstrInfo *TII = STI.getInstrInfo();
  if (HasHWShadowStack) {
    // Prefer the compressed form when Zcmop is present (see comment above).
    if (STI.hasStdExtZcmop()) {
      static_assert(RAReg == RISCV::X1, "C.SSPUSH only accepts X1");
      BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: RISCV::PseudoMOP_C_SSPUSH));
    } else {
      BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: RISCV::PseudoMOP_SSPUSH)).addReg(RegNo: RAReg);
    }
    // Note: no CFI is emitted for the hardware shadow stack path.
    return;
  }

  Register SCSPReg = RISCVABI::getSCSPReg();

  bool IsRV64 = STI.is64Bit();
  // One shadow-stack slot per XLEN register: 4 bytes on RV32, 8 on RV64.
  int64_t SlotSize = STI.getXLen() / 8;
  // Store return address to shadow call stack
  // addi gp, gp, [4|8]
  // s[w|d] ra, -[4|8](gp)
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: RISCV::ADDI))
      .addReg(RegNo: SCSPReg, Flags: RegState::Define)
      .addReg(RegNo: SCSPReg)
      .addImm(Val: SlotSize)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
      .addReg(RegNo: RAReg)
      .addReg(RegNo: SCSPReg)
      .addImm(Val: -SlotSize)
      .setMIFlag(MachineInstr::FrameSetup);

  if (!needsDwarfCFI(MF))
    return;

  // Emit a CFI instruction that causes SlotSize to be subtracted from the value
  // of the shadow stack pointer when unwinding past this frame.
  char DwarfSCSReg = TRI->getDwarfRegNum(Reg: SCSPReg, /*IsEH*/ isEH: true);
  assert(DwarfSCSReg < 32 && "SCS Register should be < 32 (X3).");

  // -SlotSize is -4 or -8, so it encodes as a single SLEB128 byte.
  char Offset = static_cast<char>(-SlotSize) & 0x7f;
  // Hand-assembled DW_CFA_val_expression: value(SCSPReg) = SCSPReg + Offset.
  const char CFIInst[] = {
      dwarf::DW_CFA_val_expression,
      DwarfSCSReg, // register
      2,           // length
      static_cast<char>(unsigned(dwarf::DW_OP_breg0 + DwarfSCSReg)),
      Offset, // addend (sleb128)
  };

  CFIInstBuilder(MBB, MI, MachineInstr::FrameSetup)
      .buildEscape(Bytes: StringRef(CFIInst, sizeof(CFIInst)));
}
180
// Epilogue counterpart of emitSCSPrologue: pop the return address back off the
// hardware shadow stack (sspopchk) or the software one (load via gp), and
// restore the CFI state for the SCS pointer.
static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            const DebugLoc &DL) {
  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
  bool HasHWShadowStack = MF.getFunction().hasFnAttribute(Kind: "hw-shadow-stack") &&
                          STI.hasStdExtZimop();
  bool HasSWShadowStack =
      MF.getFunction().hasFnAttribute(Kind: Attribute::ShadowCallStack);
  if (!HasHWShadowStack && !HasSWShadowStack)
    return;

  // See emitSCSPrologue() above.
  std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
  if (llvm::none_of(
          Range&: CSI, P: [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; }))
    return;

  const RISCVInstrInfo *TII = STI.getInstrInfo();
  if (HasHWShadowStack) {
    // Unlike the prologue, only the uncompressed form is used here (see the
    // X5 restriction explained in emitSCSPrologue).
    BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: RISCV::PseudoMOP_SSPOPCHK)).addReg(RegNo: RAReg);
    return;
  }

  Register SCSPReg = RISCVABI::getSCSPReg();

  bool IsRV64 = STI.is64Bit();
  int64_t SlotSize = STI.getXLen() / 8;
  // Load return address from shadow call stack
  // l[w|d] ra, -[4|8](gp)
  // addi gp, gp, -[4|8]
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::LD : RISCV::LW))
      .addReg(RegNo: RAReg, Flags: RegState::Define)
      .addReg(RegNo: SCSPReg)
      .addImm(Val: -SlotSize)
      .setMIFlag(MachineInstr::FrameDestroy);
  BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: RISCV::ADDI))
      .addReg(RegNo: SCSPReg, Flags: RegState::Define)
      .addReg(RegNo: SCSPReg)
      .addImm(Val: -SlotSize)
      .setMIFlag(MachineInstr::FrameDestroy);
  if (needsDwarfCFI(MF)) {
    // Restore the SCS pointer
    CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(Reg: SCSPReg);
  }
}
226
// Insert instruction to swap mscratchsw with sp
//
// For SiFive CLIC "stack swap" interrupts, exchange SP with the
// sf.mscratchcsw CSR on entry (csrrw sp, sf.mscratchcsw, sp); the matching
// epilogue swap restores the original SP.
static void emitSiFiveCLICStackSwap(MachineFunction &MF, MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    const DebugLoc &DL) {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  if (!RVFI->isSiFiveStackSwapInterrupt(MF))
    return;

  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *TII = STI.getInstrInfo();

  assert(STI.hasVendorXSfmclic() && "Stack Swapping Requires XSfmclic");

  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRW))
      .addReg(RegNo: SPReg, Flags: RegState::Define)
      .addImm(Val: RISCVSysReg::sf_mscratchcsw)
      .addReg(RegNo: SPReg, Flags: RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

  // FIXME: CFI Information for this swap.
}
249
250static void
251createSiFivePreemptibleInterruptFrameEntries(MachineFunction &MF,
252 RISCVMachineFunctionInfo &RVFI) {
253 if (!RVFI.isSiFivePreemptibleInterrupt(MF))
254 return;
255
256 const TargetRegisterClass &RC = RISCV::GPRRegClass;
257 const TargetRegisterInfo &TRI =
258 *MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
259 MachineFrameInfo &MFI = MF.getFrameInfo();
260
261 // Create two frame objects for spilling X8 and X9, which will be done in
262 // `emitSiFiveCLICPreemptibleSaves`. This is in addition to any other stack
263 // objects we might have for X8 and X9, as they might be saved twice.
264 for (int I = 0; I < 2; ++I) {
265 int FI = MFI.CreateStackObject(Size: TRI.getSpillSize(RC), Alignment: TRI.getSpillAlign(RC),
266 isSpillSlot: true);
267 RVFI.pushInterruptCSRFrameIndex(FI);
268 }
269}
270
// For SiFive preemptible interrupts: spill X8/X9 to the dedicated slots made
// by createSiFivePreemptibleInterruptFrameEntries, stash mcause/mepc into
// X8/X9, then re-enable interrupts so higher-priority handlers can preempt.
static void emitSiFiveCLICPreemptibleSaves(MachineFunction &MF,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MBBI,
                                           const DebugLoc &DL) {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  if (!RVFI->isSiFivePreemptibleInterrupt(MF))
    return;

  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *TII = STI.getInstrInfo();

  // FIXME: CFI Information here is nonexistent/wrong.

  // X8 and X9 might be stored into the stack twice, initially into the
  // `interruptCSRFrameIndex` here, and then maybe again into their CSI frame
  // index.
  //
  // This is done instead of telling the register allocator that we need two
  // VRegs to store the value of `mcause` and `mepc` through the instruction,
  // which affects other passes.
  TII->storeRegToStackSlot(MBB, MBBI, SrcReg: RISCV::X8, /* IsKill=*/true,
                           FrameIndex: RVFI->getInterruptCSRFrameIndex(Idx: 0),
                           RC: &RISCV::GPRRegClass, VReg: Register(),
                           Flags: MachineInstr::FrameSetup);
  TII->storeRegToStackSlot(MBB, MBBI, SrcReg: RISCV::X9, /* IsKill=*/true,
                           FrameIndex: RVFI->getInterruptCSRFrameIndex(Idx: 1),
                           RC: &RISCV::GPRRegClass, VReg: Register(),
                           Flags: MachineInstr::FrameSetup);

  // Put `mcause` into X8 (s0), and `mepc` into X9 (s1). If either of these are
  // used in the function, then they will appear in `getUnmanagedCSI` and will
  // be saved again.
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRS))
      .addReg(RegNo: RISCV::X8, Flags: RegState::Define)
      .addImm(Val: RISCVSysReg::mcause)
      .addReg(RegNo: RISCV::X0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRS))
      .addReg(RegNo: RISCV::X9, Flags: RegState::Define)
      .addImm(Val: RISCVSysReg::mepc)
      .addReg(RegNo: RISCV::X0)
      .setMIFlag(MachineInstr::FrameSetup);

  // Enable interrupts. (csrrsi zero, mstatus, 8 sets mstatus.MIE.)
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRSI))
      .addReg(RegNo: RISCV::X0, Flags: RegState::Define)
      .addImm(Val: RISCVSysReg::mstatus)
      .addImm(Val: 8)
      .setMIFlag(MachineInstr::FrameSetup);
}
322
// Mirror of emitSiFiveCLICPreemptibleSaves: disable interrupts, write the
// stashed values in X9/X8 back to mepc/mcause, then reload X8/X9 from their
// dedicated spill slots.
static void emitSiFiveCLICPreemptibleRestores(MachineFunction &MF,
                                              MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator MBBI,
                                              const DebugLoc &DL) {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  if (!RVFI->isSiFivePreemptibleInterrupt(MF))
    return;

  const auto &STI = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *TII = STI.getInstrInfo();

  // FIXME: CFI Information here is nonexistent/wrong.
  // NOTE(review): all instructions below carry FrameSetup flags even though
  // this is the restore path — confirm FrameDestroy was not intended.

  // Disable interrupts. (csrrci zero, mstatus, 8 clears mstatus.MIE.)
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRCI))
      .addReg(RegNo: RISCV::X0, Flags: RegState::Define)
      .addImm(Val: RISCVSysReg::mstatus)
      .addImm(Val: 8)
      .setMIFlag(MachineInstr::FrameSetup);

  // Restore `mepc` from x9 (s1), and `mcause` from x8 (s0). If either were used
  // in the function, they have already been restored once, so now have the
  // value stored in `emitSiFiveCLICPreemptibleSaves`.
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRW))
      .addReg(RegNo: RISCV::X0, Flags: RegState::Define)
      .addImm(Val: RISCVSysReg::mepc)
      .addReg(RegNo: RISCV::X9, Flags: RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRW))
      .addReg(RegNo: RISCV::X0, Flags: RegState::Define)
      .addImm(Val: RISCVSysReg::mcause)
      .addReg(RegNo: RISCV::X8, Flags: RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

  // X8 and X9 need to be restored to their values on function entry, which we
  // saved onto the stack in `emitSiFiveCLICPreemptibleSaves`.
  TII->loadRegFromStackSlot(MBB, MBBI, DstReg: RISCV::X9,
                            FrameIndex: RVFI->getInterruptCSRFrameIndex(Idx: 1),
                            RC: &RISCV::GPRRegClass, VReg: Register(),
                            SubReg: RISCV::NoSubRegister, Flags: MachineInstr::FrameSetup);
  TII->loadRegFromStackSlot(MBB, MBBI, DstReg: RISCV::X8,
                            FrameIndex: RVFI->getInterruptCSRFrameIndex(Idx: 0),
                            RC: &RISCV::GPRRegClass, VReg: Register(),
                            SubReg: RISCV::NoSubRegister, Flags: MachineInstr::FrameSetup);
}
369
// Get the ID of the libcall used for spilling and restoring callee saved
// registers. The ID is representative of the number of registers saved or
// restored by the libcall, except it is zero-indexed - ID 0 corresponds to a
// single register.
//
// Returns -1 when save/restore libcalls are not used, or when no CSR has a
// libcall-managed (negative) frame index.
static int getLibCallID(const MachineFunction &MF,
                        const std::vector<CalleeSavedInfo> &CSI) {
  const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  if (CSI.empty() || !RVFI->useSaveRestoreLibCalls(MF))
    return -1;

  // Find the highest-numbered CSR saved by the libcall; the libcall saves
  // every register up to and including it.
  MCRegister MaxReg;
  for (auto &CS : CSI)
    // assignCalleeSavedSpillSlots assigns negative frame indexes to
    // registers which can be saved by libcall.
    if (CS.getFrameIdx() < 0)
      MaxReg = std::max(a: MaxReg.id(), b: CS.getReg().id());

  if (!MaxReg)
    return -1;

  switch (MaxReg.id()) {
  default:
    llvm_unreachable("Something has gone wrong!");
    // clang-format off
  case /*s11*/ RISCV::X27: return 12;
  case /*s10*/ RISCV::X26: return 11;
  case /*s9*/  RISCV::X25: return 10;
  case /*s8*/  RISCV::X24: return 9;
  case /*s7*/  RISCV::X23: return 8;
  case /*s6*/  RISCV::X22: return 7;
  case /*s5*/  RISCV::X21: return 6;
  case /*s4*/  RISCV::X20: return 5;
  case /*s3*/  RISCV::X19: return 4;
  case /*s2*/  RISCV::X18: return 3;
  case /*s1*/  RISCV::X9:  return 2;
  case /*s0*/  FPReg:      return 1;
  case /*ra*/  RAReg:      return 0;
    // clang-format on
  }
}
411
412// Get the name of the libcall used for spilling callee saved registers.
413// If this function will not use save/restore libcalls, then return a nullptr.
414static const char *
415getSpillLibCallName(const MachineFunction &MF,
416 const std::vector<CalleeSavedInfo> &CSI) {
417 static const char *const SpillLibCalls[] = {
418 "__riscv_save_0",
419 "__riscv_save_1",
420 "__riscv_save_2",
421 "__riscv_save_3",
422 "__riscv_save_4",
423 "__riscv_save_5",
424 "__riscv_save_6",
425 "__riscv_save_7",
426 "__riscv_save_8",
427 "__riscv_save_9",
428 "__riscv_save_10",
429 "__riscv_save_11",
430 "__riscv_save_12"
431 };
432
433 int LibCallID = getLibCallID(MF, CSI);
434 if (LibCallID == -1)
435 return nullptr;
436 return SpillLibCalls[LibCallID];
437}
438
439// Get the name of the libcall used for restoring callee saved registers.
440// If this function will not use save/restore libcalls, then return a nullptr.
441static const char *
442getRestoreLibCallName(const MachineFunction &MF,
443 const std::vector<CalleeSavedInfo> &CSI) {
444 static const char *const RestoreLibCalls[] = {
445 "__riscv_restore_0",
446 "__riscv_restore_1",
447 "__riscv_restore_2",
448 "__riscv_restore_3",
449 "__riscv_restore_4",
450 "__riscv_restore_5",
451 "__riscv_restore_6",
452 "__riscv_restore_7",
453 "__riscv_restore_8",
454 "__riscv_restore_9",
455 "__riscv_restore_10",
456 "__riscv_restore_11",
457 "__riscv_restore_12"
458 };
459
460 int LibCallID = getLibCallID(MF, CSI);
461 if (LibCallID == -1)
462 return nullptr;
463 return RestoreLibCalls[LibCallID];
464}
465
466// Get the max reg of Push/Pop for restoring callee saved registers.
467static unsigned getNumPushPopRegs(const std::vector<CalleeSavedInfo> &CSI) {
468 unsigned NumPushPopRegs = 0;
469 for (auto &CS : CSI) {
470 auto *FII = llvm::find_if(Range: FixedCSRFIMap,
471 P: [&](MCPhysReg P) { return P == CS.getReg(); });
472 if (FII != std::end(arr: FixedCSRFIMap)) {
473 unsigned RegNum = std::distance(first: std::begin(arr: FixedCSRFIMap), last: FII);
474 NumPushPopRegs = std::max(a: NumPushPopRegs, b: RegNum + 1);
475 }
476 }
477 assert(NumPushPopRegs != 12 && "x26 requires x27 to also be pushed");
478 return NumPushPopRegs;
479}
480
// Return true if the specified function should have a dedicated frame
// pointer register. This is true if frame pointer elimination is
// disabled, if it needs dynamic stack realignment, if the function has
// variable sized allocas, or if the frame address is taken.
bool RISCVFrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MF.getTarget().Options.DisableFramePointerElim(MF) ||
      RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
      MFI.isFrameAddressTaken())
    return true;

  // With large callframes around we may need to use FP to access the scavenging
  // emergency spillslot.
  //
  // We calculate the MaxCallFrameSize at the end of isel so this value should
  // be stable for the whole post-isel MIR pipeline.
  //
  // NOTE: The idea of forcing a frame pointer is copied from AArch64, but they
  //       conservatively return true when the call frame size has not been
  //       computed yet. On RISC-V that caused MachineOutliner tests to fail the
  //       MachineVerifier due to outlined functions not computing max call frame
  //       size thus the frame pointer would always be reserved.
  // 2047 is the largest 12-bit signed immediate reachable from SP.
  if (MFI.isMaxCallFrameSizeComputed() && MFI.getMaxCallFrameSize() > 2047)
    return true;

  return false;
}
510
// Return true if this function needs a dedicated base pointer: when the
// stack must be realigned AND stack objects cannot be reliably addressed
// from SP (variable sized objects, or SP is adjusted around calls).
bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  // If we do not reserve stack space for outgoing arguments in prologue,
  // we will adjust the stack pointer before call instruction. After the
  // adjustment, we can not use SP to access the stack objects for the
  // arguments. Instead, use BP to access these stack objects.
  return (MFI.hasVarSizedObjects() ||
          (!hasReservedCallFrame(MF) && (!MFI.isMaxCallFrameSizeComputed() ||
                                         MFI.getMaxCallFrameSize() != 0))) &&
         TRI->hasStackRealignment(MF);
}
524
// Determines the size of the frame and maximum call frame size.
void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t FrameSize = MFI.getStackSize();

  // QCI Interrupts use at least 96 bytes of stack space
  // (QCIInterruptPushAmount, allocated by QC.C.MIENTER(.NEST)).
  if (RVFI->useQCIInterrupt(MF))
    FrameSize = std::max(a: FrameSize, b: QCIInterruptPushAmount);

  // Get the alignment.
  Align StackAlign = getStackAlign();

  // Make sure the frame is aligned.
  FrameSize = alignTo(Size: FrameSize, A: StackAlign);

  // Update frame info.
  MFI.setStackSize(FrameSize);

  // When using SP or BP to access stack objects, we may require extra padding
  // to ensure the bottom of the RVV stack is correctly aligned within the main
  // stack. We calculate this as the amount required to align the scalar local
  // variable section up to the RVV alignment.
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  if (RVFI->getRVVStackSize() && (!hasFP(MF) || TRI->hasStackRealignment(MF))) {
    int ScalarLocalVarSize = FrameSize - RVFI->getCalleeSavedStackSize() -
                             RVFI->getVarArgsSaveSize();
    if (auto RVVPadding =
            offsetToAlignment(Value: ScalarLocalVarSize, Alignment: RVFI->getRVVStackAlign()))
      RVFI->setRVVPadding(RVVPadding);
  }
}
559
560// Returns the stack size including RVV padding (when required), rounded back
561// up to the required stack alignment.
562uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding(
563 const MachineFunction &MF) const {
564 const MachineFrameInfo &MFI = MF.getFrameInfo();
565 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
566 return alignTo(Size: MFI.getStackSize() + RVFI->getRVVPadding(), A: getStackAlign());
567}
568
569static SmallVector<CalleeSavedInfo, 8>
570getUnmanagedCSI(const MachineFunction &MF,
571 const std::vector<CalleeSavedInfo> &CSI,
572 bool ReverseOrder = false) {
573 const MachineFrameInfo &MFI = MF.getFrameInfo();
574 SmallVector<CalleeSavedInfo, 8> NonLibcallCSI;
575
576 for (auto &CS : CSI) {
577 int FI = CS.getFrameIdx();
578 if (FI >= 0 && MFI.getStackID(ObjectIdx: FI) == TargetStackID::Default)
579 NonLibcallCSI.push_back(Elt: CS);
580 }
581
582 // Reverse the order so that load/store operations use ascending addresses,
583 // enabling better load/store clustering and fusion.
584 if (ReverseOrder)
585 std::reverse(first: NonLibcallCSI.begin(), last: NonLibcallCSI.end());
586
587 return NonLibcallCSI;
588}
589
590static SmallVector<CalleeSavedInfo, 8>
591getRVVCalleeSavedInfo(const MachineFunction &MF,
592 const std::vector<CalleeSavedInfo> &CSI) {
593 const MachineFrameInfo &MFI = MF.getFrameInfo();
594 SmallVector<CalleeSavedInfo, 8> RVVCSI;
595
596 for (auto &CS : CSI) {
597 int FI = CS.getFrameIdx();
598 if (FI >= 0 && MFI.getStackID(ObjectIdx: FI) == TargetStackID::ScalableVector)
599 RVVCSI.push_back(Elt: CS);
600 }
601
602 return RVVCSI;
603}
604
// Collect the callee-saved entries handled by save/restore libcalls or
// Zcmp/Xqccmp push/pop (i.e. members of FixedCSRFIMap), excluding any that a
// QCI interrupt entry sequence already saves.
static SmallVector<CalleeSavedInfo, 8>
getPushOrLibCallsSavedInfo(const MachineFunction &MF,
                           const std::vector<CalleeSavedInfo> &CSI) {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  SmallVector<CalleeSavedInfo, 8> PushOrLibCallsCSI;
  if (!RVFI->useSaveRestoreLibCalls(MF) && !RVFI->isPushable(MF))
    return PushOrLibCallsCSI;

  for (const auto &CS : CSI) {
    if (RVFI->useQCIInterrupt(MF)) {
      // Some registers are saved by both `QC.C.MIENTER(.NEST)` and
      // `QC.CM.PUSH(FP)`. In these cases, prioritise the CFI info that points
      // to the versions saved by `QC.C.MIENTER(.NEST)` which is what FP
      // unwinding would use.
      if (llvm::is_contained(Range: llvm::make_first_range(c: FixedCSRFIQCIInterruptMap),
                             Element: CS.getReg()))
        continue;
    }

    if (llvm::is_contained(Range: FixedCSRFIMap, Element: CS.getReg()))
      PushOrLibCallsCSI.push_back(Elt: CS);
  }

  return PushOrLibCallsCSI;
}
631
632static SmallVector<CalleeSavedInfo, 8>
633getQCISavedInfo(const MachineFunction &MF,
634 const std::vector<CalleeSavedInfo> &CSI) {
635 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
636
637 SmallVector<CalleeSavedInfo, 8> QCIInterruptCSI;
638 if (!RVFI->useQCIInterrupt(MF))
639 return QCIInterruptCSI;
640
641 for (const auto &CS : CSI) {
642 if (llvm::is_contained(Range: llvm::make_first_range(c: FixedCSRFIQCIInterruptMap),
643 Element: CS.getReg()))
644 QCIInterruptCSI.push_back(Elt: CS);
645 }
646
647 return QCIInterruptCSI;
648}
649
// Allocate `Amount` bytes of scalable-vector stack with stack probing:
// materialize the byte count (Amount / RVVBytesPerBlock copies of VLENB) in
// X6, emit a pseudo that `inlineStackProbe` later expands to a probing loop,
// then subtract X6 from SP.
void RISCVFrameLowering::allocateAndProbeStackForRVV(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount,
    MachineInstr::MIFlag Flag, bool EmitCFI, bool DynAllocation) const {
  assert(Amount != 0 && "Did not need to adjust stack pointer for RVV.");

  // Emit a variable-length allocation probing loop.

  // Get VLEN in TargetReg
  const RISCVInstrInfo *TII = STI.getInstrInfo();
  Register TargetReg = RISCV::X6;
  uint32_t NumOfVReg = Amount / RISCV::RVVBytesPerBlock;
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::PseudoReadVLENB), DestReg: TargetReg)
      .setMIFlag(Flag);
  TII->mulImm(MF, MBB, II: MBBI, DL, DestReg: TargetReg, Amt: NumOfVReg, Flag);

  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
  if (EmitCFI) {
    // Set the CFA register to TargetReg while the probe loop runs.
    CFIBuilder.buildDefCFA(Reg: TargetReg, Offset: -Amount);
  }

  // It will be expanded to a probe loop in `inlineStackProbe`.
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::PROBED_STACKALLOC_RVV))
      .addReg(RegNo: TargetReg);

  if (EmitCFI) {
    // Set the CFA register back to SP.
    CFIBuilder.buildDefCFARegister(Reg: SPReg);
  }

  // SUB SP, SP, T1
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::SUB), DestReg: SPReg)
      .addReg(RegNo: SPReg)
      .addReg(RegNo: TargetReg)
      .setMIFlag(Flag);

  // If we have a dynamic allocation later we need to probe any residuals.
  if (DynAllocation) {
    // s[d|w] zero, 0(sp) touches the newly-allocated bottom of the stack.
    BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: STI.is64Bit() ? RISCV::SD : RISCV::SW))
        .addReg(RegNo: RISCV::X0)
        .addReg(RegNo: SPReg)
        .addImm(Val: 0)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}
696
// Append a DWARF expression computing `FixedOffset + ScalableOffset * VLENB`
// (relative to whatever is already on the expression stack) to Expr, and a
// matching human-readable rendering to Comment.
static void appendScalableVectorExpression(const TargetRegisterInfo &TRI,
                                           SmallVectorImpl<char> &Expr,
                                           StackOffset Offset,
                                           llvm::raw_string_ostream &Comment) {
  int64_t FixedOffset = Offset.getFixed();
  int64_t ScalableOffset = Offset.getScalable();
  unsigned DwarfVLenB = TRI.getDwarfRegNum(Reg: RISCV::VLENB, isEH: true);
  // Add the fixed part first (skipped entirely when it is zero).
  if (FixedOffset) {
    Expr.push_back(Elt: dwarf::DW_OP_consts);
    appendLEB128<LEB128Sign::Signed>(Buffer&: Expr, Value: FixedOffset);
    Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);
    Comment << (FixedOffset < 0 ? " - " : " + ") << std::abs(i: FixedOffset);
  }

  Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_consts);
  appendLEB128<LEB128Sign::Signed>(Buffer&: Expr, Value: ScalableOffset);

  // Push the VLENB register value: DW_OP_bregx takes a ULEB128 register
  // number followed by a SLEB128 offset (0 here).
  Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_bregx);
  appendLEB128<LEB128Sign::Unsigned>(Buffer&: Expr, Value: DwarfVLenB);
  Expr.push_back(Elt: 0);

  // ScalableOffset * VLENB, then add to the running value.
  Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_mul);
  Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);

  Comment << (ScalableOffset < 0 ? " - " : " + ") << std::abs(i: ScalableOffset)
          << " * vlenb";
}
724
// Build a DW_CFA_def_cfa_expression CFI escape defining the CFA as
// (Reg + FixedOffset + ScalableOffset * VLENB), for frames with RVV objects.
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
                                               Register Reg,
                                               StackOffset Offset) {
  assert(Offset.getScalable() != 0 && "Did not need to adjust CFA for RVV");
  SmallString<64> Expr;
  std::string CommentBuffer;
  llvm::raw_string_ostream Comment(CommentBuffer);
  // Build up the expression (Reg + FixedOffset + ScalableOffset * VLENB).
  // DW_OP_bregN takes a SLEB128 offset operand (0 here).
  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, isEH: true);
  Expr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
  Expr.push_back(Elt: 0);
  if (Reg == SPReg)
    Comment << "sp";
  else
    Comment << printReg(Reg, TRI: &TRI);

  appendScalableVectorExpression(TRI, Expr, Offset, Comment);

  // Wrap the expression: opcode, ULEB128 expression length, expression bytes.
  SmallString<64> DefCfaExpr;
  DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression);
  appendLEB128<LEB128Sign::Unsigned>(Buffer&: DefCfaExpr, Value: Expr.size());
  DefCfaExpr.append(RHS: Expr.str());

  return MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str(), Loc: SMLoc(),
                                        Comment: Comment.str());
}
751
// Build a DW_CFA_expression CFI escape recording that Reg is saved at
// CFA + (FixedOffset + ScalableOffset * VLENB), for CSRs spilled to the RVV
// part of the frame.
static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
                                           Register Reg, StackOffset Offset) {
  assert(Offset.getScalable() != 0 && "Did not need to adjust CFA for RVV");
  SmallString<64> Expr;
  std::string CommentBuffer;
  llvm::raw_string_ostream Comment(CommentBuffer);
  Comment << printReg(Reg, TRI: &TRI) << " @ cfa";

  // Build up the expression (FixedOffset + ScalableOffset * VLENB).
  appendScalableVectorExpression(TRI, Expr, Offset, Comment);

  // DW_CFA_expression operands: ULEB128 register number, ULEB128 expression
  // length, then the expression bytes.
  SmallString<64> DefCfaExpr;
  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, isEH: true);
  DefCfaExpr.push_back(Elt: dwarf::DW_CFA_expression);
  appendLEB128<LEB128Sign::Unsigned>(Buffer&: DefCfaExpr, Value: DwarfReg);
  appendLEB128<LEB128Sign::Unsigned>(Buffer&: DefCfaExpr, Value: Expr.size());
  DefCfaExpr.append(RHS: Expr.str());

  return MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str(), Loc: SMLoc(),
                                        Comment: Comment.str());
}
773
/// Allocate \p Offset bytes of stack below SP and, when \p NeedProbe is set,
/// store a zero at every ProbeSize step so that guard pages are touched in
/// order (stack-clash protection). Three strategies are used depending on the
/// allocation size:
///   1. Offset <= ProbeSize: a single SP adjustment (plus one trailing probe
///      when \p DynAllocation requires the final SP to be probed).
///   2. Offset < ProbeSize * 5: a fully unrolled sequence of adjust+probe
///      pairs followed by the residual adjustment.
///   3. Otherwise: a PROBED_STACKALLOC pseudo, expanded into a probe loop
///      later by `inlineStackProbe`.
/// \p RealStackSize is the CFA offset advertised via CFI once this allocation
/// completes; it may exceed \p Offset when part of the frame was already
/// allocated (e.g. by a push or an earlier SP adjustment). \p Flag tags all
/// emitted instructions (FrameSetup/FrameDestroy).
void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineFunction &MF, uint64_t Offset,
                                       uint64_t RealStackSize, bool EmitCFI,
                                       bool NeedProbe, uint64_t ProbeSize,
                                       bool DynAllocation,
                                       MachineInstr::MIFlag Flag) const {
  DebugLoc DL;
  const RISCVRegisterInfo *RI = STI.getRegisterInfo();
  const RISCVInstrInfo *TII = STI.getInstrInfo();
  bool IsRV64 = STI.is64Bit();
  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);

  // Simply allocate the stack if it's not big enough to require a probe.
  if (!NeedProbe || Offset <= ProbeSize) {
    RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg,
                  Offset: StackOffset::getFixed(Fixed: -Offset), Flag,
                  RequiredAlign: getStackAlign());

    if (EmitCFI)
      CFIBuilder.buildDefCFAOffset(Offset: RealStackSize);

    // A dynamic allocation later moves SP below here, so the lowest static
    // allocation point must itself be probed.
    if (NeedProbe && DynAllocation) {
      // s[d|w] zero, 0(sp)
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
          .addReg(RegNo: RISCV::X0)
          .addReg(RegNo: SPReg)
          .addImm(Val: 0)
          .setMIFlags(Flag);
    }

    return;
  }

  // Unroll the probe loop depending on the number of iterations.
  if (Offset < ProbeSize * 5) {
    // Portion of the frame that was allocated before this call; added to the
    // running CFA offset after each step so unwind info stays accurate.
    uint64_t CFAAdjust = RealStackSize - Offset;

    uint64_t CurrentOffset = 0;
    while (CurrentOffset + ProbeSize <= Offset) {
      RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg,
                    Offset: StackOffset::getFixed(Fixed: -ProbeSize), Flag,
                    RequiredAlign: getStackAlign());
      // s[d|w] zero, 0(sp)
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
          .addReg(RegNo: RISCV::X0)
          .addReg(RegNo: SPReg)
          .addImm(Val: 0)
          .setMIFlags(Flag);

      CurrentOffset += ProbeSize;
      if (EmitCFI)
        CFIBuilder.buildDefCFAOffset(Offset: CurrentOffset + CFAAdjust);
    }

    // Allocate whatever is left over (< ProbeSize, so no probe needed unless
    // a dynamic allocation will move SP further down).
    uint64_t Residual = Offset - CurrentOffset;
    if (Residual) {
      RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg,
                    Offset: StackOffset::getFixed(Fixed: -Residual), Flag,
                    RequiredAlign: getStackAlign());
      if (EmitCFI)
        CFIBuilder.buildDefCFAOffset(Offset: RealStackSize);

      if (DynAllocation) {
        // s[d|w] zero, 0(sp)
        BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
            .addReg(RegNo: RISCV::X0)
            .addReg(RegNo: SPReg)
            .addImm(Val: 0)
            .setMIFlags(Flag);
      }
    }

    return;
  }

  // Emit a variable-length allocation probing loop.
  uint64_t RoundedSize = alignDown(Value: Offset, Align: ProbeSize);
  uint64_t Residual = Offset - RoundedSize;

  // X6 (t1) is used as the loop's target register; it is a caller-saved
  // temporary that is free in the prologue.
  Register TargetReg = RISCV::X6;
  // SUB TargetReg, SP, RoundedSize
  RI->adjustReg(MBB, II: MBBI, DL, DestReg: TargetReg, SrcReg: SPReg,
                Offset: StackOffset::getFixed(Fixed: -RoundedSize), Flag,
                RequiredAlign: getStackAlign());

  if (EmitCFI) {
    // Set the CFA register to TargetReg, since SP moves inside the loop and
    // cannot describe the CFA while the loop runs.
    CFIBuilder.buildDefCFA(Reg: TargetReg, Offset: RoundedSize);
  }

  // It will be expanded to a probe loop in `inlineStackProbe`.
  BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::PROBED_STACKALLOC)).addReg(RegNo: TargetReg);

  if (EmitCFI) {
    // Set the CFA register back to SP.
    CFIBuilder.buildDefCFARegister(Reg: SPReg);
  }

  if (Residual) {
    RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg,
                  Offset: StackOffset::getFixed(Fixed: -Residual), Flag,
                  RequiredAlign: getStackAlign());
    if (DynAllocation) {
      // s[d|w] zero, 0(sp)
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
          .addReg(RegNo: RISCV::X0)
          .addReg(RegNo: SPReg)
          .addImm(Val: 0)
          .setMIFlags(Flag);
    }
  }

  if (EmitCFI)
    CFIBuilder.buildDefCFAOffset(Offset);
}
886
887static bool isPush(unsigned Opcode) {
888 switch (Opcode) {
889 case RISCV::CM_PUSH:
890 case RISCV::QC_CM_PUSH:
891 case RISCV::QC_CM_PUSHFP:
892 return true;
893 default:
894 return false;
895 }
896}
897
898static bool isPop(unsigned Opcode) {
899 // There are other pops but these are the only ones introduced during this
900 // pass.
901 switch (Opcode) {
902 case RISCV::CM_POP:
903 case RISCV::QC_CM_POP:
904 return true;
905 default:
906 return false;
907 }
908}
909
910static unsigned getPushOpcode(RISCVMachineFunctionInfo::PushPopKind Kind,
911 bool UpdateFP) {
912 switch (Kind) {
913 case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp:
914 return RISCV::CM_PUSH;
915 case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp:
916 return UpdateFP ? RISCV::QC_CM_PUSHFP : RISCV::QC_CM_PUSH;
917 default:
918 llvm_unreachable("Unhandled PushPopKind");
919 }
920}
921
922static unsigned getPopOpcode(RISCVMachineFunctionInfo::PushPopKind Kind) {
923 // There are other pops but they are introduced later by the Push/Pop
924 // Optimizer.
925 switch (Kind) {
926 case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp:
927 return RISCV::CM_POP;
928 case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp:
929 return RISCV::QC_CM_POP;
930 default:
931 llvm_unreachable("Unhandled PushPopKind");
932 }
933}
934
/// Emit the function prologue: swap interrupt stacks (SiFive CLIC), spill the
/// shadow call stack pointer, allocate the scalar stack (possibly split into
/// two SP adjustments, folded into a CM.PUSH immediate, or handled by a
/// libcall), establish the frame pointer, allocate the RVV stack region,
/// realign SP if required, and emit matching DWARF CFI throughout.
/// The order of steps mirrors the order of FrameSetup instructions already
/// inserted by the callee-saved spill code; MBBI is threaded through them.
void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  const RISCVRegisterInfo *RI = STI.getRegisterInfo();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  bool PreferAscendingLS = STI.preferAscendingLoadStore();

  Register BPReg = RISCVABI::getBPReg();

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // SiFive CLIC needs to swap `sp` into `sf.mscratchcsw`
  emitSiFiveCLICStackSwap(MF, MBB, MBBI, DL);

  // Emit prologue for shadow call stack.
  emitSCSPrologue(MF, MBB, MI: MBBI, DL);

  // We keep track of the first instruction because it might be a
  // `(QC.)CM.PUSH(FP)`, and we may need to adjust the immediate rather than
  // inserting an `addi sp, sp, -N*16`
  auto PossiblePush = MBBI;

  // Skip past all callee-saved register spill instructions.
  while (MBBI != MBB.end() && MBBI->getFlag(Flag: MachineInstr::FrameSetup))
    ++MBBI;

  // Determine the correct frame layout
  determineFrameLayout(MF);

  const auto &CSI = MFI.getCalleeSavedInfo();

  // Skip to before the spills of scalar callee-saved registers
  // FIXME: assumes exactly one instruction is used to restore each
  // callee-saved register.
  MBBI =
      std::prev(x: MBBI, n: getRVVCalleeSavedInfo(MF, CSI).size() +
                           getUnmanagedCSI(MF, CSI, ReverseOrder: PreferAscendingLS).size());
  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
  bool NeedsDwarfCFI = needsDwarfCFI(MF);

  // If libcalls are used to spill and restore callee-saved registers, the frame
  // has two sections; the opaque section managed by the libcalls, and the
  // section managed by MachineFrameInfo which can also hold callee saved
  // registers in fixed stack slots, both of which have negative frame indices.
  // This gets even more complicated when incoming arguments are passed via the
  // stack, as these too have negative frame indices. An example is detailed
  // below:
  //
  //  | incoming arg | <- FI[-3]
  //  | libcallspill |
  //  | calleespill  | <- FI[-2]
  //  | calleespill  | <- FI[-1]
  //  | this_frame   | <- FI[0]
  //
  // For negative frame indices, the offset from the frame pointer will differ
  // depending on which of these groups the frame index applies to.
  // The following calculates the correct offset knowing the number of callee
  // saved registers spilt by the two methods.
  if (int LibCallRegs = getLibCallID(MF, CSI: MFI.getCalleeSavedInfo()) + 1) {
    // Calculate the size of the frame managed by the libcall. The stack
    // alignment of these libcalls should be the same as how we set it in
    // getABIStackAlignment.
    unsigned LibCallFrameSize =
        alignTo(Size: (STI.getXLen() / 8) * LibCallRegs, A: getStackAlign());
    RVFI->setLibCallStackSize(LibCallFrameSize);

    if (NeedsDwarfCFI) {
      CFIBuilder.buildDefCFAOffset(Offset: LibCallFrameSize);
      for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
        CFIBuilder.buildOffset(Reg: CS.getReg(),
                               Offset: MFI.getObjectOffset(ObjectIdx: CS.getFrameIdx()));
    }
  }

  // FIXME (note copied from Lanai): This appears to be overallocating. Needs
  // investigation. Get the number of bytes to allocate from the FrameInfo.
  uint64_t RealStackSize = getStackSizeWithRVVPadding(MF);
  // StackSize excludes what a libcall/push already allocated on our behalf.
  uint64_t StackSize = RealStackSize - RVFI->getReservedSpillsSize();
  uint64_t RVVStackSize = RVFI->getRVVStackSize();

  // Early exit if there is no need to allocate on the stack
  if (RealStackSize == 0 && !MFI.adjustsStack() && RVVStackSize == 0)
    return;

  // If the stack pointer has been marked as reserved, then produce an error if
  // the frame requires stack allocation
  if (STI.isRegisterReservedByUser(i: SPReg))
    MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
        MF.getFunction(), "Stack pointer required, but has been reserved."});

  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
  // Split the SP adjustment to reduce the offsets of callee saved spill.
  if (FirstSPAdjustAmount) {
    StackSize = FirstSPAdjustAmount;
    RealStackSize = FirstSPAdjustAmount;
  }

  if (RVFI->useQCIInterrupt(MF)) {
    // The function starts with `QC.C.MIENTER(.NEST)`, so the `(QC.)CM.PUSH(FP)`
    // could only be the next instruction.
    ++PossiblePush;

    if (NeedsDwarfCFI) {
      // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
      // could be. The PUSH will also get its own CFI metadata for its own
      // modifications, which should come after the PUSH.
      CFIInstBuilder PushCFIBuilder(MBB, PossiblePush,
                                    MachineInstr::FrameSetup);
      PushCFIBuilder.buildDefCFAOffset(Offset: QCIInterruptPushAmount);
      for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
        PushCFIBuilder.buildOffset(Reg: CS.getReg(),
                                   Offset: MFI.getObjectOffset(ObjectIdx: CS.getFrameIdx()));
    }
  }

  if (RVFI->isPushable(MF) && PossiblePush != MBB.end() &&
      isPush(Opcode: PossiblePush->getOpcode())) {
    // Use available stack adjustment in push instruction to allocate additional
    // stack space. Align the stack size down to a multiple of 16. This is
    // needed for RVE.
    // FIXME: Can we increase the stack size to a multiple of 16 instead?
    // The push immediate can absorb at most 48 bytes of extra adjustment.
    uint64_t StackAdj =
        std::min(a: alignDown(Value: StackSize, Align: 16), b: static_cast<uint64_t>(48));
    PossiblePush->getOperand(i: 1).setImm(StackAdj);
    StackSize -= StackAdj;

    if (NeedsDwarfCFI) {
      CFIBuilder.buildDefCFAOffset(Offset: RealStackSize - StackSize);
      for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
        CFIBuilder.buildOffset(Reg: CS.getReg(),
                               Offset: MFI.getObjectOffset(ObjectIdx: CS.getFrameIdx()));
    }
  }

  // Allocate space on the stack if necessary.
  auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
  const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
  bool NeedProbe = TLI->hasInlineStackProbe(MF);
  uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign: getStackAlign());
  bool DynAllocation =
      MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
  if (StackSize != 0)
    allocateStack(MBB, MBBI, MF, Offset: StackSize, RealStackSize, EmitCFI: NeedsDwarfCFI,
                  NeedProbe, ProbeSize, DynAllocation,
                  Flag: MachineInstr::FrameSetup);

  // Save SiFive CLIC CSRs into Stack
  emitSiFiveCLICPreemptibleSaves(MF, MBB, MBBI, DL);

  // The frame pointer is callee-saved, and code has been generated for us to
  // save it to the stack. We need to skip over the storing of callee-saved
  // registers as the frame pointer must be modified after it has been saved
  // to the stack, not before.
  // FIXME: assumes exactly one instruction is used to save each callee-saved
  // register.
  std::advance(i&: MBBI, n: getUnmanagedCSI(MF, CSI, ReverseOrder: PreferAscendingLS).size());
  CFIBuilder.setInsertPoint(MBBI);

  // Iterate over list of callee-saved registers and emit .cfi_offset
  // directives.
  if (NeedsDwarfCFI) {
    for (const CalleeSavedInfo &CS :
         getUnmanagedCSI(MF, CSI, ReverseOrder: PreferAscendingLS)) {
      MCRegister Reg = CS.getReg();
      int64_t Offset = MFI.getObjectOffset(ObjectIdx: CS.getFrameIdx());
      // Emit CFI for both sub-registers. The even register is at the base
      // offset and odd at base+4.
      if (RISCV::GPRPairRegClass.contains(Reg)) {
        MCRegister EvenReg = RI->getSubReg(Reg, Idx: RISCV::sub_gpr_even);
        MCRegister OddReg = RI->getSubReg(Reg, Idx: RISCV::sub_gpr_odd);
        CFIBuilder.buildOffset(Reg: EvenReg, Offset);
        CFIBuilder.buildOffset(Reg: OddReg, Offset: Offset + 4);
      } else {
        CFIBuilder.buildOffset(Reg, Offset);
      }
    }
  }

  // Generate new FP.
  if (hasFP(MF)) {
    if (STI.isRegisterReservedByUser(i: FPReg))
      MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
          MF.getFunction(), "Frame pointer required, but has been reserved."});
    // The frame pointer does need to be reserved from register allocation.
    assert(MF.getRegInfo().isReserved(FPReg) && "FP not reserved");

    // Some stack management variants automatically keep FP updated, so we don't
    // need an instruction to do so.
    if (!RVFI->hasImplicitFPUpdates(MF)) {
      RI->adjustReg(
          MBB, II: MBBI, DL, DestReg: FPReg, SrcReg: SPReg,
          Offset: StackOffset::getFixed(Fixed: RealStackSize - RVFI->getVarArgsSaveSize()),
          Flag: MachineInstr::FrameSetup, RequiredAlign: getStackAlign());
    }

    if (NeedsDwarfCFI)
      CFIBuilder.buildDefCFA(Reg: FPReg, Offset: RVFI->getVarArgsSaveSize());
  }

  uint64_t SecondSPAdjustAmount = 0;
  // Emit the second SP adjustment after saving callee saved registers.
  if (FirstSPAdjustAmount) {
    SecondSPAdjustAmount = getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
    assert(SecondSPAdjustAmount > 0 &&
           "SecondSPAdjustAmount should be greater than zero");

    // Once FP is established it tracks the CFA, so CFI here is only needed
    // when there is no frame pointer.
    allocateStack(MBB, MBBI, MF, Offset: SecondSPAdjustAmount,
                  RealStackSize: getStackSizeWithRVVPadding(MF), EmitCFI: NeedsDwarfCFI && !hasFP(MF),
                  NeedProbe, ProbeSize, DynAllocation,
                  Flag: MachineInstr::FrameSetup);
  }

  if (RVVStackSize) {
    if (NeedProbe) {
      allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, Amount: RVVStackSize,
                                  Flag: MachineInstr::FrameSetup,
                                  EmitCFI: NeedsDwarfCFI && !hasFP(MF), DynAllocation);
    } else {
      // We must keep the stack pointer aligned through any intermediate
      // updates.
      RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg,
                    Offset: StackOffset::getScalable(Scalable: -RVVStackSize),
                    Flag: MachineInstr::FrameSetup, RequiredAlign: getStackAlign());
    }

    if (NeedsDwarfCFI && !hasFP(MF)) {
      // Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
      CFIBuilder.insertCFIInst(CFIInst: createDefCFAExpression(
          TRI: *RI, Reg: SPReg,
          Offset: StackOffset::get(Fixed: getStackSizeWithRVVPadding(MF), Scalable: RVVStackSize / 8)));
    }

    std::advance(i&: MBBI, n: getRVVCalleeSavedInfo(MF, CSI).size());
    if (NeedsDwarfCFI)
      emitCalleeSavedRVVPrologCFI(MBB, MI: MBBI, HasFP: hasFP(MF));
  }

  if (hasFP(MF)) {
    // Realign Stack
    const RISCVRegisterInfo *RI = STI.getRegisterInfo();
    if (RI->hasStackRealignment(MF)) {
      Align MaxAlignment = MFI.getMaxAlign();

      const RISCVInstrInfo *TII = STI.getInstrInfo();
      // Prefer a single ANDI when the mask fits in a 12-bit immediate;
      // otherwise clear the low bits with a SRLI/SLLI pair.
      if (isInt<12>(x: -(int)MaxAlignment.value())) {
        BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::ANDI), DestReg: SPReg)
            .addReg(RegNo: SPReg)
            .addImm(Val: -(int)MaxAlignment.value())
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        unsigned ShiftAmount = Log2(A: MaxAlignment);
        Register VR =
            MF.getRegInfo().createVirtualRegister(RegClass: &RISCV::GPRRegClass);
        BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::SRLI), DestReg: VR)
            .addReg(RegNo: SPReg)
            .addImm(Val: ShiftAmount)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::SLLI), DestReg: SPReg)
            .addReg(RegNo: VR)
            .addImm(Val: ShiftAmount)
            .setMIFlag(MachineInstr::FrameSetup);
      }
      if (NeedProbe && RVVStackSize == 0) {
        // Do a probe if the align + size allocated just passed the probe size
        // and was not yet probed.
        if (SecondSPAdjustAmount < ProbeSize &&
            SecondSPAdjustAmount + MaxAlignment.value() >= ProbeSize) {
          bool IsRV64 = STI.is64Bit();
          BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
              .addReg(RegNo: RISCV::X0)
              .addReg(RegNo: SPReg)
              .addImm(Val: 0)
              .setMIFlags(MachineInstr::FrameSetup);
        }
      }
      // FP will be used to restore the frame in the epilogue, so we need
      // another base register BP to record SP after re-alignment. SP will
      // track the current stack after allocating variable sized objects.
      if (hasBP(MF)) {
        // move BP, SP
        BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::ADDI), DestReg: BPReg)
            .addReg(RegNo: SPReg)
            .addImm(Val: 0)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }
  }
}
1231
/// Restore SP upward by \p StackSize bytes (tagged FrameDestroy) and, when
/// DWARF CFI is needed, re-advertise the CFA as SP + \p CFAOffset.
/// \p StackSize is an in-out parameter and is zeroed here so callers can use
/// it as a "still to deallocate" accumulator across multiple calls.
void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
                                         MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI,
                                         const DebugLoc &DL,
                                         uint64_t &StackSize,
                                         int64_t CFAOffset) const {
  const RISCVRegisterInfo *RI = STI.getRegisterInfo();

  RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg,
                Offset: StackOffset::getFixed(Fixed: StackSize),
                Flag: MachineInstr::FrameDestroy, RequiredAlign: getStackAlign());
  StackSize = 0;

  if (needsDwarfCFI(MF))
    CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
        .buildDefCFAOffset(Offset: CFAOffset);
}
1248
/// Emit the function epilogue, mirroring emitPrologue in reverse: deallocate
/// the RVV region, undo the split SP adjustment, restore SP from FP when the
/// stack size is not statically known, fold deallocation into CM.POP / restore
/// libcalls where possible, restore SiFive CLIC state, pop the shadow call
/// stack, and keep DWARF CFI consistent at every step.
void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  const RISCVRegisterInfo *RI = STI.getRegisterInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  bool PreferAscendingLS = STI.preferAscendingLoadStore();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();

    // Skip to before the restores of all callee-saved registers.
    while (MBBI != MBB.begin() &&
           std::prev(x: MBBI)->getFlag(Flag: MachineInstr::FrameDestroy))
      --MBBI;
  }

  const auto &CSI = MFI.getCalleeSavedInfo();

  // Skip to before the restores of scalar callee-saved registers
  // FIXME: assumes exactly one instruction is used to restore each
  // callee-saved register.
  auto FirstScalarCSRRestoreInsn =
      std::next(x: MBBI, n: getRVVCalleeSavedInfo(MF, CSI).size());
  CFIInstBuilder CFIBuilder(MBB, FirstScalarCSRRestoreInsn,
                            MachineInstr::FrameDestroy);
  bool NeedsDwarfCFI = needsDwarfCFI(MF);

  // With a split SP adjustment only the first amount remains to be undone at
  // this point; otherwise the full padded frame size does.
  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
  uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
                                               : getStackSizeWithRVVPadding(MF);
  uint64_t StackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
                                           : getStackSizeWithRVVPadding(MF) -
                                                 RVFI->getReservedSpillsSize();
  uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
  uint64_t RVVStackSize = RVFI->getRVVStackSize();

  bool RestoreSPFromFP = RI->hasStackRealignment(MF) ||
                         MFI.hasVarSizedObjects() || !hasReservedCallFrame(MF);
  if (RVVStackSize) {
    // If RestoreSPFromFP the stack pointer will be restored using the frame
    // pointer value.
    if (!RestoreSPFromFP)
      RI->adjustReg(MBB, II: FirstScalarCSRRestoreInsn, DL, DestReg: SPReg,
                    SrcReg: SPReg, Offset: StackOffset::getScalable(Scalable: RVVStackSize),
                    Flag: MachineInstr::FrameDestroy, RequiredAlign: getStackAlign());

    if (NeedsDwarfCFI) {
      // With no FP the CFA was a vlenb-based expression; switch it back to a
      // fixed SP offset now that the RVV region is gone.
      if (!hasFP(MF))
        CFIBuilder.buildDefCFA(Reg: SPReg, Offset: RealStackSize);
      emitCalleeSavedRVVEpilogCFI(MBB, MI: FirstScalarCSRRestoreInsn);
    }
  }

  if (FirstSPAdjustAmount) {
    uint64_t SecondSPAdjustAmount =
        getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
    assert(SecondSPAdjustAmount > 0 &&
           "SecondSPAdjustAmount should be greater than zero");

    // If RestoreSPFromFP the stack pointer will be restored using the frame
    // pointer value.
    if (!RestoreSPFromFP)
      RI->adjustReg(MBB, II: FirstScalarCSRRestoreInsn, DL, DestReg: SPReg,
                    SrcReg: SPReg, Offset: StackOffset::getFixed(Fixed: SecondSPAdjustAmount),
                    Flag: MachineInstr::FrameDestroy, RequiredAlign: getStackAlign());

    if (NeedsDwarfCFI && !hasFP(MF))
      CFIBuilder.buildDefCFAOffset(Offset: FirstSPAdjustAmount);
  }

  // Restore the stack pointer using the value of the frame pointer. Only
  // necessary if the stack pointer was modified, meaning the stack size is
  // unknown.
  //
  // In order to make sure the stack point is right through the EH region,
  // we also need to restore stack pointer from the frame pointer if we
  // don't preserve stack space within prologue/epilogue for outgoing variables,
  // normally it's just checking the variable sized object is present or not
  // is enough, but we also don't preserve that at prologue/epilogue when
  // have vector objects in stack.
  if (RestoreSPFromFP) {
    assert(hasFP(MF) && "frame pointer should not have been eliminated");
    RI->adjustReg(MBB, II: FirstScalarCSRRestoreInsn, DL, DestReg: SPReg,
                  SrcReg: FPReg, Offset: StackOffset::getFixed(Fixed: -FPOffset),
                  Flag: MachineInstr::FrameDestroy, RequiredAlign: getStackAlign());
  }

  // SP is now at a known distance from the CFA; stop describing it via FP,
  // which is about to be restored.
  if (NeedsDwarfCFI && hasFP(MF))
    CFIBuilder.buildDefCFA(Reg: SPReg, Offset: RealStackSize);

  // Skip to after the restores of scalar callee-saved registers
  // FIXME: assumes exactly one instruction is used to restore each
  // callee-saved register.
  MBBI = std::next(x: FirstScalarCSRRestoreInsn,
                   n: getUnmanagedCSI(MF, CSI, ReverseOrder: PreferAscendingLS).size());
  CFIBuilder.setInsertPoint(MBBI);

  if (getLibCallID(MF, CSI) != -1) {
    // tail __riscv_restore_[0-12] instruction is considered as a terminator,
    // therefore it is unnecessary to place any CFI instructions after it. Just
    // deallocate stack if needed and return.
    if (StackSize != 0)
      deallocateStack(MF, MBB, MBBI, DL, StackSize,
                      CFAOffset: RVFI->getLibCallStackSize());

    // Emit epilogue for shadow call stack.
    emitSCSEpilogue(MF, MBB, MI: MBBI, DL);
    return;
  }

  // Recover callee-saved registers.
  if (NeedsDwarfCFI) {
    for (const CalleeSavedInfo &CS :
         getUnmanagedCSI(MF, CSI, ReverseOrder: PreferAscendingLS)) {
      MCRegister Reg = CS.getReg();
      // Emit CFI for both sub-registers.
      if (RISCV::GPRPairRegClass.contains(Reg)) {
        MCRegister EvenReg = RI->getSubReg(Reg, Idx: RISCV::sub_gpr_even);
        MCRegister OddReg = RI->getSubReg(Reg, Idx: RISCV::sub_gpr_odd);
        CFIBuilder.buildRestore(Reg: EvenReg);
        CFIBuilder.buildRestore(Reg: OddReg);
      } else {
        CFIBuilder.buildRestore(Reg);
      }
    }
  }

  if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(Opcode: MBBI->getOpcode())) {
    // Use available stack adjustment in pop instruction to deallocate stack
    // space. Align the stack size down to a multiple of 16. This is needed for
    // RVE.
    // FIXME: Can we increase the stack size to a multiple of 16 instead?
    uint64_t StackAdj =
        std::min(a: alignDown(Value: StackSize, Align: 16), b: static_cast<uint64_t>(48));
    MBBI->getOperand(i: 1).setImm(StackAdj);
    StackSize -= StackAdj;

    if (StackSize != 0)
      deallocateStack(MF, MBB, MBBI, DL, StackSize,
                      /*stack_adj of cm.pop instr*/ CFAOffset: RealStackSize - StackSize);

    // If the pop is not immediately followed by a return, the CFI after the
    // pop must record that the CSRs are restored and the frame is gone.
    auto NextI = next_nodbg(It: MBBI, End: MBB.end());
    if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET) {
      ++MBBI;
      if (NeedsDwarfCFI) {
        CFIBuilder.setInsertPoint(MBBI);

        for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
          CFIBuilder.buildRestore(Reg: CS.getReg());

        // Update CFA Offset. If this is a QCI interrupt function, there will
        // be a leftover offset which is deallocated by `QC.C.MILEAVERET`,
        // otherwise getQCIInterruptStackSize() will be 0.
        CFIBuilder.buildDefCFAOffset(Offset: RVFI->getQCIInterruptStackSize());
      }
    }
  }

  emitSiFiveCLICPreemptibleRestores(MF, MBB, MBBI, DL);

  // Deallocate stack if StackSize isn't a zero yet. If this is a QCI interrupt
  // function, there will be a leftover offset which is deallocated by
  // `QC.C.MILEAVERET`, otherwise getQCIInterruptStackSize() will be 0.
  if (StackSize != 0)
    deallocateStack(MF, MBB, MBBI, DL, StackSize,
                    CFAOffset: RVFI->getQCIInterruptStackSize());

  // Emit epilogue for shadow call stack.
  emitSCSEpilogue(MF, MBB, MI: MBBI, DL);

  // SiFive CLIC needs to swap `sf.mscratchcsw` into `sp`
  emitSiFiveCLICStackSwap(MF, MBB, MBBI, DL);
}
1435
/// Compute the offset of frame index \p FI from an appropriate base register,
/// which is reported through \p FrameReg. Base selection: callee-saved slots
/// are addressed from SP; realigned non-fixed objects from BP (or SP when
/// there are no variable-sized objects); everything else from the register
/// chosen by getFrameRegister (FP when a frame pointer exists). The returned
/// StackOffset may carry a scalable (vlenb-multiplied) component for objects
/// on the RVV stack.
StackOffset
RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                           Register &FrameReg) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  // Callee-saved registers should be referenced relative to the stack
  // pointer (positive offset), otherwise use the frame pointer (negative
  // offset).
  const auto &CSI = getUnmanagedCSI(MF, CSI: MFI.getCalleeSavedInfo(),
                                    ReverseOrder: STI.preferAscendingLoadStore());
  int MinCSFI = 0;
  int MaxCSFI = -1;
  StackOffset Offset;
  auto StackID = MFI.getStackID(ObjectIdx: FI);

  assert((StackID == TargetStackID::Default ||
          StackID == TargetStackID::ScalableVector) &&
         "Unexpected stack ID for the frame object.");
  if (StackID == TargetStackID::Default) {
    assert(getOffsetOfLocalArea() == 0 && "LocalAreaOffset is not 0!");
    Offset = StackOffset::getFixed(Fixed: MFI.getObjectOffset(ObjectIdx: FI) +
                                       MFI.getOffsetAdjustment());
  } else if (StackID == TargetStackID::ScalableVector) {
    Offset = StackOffset::getScalable(Scalable: MFI.getObjectOffset(ObjectIdx: FI));
  }

  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);

  if (CSI.size()) {
    MinCSFI = std::min(a: CSI.front().getFrameIdx(), b: CSI.back().getFrameIdx());
    MaxCSFI = std::max(a: CSI.front().getFrameIdx(), b: CSI.back().getFrameIdx());
  }

  // Callee-saved slots: address from SP. With a split SP adjustment the
  // spills happen after only the first adjustment, so that is the distance
  // between SP and the CFA at spill time.
  if (FI >= MinCSFI && FI <= MaxCSFI) {
    FrameReg = SPReg;

    if (FirstSPAdjustAmount)
      Offset += StackOffset::getFixed(Fixed: FirstSPAdjustAmount);
    else
      Offset += StackOffset::getFixed(Fixed: getStackSizeWithRVVPadding(MF));
    return Offset;
  }

  if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(ObjectIdx: FI)) {
    // If the stack was realigned, the frame pointer is set in order to allow
    // SP to be restored, so we need another base register to record the stack
    // after realignment.
    // |--------------------------| -- <-- FP
    // | callee-allocated save    | | <----|
    // | area for register varargs| |      |
    // |--------------------------| |      |
    // | callee-saved registers   | |      |
    // |--------------------------| --     |
    // | realignment (the size of | |      |
    // | this area is not counted | |      |
    // | in MFI.getStackSize())   | |      |
    // |--------------------------| --     |-- MFI.getStackSize()
    // | RVV alignment padding    | |      |
    // | (not counted in          | |      |
    // | MFI.getStackSize() but   | |      |
    // | counted in               | |      |
    // | RVFI.getRVVStackSize())  | |      |
    // |--------------------------| --     |
    // | RVV objects              | |      |
    // | (not counted in          | |      |
    // | MFI.getStackSize())      | |      |
    // |--------------------------| --     |
    // | padding before RVV       | |      |
    // | (not counted in          | |      |
    // | MFI.getStackSize() or in | |      |
    // | RVFI.getRVVStackSize())  | |      |
    // |--------------------------| --     |
    // | scalar local variables   | | <----'
    // |--------------------------| -- <-- BP (if var sized objects present)
    // | VarSize objects          | |
    // |--------------------------| -- <-- SP
    if (hasBP(MF)) {
      FrameReg = RISCVABI::getBPReg();
    } else {
      // VarSize objects must be empty in this case!
      assert(!MFI.hasVarSizedObjects());
      FrameReg = SPReg;
    }
  } else {
    FrameReg = RI->getFrameRegister(MF);
  }

  if (FrameReg == FPReg) {
    Offset += StackOffset::getFixed(Fixed: RVFI->getVarArgsSaveSize());
    // When using FP to access scalable vector objects, we need to minus
    // the frame size.
    //
    // |--------------------------| -- <-- FP
    // | callee-allocated save    | |
    // | area for register varargs| |
    // |--------------------------| |
    // | callee-saved registers   | |
    // |--------------------------| | MFI.getStackSize()
    // | scalar local variables   | |
    // |--------------------------| -- (Offset of RVV objects is from here.)
    // | RVV objects              |
    // |--------------------------|
    // | VarSize objects          |
    // |--------------------------| <-- SP
    if (StackID == TargetStackID::ScalableVector) {
      assert(!RI->hasStackRealignment(MF) &&
             "Can't index across variable sized realign");
      // We don't expect any extra RVV alignment padding, as the stack size
      // and RVV object sections should be correct aligned in their own
      // right.
      assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) &&
             "Inconsistent stack layout");
      Offset -= StackOffset::getFixed(Fixed: MFI.getStackSize());
    }
    return Offset;
  }

  // This case handles indexing off both SP and BP.
  // If indexing off SP, there must not be any var sized objects
  assert(FrameReg == RISCVABI::getBPReg() || !MFI.hasVarSizedObjects());

  // When using SP to access frame objects, we need to add RVV stack size.
  //
  // |--------------------------| -- <-- FP
  // | callee-allocated save    | | <----|
  // | area for register varargs| |      |
  // |--------------------------| |      |
  // | callee-saved registers   | |      |
  // |--------------------------| --     |
  // | RVV alignment padding    | |      |
  // | (not counted in          | |      |
  // | MFI.getStackSize() but   | |      |
  // | counted in               | |      |
  // | RVFI.getRVVStackSize())  | |      |
  // |--------------------------| --     |
  // | RVV objects              | |      |-- MFI.getStackSize()
  // | (not counted in          | |      |
  // | MFI.getStackSize())      | |      |
  // |--------------------------| --     |
  // | padding before RVV       | |      |
  // | (not counted in          | |      |
  // | MFI.getStackSize())      | |      |
  // |--------------------------| --     |
  // | scalar local variables   | | <----'
  // |--------------------------| -- <-- BP (if var sized objects present)
  // | VarSize objects          | |
  // |--------------------------| -- <-- SP
  //
  // The total amount of padding surrounding RVV objects is described by
  // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV
  // objects to the required alignment.
  if (MFI.getStackID(ObjectIdx: FI) == TargetStackID::Default) {
    if (MFI.isFixedObjectIndex(ObjectIdx: FI)) {
      assert(!RI->hasStackRealignment(MF) &&
             "Can't index across variable sized realign");
      Offset += StackOffset::get(Fixed: getStackSizeWithRVVPadding(MF),
                                 Scalable: RVFI->getRVVStackSize());
    } else {
      Offset += StackOffset::getFixed(Fixed: MFI.getStackSize());
    }
  } else if (MFI.getStackID(ObjectIdx: FI) == TargetStackID::ScalableVector) {
    // Ensure the base of the RVV stack is correctly aligned: add on the
    // alignment padding.
    int ScalarLocalVarSize = MFI.getStackSize() -
                             RVFI->getCalleeSavedStackSize() -
                             RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
    Offset += StackOffset::get(Fixed: ScalarLocalVarSize, Scalable: RVFI->getRVVStackSize());
  }
  return Offset;
}
1608
1609static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
1610 const Register &Reg) {
1611 MCRegister BaseReg = TRI.getSubReg(Reg, Idx: RISCV::sub_vrm1_0);
1612 // If it's not a grouped vector register, it doesn't have subregister, so
1613 // the base register is just itself.
1614 if (!BaseReg.isValid())
1615 BaseReg = Reg;
1616 return BaseReg;
1617}
1618
// Decide which callee-saved registers this function must spill/restore.
// Extends the target-independent computation with: RVV subregister-accurate
// marking, forced RA/FP saves when a frame pointer is used, BP reservation,
// the cm.push X26/X27 pairing rule, Zilsd GPR-pair formation, and super-
// register coalescing when all subregisters are already saved.
void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                              BitVector &SavedRegs,
                                              RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  // In TargetFrameLowering::determineCalleeSaves, any vector register is marked
  // as saved if any of its subregister is clobbered, this is not correct in
  // vector registers. We only want the vector register to be marked as saved
  // if all of its subregisters are clobbered.
  // For example:
  // Original behavior: If v24 is marked, v24m2, v24m4, v24m8 are also marked.
  // Correct behavior: v24m2 is marked only if v24 and v25 are marked.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned CSReg = CSRegs[i];
    // Only vector registers need special care.
    if (!RISCV::VRRegClass.contains(Reg: getRVVBaseRegister(TRI, Reg: CSReg)))
      continue;

    // Clear the over-approximate marking made by the generic code; it is
    // re-established below only if this exact register is really clobbered.
    SavedRegs.reset(Idx: CSReg);

    auto SubRegs = TRI.subregs(Reg: CSReg);
    // Set the register and all its subregisters.
    if (!MRI.def_empty(RegNo: CSReg) || MRI.getUsedPhysRegsMask().test(Idx: CSReg)) {
      SavedRegs.set(CSReg);
      for (unsigned Reg : SubRegs)
        SavedRegs.set(Reg);
    }

  }

  // Unconditionally spill RA and FP only if the function uses a frame
  // pointer.
  if (hasFP(MF)) {
    SavedRegs.set(RAReg);
    SavedRegs.set(FPReg);
  }
  // Mark BP as used if function has dedicated base pointer.
  if (hasBP(MF))
    SavedRegs.set(RISCVABI::getBPReg());

  // When using cm.push/pop we must save X27 if we save X26.
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  if (RVFI->isPushable(MF) && SavedRegs.test(Idx: RISCV::X26))
    SavedRegs.set(RISCV::X27);

  // For Zilsd on RV32, append GPRPair registers to the CSR list. This prevents
  // the need to create register sets for each abi which is a lot more complex.
  // Don't use Zilsd for callee-saved coalescing if the required alignment
  // exceeds the stack alignment.
  bool UseZilsd = !STI.is64Bit() && STI.hasStdExtZilsd() &&
                  STI.getZilsdAlign() <= getStackAlign();
  if (UseZilsd) {
    SmallVector<MCPhysReg, 32> NewCSRs;
    SmallSet<MCPhysReg, 16> CSRSet;
    for (unsigned i = 0; CSRegs[i]; ++i) {
      NewCSRs.push_back(Elt: CSRegs[i]);
      CSRSet.insert(V: CSRegs[i]);
    }

    // Append GPRPair registers for pairs where both sub-registers are in CSR
    // list. Iterate through all GPRPairs and check if both sub-regs are CSRs.
    for (MCPhysReg Pair : RISCV::GPRPairRegClass) {
      MCPhysReg EvenReg = TRI.getSubReg(Reg: Pair, Idx: RISCV::sub_gpr_even);
      MCPhysReg OddReg = TRI.getSubReg(Reg: Pair, Idx: RISCV::sub_gpr_odd);
      if (CSRSet.contains(V: EvenReg) && CSRSet.contains(V: OddReg))
        NewCSRs.push_back(Elt: Pair);
    }

    // Install the extended CSR list so the coalescing loop below also
    // considers the newly appended pair registers.
    MRI.setCalleeSavedRegs(NewCSRs);
    CSRegs = MRI.getCalleeSavedRegs();
  }

  // Check if all subregisters are marked for saving. If so, set the super
  // register bit. For GPRPair, only check sub_gpr_even and sub_gpr_odd, not
  // aliases like X8_W or X8_H which are not set in SavedRegs.
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned CSReg = CSRegs[i];
    bool CombineToSuperReg;
    if (RISCV::GPRPairRegClass.contains(Reg: CSReg)) {
      MCPhysReg EvenReg = TRI.getSubReg(Reg: CSReg, Idx: RISCV::sub_gpr_even);
      MCPhysReg OddReg = TRI.getSubReg(Reg: CSReg, Idx: RISCV::sub_gpr_odd);
      CombineToSuperReg = SavedRegs.test(Idx: EvenReg) && SavedRegs.test(Idx: OddReg);
      // If s0(x8) is used as FP we can't generate load/store pair because it
      // breaks the frame chain.
      if (hasFP(MF) && CSReg == RISCV::X8_X9)
        CombineToSuperReg = false;
    } else {
      auto SubRegs = TRI.subregs(Reg: CSReg);
      CombineToSuperReg =
          !SubRegs.empty() && llvm::all_of(Range&: SubRegs, P: [&](unsigned Reg) {
            return SavedRegs.test(Idx: Reg);
          });
    }

    if (CombineToSuperReg)
      SavedRegs.set(CSReg);
  }

  // SiFive Preemptible Interrupt Handlers need additional frame entries
  createSiFivePreemptibleInterruptFrameEntries(MF, RVFI&: *RVFI);
}
1723
// Lay out all scalable-vector (RVV) stack objects, assigning each a negative
// offset from the RVV section base, and return the total RVV stack size
// together with the alignment the RVV section requires. Offsets and the
// returned size are in vscale-scaled units (multiples of one vector block).
std::pair<int64_t, Align>
RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Create a buffer of RVV objects to allocate.
  SmallVector<int, 8> ObjectsToAllocate;
  auto pushRVVObjects = [&](int FIBegin, int FIEnd) {
    for (int I = FIBegin, E = FIEnd; I != E; ++I) {
      unsigned StackID = MFI.getStackID(ObjectIdx: I);
      if (StackID != TargetStackID::ScalableVector)
        continue;
      if (MFI.isDeadObjectIndex(ObjectIdx: I))
        continue;

      ObjectsToAllocate.push_back(Elt: I);
    }
  };
  // First push RVV Callee Saved object, then push RVV stack object
  std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
  const auto &RVVCSI = getRVVCalleeSavedInfo(MF, CSI);
  if (!RVVCSI.empty())
    pushRVVObjects(RVVCSI[0].getFrameIdx(),
                   RVVCSI[RVVCSI.size() - 1].getFrameIdx() + 1);
  pushRVVObjects(0, MFI.getObjectIndexEnd() - RVVCSI.size());

  // The minimum alignment is 16 bytes.
  Align RVVStackAlign(16);
  const auto &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!ST.hasVInstructions()) {
    assert(ObjectsToAllocate.empty() &&
           "Can't allocate scalable-vector objects without V instructions");
    return std::make_pair(x: 0, y&: RVVStackAlign);
  }

  // Allocate all RVV locals and spills
  int64_t Offset = 0;
  for (int FI : ObjectsToAllocate) {
    // ObjectSize in bytes.
    int64_t ObjectSize = MFI.getObjectSize(ObjectIdx: FI);
    auto ObjectAlign =
        std::max(a: Align(RISCV::RVVBytesPerBlock), b: MFI.getObjectAlign(ObjectIdx: FI));
    // If the data type is the fractional vector type, reserve one vector
    // register for it.
    if (ObjectSize < RISCV::RVVBytesPerBlock)
      ObjectSize = RISCV::RVVBytesPerBlock;
    // Objects grow downward: record the offset as negative from the base.
    Offset = alignTo(Size: Offset + ObjectSize, A: ObjectAlign);
    MFI.setObjectOffset(ObjectIdx: FI, SPOffset: -Offset);
    // Update the maximum alignment of the RVV stack section
    RVVStackAlign = std::max(a: RVVStackAlign, b: ObjectAlign);
  }

  uint64_t StackSize = Offset;

  // Ensure the alignment of the RVV stack. Since we want the most-aligned
  // object right at the bottom (i.e., any padding at the top of the frame),
  // readjust all RVV objects down by the alignment padding.
  // Stack size and offsets are multiples of vscale, stack alignment is in
  // bytes, we can divide stack alignment by minimum vscale to get a maximum
  // stack alignment multiple of vscale.
  auto VScale =
      std::max<uint64_t>(a: ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, b: 1);
  if (auto RVVStackAlignVScale = RVVStackAlign.value() / VScale) {
    if (auto AlignmentPadding =
            offsetToAlignment(Value: StackSize, Alignment: Align(RVVStackAlignVScale))) {
      StackSize += AlignmentPadding;
      for (int FI : ObjectsToAllocate)
        MFI.setObjectOffset(ObjectIdx: FI, SPOffset: MFI.getObjectOffset(ObjectIdx: FI) - AlignmentPadding);
    }
  }

  return std::make_pair(x&: StackSize, y&: RVVStackAlign);
}
1796
1797static unsigned getScavSlotsNumForRVV(MachineFunction &MF) {
1798 // For RVV spill, scalable stack offsets computing requires up to two scratch
1799 // registers
1800 static constexpr unsigned ScavSlotsNumRVVSpillScalableObject = 2;
1801
1802 // For RVV spill, non-scalable stack offsets computing requires up to one
1803 // scratch register.
1804 static constexpr unsigned ScavSlotsNumRVVSpillNonScalableObject = 1;
1805
1806 // ADDI instruction's destination register can be used for computing
1807 // offsets. So Scalable stack offsets require up to one scratch register.
1808 static constexpr unsigned ScavSlotsADDIScalableObject = 1;
1809
1810 static constexpr unsigned MaxScavSlotsNumKnown =
1811 std::max(l: {ScavSlotsADDIScalableObject, ScavSlotsNumRVVSpillScalableObject,
1812 ScavSlotsNumRVVSpillNonScalableObject});
1813
1814 unsigned MaxScavSlotsNum = 0;
1815 if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions())
1816 return false;
1817 for (const MachineBasicBlock &MBB : MF)
1818 for (const MachineInstr &MI : MBB) {
1819 bool IsRVVSpill = RISCV::isRVVSpill(MI);
1820 for (auto &MO : MI.operands()) {
1821 if (!MO.isFI())
1822 continue;
1823 bool IsScalableVectorID = MF.getFrameInfo().getStackID(ObjectIdx: MO.getIndex()) ==
1824 TargetStackID::ScalableVector;
1825 if (IsRVVSpill) {
1826 MaxScavSlotsNum = std::max(
1827 a: MaxScavSlotsNum, b: IsScalableVectorID
1828 ? ScavSlotsNumRVVSpillScalableObject
1829 : ScavSlotsNumRVVSpillNonScalableObject);
1830 } else if (MI.getOpcode() == RISCV::ADDI && IsScalableVectorID) {
1831 MaxScavSlotsNum =
1832 std::max(a: MaxScavSlotsNum, b: ScavSlotsADDIScalableObject);
1833 }
1834 }
1835 if (MaxScavSlotsNum == MaxScavSlotsNumKnown)
1836 return MaxScavSlotsNumKnown;
1837 }
1838 return MaxScavSlotsNum;
1839}
1840
1841static bool hasRVVFrameObject(const MachineFunction &MF) {
1842 // Originally, the function will scan all the stack objects to check whether
1843 // if there is any scalable vector object on the stack or not. However, it
1844 // causes errors in the register allocator. In issue 53016, it returns false
1845 // before RA because there is no RVV stack objects. After RA, it returns true
1846 // because there are spilling slots for RVV values during RA. It will not
1847 // reserve BP during register allocation and generate BP access in the PEI
1848 // pass due to the inconsistent behavior of the function.
1849 //
1850 // The function is changed to use hasVInstructions() as the return value. It
1851 // is not precise, but it can make the register allocation correct.
1852 //
1853 // FIXME: Find a better way to make the decision or revisit the solution in
1854 // D103622.
1855 //
1856 // Refer to https://github.com/llvm/llvm-project/issues/53016.
1857 return MF.getSubtarget<RISCVSubtarget>().hasVInstructions();
1858}
1859
1860static unsigned estimateFunctionSizeInBytes(const MachineFunction &MF,
1861 const RISCVInstrInfo &TII) {
1862 unsigned FnSize = 0;
1863 for (auto &MBB : MF) {
1864 for (auto &MI : MBB) {
1865 // Far branches over 20-bit offset will be relaxed in branch relaxation
1866 // pass. In the worst case, conditional branches will be relaxed into
1867 // the following instruction sequence. Unconditional branches are
1868 // relaxed in the same way, with the exception that there is no first
1869 // branch instruction.
1870 //
1871 // foo
1872 // bne t5, t6, .rev_cond # `TII->getInstSizeInBytes(MI)` bytes
1873 // sd s11, 0(sp) # 4 bytes, or 2 bytes with Zca
1874 // jump .restore, s11 # 8 bytes
1875 // .rev_cond
1876 // bar
1877 // j .dest_bb # 4 bytes, or 2 bytes with Zca
1878 // .restore:
1879 // ld s11, 0(sp) # 4 bytes, or 2 bytes with Zca
1880 // .dest:
1881 // baz
1882 if (MI.isConditionalBranch())
1883 FnSize += TII.getInstSizeInBytes(MI);
1884 if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
1885 if (MF.getSubtarget<RISCVSubtarget>().hasStdExtZca())
1886 FnSize += 2 + 8 + 2 + 2;
1887 else
1888 FnSize += 4 + 8 + 4 + 4;
1889 continue;
1890 }
1891
1892 FnSize += TII.getInstSizeInBytes(MI);
1893 }
1894 }
1895 return FnSize;
1896}
1897
// Final frame bookkeeping before frame indices are replaced: lay out the RVV
// stack section, reserve register-scavenging spill slots for offsets/branches
// that may not be encodable, and record the callee-saved stack size.
void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  const RISCVRegisterInfo *RegInfo =
      MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
  const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterClass *RC = &RISCV::GPRRegClass;
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

  // Assign offsets to all scalable-vector objects and cache the results on
  // the function info for later frame-index resolution.
  int64_t RVVStackSize;
  Align RVVStackAlign;
  std::tie(args&: RVVStackSize, args&: RVVStackAlign) = assignRVVStackObjectOffsets(MF);

  RVFI->setRVVStackSize(RVVStackSize);
  RVFI->setRVVStackAlign(RVVStackAlign);

  if (hasRVVFrameObject(MF)) {
    // Ensure the entire stack is aligned to at least the RVV requirement: some
    // scalable-vector object alignments are not considered by the
    // target-independent code.
    MFI.ensureMaxAlignment(Alignment: RVVStackAlign);
  }

  unsigned ScavSlotsNum = 0;

  // estimateStackSize has been observed to under-estimate the final stack
  // size, so give ourselves wiggle-room by checking for stack size
  // representable an 11-bit signed field rather than 12-bits.
  if (!isInt<11>(x: MFI.estimateStackSize(MF)))
    ScavSlotsNum = 1;

  // Far branches over 20-bit offset require a spill slot for scratch register.
  bool IsLargeFunction = !isInt<20>(x: estimateFunctionSizeInBytes(MF, TII: *TII));
  if (IsLargeFunction)
    ScavSlotsNum = std::max(a: ScavSlotsNum, b: 1u);

  // RVV loads & stores have no capacity to hold the immediate address offsets
  // so we must always reserve an emergency spill slot if the MachineFunction
  // contains any RVV spills.
  ScavSlotsNum = std::max(a: ScavSlotsNum, b: getScavSlotsNumForRVV(MF));

  for (unsigned I = 0; I < ScavSlotsNum; I++) {
    int FI = MFI.CreateSpillStackObject(Size: RegInfo->getSpillSize(RC: *RC),
                                        Alignment: RegInfo->getSpillAlign(RC: *RC));
    RS->addScavengingFrameIndex(FI);

    // Remember one slot for branch relaxation to use as scratch storage.
    if (IsLargeFunction && RVFI->getBranchRelaxationScratchFrameIndex() == -1)
      RVFI->setBranchRelaxationScratchFrameIndex(FI);
  }

  // Sum the sizes of all default-stack callee-saved spill slots (plus any
  // reserved spills) and record the total for frame-offset computations.
  unsigned Size = RVFI->getReservedSpillsSize();
  for (const auto &Info : MFI.getCalleeSavedInfo()) {
    int FrameIdx = Info.getFrameIdx();
    if (FrameIdx < 0 || MFI.getStackID(ObjectIdx: FrameIdx) != TargetStackID::Default)
      continue;

    Size += MFI.getObjectSize(ObjectIdx: FrameIdx);
  }
  RVFI->setCalleeSavedStackSize(Size);
}
1958
1959// Not preserve stack space within prologue for outgoing variables when the
1960// function contains variable size objects or there are vector objects accessed
1961// by the frame pointer.
1962// Let eliminateCallFramePseudoInstr preserve stack space for it.
1963bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
1964 return !MF.getFrameInfo().hasVarSizedObjects() &&
1965 !(hasFP(MF) && hasRVVFrameObject(MF));
1966}
1967
// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
// When no call frame is reserved in the prologue, each pseudo is lowered to
// an explicit SP adjustment (with stack probing when required); otherwise the
// pseudo is simply erased.
MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MI) const {
  DebugLoc DL = MI->getDebugLoc();

  if (!hasReservedCallFrame(MF)) {
    // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and
    // ADJCALLSTACKUP must be converted to instructions manipulating the stack
    // pointer. This is necessary when there is a variable length stack
    // allocation (e.g. alloca), which means it's not possible to allocate
    // space for outgoing arguments from within the function prologue.
    int64_t Amount = MI->getOperand(i: 0).getImm();

    if (Amount != 0) {
      // Ensure the stack remains aligned after adjustment.
      Amount = alignSPAdjust(SPAdj: Amount);

      // ADJCALLSTACKDOWN grows the call frame, i.e. decrements SP.
      if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
        Amount = -Amount;

      const RISCVTargetLowering *TLI =
          MF.getSubtarget<RISCVSubtarget>().getTargetLowering();
      int64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign: getStackAlign());
      if (TLI->hasInlineStackProbe(MF) && -Amount >= ProbeSize) {
        // When stack probing is enabled, the decrement of SP may need to be
        // probed. We can handle both the decrement and the probing in
        // allocateStack.
        bool DynAllocation =
            MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
        allocateStack(MBB, MBBI: MI, MF, Offset: -Amount, RealStackSize: -Amount,
                      EmitCFI: needsDwarfCFI(MF) && !hasFP(MF),
                      /*NeedProbe=*/true, ProbeSize, DynAllocation,
                      Flag: MachineInstr::NoFlags);
      } else {
        // Plain SP adjustment; no probing necessary.
        const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
        RI.adjustReg(MBB, II: MI, DL, DestReg: SPReg, SrcReg: SPReg, Offset: StackOffset::getFixed(Fixed: Amount),
                     Flag: MachineInstr::NoFlags, RequiredAlign: getStackAlign());
      }
    }
  }

  return MBB.erase(I: MI);
}
2012
// We would like to split the SP adjustment to reduce prologue/epilogue
// as following instructions. In this way, the offset of the callee saved
// register could fit in a single store. Supposed that the first sp adjust
// amount is 2032.
//     add     sp,sp,-2032
//     sw      ra,2028(sp)
//     sw      s0,2024(sp)
//     sw      s1,2020(sp)
//     sw      s3,2012(sp)
//     sw      s4,2008(sp)
//     add     sp,sp,-64
//
// Returns the size of the first SP adjustment, or 0 when no split is needed.
uint64_t
RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
  const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  uint64_t StackSize = getStackSizeWithRVVPadding(MF);

  // Disable SplitSPAdjust if save-restore libcall, push/pop or QCI interrupts
  // are used. The callee-saved registers will be pushed by the save-restore
  // libcalls, so we don't have to split the SP adjustment in this case.
  if (RVFI->getReservedSpillsSize())
    return 0;

  // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
  // 12-bit and there exists a callee-saved register needing to be pushed.
  if (!isInt<12>(x: StackSize) && (CSI.size() > 0)) {
    // FirstSPAdjustAmount is chosen at most as (2048 - StackAlign) because
    // 2048 will cause sp = sp + 2048 in the epilogue to be split into multiple
    // instructions. Offsets smaller than 2048 can fit in a single load/store
    // instruction, and we have to stick with the stack alignment. 2048 has
    // 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for
    // RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment.
    const uint64_t StackAlign = getStackAlign().value();

    // Amount of (2048 - StackAlign) will prevent callee saved and restored
    // instructions be compressed, so try to adjust the amount to the largest
    // offset that stack compression instructions accept when target supports
    // compression instructions.
    if (STI.hasStdExtZca()) {
      // The compression extensions may support the following instructions:
      // riscv32: c.lwsp rd, offset[7:2] => 2^(6 + 2)
      //          c.swsp rs2, offset[7:2] => 2^(6 + 2)
      //          c.flwsp rd, offset[7:2] => 2^(6 + 2)
      //          c.fswsp rs2, offset[7:2] => 2^(6 + 2)
      // riscv64: c.ldsp rd, offset[8:3] => 2^(6 + 3)
      //          c.sdsp rs2, offset[8:3] => 2^(6 + 3)
      //          c.fldsp rd, offset[8:3] => 2^(6 + 3)
      //          c.fsdsp rs2, offset[8:3] => 2^(6 + 3)
      const uint64_t RVCompressLen = STI.getXLen() * 8;
      // Compared with amount (2048 - StackAlign), StackSize needs to
      // satisfy the following conditions to avoid using more instructions
      // to adjust the sp after adjusting the amount, such as
      // StackSize meets the condition (StackSize <= 2048 + RVCompressLen),
      // case1: Amount is 2048 - StackAlign: use addi + addi to adjust sp.
      // case2: Amount is RVCompressLen: use addi + addi to adjust sp.
      auto CanCompress = [&](uint64_t CompressLen) -> bool {
        if (StackSize <= 2047 + CompressLen ||
            (StackSize > 2048 * 2 - StackAlign &&
             StackSize <= 2047 * 2 + CompressLen) ||
            StackSize > 2048 * 3 - StackAlign)
          return true;

        return false;
      };
      // In the epilogue, addi sp, sp, 496 is used to recover the sp and it
      // can be compressed(C.ADDI16SP, offset can be [-512, 496]), but
      // addi sp, sp, 512 can not be compressed. So try to use 496 first.
      const uint64_t ADDI16SPCompressLen = 496;
      if (STI.is64Bit() && CanCompress(ADDI16SPCompressLen))
        return ADDI16SPCompressLen;
      if (CanCompress(RVCompressLen))
        return RVCompressLen;
    }
    return 2048 - StackAlign;
  }
  return 0;
}
2091
// Assign a stack slot to each callee-saved register in \p CSI. Registers
// covered by QCI interrupt entry, cm.push/pop, or save-restore libcalls get
// fixed (negative-index) slots at ABI-mandated offsets; Zilsd GPR pairs get
// aligned 8-byte slots; everything else gets an ordinary spill slot, with RVV
// registers placed on the scalable-vector stack.
bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();

  // Preemptible Interrupts have two additional Callee-save Frame Indexes,
  // not tracked by `CSI`.
  if (RVFI->isSiFivePreemptibleInterrupt(MF)) {
    for (int I = 0; I < 2; ++I) {
      int FI = RVFI->getInterruptCSRFrameIndex(Idx: I);
      MFI.setIsCalleeSavedObjectIndex(ObjectIdx: FI, IsCalleeSaved: true);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty())
    return true;

  if (RVFI->useQCIInterrupt(MF)) {
    RVFI->setQCIInterruptStackSize(QCIInterruptPushAmount);
  }

  if (RVFI->isPushable(MF)) {
    // Determine how many GPRs we need to push and save it to RVFI.
    unsigned PushedRegNum = getNumPushPopRegs(CSI);

    // `QC.C.MIENTER(.NEST)` will save `ra` and `s0`, so we should only push if
    // we want to push more than 2 registers. Otherwise, we should push if we
    // want to push more than 0 registers.
    unsigned OnlyPushIfMoreThan = RVFI->useQCIInterrupt(MF) ? 2 : 0;
    if (PushedRegNum > OnlyPushIfMoreThan) {
      RVFI->setRVPushRegs(PushedRegNum);
      RVFI->setRVPushStackSize(alignTo(Value: (STI.getXLen() / 8) * PushedRegNum, Align: 16));
    }
  }

  for (auto &CS : CSI) {
    MCRegister Reg = CS.getReg();
    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
    unsigned Size = RegInfo->getSpillSize(RC: *RC);

    // Registers saved by the QCI interrupt-entry instruction live at fixed
    // offsets dictated by FixedCSRFIQCIInterruptMap.
    if (RVFI->useQCIInterrupt(MF)) {
      const auto *FFI = llvm::find_if(Range: FixedCSRFIQCIInterruptMap, P: [&](auto P) {
        return P.first == CS.getReg();
      });
      if (FFI != std::end(arr: FixedCSRFIQCIInterruptMap)) {
        int64_t Offset = FFI->second * (int64_t)Size;

        int FrameIdx = MFI.CreateFixedSpillStackObject(Size, SPOffset: Offset);
        assert(FrameIdx < 0);
        CS.setFrameIdx(FrameIdx);
        continue;
      }
    }

    // Registers covered by save-restore libcalls or cm.push/pop also get
    // fixed slots; their offsets depend on the push/pop kind.
    if (RVFI->useSaveRestoreLibCalls(MF) || RVFI->isPushable(MF)) {
      const auto *FII = llvm::find_if(
          Range: FixedCSRFIMap, P: [&](MCPhysReg P) { return P == CS.getReg(); });
      unsigned RegNum = std::distance(first: std::begin(arr: FixedCSRFIMap), last: FII);

      if (FII != std::end(arr: FixedCSRFIMap)) {
        int64_t Offset;
        if (RVFI->getPushPopKind(MF) ==
            RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp)
          Offset = -int64_t(RVFI->getRVPushRegs() - RegNum) * Size;
        else
          Offset = -int64_t(RegNum + 1) * Size;

        // The QCI entry save area sits below the push area on the stack.
        if (RVFI->useQCIInterrupt(MF))
          Offset -= QCIInterruptPushAmount;

        int FrameIdx = MFI.CreateFixedSpillStackObject(Size, SPOffset: Offset);
        assert(FrameIdx < 0);
        CS.setFrameIdx(FrameIdx);
        continue;
      }
    }

    // For GPRPair registers, use 8-byte slots with required alignment by zilsd.
    if (!STI.is64Bit() && STI.hasStdExtZilsd() &&
        RISCV::GPRPairRegClass.contains(Reg)) {
      Align PairAlign = STI.getZilsdAlign();
      int FrameIdx = MFI.CreateStackObject(Size: 8, Alignment: PairAlign, isSpillSlot: true);
      MFI.setIsCalleeSavedObjectIndex(ObjectIdx: FrameIdx, IsCalleeSaved: true);
      CS.setFrameIdx(FrameIdx);
      continue;
    }

    // Not a fixed slot.
    Align Alignment = RegInfo->getSpillAlign(RC: *RC);
    // We may not be able to satisfy the desired alignment specification of
    // the TargetRegisterClass if the stack alignment is smaller. Use the
    // min.
    Alignment = std::min(a: Alignment, b: getStackAlign());
    int FrameIdx = MFI.CreateStackObject(Size, Alignment, isSpillSlot: true);
    MFI.setIsCalleeSavedObjectIndex(ObjectIdx: FrameIdx, IsCalleeSaved: true);
    CS.setFrameIdx(FrameIdx);
    // RVV registers spill into the scalable-vector stack section.
    if (RISCVRegisterInfo::isRVVRegClass(RC))
      MFI.setStackID(ObjectIdx: FrameIdx, ID: TargetStackID::ScalableVector);
  }

  if (RVFI->useQCIInterrupt(MF)) {
    // Allocate a fixed object that covers the entire QCI stack allocation,
    // because there are gaps which are reserved for future use.
    MFI.CreateFixedSpillStackObject(
        Size: QCIInterruptPushAmount, SPOffset: -static_cast<int64_t>(QCIInterruptPushAmount));
  }

  if (RVFI->isPushable(MF)) {
    int64_t QCIOffset = RVFI->useQCIInterrupt(MF) ? QCIInterruptPushAmount : 0;
    // Allocate a fixed object that covers the full push.
    if (int64_t PushSize = RVFI->getRVPushStackSize())
      MFI.CreateFixedSpillStackObject(Size: PushSize, SPOffset: -PushSize - QCIOffset);
  } else if (int LibCallRegs = getLibCallID(MF, CSI) + 1) {
    // Likewise cover the area that the save/restore libcalls manage.
    int64_t LibCallFrameSize =
        alignTo(Size: (STI.getXLen() / 8) * LibCallRegs, A: getStackAlign());
    MFI.CreateFixedSpillStackObject(Size: LibCallFrameSize, SPOffset: -LibCallFrameSize);
  }

  return true;
}
2215
// Emit the prologue code that spills the callee-saved registers in \p CSI:
// QCI interrupt entry, cm.push, or a save-restore libcall when applicable,
// followed by explicit stores for every register those mechanisms don't
// cover. Returns true to signal the spilling was fully handled here.
bool RISCVFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return true;

  MachineFunction *MF = MBB.getParent();
  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
  DebugLoc DL;
  if (MI != MBB.end() && !MI->isDebugInstr())
    DL = MI->getDebugLoc();

  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  if (RVFI->useQCIInterrupt(MF: *MF)) {
    // Emit QC.C.MIENTER(.NEST)
    BuildMI(
        BB&: MBB, I: MI, MIMD: DL,
        MCID: TII.get(Opcode: RVFI->getInterruptStackKind(MF: *MF) ==
                    RISCVMachineFunctionInfo::InterruptStackKind::QCINest
                ? RISCV::QC_C_MIENTER_NEST
                : RISCV::QC_C_MIENTER))
        .setMIFlag(MachineInstr::FrameSetup);

    // The entry instruction implicitly saved these; mark them live-in so the
    // stores below know they are not killed here.
    for (auto [Reg, _Offset] : FixedCSRFIQCIInterruptMap)
      MBB.addLiveIn(PhysReg: Reg);
  }

  if (RVFI->isPushable(MF: *MF)) {
    // Emit CM.PUSH with base StackAdj & evaluate Push stack
    unsigned PushedRegNum = RVFI->getRVPushRegs();
    if (PushedRegNum > 0) {
      // Use encoded number to represent registers to spill.
      unsigned Opcode = getPushOpcode(
          Kind: RVFI->getPushPopKind(MF: *MF), UpdateFP: hasFP(MF: *MF) && !RVFI->useQCIInterrupt(MF: *MF));
      unsigned RegEnc = RISCVZC::encodeRegListNumRegs(NumRegs: PushedRegNum);
      MachineInstrBuilder PushBuilder =
          BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode))
              .setMIFlag(MachineInstr::FrameSetup);
      PushBuilder.addImm(Val: RegEnc);
      PushBuilder.addImm(Val: 0);

      // The pushed registers are read implicitly by the push instruction.
      for (unsigned i = 0; i < PushedRegNum; i++)
        PushBuilder.addUse(RegNo: FixedCSRFIMap[i], Flags: RegState::Implicit);
    }
  } else if (const char *SpillLibCall = getSpillLibCallName(MF: *MF, CSI)) {
    // Add spill libcall via non-callee-saved register t0.
    MachineInstrBuilder NewMI =
        BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::PseudoCALLReg), DestReg: RISCV::X5)
            .addExternalSymbol(FnName: SpillLibCall, TargetFlags: RISCVII::MO_CALL)
            .setMIFlag(MachineInstr::FrameSetup)
            .addUse(RegNo: RISCV::X2, Flags: RegState::Implicit)
            .addDef(RegNo: RISCV::X2, Flags: RegState::ImplicitDefine);

    // Add registers spilled as implicit used.
    for (auto &CS : CSI)
      NewMI.addUse(RegNo: CS.getReg(), Flags: RegState::Implicit);
  }

  // Manually spill values not spilled by libcall & Push/Pop.
  const auto &UnmanagedCSI =
      getUnmanagedCSI(MF: *MF, CSI, ReverseOrder: STI.preferAscendingLoadStore());
  const auto &RVVCSI = getRVVCalleeSavedInfo(MF: *MF, CSI);

  auto storeRegsToStackSlots = [&](decltype(UnmanagedCSI) CSInfo) {
    for (auto &CS : CSInfo) {
      // Insert the spill to the stack frame.
      MCRegister Reg = CS.getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      TII.storeRegToStackSlot(MBB, MI, SrcReg: Reg, isKill: !MBB.isLiveIn(Reg),
                              FrameIndex: CS.getFrameIdx(), RC, VReg: Register(),
                              Flags: MachineInstr::FrameSetup);
    }
  };
  storeRegsToStackSlots(UnmanagedCSI);
  storeRegsToStackSlots(RVVCSI);

  return true;
}
2294
2295static unsigned getCalleeSavedRVVNumRegs(const Register &BaseReg) {
2296 return RISCV::VRRegClass.contains(Reg: BaseReg) ? 1
2297 : RISCV::VRM2RegClass.contains(Reg: BaseReg) ? 2
2298 : RISCV::VRM4RegClass.contains(Reg: BaseReg) ? 4
2299 : 8;
2300}
2301
// Emit prologue CFI describing where each RVV callee-saved register was
// spilled, expanding register groups into their individual V registers.
// Offsets are expressed relative to the CFA with a fixed and a scalable part.
void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const {
  MachineFunction *MF = MBB.getParent();
  const MachineFrameInfo &MFI = MF->getFrameInfo();
  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();

  const auto &RVVCSI = getRVVCalleeSavedInfo(MF: *MF, CSI: MFI.getCalleeSavedInfo());
  if (RVVCSI.empty())
    return;

  uint64_t FixedSize = getStackSizeWithRVVPadding(MF: *MF);
  if (!HasFP) {
    // Without a frame pointer the RVV section base is closer to SP: exclude
    // the scalar local-variable area from the fixed offset.
    uint64_t ScalarLocalVarSize =
        MFI.getStackSize() - RVFI->getCalleeSavedStackSize() -
        RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
    FixedSize -= ScalarLocalVarSize;
  }

  CFIInstBuilder CFIBuilder(MBB, MI, MachineInstr::FrameSetup);
  for (auto &CS : RVVCSI) {
    // Insert the spill to the stack frame.
    int FI = CS.getFrameIdx();
    MCRegister BaseReg = getRVVBaseRegister(TRI, Reg: CS.getReg());
    unsigned NumRegs = getCalleeSavedRVVNumRegs(BaseReg: CS.getReg());
    // One CFI record per single V register in the (possibly grouped) spill.
    // NOTE(review): the object offset is divided by 8 — presumably converting
    // vscale-byte offsets to per-VR slot indices; confirm against the
    // createDefCFAOffset encoding.
    for (unsigned i = 0; i < NumRegs; ++i) {
      CFIBuilder.insertCFIInst(CFIInst: createDefCFAOffset(
          TRI, Reg: BaseReg + i,
          Offset: StackOffset::get(Fixed: -FixedSize, Scalable: MFI.getObjectOffset(ObjectIdx: FI) / 8 + i)));
    }
  }
}
2334
2335void RISCVFrameLowering::emitCalleeSavedRVVEpilogCFI(
2336 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
2337 MachineFunction *MF = MBB.getParent();
2338 const MachineFrameInfo &MFI = MF->getFrameInfo();
2339 const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
2340
2341 CFIInstBuilder CFIHelper(MBB, MI, MachineInstr::FrameDestroy);
2342 const auto &RVVCSI = getRVVCalleeSavedInfo(MF: *MF, CSI: MFI.getCalleeSavedInfo());
2343 for (auto &CS : RVVCSI) {
2344 MCRegister BaseReg = getRVVBaseRegister(TRI, Reg: CS.getReg());
2345 unsigned NumRegs = getCalleeSavedRVVNumRegs(BaseReg: CS.getReg());
2346 for (unsigned i = 0; i < NumRegs; ++i)
2347 CFIHelper.buildRestore(Reg: BaseReg + i);
2348 }
2349}
2350
// Insert epilogue code that reloads callee-saved registers from their stack
// slots before MI. Registers covered by a push/pop pair or a save/restore
// libcall are not reloaded here; instead the matching pop instruction or
// __riscv_restore tail call is emitted. Returns true to tell generic code
// that restoration has been fully handled.
bool RISCVFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return true;

  MachineFunction *MF = MBB.getParent();
  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
  // Take the debug location from the insertion point unless it is a debug
  // instruction (whose location must not leak into real code).
  DebugLoc DL;
  if (MI != MBB.end() && !MI->isDebugInstr())
    DL = MI->getDebugLoc();

  // Manually restore values not restored by libcall & Push/Pop.
  // Reverse the restore order in epilog. In addition, the return
  // address will be restored first in the epilogue. It increases
  // the opportunity to avoid the load-to-use data hazard between
  // loading RA and return by RA. loadRegFromStackSlot can insert
  // multiple instructions.
  const auto &UnmanagedCSI =
      getUnmanagedCSI(MF: *MF, CSI, ReverseOrder: STI.preferAscendingLoadStore());
  const auto &RVVCSI = getRVVCalleeSavedInfo(MF: *MF, CSI);

  // Emit one reload (possibly several instructions) per register in CSInfo,
  // inserted immediately before MI.
  auto loadRegFromStackSlot = [&](decltype(UnmanagedCSI) CSInfo) {
    for (auto &CS : CSInfo) {
      MCRegister Reg = CS.getReg();
      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
      TII.loadRegFromStackSlot(MBB, MI, DestReg: Reg, FrameIndex: CS.getFrameIdx(), RC, VReg: Register(),
                               SubReg: RISCV::NoSubRegister,
                               Flags: MachineInstr::FrameDestroy);
      assert(MI != MBB.begin() &&
             "loadRegFromStackSlot didn't insert any code!");
    }
  };
  // RVV registers are reloaded first, then the remaining scalar CSRs.
  loadRegFromStackSlot(RVVCSI);
  loadRegFromStackSlot(UnmanagedCSI);

  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  if (RVFI->useQCIInterrupt(MF: *MF)) {
    // Don't emit anything here because restoration is handled by
    // QC.C.MILEAVERET which we already inserted to return.
    assert(MI->getOpcode() == RISCV::QC_C_MILEAVERET &&
           "Unexpected QCI Interrupt Return Instruction");
  }

  if (RVFI->isPushable(MF: *MF)) {
    // Emit a single pop instruction that restores the entire pushed register
    // block in one go.
    unsigned PushedRegNum = RVFI->getRVPushRegs();
    if (PushedRegNum > 0) {
      unsigned Opcode = getPopOpcode(Kind: RVFI->getPushPopKind(MF: *MF));
      unsigned RegEnc = RISCVZC::encodeRegListNumRegs(NumRegs: PushedRegNum);
      MachineInstrBuilder PopBuilder =
          BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode))
              .setMIFlag(MachineInstr::FrameDestroy);
      // Use encoded number to represent registers to restore.
      PopBuilder.addImm(Val: RegEnc);
      PopBuilder.addImm(Val: 0);

      // Mark every popped register as implicitly defined so later passes see
      // that the pop writes them.
      for (unsigned i = 0; i < RVFI->getRVPushRegs(); i++)
        PopBuilder.addDef(RegNo: FixedCSRFIMap[i], Flags: RegState::ImplicitDefine);
    }
  } else if (const char *RestoreLibCall = getRestoreLibCallName(MF: *MF, CSI)) {
    // Add restore libcall via tail call.
    MachineInstrBuilder NewMI =
        BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::PseudoTAIL))
            .addExternalSymbol(FnName: RestoreLibCall, TargetFlags: RISCVII::MO_CALL)
            .setMIFlag(MachineInstr::FrameDestroy)
            .addDef(RegNo: RISCV::X2, Flags: RegState::ImplicitDefine);

    // Add registers restored as implicit defined.
    for (auto &CS : CSI)
      NewMI.addDef(RegNo: CS.getReg(), Flags: RegState::ImplicitDefine);

    // Remove trailing returns, since the terminator is now a tail call to the
    // restore function.
    if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
      NewMI.getInstr()->copyImplicitOps(MF&: *MF, MI: *MI);
      MI->eraseFromParent();
    }
  }
  return true;
}
2431
2432bool RISCVFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2433 // Keep the conventional code flow when not optimizing.
2434 if (MF.getFunction().hasOptNone())
2435 return false;
2436
2437 return true;
2438}
2439
2440bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
2441 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
2442 const MachineFunction *MF = MBB.getParent();
2443 const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2444
2445 // Make sure VTYPE and VL are not live-in since we will use vsetvli in the
2446 // prologue to get the VLEN, and that will clobber these registers.
2447 //
2448 // We may do also check the stack contains objects with scalable vector type,
2449 // but this will require iterating over all the stack objects, but this may
2450 // not worth since the situation is rare, we could do further check in future
2451 // if we find it is necessary.
2452 if (STI.preferVsetvliOverReadVLENB() &&
2453 (MBB.isLiveIn(Reg: RISCV::VTYPE) || MBB.isLiveIn(Reg: RISCV::VL)))
2454 return false;
2455
2456 if (!RVFI->useSaveRestoreLibCalls(MF: *MF))
2457 return true;
2458
2459 // Inserting a call to a __riscv_save libcall requires the use of the register
2460 // t0 (X5) to hold the return address. Therefore if this register is already
2461 // used we can't insert the call.
2462
2463 RegScavenger RS;
2464 RS.enterBasicBlock(MBB&: *TmpMBB);
2465 return !RS.isRegUsed(Reg: RISCV::X5);
2466}
2467
2468bool RISCVFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
2469 const MachineFunction *MF = MBB.getParent();
2470 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
2471 const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2472
2473 // We do not want QC.C.MILEAVERET to be subject to shrink-wrapping - it must
2474 // come in the final block of its function as it both pops and returns.
2475 if (RVFI->useQCIInterrupt(MF: *MF))
2476 return MBB.succ_empty();
2477
2478 if (!RVFI->useSaveRestoreLibCalls(MF: *MF))
2479 return true;
2480
2481 // Using the __riscv_restore libcalls to restore CSRs requires a tail call.
2482 // This means if we still need to continue executing code within this function
2483 // the restore cannot take place in this basic block.
2484
2485 if (MBB.succ_size() > 1)
2486 return false;
2487
2488 MachineBasicBlock *SuccMBB =
2489 MBB.succ_empty() ? TmpMBB->getFallThrough() : *MBB.succ_begin();
2490
2491 // Doing a tail call should be safe if there are no successors, because either
2492 // we have a returning block or the end of the block is unreachable, so the
2493 // restore will be eliminated regardless.
2494 if (!SuccMBB)
2495 return true;
2496
2497 // The successor can only contain a return, since we would effectively be
2498 // replacing the successor with our own tail return at the end of our block.
2499 return SuccMBB->isReturnBlock() && SuccMBB->size() == 1;
2500}
2501
2502bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
2503 switch (ID) {
2504 case TargetStackID::Default:
2505 case TargetStackID::ScalableVector:
2506 return true;
2507 case TargetStackID::NoAlloc:
2508 case TargetStackID::SGPRSpill:
2509 case TargetStackID::WasmLocal:
2510 case TargetStackID::ScalablePredicateVector:
2511 return false;
2512 }
2513 llvm_unreachable("Invalid TargetStackID::Value");
2514}
2515
// Scalable (RVV) vector objects get ScalableVector stack slots, since their
// size depends on the runtime vector length.
TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
  return TargetStackID::ScalableVector;
}
2519
// Synthesize the probe loop: repeatedly decrement SP by the probe size and
// store zero to the new stack top, until SP reaches TargetReg (fixed-size
// case) or TargetReg itself drops below the probe size (RVV case, where the
// allocation size is only known at runtime). Splits the current block: the
// loop goes into a new LoopTestMBB and the remainder of MBB moves to a new
// ExitMBB.
static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL,
                                 Register TargetReg, bool IsRVV) {
  assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP");

  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineFunction &MF = *MBB.getParent();

  auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
  bool IsRV64 = Subtarget.is64Bit();
  Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
  const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
  uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);

  // Create the loop block and the exit block right after MBB.
  MachineFunction::iterator MBBInsertPoint = std::next(x: MBB.getIterator());
  MachineBasicBlock *LoopTestMBB =
      MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
  MF.insert(MBBI: MBBInsertPoint, MBB: LoopTestMBB);
  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
  MF.insert(MBBI: MBBInsertPoint, MBB: ExitMBB);
  MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
  // X7 (t2) holds the probe step size for the whole loop.
  Register ScratchReg = RISCV::X7;

  // ScratchReg = ProbeSize
  TII->movImm(MBB, MBBI, DL, DstReg: ScratchReg, Val: ProbeSize, Flag: Flags);

  // LoopTest:
  //   SUB SP, SP, ProbeSize
  BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::SUB), DestReg: SPReg)
      .addReg(RegNo: SPReg)
      .addReg(RegNo: ScratchReg)
      .setMIFlags(Flags);

  //   s[d|w] zero, 0(sp) -- touch the page to commit it.
  BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL,
          MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
      .addReg(RegNo: RISCV::X0)
      .addReg(RegNo: SPReg)
      .addImm(Val: 0)
      .setMIFlags(Flags);

  if (IsRVV) {
    // RVV: TargetReg counts the bytes still to probe.
    //   SUB TargetReg, TargetReg, ProbeSize
    BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::SUB),
            DestReg: TargetReg)
        .addReg(RegNo: TargetReg)
        .addReg(RegNo: ScratchReg)
        .setMIFlags(Flags);

    //   BGE TargetReg, ProbeSize, LoopTest
    BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::BGE))
        .addReg(RegNo: TargetReg)
        .addReg(RegNo: ScratchReg)
        .addMBB(MBB: LoopTestMBB)
        .setMIFlags(Flags);

  } else {
    // Fixed size: TargetReg is the desired final SP value.
    //   BNE SP, TargetReg, LoopTest
    BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::BNE))
        .addReg(RegNo: SPReg)
        .addReg(RegNo: TargetReg)
        .addMBB(MBB: LoopTestMBB)
        .setMIFlags(Flags);
  }

  // Move everything after the probe pseudo into the exit block and rewire the
  // CFG: MBB -> LoopTestMBB -> {LoopTestMBB, ExitMBB}.
  ExitMBB->splice(Where: ExitMBB->end(), Other: &MBB, From: std::next(x: MBBI), To: MBB.end());
  ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB);

  LoopTestMBB->addSuccessor(Succ: ExitMBB);
  LoopTestMBB->addSuccessor(Succ: LoopTestMBB);
  MBB.addSuccessor(Succ: LoopTestMBB);
  // Update liveins.
  fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopTestMBB});
}
2595
2596void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
2597 MachineBasicBlock &MBB) const {
2598 // Get the instructions that need to be replaced. We emit at most two of
2599 // these. Remember them in order to avoid complications coming from the need
2600 // to traverse the block while potentially creating more blocks.
2601 SmallVector<MachineInstr *, 4> ToReplace;
2602 for (MachineInstr &MI : MBB) {
2603 unsigned Opc = MI.getOpcode();
2604 if (Opc == RISCV::PROBED_STACKALLOC ||
2605 Opc == RISCV::PROBED_STACKALLOC_RVV) {
2606 ToReplace.push_back(Elt: &MI);
2607 }
2608 }
2609
2610 for (MachineInstr *MI : ToReplace) {
2611 if (MI->getOpcode() == RISCV::PROBED_STACKALLOC ||
2612 MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV) {
2613 MachineBasicBlock::iterator MBBI = MI->getIterator();
2614 DebugLoc DL = MBB.findDebugLoc(MBBI);
2615 Register TargetReg = MI->getOperand(i: 0).getReg();
2616 emitStackProbeInline(MBBI, DL, TargetReg,
2617 IsRVV: (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV));
2618 MBBI->eraseFromParent();
2619 }
2620 }
2621}
2622
// On function entry the CFA coincides with the incoming stack pointer, so the
// initial CFA offset is zero.
int RISCVFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
  return 0;
}
2626
2627Register
2628RISCVFrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
2629 return RISCV::X2;
2630}
2631