1//===-- RISCVFrameLowering.cpp - RISC-V Frame Information -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the RISC-V implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVFrameLowering.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "RISCVMachineFunctionInfo.h"
16#include "RISCVSubtarget.h"
17#include "llvm/BinaryFormat/Dwarf.h"
18#include "llvm/CodeGen/CFIInstBuilder.h"
19#include "llvm/CodeGen/LivePhysRegs.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/RegisterScavenging.h"
25#include "llvm/IR/DiagnosticInfo.h"
26#include "llvm/MC/MCDwarf.h"
27#include "llvm/Support/LEB128.h"
28
29#include <algorithm>
30
31#define DEBUG_TYPE "riscv-frame"
32
33using namespace llvm;
34
35static Align getABIStackAlignment(RISCVABI::ABI ABI) {
36 if (ABI == RISCVABI::ABI_ILP32E)
37 return Align(4);
38 if (ABI == RISCVABI::ABI_LP64E)
39 return Align(8);
40 return Align(16);
41}
42
43RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI)
44 : TargetFrameLowering(
45 StackGrowsDown, getABIStackAlignment(ABI: STI.getTargetABI()),
46 /*LocalAreaOffset=*/0,
47 /*TransientStackAlignment=*/getABIStackAlignment(ABI: STI.getTargetABI())),
48 STI(STI) {}
49
50// The register used to hold the frame pointer.
51static constexpr MCPhysReg FPReg = RISCV::X8;
52
53// The register used to hold the stack pointer.
54static constexpr MCPhysReg SPReg = RISCV::X2;
55
56// The register used to hold the return address.
57static constexpr MCPhysReg RAReg = RISCV::X1;
58
59// LIst of CSRs that are given a fixed location by save/restore libcalls or
60// Zcmp/Xqccmp Push/Pop. The order in this table indicates the order the
61// registers are saved on the stack. Zcmp uses the reverse order of save/restore
62// and Xqccmp on the stack, but this is handled when offsets are calculated.
63static const MCPhysReg FixedCSRFIMap[] = {
64 /*ra*/ RAReg, /*s0*/ FPReg, /*s1*/ RISCV::X9,
65 /*s2*/ RISCV::X18, /*s3*/ RISCV::X19, /*s4*/ RISCV::X20,
66 /*s5*/ RISCV::X21, /*s6*/ RISCV::X22, /*s7*/ RISCV::X23,
67 /*s8*/ RISCV::X24, /*s9*/ RISCV::X25, /*s10*/ RISCV::X26,
68 /*s11*/ RISCV::X27};
69
// Stack bytes that `QC.C.MIENTER(.NEST)` allocates and that
// `QC.C.MILEAVERET` pops again.
static constexpr uint64_t QCIInterruptPushAmount = 96;
73
74static const std::pair<MCPhysReg, int8_t> FixedCSRFIQCIInterruptMap[] = {
75 /* -1 is a gap for mepc/mnepc */
76 {/*fp*/ FPReg, -2},
77 /* -3 is a gap for qc.mcause */
78 {/*ra*/ RAReg, -4},
79 /* -5 is reserved */
80 {/*t0*/ RISCV::X5, -6},
81 {/*t1*/ RISCV::X6, -7},
82 {/*t2*/ RISCV::X7, -8},
83 {/*a0*/ RISCV::X10, -9},
84 {/*a1*/ RISCV::X11, -10},
85 {/*a2*/ RISCV::X12, -11},
86 {/*a3*/ RISCV::X13, -12},
87 {/*a4*/ RISCV::X14, -13},
88 {/*a5*/ RISCV::X15, -14},
89 {/*a6*/ RISCV::X16, -15},
90 {/*a7*/ RISCV::X17, -16},
91 {/*t3*/ RISCV::X28, -17},
92 {/*t4*/ RISCV::X29, -18},
93 {/*t5*/ RISCV::X30, -19},
94 {/*t6*/ RISCV::X31, -20},
95 /* -21, -22, -23, -24 are reserved */
96};
97
98/// Returns true if DWARF CFI instructions ("frame moves") should be emitted.
99static bool needsDwarfCFI(const MachineFunction &MF) {
100 return MF.needsFrameMoves();
101}
102
103// For now we use x3, a.k.a gp, as pointer to shadow call stack.
104// User should not use x3 in their asm.
105static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
106 MachineBasicBlock::iterator MI,
107 const DebugLoc &DL) {
108 const auto &STI = MF.getSubtarget<RISCVSubtarget>();
109 // We check Zimop instead of (Zimop || Zcmop) to determine whether HW shadow
110 // stack is available despite the fact that sspush/sspopchk both have a
111 // compressed form, because if only Zcmop is available, we would need to
112 // reserve X5 due to c.sspopchk only takes X5 and we currently do not support
113 // using X5 as the return address register.
114 // However, we can still aggressively use c.sspush x1 if zcmop is available.
115 bool HasHWShadowStack = MF.getFunction().hasFnAttribute(Kind: "hw-shadow-stack") &&
116 STI.hasStdExtZimop();
117 bool HasSWShadowStack =
118 MF.getFunction().hasFnAttribute(Kind: Attribute::ShadowCallStack);
119 if (!HasHWShadowStack && !HasSWShadowStack)
120 return;
121
122 const llvm::RISCVRegisterInfo *TRI = STI.getRegisterInfo();
123
124 // Do not save RA to the SCS if it's not saved to the regular stack,
125 // i.e. RA is not at risk of being overwritten.
126 std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
127 if (llvm::none_of(
128 Range&: CSI, P: [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; }))
129 return;
130
131 const RISCVInstrInfo *TII = STI.getInstrInfo();
132 if (HasHWShadowStack) {
133 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: RISCV::SSPUSH))
134 .addReg(RegNo: RAReg)
135 .setMIFlag(MachineInstr::FrameSetup);
136 return;
137 }
138
139 Register SCSPReg = RISCVABI::getSCSPReg();
140
141 bool IsRV64 = STI.is64Bit();
142 int64_t SlotSize = STI.getXLen() / 8;
143 // Store return address to shadow call stack
144 // addi gp, gp, [4|8]
145 // s[w|d] ra, -[4|8](gp)
146 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: RISCV::ADDI))
147 .addReg(RegNo: SCSPReg, Flags: RegState::Define)
148 .addReg(RegNo: SCSPReg)
149 .addImm(Val: SlotSize)
150 .setMIFlag(MachineInstr::FrameSetup);
151 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
152 .addReg(RegNo: RAReg)
153 .addReg(RegNo: SCSPReg)
154 .addImm(Val: -SlotSize)
155 .setMIFlag(MachineInstr::FrameSetup);
156
157 if (!needsDwarfCFI(MF))
158 return;
159
160 // Emit a CFI instruction that causes SlotSize to be subtracted from the value
161 // of the shadow stack pointer when unwinding past this frame.
162 char DwarfSCSReg = TRI->getDwarfRegNum(Reg: SCSPReg, /*IsEH*/ isEH: true);
163 assert(DwarfSCSReg < 32 && "SCS Register should be < 32 (X3).");
164
165 char Offset = static_cast<char>(-SlotSize) & 0x7f;
166 const char CFIInst[] = {
167 dwarf::DW_CFA_val_expression,
168 DwarfSCSReg, // register
169 2, // length
170 static_cast<char>(unsigned(dwarf::DW_OP_breg0 + DwarfSCSReg)),
171 Offset, // addend (sleb128)
172 };
173
174 CFIInstBuilder(MBB, MI, MachineInstr::FrameSetup)
175 .buildEscape(Bytes: StringRef(CFIInst, sizeof(CFIInst)));
176}
177
178static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
179 MachineBasicBlock::iterator MI,
180 const DebugLoc &DL) {
181 const auto &STI = MF.getSubtarget<RISCVSubtarget>();
182 bool HasHWShadowStack = MF.getFunction().hasFnAttribute(Kind: "hw-shadow-stack") &&
183 STI.hasStdExtZimop();
184 bool HasSWShadowStack =
185 MF.getFunction().hasFnAttribute(Kind: Attribute::ShadowCallStack);
186 if (!HasHWShadowStack && !HasSWShadowStack)
187 return;
188
189 // See emitSCSPrologue() above.
190 std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
191 if (llvm::none_of(
192 Range&: CSI, P: [&](CalleeSavedInfo &CSR) { return CSR.getReg() == RAReg; }))
193 return;
194
195 const RISCVInstrInfo *TII = STI.getInstrInfo();
196 if (HasHWShadowStack) {
197 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: RISCV::SSPOPCHK))
198 .addReg(RegNo: RAReg)
199 .setMIFlag(MachineInstr::FrameDestroy);
200 return;
201 }
202
203 Register SCSPReg = RISCVABI::getSCSPReg();
204
205 bool IsRV64 = STI.is64Bit();
206 int64_t SlotSize = STI.getXLen() / 8;
207 // Load return address from shadow call stack
208 // l[w|d] ra, -[4|8](gp)
209 // addi gp, gp, -[4|8]
210 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::LD : RISCV::LW))
211 .addReg(RegNo: RAReg, Flags: RegState::Define)
212 .addReg(RegNo: SCSPReg)
213 .addImm(Val: -SlotSize)
214 .setMIFlag(MachineInstr::FrameDestroy);
215 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: RISCV::ADDI))
216 .addReg(RegNo: SCSPReg, Flags: RegState::Define)
217 .addReg(RegNo: SCSPReg)
218 .addImm(Val: -SlotSize)
219 .setMIFlag(MachineInstr::FrameDestroy);
220 if (needsDwarfCFI(MF)) {
221 // Restore the SCS pointer
222 CFIInstBuilder(MBB, MI, MachineInstr::FrameDestroy).buildRestore(Reg: SCSPReg);
223 }
224}
225
226// Insert instruction to swap mscratchsw with sp
227static void emitSiFiveCLICStackSwap(MachineFunction &MF, MachineBasicBlock &MBB,
228 MachineBasicBlock::iterator MBBI,
229 const DebugLoc &DL) {
230 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
231
232 if (!RVFI->isSiFiveStackSwapInterrupt(MF))
233 return;
234
235 const auto &STI = MF.getSubtarget<RISCVSubtarget>();
236 const RISCVInstrInfo *TII = STI.getInstrInfo();
237
238 assert(STI.hasVendorXSfmclic() && "Stack Swapping Requires XSfmclic");
239
240 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRW))
241 .addReg(RegNo: SPReg, Flags: RegState::Define)
242 .addImm(Val: RISCVSysReg::sf_mscratchcsw)
243 .addReg(RegNo: SPReg, Flags: RegState::Kill)
244 .setMIFlag(MachineInstr::FrameSetup);
245
246 // FIXME: CFI Information for this swap.
247}
248
249static void
250createSiFivePreemptibleInterruptFrameEntries(MachineFunction &MF,
251 RISCVMachineFunctionInfo &RVFI) {
252 if (!RVFI.isSiFivePreemptibleInterrupt(MF))
253 return;
254
255 const TargetRegisterClass &RC = RISCV::GPRRegClass;
256 const TargetRegisterInfo &TRI =
257 *MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
258 MachineFrameInfo &MFI = MF.getFrameInfo();
259
260 // Create two frame objects for spilling X8 and X9, which will be done in
261 // `emitSiFiveCLICPreemptibleSaves`. This is in addition to any other stack
262 // objects we might have for X8 and X9, as they might be saved twice.
263 for (int I = 0; I < 2; ++I) {
264 int FI = MFI.CreateStackObject(Size: TRI.getSpillSize(RC), Alignment: TRI.getSpillAlign(RC),
265 isSpillSlot: true);
266 RVFI.pushInterruptCSRFrameIndex(FI);
267 }
268}
269
270static void emitSiFiveCLICPreemptibleSaves(MachineFunction &MF,
271 MachineBasicBlock &MBB,
272 MachineBasicBlock::iterator MBBI,
273 const DebugLoc &DL) {
274 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
275
276 if (!RVFI->isSiFivePreemptibleInterrupt(MF))
277 return;
278
279 const auto &STI = MF.getSubtarget<RISCVSubtarget>();
280 const RISCVInstrInfo *TII = STI.getInstrInfo();
281
282 // FIXME: CFI Information here is nonexistent/wrong.
283
284 // X8 and X9 might be stored into the stack twice, initially into the
285 // `interruptCSRFrameIndex` here, and then maybe again into their CSI frame
286 // index.
287 //
288 // This is done instead of telling the register allocator that we need two
289 // VRegs to store the value of `mcause` and `mepc` through the instruction,
290 // which affects other passes.
291 TII->storeRegToStackSlot(MBB, MBBI, SrcReg: RISCV::X8, /* IsKill=*/true,
292 FrameIndex: RVFI->getInterruptCSRFrameIndex(Idx: 0),
293 RC: &RISCV::GPRRegClass, VReg: Register(),
294 Flags: MachineInstr::FrameSetup);
295 TII->storeRegToStackSlot(MBB, MBBI, SrcReg: RISCV::X9, /* IsKill=*/true,
296 FrameIndex: RVFI->getInterruptCSRFrameIndex(Idx: 1),
297 RC: &RISCV::GPRRegClass, VReg: Register(),
298 Flags: MachineInstr::FrameSetup);
299
300 // Put `mcause` into X8 (s0), and `mepc` into X9 (s1). If either of these are
301 // used in the function, then they will appear in `getUnmanagedCSI` and will
302 // be saved again.
303 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRS))
304 .addReg(RegNo: RISCV::X8, Flags: RegState::Define)
305 .addImm(Val: RISCVSysReg::mcause)
306 .addReg(RegNo: RISCV::X0)
307 .setMIFlag(MachineInstr::FrameSetup);
308 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRS))
309 .addReg(RegNo: RISCV::X9, Flags: RegState::Define)
310 .addImm(Val: RISCVSysReg::mepc)
311 .addReg(RegNo: RISCV::X0)
312 .setMIFlag(MachineInstr::FrameSetup);
313
314 // Enable interrupts.
315 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRSI))
316 .addReg(RegNo: RISCV::X0, Flags: RegState::Define)
317 .addImm(Val: RISCVSysReg::mstatus)
318 .addImm(Val: 8)
319 .setMIFlag(MachineInstr::FrameSetup);
320}
321
322static void emitSiFiveCLICPreemptibleRestores(MachineFunction &MF,
323 MachineBasicBlock &MBB,
324 MachineBasicBlock::iterator MBBI,
325 const DebugLoc &DL) {
326 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
327
328 if (!RVFI->isSiFivePreemptibleInterrupt(MF))
329 return;
330
331 const auto &STI = MF.getSubtarget<RISCVSubtarget>();
332 const RISCVInstrInfo *TII = STI.getInstrInfo();
333
334 // FIXME: CFI Information here is nonexistent/wrong.
335
336 // Disable interrupts.
337 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRCI))
338 .addReg(RegNo: RISCV::X0, Flags: RegState::Define)
339 .addImm(Val: RISCVSysReg::mstatus)
340 .addImm(Val: 8)
341 .setMIFlag(MachineInstr::FrameSetup);
342
343 // Restore `mepc` from x9 (s1), and `mcause` from x8 (s0). If either were used
344 // in the function, they have already been restored once, so now have the
345 // value stored in `emitSiFiveCLICPreemptibleSaves`.
346 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRW))
347 .addReg(RegNo: RISCV::X0, Flags: RegState::Define)
348 .addImm(Val: RISCVSysReg::mepc)
349 .addReg(RegNo: RISCV::X9, Flags: RegState::Kill)
350 .setMIFlag(MachineInstr::FrameSetup);
351 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::CSRRW))
352 .addReg(RegNo: RISCV::X0, Flags: RegState::Define)
353 .addImm(Val: RISCVSysReg::mcause)
354 .addReg(RegNo: RISCV::X8, Flags: RegState::Kill)
355 .setMIFlag(MachineInstr::FrameSetup);
356
357 // X8 and X9 need to be restored to their values on function entry, which we
358 // saved onto the stack in `emitSiFiveCLICPreemptibleSaves`.
359 TII->loadRegFromStackSlot(MBB, MBBI, DstReg: RISCV::X9,
360 FrameIndex: RVFI->getInterruptCSRFrameIndex(Idx: 1),
361 RC: &RISCV::GPRRegClass, VReg: Register(),
362 SubReg: RISCV::NoSubRegister, Flags: MachineInstr::FrameSetup);
363 TII->loadRegFromStackSlot(MBB, MBBI, DstReg: RISCV::X8,
364 FrameIndex: RVFI->getInterruptCSRFrameIndex(Idx: 0),
365 RC: &RISCV::GPRRegClass, VReg: Register(),
366 SubReg: RISCV::NoSubRegister, Flags: MachineInstr::FrameSetup);
367}
368
369// Get the ID of the libcall used for spilling and restoring callee saved
370// registers. The ID is representative of the number of registers saved or
371// restored by the libcall, except it is zero-indexed - ID 0 corresponds to a
372// single register.
373static int getLibCallID(const MachineFunction &MF,
374 const std::vector<CalleeSavedInfo> &CSI) {
375 const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
376
377 if (CSI.empty() || !RVFI->useSaveRestoreLibCalls(MF))
378 return -1;
379
380 MCRegister MaxReg;
381 for (auto &CS : CSI)
382 // assignCalleeSavedSpillSlots assigns negative frame indexes to
383 // registers which can be saved by libcall.
384 if (CS.getFrameIdx() < 0)
385 MaxReg = std::max(a: MaxReg.id(), b: CS.getReg().id());
386
387 if (!MaxReg)
388 return -1;
389
390 switch (MaxReg.id()) {
391 default:
392 llvm_unreachable("Something has gone wrong!");
393 // clang-format off
394 case /*s11*/ RISCV::X27: return 12;
395 case /*s10*/ RISCV::X26: return 11;
396 case /*s9*/ RISCV::X25: return 10;
397 case /*s8*/ RISCV::X24: return 9;
398 case /*s7*/ RISCV::X23: return 8;
399 case /*s6*/ RISCV::X22: return 7;
400 case /*s5*/ RISCV::X21: return 6;
401 case /*s4*/ RISCV::X20: return 5;
402 case /*s3*/ RISCV::X19: return 4;
403 case /*s2*/ RISCV::X18: return 3;
404 case /*s1*/ RISCV::X9: return 2;
405 case /*s0*/ FPReg: return 1;
406 case /*ra*/ RAReg: return 0;
407 // clang-format on
408 }
409}
410
411// Get the name of the libcall used for spilling callee saved registers.
412// If this function will not use save/restore libcalls, then return a nullptr.
413static const char *
414getSpillLibCallName(const MachineFunction &MF,
415 const std::vector<CalleeSavedInfo> &CSI) {
416 static const char *const SpillLibCalls[] = {
417 "__riscv_save_0",
418 "__riscv_save_1",
419 "__riscv_save_2",
420 "__riscv_save_3",
421 "__riscv_save_4",
422 "__riscv_save_5",
423 "__riscv_save_6",
424 "__riscv_save_7",
425 "__riscv_save_8",
426 "__riscv_save_9",
427 "__riscv_save_10",
428 "__riscv_save_11",
429 "__riscv_save_12"
430 };
431
432 int LibCallID = getLibCallID(MF, CSI);
433 if (LibCallID == -1)
434 return nullptr;
435 return SpillLibCalls[LibCallID];
436}
437
438// Get the name of the libcall used for restoring callee saved registers.
439// If this function will not use save/restore libcalls, then return a nullptr.
440static const char *
441getRestoreLibCallName(const MachineFunction &MF,
442 const std::vector<CalleeSavedInfo> &CSI) {
443 static const char *const RestoreLibCalls[] = {
444 "__riscv_restore_0",
445 "__riscv_restore_1",
446 "__riscv_restore_2",
447 "__riscv_restore_3",
448 "__riscv_restore_4",
449 "__riscv_restore_5",
450 "__riscv_restore_6",
451 "__riscv_restore_7",
452 "__riscv_restore_8",
453 "__riscv_restore_9",
454 "__riscv_restore_10",
455 "__riscv_restore_11",
456 "__riscv_restore_12"
457 };
458
459 int LibCallID = getLibCallID(MF, CSI);
460 if (LibCallID == -1)
461 return nullptr;
462 return RestoreLibCalls[LibCallID];
463}
464
465// Get the max reg of Push/Pop for restoring callee saved registers.
466static unsigned getNumPushPopRegs(const std::vector<CalleeSavedInfo> &CSI) {
467 unsigned NumPushPopRegs = 0;
468 for (auto &CS : CSI) {
469 auto *FII = llvm::find_if(Range: FixedCSRFIMap,
470 P: [&](MCPhysReg P) { return P == CS.getReg(); });
471 if (FII != std::end(arr: FixedCSRFIMap)) {
472 unsigned RegNum = std::distance(first: std::begin(arr: FixedCSRFIMap), last: FII);
473 NumPushPopRegs = std::max(a: NumPushPopRegs, b: RegNum + 1);
474 }
475 }
476 assert(NumPushPopRegs != 12 && "x26 requires x27 to also be pushed");
477 return NumPushPopRegs;
478}
479
480// Return true if the specified function should have a dedicated frame
481// pointer register. This is true if frame pointer elimination is
482// disabled, if it needs dynamic stack realignment, if the function has
483// variable sized allocas, or if the frame address is taken.
484bool RISCVFrameLowering::hasFPImpl(const MachineFunction &MF) const {
485 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
486
487 const MachineFrameInfo &MFI = MF.getFrameInfo();
488 if (MF.getTarget().Options.DisableFramePointerElim(MF) ||
489 RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
490 MFI.isFrameAddressTaken())
491 return true;
492
493 // With large callframes around we may need to use FP to access the scavenging
494 // emergency spillslot.
495 //
496 // We calculate the MaxCallFrameSize at the end of isel so this value should
497 // be stable for the whole post-isel MIR pipeline.
498 //
499 // NOTE: The idea of forcing a frame pointer is copied from AArch64, but they
500 // conservatively return true when the call frame size hasd not been
501 // computed yet. On RISC-V that caused MachineOutliner tests to fail the
502 // MachineVerifier due to outlined functions not computing max call frame
503 // size thus the frame pointer would always be reserved.
504 if (MFI.isMaxCallFrameSizeComputed() && MFI.getMaxCallFrameSize() > 2047)
505 return true;
506
507 return false;
508}
509
510bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
511 const MachineFrameInfo &MFI = MF.getFrameInfo();
512 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
513
514 // If we do not reserve stack space for outgoing arguments in prologue,
515 // we will adjust the stack pointer before call instruction. After the
516 // adjustment, we can not use SP to access the stack objects for the
517 // arguments. Instead, use BP to access these stack objects.
518 return (MFI.hasVarSizedObjects() ||
519 (!hasReservedCallFrame(MF) && (!MFI.isMaxCallFrameSizeComputed() ||
520 MFI.getMaxCallFrameSize() != 0))) &&
521 TRI->hasStackRealignment(MF);
522}
523
524// Determines the size of the frame and maximum call frame size.
525void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
526 MachineFrameInfo &MFI = MF.getFrameInfo();
527 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
528
529 // Get the number of bytes to allocate from the FrameInfo.
530 uint64_t FrameSize = MFI.getStackSize();
531
532 // QCI Interrupts use at least 96 bytes of stack space
533 if (RVFI->useQCIInterrupt(MF))
534 FrameSize = std::max(a: FrameSize, b: QCIInterruptPushAmount);
535
536 // Get the alignment.
537 Align StackAlign = getStackAlign();
538
539 // Make sure the frame is aligned.
540 FrameSize = alignTo(Size: FrameSize, A: StackAlign);
541
542 // Update frame info.
543 MFI.setStackSize(FrameSize);
544
545 // When using SP or BP to access stack objects, we may require extra padding
546 // to ensure the bottom of the RVV stack is correctly aligned within the main
547 // stack. We calculate this as the amount required to align the scalar local
548 // variable section up to the RVV alignment.
549 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
550 if (RVFI->getRVVStackSize() && (!hasFP(MF) || TRI->hasStackRealignment(MF))) {
551 int ScalarLocalVarSize = FrameSize - RVFI->getCalleeSavedStackSize() -
552 RVFI->getVarArgsSaveSize();
553 if (auto RVVPadding =
554 offsetToAlignment(Value: ScalarLocalVarSize, Alignment: RVFI->getRVVStackAlign()))
555 RVFI->setRVVPadding(RVVPadding);
556 }
557}
558
559// Returns the stack size including RVV padding (when required), rounded back
560// up to the required stack alignment.
561uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding(
562 const MachineFunction &MF) const {
563 const MachineFrameInfo &MFI = MF.getFrameInfo();
564 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
565 return alignTo(Size: MFI.getStackSize() + RVFI->getRVVPadding(), A: getStackAlign());
566}
567
568static SmallVector<CalleeSavedInfo, 8>
569getUnmanagedCSI(const MachineFunction &MF,
570 const std::vector<CalleeSavedInfo> &CSI,
571 bool ReverseOrder = false) {
572 const MachineFrameInfo &MFI = MF.getFrameInfo();
573 SmallVector<CalleeSavedInfo, 8> NonLibcallCSI;
574
575 for (auto &CS : CSI) {
576 int FI = CS.getFrameIdx();
577 if (FI >= 0 && MFI.getStackID(ObjectIdx: FI) == TargetStackID::Default)
578 NonLibcallCSI.push_back(Elt: CS);
579 }
580
581 // Reverse the order so that load/store operations use ascending addresses,
582 // enabling better load/store clustering and fusion.
583 if (ReverseOrder)
584 std::reverse(first: NonLibcallCSI.begin(), last: NonLibcallCSI.end());
585
586 return NonLibcallCSI;
587}
588
589static SmallVector<CalleeSavedInfo, 8>
590getRVVCalleeSavedInfo(const MachineFunction &MF,
591 const std::vector<CalleeSavedInfo> &CSI) {
592 const MachineFrameInfo &MFI = MF.getFrameInfo();
593 SmallVector<CalleeSavedInfo, 8> RVVCSI;
594
595 for (auto &CS : CSI) {
596 int FI = CS.getFrameIdx();
597 if (FI >= 0 && MFI.getStackID(ObjectIdx: FI) == TargetStackID::ScalableVector)
598 RVVCSI.push_back(Elt: CS);
599 }
600
601 return RVVCSI;
602}
603
604static SmallVector<CalleeSavedInfo, 8>
605getPushOrLibCallsSavedInfo(const MachineFunction &MF,
606 const std::vector<CalleeSavedInfo> &CSI) {
607 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
608
609 SmallVector<CalleeSavedInfo, 8> PushOrLibCallsCSI;
610 if (!RVFI->useSaveRestoreLibCalls(MF) && !RVFI->isPushable(MF))
611 return PushOrLibCallsCSI;
612
613 for (const auto &CS : CSI) {
614 if (RVFI->useQCIInterrupt(MF)) {
615 // Some registers are saved by both `QC.C.MIENTER(.NEST)` and
616 // `QC.CM.PUSH(FP)`. In these cases, prioritise the CFI info that points
617 // to the versions saved by `QC.C.MIENTER(.NEST)` which is what FP
618 // unwinding would use.
619 if (llvm::is_contained(Range: llvm::make_first_range(c: FixedCSRFIQCIInterruptMap),
620 Element: CS.getReg()))
621 continue;
622 }
623
624 if (llvm::is_contained(Range: FixedCSRFIMap, Element: CS.getReg()))
625 PushOrLibCallsCSI.push_back(Elt: CS);
626 }
627
628 return PushOrLibCallsCSI;
629}
630
631static SmallVector<CalleeSavedInfo, 8>
632getQCISavedInfo(const MachineFunction &MF,
633 const std::vector<CalleeSavedInfo> &CSI) {
634 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
635
636 SmallVector<CalleeSavedInfo, 8> QCIInterruptCSI;
637 if (!RVFI->useQCIInterrupt(MF))
638 return QCIInterruptCSI;
639
640 for (const auto &CS : CSI) {
641 if (llvm::is_contained(Range: llvm::make_first_range(c: FixedCSRFIQCIInterruptMap),
642 Element: CS.getReg()))
643 QCIInterruptCSI.push_back(Elt: CS);
644 }
645
646 return QCIInterruptCSI;
647}
648
649void RISCVFrameLowering::allocateAndProbeStackForRVV(
650 MachineFunction &MF, MachineBasicBlock &MBB,
651 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t Amount,
652 MachineInstr::MIFlag Flag, bool EmitCFI, bool DynAllocation) const {
653 assert(Amount != 0 && "Did not need to adjust stack pointer for RVV.");
654
655 // Emit a variable-length allocation probing loop.
656
657 // Get VLEN in TargetReg
658 const RISCVInstrInfo *TII = STI.getInstrInfo();
659 Register TargetReg = RISCV::X6;
660 uint32_t NumOfVReg = Amount / RISCV::RVVBytesPerBlock;
661 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::PseudoReadVLENB), DestReg: TargetReg)
662 .setMIFlag(Flag);
663 TII->mulImm(MF, MBB, II: MBBI, DL, DestReg: TargetReg, Amt: NumOfVReg, Flag);
664
665 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
666 if (EmitCFI) {
667 // Set the CFA register to TargetReg.
668 CFIBuilder.buildDefCFA(Reg: TargetReg, Offset: -Amount);
669 }
670
671 // It will be expanded to a probe loop in `inlineStackProbe`.
672 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::PROBED_STACKALLOC_RVV))
673 .addReg(RegNo: TargetReg);
674
675 if (EmitCFI) {
676 // Set the CFA register back to SP.
677 CFIBuilder.buildDefCFARegister(Reg: SPReg);
678 }
679
680 // SUB SP, SP, T1
681 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::SUB), DestReg: SPReg)
682 .addReg(RegNo: SPReg)
683 .addReg(RegNo: TargetReg)
684 .setMIFlag(Flag);
685
686 // If we have a dynamic allocation later we need to probe any residuals.
687 if (DynAllocation) {
688 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: STI.is64Bit() ? RISCV::SD : RISCV::SW))
689 .addReg(RegNo: RISCV::X0)
690 .addReg(RegNo: SPReg)
691 .addImm(Val: 0)
692 .setMIFlags(MachineInstr::FrameSetup);
693 }
694}
695
696static void appendScalableVectorExpression(const TargetRegisterInfo &TRI,
697 SmallVectorImpl<char> &Expr,
698 StackOffset Offset,
699 llvm::raw_string_ostream &Comment) {
700 int64_t FixedOffset = Offset.getFixed();
701 int64_t ScalableOffset = Offset.getScalable();
702 unsigned DwarfVLenB = TRI.getDwarfRegNum(Reg: RISCV::VLENB, isEH: true);
703 if (FixedOffset) {
704 Expr.push_back(Elt: dwarf::DW_OP_consts);
705 appendLEB128<LEB128Sign::Signed>(Buffer&: Expr, Value: FixedOffset);
706 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);
707 Comment << (FixedOffset < 0 ? " - " : " + ") << std::abs(i: FixedOffset);
708 }
709
710 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_consts);
711 appendLEB128<LEB128Sign::Signed>(Buffer&: Expr, Value: ScalableOffset);
712
713 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_bregx);
714 appendLEB128<LEB128Sign::Unsigned>(Buffer&: Expr, Value: DwarfVLenB);
715 Expr.push_back(Elt: 0);
716
717 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_mul);
718 Expr.push_back(Elt: (uint8_t)dwarf::DW_OP_plus);
719
720 Comment << (ScalableOffset < 0 ? " - " : " + ") << std::abs(i: ScalableOffset)
721 << " * vlenb";
722}
723
724static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
725 Register Reg,
726 StackOffset Offset) {
727 assert(Offset.getScalable() != 0 && "Did not need to adjust CFA for RVV");
728 SmallString<64> Expr;
729 std::string CommentBuffer;
730 llvm::raw_string_ostream Comment(CommentBuffer);
731 // Build up the expression (Reg + FixedOffset + ScalableOffset * VLENB).
732 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, isEH: true);
733 Expr.push_back(Elt: (uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
734 Expr.push_back(Elt: 0);
735 if (Reg == SPReg)
736 Comment << "sp";
737 else
738 Comment << printReg(Reg, TRI: &TRI);
739
740 appendScalableVectorExpression(TRI, Expr, Offset, Comment);
741
742 SmallString<64> DefCfaExpr;
743 DefCfaExpr.push_back(Elt: dwarf::DW_CFA_def_cfa_expression);
744 appendLEB128<LEB128Sign::Unsigned>(Buffer&: DefCfaExpr, Value: Expr.size());
745 DefCfaExpr.append(RHS: Expr.str());
746
747 return MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str(), Loc: SMLoc(),
748 Comment: Comment.str());
749}
750
751static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
752 Register Reg, StackOffset Offset) {
753 assert(Offset.getScalable() != 0 && "Did not need to adjust CFA for RVV");
754 SmallString<64> Expr;
755 std::string CommentBuffer;
756 llvm::raw_string_ostream Comment(CommentBuffer);
757 Comment << printReg(Reg, TRI: &TRI) << " @ cfa";
758
759 // Build up the expression (FixedOffset + ScalableOffset * VLENB).
760 appendScalableVectorExpression(TRI, Expr, Offset, Comment);
761
762 SmallString<64> DefCfaExpr;
763 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, isEH: true);
764 DefCfaExpr.push_back(Elt: dwarf::DW_CFA_expression);
765 appendLEB128<LEB128Sign::Unsigned>(Buffer&: DefCfaExpr, Value: DwarfReg);
766 appendLEB128<LEB128Sign::Unsigned>(Buffer&: DefCfaExpr, Value: Expr.size());
767 DefCfaExpr.append(RHS: Expr.str());
768
769 return MCCFIInstruction::createEscape(L: nullptr, Vals: DefCfaExpr.str(), Loc: SMLoc(),
770 Comment: Comment.str());
771}
772
773// Allocate stack space and probe it if necessary.
774void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
775 MachineBasicBlock::iterator MBBI,
776 MachineFunction &MF, uint64_t Offset,
777 uint64_t RealStackSize, bool EmitCFI,
778 bool NeedProbe, uint64_t ProbeSize,
779 bool DynAllocation,
780 MachineInstr::MIFlag Flag) const {
781 DebugLoc DL;
782 const RISCVRegisterInfo *RI = STI.getRegisterInfo();
783 const RISCVInstrInfo *TII = STI.getInstrInfo();
784 bool IsRV64 = STI.is64Bit();
785 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
786
787 // Simply allocate the stack if it's not big enough to require a probe.
788 if (!NeedProbe || Offset <= ProbeSize) {
789 RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg, Offset: StackOffset::getFixed(Fixed: -Offset),
790 Flag, RequiredAlign: getStackAlign());
791
792 if (EmitCFI)
793 CFIBuilder.buildDefCFAOffset(Offset: RealStackSize);
794
795 if (NeedProbe && DynAllocation) {
796 // s[d|w] zero, 0(sp)
797 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
798 .addReg(RegNo: RISCV::X0)
799 .addReg(RegNo: SPReg)
800 .addImm(Val: 0)
801 .setMIFlags(Flag);
802 }
803
804 return;
805 }
806
807 // Unroll the probe loop depending on the number of iterations.
808 if (Offset < ProbeSize * 5) {
809 uint64_t CFAAdjust = RealStackSize - Offset;
810
811 uint64_t CurrentOffset = 0;
812 while (CurrentOffset + ProbeSize <= Offset) {
813 RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg,
814 Offset: StackOffset::getFixed(Fixed: -ProbeSize), Flag, RequiredAlign: getStackAlign());
815 // s[d|w] zero, 0(sp)
816 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
817 .addReg(RegNo: RISCV::X0)
818 .addReg(RegNo: SPReg)
819 .addImm(Val: 0)
820 .setMIFlags(Flag);
821
822 CurrentOffset += ProbeSize;
823 if (EmitCFI)
824 CFIBuilder.buildDefCFAOffset(Offset: CurrentOffset + CFAAdjust);
825 }
826
827 uint64_t Residual = Offset - CurrentOffset;
828 if (Residual) {
829 RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg,
830 Offset: StackOffset::getFixed(Fixed: -Residual), Flag, RequiredAlign: getStackAlign());
831 if (EmitCFI)
832 CFIBuilder.buildDefCFAOffset(Offset: RealStackSize);
833
834 if (DynAllocation) {
835 // s[d|w] zero, 0(sp)
836 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
837 .addReg(RegNo: RISCV::X0)
838 .addReg(RegNo: SPReg)
839 .addImm(Val: 0)
840 .setMIFlags(Flag);
841 }
842 }
843
844 return;
845 }
846
847 // Emit a variable-length allocation probing loop.
848 uint64_t RoundedSize = alignDown(Value: Offset, Align: ProbeSize);
849 uint64_t Residual = Offset - RoundedSize;
850
851 Register TargetReg = RISCV::X6;
852 // SUB TargetReg, SP, RoundedSize
853 RI->adjustReg(MBB, II: MBBI, DL, DestReg: TargetReg, SrcReg: SPReg,
854 Offset: StackOffset::getFixed(Fixed: -RoundedSize), Flag, RequiredAlign: getStackAlign());
855
856 if (EmitCFI) {
857 // Set the CFA register to TargetReg.
858 CFIBuilder.buildDefCFA(Reg: TargetReg, Offset: RoundedSize);
859 }
860
861 // It will be expanded to a probe loop in `inlineStackProbe`.
862 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::PROBED_STACKALLOC)).addReg(RegNo: TargetReg);
863
864 if (EmitCFI) {
865 // Set the CFA register back to SP.
866 CFIBuilder.buildDefCFARegister(Reg: SPReg);
867 }
868
869 if (Residual) {
870 RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg, Offset: StackOffset::getFixed(Fixed: -Residual),
871 Flag, RequiredAlign: getStackAlign());
872 if (DynAllocation) {
873 // s[d|w] zero, 0(sp)
874 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
875 .addReg(RegNo: RISCV::X0)
876 .addReg(RegNo: SPReg)
877 .addImm(Val: 0)
878 .setMIFlags(Flag);
879 }
880 }
881
882 if (EmitCFI)
883 CFIBuilder.buildDefCFAOffset(Offset);
884}
885
886static bool isPush(unsigned Opcode) {
887 switch (Opcode) {
888 case RISCV::CM_PUSH:
889 case RISCV::QC_CM_PUSH:
890 case RISCV::QC_CM_PUSHFP:
891 return true;
892 default:
893 return false;
894 }
895}
896
897static bool isPop(unsigned Opcode) {
898 // There are other pops but these are the only ones introduced during this
899 // pass.
900 switch (Opcode) {
901 case RISCV::CM_POP:
902 case RISCV::QC_CM_POP:
903 return true;
904 default:
905 return false;
906 }
907}
908
909static unsigned getPushOpcode(RISCVMachineFunctionInfo::PushPopKind Kind,
910 bool UpdateFP) {
911 switch (Kind) {
912 case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp:
913 return RISCV::CM_PUSH;
914 case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp:
915 return UpdateFP ? RISCV::QC_CM_PUSHFP : RISCV::QC_CM_PUSH;
916 default:
917 llvm_unreachable("Unhandled PushPopKind");
918 }
919}
920
921static unsigned getPopOpcode(RISCVMachineFunctionInfo::PushPopKind Kind) {
922 // There are other pops but they are introduced later by the Push/Pop
923 // Optimizer.
924 switch (Kind) {
925 case RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp:
926 return RISCV::CM_POP;
927 case RISCVMachineFunctionInfo::PushPopKind::VendorXqccmp:
928 return RISCV::QC_CM_POP;
929 default:
930 llvm_unreachable("Unhandled PushPopKind");
931 }
932}
933
// Emit the function prologue into MBB.
//
// In order, this: runs the SiFive CLIC stack swap and shadow-call-stack
// prologue hooks, finalises the frame layout, records the libcall frame size,
// folds part of the allocation into a leading push instruction when one is
// present, allocates the remaining fixed stack (optionally split in two and
// optionally probed), sets up FP, allocates the scalable (RVV) area, and
// finally realigns SP and sets up BP when required. CFI is emitted along the
// way whenever needsDwarfCFI(MF) is true.
//
// Functions using the GHC calling convention get no prologue at all.
934void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
935 MachineBasicBlock &MBB) const {
936 MachineFrameInfo &MFI = MF.getFrameInfo();
937 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
938 const RISCVRegisterInfo *RI = STI.getRegisterInfo();
939 MachineBasicBlock::iterator MBBI = MBB.begin();
940 bool PreferAscendingLS = STI.preferAscendingLoadStore();
941
942 Register BPReg = RISCVABI::getBPReg();
943
944 // Debug location must be unknown since the first debug location is used
945 // to determine the end of the prologue.
946 DebugLoc DL;
947
948 // All calls are tail calls in GHC calling conv, and functions have no
949 // prologue/epilogue.
950 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
951 return;
952
953 // SiFive CLIC needs to swap `sp` into `sf.mscratchcsw`
954 emitSiFiveCLICStackSwap(MF, MBB, MBBI, DL);
955
956 // Emit prologue for shadow call stack.
957 emitSCSPrologue(MF, MBB, MI: MBBI, DL);
958
959 // We keep track of the first instruction because it might be a
960 // `(QC.)CM.PUSH(FP)`, and we may need to adjust the immediate rather than
961 // inserting an `addi sp, sp, -N*16`
962 auto PossiblePush = MBBI;
963
964 // Skip past all callee-saved register spill instructions.
965 while (MBBI != MBB.end() && MBBI->getFlag(Flag: MachineInstr::FrameSetup))
966 ++MBBI;
967
968 // Determine the correct frame layout
969 determineFrameLayout(MF);
970
971 const auto &CSI = MFI.getCalleeSavedInfo();
972
973 // Skip to before the spills of scalar callee-saved registers
974 // FIXME: assumes exactly one instruction is used to save each
975 // callee-saved register.
976 MBBI =
977 std::prev(x: MBBI, n: getRVVCalleeSavedInfo(MF, CSI).size() +
978 getUnmanagedCSI(MF, CSI, ReverseOrder: PreferAscendingLS).size());
979 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
980 bool NeedsDwarfCFI = needsDwarfCFI(MF);
981
982 // If libcalls are used to spill and restore callee-saved registers, the frame
983 // has two sections; the opaque section managed by the libcalls, and the
984 // section managed by MachineFrameInfo which can also hold callee saved
985 // registers in fixed stack slots, both of which have negative frame indices.
986 // This gets even more complicated when incoming arguments are passed via the
987 // stack, as these too have negative frame indices. An example is detailed
988 // below:
989 //
990 // | incoming arg | <- FI[-3]
991 // | libcallspill |
992 // | calleespill | <- FI[-2]
993 // | calleespill | <- FI[-1]
994 // | this_frame | <- FI[0]
995 //
996 // For negative frame indices, the offset from the frame pointer will differ
997 // depending on which of these groups the frame index applies to.
998 // The following calculates the correct offset knowing the number of callee
999 // saved registers spilt by the two methods.
 // getLibCallID() + 1 is zero (and the branch is skipped) when the ID is -1;
 // emitEpilogue treats -1 as "no libcall in use".
1000 if (int LibCallRegs = getLibCallID(MF, CSI: MFI.getCalleeSavedInfo()) + 1) {
1001 // Calculate the size of the frame managed by the libcall. The stack
1002 // alignment of these libcalls should be the same as how we set it in
1003 // getABIStackAlignment.
1004 unsigned LibCallFrameSize =
1005 alignTo(Size: (STI.getXLen() / 8) * LibCallRegs, A: getStackAlign());
1006 RVFI->setLibCallStackSize(LibCallFrameSize);
1007
1008 if (NeedsDwarfCFI) {
1009 CFIBuilder.buildDefCFAOffset(Offset: LibCallFrameSize);
1010 for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
1011 CFIBuilder.buildOffset(Reg: CS.getReg(),
1012 Offset: MFI.getObjectOffset(ObjectIdx: CS.getFrameIdx()));
1013 }
1014 }
1015
1016 // FIXME (note copied from Lanai): This appears to be overallocating. Needs
1017 // investigation. Get the number of bytes to allocate from the FrameInfo.
 // RealStackSize is the value used for CFA/FP offsets; StackSize is the part
 // this function still has to allocate itself.
 // NOTE(review): the reserved-spills area is presumably allocated by the
 // push/libcall sequence - confirm.
1018 uint64_t RealStackSize = getStackSizeWithRVVPadding(MF);
1019 uint64_t StackSize = RealStackSize - RVFI->getReservedSpillsSize();
1020 uint64_t RVVStackSize = RVFI->getRVVStackSize();
1021
1022 // Early exit if there is no need to allocate on the stack
1023 if (RealStackSize == 0 && !MFI.adjustsStack() && RVVStackSize == 0)
1024 return;
1025
1026 // If the stack pointer has been marked as reserved, then produce an error if
1027 // the frame requires stack allocation
1028 if (STI.isRegisterReservedByUser(i: SPReg))
1029 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
1030 MF.getFunction(), "Stack pointer required, but has been reserved."});
1031
1032 uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
1033 // Split the SP adjustment to reduce the offsets of callee saved spill.
 // From here on both sizes describe only the first adjustment; the rest is
 // allocated further down as SecondSPAdjustAmount.
1034 if (FirstSPAdjustAmount) {
1035 StackSize = FirstSPAdjustAmount;
1036 RealStackSize = FirstSPAdjustAmount;
1037 }
1038
1039 if (RVFI->useQCIInterrupt(MF)) {
1040 // The function starts with `QC.C.MIENTER(.NEST)`, so the `(QC.)CM.PUSH(FP)`
1041 // could only be the next instruction.
1042 ++PossiblePush;
1043
1044 if (NeedsDwarfCFI) {
1045 // Insert the CFI metadata before where we think the `(QC.)CM.PUSH(FP)`
1046 // could be. The PUSH will also get its own CFI metadata for its own
1047 // modifications, which should come after the PUSH.
1048 CFIInstBuilder PushCFIBuilder(MBB, PossiblePush,
1049 MachineInstr::FrameSetup);
1050 PushCFIBuilder.buildDefCFAOffset(Offset: QCIInterruptPushAmount);
1051 for (const CalleeSavedInfo &CS : getQCISavedInfo(MF, CSI))
1052 PushCFIBuilder.buildOffset(Reg: CS.getReg(),
1053 Offset: MFI.getObjectOffset(ObjectIdx: CS.getFrameIdx()));
1054 }
1055 }
1056
1057 if (RVFI->isPushable(MF) && PossiblePush != MBB.end() &&
1058 isPush(Opcode: PossiblePush->getOpcode())) {
1059 // Use available stack adjustment in push instruction to allocate additional
1060 // stack space. Align the stack size down to a multiple of 16. This is
1061 // needed for RVE.
1062 // FIXME: Can we increase the stack size to a multiple of 16 instead?
 // NOTE(review): the cap of 48 is presumably the largest extra adjustment
 // the push immediate can encode - confirm against the ISA spec.
1063 uint64_t StackAdj =
1064 std::min(a: alignDown(Value: StackSize, Align: 16), b: static_cast<uint64_t>(48));
1065 PossiblePush->getOperand(i: 1).setImm(StackAdj);
1066 StackSize -= StackAdj;
1067
1068 if (NeedsDwarfCFI) {
 // RealStackSize - StackSize is the amount allocated so far, i.e. the
 // CFA offset right after the push.
1069 CFIBuilder.buildDefCFAOffset(Offset: RealStackSize - StackSize);
1070 for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
1071 CFIBuilder.buildOffset(Reg: CS.getReg(),
1072 Offset: MFI.getObjectOffset(ObjectIdx: CS.getFrameIdx()));
1073 }
1074 }
1075
1076 // Allocate space on the stack if necessary.
1077 auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
1078 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
1079 bool NeedProbe = TLI->hasInlineStackProbe(MF);
1080 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign: getStackAlign());
1081 bool DynAllocation =
1082 MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
1083 if (StackSize != 0)
1084 allocateStack(MBB, MBBI, MF, Offset: StackSize, RealStackSize, EmitCFI: NeedsDwarfCFI,
1085 NeedProbe, ProbeSize, DynAllocation,
1086 Flag: MachineInstr::FrameSetup);
1087
1088 // Save SiFive CLIC CSRs into Stack
1089 emitSiFiveCLICPreemptibleSaves(MF, MBB, MBBI, DL);
1090
1091 // The frame pointer is callee-saved, and code has been generated for us to
1092 // save it to the stack. We need to skip over the storing of callee-saved
1093 // registers as the frame pointer must be modified after it has been saved
1094 // to the stack, not before.
1095 // FIXME: assumes exactly one instruction is used to save each callee-saved
1096 // register.
1097 std::advance(i&: MBBI, n: getUnmanagedCSI(MF, CSI, ReverseOrder: PreferAscendingLS).size());
1098 CFIBuilder.setInsertPoint(MBBI);
1099
1100 // Iterate over list of callee-saved registers and emit .cfi_offset
1101 // directives.
1102 if (NeedsDwarfCFI) {
1103 for (const CalleeSavedInfo &CS :
1104 getUnmanagedCSI(MF, CSI, ReverseOrder: PreferAscendingLS)) {
1105 MCRegister Reg = CS.getReg();
1106 int64_t Offset = MFI.getObjectOffset(ObjectIdx: CS.getFrameIdx());
1107 // Emit CFI for both sub-registers. The even register is at the base
1108 // offset and odd at base+4.
1109 if (RISCV::GPRPairRegClass.contains(Reg)) {
1110 MCRegister EvenReg = RI->getSubReg(Reg, Idx: RISCV::sub_gpr_even);
1111 MCRegister OddReg = RI->getSubReg(Reg, Idx: RISCV::sub_gpr_odd);
1112 CFIBuilder.buildOffset(Reg: EvenReg, Offset);
1113 CFIBuilder.buildOffset(Reg: OddReg, Offset: Offset + 4);
1114 } else {
1115 CFIBuilder.buildOffset(Reg, Offset);
1116 }
1117 }
1118 }
1119
1120 // Generate new FP.
1121 if (hasFP(MF)) {
1122 if (STI.isRegisterReservedByUser(i: FPReg))
1123 MF.getFunction().getContext().diagnose(DI: DiagnosticInfoUnsupported{
1124 MF.getFunction(), "Frame pointer required, but has been reserved."});
1125 // The frame pointer does need to be reserved from register allocation.
1126 assert(MF.getRegInfo().isReserved(FPReg) && "FP not reserved");
1127
1128 // Some stack management variants automatically keep FP updated, so we don't
1129 // need an instruction to do so.
1130 if (!RVFI->hasImplicitFPUpdates(MF)) {
 // FP = SP + RealStackSize - VarArgsSaveSize.
1131 RI->adjustReg(
1132 MBB, II: MBBI, DL, DestReg: FPReg, SrcReg: SPReg,
1133 Offset: StackOffset::getFixed(Fixed: RealStackSize - RVFI->getVarArgsSaveSize()),
1134 Flag: MachineInstr::FrameSetup, RequiredAlign: getStackAlign());
1135 }
1136
 // From here on the CFA is described relative to FP, which is why the
 // SP-relative CFI below is only emitted when !hasFP(MF).
1137 if (NeedsDwarfCFI)
1138 CFIBuilder.buildDefCFA(Reg: FPReg, Offset: RVFI->getVarArgsSaveSize());
1139 }
1140
1141 uint64_t SecondSPAdjustAmount = 0;
1142 // Emit the second SP adjustment after saving callee saved registers.
1143 if (FirstSPAdjustAmount) {
1144 SecondSPAdjustAmount = getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
1145 assert(SecondSPAdjustAmount > 0 &&
1146 "SecondSPAdjustAmount should be greater than zero");
1147
1148 allocateStack(MBB, MBBI, MF, Offset: SecondSPAdjustAmount,
1149 RealStackSize: getStackSizeWithRVVPadding(MF), EmitCFI: NeedsDwarfCFI && !hasFP(MF),
1150 NeedProbe, ProbeSize, DynAllocation,
1151 Flag: MachineInstr::FrameSetup);
1152 }
1153
1154 if (RVVStackSize) {
1155 if (NeedProbe) {
1156 allocateAndProbeStackForRVV(MF, MBB, MBBI, DL, Amount: RVVStackSize,
1157 Flag: MachineInstr::FrameSetup,
1158 EmitCFI: NeedsDwarfCFI && !hasFP(MF), DynAllocation);
1159 } else {
1160 // We must keep the stack pointer aligned through any intermediate
1161 // updates.
1162 RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg,
1163 Offset: StackOffset::getScalable(Scalable: -RVVStackSize),
1164 Flag: MachineInstr::FrameSetup, RequiredAlign: getStackAlign());
1165 }
1166
1167 if (NeedsDwarfCFI && !hasFP(MF)) {
1168 // Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
1169 CFIBuilder.insertCFIInst(CFIInst: createDefCFAExpression(
1170 TRI: *RI, Reg: SPReg,
1171 Offset: StackOffset::get(Fixed: getStackSizeWithRVVPadding(MF), Scalable: RVVStackSize / 8)));
1172 }
1173
 // Step over the RVV callee-saved spills so their CFI is placed after them.
1174 std::advance(i&: MBBI, n: getRVVCalleeSavedInfo(MF, CSI).size());
1175 if (NeedsDwarfCFI)
1176 emitCalleeSavedRVVPrologCFI(MBB, MI: MBBI, HasFP: hasFP(MF));
1177 }
1178
1179 if (hasFP(MF)) {
1180 // Realign Stack
 // Note: this RI shadows the function-level RI declared at the top; both
 // are initialised from STI.getRegisterInfo().
1181 const RISCVRegisterInfo *RI = STI.getRegisterInfo();
1182 if (RI->hasStackRealignment(MF)) {
1183 Align MaxAlignment = MFI.getMaxAlign();
1184
1185 const RISCVInstrInfo *TII = STI.getInstrInfo();
 // A single ANDI is used when the negated alignment fits a signed 12-bit
 // immediate; otherwise the low bits are cleared with an SRLI/SLLI pair.
1186 if (isInt<12>(x: -(int)MaxAlignment.value())) {
1187 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::ANDI), DestReg: SPReg)
1188 .addReg(RegNo: SPReg)
1189 .addImm(Val: -(int)MaxAlignment.value())
1190 .setMIFlag(MachineInstr::FrameSetup);
1191 } else {
1192 unsigned ShiftAmount = Log2(A: MaxAlignment);
1193 Register VR =
1194 MF.getRegInfo().createVirtualRegister(RegClass: &RISCV::GPRRegClass);
1195 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::SRLI), DestReg: VR)
1196 .addReg(RegNo: SPReg)
1197 .addImm(Val: ShiftAmount)
1198 .setMIFlag(MachineInstr::FrameSetup);
1199 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::SLLI), DestReg: SPReg)
1200 .addReg(RegNo: VR)
1201 .addImm(Val: ShiftAmount)
1202 .setMIFlag(MachineInstr::FrameSetup);
1203 }
1204 if (NeedProbe && RVVStackSize == 0) {
1205 // Do a probe if the align + size allocated just passed the probe size
1206 // and was not yet probed.
1207 if (SecondSPAdjustAmount < ProbeSize &&
1208 SecondSPAdjustAmount + MaxAlignment.value() >= ProbeSize) {
1209 bool IsRV64 = STI.is64Bit();
1210 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
1211 .addReg(RegNo: RISCV::X0)
1212 .addReg(RegNo: SPReg)
1213 .addImm(Val: 0)
1214 .setMIFlags(MachineInstr::FrameSetup);
1215 }
1216 }
1217 // FP will be used to restore the frame in the epilogue, so we need
1218 // another base register BP to record SP after re-alignment. SP will
1219 // track the current stack after allocating variable sized objects.
1220 if (hasBP(MF)) {
1221 // move BP, SP
1222 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: RISCV::ADDI), DestReg: BPReg)
1223 .addReg(RegNo: SPReg)
1224 .addImm(Val: 0)
1225 .setMIFlag(MachineInstr::FrameSetup);
1226 }
1227 }
1228 }
1229}
1230
1231void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
1232 MachineBasicBlock &MBB,
1233 MachineBasicBlock::iterator MBBI,
1234 const DebugLoc &DL,
1235 uint64_t &StackSize,
1236 int64_t CFAOffset) const {
1237 const RISCVRegisterInfo *RI = STI.getRegisterInfo();
1238
1239 RI->adjustReg(MBB, II: MBBI, DL, DestReg: SPReg, SrcReg: SPReg, Offset: StackOffset::getFixed(Fixed: StackSize),
1240 Flag: MachineInstr::FrameDestroy, RequiredAlign: getStackAlign());
1241 StackSize = 0;
1242
1243 if (needsDwarfCFI(MF))
1244 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1245 .buildDefCFAOffset(Offset: CFAOffset);
1246}
1247
// Emit the function epilogue into MBB.
//
// The insertion point is placed before the run of FrameDestroy-flagged
// instructions that precede the first terminator. From there this function
// releases the scalable (RVV) area and the second half of a split SP
// adjustment, restores SP from FP when the stack size is not statically known,
// folds part of the deallocation into a pop instruction when one is present,
// releases whatever is left, and finally runs the shadow-call-stack and SiFive
// CLIC hooks. CFI is emitted along the way whenever needsDwarfCFI(MF) is true.
//
// Functions using the GHC calling convention get no epilogue at all.
1248void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
1249 MachineBasicBlock &MBB) const {
1250 const RISCVRegisterInfo *RI = STI.getRegisterInfo();
1251 MachineFrameInfo &MFI = MF.getFrameInfo();
1252 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1253 bool PreferAscendingLS = STI.preferAscendingLoadStore();
1254
1255 // All calls are tail calls in GHC calling conv, and functions have no
1256 // prologue/epilogue.
1257 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1258 return;
1259
1260 // Get the insert location for the epilogue. If there were no terminators in
1261 // the block, get the last instruction.
1262 MachineBasicBlock::iterator MBBI = MBB.end();
1263 DebugLoc DL;
1264 if (!MBB.empty()) {
 // The last non-debug instruction is only used to pick the debug location.
1265 MBBI = MBB.getLastNonDebugInstr();
1266 if (MBBI != MBB.end())
1267 DL = MBBI->getDebugLoc();
1268
1269 MBBI = MBB.getFirstTerminator();
1270
1271 // Skip to before the restores of all callee-saved registers.
1272 while (MBBI != MBB.begin() &&
1273 std::prev(x: MBBI)->getFlag(Flag: MachineInstr::FrameDestroy))
1274 --MBBI;
1275 }
1276
1277 const auto &CSI = MFI.getCalleeSavedInfo();
1278
1279 // Skip to before the restores of scalar callee-saved registers
1280 // FIXME: assumes exactly one instruction is used to restore each
1281 // callee-saved register.
1282 auto FirstScalarCSRRestoreInsn =
1283 std::next(x: MBBI, n: getRVVCalleeSavedInfo(MF, CSI).size());
1284 CFIInstBuilder CFIBuilder(MBB, FirstScalarCSRRestoreInsn,
1285 MachineInstr::FrameDestroy);
1286 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1287
 // With a split SP adjustment, both sizes cover only the first adjustment
 // (the part released after the callee-saved restores). Otherwise
 // RealStackSize is the whole frame and StackSize excludes the reserved
 // spills area.
1288 uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
1289 uint64_t RealStackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
1290 : getStackSizeWithRVVPadding(MF);
1291 uint64_t StackSize = FirstSPAdjustAmount ? FirstSPAdjustAmount
1292 : getStackSizeWithRVVPadding(MF) -
1293 RVFI->getReservedSpillsSize();
 // Distance from SP to FP once only the first adjustment remains; the same
 // expression is used in emitPrologue to set FP.
1294 uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
1295 uint64_t RVVStackSize = RVFI->getRVVStackSize();
1296
 // SP has to be recomputed from FP whenever its distance to the frame is not
 // a compile-time constant at this point.
1297 bool RestoreSPFromFP = RI->hasStackRealignment(MF) ||
1298 MFI.hasVarSizedObjects() || !hasReservedCallFrame(MF);
1299 if (RVVStackSize) {
1300 // If RestoreSPFromFP the stack pointer will be restored using the frame
1301 // pointer value.
1302 if (!RestoreSPFromFP)
1303 RI->adjustReg(MBB, II: FirstScalarCSRRestoreInsn, DL, DestReg: SPReg, SrcReg: SPReg,
1304 Offset: StackOffset::getScalable(Scalable: RVVStackSize),
1305 Flag: MachineInstr::FrameDestroy, RequiredAlign: getStackAlign());
1306
1307 if (NeedsDwarfCFI) {
1308 if (!hasFP(MF))
1309 CFIBuilder.buildDefCFA(Reg: SPReg, Offset: RealStackSize);
1310 emitCalleeSavedRVVEpilogCFI(MBB, MI: FirstScalarCSRRestoreInsn);
1311 }
1312 }
1313
1314 if (FirstSPAdjustAmount) {
1315 uint64_t SecondSPAdjustAmount =
1316 getStackSizeWithRVVPadding(MF) - FirstSPAdjustAmount;
1317 assert(SecondSPAdjustAmount > 0 &&
1318 "SecondSPAdjustAmount should be greater than zero");
1319
1320 // If RestoreSPFromFP the stack pointer will be restored using the frame
1321 // pointer value.
1322 if (!RestoreSPFromFP)
1323 RI->adjustReg(MBB, II: FirstScalarCSRRestoreInsn, DL, DestReg: SPReg, SrcReg: SPReg,
1324 Offset: StackOffset::getFixed(Fixed: SecondSPAdjustAmount),
1325 Flag: MachineInstr::FrameDestroy, RequiredAlign: getStackAlign());
1326
1327 if (NeedsDwarfCFI && !hasFP(MF))
1328 CFIBuilder.buildDefCFAOffset(Offset: FirstSPAdjustAmount);
1329 }
1330
1331 // Restore the stack pointer using the value of the frame pointer. Only
1332 // necessary if the stack pointer was modified, meaning the stack size is
1333 // unknown.
1334 //
1335 // In order to make sure the stack point is right through the EH region,
1336 // we also need to restore stack pointer from the frame pointer if we
1337 // don't preserve stack space within prologue/epilogue for outgoing variables,
1338 // normally it's just checking the variable sized object is present or not
1339 // is enough, but we also don't preserve that at prologue/epilogue when
1340 // have vector objects in stack.
1341 if (RestoreSPFromFP) {
1342 assert(hasFP(MF) && "frame pointer should not have been eliminated");
1343 RI->adjustReg(MBB, II: FirstScalarCSRRestoreInsn, DL, DestReg: SPReg, SrcReg: FPReg,
1344 Offset: StackOffset::getFixed(Fixed: -FPOffset), Flag: MachineInstr::FrameDestroy,
1345 RequiredAlign: getStackAlign());
1346 }
1347
 // Switch the CFA rule to SP-relative with offset RealStackSize.
1348 if (NeedsDwarfCFI && hasFP(MF))
1349 CFIBuilder.buildDefCFA(Reg: SPReg, Offset: RealStackSize);
1350
1351 // Skip to after the restores of scalar callee-saved registers
1352 // FIXME: assumes exactly one instruction is used to restore each
1353 // callee-saved register.
1354 MBBI = std::next(x: FirstScalarCSRRestoreInsn,
1355 n: getUnmanagedCSI(MF, CSI, ReverseOrder: PreferAscendingLS).size());
1356 CFIBuilder.setInsertPoint(MBBI);
1357
1358 if (getLibCallID(MF, CSI) != -1) {
1359 // tail __riscv_restore_[0-12] instruction is considered as a terminator,
1360 // therefore it is unnecessary to place any CFI instructions after it. Just
1361 // deallocate stack if needed and return.
1362 if (StackSize != 0)
1363 deallocateStack(MF, MBB, MBBI, DL, StackSize,
1364 CFAOffset: RVFI->getLibCallStackSize());
1365
1366 // Emit epilogue for shadow call stack.
1367 emitSCSEpilogue(MF, MBB, MI: MBBI, DL);
1368 return;
1369 }
1370
1371 // Recover callee-saved registers.
1372 if (NeedsDwarfCFI) {
1373 for (const CalleeSavedInfo &CS :
1374 getUnmanagedCSI(MF, CSI, ReverseOrder: PreferAscendingLS)) {
1375 MCRegister Reg = CS.getReg();
1376 // Emit CFI for both sub-registers.
1377 if (RISCV::GPRPairRegClass.contains(Reg)) {
1378 MCRegister EvenReg = RI->getSubReg(Reg, Idx: RISCV::sub_gpr_even);
1379 MCRegister OddReg = RI->getSubReg(Reg, Idx: RISCV::sub_gpr_odd);
1380 CFIBuilder.buildRestore(Reg: EvenReg);
1381 CFIBuilder.buildRestore(Reg: OddReg);
1382 } else {
1383 CFIBuilder.buildRestore(Reg);
1384 }
1385 }
1386 }
1387
1388 if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(Opcode: MBBI->getOpcode())) {
1389 // Use available stack adjustment in pop instruction to deallocate stack
1390 // space. Align the stack size down to a multiple of 16. This is needed for
1391 // RVE.
1392 // FIXME: Can we increase the stack size to a multiple of 16 instead?
 // Same computation as the push side in emitPrologue, so the pop releases
 // exactly what the push allocated.
1393 uint64_t StackAdj =
1394 std::min(a: alignDown(Value: StackSize, Align: 16), b: static_cast<uint64_t>(48));
1395 MBBI->getOperand(i: 1).setImm(StackAdj);
1396 StackSize -= StackAdj;
1397
 // Anything the pop cannot cover is released before it; deallocateStack
 // resets StackSize to 0, so the final deallocation below is then skipped.
1398 if (StackSize != 0)
1399 deallocateStack(MF, MBB, MBBI, DL, StackSize,
1400 /*stack_adj of cm.pop instr*/ CFAOffset: RealStackSize - StackSize);
1401
 // CFI after the pop is only emitted when the pop is not directly followed
 // by PseudoRET.
1402 auto NextI = next_nodbg(It: MBBI, End: MBB.end());
1403 if (NextI == MBB.end() || NextI->getOpcode() != RISCV::PseudoRET) {
1404 ++MBBI;
1405 if (NeedsDwarfCFI) {
1406 CFIBuilder.setInsertPoint(MBBI);
1407
1408 for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
1409 CFIBuilder.buildRestore(Reg: CS.getReg());
1410
1411 // Update CFA Offset. If this is a QCI interrupt function, there will
1412 // be a leftover offset which is deallocated by `QC.C.MILEAVERET`,
1413 // otherwise getQCIInterruptStackSize() will be 0.
1414 CFIBuilder.buildDefCFAOffset(Offset: RVFI->getQCIInterruptStackSize());
1415 }
1416 }
1417 }
1418
1419 emitSiFiveCLICPreemptibleRestores(MF, MBB, MBBI, DL);
1420
1421 // Deallocate stack if StackSize isn't a zero yet. If this is a QCI interrupt
1422 // function, there will be a leftover offset which is deallocated by
1423 // `QC.C.MILEAVERET`, otherwise getQCIInterruptStackSize() will be 0.
1424 if (StackSize != 0)
1425 deallocateStack(MF, MBB, MBBI, DL, StackSize,
1426 CFAOffset: RVFI->getQCIInterruptStackSize());
1427
1428 // Emit epilogue for shadow call stack.
1429 emitSCSEpilogue(MF, MBB, MI: MBBI, DL);
1430
1431 // SiFive CLIC needs to swap `sf.mscratchcsw` into `sp`
1432 emitSiFiveCLICStackSwap(MF, MBB, MBBI, DL);
1433}
1434
1435static MCRegister getLargestFPRegisterOrZero(const RISCVSubtarget &STI,
1436 const TargetRegisterInfo &TRI,
1437 MCRegister Reg) {
1438 if (!STI.hasStdExtF())
1439 return MCRegister();
1440
1441 TargetRegisterClass const *LargestFPRegClass = STI.getLargestFPRegClass();
1442 assert(LargestFPRegClass);
1443
1444 if (LargestFPRegClass->contains(Reg))
1445 return Reg;
1446
1447 std::array<TargetRegisterClass const *, 3> RegisterClasses = {
1448 &RISCV::FPR16RegClass, &RISCV::FPR32RegClass, &RISCV::FPR64RegClass};
1449 std::array<unsigned, 3> SubIdx = {RISCV::sub_16, RISCV::sub_32,
1450 RISCV::sub_64};
1451
1452 for (auto [RegClass, SubReg] : zip(t&: RegisterClasses, u&: SubIdx)) {
1453 if (RegClass->contains(Reg)) {
1454 if (MCRegister Super =
1455 TRI.getMatchingSuperReg(Reg, SubIdx: SubReg, RC: LargestFPRegClass))
1456 return Super;
1457 }
1458 }
1459
1460 // Reg is bigger than what's currently available for the target, we can ignore
1461 // it.
1462 return MCRegister();
1463}
1464
1465void RISCVFrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
1466 MachineBasicBlock &MBB) const {
1467 // Insertion point.
1468 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1469
1470 // Fake a debug loc.
1471 DebugLoc DL;
1472 if (MBBI != MBB.end())
1473 DL = MBBI->getDebugLoc();
1474
1475 const MachineFunction &MF = *MBB.getParent();
1476 const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
1477 const RISCVInstrInfo &TII = *STI.getInstrInfo();
1478
1479 BitVector FinalRegsToZero(TRI.getNumRegs());
1480
1481 for (MCRegister Reg : RegsToZero.set_bits()) {
1482 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
1483 FinalRegsToZero.set(Reg.id());
1484 } else if (TRI.isFPRegister(Reg)) {
1485 if (MCRegister MaybeReg = getLargestFPRegisterOrZero(STI, TRI, Reg))
1486 FinalRegsToZero.set(MaybeReg.id());
1487 }
1488 }
1489
1490 for (MCRegister Reg : FinalRegsToZero.set_bits())
1491 TII.buildClearRegister(Reg, MBB, Iter: MBBI, DL);
1492}
1493
1494StackOffset
1495RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
1496 Register &FrameReg) const {
1497 const MachineFrameInfo &MFI = MF.getFrameInfo();
1498 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
1499 const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1500
1501 // Callee-saved registers should be referenced relative to the stack
1502 // pointer (positive offset), otherwise use the frame pointer (negative
1503 // offset).
1504 const auto &CSI = getUnmanagedCSI(MF, CSI: MFI.getCalleeSavedInfo(),
1505 ReverseOrder: STI.preferAscendingLoadStore());
1506 int MinCSFI = 0;
1507 int MaxCSFI = -1;
1508 StackOffset Offset;
1509 auto StackID = MFI.getStackID(ObjectIdx: FI);
1510
1511 assert((StackID == TargetStackID::Default ||
1512 StackID == TargetStackID::ScalableVector) &&
1513 "Unexpected stack ID for the frame object.");
1514 if (StackID == TargetStackID::Default) {
1515 assert(getOffsetOfLocalArea() == 0 && "LocalAreaOffset is not 0!");
1516 Offset = StackOffset::getFixed(Fixed: MFI.getObjectOffset(ObjectIdx: FI) +
1517 MFI.getOffsetAdjustment());
1518 } else if (StackID == TargetStackID::ScalableVector) {
1519 Offset = StackOffset::getScalable(Scalable: MFI.getObjectOffset(ObjectIdx: FI));
1520 }
1521
1522 uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
1523
1524 if (CSI.size()) {
1525 MinCSFI = std::min(a: CSI.front().getFrameIdx(), b: CSI.back().getFrameIdx());
1526 MaxCSFI = std::max(a: CSI.front().getFrameIdx(), b: CSI.back().getFrameIdx());
1527 }
1528
1529 if (FI >= MinCSFI && FI <= MaxCSFI) {
1530 FrameReg = SPReg;
1531
1532 if (FirstSPAdjustAmount)
1533 Offset += StackOffset::getFixed(Fixed: FirstSPAdjustAmount);
1534 else
1535 Offset += StackOffset::getFixed(Fixed: getStackSizeWithRVVPadding(MF));
1536 return Offset;
1537 }
1538
1539 if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(ObjectIdx: FI)) {
1540 // If the stack was realigned, the frame pointer is set in order to allow
1541 // SP to be restored, so we need another base register to record the stack
1542 // after realignment.
1543 // |--------------------------| --
1544 // | callee-allocated save | | <----|
1545 // | area for register varargs| | |
1546 // |--------------------------| <-- FP |
1547 // | callee-saved registers | | |
1548 // |--------------------------| -- |
1549 // | realignment (the size of | | |
1550 // | this area is not counted | | |
1551 // | in MFI.getStackSize()) | | |
1552 // |--------------------------| -- |-- MFI.getStackSize()
1553 // | RVV alignment padding | | |
1554 // | (not counted in | | |
1555 // | MFI.getStackSize() but | | |
1556 // | counted in | | |
1557 // | RVFI.getRVVStackSize()) | | |
1558 // |--------------------------| -- |
1559 // | RVV objects | | |
1560 // | (not counted in | | |
1561 // | MFI.getStackSize()) | | |
1562 // |--------------------------| -- |
1563 // | padding before RVV | | |
1564 // | (not counted in | | |
1565 // | MFI.getStackSize() or in | | |
1566 // | RVFI.getRVVStackSize()) | | |
1567 // |--------------------------| -- |
1568 // | scalar local variables | | <----'
1569 // |--------------------------| -- <-- BP (if var sized objects present)
1570 // | VarSize objects | |
1571 // |--------------------------| -- <-- SP
1572 if (hasBP(MF)) {
1573 FrameReg = RISCVABI::getBPReg();
1574 } else {
1575 // VarSize objects must be empty in this case!
1576 assert(!MFI.hasVarSizedObjects());
1577 FrameReg = SPReg;
1578 }
1579 } else if (!RI->hasStackRealignment(MF)) {
1580 // Note: Keeping the following as multiple 'if' statements rather than
1581 // merging to a single expression for readability.
1582 if (!hasFP(MF)) {
1583 // No FP available, must use SP.
1584 FrameReg = SPReg;
1585 } else {
1586 FrameReg = FPReg;
1587 // SP-relative addressing is only valid when SP is stable throughout
1588 // the function body: no dynamic SP adjustments for outgoing call args,
1589 // no variable-sized objects, and no RVV scalable stack regions.
1590 // hasReservedCallFrame() conservatively encompasses all these checks.
1591 if (hasReservedCallFrame(MF)) {
1592 // Both FP and SP are candidates.
1593 // Prefer SP when the SP-relative offset fits in the compressed
1594 // instruction immediate range.
1595 int64_t SPOff = Offset.getFixed() + MFI.getStackSize();
1596 int64_t CLWSPMaxOffset = 252;
1597 int64_t CLDSPMaxOffset = 504;
1598 int64_t SPThreshold = STI.is64Bit() ? CLDSPMaxOffset : CLWSPMaxOffset;
1599 if (SPOff >= 0 && SPOff <= SPThreshold)
1600 FrameReg = SPReg;
1601 }
1602 }
1603 } else {
1604 assert(RI->hasStackRealignment(MF) && MFI.isFixedObjectIndex(FI) &&
1605 "Expected fixed object with stack realignment");
1606 assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
1607 FrameReg = FPReg;
1608 }
1609
1610 if (FrameReg == FPReg) {
1611 Offset += StackOffset::getFixed(Fixed: RVFI->getVarArgsSaveSize());
1612 // When using FP to access scalable vector objects, we need to minus
1613 // the frame size.
1614 //
1615 // |--------------------------| --
1616 // | callee-allocated save | |
1617 // | area for register varargs| |
1618 // |--------------------------| | -- <-- FP
1619 // | callee-saved registers | |
1620 // |--------------------------| | MFI.getStackSize()
1621 // | scalar local variables | |
1622 // |--------------------------| -- (Offset of RVV objects is from here.)
1623 // | RVV objects |
1624 // |--------------------------|
1625 // | VarSize objects |
1626 // |--------------------------| <-- SP
1627 if (StackID == TargetStackID::ScalableVector) {
1628 assert(!RI->hasStackRealignment(MF) &&
1629 "Can't index across variable sized realign");
1630 // We don't expect any extra RVV alignment padding, as the stack size
1631 // and RVV object sections should be correct aligned in their own
1632 // right.
1633 assert(MFI.getStackSize() == getStackSizeWithRVVPadding(MF) &&
1634 "Inconsistent stack layout");
1635 Offset -= StackOffset::getFixed(Fixed: MFI.getStackSize());
1636 }
1637 return Offset;
1638 }
1639
1640 // This case handles indexing off both SP and BP.
1641 // If indexing off SP, there must not be any var sized objects
1642 assert(FrameReg == RISCVABI::getBPReg() || !MFI.hasVarSizedObjects());
1643
1644 // When using SP to access frame objects, we need to add RVV stack size.
1645 //
1646 // |--------------------------| --
1647 // | callee-allocated save | | <----|
1648 // | area for register varargs| | |
1649 // |--------------------------| | | <-- FP
1650 // | callee-saved registers | | |
1651 // |--------------------------| -- |
1652 // | RVV alignment padding | | |
1653 // | (not counted in | | |
1654 // | MFI.getStackSize() but | | |
1655 // | counted in | | |
1656 // | RVFI.getRVVStackSize()) | | |
1657 // |--------------------------| -- |
1658 // | RVV objects | | |-- MFI.getStackSize()
1659 // | (not counted in | | |
1660 // | MFI.getStackSize()) | | |
1661 // |--------------------------| -- |
1662 // | padding before RVV | | |
1663 // | (not counted in | | |
1664 // | MFI.getStackSize()) | | |
1665 // |--------------------------| -- |
1666 // | scalar local variables | | <----'
1667 // |--------------------------| -- <-- BP (if var sized objects present)
1668 // | VarSize objects | |
1669 // |--------------------------| -- <-- SP
1670 //
1671 // The total amount of padding surrounding RVV objects is described by
1672 // RVV->getRVVPadding() and it can be zero. It allows us to align the RVV
1673 // objects to the required alignment.
1674 if (MFI.getStackID(ObjectIdx: FI) == TargetStackID::Default) {
1675 if (MFI.isFixedObjectIndex(ObjectIdx: FI)) {
1676 assert(!RI->hasStackRealignment(MF) &&
1677 "Can't index across variable sized realign");
1678 Offset += StackOffset::get(Fixed: getStackSizeWithRVVPadding(MF),
1679 Scalable: RVFI->getRVVStackSize());
1680 } else {
1681 Offset += StackOffset::getFixed(Fixed: MFI.getStackSize());
1682 }
1683 } else if (MFI.getStackID(ObjectIdx: FI) == TargetStackID::ScalableVector) {
1684 // Ensure the base of the RVV stack is correctly aligned: add on the
1685 // alignment padding.
1686 int64_t ScalarLocalVarSize =
1687 MFI.getStackSize() - RVFI->getCalleeSavedStackSize() -
1688 RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
1689 Offset += StackOffset::get(Fixed: ScalarLocalVarSize, Scalable: RVFI->getRVVStackSize());
1690 }
1691 return Offset;
1692}
1693
1694static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
1695 const Register &Reg) {
1696 MCRegister BaseReg = TRI.getSubReg(Reg, Idx: RISCV::sub_vrm1_0);
1697 // If it's not a grouped vector register, it doesn't have subregister, so
1698 // the base register is just itself.
1699 if (!BaseReg.isValid())
1700 BaseReg = Reg;
1701 return BaseReg;
1702}
1703
1704void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
1705 BitVector &SavedRegs,
1706 RegScavenger *RS) const {
1707 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1708
1709 // In TargetFrameLowering::determineCalleeSaves, any vector register is marked
1710 // as saved if any of its subregister is clobbered, this is not correct in
1711 // vector registers. We only want the vector register to be marked as saved
1712 // if all of its subregisters are clobbered.
1713 // For example:
1714 // Original behavior: If v24 is marked, v24m2, v24m4, v24m8 are also marked.
1715 // Correct behavior: v24m2 is marked only if v24 and v25 are marked.
1716 MachineRegisterInfo &MRI = MF.getRegInfo();
1717 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
1718 const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
1719 for (unsigned i = 0; CSRegs[i]; ++i) {
1720 unsigned CSReg = CSRegs[i];
1721 // Only vector registers need special care.
1722 if (!RISCV::VRRegClass.contains(Reg: getRVVBaseRegister(TRI, Reg: CSReg)))
1723 continue;
1724
1725 SavedRegs.reset(Idx: CSReg);
1726
1727 auto SubRegs = TRI.subregs(Reg: CSReg);
1728 // Set the register and all its subregisters.
1729 if (!MRI.def_empty(RegNo: CSReg) || MRI.getUsedPhysRegsMask().test(Idx: CSReg)) {
1730 SavedRegs.set(CSReg);
1731 for (unsigned Reg : SubRegs)
1732 SavedRegs.set(Reg);
1733 }
1734
1735 }
1736
1737 // Unconditionally spill RA and FP only if the function uses a frame
1738 // pointer.
1739 if (hasFP(MF)) {
1740 SavedRegs.set(RAReg);
1741 SavedRegs.set(FPReg);
1742 }
1743 // Mark BP as used if function has dedicated base pointer.
1744 if (hasBP(MF))
1745 SavedRegs.set(RISCVABI::getBPReg());
1746
1747 // When using cm.push/pop we must save X27 if we save X26.
1748 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1749 if (RVFI->isPushable(MF) && SavedRegs.test(Idx: RISCV::X26))
1750 SavedRegs.set(RISCV::X27);
1751
1752 // For Zilsd on RV32, append GPRPair registers to the CSR list. This prevents
1753 // the need to create register sets for each abi which is a lot more complex.
1754 // Don't use Zilsd for callee-saved coalescing if the required alignment
1755 // exceeds the stack alignment or when Zcmp/Xqccmp or save/restore libcalls
1756 // are enabled.
1757 bool UseZilsd = !STI.is64Bit() && STI.hasStdExtZilsd() &&
1758 STI.getZilsdAlign() <= getStackAlign() &&
1759 !RVFI->isPushable(MF) && !RVFI->useSaveRestoreLibCalls(MF);
1760 if (UseZilsd) {
1761 SmallVector<MCPhysReg, 32> NewCSRs;
1762 SmallSet<MCPhysReg, 16> CSRSet;
1763 for (unsigned i = 0; CSRegs[i]; ++i) {
1764 NewCSRs.push_back(Elt: CSRegs[i]);
1765 CSRSet.insert(V: CSRegs[i]);
1766 }
1767
1768 // Append GPRPair registers for pairs where both sub-registers are in CSR
1769 // list. Iterate through all GPRPairs and check if both sub-regs are CSRs.
1770 for (MCPhysReg Pair : RISCV::GPRPairRegClass) {
1771 // Do not append a pair that's already in the CSR list.
1772 if (CSRSet.contains(V: Pair))
1773 continue;
1774 MCRegister EvenReg = TRI.getSubReg(Reg: Pair, Idx: RISCV::sub_gpr_even);
1775 MCRegister OddReg = TRI.getSubReg(Reg: Pair, Idx: RISCV::sub_gpr_odd);
1776 if (CSRSet.contains(V: EvenReg.id()) && CSRSet.contains(V: OddReg.id())) {
1777 NewCSRs.push_back(Elt: Pair);
1778 CSRSet.insert(V: Pair);
1779 }
1780 }
1781
1782 MRI.setCalleeSavedRegs(NewCSRs);
1783 CSRegs = MRI.getCalleeSavedRegs();
1784 }
1785
1786 // Check if all subregisters are marked for saving. If so, set the super
1787 // register bit. For GPRPair, only check sub_gpr_even and sub_gpr_odd, not
1788 // aliases like X8_W or X8_H which are not set in SavedRegs.
1789 for (unsigned i = 0; CSRegs[i]; ++i) {
1790 MCRegister CSReg = CSRegs[i];
1791 bool CombineToSuperReg;
1792 if (RISCV::GPRPairRegClass.contains(Reg: CSReg)) {
1793 MCRegister EvenReg = TRI.getSubReg(Reg: CSReg, Idx: RISCV::sub_gpr_even);
1794 MCRegister OddReg = TRI.getSubReg(Reg: CSReg, Idx: RISCV::sub_gpr_odd);
1795 CombineToSuperReg =
1796 SavedRegs.test(Idx: EvenReg.id()) && SavedRegs.test(Idx: OddReg.id());
1797 // If s0(x8) is used as FP we can't generate load/store pair because it
1798 // breaks the frame chain.
1799 if (hasFP(MF) && CSReg == RISCV::X8_X9)
1800 CombineToSuperReg = false;
1801 } else {
1802 auto SubRegs = TRI.subregs(Reg: CSReg);
1803 CombineToSuperReg =
1804 !SubRegs.empty() && llvm::all_of(Range&: SubRegs, P: [&](unsigned Reg) {
1805 return SavedRegs.test(Idx: Reg);
1806 });
1807 }
1808
1809 if (CombineToSuperReg)
1810 SavedRegs.set(CSReg);
1811 }
1812
1813 // SiFive Preemptible Interrupt Handlers need additional frame entries
1814 createSiFivePreemptibleInterruptFrameEntries(MF, RVFI&: *RVFI);
1815}
1816
1817std::pair<int64_t, Align>
1818RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
1819 MachineFrameInfo &MFI = MF.getFrameInfo();
1820 // Create a buffer of RVV objects to allocate.
1821 SmallVector<int, 8> ObjectsToAllocate;
1822 auto pushRVVObjects = [&](int FIBegin, int FIEnd) {
1823 for (int I = FIBegin, E = FIEnd; I != E; ++I) {
1824 unsigned StackID = MFI.getStackID(ObjectIdx: I);
1825 if (StackID != TargetStackID::ScalableVector)
1826 continue;
1827 if (MFI.isDeadObjectIndex(ObjectIdx: I))
1828 continue;
1829
1830 ObjectsToAllocate.push_back(Elt: I);
1831 }
1832 };
1833 // First push RVV Callee Saved object, then push RVV stack object
1834 std::vector<CalleeSavedInfo> &CSI = MF.getFrameInfo().getCalleeSavedInfo();
1835 const auto &RVVCSI = getRVVCalleeSavedInfo(MF, CSI);
1836 if (!RVVCSI.empty())
1837 pushRVVObjects(RVVCSI[0].getFrameIdx(),
1838 RVVCSI[RVVCSI.size() - 1].getFrameIdx() + 1);
1839 pushRVVObjects(0, MFI.getObjectIndexEnd() - RVVCSI.size());
1840
1841 // The minimum alignment is 16 bytes.
1842 Align RVVStackAlign(16);
1843 const auto &ST = MF.getSubtarget<RISCVSubtarget>();
1844
1845 if (!ST.hasVInstructions()) {
1846 assert(ObjectsToAllocate.empty() &&
1847 "Can't allocate scalable-vector objects without V instructions");
1848 return std::make_pair(x: 0, y&: RVVStackAlign);
1849 }
1850
1851 // Allocate all RVV locals and spills
1852 int64_t Offset = 0;
1853 for (int FI : ObjectsToAllocate) {
1854 // ObjectSize in bytes.
1855 int64_t ObjectSize = MFI.getObjectSize(ObjectIdx: FI);
1856 auto ObjectAlign =
1857 std::max(a: Align(RISCV::RVVBytesPerBlock), b: MFI.getObjectAlign(ObjectIdx: FI));
1858 // If the data type is the fractional vector type, reserve one vector
1859 // register for it.
1860 if (ObjectSize < RISCV::RVVBytesPerBlock)
1861 ObjectSize = RISCV::RVVBytesPerBlock;
1862 Offset = alignTo(Size: Offset + ObjectSize, A: ObjectAlign);
1863 MFI.setObjectOffset(ObjectIdx: FI, SPOffset: -Offset);
1864 // Update the maximum alignment of the RVV stack section
1865 RVVStackAlign = std::max(a: RVVStackAlign, b: ObjectAlign);
1866 }
1867
1868 uint64_t StackSize = Offset;
1869
1870 // Ensure the alignment of the RVV stack. Since we want the most-aligned
1871 // object right at the bottom (i.e., any padding at the top of the frame),
1872 // readjust all RVV objects down by the alignment padding.
1873 // Stack size and offsets are multiples of vscale, stack alignment is in
1874 // bytes, we can divide stack alignment by minimum vscale to get a maximum
1875 // stack alignment multiple of vscale.
1876 auto VScale =
1877 std::max<uint64_t>(a: ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, b: 1);
1878 if (auto RVVStackAlignVScale = RVVStackAlign.value() / VScale) {
1879 if (auto AlignmentPadding =
1880 offsetToAlignment(Value: StackSize, Alignment: Align(RVVStackAlignVScale))) {
1881 StackSize += AlignmentPadding;
1882 for (int FI : ObjectsToAllocate)
1883 MFI.setObjectOffset(ObjectIdx: FI, SPOffset: MFI.getObjectOffset(ObjectIdx: FI) - AlignmentPadding);
1884 }
1885 }
1886
1887 return std::make_pair(x&: StackSize, y&: RVVStackAlign);
1888}
1889
1890static unsigned getScavSlotsNumForRVV(MachineFunction &MF) {
1891 // For RVV spill, scalable stack offsets computing requires up to two scratch
1892 // registers
1893 static constexpr unsigned ScavSlotsNumRVVSpillScalableObject = 2;
1894
1895 // For RVV spill, non-scalable stack offsets computing requires up to one
1896 // scratch register.
1897 static constexpr unsigned ScavSlotsNumRVVSpillNonScalableObject = 1;
1898
1899 // ADDI instruction's destination register can be used for computing
1900 // offsets. So Scalable stack offsets require up to one scratch register.
1901 static constexpr unsigned ScavSlotsADDIScalableObject = 1;
1902
1903 static constexpr unsigned MaxScavSlotsNumKnown =
1904 std::max(l: {ScavSlotsADDIScalableObject, ScavSlotsNumRVVSpillScalableObject,
1905 ScavSlotsNumRVVSpillNonScalableObject});
1906
1907 unsigned MaxScavSlotsNum = 0;
1908 if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions())
1909 return false;
1910 for (const MachineBasicBlock &MBB : MF)
1911 for (const MachineInstr &MI : MBB) {
1912 bool IsRVVSpill = RISCV::isRVVSpill(MI);
1913 for (auto &MO : MI.operands()) {
1914 if (!MO.isFI())
1915 continue;
1916 bool IsScalableVectorID = MF.getFrameInfo().getStackID(ObjectIdx: MO.getIndex()) ==
1917 TargetStackID::ScalableVector;
1918 if (IsRVVSpill) {
1919 MaxScavSlotsNum = std::max(
1920 a: MaxScavSlotsNum, b: IsScalableVectorID
1921 ? ScavSlotsNumRVVSpillScalableObject
1922 : ScavSlotsNumRVVSpillNonScalableObject);
1923 } else if (MI.getOpcode() == RISCV::ADDI && IsScalableVectorID) {
1924 MaxScavSlotsNum =
1925 std::max(a: MaxScavSlotsNum, b: ScavSlotsADDIScalableObject);
1926 }
1927 }
1928 if (MaxScavSlotsNum == MaxScavSlotsNumKnown)
1929 return MaxScavSlotsNumKnown;
1930 }
1931 return MaxScavSlotsNum;
1932}
1933
1934static bool hasRVVFrameObject(const MachineFunction &MF) {
1935 // Originally, the function will scan all the stack objects to check whether
1936 // if there is any scalable vector object on the stack or not. However, it
1937 // causes errors in the register allocator. In issue 53016, it returns false
1938 // before RA because there is no RVV stack objects. After RA, it returns true
1939 // because there are spilling slots for RVV values during RA. It will not
1940 // reserve BP during register allocation and generate BP access in the PEI
1941 // pass due to the inconsistent behavior of the function.
1942 //
1943 // The function is changed to use hasVInstructions() as the return value. It
1944 // is not precise, but it can make the register allocation correct.
1945 //
1946 // FIXME: Find a better way to make the decision or revisit the solution in
1947 // D103622.
1948 //
1949 // Refer to https://github.com/llvm/llvm-project/issues/53016.
1950 return MF.getSubtarget<RISCVSubtarget>().hasVInstructions();
1951}
1952
1953static unsigned estimateFunctionSizeInBytes(const MachineFunction &MF,
1954 const RISCVInstrInfo &TII) {
1955 unsigned FnSize = 0;
1956 for (auto &MBB : MF) {
1957 for (auto &MI : MBB) {
1958 // Far branches over 20-bit offset will be relaxed in branch relaxation
1959 // pass. In the worst case, conditional branches will be relaxed into
1960 // the following instruction sequence. Unconditional branches are
1961 // relaxed in the same way, with the exception that there is no first
1962 // branch instruction.
1963 //
1964 // foo
1965 // bne t5, t6, .rev_cond # `TII->getInstSizeInBytes(MI)` bytes
1966 // sd s11, 0(sp) # 4 bytes, or 2 bytes with Zca
1967 // jump .restore, s11 # 8 bytes
1968 // .rev_cond
1969 // bar
1970 // j .dest_bb # 4 bytes, or 2 bytes with Zca
1971 // .restore:
1972 // ld s11, 0(sp) # 4 bytes, or 2 bytes with Zca
1973 // .dest:
1974 // baz
1975 if (MI.isConditionalBranch())
1976 FnSize += TII.getInstSizeInBytes(MI);
1977 if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
1978 if (MF.getSubtarget<RISCVSubtarget>().hasStdExtZca())
1979 FnSize += 2 + 8 + 2 + 2;
1980 else
1981 FnSize += 4 + 8 + 4 + 4;
1982 continue;
1983 }
1984
1985 FnSize += TII.getInstSizeInBytes(MI);
1986 }
1987 }
1988 return FnSize;
1989}
1990
1991void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
1992 MachineFunction &MF, RegScavenger *RS) const {
1993 const RISCVRegisterInfo *RegInfo =
1994 MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
1995 const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
1996 MachineFrameInfo &MFI = MF.getFrameInfo();
1997 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
1998 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
1999
2000 int64_t RVVStackSize;
2001 Align RVVStackAlign;
2002 std::tie(args&: RVVStackSize, args&: RVVStackAlign) = assignRVVStackObjectOffsets(MF);
2003
2004 RVFI->setRVVStackSize(RVVStackSize);
2005 RVFI->setRVVStackAlign(RVVStackAlign);
2006
2007 if (hasRVVFrameObject(MF)) {
2008 // Ensure the entire stack is aligned to at least the RVV requirement: some
2009 // scalable-vector object alignments are not considered by the
2010 // target-independent code.
2011 MFI.ensureMaxAlignment(Alignment: RVVStackAlign);
2012 }
2013
2014 unsigned ScavSlotsNum = 0;
2015
2016 // estimateStackSize has been observed to under-estimate the final stack
2017 // size, so give ourselves wiggle-room by checking for stack size
2018 // representable an 11-bit signed field rather than 12-bits.
2019 if (!isInt<11>(x: MFI.estimateStackSize(MF)))
2020 ScavSlotsNum = 1;
2021
2022 // Far branches over 20-bit offset require a spill slot for scratch register.
2023 bool IsLargeFunction = !isInt<20>(x: estimateFunctionSizeInBytes(MF, TII: *TII));
2024 if (IsLargeFunction)
2025 ScavSlotsNum = std::max(a: ScavSlotsNum, b: 1u);
2026
2027 // RVV loads & stores have no capacity to hold the immediate address offsets
2028 // so we must always reserve an emergency spill slot if the MachineFunction
2029 // contains any RVV spills.
2030 ScavSlotsNum = std::max(a: ScavSlotsNum, b: getScavSlotsNumForRVV(MF));
2031
2032 for (unsigned I = 0; I < ScavSlotsNum; I++) {
2033 int FI = MFI.CreateSpillStackObject(Size: RegInfo->getSpillSize(RC: *RC),
2034 Alignment: RegInfo->getSpillAlign(RC: *RC));
2035 RS->addScavengingFrameIndex(FI);
2036
2037 if (IsLargeFunction && RVFI->getBranchRelaxationScratchFrameIndex() == -1)
2038 RVFI->setBranchRelaxationScratchFrameIndex(FI);
2039 }
2040
2041 unsigned Size = RVFI->getReservedSpillsSize();
2042 for (const auto &Info : MFI.getCalleeSavedInfo()) {
2043 int FrameIdx = Info.getFrameIdx();
2044 if (FrameIdx < 0 || MFI.getStackID(ObjectIdx: FrameIdx) != TargetStackID::Default)
2045 continue;
2046
2047 Size += MFI.getObjectSize(ObjectIdx: FrameIdx);
2048 }
2049 RVFI->setCalleeSavedStackSize(Size);
2050}
2051
2052// Not preserve stack space within prologue for outgoing variables when the
2053// function contains variable size objects or there are vector objects accessed
2054// by the frame pointer.
2055// Let eliminateCallFramePseudoInstr preserve stack space for it.
2056bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
2057 return !MF.getFrameInfo().hasVarSizedObjects() &&
2058 !(hasFP(MF) && hasRVVFrameObject(MF));
2059}
2060
2061// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
2062MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
2063 MachineFunction &MF, MachineBasicBlock &MBB,
2064 MachineBasicBlock::iterator MI) const {
2065 DebugLoc DL = MI->getDebugLoc();
2066
2067 if (!hasReservedCallFrame(MF)) {
2068 // If space has not been reserved for a call frame, ADJCALLSTACKDOWN and
2069 // ADJCALLSTACKUP must be converted to instructions manipulating the stack
2070 // pointer. This is necessary when there is a variable length stack
2071 // allocation (e.g. alloca), which means it's not possible to allocate
2072 // space for outgoing arguments from within the function prologue.
2073 int64_t Amount = MI->getOperand(i: 0).getImm();
2074
2075 if (Amount != 0) {
2076 // Ensure the stack remains aligned after adjustment.
2077 Amount = alignSPAdjust(SPAdj: Amount);
2078
2079 if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
2080 Amount = -Amount;
2081
2082 const RISCVTargetLowering *TLI =
2083 MF.getSubtarget<RISCVSubtarget>().getTargetLowering();
2084 int64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign: getStackAlign());
2085 if (TLI->hasInlineStackProbe(MF) && -Amount >= ProbeSize) {
2086 // When stack probing is enabled, the decrement of SP may need to be
2087 // probed. We can handle both the decrement and the probing in
2088 // allocateStack.
2089 bool DynAllocation =
2090 MF.getInfo<RISCVMachineFunctionInfo>()->hasDynamicAllocation();
2091 allocateStack(MBB, MBBI: MI, MF, Offset: -Amount, RealStackSize: -Amount,
2092 EmitCFI: needsDwarfCFI(MF) && !hasFP(MF),
2093 /*NeedProbe=*/true, ProbeSize, DynAllocation,
2094 Flag: MachineInstr::NoFlags);
2095 inlineStackProbe(MF, PrologueMBB&: MBB);
2096 } else {
2097 const RISCVRegisterInfo &RI = *STI.getRegisterInfo();
2098 RI.adjustReg(MBB, II: MI, DL, DestReg: SPReg, SrcReg: SPReg, Offset: StackOffset::getFixed(Fixed: Amount),
2099 Flag: MachineInstr::NoFlags, RequiredAlign: getStackAlign());
2100 }
2101 }
2102 }
2103
2104 return MBB.erase(I: MI);
2105}
2106
2107// We would like to split the SP adjustment to reduce prologue/epilogue
2108// as following instructions. In this way, the offset of the callee saved
2109// register could fit in a single store. Supposed that the first sp adjust
2110// amount is 2032.
2111// add sp,sp,-2032
2112// sw ra,2028(sp)
2113// sw s0,2024(sp)
2114// sw s1,2020(sp)
2115// sw s3,2012(sp)
2116// sw s4,2008(sp)
2117// add sp,sp,-64
2118uint64_t
2119RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
2120 const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
2121 const MachineFrameInfo &MFI = MF.getFrameInfo();
2122 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2123 uint64_t StackSize = getStackSizeWithRVVPadding(MF);
2124
2125 // Disable SplitSPAdjust if save-restore libcall, push/pop or QCI interrupts
2126 // are used. The callee-saved registers will be pushed by the save-restore
2127 // libcalls, so we don't have to split the SP adjustment in this case.
2128 if (RVFI->getReservedSpillsSize())
2129 return 0;
2130
2131 // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
2132 // 12-bit and there exists a callee-saved register needing to be pushed.
2133 if (!isInt<12>(x: StackSize) && (CSI.size() > 0)) {
2134 // FirstSPAdjustAmount is chosen at most as (2048 - StackAlign) because
2135 // 2048 will cause sp = sp + 2048 in the epilogue to be split into multiple
2136 // instructions. Offsets smaller than 2048 can fit in a single load/store
2137 // instruction, and we have to stick with the stack alignment. 2048 has
2138 // 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for
2139 // RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment.
2140 const uint64_t StackAlign = getStackAlign().value();
2141
2142 // Amount of (2048 - StackAlign) will prevent callee saved and restored
2143 // instructions be compressed, so try to adjust the amount to the largest
2144 // offset that stack compression instructions accept when target supports
2145 // compression instructions.
2146 if (STI.hasStdExtZca()) {
2147 // The compression extensions may support the following instructions:
2148 // riscv32: c.lwsp rd, offset[7:2] => 2^(6 + 2)
2149 // c.swsp rs2, offset[7:2] => 2^(6 + 2)
2150 // c.flwsp rd, offset[7:2] => 2^(6 + 2)
2151 // c.fswsp rs2, offset[7:2] => 2^(6 + 2)
2152 // riscv64: c.ldsp rd, offset[8:3] => 2^(6 + 3)
2153 // c.sdsp rs2, offset[8:3] => 2^(6 + 3)
2154 // c.fldsp rd, offset[8:3] => 2^(6 + 3)
2155 // c.fsdsp rs2, offset[8:3] => 2^(6 + 3)
2156 const uint64_t RVCompressLen = STI.getXLen() * 8;
2157 // Compared with amount (2048 - StackAlign), StackSize needs to
2158 // satisfy the following conditions to avoid using more instructions
2159 // to adjust the sp after adjusting the amount, such as
2160 // StackSize meets the condition (StackSize <= 2048 + RVCompressLen),
2161 // case1: Amount is 2048 - StackAlign: use addi + addi to adjust sp.
2162 // case2: Amount is RVCompressLen: use addi + addi to adjust sp.
2163 auto CanCompress = [&](uint64_t CompressLen) -> bool {
2164 if (StackSize <= 2047 + CompressLen ||
2165 (StackSize > 2048 * 2 - StackAlign &&
2166 StackSize <= 2047 * 2 + CompressLen) ||
2167 StackSize > 2048 * 3 - StackAlign)
2168 return true;
2169
2170 return false;
2171 };
2172 // In the epilogue, addi sp, sp, 496 is used to recover the sp and it
2173 // can be compressed(C.ADDI16SP, offset can be [-512, 496]), but
2174 // addi sp, sp, 512 can not be compressed. So try to use 496 first.
2175 const uint64_t ADDI16SPCompressLen = 496;
2176 if (STI.is64Bit() && CanCompress(ADDI16SPCompressLen))
2177 return ADDI16SPCompressLen;
2178 if (CanCompress(RVCompressLen))
2179 return RVCompressLen;
2180 }
2181 return 2048 - StackAlign;
2182 }
2183 return 0;
2184}
2185
2186bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
2187 MachineFunction &MF, const TargetRegisterInfo *TRI,
2188 std::vector<CalleeSavedInfo> &CSI) const {
2189 auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
2190 MachineFrameInfo &MFI = MF.getFrameInfo();
2191 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
2192
2193 // Preemptible Interrupts have two additional Callee-save Frame Indexes,
2194 // not tracked by `CSI`.
2195 if (RVFI->isSiFivePreemptibleInterrupt(MF)) {
2196 for (int I = 0; I < 2; ++I) {
2197 int FI = RVFI->getInterruptCSRFrameIndex(Idx: I);
2198 MFI.setIsCalleeSavedObjectIndex(ObjectIdx: FI, IsCalleeSaved: true);
2199 }
2200 }
2201
2202 // Early exit if no callee saved registers are modified!
2203 if (CSI.empty())
2204 return true;
2205
2206 if (RVFI->useQCIInterrupt(MF)) {
2207 RVFI->setQCIInterruptStackSize(QCIInterruptPushAmount);
2208 }
2209
2210 if (RVFI->isPushable(MF)) {
2211 // Determine how many GPRs we need to push and save it to RVFI.
2212 unsigned PushedRegNum = getNumPushPopRegs(CSI);
2213
2214 // `QC.C.MIENTER(.NEST)` will save `ra` and `s0`, so we should only push if
2215 // we want to push more than 2 registers. Otherwise, we should push if we
2216 // want to push more than 0 registers.
2217 unsigned OnlyPushIfMoreThan = RVFI->useQCIInterrupt(MF) ? 2 : 0;
2218 if (PushedRegNum > OnlyPushIfMoreThan) {
2219 RVFI->setRVPushRegs(PushedRegNum);
2220 RVFI->setRVPushStackSize(alignTo(Value: (STI.getXLen() / 8) * PushedRegNum, Align: 16));
2221 }
2222 }
2223
2224 for (auto &CS : CSI) {
2225 MCRegister Reg = CS.getReg();
2226 const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
2227 unsigned Size = RegInfo->getSpillSize(RC: *RC);
2228
2229 if (RVFI->useQCIInterrupt(MF)) {
2230 const auto *FFI = llvm::find_if(Range: FixedCSRFIQCIInterruptMap, P: [&](auto P) {
2231 return P.first == CS.getReg();
2232 });
2233 if (FFI != std::end(arr: FixedCSRFIQCIInterruptMap)) {
2234 int64_t Offset = FFI->second * (int64_t)Size;
2235
2236 int FrameIdx = MFI.CreateFixedSpillStackObject(Size, SPOffset: Offset);
2237 assert(FrameIdx < 0);
2238 CS.setFrameIdx(FrameIdx);
2239 continue;
2240 }
2241 }
2242
2243 if (RVFI->useSaveRestoreLibCalls(MF) || RVFI->isPushable(MF)) {
2244 const auto *FII = llvm::find_if(
2245 Range: FixedCSRFIMap, P: [&](MCPhysReg P) { return P == CS.getReg(); });
2246 unsigned RegNum = std::distance(first: std::begin(arr: FixedCSRFIMap), last: FII);
2247
2248 if (FII != std::end(arr: FixedCSRFIMap)) {
2249 int64_t Offset;
2250 if (RVFI->getPushPopKind(MF) ==
2251 RISCVMachineFunctionInfo::PushPopKind::StdExtZcmp)
2252 Offset = -int64_t(RVFI->getRVPushRegs() - RegNum) * Size;
2253 else
2254 Offset = -int64_t(RegNum + 1) * Size;
2255
2256 if (RVFI->useQCIInterrupt(MF))
2257 Offset -= QCIInterruptPushAmount;
2258
2259 int FrameIdx = MFI.CreateFixedSpillStackObject(Size, SPOffset: Offset);
2260 assert(FrameIdx < 0);
2261 CS.setFrameIdx(FrameIdx);
2262 continue;
2263 }
2264 }
2265
2266 // For GPRPair registers, use 8-byte slots with required alignment by zilsd.
2267 if (!STI.is64Bit() && STI.hasStdExtZilsd() &&
2268 RISCV::GPRPairRegClass.contains(Reg)) {
2269 Align PairAlign = STI.getZilsdAlign();
2270 int FrameIdx = MFI.CreateStackObject(Size: 8, Alignment: PairAlign, isSpillSlot: true);
2271 MFI.setIsCalleeSavedObjectIndex(ObjectIdx: FrameIdx, IsCalleeSaved: true);
2272 CS.setFrameIdx(FrameIdx);
2273 continue;
2274 }
2275
2276 // Not a fixed slot.
2277 Align Alignment = RegInfo->getSpillAlign(RC: *RC);
2278 // We may not be able to satisfy the desired alignment specification of
2279 // the TargetRegisterClass if the stack alignment is smaller. Use the
2280 // min.
2281 Alignment = std::min(a: Alignment, b: getStackAlign());
2282 int FrameIdx = MFI.CreateStackObject(Size, Alignment, isSpillSlot: true);
2283 MFI.setIsCalleeSavedObjectIndex(ObjectIdx: FrameIdx, IsCalleeSaved: true);
2284 CS.setFrameIdx(FrameIdx);
2285 if (RISCVRegisterInfo::isRVVRegClass(RC))
2286 MFI.setStackID(ObjectIdx: FrameIdx, ID: TargetStackID::ScalableVector);
2287 }
2288
2289 if (RVFI->useQCIInterrupt(MF)) {
2290 // Allocate a fixed object that covers the entire QCI stack allocation,
2291 // because there are gaps which are reserved for future use.
2292 MFI.CreateFixedSpillStackObject(
2293 Size: QCIInterruptPushAmount, SPOffset: -static_cast<int64_t>(QCIInterruptPushAmount));
2294 }
2295
2296 if (RVFI->isPushable(MF)) {
2297 int64_t QCIOffset = RVFI->useQCIInterrupt(MF) ? QCIInterruptPushAmount : 0;
2298 // Allocate a fixed object that covers the full push.
2299 if (int64_t PushSize = RVFI->getRVPushStackSize())
2300 MFI.CreateFixedSpillStackObject(Size: PushSize, SPOffset: -PushSize - QCIOffset);
2301 } else if (int LibCallRegs = getLibCallID(MF, CSI) + 1) {
2302 int64_t LibCallFrameSize =
2303 alignTo(Size: (STI.getXLen() / 8) * LibCallRegs, A: getStackAlign());
2304 MFI.CreateFixedSpillStackObject(Size: LibCallFrameSize, SPOffset: -LibCallFrameSize);
2305 }
2306
2307 return true;
2308}
2309
2310bool RISCVFrameLowering::spillCalleeSavedRegisters(
2311 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2312 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2313 if (CSI.empty())
2314 return true;
2315
2316 MachineFunction *MF = MBB.getParent();
2317 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
2318 DebugLoc DL;
2319 if (MI != MBB.end() && !MI->isDebugInstr())
2320 DL = MI->getDebugLoc();
2321
2322 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2323 if (RVFI->useQCIInterrupt(MF: *MF)) {
2324 // Emit QC.C.MIENTER(.NEST)
2325 BuildMI(
2326 BB&: MBB, I: MI, MIMD: DL,
2327 MCID: TII.get(Opcode: RVFI->getInterruptStackKind(MF: *MF) ==
2328 RISCVMachineFunctionInfo::InterruptStackKind::QCINest
2329 ? RISCV::QC_C_MIENTER_NEST
2330 : RISCV::QC_C_MIENTER))
2331 .setMIFlag(MachineInstr::FrameSetup);
2332
2333 for (auto [Reg, _Offset] : FixedCSRFIQCIInterruptMap)
2334 MBB.addLiveIn(PhysReg: Reg);
2335 }
2336
2337 if (RVFI->isPushable(MF: *MF)) {
2338 // Emit CM.PUSH with base StackAdj & evaluate Push stack
2339 unsigned PushedRegNum = RVFI->getRVPushRegs();
2340 if (PushedRegNum > 0) {
2341 // Use encoded number to represent registers to spill.
2342 unsigned Opcode = getPushOpcode(
2343 Kind: RVFI->getPushPopKind(MF: *MF), UpdateFP: hasFP(MF: *MF) && !RVFI->useQCIInterrupt(MF: *MF));
2344 unsigned RegEnc = RISCVZC::encodeRegListNumRegs(NumRegs: PushedRegNum);
2345 MachineInstrBuilder PushBuilder =
2346 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode))
2347 .setMIFlag(MachineInstr::FrameSetup);
2348 PushBuilder.addImm(Val: RegEnc);
2349 PushBuilder.addImm(Val: 0);
2350
2351 for (unsigned i = 0; i < PushedRegNum; i++)
2352 PushBuilder.addUse(RegNo: FixedCSRFIMap[i], Flags: RegState::Implicit);
2353 }
2354 } else if (const char *SpillLibCall = getSpillLibCallName(MF: *MF, CSI)) {
2355 // Add spill libcall via non-callee-saved register t0.
2356 MachineInstrBuilder NewMI =
2357 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::PseudoCALLReg), DestReg: RISCV::X5)
2358 .addExternalSymbol(FnName: SpillLibCall, TargetFlags: RISCVII::MO_CALL)
2359 .setMIFlag(MachineInstr::FrameSetup)
2360 .addUse(RegNo: RISCV::X2, Flags: RegState::Implicit)
2361 .addDef(RegNo: RISCV::X2, Flags: RegState::ImplicitDefine);
2362
2363 // Add registers spilled as implicit used.
2364 for (auto &CS : CSI)
2365 NewMI.addUse(RegNo: CS.getReg(), Flags: RegState::Implicit);
2366 }
2367
2368 // Manually spill values not spilled by libcall & Push/Pop.
2369 const auto &UnmanagedCSI =
2370 getUnmanagedCSI(MF: *MF, CSI, ReverseOrder: STI.preferAscendingLoadStore());
2371 const auto &RVVCSI = getRVVCalleeSavedInfo(MF: *MF, CSI);
2372
2373 auto storeRegsToStackSlots = [&](decltype(UnmanagedCSI) CSInfo) {
2374 for (auto &CS : CSInfo) {
2375 // Insert the spill to the stack frame.
2376 MCRegister Reg = CS.getReg();
2377 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2378 TII.storeRegToStackSlot(MBB, MI, SrcReg: Reg, isKill: !MBB.isLiveIn(Reg),
2379 FrameIndex: CS.getFrameIdx(), RC, VReg: Register(),
2380 Flags: MachineInstr::FrameSetup);
2381 }
2382 };
2383 storeRegsToStackSlots(UnmanagedCSI);
2384 storeRegsToStackSlots(RVVCSI);
2385
2386 return true;
2387}
2388
2389static unsigned getCalleeSavedRVVNumRegs(const Register &BaseReg) {
2390 return RISCV::VRRegClass.contains(Reg: BaseReg) ? 1
2391 : RISCV::VRM2RegClass.contains(Reg: BaseReg) ? 2
2392 : RISCV::VRM4RegClass.contains(Reg: BaseReg) ? 4
2393 : 8;
2394}
2395
2396void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
2397 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const {
2398 MachineFunction *MF = MBB.getParent();
2399 const MachineFrameInfo &MFI = MF->getFrameInfo();
2400 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2401 const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
2402
2403 const auto &RVVCSI = getRVVCalleeSavedInfo(MF: *MF, CSI: MFI.getCalleeSavedInfo());
2404 if (RVVCSI.empty())
2405 return;
2406
2407 uint64_t FixedSize = getStackSizeWithRVVPadding(MF: *MF);
2408 if (!HasFP) {
2409 uint64_t ScalarLocalVarSize =
2410 MFI.getStackSize() - RVFI->getCalleeSavedStackSize() -
2411 RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
2412 FixedSize -= ScalarLocalVarSize;
2413 }
2414
2415 CFIInstBuilder CFIBuilder(MBB, MI, MachineInstr::FrameSetup);
2416 for (auto &CS : RVVCSI) {
2417 // Insert the spill to the stack frame.
2418 int FI = CS.getFrameIdx();
2419 MCRegister BaseReg = getRVVBaseRegister(TRI, Reg: CS.getReg());
2420 unsigned NumRegs = getCalleeSavedRVVNumRegs(BaseReg: CS.getReg());
2421 for (unsigned i = 0; i < NumRegs; ++i) {
2422 CFIBuilder.insertCFIInst(CFIInst: createDefCFAOffset(
2423 TRI, Reg: BaseReg + i,
2424 Offset: StackOffset::get(Fixed: -FixedSize, Scalable: MFI.getObjectOffset(ObjectIdx: FI) / 8 + i)));
2425 }
2426 }
2427}
2428
2429void RISCVFrameLowering::emitCalleeSavedRVVEpilogCFI(
2430 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
2431 MachineFunction *MF = MBB.getParent();
2432 const MachineFrameInfo &MFI = MF->getFrameInfo();
2433 const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
2434
2435 CFIInstBuilder CFIHelper(MBB, MI, MachineInstr::FrameDestroy);
2436 const auto &RVVCSI = getRVVCalleeSavedInfo(MF: *MF, CSI: MFI.getCalleeSavedInfo());
2437 for (auto &CS : RVVCSI) {
2438 MCRegister BaseReg = getRVVBaseRegister(TRI, Reg: CS.getReg());
2439 unsigned NumRegs = getCalleeSavedRVVNumRegs(BaseReg: CS.getReg());
2440 for (unsigned i = 0; i < NumRegs; ++i)
2441 CFIHelper.buildRestore(Reg: BaseReg + i);
2442 }
2443}
2444
2445bool RISCVFrameLowering::restoreCalleeSavedRegisters(
2446 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2447 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2448 if (CSI.empty())
2449 return true;
2450
2451 MachineFunction *MF = MBB.getParent();
2452 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
2453 DebugLoc DL;
2454 if (MI != MBB.end() && !MI->isDebugInstr())
2455 DL = MI->getDebugLoc();
2456
2457 // Manually restore values not restored by libcall & Push/Pop.
2458 // Reverse the restore order in epilog. In addition, the return
2459 // address will be restored first in the epilogue. It increases
2460 // the opportunity to avoid the load-to-use data hazard between
2461 // loading RA and return by RA. loadRegFromStackSlot can insert
2462 // multiple instructions.
2463 const auto &UnmanagedCSI =
2464 getUnmanagedCSI(MF: *MF, CSI, ReverseOrder: STI.preferAscendingLoadStore());
2465 const auto &RVVCSI = getRVVCalleeSavedInfo(MF: *MF, CSI);
2466
2467 auto loadRegFromStackSlot = [&](decltype(UnmanagedCSI) CSInfo) {
2468 for (auto &CS : CSInfo) {
2469 MCRegister Reg = CS.getReg();
2470 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2471 TII.loadRegFromStackSlot(MBB, MI, DestReg: Reg, FrameIndex: CS.getFrameIdx(), RC, VReg: Register(),
2472 SubReg: RISCV::NoSubRegister,
2473 Flags: MachineInstr::FrameDestroy);
2474 assert(MI != MBB.begin() &&
2475 "loadRegFromStackSlot didn't insert any code!");
2476 }
2477 };
2478 loadRegFromStackSlot(RVVCSI);
2479 loadRegFromStackSlot(UnmanagedCSI);
2480
2481 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2482 if (RVFI->useQCIInterrupt(MF: *MF)) {
2483 // Don't emit anything here because restoration is handled by
2484 // QC.C.MILEAVERET which we already inserted to return.
2485 assert(MI->getOpcode() == RISCV::QC_C_MILEAVERET &&
2486 "Unexpected QCI Interrupt Return Instruction");
2487 }
2488
2489 if (RVFI->isPushable(MF: *MF)) {
2490 unsigned PushedRegNum = RVFI->getRVPushRegs();
2491 if (PushedRegNum > 0) {
2492 unsigned Opcode = getPopOpcode(Kind: RVFI->getPushPopKind(MF: *MF));
2493 unsigned RegEnc = RISCVZC::encodeRegListNumRegs(NumRegs: PushedRegNum);
2494 MachineInstrBuilder PopBuilder =
2495 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode))
2496 .setMIFlag(MachineInstr::FrameDestroy);
2497 // Use encoded number to represent registers to restore.
2498 PopBuilder.addImm(Val: RegEnc);
2499 PopBuilder.addImm(Val: 0);
2500
2501 for (unsigned i = 0; i < RVFI->getRVPushRegs(); i++)
2502 PopBuilder.addDef(RegNo: FixedCSRFIMap[i], Flags: RegState::ImplicitDefine);
2503 }
2504 } else if (const char *RestoreLibCall = getRestoreLibCallName(MF: *MF, CSI)) {
2505 // Add restore libcall via tail call.
2506 MachineInstrBuilder NewMI =
2507 BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: TII.get(Opcode: RISCV::PseudoTAIL))
2508 .addExternalSymbol(FnName: RestoreLibCall, TargetFlags: RISCVII::MO_CALL)
2509 .setMIFlag(MachineInstr::FrameDestroy)
2510 .addDef(RegNo: RISCV::X2, Flags: RegState::ImplicitDefine);
2511
2512 // Add registers restored as implicit defined.
2513 for (auto &CS : CSI)
2514 NewMI.addDef(RegNo: CS.getReg(), Flags: RegState::ImplicitDefine);
2515
2516 // Remove trailing returns, since the terminator is now a tail call to the
2517 // restore function.
2518 if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
2519 NewMI.getInstr()->copyImplicitOps(MF&: *MF, MI: *MI);
2520 MI->eraseFromParent();
2521 }
2522 }
2523 return true;
2524}
2525
2526bool RISCVFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2527 // Keep the conventional code flow when not optimizing.
2528 if (MF.getFunction().hasOptNone())
2529 return false;
2530
2531 return true;
2532}
2533
2534bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
2535 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
2536 const MachineFunction *MF = MBB.getParent();
2537 const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2538
2539 // Make sure VTYPE and VL are not live-in since we will use vsetvli in the
2540 // prologue to get the VLEN, and that will clobber these registers.
2541 //
2542 // We may do also check the stack contains objects with scalable vector type,
2543 // but this will require iterating over all the stack objects, but this may
2544 // not worth since the situation is rare, we could do further check in future
2545 // if we find it is necessary.
2546 if (STI.preferVsetvliOverReadVLENB() &&
2547 (MBB.isLiveIn(Reg: RISCV::VTYPE) || MBB.isLiveIn(Reg: RISCV::VL)))
2548 return false;
2549
2550 if (!RVFI->useSaveRestoreLibCalls(MF: *MF))
2551 return true;
2552
2553 // Inserting a call to a __riscv_save libcall requires the use of the register
2554 // t0 (X5) to hold the return address. Therefore if this register is already
2555 // used we can't insert the call.
2556
2557 RegScavenger RS;
2558 RS.enterBasicBlock(MBB&: *TmpMBB);
2559 return !RS.isRegUsed(Reg: RISCV::X5);
2560}
2561
2562bool RISCVFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
2563 const MachineFunction *MF = MBB.getParent();
2564 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
2565 const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
2566
2567 // We do not want QC.C.MILEAVERET to be subject to shrink-wrapping - it must
2568 // come in the final block of its function as it both pops and returns.
2569 if (RVFI->useQCIInterrupt(MF: *MF))
2570 return MBB.succ_empty();
2571
2572 if (!RVFI->useSaveRestoreLibCalls(MF: *MF))
2573 return true;
2574
2575 // Using the __riscv_restore libcalls to restore CSRs requires a tail call.
2576 // This means if we still need to continue executing code within this function
2577 // the restore cannot take place in this basic block.
2578
2579 if (MBB.succ_size() > 1)
2580 return false;
2581
2582 MachineBasicBlock *SuccMBB =
2583 MBB.succ_empty() ? TmpMBB->getFallThrough() : *MBB.succ_begin();
2584
2585 // Doing a tail call should be safe if there are no successors, because either
2586 // we have a returning block or the end of the block is unreachable, so the
2587 // restore will be eliminated regardless.
2588 if (!SuccMBB)
2589 return true;
2590
2591 // The successor can only contain a return, since we would effectively be
2592 // replacing the successor with our own tail return at the end of our block.
2593 return SuccMBB->isReturnBlock() && SuccMBB->size() == 1;
2594}
2595
2596bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
2597 switch (ID) {
2598 case TargetStackID::Default:
2599 case TargetStackID::ScalableVector:
2600 return true;
2601 case TargetStackID::NoAlloc:
2602 case TargetStackID::SGPRSpill:
2603 case TargetStackID::WasmLocal:
2604 case TargetStackID::ScalablePredicateVector:
2605 return false;
2606 }
2607 llvm_unreachable("Invalid TargetStackID::Value");
2608}
2609
2610TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
2611 return TargetStackID::ScalableVector;
2612}
2613
2614// Synthesize the probe loop.
2615static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL,
2616 Register TargetReg, bool IsRVV) {
2617 assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP");
2618
2619 MachineBasicBlock &MBB = *MBBI->getParent();
2620 MachineFunction &MF = *MBB.getParent();
2621
2622 auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
2623 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
2624 bool IsRV64 = Subtarget.is64Bit();
2625 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
2626 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
2627 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
2628
2629 MachineFunction::iterator MBBInsertPoint = std::next(x: MBB.getIterator());
2630 MachineBasicBlock *LoopTestMBB =
2631 MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
2632 MF.insert(MBBI: MBBInsertPoint, MBB: LoopTestMBB);
2633 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(BB: MBB.getBasicBlock());
2634 MF.insert(MBBI: MBBInsertPoint, MBB: ExitMBB);
2635 MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2636 Register ScratchReg = RISCV::X7;
2637
2638 // ScratchReg = ProbeSize
2639 TII->movImm(MBB, MBBI, DL, DstReg: ScratchReg, Val: ProbeSize, Flag: Flags);
2640
2641 // LoopTest:
2642 // SUB SP, SP, ProbeSize
2643 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::SUB), DestReg: SPReg)
2644 .addReg(RegNo: SPReg)
2645 .addReg(RegNo: ScratchReg)
2646 .setMIFlags(Flags);
2647
2648 // s[d|w] zero, 0(sp)
2649 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL,
2650 MCID: TII->get(Opcode: IsRV64 ? RISCV::SD : RISCV::SW))
2651 .addReg(RegNo: RISCV::X0)
2652 .addReg(RegNo: SPReg)
2653 .addImm(Val: 0)
2654 .setMIFlags(Flags);
2655
2656 if (IsRVV) {
2657 // SUB TargetReg, TargetReg, ProbeSize
2658 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::SUB),
2659 DestReg: TargetReg)
2660 .addReg(RegNo: TargetReg)
2661 .addReg(RegNo: ScratchReg)
2662 .setMIFlags(Flags);
2663
2664 // BGE TargetReg, ProbeSize, LoopTest
2665 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::BGE))
2666 .addReg(RegNo: TargetReg)
2667 .addReg(RegNo: ScratchReg)
2668 .addMBB(MBB: LoopTestMBB)
2669 .setMIFlags(Flags);
2670
2671 } else {
2672 // BNE SP, TargetReg, LoopTest
2673 BuildMI(BB&: *LoopTestMBB, I: LoopTestMBB->end(), MIMD: DL, MCID: TII->get(Opcode: RISCV::BNE))
2674 .addReg(RegNo: SPReg)
2675 .addReg(RegNo: TargetReg)
2676 .addMBB(MBB: LoopTestMBB)
2677 .setMIFlags(Flags);
2678 }
2679
2680 ExitMBB->splice(Where: ExitMBB->end(), Other: &MBB, From: std::next(x: MBBI), To: MBB.end());
2681 ExitMBB->transferSuccessorsAndUpdatePHIs(FromMBB: &MBB);
2682
2683 LoopTestMBB->addSuccessor(Succ: ExitMBB);
2684 LoopTestMBB->addSuccessor(Succ: LoopTestMBB);
2685 MBB.addSuccessor(Succ: LoopTestMBB);
2686 // Update liveins.
2687 fullyRecomputeLiveIns(MBBs: {ExitMBB, LoopTestMBB});
2688}
2689
2690void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
2691 MachineBasicBlock &MBB) const {
2692 // Get the instructions that need to be replaced. We emit at most two of
2693 // these. Remember them in order to avoid complications coming from the need
2694 // to traverse the block while potentially creating more blocks.
2695 SmallVector<MachineInstr *, 4> ToReplace;
2696 for (MachineInstr &MI : MBB) {
2697 unsigned Opc = MI.getOpcode();
2698 if (Opc == RISCV::PROBED_STACKALLOC ||
2699 Opc == RISCV::PROBED_STACKALLOC_RVV) {
2700 ToReplace.push_back(Elt: &MI);
2701 }
2702 }
2703
2704 for (MachineInstr *MI : ToReplace) {
2705 if (MI->getOpcode() == RISCV::PROBED_STACKALLOC ||
2706 MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV) {
2707 MachineBasicBlock::iterator MBBI = MI->getIterator();
2708 DebugLoc DL = MBB.findDebugLoc(MBBI);
2709 Register TargetReg = MI->getOperand(i: 0).getReg();
2710 emitStackProbeInline(MBBI, DL, TargetReg,
2711 IsRVV: (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV));
2712 MBBI->eraseFromParent();
2713 }
2714 }
2715}
2716
2717int RISCVFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
2718 return 0;
2719}
2720
2721Register
2722RISCVFrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
2723 return RISCV::X2;
2724}
2725