1//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetRegisterInfo
10// class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64RegisterInfo.h"
15#include "AArch64FrameLowering.h"
16#include "AArch64InstrInfo.h"
17#include "AArch64MachineFunctionInfo.h"
18#include "AArch64SMEAttributes.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "MCTargetDesc/AArch64InstPrinter.h"
22#include "llvm/ADT/BitVector.h"
23#include "llvm/BinaryFormat/Dwarf.h"
24#include "llvm/CodeGen/LiveRegMatrix.h"
25#include "llvm/CodeGen/MachineFrameInfo.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/RegisterScavenging.h"
29#include "llvm/CodeGen/TargetFrameLowering.h"
30#include "llvm/IR/DebugInfoMetadata.h"
31#include "llvm/IR/DiagnosticInfo.h"
32#include "llvm/IR/Function.h"
33#include "llvm/Target/TargetOptions.h"
34#include "llvm/TargetParser/Triple.h"
35
36using namespace llvm;
37
38#define GET_CC_REGISTER_LISTS
39#include "AArch64GenCallingConv.inc"
40#define GET_REGINFO_TARGET_DESC
41#include "AArch64GenRegisterInfo.inc"
42
43AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT, unsigned HwMode)
44 : AArch64GenRegisterInfo(AArch64::LR, 0, 0, 0, HwMode), TT(TT) {
45 AArch64_MC::initLLVMToCVRegMapping(MRI: this);
46}
47
48/// Return whether the register needs a CFI entry. Not all unwinders may know
49/// about SVE registers, so we assume the lowest common denominator, i.e. the
50/// callee-saves required by the base ABI. For the SVE registers z8-z15 only the
51/// lower 64-bits (d8-d15) need to be saved. The lower 64-bits subreg is
52/// returned in \p RegToUseForCFI.
53bool AArch64RegisterInfo::regNeedsCFI(MCRegister Reg,
54 MCRegister &RegToUseForCFI) const {
55 if (AArch64::PPRRegClass.contains(Reg))
56 return false;
57
58 if (AArch64::ZPRRegClass.contains(Reg)) {
59 RegToUseForCFI = getSubReg(Reg, Idx: AArch64::dsub);
60 for (int I = 0; CSR_AArch64_AAPCS_SaveList[I]; ++I) {
61 if (CSR_AArch64_AAPCS_SaveList[I] == RegToUseForCFI)
62 return true;
63 }
64 return false;
65 }
66
67 RegToUseForCFI = Reg;
68 return true;
69}
70
71const MCPhysReg *
72AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
73 assert(MF && "Invalid MachineFunction pointer.");
74
75 auto &AFI = *MF->getInfo<AArch64FunctionInfo>();
76 const auto &F = MF->getFunction();
77 const auto *TLI = MF->getSubtarget<AArch64Subtarget>().getTargetLowering();
78 const bool Darwin = MF->getSubtarget<AArch64Subtarget>().isTargetDarwin();
79 const bool Windows = MF->getSubtarget<AArch64Subtarget>().isTargetWindows();
80
81 if (TLI->supportSwiftError() &&
82 F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError)) {
83 if (Darwin)
84 return CSR_Darwin_AArch64_AAPCS_SwiftError_SaveList;
85 if (Windows)
86 return CSR_Win_AArch64_AAPCS_SwiftError_SaveList;
87 return CSR_AArch64_AAPCS_SwiftError_SaveList;
88 }
89
90 switch (F.getCallingConv()) {
91 case CallingConv::GHC:
92 // GHC set of callee saved regs is empty as all those regs are
93 // used for passing STG regs around
94 return CSR_AArch64_NoRegs_SaveList;
95
96 case CallingConv::PreserveNone:
97 // FIXME: Windows likely need this to be altered for properly unwinding.
98 return CSR_AArch64_NoneRegs_SaveList;
99
100 case CallingConv::AnyReg:
101 return CSR_AArch64_AllRegs_SaveList;
102
103 case CallingConv::ARM64EC_Thunk_X64:
104 return CSR_Win_AArch64_Arm64EC_Thunk_SaveList;
105
106 case CallingConv::PreserveMost:
107 if (Darwin)
108 return CSR_Darwin_AArch64_RT_MostRegs_SaveList;
109 if (Windows)
110 return CSR_Win_AArch64_RT_MostRegs_SaveList;
111 return CSR_AArch64_RT_MostRegs_SaveList;
112
113 case CallingConv::PreserveAll:
114 if (Darwin)
115 return CSR_Darwin_AArch64_RT_AllRegs_SaveList;
116 if (Windows)
117 return CSR_Win_AArch64_RT_AllRegs_SaveList;
118 return CSR_AArch64_RT_AllRegs_SaveList;
119
120 case CallingConv::CFGuard_Check:
121 if (Darwin)
122 report_fatal_error(
123 reason: "Calling convention CFGuard_Check is unsupported on Darwin.");
124 return CSR_Win_AArch64_CFGuard_Check_SaveList;
125
126 case CallingConv::SwiftTail:
127 if (Darwin)
128 return CSR_Darwin_AArch64_AAPCS_SwiftTail_SaveList;
129 if (Windows)
130 return CSR_Win_AArch64_AAPCS_SwiftTail_SaveList;
131 return CSR_AArch64_AAPCS_SwiftTail_SaveList;
132
133 case CallingConv::AArch64_VectorCall:
134 if (Darwin)
135 return CSR_Darwin_AArch64_AAVPCS_SaveList;
136 if (Windows)
137 return CSR_Win_AArch64_AAVPCS_SaveList;
138 return CSR_AArch64_AAVPCS_SaveList;
139
140 case CallingConv::AArch64_SVE_VectorCall:
141 if (Darwin)
142 report_fatal_error(
143 reason: "Calling convention SVE_VectorCall is unsupported on Darwin.");
144 if (Windows)
145 return CSR_Win_AArch64_SVE_AAPCS_SaveList;
146 return CSR_AArch64_SVE_AAPCS_SaveList;
147
148 case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
149 report_fatal_error(
150 reason: "Calling convention "
151 "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is only "
152 "supported to improve calls to SME ACLE save/restore/disable-za "
153 "functions, and is not intended to be used beyond that scope.");
154
155 case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1:
156 report_fatal_error(
157 reason: "Calling convention "
158 "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 is "
159 "only supported to improve calls to SME ACLE __arm_get_current_vg "
160 "function, and is not intended to be used beyond that scope.");
161
162 case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
163 report_fatal_error(
164 reason: "Calling convention "
165 "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is "
166 "only supported to improve calls to SME ACLE __arm_sme_state "
167 "and is not intended to be used beyond that scope.");
168
169 case CallingConv::Win64:
170 if (Darwin)
171 return CSR_Darwin_AArch64_AAPCS_Win64_SaveList;
172 if (Windows)
173 return CSR_Win_AArch64_AAPCS_SaveList;
174 return CSR_AArch64_AAPCS_X18_SaveList;
175
176 case CallingConv::CXX_FAST_TLS:
177 if (Darwin)
178 return AFI.isSplitCSR() ? CSR_Darwin_AArch64_CXX_TLS_PE_SaveList
179 : CSR_Darwin_AArch64_CXX_TLS_SaveList;
180 // FIXME: this likely should be a `report_fatal_error` condition, however,
181 // that would be a departure from the previously implemented behaviour.
182 LLVM_FALLTHROUGH;
183
184 default:
185 if (Darwin)
186 return AFI.hasSVE_AAPCS(MF: *MF) ? CSR_Darwin_AArch64_SVE_AAPCS_SaveList
187 : CSR_Darwin_AArch64_AAPCS_SaveList;
188 if (Windows)
189 return AFI.hasSVE_AAPCS(MF: *MF) ? CSR_Win_AArch64_SVE_AAPCS_SaveList
190 : CSR_Win_AArch64_AAPCS_SaveList;
191 return AFI.hasSVE_AAPCS(MF: *MF) ? CSR_AArch64_SVE_AAPCS_SaveList
192 : CSR_AArch64_AAPCS_SaveList;
193 }
194}
195
196const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
197 const MachineFunction *MF) const {
198 assert(MF && "Invalid MachineFunction pointer.");
199 if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
200 MF->getInfo<AArch64FunctionInfo>()->isSplitCSR())
201 return CSR_Darwin_AArch64_CXX_TLS_ViaCopy_SaveList;
202 return nullptr;
203}
204
205void AArch64RegisterInfo::UpdateCustomCalleeSavedRegs(
206 MachineFunction &MF) const {
207 const MCPhysReg *CSRs = getCalleeSavedRegs(MF: &MF);
208 SmallVector<MCPhysReg, 32> UpdatedCSRs;
209 for (const MCPhysReg *I = CSRs; *I; ++I)
210 UpdatedCSRs.push_back(Elt: *I);
211
212 for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
213 if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
214 UpdatedCSRs.push_back(Elt: AArch64::GPR64commonRegClass.getRegister(i));
215 }
216 }
217 // Register lists are zero-terminated.
218 UpdatedCSRs.push_back(Elt: 0);
219 MF.getRegInfo().setCalleeSavedRegs(UpdatedCSRs);
220}
221
222const TargetRegisterClass *
223AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
224 unsigned Idx) const {
225 // edge case for GPR/FPR register classes
226 if (RC == &AArch64::GPR32allRegClass && Idx == AArch64::hsub)
227 return &AArch64::FPR32RegClass;
228 else if (RC == &AArch64::GPR64allRegClass && Idx == AArch64::hsub)
229 return &AArch64::FPR64RegClass;
230
231 // Forward to TableGen's default version.
232 return AArch64GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
233}
234
235const uint32_t *
236AArch64RegisterInfo::getDarwinCallPreservedMask(const MachineFunction &MF,
237 CallingConv::ID CC) const {
238 assert(MF.getSubtarget<AArch64Subtarget>().isTargetDarwin() &&
239 "Invalid subtarget for getDarwinCallPreservedMask");
240
241 if (CC == CallingConv::CXX_FAST_TLS)
242 return CSR_Darwin_AArch64_CXX_TLS_RegMask;
243 if (CC == CallingConv::AArch64_VectorCall)
244 return CSR_Darwin_AArch64_AAVPCS_RegMask;
245 if (CC == CallingConv::AArch64_SVE_VectorCall)
246 return CSR_Darwin_AArch64_SVE_AAPCS_RegMask;
247 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
248 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask;
249 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
250 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1_RegMask;
251 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
252 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2_RegMask;
253 if (CC == CallingConv::CFGuard_Check)
254 report_fatal_error(
255 reason: "Calling convention CFGuard_Check is unsupported on Darwin.");
256 if (MF.getSubtarget<AArch64Subtarget>()
257 .getTargetLowering()
258 ->supportSwiftError() &&
259 MF.getFunction().getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError))
260 return CSR_Darwin_AArch64_AAPCS_SwiftError_RegMask;
261 if (CC == CallingConv::SwiftTail)
262 return CSR_Darwin_AArch64_AAPCS_SwiftTail_RegMask;
263 if (CC == CallingConv::PreserveMost)
264 return CSR_Darwin_AArch64_RT_MostRegs_RegMask;
265 if (CC == CallingConv::PreserveAll)
266 return CSR_Darwin_AArch64_RT_AllRegs_RegMask;
267 return CSR_Darwin_AArch64_AAPCS_RegMask;
268}
269
270const uint32_t *
271AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
272 CallingConv::ID CC) const {
273 bool SCS = MF.getFunction().hasFnAttribute(Kind: Attribute::ShadowCallStack);
274 if (CC == CallingConv::GHC)
275 // This is academic because all GHC calls are (supposed to be) tail calls
276 return SCS ? CSR_AArch64_NoRegs_SCS_RegMask : CSR_AArch64_NoRegs_RegMask;
277 if (CC == CallingConv::PreserveNone)
278 return SCS ? CSR_AArch64_NoneRegs_SCS_RegMask
279 : CSR_AArch64_NoneRegs_RegMask;
280 if (CC == CallingConv::AnyReg)
281 return SCS ? CSR_AArch64_AllRegs_SCS_RegMask : CSR_AArch64_AllRegs_RegMask;
282
283 // All the following calling conventions are handled differently on Darwin.
284 if (MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
285 if (SCS)
286 report_fatal_error(reason: "ShadowCallStack attribute not supported on Darwin.");
287 return getDarwinCallPreservedMask(MF, CC);
288 }
289
290 if (CC == CallingConv::AArch64_VectorCall)
291 return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
292 if (CC == CallingConv::AArch64_SVE_VectorCall)
293 return SCS ? CSR_AArch64_SVE_AAPCS_SCS_RegMask
294 : CSR_AArch64_SVE_AAPCS_RegMask;
295 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0)
296 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask;
297 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1)
298 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1_RegMask;
299 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2)
300 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2_RegMask;
301 if (CC == CallingConv::CFGuard_Check)
302 return CSR_Win_AArch64_CFGuard_Check_RegMask;
303 if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
304 ->supportSwiftError() &&
305 MF.getFunction().getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError))
306 return SCS ? CSR_AArch64_AAPCS_SwiftError_SCS_RegMask
307 : CSR_AArch64_AAPCS_SwiftError_RegMask;
308 if (CC == CallingConv::SwiftTail) {
309 if (SCS)
310 report_fatal_error(reason: "ShadowCallStack attribute not supported with swifttail");
311 return CSR_AArch64_AAPCS_SwiftTail_RegMask;
312 }
313 if (CC == CallingConv::PreserveMost)
314 return SCS ? CSR_AArch64_RT_MostRegs_SCS_RegMask
315 : CSR_AArch64_RT_MostRegs_RegMask;
316 if (CC == CallingConv::PreserveAll)
317 return SCS ? CSR_AArch64_RT_AllRegs_SCS_RegMask
318 : CSR_AArch64_RT_AllRegs_RegMask;
319
320 return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
321}
322
323const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask(
324 const MachineFunction &MF) const {
325 if (MF.getSubtarget<AArch64Subtarget>().isTargetLinux())
326 return CSR_AArch64_AAPCS_RegMask;
327
328 return nullptr;
329}
330
331const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
332 if (TT.isOSDarwin())
333 return CSR_Darwin_AArch64_TLS_RegMask;
334
335 assert(TT.isOSBinFormatELF() && "Invalid target");
336 return CSR_AArch64_TLS_ELF_RegMask;
337}
338
339void AArch64RegisterInfo::UpdateCustomCallPreservedMask(MachineFunction &MF,
340 const uint32_t **Mask) const {
341 uint32_t *UpdatedMask = MF.allocateRegMask();
342 unsigned RegMaskSize = MachineOperand::getRegMaskSize(NumRegs: getNumRegs());
343 memcpy(dest: UpdatedMask, src: *Mask, n: sizeof(UpdatedMask[0]) * RegMaskSize);
344
345 for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
346 if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
347 for (MCPhysReg SubReg :
348 subregs_inclusive(Reg: AArch64::GPR64commonRegClass.getRegister(i))) {
349 // See TargetRegisterInfo::getCallPreservedMask for how to interpret the
350 // register mask.
351 UpdatedMask[SubReg / 32] |= 1u << (SubReg % 32);
352 }
353 }
354 }
355 *Mask = UpdatedMask;
356}
357
358const uint32_t *AArch64RegisterInfo::getSMStartStopCallPreservedMask() const {
359 return CSR_AArch64_SMStartStop_RegMask;
360}
361
362const uint32_t *
363AArch64RegisterInfo::SMEABISupportRoutinesCallPreservedMaskFromX0() const {
364 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask;
365}
366
367const uint32_t *AArch64RegisterInfo::getNoPreservedMask() const {
368 return CSR_AArch64_NoRegs_RegMask;
369}
370
371const uint32_t *
372AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
373 CallingConv::ID CC) const {
374 // This should return a register mask that is the same as that returned by
375 // getCallPreservedMask but that additionally preserves the register used for
376 // the first i64 argument (which must also be the register used to return a
377 // single i64 return value)
378 //
379 // In case that the calling convention does not use the same register for
380 // both, the function should return NULL (does not currently apply)
381 assert(CC != CallingConv::GHC && "should not be GHC calling convention.");
382 if (MF.getSubtarget<AArch64Subtarget>().isTargetDarwin())
383 return CSR_Darwin_AArch64_AAPCS_ThisReturn_RegMask;
384 return CSR_AArch64_AAPCS_ThisReturn_RegMask;
385}
386
387const uint32_t *AArch64RegisterInfo::getWindowsStackProbePreservedMask() const {
388 return CSR_AArch64_StackProbe_Windows_RegMask;
389}
390
391std::optional<std::string>
392AArch64RegisterInfo::explainReservedReg(const MachineFunction &MF,
393 MCRegister PhysReg) const {
394 if (hasBasePointer(MF) && MCRegisterInfo::regsOverlap(RegA: PhysReg, RegB: AArch64::X19))
395 return std::string("X19 is used as the frame base pointer register.");
396
397 if (MF.getSubtarget<AArch64Subtarget>().isWindowsArm64EC()) {
398 bool warn = false;
399 if (MCRegisterInfo::regsOverlap(RegA: PhysReg, RegB: AArch64::X13) ||
400 MCRegisterInfo::regsOverlap(RegA: PhysReg, RegB: AArch64::X14) ||
401 MCRegisterInfo::regsOverlap(RegA: PhysReg, RegB: AArch64::X23) ||
402 MCRegisterInfo::regsOverlap(RegA: PhysReg, RegB: AArch64::X24) ||
403 MCRegisterInfo::regsOverlap(RegA: PhysReg, RegB: AArch64::X28))
404 warn = true;
405
406 for (unsigned i = AArch64::B16; i <= AArch64::B31; ++i)
407 if (MCRegisterInfo::regsOverlap(RegA: PhysReg, RegB: i))
408 warn = true;
409
410 if (warn)
411 return std::string(AArch64InstPrinter::getRegisterName(Reg: PhysReg)) +
412 " is clobbered by asynchronous signals when using Arm64EC.";
413 }
414
415 return {};
416}
417
418BitVector
419AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
420 const AArch64FrameLowering *TFI = getFrameLowering(MF);
421
422 BitVector Reserved(getNumRegs());
423 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::WSP);
424 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::WZR);
425
426 if (TFI->isFPReserved(MF))
427 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W29);
428
429 if (MF.getSubtarget<AArch64Subtarget>().isWindowsArm64EC()) {
430 // x13, x14, x23, x24, x28, and v16-v31 are clobbered by asynchronous
431 // signals, so we can't ever use them.
432 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W13);
433 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W14);
434 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W23);
435 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W24);
436 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W28);
437 for (unsigned i = AArch64::B16; i <= AArch64::B31; ++i)
438 markSuperRegs(RegisterSet&: Reserved, Reg: i);
439 }
440
441 if (MF.getSubtarget<AArch64Subtarget>().isLFI()) {
442 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W28);
443 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W27);
444 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W26);
445 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W25);
446 if (!MF.getProperties().hasNoVRegs()) {
447 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::LR);
448 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W30);
449 }
450 }
451
452 for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
453 if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
454 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::GPR32commonRegClass.getRegister(i));
455 }
456
457 if (hasBasePointer(MF))
458 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W19);
459
460 // SLH uses register W16/X16 as the taint register.
461 if (MF.getFunction().hasFnAttribute(Kind: Attribute::SpeculativeLoadHardening))
462 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W16);
463
464 // FFR is modelled as global state that cannot be allocated.
465 if (MF.getSubtarget<AArch64Subtarget>().hasSVE())
466 Reserved.set(AArch64::FFR);
467
468 // SME tiles are not allocatable.
469 if (MF.getSubtarget<AArch64Subtarget>().hasSME()) {
470 for (MCPhysReg SubReg : subregs_inclusive(Reg: AArch64::ZA))
471 Reserved.set(SubReg);
472 }
473
474 // VG cannot be allocated
475 Reserved.set(AArch64::VG);
476
477 if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) {
478 for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true);
479 SubReg.isValid(); ++SubReg)
480 Reserved.set(*SubReg);
481 }
482
483 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::FPCR);
484 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::FPMR);
485 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::FPSR);
486
487 if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
488 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::X27);
489 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::X28);
490 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W27);
491 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W28);
492 }
493
494 assert(checkAllSuperRegsMarked(Reserved));
495
496 // Add _HI registers after checkAllSuperRegsMarked as this check otherwise
497 // becomes considerably more expensive.
498 Reserved.set(AArch64::WSP_HI);
499 Reserved.set(AArch64::WZR_HI);
500 static_assert(AArch64::W30_HI - AArch64::W0_HI == 30,
501 "Unexpected order of registers");
502 Reserved.set(I: AArch64::W0_HI, E: AArch64::W30_HI + 1);
503 static_assert(AArch64::B31_HI - AArch64::B0_HI == 31,
504 "Unexpected order of registers");
505 Reserved.set(I: AArch64::B0_HI, E: AArch64::B31_HI + 1);
506 static_assert(AArch64::H31_HI - AArch64::H0_HI == 31,
507 "Unexpected order of registers");
508 Reserved.set(I: AArch64::H0_HI, E: AArch64::H31_HI + 1);
509 static_assert(AArch64::S31_HI - AArch64::S0_HI == 31,
510 "Unexpected order of registers");
511 Reserved.set(I: AArch64::S0_HI, E: AArch64::S31_HI + 1);
512 static_assert(AArch64::D31_HI - AArch64::D0_HI == 31,
513 "Unexpected order of registers");
514 Reserved.set(I: AArch64::D0_HI, E: AArch64::D31_HI + 1);
515 static_assert(AArch64::Q31_HI - AArch64::Q0_HI == 31,
516 "Unexpected order of registers");
517 Reserved.set(I: AArch64::Q0_HI, E: AArch64::Q31_HI + 1);
518
519 return Reserved;
520}
521
522BitVector
523AArch64RegisterInfo::getUserReservedRegs(const MachineFunction &MF) const {
524 BitVector Reserved(getNumRegs());
525 for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
526 // ReserveXRegister is set for registers manually reserved
527 // through +reserve-x#i.
528 if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
529 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::GPR32commonRegClass.getRegister(i));
530 }
531 return Reserved;
532}
533
534BitVector
535AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
536 BitVector Reserved(getNumRegs());
537 for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
538 if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReservedForRA(i))
539 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::GPR32commonRegClass.getRegister(i));
540 }
541
542 if (MF.getSubtarget<AArch64Subtarget>().isLRReservedForRA()) {
543 // In order to prevent the register allocator from using LR, we need to
544 // mark it as reserved. However we don't want to keep it reserved throughout
545 // the pipeline since it prevents other infrastructure from reasoning about
546 // it's liveness. We use the NoVRegs property instead of IsSSA because
547 // IsSSA is removed before VirtRegRewriter runs.
548 if (!MF.getProperties().hasNoVRegs())
549 // Reserve LR (X30) by marking from its subregister W30 because otherwise
550 // the register allocator could clobber the subregister.
551 markSuperRegs(RegisterSet&: Reserved, Reg: AArch64::W30);
552 }
553
554 assert(checkAllSuperRegsMarked(Reserved));
555
556 // Handle strictlyReservedRegs separately to avoid re-evaluating the assert,
557 // which becomes considerably expensive when considering the _HI registers.
558 Reserved |= getStrictlyReservedRegs(MF);
559
560 return Reserved;
561}
562
563bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
564 MCRegister Reg) const {
565 return getReservedRegs(MF)[Reg];
566}
567
568bool AArch64RegisterInfo::isUserReservedReg(const MachineFunction &MF,
569 MCRegister Reg) const {
570 return getUserReservedRegs(MF)[Reg];
571}
572
573bool AArch64RegisterInfo::isStrictlyReservedReg(const MachineFunction &MF,
574 MCRegister Reg) const {
575 return getStrictlyReservedRegs(MF)[Reg];
576}
577
578bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
579 for (size_t i = 0; i < AArch64::GPR64argRegClass.getNumRegs(); ++i) {
580 if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
581 return true;
582 }
583 return false;
584}
585
586void AArch64RegisterInfo::emitReservedArgRegCallError(
587 const MachineFunction &MF) const {
588 const Function &F = MF.getFunction();
589 F.getContext().diagnose(DI: DiagnosticInfoUnsupported{F, ("AArch64 doesn't support"
590 " function calls if any of the argument registers is reserved.")});
591}
592
593bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
594 MCRegister PhysReg) const {
595 // SLH uses register X16 as the taint register but it will fallback to a different
596 // method if the user clobbers it. So X16 is not reserved for inline asm but is
597 // for normal codegen.
598 if (MF.getFunction().hasFnAttribute(Kind: Attribute::SpeculativeLoadHardening) &&
599 MCRegisterInfo::regsOverlap(RegA: PhysReg, RegB: AArch64::X16))
600 return true;
601
602 // ZA/ZT0 registers are reserved but may be permitted in the clobber list.
603 if (PhysReg == AArch64::ZA || PhysReg == AArch64::ZT0)
604 return true;
605
606 return !isReservedReg(MF, Reg: PhysReg);
607}
608
609const TargetRegisterClass *
610AArch64RegisterInfo::getPointerRegClass(unsigned Kind) const {
611 return &AArch64::GPR64spRegClass;
612}
613
614const TargetRegisterClass *
615AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
616 if (RC == &AArch64::CCRRegClass)
617 return &AArch64::GPR64RegClass; // Only MSR & MRS copy NZCV.
618 return RC;
619}
620
621MCRegister AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
622
623bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
624 const MachineFrameInfo &MFI = MF.getFrameInfo();
625
626 // In the presence of variable sized objects or funclets, if the fixed stack
627 // size is large enough that referencing from the FP won't result in things
628 // being in range relatively often, we can use a base pointer to allow access
629 // from the other direction like the SP normally works.
630 //
631 // Furthermore, if both variable sized objects are present, and the
632 // stack needs to be dynamically re-aligned, the base pointer is the only
633 // reliable way to reference the locals.
634 if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
635 if (hasStackRealignment(MF))
636 return true;
637
638 auto &ST = MF.getSubtarget<AArch64Subtarget>();
639 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
640 if (ST.hasSVE() || ST.isStreaming()) {
641 // Frames that have variable sized objects and scalable SVE objects,
642 // should always use a basepointer.
643 if (!AFI->hasCalculatedStackSizeSVE() || AFI->hasSVEStackSize())
644 return true;
645 }
646
647 // Frames with hazard padding can have a large offset between the frame
648 // pointer and GPR locals, which includes the emergency spill slot. If the
649 // emergency spill slot is not within range of the load/store instructions
650 // (which have a signed 9-bit range), we will fail to compile if it is used.
651 // Since hasBasePointer() is called before we know if we have hazard padding
652 // or an emergency spill slot we need to enable the basepointer
653 // conservatively.
654 if (ST.getStreamingHazardSize() &&
655 !AFI->getSMEFnAttrs().hasNonStreamingInterfaceAndBody()) {
656 return true;
657 }
658
659 // Conservatively estimate whether the negative offset from the frame
660 // pointer will be sufficient to reach. If a function has a smallish
661 // frame, it's less likely to have lots of spills and callee saved
662 // space, so it's all more likely to be within range of the frame pointer.
663 // If it's wrong, we'll materialize the constant and still get to the
664 // object; it's just suboptimal. Negative offsets use the unscaled
665 // load/store instructions, which have a 9-bit signed immediate.
666 return MFI.getLocalFrameSize() >= 256;
667 }
668
669 return false;
670}
671
672bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
673 MCRegister Reg) const {
674 CallingConv::ID CC = MF.getFunction().getCallingConv();
675 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
676 bool IsVarArg = STI.isCallingConvWin64(CC: MF.getFunction().getCallingConv(),
677 IsVarArg: MF.getFunction().isVarArg());
678
679 auto HasReg = [](ArrayRef<MCRegister> RegList, MCRegister Reg) {
680 return llvm::is_contained(Range&: RegList, Element: Reg);
681 };
682
683 switch (CC) {
684 default:
685 report_fatal_error(reason: "Unsupported calling convention.");
686 case CallingConv::GHC:
687 return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
688 case CallingConv::PreserveNone:
689 if (!MF.getFunction().isVarArg())
690 return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg);
691 [[fallthrough]];
692 case CallingConv::C:
693 case CallingConv::Fast:
694 case CallingConv::PreserveMost:
695 case CallingConv::PreserveAll:
696 case CallingConv::CXX_FAST_TLS:
697 case CallingConv::Swift:
698 case CallingConv::SwiftTail:
699 case CallingConv::Tail:
700 if (STI.isTargetWindows()) {
701 if (IsVarArg)
702 return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
703 switch (CC) {
704 default:
705 return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
706 case CallingConv::Swift:
707 case CallingConv::SwiftTail:
708 return HasReg(CC_AArch64_Win64PCS_Swift_ArgRegs, Reg) ||
709 HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
710 }
711 }
712 if (!STI.isTargetDarwin()) {
713 switch (CC) {
714 default:
715 return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
716 case CallingConv::Swift:
717 case CallingConv::SwiftTail:
718 return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg) ||
719 HasReg(CC_AArch64_AAPCS_Swift_ArgRegs, Reg);
720 }
721 }
722 if (!IsVarArg) {
723 switch (CC) {
724 default:
725 return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg);
726 case CallingConv::Swift:
727 case CallingConv::SwiftTail:
728 return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg) ||
729 HasReg(CC_AArch64_DarwinPCS_Swift_ArgRegs, Reg);
730 }
731 }
732 if (STI.isTargetILP32())
733 return HasReg(CC_AArch64_DarwinPCS_ILP32_VarArg_ArgRegs, Reg);
734 return HasReg(CC_AArch64_DarwinPCS_VarArg_ArgRegs, Reg);
735 case CallingConv::Win64:
736 if (IsVarArg)
737 HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
738 return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
739 case CallingConv::CFGuard_Check:
740 return HasReg(CC_AArch64_Win64_CFGuard_Check_ArgRegs, Reg);
741 case CallingConv::AArch64_VectorCall:
742 case CallingConv::AArch64_SVE_VectorCall:
743 case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
744 case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1:
745 case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
746 if (STI.isTargetWindows())
747 return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
748 return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
749 }
750}
751
752Register
753AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
754 const AArch64FrameLowering *TFI = getFrameLowering(MF);
755 return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP;
756}
757
758bool AArch64RegisterInfo::requiresRegisterScavenging(
759 const MachineFunction &MF) const {
760 return true;
761}
762
763bool AArch64RegisterInfo::requiresVirtualBaseRegisters(
764 const MachineFunction &MF) const {
765 return true;
766}
767
768bool
769AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
770 // This function indicates whether the emergency spillslot should be placed
771 // close to the beginning of the stackframe (closer to FP) or the end
772 // (closer to SP).
773 //
774 // The beginning works most reliably if we have a frame pointer.
775 // In the presence of any non-constant space between FP and locals,
776 // (e.g. in case of stack realignment or a scalable SVE area), it is
777 // better to use SP or BP.
778 const AArch64FrameLowering &TFI = *getFrameLowering(MF);
779 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
780 assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
781 AFI->hasCalculatedStackSizeSVE()) &&
782 "Expected SVE area to be calculated by this point");
783 return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->hasSVEStackSize() &&
784 !AFI->hasStackHazardSlotIndex();
785}
786
787bool AArch64RegisterInfo::requiresFrameIndexScavenging(
788 const MachineFunction &MF) const {
789 return true;
790}
791
792bool
793AArch64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
794 const MachineFrameInfo &MFI = MF.getFrameInfo();
795 if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI.adjustsStack())
796 return true;
797 return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken();
798}
799
800/// needsFrameBaseReg - Returns true if the instruction's frame index
801/// reference would be better served by a base register other than FP
802/// or SP. Used by LocalStackFrameAllocation to determine which frame index
803/// references it should create new base registers for.
804bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
805 int64_t Offset) const {
806 for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i)
807 assert(i < MI->getNumOperands() &&
808 "Instr doesn't have FrameIndex operand!");
809
810 // It's the load/store FI references that cause issues, as it can be difficult
811 // to materialize the offset if it won't fit in the literal field. Estimate
812 // based on the size of the local frame and some conservative assumptions
813 // about the rest of the stack frame (note, this is pre-regalloc, so
814 // we don't know everything for certain yet) whether this offset is likely
815 // to be out of range of the immediate. Return true if so.
816
817 // We only generate virtual base registers for loads and stores, so
818 // return false for everything else.
819 if (!MI->mayLoad() && !MI->mayStore())
820 return false;
821
822 // Without a virtual base register, if the function has variable sized
823 // objects, all fixed-size local references will be via the frame pointer,
824 // Approximate the offset and see if it's legal for the instruction.
825 // Note that the incoming offset is based on the SP value at function entry,
826 // so it'll be negative.
827 MachineFunction &MF = *MI->getParent()->getParent();
828 const AArch64FrameLowering *TFI = getFrameLowering(MF);
829 MachineFrameInfo &MFI = MF.getFrameInfo();
830
831 // Estimate an offset from the frame pointer.
832 // Conservatively assume all GPR callee-saved registers get pushed.
833 // FP, LR, X19-X28, D8-D15. 64-bits each.
834 int64_t FPOffset = Offset - 16 * 20;
835 // Estimate an offset from the stack pointer.
836 // The incoming offset is relating to the SP at the start of the function,
837 // but when we access the local it'll be relative to the SP after local
838 // allocation, so adjust our SP-relative offset by that allocation size.
839 Offset += MFI.getLocalFrameSize();
840 // Assume that we'll have at least some spill slots allocated.
841 // FIXME: This is a total SWAG number. We should run some statistics
842 // and pick a real one.
843 Offset += 128; // 128 bytes of spill slots
844
845 // If there is a frame pointer, try using it.
846 // The FP is only available if there is no dynamic realignment. We
847 // don't know for sure yet whether we'll need that, so we guess based
848 // on whether there are any local variables that would trigger it.
849 if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, BaseReg: AArch64::FP, Offset: FPOffset))
850 return false;
851
852 // If we can reference via the stack pointer or base pointer, try that.
853 // FIXME: This (and the code that resolves the references) can be improved
854 // to only disallow SP relative references in the live range of
855 // the VLA(s). In practice, it's unclear how much difference that
856 // would make, but it may be worth doing.
857 if (isFrameOffsetLegal(MI, BaseReg: AArch64::SP, Offset))
858 return false;
859
860 // If even offset 0 is illegal, we don't want a virtual base register.
861 if (!isFrameOffsetLegal(MI, BaseReg: AArch64::SP, Offset: 0))
862 return false;
863
864 // The offset likely isn't legal; we want to allocate a virtual base register.
865 return true;
866}
867
868bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
869 Register BaseReg,
870 int64_t Offset) const {
871 assert(MI && "Unable to get the legal offset for nil instruction.");
872 StackOffset SaveOffset = StackOffset::getFixed(Fixed: Offset);
873 return isAArch64FrameOffsetLegal(MI: *MI, Offset&: SaveOffset) & AArch64FrameOffsetIsLegal;
874}
875
876/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
877/// at the beginning of the basic block.
878Register
879AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
880 int FrameIdx,
881 int64_t Offset) const {
882 MachineBasicBlock::iterator Ins = MBB->begin();
883 DebugLoc DL; // Defaults to "unknown"
884 if (Ins != MBB->end())
885 DL = Ins->getDebugLoc();
886 const MachineFunction &MF = *MBB->getParent();
887 const AArch64InstrInfo *TII =
888 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
889 const MCInstrDesc &MCID = TII->get(Opcode: AArch64::ADDXri);
890 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
891 Register BaseReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);
892 MRI.constrainRegClass(Reg: BaseReg, RC: TII->getRegClass(MCID, OpNum: 0));
893 unsigned Shifter = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: 0);
894
895 BuildMI(BB&: *MBB, I: Ins, MIMD: DL, MCID, DestReg: BaseReg)
896 .addFrameIndex(Idx: FrameIdx)
897 .addImm(Val: Offset)
898 .addImm(Val: Shifter);
899
900 return BaseReg;
901}
902
903void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
904 int64_t Offset) const {
905 // ARM doesn't need the general 64-bit offsets
906 StackOffset Off = StackOffset::getFixed(Fixed: Offset);
907
908 unsigned i = 0;
909 while (!MI.getOperand(i).isFI()) {
910 ++i;
911 assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
912 }
913
914 const MachineFunction *MF = MI.getParent()->getParent();
915 const AArch64InstrInfo *TII =
916 MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
917 bool Done = rewriteAArch64FrameIndex(MI, FrameRegIdx: i, FrameReg: BaseReg, Offset&: Off, TII);
918 assert(Done && "Unable to resolve frame index!");
919 (void)Done;
920}
921
922// Create a scratch register for the frame index elimination in an instruction.
923// This function has special handling of stack tagging loop pseudos, in which
924// case it can also change the instruction opcode.
925static Register
926createScratchRegisterForInstruction(MachineInstr &MI, unsigned FIOperandNum,
927 const AArch64InstrInfo *TII) {
928 // ST*Gloop have a reserved scratch register in operand 1. Use it, and also
929 // replace the instruction with the writeback variant because it will now
930 // satisfy the operand constraints for it.
931 Register ScratchReg;
932 if (MI.getOpcode() == AArch64::STGloop ||
933 MI.getOpcode() == AArch64::STZGloop) {
934 assert(FIOperandNum == 3 &&
935 "Wrong frame index operand for STGloop/STZGloop");
936 unsigned Op = MI.getOpcode() == AArch64::STGloop ? AArch64::STGloop_wback
937 : AArch64::STZGloop_wback;
938 ScratchReg = MI.getOperand(i: 1).getReg();
939 MI.getOperand(i: 3).ChangeToRegister(Reg: ScratchReg, isDef: false, isImp: false, isKill: true);
940 MI.setDesc(TII->get(Opcode: Op));
941 MI.tieOperands(DefIdx: 1, UseIdx: 3);
942 } else {
943 ScratchReg =
944 MI.getMF()->getRegInfo().createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
945 MI.getOperand(i: FIOperandNum)
946 .ChangeToRegister(Reg: ScratchReg, isDef: false, isImp: false, isKill: true);
947 }
948 return ScratchReg;
949}
950
951void AArch64RegisterInfo::getOffsetOpcodes(
952 const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const {
953 // The smallest scalable element supported by scaled SVE addressing
954 // modes are predicates, which are 2 scalable bytes in size. So the scalable
955 // byte offset must always be a multiple of 2.
956 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
957
958 // Add fixed-sized offset using existing DIExpression interface.
959 DIExpression::appendOffset(Ops, Offset: Offset.getFixed());
960
961 unsigned VG = getDwarfRegNum(Reg: AArch64::VG, isEH: true);
962 int64_t VGSized = Offset.getScalable() / 2;
963 if (VGSized > 0) {
964 Ops.push_back(Elt: dwarf::DW_OP_constu);
965 Ops.push_back(Elt: VGSized);
966 Ops.append(IL: {dwarf::DW_OP_bregx, VG, 0ULL});
967 Ops.push_back(Elt: dwarf::DW_OP_mul);
968 Ops.push_back(Elt: dwarf::DW_OP_plus);
969 } else if (VGSized < 0) {
970 Ops.push_back(Elt: dwarf::DW_OP_constu);
971 Ops.push_back(Elt: -VGSized);
972 Ops.append(IL: {dwarf::DW_OP_bregx, VG, 0ULL});
973 Ops.push_back(Elt: dwarf::DW_OP_mul);
974 Ops.push_back(Elt: dwarf::DW_OP_minus);
975 }
976}
977
978bool AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
979 int SPAdj, unsigned FIOperandNum,
980 RegScavenger *RS) const {
981 assert(SPAdj == 0 && "Unexpected");
982
983 MachineInstr &MI = *II;
984 MachineBasicBlock &MBB = *MI.getParent();
985 MachineFunction &MF = *MBB.getParent();
986 const MachineFrameInfo &MFI = MF.getFrameInfo();
987 const AArch64InstrInfo *TII =
988 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
989 const AArch64FrameLowering *TFI = getFrameLowering(MF);
990 int FrameIndex = MI.getOperand(i: FIOperandNum).getIndex();
991 bool Tagged =
992 MI.getOperand(i: FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED;
993 Register FrameReg;
994
995 // Special handling of dbg_value, stackmap patchpoint statepoint instructions.
996 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
997 MI.getOpcode() == TargetOpcode::PATCHPOINT ||
998 MI.getOpcode() == TargetOpcode::STATEPOINT) {
999 StackOffset Offset =
1000 TFI->resolveFrameIndexReference(MF, FI: FrameIndex, FrameReg,
1001 /*PreferFP=*/true,
1002 /*ForSimm=*/false);
1003 Offset += StackOffset::getFixed(Fixed: MI.getOperand(i: FIOperandNum + 1).getImm());
1004 MI.getOperand(i: FIOperandNum).ChangeToRegister(Reg: FrameReg, isDef: false /*isDef*/);
1005 MI.getOperand(i: FIOperandNum + 1).ChangeToImmediate(ImmVal: Offset.getFixed());
1006 return false;
1007 }
1008
1009 if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
1010 MachineOperand &FI = MI.getOperand(i: FIOperandNum);
1011 StackOffset Offset = TFI->getNonLocalFrameIndexReference(MF, FI: FrameIndex);
1012 assert(!Offset.getScalable() &&
1013 "Frame offsets with a scalable component are not supported");
1014 FI.ChangeToImmediate(ImmVal: Offset.getFixed());
1015 return false;
1016 }
1017
1018 StackOffset Offset;
1019 if (MI.getOpcode() == AArch64::TAGPstack) {
1020 // TAGPstack must use the virtual frame register in its 3rd operand.
1021 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1022 FrameReg = MI.getOperand(i: 3).getReg();
1023 Offset = StackOffset::getFixed(Fixed: MFI.getObjectOffset(ObjectIdx: FrameIndex) +
1024 AFI->getTaggedBasePointerOffset());
1025 } else if (Tagged) {
1026 StackOffset SPOffset = StackOffset::getFixed(
1027 Fixed: MFI.getObjectOffset(ObjectIdx: FrameIndex) + (int64_t)MFI.getStackSize());
1028 if (MFI.hasVarSizedObjects() ||
1029 isAArch64FrameOffsetLegal(MI, Offset&: SPOffset, OutUseUnscaledOp: nullptr, OutUnscaledOp: nullptr, EmittableOffset: nullptr) !=
1030 (AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) {
1031 // Can't update to SP + offset in place. Precalculate the tagged pointer
1032 // in a scratch register.
1033 Offset = TFI->resolveFrameIndexReference(
1034 MF, FI: FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
1035 Register ScratchReg =
1036 MF.getRegInfo().createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
1037 emitFrameOffset(MBB, MBBI: II, DL: MI.getDebugLoc(), DestReg: ScratchReg, SrcReg: FrameReg, Offset,
1038 TII);
1039 BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: AArch64::LDG), DestReg: ScratchReg)
1040 .addReg(RegNo: ScratchReg)
1041 .addReg(RegNo: ScratchReg)
1042 .addImm(Val: 0);
1043 MI.getOperand(i: FIOperandNum)
1044 .ChangeToRegister(Reg: ScratchReg, isDef: false, isImp: false, isKill: true);
1045 return false;
1046 }
1047 FrameReg = AArch64::SP;
1048 Offset = StackOffset::getFixed(Fixed: MFI.getObjectOffset(ObjectIdx: FrameIndex) +
1049 (int64_t)MFI.getStackSize());
1050 } else {
1051 Offset = TFI->resolveFrameIndexReference(
1052 MF, FI: FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
1053 }
1054
1055 // Modify MI as necessary to handle as much of 'Offset' as possible
1056 if (rewriteAArch64FrameIndex(MI, FrameRegIdx: FIOperandNum, FrameReg, Offset, TII))
1057 return true;
1058
1059 assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) &&
1060 "Emergency spill slot is out of reach");
1061
1062 // If we get here, the immediate doesn't fit into the instruction. We folded
1063 // as much as possible above. Handle the rest, providing a register that is
1064 // SP+LargeImm.
1065 Register ScratchReg =
1066 createScratchRegisterForInstruction(MI, FIOperandNum, TII);
1067 emitFrameOffset(MBB, MBBI: II, DL: MI.getDebugLoc(), DestReg: ScratchReg, SrcReg: FrameReg, Offset, TII);
1068 return false;
1069}
1070
1071unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1072 MachineFunction &MF) const {
1073 const AArch64FrameLowering *TFI = getFrameLowering(MF);
1074
1075 switch (RC->getID()) {
1076 default:
1077 return 0;
1078 case AArch64::GPR32RegClassID:
1079 case AArch64::GPR32spRegClassID:
1080 case AArch64::GPR32allRegClassID:
1081 case AArch64::GPR64spRegClassID:
1082 case AArch64::GPR64allRegClassID:
1083 case AArch64::GPR64RegClassID:
1084 case AArch64::GPR32commonRegClassID:
1085 case AArch64::GPR64commonRegClassID:
1086 return 32 - 1 // XZR/SP
1087 - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
1088 - MF.getSubtarget<AArch64Subtarget>().getNumXRegisterReserved()
1089 - hasBasePointer(MF); // X19
1090 case AArch64::FPR8RegClassID:
1091 case AArch64::FPR16RegClassID:
1092 case AArch64::FPR32RegClassID:
1093 case AArch64::FPR64RegClassID:
1094 case AArch64::FPR128RegClassID:
1095 return 32;
1096
1097 case AArch64::MatrixIndexGPR32_8_11RegClassID:
1098 case AArch64::MatrixIndexGPR32_12_15RegClassID:
1099 return 4;
1100
1101 case AArch64::DDRegClassID:
1102 case AArch64::DDDRegClassID:
1103 case AArch64::DDDDRegClassID:
1104 case AArch64::QQRegClassID:
1105 case AArch64::QQQRegClassID:
1106 case AArch64::QQQQRegClassID:
1107 return 32;
1108
1109 case AArch64::FPR128_loRegClassID:
1110 case AArch64::FPR64_loRegClassID:
1111 case AArch64::FPR16_loRegClassID:
1112 return 16;
1113 case AArch64::FPR128_0to7RegClassID:
1114 return 8;
1115 }
1116}
1117
1118static bool HandleDestructivePredicateHint(
1119 Register VirtReg, ArrayRef<MCPhysReg> Order,
1120 SmallVectorImpl<MCPhysReg> &Hints, const VirtRegMap *VRM,
1121 const MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
1122 const AArch64Subtarget &ST, const LiveRegMatrix *Matrix) {
1123 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg: VirtReg);
1124 if (!ST.useDistinctPredicateDstReg() ||
1125 !AArch64::PPRRegClass.hasSubClassEq(RC: RegRC) || !MRI.hasOneDef(RegNo: VirtReg) ||
1126 Order.size() < 2)
1127 return false;
1128
1129 const MachineInstr *DefInst = MRI.getOneDef(Reg: VirtReg)->getParent();
1130 if ((TII.get(Opcode: DefInst->getOpcode()).TSFlags &
1131 AArch64::DestructiveInstTypeMask) != AArch64::DestructivePredicate)
1132 return false;
1133
1134 Register Op1Reg = DefInst->getOperand(i: 1).getReg();
1135 if (Op1Reg.isVirtual())
1136 Op1Reg = VRM->getPhys(virtReg: Op1Reg);
1137
1138 // If no register is allocated for the general-predicate, it's not yet
1139 // possible to choose a distinct register.
1140 if (!Op1Reg.isValid())
1141 return false;
1142
1143 // Move Op1Reg as the least preferred register.
1144 //
1145 // This might result in callee-save spills when the function takes/returns
1146 // arguments in SVE registers (i.e. needs to preserve p4-p15) and can't reuse
1147 // p0-p3. That's why we limit it to non-callee saved registers or to
1148 // callee-saved registers that have already been allocated for other uses in
1149 // the function.
1150 DenseSet<unsigned> CSRs;
1151 for (unsigned I = 0;; ++I) {
1152 Register R = MRI.getCalleeSavedRegs()[I];
1153 if (!R.isValid())
1154 break;
1155 if (AArch64::PPRRegClass.contains(Reg: R))
1156 CSRs.insert(V: R);
1157 }
1158
1159 Hints.append(in_start: Order.begin(), in_end: Order.end());
1160 auto CanUseReg = [&](Register R) {
1161 return !CSRs.contains(V: R) || !MRI.def_empty(RegNo: R) || Matrix->isPhysRegUsed(PhysReg: R);
1162 };
1163 llvm::stable_sort(Range&: Hints, C: [&](Register A, Register B) {
1164 bool PrefA = (A != Op1Reg) && CanUseReg(A);
1165 bool PrefB = (B != Op1Reg) && CanUseReg(B);
1166 return PrefA && !PrefB;
1167 });
1168 return true;
1169}
1170
1171// We add regalloc hints for different cases:
1172// * Choosing a better destination operand for predicated SVE instructions
1173// where the inactive lanes are undef, by choosing a register that is not
1174// unique to the other operands of the instruction.
1175//
1176// * Improve register allocation for SME multi-vector instructions where we can
1177// benefit from the strided- and contiguous register multi-vector tuples.
1178//
1179// Here FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register
1180// allocation where a consecutive multi-vector tuple is constructed from the
1181// same indices of multiple strided loads. This may still result in
1182// unnecessary copies between the loads and the tuple. Here we try to return a
1183// hint to assign the contiguous ZPRMulReg starting at the same register as
1184// the first operand of the pseudo, which should be a subregister of the first
1185// strided load.
1186//
1187// For example, if the first strided load has been assigned $z16_z20_z24_z28
1188// and the operands of the pseudo are each accessing subregister zsub2, we
1189// should look through through Order to find a contiguous register which
1190// begins with $z24 (i.e. $z24_z25_z26_z27).
1191bool AArch64RegisterInfo::getRegAllocationHints(
1192 Register VirtReg, ArrayRef<MCPhysReg> Order,
1193 SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
1194 const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
1195 auto &ST = MF.getSubtarget<AArch64Subtarget>();
1196 const AArch64InstrInfo *TII =
1197 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
1198 const MachineRegisterInfo &MRI = MF.getRegInfo();
1199
1200 bool ConsiderOnlyHints =
1201 TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
1202
1203 // For predicated SVE instructions where the inactive lanes are undef,
1204 // pick a destination register that is not unique to avoid introducing
1205 // a movprfx.
1206 const TargetRegisterClass *RegRC = MRI.getRegClass(Reg: VirtReg);
1207 if (AArch64::ZPRRegClass.hasSubClassEq(RC: RegRC)) {
1208 for (const MachineOperand &DefOp : MRI.def_operands(Reg: VirtReg)) {
1209 const MachineInstr &Def = *DefOp.getParent();
1210 if (DefOp.isImplicit() ||
1211 (TII->get(Opcode: Def.getOpcode()).TSFlags & AArch64::FalseLanesMask) !=
1212 AArch64::FalseLanesUndef)
1213 continue;
1214
1215 unsigned InstFlags =
1216 TII->get(Opcode: AArch64::getSVEPseudoMap(Opcode: Def.getOpcode())).TSFlags;
1217
1218 for (MCPhysReg R : Order) {
1219 auto AddHintIfSuitable = [&](MCPhysReg R,
1220 const MachineOperand &MO) -> bool {
1221 // R is a suitable register hint if R can reuse one of the other
1222 // source operands.
1223 MCPhysReg PhysReg = VRM->getPhys(virtReg: MO.getReg());
1224 if (PhysReg && MO.getSubReg())
1225 PhysReg = getSubReg(Reg: PhysReg, Idx: MO.getSubReg());
1226 if (PhysReg != R)
1227 return false;
1228 Hints.push_back(Elt: R);
1229 return true;
1230 };
1231
1232 switch (InstFlags & AArch64::DestructiveInstTypeMask) {
1233 default:
1234 break;
1235 case AArch64::DestructiveTernaryCommWithRev:
1236 AddHintIfSuitable(R, Def.getOperand(i: 2)) ||
1237 AddHintIfSuitable(R, Def.getOperand(i: 3)) ||
1238 AddHintIfSuitable(R, Def.getOperand(i: 4));
1239 break;
1240 case AArch64::DestructiveBinaryComm:
1241 case AArch64::DestructiveBinaryCommWithRev:
1242 AddHintIfSuitable(R, Def.getOperand(i: 2)) ||
1243 AddHintIfSuitable(R, Def.getOperand(i: 3));
1244 break;
1245 case AArch64::DestructiveBinary:
1246 case AArch64::DestructiveBinaryImm:
1247 AddHintIfSuitable(R, Def.getOperand(i: 2));
1248 break;
1249 case AArch64::DestructiveUnaryPassthru:
1250 AddHintIfSuitable(R, Def.getOperand(i: 3));
1251 break;
1252 case AArch64::DestructiveBinaryImmUnpred:
1253 case AArch64::DestructiveBinaryShImmUnpred:
1254 AddHintIfSuitable(R, Def.getOperand(i: 1));
1255 break;
1256 }
1257 }
1258 }
1259
1260 if (Hints.size())
1261 return ConsiderOnlyHints;
1262 }
1263
1264 if (HandleDestructivePredicateHint(VirtReg, Order, Hints, VRM, MRI, TII: *TII, ST,
1265 Matrix))
1266 return ConsiderOnlyHints;
1267
1268 if (!ST.hasSME() || !ST.isStreaming())
1269 return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
1270 VRM);
1271
1272 // The SVE calling convention preserves registers Z8-Z23. As a result, there
1273 // are no ZPR2Strided or ZPR4Strided registers that do not overlap with the
1274 // callee-saved registers and so by default these will be pushed to the back
1275 // of the allocation order for the ZPRStridedOrContiguous classes.
1276 // If any of the instructions which define VirtReg are used by the
1277 // FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy
1278 // instructions over reducing the number of clobbered callee-save registers,
1279 // so we add the strided registers as a hint.
1280 unsigned RegID = RegRC->getID();
1281 if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
1282 RegID == AArch64::ZPR4StridedOrContiguousRegClassID) {
1283
1284 // Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE.
1285 for (const MachineInstr &Use : MRI.use_nodbg_instructions(Reg: VirtReg)) {
1286 if (Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
1287 Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO)
1288 continue;
1289
1290 unsigned UseOps = Use.getNumOperands() - 1;
1291 const TargetRegisterClass *StridedRC;
1292 switch (RegID) {
1293 case AArch64::ZPR2StridedOrContiguousRegClassID:
1294 StridedRC = &AArch64::ZPR2StridedRegClass;
1295 break;
1296 case AArch64::ZPR4StridedOrContiguousRegClassID:
1297 StridedRC = &AArch64::ZPR4StridedRegClass;
1298 break;
1299 default:
1300 llvm_unreachable("Unexpected RegID");
1301 }
1302
1303 SmallVector<MCPhysReg, 4> StridedOrder;
1304 for (MCPhysReg Reg : Order)
1305 if (StridedRC->contains(Reg))
1306 StridedOrder.push_back(Elt: Reg);
1307
1308 int OpIdx = Use.findRegisterUseOperandIdx(Reg: VirtReg, TRI: this);
1309 assert(OpIdx != -1 && "Expected operand index from register use.");
1310
1311 unsigned TupleID = MRI.getRegClass(Reg: Use.getOperand(i: 0).getReg())->getID();
1312 bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID ||
1313 TupleID == AArch64::ZPR4Mul4RegClassID;
1314
1315 const MachineOperand *AssignedRegOp = llvm::find_if(
1316 Range: make_range(x: Use.operands_begin() + 1, y: Use.operands_end()),
1317 P: [&VRM](const MachineOperand &Op) {
1318 return VRM->hasPhys(virtReg: Op.getReg());
1319 });
1320
1321 // Example:
1322 //
1323 // When trying to find a suitable register allocation for VirtReg %v2 in:
1324 //
1325 // %v0:zpr2stridedorcontiguous = ld1 p0/z, [...]
1326 // %v1:zpr2stridedorcontiguous = ld1 p0/z, [...]
1327 // %v2:zpr2stridedorcontiguous = ld1 p0/z, [...]
1328 // %v3:zpr2stridedorcontiguous = ld1 p0/z, [...]
1329 // %v4:zpr4mul4 = FORM_TRANSPOSED_X4 %v0:0, %v1:0, %v2:0, %v3:0
1330 //
1331 // One such suitable allocation would be:
1332 //
1333 // { z0, z8 } = ld1 p0/z, [...]
1334 // { z1, z9 } = ld1 p0/z, [...]
1335 // { z2, z10 } = ld1 p0/z, [...]
1336 // { z3, z11 } = ld1 p0/z, [...]
1337 // { z0, z1, z2, z3 } =
1338 // FORM_TRANSPOSED_X4 {z0, z8}:0, {z1, z9}:0, {z2, z10}:0, {z3, z11}:0
1339 //
1340 // Below we distinguish two cases when trying to find a register:
1341 // * None of the registers used by FORM_TRANSPOSED_X4 have been assigned
1342 // yet. In this case the code muse ensure that there are at least UseOps
1343 // free consecutive registers. If IsMulZPR is true, then the first of
1344 // registers must also be a multiple of UseOps, e.g. { z0, z1, z2, z3 }
1345 // is valid but { z1, z2, z3, z5 } is not.
1346 // * One or more of the registers used by FORM_TRANSPOSED_X4 is already
1347 // assigned a physical register, which means only checking that a
1348 // consecutive range of free tuple registers exists which includes
1349 // the assigned register.
1350 // e.g. in the example above, if { z0, z8 } is already allocated for
1351 // %v0, we just need to ensure that { z1, z9 }, { z2, z10 } and
1352 // { z3, z11 } are also free. If so, we add { z2, z10 }.
1353
1354 if (AssignedRegOp == Use.operands_end()) {
1355 // There are no registers already assigned to any of the pseudo
1356 // operands. Look for a valid starting register for the group.
1357 for (unsigned I = 0; I < StridedOrder.size(); ++I) {
1358 MCPhysReg Reg = StridedOrder[I];
1359
1360 // If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting
1361 // register of the first load should be a multiple of 2 or 4.
1362 unsigned SubRegIdx = Use.getOperand(i: OpIdx).getSubReg();
1363 if (IsMulZPR && (getSubReg(Reg, Idx: SubRegIdx) - AArch64::Z0) % UseOps !=
1364 ((unsigned)OpIdx - 1))
1365 continue;
1366
1367 // In the example above, if VirtReg is the third operand of the
1368 // tuple (%v2) and Reg == Z2_Z10, then we need to make sure that
1369 // Z0_Z8, Z1_Z9 and Z3_Z11 are also available.
1370 auto IsFreeConsecutiveReg = [&](unsigned UseOp) {
1371 unsigned R = Reg - (OpIdx - 1) + UseOp;
1372 return StridedRC->contains(Reg: R) &&
1373 (UseOp == 0 ||
1374 ((getSubReg(Reg: R, Idx: AArch64::zsub0) - AArch64::Z0) ==
1375 (getSubReg(Reg: R - 1, Idx: AArch64::zsub0) - AArch64::Z0) + 1)) &&
1376 !Matrix->isPhysRegUsed(PhysReg: R);
1377 };
1378 if (all_of(Range: iota_range<unsigned>(0U, UseOps, /*Inclusive=*/false),
1379 P: IsFreeConsecutiveReg))
1380 Hints.push_back(Elt: Reg);
1381 }
1382 } else {
1383 // At least one operand already has a physical register assigned.
1384 // Find the starting sub-register of this and use it to work out the
1385 // correct strided register to suggest based on the current op index.
1386 MCPhysReg TargetStartReg =
1387 getSubReg(Reg: VRM->getPhys(virtReg: AssignedRegOp->getReg()), Idx: AArch64::zsub0) +
1388 (OpIdx - AssignedRegOp->getOperandNo());
1389
1390 for (unsigned I = 0; I < StridedOrder.size(); ++I)
1391 if (getSubReg(Reg: StridedOrder[I], Idx: AArch64::zsub0) == TargetStartReg)
1392 Hints.push_back(Elt: StridedOrder[I]);
1393 }
1394
1395 if (!Hints.empty())
1396 return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
1397 MF, VRM);
1398 }
1399 }
1400
1401 for (MachineInstr &MI : MRI.def_instructions(Reg: VirtReg)) {
1402 if (MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO &&
1403 MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO)
1404 return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
1405 MF, VRM);
1406
1407 unsigned FirstOpSubReg = MI.getOperand(i: 1).getSubReg();
1408 switch (FirstOpSubReg) {
1409 case AArch64::zsub0:
1410 case AArch64::zsub1:
1411 case AArch64::zsub2:
1412 case AArch64::zsub3:
1413 break;
1414 default:
1415 continue;
1416 }
1417
1418 // Look up the physical register mapped to the first operand of the pseudo.
1419 Register FirstOpVirtReg = MI.getOperand(i: 1).getReg();
1420 if (!VRM->hasPhys(virtReg: FirstOpVirtReg))
1421 continue;
1422
1423 MCRegister TupleStartReg =
1424 getSubReg(Reg: VRM->getPhys(virtReg: FirstOpVirtReg), Idx: FirstOpSubReg);
1425 for (unsigned I = 0; I < Order.size(); ++I)
1426 if (MCRegister R = getSubReg(Reg: Order[I], Idx: AArch64::zsub0))
1427 if (R == TupleStartReg)
1428 Hints.push_back(Elt: Order[I]);
1429 }
1430
1431 return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
1432 VRM);
1433}
1434
1435unsigned AArch64RegisterInfo::getLocalAddressRegister(
1436 const MachineFunction &MF) const {
1437 const auto &MFI = MF.getFrameInfo();
1438 if (!MF.hasEHFunclets() && !MFI.hasVarSizedObjects())
1439 return AArch64::SP;
1440 else if (hasStackRealignment(MF))
1441 return getBaseRegister();
1442 return getFrameRegister(MF);
1443}
1444
1445/// SrcRC and DstRC will be morphed into NewRC if this returns true
1446bool AArch64RegisterInfo::shouldCoalesce(
1447 MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
1448 const TargetRegisterClass *DstRC, unsigned DstSubReg,
1449 const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
1450 MachineFunction &MF = *MI->getMF();
1451 MachineRegisterInfo &MRI = MF.getRegInfo();
1452
1453 if (MI->isSubregToReg() && MRI.subRegLivenessEnabled() &&
1454 !MF.getSubtarget<AArch64Subtarget>().enableSRLTSubregToRegMitigation())
1455 return false;
1456
1457 if (MI->isCopy() &&
1458 ((DstRC->getID() == AArch64::GPR64RegClassID) ||
1459 (DstRC->getID() == AArch64::GPR64commonRegClassID)) &&
1460 MI->getOperand(i: 0).getSubReg() && MI->getOperand(i: 1).getSubReg())
1461 // Do not coalesce in the case of a 32-bit subregister copy
1462 // which implements a 32 to 64 bit zero extension
1463 // which relies on the upper 32 bits being zeroed.
1464 return false;
1465
1466 auto IsCoalescerBarrier = [](const MachineInstr &MI) {
1467 switch (MI.getOpcode()) {
1468 case AArch64::COALESCER_BARRIER_FPR16:
1469 case AArch64::COALESCER_BARRIER_FPR32:
1470 case AArch64::COALESCER_BARRIER_FPR64:
1471 case AArch64::COALESCER_BARRIER_FPR128:
1472 return true;
1473 default:
1474 return false;
1475 }
1476 };
1477
1478 // For calls that temporarily have to toggle streaming mode as part of the
1479 // call-sequence, we need to be more careful when coalescing copy instructions
1480 // so that we don't end up coalescing the NEON/FP result or argument register
1481 // with a whole Z-register, such that after coalescing the register allocator
1482 // will try to spill/reload the entire Z register.
1483 //
1484 // We do this by checking if the node has any defs/uses that are
1485 // COALESCER_BARRIER pseudos. These are 'nops' in practice, but they exist to
1486 // instruct the coalescer to avoid coalescing the copy.
1487 if (MI->isCopy() && SubReg != DstSubReg &&
1488 (AArch64::ZPRRegClass.hasSubClassEq(RC: DstRC) ||
1489 AArch64::ZPRRegClass.hasSubClassEq(RC: SrcRC))) {
1490 unsigned SrcReg = MI->getOperand(i: 1).getReg();
1491 if (any_of(Range: MRI.def_instructions(Reg: SrcReg), P: IsCoalescerBarrier))
1492 return false;
1493 unsigned DstReg = MI->getOperand(i: 0).getReg();
1494 if (any_of(Range: MRI.use_nodbg_instructions(Reg: DstReg), P: IsCoalescerBarrier))
1495 return false;
1496 }
1497
1498 return true;
1499}
1500
1501bool AArch64RegisterInfo::shouldAnalyzePhysregInMachineLoopInfo(
1502 MCRegister R) const {
1503 return R == AArch64::VG;
1504}
1505
1506bool AArch64RegisterInfo::isIgnoredCVReg(MCRegister LLVMReg) const {
1507 return (LLVMReg >= AArch64::Z0 && LLVMReg <= AArch64::Z31) ||
1508 (LLVMReg >= AArch64::P0 && LLVMReg <= AArch64::P15);
1509}
1510