//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
          cl::desc("Enable use of a base pointer for complex stack frames"));

static cl::opt<bool>
    DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
                            cl::init(false),
                            cl::desc("Disable two address hints for register "
                                     "allocation"));

extern cl::opt<bool> X86EnableAPXForRelocation;

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isX86_64() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isX86_64() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isX86_64();
  IsTarget64BitLP64 = Is64Bit && !TT.isX32();
  IsWin64 = Is64Bit && TT.isOSWindows();
  IsUEFI64 = Is64Bit && TT.isUEFI();
  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit PIC mode,
  // the GOT pointer must be in EBX before calls made through the PLT.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow register
  // class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  // Keep using non-rex2 register class when APX feature (EGPR/NDD/NF) is not
  // enabled for relocation.
  if (!X86EnableAPXForRelocation && isNonRex2RegClass(RC))
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  auto I = RC->superclasses().begin();
  auto E = RC->superclasses().end();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    if (I != E) {
      Super = getRegClass(*I);
      ++I;
    } else {
      Super = nullptr;
    }
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
  assert(Kind == 0 && "this should only be used for default cases");
  if (IsTarget64BitLP64)
    return &X86::GR64RegClass;
  // If the target is 64-bit but we have been told to use 32-bit addresses,
  // we can still use a 64-bit register as long as we know the high bits
  // are zero.
  // Reflect that in the returned register class.
  return Is64Bit ? &X86::LOW32_ADDR_ACCESSRegClass : &X86::GR32RegClass;
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

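// Return a heuristic upper bound on the number of RC registers available for
// register-pressure decisions. The GR32/GR64 limits drop by one when a frame
// pointer is in use.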
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the function has the no_caller_saved_registers attribute, switch to
  // the X86_INTR calling convention, since that convention carries the
  // matching CSR list.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the no_callee_saved_registers attribute is specified, override the
  // CSRs normally specified by the calling convention and use the empty set
  // instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR()
                 ? CSR_64_CXX_TLS_Darwin_PE_SaveList
                 : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList
                       : CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList
                       : CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList
                     : CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64 || IsUEFI64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

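// Callee-saved register list used when inter-procedural register allocation
// (IPRA) is enabled; a conservative set that callers may rely on regardless
// of the callee's refined register usage.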
const MCPhysReg *
X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
  return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask
                       : CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask
                       : CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask
                     : CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return (IsWin64 || IsUEFI64) ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t *
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  auto &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.is64Bit() && ST.hasUserReservedRegisters()) {
    // Reserve R8-R15 (and their aliases) when requested by the user.
    for (unsigned Reg = X86::R8; Reg <= X86::R15; ++Reg)
      if (ST.isRegisterReservedByUser(Reg))
        for (const MCPhysReg &SubReg : subregs_inclusive(Reg))
          Reserved.set(SubReg);
    if (ST.hasEGPR())
      for (unsigned Reg = X86::R16; Reg <= X86::R31; ++Reg)
        if (ST.isRegisterReservedByUser(Reg))
          for (const MCPhysReg &SubReg : subregs_inclusive(Reg))
            Reserved.set(SubReg);
  }

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
      MF.getContext().reportError(
          SMLoc(),
          "Frame pointer clobbered by function invoke is not supported.");

    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
      MF.getContext().reportError(SMLoc(),
                                  "Stack realignment in presence of dynamic "
                                  "allocas is not supported with "
                                  "this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though
    // their super-registers are the old 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(X86::R16, X86::R31WH + 1);

  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512 and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  //   Registers before AVX512,
  //   AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  //   AMX registers (TMM)
  //   APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}

bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // The frame pointer counts as fixed while it is in use as the frame pointer.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

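// Return true if RC is the AMX tile register class (TMM0-TMM7).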
bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately, EFLAGS can show up as live-out after branch folding. Assert
  // to track this, and clear the register afterwards to avoid unnecessary
  // crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

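// Frame objects cannot be addressed through the stack pointer when the frame
// contains variable-sized objects (e.g. dynamic allocas) or opaque stack
// adjustments such as stack-adjusting inline asm.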
static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // We have a virtual register to reference arguments, so a base pointer is
  // not needed.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should be really trying first to entirely eliminate
// this instruction which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'.
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

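// Return true if MI is a funclet return instruction (WinEH catch/cleanup
// return).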
static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

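// Simplified overload used when the base register and frame-index offset have
// already been computed: rewrite the frame-index operand of MI to BaseReg and
// fold FIOffset into the displacement.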
void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}

bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int64_t FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location.
  // This matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
  // register as source operand, semantic is the same and destination is
  // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
  // Don't change BasePtr since it is used later for stack adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with base register. Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    const X86InstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
    const DebugLoc &DL = MI.getDebugLoc();
    int64_t Imm = MI.getOperand(FIOperandNum + 3).getImm();
    int64_t Offset = FIOffset + Imm;
    bool FitsIn32Bits = isInt<32>(Offset);
    // If the offset will not fit in a 32-bit displacement, then for 64-bit
    // targets, scavenge a register to hold it. Otherwise...
    if (Is64Bit && !FitsIn32Bits) {
      assert(RS && "RegisterScavenger was NULL");

      RS->enterBasicBlockEnd(MBB);
      RS->backward(std::next(II));

      Register ScratchReg = RS->scavengeRegisterBackwards(
          X86::GR64RegClass, II, /*RestoreAfter=*/false, /*SPAdj=*/0,
          /*AllowSpill=*/true);
      assert(ScratchReg != 0 && "scratch reg was 0");
      RS->setRegUsed(ScratchReg);

      BuildMI(MBB, II, DL, TII->get(X86::MOV64ri), ScratchReg).addImm(Offset);

      MI.getOperand(FIOperandNum + 3).setImm(0);
      MI.getOperand(FIOperandNum + 2).setReg(ScratchReg);

      return false;
    }

    // ... for 32-bit targets, this is a bug!
    if (!Is64Bit && !FitsIn32Bits) {
      MI.emitGenericError("64-bit offset calculated but target is 32-bit");
      // Trap so that the instruction verification pass does not fail if run.
      BuildMI(MBB, MBBI, DL, TII->get(X86::TRAP));
      return false;
    }

    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
                      (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}

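// findDeadCallerSavedReg - Return a caller-saved register that is not live
// across the return instruction at MBBI, and can therefore be used as a
// scratch register in the epilogue. Returns 0 if no such register exists or
// the function calls eh_return.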
unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  if (MF->callsEHReturn())
    return 0;

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURN_WIN64ri:
  case X86::TCRETURN_HIPE32ri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNri64_ImpCall:
  case X86::TCRETURNmi64:
  case X86::TCRETURN_WINmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    LiveRegUnits LRU(*this);
    LRU.addLiveOuts(MBB);
    LRU.stepBackward(*MBBI);

    const TargetRegisterClass &RC =
        Is64Bit ? X86::GR64_NOSPRegClass : X86::GR32_NOSPRegClass;
    for (MCRegister Reg : RC) {
      if (LRU.available(Reg) && !MRI.isReserved(Reg))
        return Reg;
    }
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

Register
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

Register
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

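// Look up the tile shape (rows/columns) of an AMX tile virtual register. If
// the shape is not cached in the VirtRegMap yet, walk back through COPYs to
// the defining tile instruction to recover it, caching the result.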
static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTILELOADDRSV:
  case X86::PTILELOADDRST1V:
  case X86::PTMMULTF32PSV:
  case X86::PTDPBF8PSV:
  case X86::PTDPBHF8PSV:
  case X86::PTDPHBF8PSV:
  case X86::PTDPHF8PSV: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  }
}

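// Provide register allocation hints. For AMX tile registers, prefer physical
// tile registers whose currently assigned shape matches VirtReg's shape. For
// general purpose registers on NDD-capable targets, add two-address-style
// hints: an NDD instruction whose destination lands in the same register as
// one of its sources can use the shorter legacy two-operand encoding.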
bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  unsigned ID = RC.getID();

  if (!VRM)
    return BaseImplRetVal;

  if (ID != X86::TILERegClassID) {
    if (DisableRegAllocNDDHints || !ST.hasNDD() ||
        !TRI.isGeneralPurposeRegisterClass(&RC))
      return BaseImplRetVal;

    // Add any two address hints after any copy hints.
    SmallSet<unsigned, 4> TwoAddrHints;

    auto TryAddNDDHint = [&](const MachineOperand &MO) {
      Register Reg = MO.getReg();
      Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(Reg));
      if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
        TwoAddrHints.insert(PhysReg);
    };

    // An NDD instruction is compressible when Op0 is allocated to the same
    // physical register as Op1 (or Op2 if the instruction is commutable).
    for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
      const MachineInstr &MI = *MO.getParent();
      if (!X86::getNonNDVariant(MI.getOpcode()))
        continue;
      unsigned OpIdx = MI.getOperandNo(&MO);
      if (OpIdx == 0) {
        assert(MI.getOperand(1).isReg());
        TryAddNDDHint(MI.getOperand(1));
        if (MI.isCommutable()) {
          assert(MI.getOperand(2).isReg());
          TryAddNDDHint(MI.getOperand(2));
        }
      } else if (OpIdx == 1) {
        TryAddNDDHint(MI.getOperand(0));
      } else if (MI.isCommutable() && OpIdx == 2) {
        TryAddNDDHint(MI.getOperand(0));
      }
    }

    for (MCPhysReg OrderReg : Order)
      if (TwoAddrHints.count(OrderReg))
        Hints.push_back(OrderReg);

    return BaseImplRetVal;
  }

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (PhysShape == VirtShape)
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints(llvm::from_range, Hints);
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}

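// Map a general purpose register class to its variant that excludes the APX
// extended registers (R16-R31), for use when EGPRs must not be allocated
// (e.g. when APX is disabled for relocation).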
const TargetRegisterClass *X86RegisterInfo::constrainRegClassToNonRex2(
    const TargetRegisterClass *RC) const {
  switch (RC->getID()) {
  default:
    return RC;
  case X86::GR8RegClassID:
    return &X86::GR8_NOREX2RegClass;
  case X86::GR16RegClassID:
    return &X86::GR16_NOREX2RegClass;
  case X86::GR32RegClassID:
    return &X86::GR32_NOREX2RegClass;
  case X86::GR64RegClassID:
    return &X86::GR64_NOREX2RegClass;
  case X86::GR32_NOSPRegClassID:
    return &X86::GR32_NOREX2_NOSPRegClass;
  case X86::GR64_NOSPRegClassID:
    return &X86::GR64_NOREX2_NOSPRegClass;
  }
}

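// Return true if RC already excludes the APX extended (R16-R31) registers.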
bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const {
  switch (RC->getID()) {
  default:
    return false;
  case X86::GR8_NOREX2RegClassID:
  case X86::GR16_NOREX2RegClassID:
  case X86::GR32_NOREX2RegClassID:
  case X86::GR64_NOREX2RegClassID:
  case X86::GR32_NOREX2_NOSPRegClassID:
  case X86::GR64_NOREX2_NOSPRegClassID:
  case X86::GR64_with_sub_16bit_in_GR16_NOREX2RegClassID:
    return true;
  }
}