| 1 | //===-- X86RegisterInfo.cpp - X86 Register Information --------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains the X86 implementation of the TargetRegisterInfo class. |
| 10 | // This file is responsible for the frame pointer elimination optimization |
| 11 | // on X86. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "X86RegisterInfo.h" |
| 16 | #include "X86FrameLowering.h" |
| 17 | #include "X86MachineFunctionInfo.h" |
| 18 | #include "X86Subtarget.h" |
| 19 | #include "llvm/ADT/BitVector.h" |
| 20 | #include "llvm/ADT/STLExtras.h" |
| 21 | #include "llvm/ADT/SmallSet.h" |
| 22 | #include "llvm/CodeGen/LiveRegMatrix.h" |
| 23 | #include "llvm/CodeGen/MachineFrameInfo.h" |
| 24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 25 | #include "llvm/CodeGen/RegisterScavenging.h" |
| 26 | #include "llvm/CodeGen/TargetFrameLowering.h" |
| 27 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 28 | #include "llvm/CodeGen/TileShapeInfo.h" |
| 29 | #include "llvm/CodeGen/VirtRegMap.h" |
| 30 | #include "llvm/IR/Function.h" |
| 31 | #include "llvm/IR/Type.h" |
| 32 | #include "llvm/MC/MCContext.h" |
| 33 | #include "llvm/Support/CommandLine.h" |
| 34 | #include "llvm/Support/ErrorHandling.h" |
| 35 | #include "llvm/Target/TargetMachine.h" |
| 36 | #include "llvm/Target/TargetOptions.h" |
| 37 | |
| 38 | using namespace llvm; |
| 39 | |
| 40 | #define GET_REGINFO_TARGET_DESC |
| 41 | #include "X86GenRegisterInfo.inc" |
| 42 | |
| 43 | static cl::opt<bool> |
| 44 | EnableBasePointer("x86-use-base-pointer" , cl::Hidden, cl::init(Val: true), |
| 45 | cl::desc("Enable use of a base pointer for complex stack frames" )); |
| 46 | |
| 47 | static cl::opt<bool> |
| 48 | DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd" , cl::Hidden, |
| 49 | cl::init(Val: false), |
| 50 | cl::desc("Disable two address hints for register " |
| 51 | "allocation" )); |
| 52 | |
| 53 | extern cl::opt<bool> X86EnableAPXForRelocation; |
| 54 | |
| 55 | X86RegisterInfo::X86RegisterInfo(const Triple &TT) |
| 56 | : X86GenRegisterInfo((TT.isX86_64() ? X86::RIP : X86::EIP), |
| 57 | X86_MC::getDwarfRegFlavour(TT, isEH: false), |
| 58 | X86_MC::getDwarfRegFlavour(TT, isEH: true), |
| 59 | (TT.isX86_64() ? X86::RIP : X86::EIP)) { |
| 60 | X86_MC::initLLVMToSEHAndCVRegMapping(MRI: this); |
| 61 | |
| 62 | // Cache some information. |
| 63 | Is64Bit = TT.isX86_64(); |
| 64 | IsTarget64BitLP64 = Is64Bit && !TT.isX32(); |
| 65 | IsWin64 = Is64Bit && TT.isOSWindows(); |
| 66 | IsUEFI64 = Is64Bit && TT.isUEFI(); |
| 67 | |
// Use a callee-saved register as the base pointer. These registers must
// not conflict with any ABI requirements. For example, in 32-bit PIC mode,
// EBX must hold the GOT pointer before calls made through the PLT, so it
// cannot serve as the base pointer there.
| 71 | if (Is64Bit) { |
| 72 | SlotSize = 8; |
| 73 | // This matches the simplified 32-bit pointer code in the data layout |
| 74 | // computation. |
| 75 | // FIXME: Should use the data layout? |
| 76 | bool Use64BitReg = !TT.isX32(); |
| 77 | StackPtr = Use64BitReg ? X86::RSP : X86::ESP; |
| 78 | FramePtr = Use64BitReg ? X86::RBP : X86::EBP; |
| 79 | BasePtr = Use64BitReg ? X86::RBX : X86::EBX; |
| 80 | } else { |
| 81 | SlotSize = 4; |
| 82 | StackPtr = X86::ESP; |
| 83 | FramePtr = X86::EBP; |
| 84 | BasePtr = X86::ESI; |
| 85 | } |
| 86 | } |
| 87 | |
| 88 | const TargetRegisterClass * |
| 89 | X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, |
| 90 | unsigned Idx) const { |
| 91 | // The sub_8bit sub-register index is more constrained in 32-bit mode. |
| 92 | // It behaves just like the sub_8bit_hi index. |
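// (Without REX prefixes only AL/BL/CL/DL are addressable low-byte registers,
// the same four registers that have high-byte sub-registers, so reusing the
// sub_8bit_hi mapping yields the correctly restricted sub-class.)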
| 93 | if (!Is64Bit && Idx == X86::sub_8bit) |
| 94 | Idx = X86::sub_8bit_hi; |
| 95 | |
| 96 | // Forward to TableGen's default version. |
| 97 | return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx); |
| 98 | } |
| 99 | |
| 100 | const TargetRegisterClass * |
| 101 | X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, |
| 102 | const TargetRegisterClass *B, |
| 103 | unsigned SubIdx) const { |
| 104 | // The sub_8bit sub-register index is more constrained in 32-bit mode. |
| 105 | if (!Is64Bit && SubIdx == X86::sub_8bit) { |
| 106 | A = X86GenRegisterInfo::getSubClassWithSubReg(RC: A, Idx: X86::sub_8bit_hi); |
| 107 | if (!A) |
| 108 | return nullptr; |
| 109 | } |
| 110 | return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, Idx: SubIdx); |
| 111 | } |
| 112 | |
| 113 | const TargetRegisterClass * |
| 114 | X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, |
| 115 | const MachineFunction &MF) const { |
| 116 | // Don't allow super-classes of GR8_NOREX. This class is only used after |
| 117 | // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied |
| 118 | // to the full GR8 register class in 64-bit mode, so we cannot allow the |
// register class inflation.
| 120 | // |
| 121 | // The GR8_NOREX class is always used in a way that won't be constrained to a |
| 122 | // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the |
| 123 | // full GR8 class. |
| 124 | if (RC == &X86::GR8_NOREXRegClass) |
| 125 | return RC; |
| 126 | |
// Keep using a non-REX2 register class when the APX features (EGPR/NDD/NF)
// are not enabled for instructions with relocations.
| 129 | if (!X86EnableAPXForRelocation && isNonRex2RegClass(RC)) |
| 130 | return RC; |
| 131 | |
| 132 | const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); |
| 133 | |
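// Walk RC itself and then its super-classes in order, returning the first
// (i.e. largest legal) class the subtarget supports that does not change the
// register size, and therefore the spill slot size.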
| 134 | const TargetRegisterClass *Super = RC; |
| 135 | auto I = RC->superclasses().begin(); |
| 136 | auto E = RC->superclasses().end(); |
| 137 | do { |
| 138 | switch (Super->getID()) { |
| 139 | case X86::FR32RegClassID: |
| 140 | case X86::FR64RegClassID: |
| 141 | // If AVX-512 isn't supported we should only inflate to these classes. |
| 142 | if (!Subtarget.hasAVX512() && |
| 143 | getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC)) |
| 144 | return Super; |
| 145 | break; |
| 146 | case X86::VR128RegClassID: |
| 147 | case X86::VR256RegClassID: |
| 148 | // If VLX isn't supported we should only inflate to these classes. |
| 149 | if (!Subtarget.hasVLX() && |
| 150 | getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC)) |
| 151 | return Super; |
| 152 | break; |
| 153 | case X86::VR128XRegClassID: |
| 154 | case X86::VR256XRegClassID: |
// If VLX isn't supported we shouldn't inflate to these classes.
| 156 | if (Subtarget.hasVLX() && |
| 157 | getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC)) |
| 158 | return Super; |
| 159 | break; |
| 160 | case X86::FR32XRegClassID: |
| 161 | case X86::FR64XRegClassID: |
// If AVX-512 isn't supported we shouldn't inflate to these classes.
| 163 | if (Subtarget.hasAVX512() && |
| 164 | getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC)) |
| 165 | return Super; |
| 166 | break; |
| 167 | case X86::GR8RegClassID: |
| 168 | case X86::GR16RegClassID: |
| 169 | case X86::GR32RegClassID: |
| 170 | case X86::GR64RegClassID: |
| 171 | case X86::GR8_NOREX2RegClassID: |
| 172 | case X86::GR16_NOREX2RegClassID: |
| 173 | case X86::GR32_NOREX2RegClassID: |
| 174 | case X86::GR64_NOREX2RegClassID: |
| 175 | case X86::RFP32RegClassID: |
| 176 | case X86::RFP64RegClassID: |
| 177 | case X86::RFP80RegClassID: |
| 178 | case X86::VR512_0_15RegClassID: |
| 179 | case X86::VR512RegClassID: |
| 180 | // Don't return a super-class that would shrink the spill size. |
| 181 | // That can happen with the vector and float classes. |
| 182 | if (getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC)) |
| 183 | return Super; |
| 184 | } |
| 185 | if (I != E) { |
| 186 | Super = getRegClass(i: *I); |
| 187 | ++I; |
| 188 | } else { |
| 189 | Super = nullptr; |
| 190 | } |
| 191 | } while (Super); |
| 192 | return RC; |
| 193 | } |
| 194 | |
| 195 | const TargetRegisterClass * |
| 196 | X86RegisterInfo::getPointerRegClass(unsigned Kind) const { |
| 197 | assert(Kind == 0 && "this should only be used for default cases" ); |
| 198 | if (IsTarget64BitLP64) |
| 199 | return &X86::GR64RegClass; |
// If the target is 64-bit but we have been told to use 32-bit addresses,
// we can still use a 64-bit register as long as we know the high bits
// are zero.
// Reflect that in the returned register class.
| 204 | return Is64Bit ? &X86::LOW32_ADDR_ACCESSRegClass : &X86::GR32RegClass; |
| 205 | } |
| 206 | |
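// Copies involving EFLAGS (the CCR register class) cannot be done directly;
// they have to be materialized through a general purpose register, so return
// GR32/GR64 as the class to copy through.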
| 207 | const TargetRegisterClass * |
| 208 | X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { |
| 209 | if (RC == &X86::CCRRegClass) { |
| 210 | if (Is64Bit) |
| 211 | return &X86::GR64RegClass; |
| 212 | else |
| 213 | return &X86::GR32RegClass; |
| 214 | } |
| 215 | return RC; |
| 216 | } |
| 217 | |
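// Return the register-pressure limit (roughly, the number of allocatable
// registers) for RC, accounting for a reserved frame pointer; used by the
// scheduler's pressure tracking.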
| 218 | unsigned |
| 219 | X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, |
| 220 | MachineFunction &MF) const { |
| 221 | const X86FrameLowering *TFI = getFrameLowering(MF); |
| 222 | |
| 223 | unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; |
| 224 | switch (RC->getID()) { |
| 225 | default: |
| 226 | return 0; |
| 227 | case X86::GR32RegClassID: |
| 228 | return 4 - FPDiff; |
| 229 | case X86::GR64RegClassID: |
| 230 | return 12 - FPDiff; |
| 231 | case X86::VR128RegClassID: |
| 232 | return Is64Bit ? 10 : 4; |
| 233 | case X86::VR64RegClassID: |
| 234 | return 4; |
| 235 | } |
| 236 | } |
| 237 | |
| 238 | const MCPhysReg * |
| 239 | X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { |
| 240 | assert(MF && "MachineFunction required" ); |
| 241 | |
| 242 | const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>(); |
| 243 | const Function &F = MF->getFunction(); |
| 244 | bool HasSSE = Subtarget.hasSSE1(); |
| 245 | bool HasAVX = Subtarget.hasAVX(); |
| 246 | bool HasAVX512 = Subtarget.hasAVX512(); |
| 247 | bool CallsEHReturn = MF->callsEHReturn(); |
| 248 | |
| 249 | CallingConv::ID CC = F.getCallingConv(); |
| 250 | |
// If the function has the "no_caller_saved_registers" attribute, switch to
// the X86_INTR calling convention because its CSR list preserves all
// registers.
| 253 | if (MF->getFunction().hasFnAttribute(Kind: "no_caller_saved_registers" )) |
| 254 | CC = CallingConv::X86_INTR; |
| 255 | |
// If the attribute is specified, override the CSRs normally specified by the
// calling convention and use the empty set instead.
| 258 | if (MF->getFunction().hasFnAttribute(Kind: "no_callee_saved_registers" )) |
| 259 | return CSR_NoRegs_SaveList; |
| 260 | |
| 261 | switch (CC) { |
| 262 | case CallingConv::GHC: |
| 263 | case CallingConv::HiPE: |
| 264 | return CSR_NoRegs_SaveList; |
| 265 | case CallingConv::AnyReg: |
| 266 | if (HasAVX) |
| 267 | return CSR_64_AllRegs_AVX_SaveList; |
| 268 | return CSR_64_AllRegs_SaveList; |
| 269 | case CallingConv::PreserveMost: |
| 270 | return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList |
| 271 | : CSR_64_RT_MostRegs_SaveList; |
| 272 | case CallingConv::PreserveAll: |
| 273 | if (HasAVX) |
| 274 | return CSR_64_RT_AllRegs_AVX_SaveList; |
| 275 | return CSR_64_RT_AllRegs_SaveList; |
| 276 | case CallingConv::PreserveNone: |
| 277 | return CSR_64_NoneRegs_SaveList; |
| 278 | case CallingConv::CXX_FAST_TLS: |
| 279 | if (Is64Bit) |
| 280 | return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ? |
| 281 | CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList; |
| 282 | break; |
| 283 | case CallingConv::Intel_OCL_BI: { |
| 284 | if (HasAVX512 && IsWin64) |
| 285 | return CSR_Win64_Intel_OCL_BI_AVX512_SaveList; |
| 286 | if (HasAVX512 && Is64Bit) |
| 287 | return CSR_64_Intel_OCL_BI_AVX512_SaveList; |
| 288 | if (HasAVX && IsWin64) |
| 289 | return CSR_Win64_Intel_OCL_BI_AVX_SaveList; |
| 290 | if (HasAVX && Is64Bit) |
| 291 | return CSR_64_Intel_OCL_BI_AVX_SaveList; |
| 292 | if (!HasAVX && !IsWin64 && Is64Bit) |
| 293 | return CSR_64_Intel_OCL_BI_SaveList; |
| 294 | break; |
| 295 | } |
| 296 | case CallingConv::X86_RegCall: |
| 297 | if (Is64Bit) { |
| 298 | if (IsWin64) { |
| 299 | return (HasSSE ? CSR_Win64_RegCall_SaveList : |
| 300 | CSR_Win64_RegCall_NoSSE_SaveList); |
| 301 | } else { |
| 302 | return (HasSSE ? CSR_SysV64_RegCall_SaveList : |
| 303 | CSR_SysV64_RegCall_NoSSE_SaveList); |
| 304 | } |
| 305 | } else { |
| 306 | return (HasSSE ? CSR_32_RegCall_SaveList : |
| 307 | CSR_32_RegCall_NoSSE_SaveList); |
| 308 | } |
| 309 | case CallingConv::CFGuard_Check: |
| 310 | assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86" ); |
| 311 | return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList |
| 312 | : CSR_Win32_CFGuard_Check_NoSSE_SaveList); |
| 313 | case CallingConv::Cold: |
| 314 | if (Is64Bit) |
| 315 | return CSR_64_MostRegs_SaveList; |
| 316 | break; |
| 317 | case CallingConv::Win64: |
| 318 | if (!HasSSE) |
| 319 | return CSR_Win64_NoSSE_SaveList; |
| 320 | return CSR_Win64_SaveList; |
| 321 | case CallingConv::SwiftTail: |
| 322 | if (!Is64Bit) |
| 323 | return CSR_32_SaveList; |
| 324 | return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList; |
| 325 | case CallingConv::X86_64_SysV: |
| 326 | if (CallsEHReturn) |
| 327 | return CSR_64EHRet_SaveList; |
| 328 | return CSR_64_SaveList; |
| 329 | case CallingConv::X86_INTR: |
| 330 | if (Is64Bit) { |
| 331 | if (HasAVX512) |
| 332 | return CSR_64_AllRegs_AVX512_SaveList; |
| 333 | if (HasAVX) |
| 334 | return CSR_64_AllRegs_AVX_SaveList; |
| 335 | if (HasSSE) |
| 336 | return CSR_64_AllRegs_SaveList; |
| 337 | return CSR_64_AllRegs_NoSSE_SaveList; |
| 338 | } else { |
| 339 | if (HasAVX512) |
| 340 | return CSR_32_AllRegs_AVX512_SaveList; |
| 341 | if (HasAVX) |
| 342 | return CSR_32_AllRegs_AVX_SaveList; |
| 343 | if (HasSSE) |
| 344 | return CSR_32_AllRegs_SSE_SaveList; |
| 345 | return CSR_32_AllRegs_SaveList; |
| 346 | } |
| 347 | default: |
| 348 | break; |
| 349 | } |
| 350 | |
| 351 | if (Is64Bit) { |
| 352 | bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() && |
| 353 | F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError); |
| 354 | if (IsSwiftCC) |
| 355 | return IsWin64 ? CSR_Win64_SwiftError_SaveList |
| 356 | : CSR_64_SwiftError_SaveList; |
| 357 | |
| 358 | if (IsWin64 || IsUEFI64) |
| 359 | return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList; |
| 360 | if (CallsEHReturn) |
| 361 | return CSR_64EHRet_SaveList; |
| 362 | return CSR_64_SaveList; |
| 363 | } |
| 364 | |
| 365 | return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList; |
| 366 | } |
| 367 | |
| 368 | const MCPhysReg * |
| 369 | X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const { |
| 370 | return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList; |
| 371 | } |
| 372 | |
| 373 | const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy( |
| 374 | const MachineFunction *MF) const { |
| 375 | assert(MF && "Invalid MachineFunction pointer." ); |
| 376 | if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && |
| 377 | MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR()) |
| 378 | return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList; |
| 379 | return nullptr; |
| 380 | } |
| 381 | |
| 382 | const uint32_t * |
| 383 | X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, |
| 384 | CallingConv::ID CC) const { |
| 385 | const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); |
| 386 | bool HasSSE = Subtarget.hasSSE1(); |
| 387 | bool HasAVX = Subtarget.hasAVX(); |
| 388 | bool HasAVX512 = Subtarget.hasAVX512(); |
| 389 | |
| 390 | switch (CC) { |
| 391 | case CallingConv::GHC: |
| 392 | case CallingConv::HiPE: |
| 393 | return CSR_NoRegs_RegMask; |
| 394 | case CallingConv::AnyReg: |
| 395 | if (HasAVX) |
| 396 | return CSR_64_AllRegs_AVX_RegMask; |
| 397 | return CSR_64_AllRegs_RegMask; |
| 398 | case CallingConv::PreserveMost: |
| 399 | return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask; |
| 400 | case CallingConv::PreserveAll: |
| 401 | if (HasAVX) |
| 402 | return CSR_64_RT_AllRegs_AVX_RegMask; |
| 403 | return CSR_64_RT_AllRegs_RegMask; |
| 404 | case CallingConv::PreserveNone: |
| 405 | return CSR_64_NoneRegs_RegMask; |
| 406 | case CallingConv::CXX_FAST_TLS: |
| 407 | if (Is64Bit) |
| 408 | return CSR_64_TLS_Darwin_RegMask; |
| 409 | break; |
| 410 | case CallingConv::Intel_OCL_BI: { |
| 411 | if (HasAVX512 && IsWin64) |
| 412 | return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; |
| 413 | if (HasAVX512 && Is64Bit) |
| 414 | return CSR_64_Intel_OCL_BI_AVX512_RegMask; |
| 415 | if (HasAVX && IsWin64) |
| 416 | return CSR_Win64_Intel_OCL_BI_AVX_RegMask; |
| 417 | if (HasAVX && Is64Bit) |
| 418 | return CSR_64_Intel_OCL_BI_AVX_RegMask; |
| 419 | if (!HasAVX && !IsWin64 && Is64Bit) |
| 420 | return CSR_64_Intel_OCL_BI_RegMask; |
| 421 | break; |
| 422 | } |
| 423 | case CallingConv::X86_RegCall: |
| 424 | if (Is64Bit) { |
| 425 | if (IsWin64) { |
| 426 | return (HasSSE ? CSR_Win64_RegCall_RegMask : |
| 427 | CSR_Win64_RegCall_NoSSE_RegMask); |
| 428 | } else { |
| 429 | return (HasSSE ? CSR_SysV64_RegCall_RegMask : |
| 430 | CSR_SysV64_RegCall_NoSSE_RegMask); |
| 431 | } |
| 432 | } else { |
| 433 | return (HasSSE ? CSR_32_RegCall_RegMask : |
| 434 | CSR_32_RegCall_NoSSE_RegMask); |
| 435 | } |
| 436 | case CallingConv::CFGuard_Check: |
| 437 | assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86" ); |
| 438 | return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask |
| 439 | : CSR_Win32_CFGuard_Check_NoSSE_RegMask); |
| 440 | case CallingConv::Cold: |
| 441 | if (Is64Bit) |
| 442 | return CSR_64_MostRegs_RegMask; |
| 443 | break; |
| 444 | case CallingConv::Win64: |
| 445 | return CSR_Win64_RegMask; |
| 446 | case CallingConv::SwiftTail: |
| 447 | if (!Is64Bit) |
| 448 | return CSR_32_RegMask; |
| 449 | return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask; |
| 450 | case CallingConv::X86_64_SysV: |
| 451 | return CSR_64_RegMask; |
| 452 | case CallingConv::X86_INTR: |
| 453 | if (Is64Bit) { |
| 454 | if (HasAVX512) |
| 455 | return CSR_64_AllRegs_AVX512_RegMask; |
| 456 | if (HasAVX) |
| 457 | return CSR_64_AllRegs_AVX_RegMask; |
| 458 | if (HasSSE) |
| 459 | return CSR_64_AllRegs_RegMask; |
| 460 | return CSR_64_AllRegs_NoSSE_RegMask; |
| 461 | } else { |
| 462 | if (HasAVX512) |
| 463 | return CSR_32_AllRegs_AVX512_RegMask; |
| 464 | if (HasAVX) |
| 465 | return CSR_32_AllRegs_AVX_RegMask; |
| 466 | if (HasSSE) |
| 467 | return CSR_32_AllRegs_SSE_RegMask; |
| 468 | return CSR_32_AllRegs_RegMask; |
| 469 | } |
| 470 | default: |
| 471 | break; |
| 472 | } |
| 473 | |
| 474 | // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check |
| 475 | // callsEHReturn(). |
| 476 | if (Is64Bit) { |
| 477 | const Function &F = MF.getFunction(); |
| 478 | bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() && |
| 479 | F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError); |
| 480 | if (IsSwiftCC) |
| 481 | return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask; |
| 482 | |
| 483 | return (IsWin64 || IsUEFI64) ? CSR_Win64_RegMask : CSR_64_RegMask; |
| 484 | } |
| 485 | |
| 486 | return CSR_32_RegMask; |
| 487 | } |
| 488 | |
| 489 | const uint32_t* |
| 490 | X86RegisterInfo::getNoPreservedMask() const { |
| 491 | return CSR_NoRegs_RegMask; |
| 492 | } |
| 493 | |
| 494 | const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const { |
| 495 | return CSR_64_TLS_Darwin_RegMask; |
| 496 | } |
| 497 | |
| 498 | BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { |
| 499 | BitVector Reserved(getNumRegs()); |
| 500 | const X86FrameLowering *TFI = getFrameLowering(MF); |
| 501 | |
| 502 | // Set the floating point control register as reserved. |
| 503 | Reserved.set(X86::FPCW); |
| 504 | |
| 505 | // Set the floating point status register as reserved. |
| 506 | Reserved.set(X86::FPSW); |
| 507 | |
| 508 | // Set the SIMD floating point control register as reserved. |
| 509 | Reserved.set(X86::MXCSR); |
| 510 | |
| 511 | // Set the stack-pointer register and its aliases as reserved. |
| 512 | for (const MCPhysReg &SubReg : subregs_inclusive(Reg: X86::RSP)) |
| 513 | Reserved.set(SubReg); |
| 514 | |
| 515 | // Set the Shadow Stack Pointer as reserved. |
| 516 | Reserved.set(X86::SSP); |
| 517 | |
| 518 | // Set the instruction pointer register and its aliases as reserved. |
| 519 | for (const MCPhysReg &SubReg : subregs_inclusive(Reg: X86::RIP)) |
| 520 | Reserved.set(SubReg); |
| 521 | |
| 522 | // Set the frame-pointer register and its aliases as reserved if needed. |
| 523 | if (TFI->hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF)) { |
| 524 | if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke()) |
| 525 | MF.getContext().reportError( |
| 526 | L: SMLoc(), |
| 527 | Msg: "Frame pointer clobbered by function invoke is not supported." ); |
| 528 | |
| 529 | for (const MCPhysReg &SubReg : subregs_inclusive(Reg: X86::RBP)) |
| 530 | Reserved.set(SubReg); |
| 531 | } |
| 532 | |
| 533 | // Set the base-pointer register and its aliases as reserved if needed. |
| 534 | if (hasBasePointer(MF)) { |
| 535 | if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke()) |
| 536 | MF.getContext().reportError(L: SMLoc(), |
| 537 | Msg: "Stack realignment in presence of dynamic " |
| 538 | "allocas is not supported with " |
| 539 | "this calling convention." ); |
| 540 | |
| 541 | Register BasePtr = getX86SubSuperRegister(Reg: getBaseRegister(), Size: 64); |
| 542 | for (const MCPhysReg &SubReg : subregs_inclusive(Reg: BasePtr)) |
| 543 | Reserved.set(SubReg); |
| 544 | } |
| 545 | |
| 546 | // Mark the segment registers as reserved. |
| 547 | Reserved.set(X86::CS); |
| 548 | Reserved.set(X86::SS); |
| 549 | Reserved.set(X86::DS); |
| 550 | Reserved.set(X86::ES); |
| 551 | Reserved.set(X86::FS); |
| 552 | Reserved.set(X86::GS); |
| 553 | |
| 554 | // Mark the floating point stack registers as reserved. |
| 555 | for (unsigned n = 0; n != 8; ++n) |
| 556 | Reserved.set(X86::ST0 + n); |
| 557 | |
| 558 | // Reserve the registers that only exist in 64-bit mode. |
| 559 | if (!Is64Bit) { |
// These 8-bit registers are part of the x86-64 extension even though their
// super-registers are the old 32-bit registers.
| 562 | Reserved.set(X86::SIL); |
| 563 | Reserved.set(X86::DIL); |
| 564 | Reserved.set(X86::BPL); |
| 565 | Reserved.set(X86::SPL); |
| 566 | Reserved.set(X86::SIH); |
| 567 | Reserved.set(X86::DIH); |
| 568 | Reserved.set(X86::BPH); |
| 569 | Reserved.set(X86::SPH); |
| 570 | |
| 571 | for (unsigned n = 0; n != 8; ++n) { |
| 572 | // R8, R9, ... |
| 573 | for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI) |
| 574 | Reserved.set(*AI); |
| 575 | |
| 576 | // XMM8, XMM9, ... |
| 577 | for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI) |
| 578 | Reserved.set(*AI); |
| 579 | } |
| 580 | } |
| 581 | if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) { |
| 582 | for (unsigned n = 0; n != 16; ++n) { |
| 583 | for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid(); |
| 584 | ++AI) |
| 585 | Reserved.set(*AI); |
| 586 | } |
| 587 | } |
| 588 | |
| 589 | // Reserve the extended general purpose registers. |
| 590 | if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR()) |
| 591 | Reserved.set(I: X86::R16, E: X86::R31WH + 1); |
| 592 | |
| 593 | if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) { |
| 594 | for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI) |
| 595 | Reserved.set(*AI); |
| 596 | for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI) |
| 597 | Reserved.set(*AI); |
| 598 | } |
| 599 | |
| 600 | assert(checkAllSuperRegsMarked(Reserved, |
| 601 | {X86::SIL, X86::DIL, X86::BPL, X86::SPL, |
| 602 | X86::SIH, X86::DIH, X86::BPH, X86::SPH})); |
| 603 | return Reserved; |
| 604 | } |
| 605 | |
| 606 | unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const { |
| 607 | // All existing Intel CPUs that support AMX support AVX512 and all existing |
| 608 | // Intel CPUs that support APX support AMX. AVX512 implies AVX. |
| 609 | // |
| 610 | // We enumerate the registers in X86GenRegisterInfo.inc in this order: |
| 611 | // |
| 612 | // Registers before AVX512, |
| 613 | // AVX512 registers (X/YMM16-31, ZMM0-31, K registers) |
| 614 | // AMX registers (TMM) |
| 615 | // APX registers (R16-R31) |
| 616 | // |
| 617 | // and try to return the minimum number of registers supported by the target. |
| 618 | static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) && |
| 619 | (X86::K6_K7 + 1 == X86::TMMCFG) && |
| 620 | (X86::TMM7 + 1 == X86::R16) && |
| 621 | (X86::R31WH + 1 == X86::NUM_TARGET_REGS), |
| 622 | "Register number may be incorrect" ); |
| 623 | |
| 624 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
| 625 | if (ST.hasEGPR()) |
| 626 | return X86::NUM_TARGET_REGS; |
| 627 | if (ST.hasAMXTILE()) |
| 628 | return X86::TMM7 + 1; |
| 629 | if (ST.hasAVX512()) |
| 630 | return X86::K6_K7 + 1; |
| 631 | if (ST.hasAVX()) |
| 632 | return X86::YMM15 + 1; |
| 633 | return X86::R15WH + 1; |
| 634 | } |
| 635 | |
| 636 | bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF, |
| 637 | MCRegister Reg) const { |
| 638 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
| 639 | const TargetRegisterInfo &TRI = *ST.getRegisterInfo(); |
| 640 | auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) { |
| 641 | return TRI.isSuperOrSubRegisterEq(RegA, RegB); |
| 642 | }; |
| 643 | |
| 644 | if (!ST.is64Bit()) |
| 645 | return llvm::any_of( |
| 646 | Range: SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX}, |
| 647 | P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) || |
| 648 | (ST.hasMMX() && X86::VR64RegClass.contains(Reg)); |
| 649 | |
| 650 | CallingConv::ID CC = MF.getFunction().getCallingConv(); |
| 651 | |
| 652 | if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg)) |
| 653 | return true; |
| 654 | |
| 655 | if (llvm::any_of( |
| 656 | Range: SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9}, |
| 657 | P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) |
| 658 | return true; |
| 659 | |
| 660 | if (CC != CallingConv::Win64 && |
| 661 | llvm::any_of(Range: SmallVector<MCRegister>{X86::RDI, X86::RSI}, |
| 662 | P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) |
| 663 | return true; |
| 664 | |
| 665 | if (ST.hasSSE1() && |
| 666 | llvm::any_of(Range: SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2, |
| 667 | X86::XMM3, X86::XMM4, X86::XMM5, |
| 668 | X86::XMM6, X86::XMM7}, |
| 669 | P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) |
| 670 | return true; |
| 671 | |
| 672 | return X86GenRegisterInfo::isArgumentRegister(MF, PhysReg: Reg); |
| 673 | } |
| 674 | |
| 675 | bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF, |
| 676 | MCRegister PhysReg) const { |
| 677 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
| 678 | const TargetRegisterInfo &TRI = *ST.getRegisterInfo(); |
| 679 | |
| 680 | // Stack pointer. |
| 681 | if (TRI.isSuperOrSubRegisterEq(RegA: X86::RSP, RegB: PhysReg)) |
| 682 | return true; |
| 683 | |
// The frame pointer is fixed whenever it is being used as the frame register.
| 685 | const X86FrameLowering &TFI = *getFrameLowering(MF); |
| 686 | if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(RegA: X86::RBP, RegB: PhysReg)) |
| 687 | return true; |
| 688 | |
| 689 | return X86GenRegisterInfo::isFixedRegister(MF, PhysReg); |
| 690 | } |
| 691 | |
| 692 | bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const { |
| 693 | return RC->getID() == X86::TILERegClassID; |
| 694 | } |
| 695 | |
| 696 | void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { |
| 697 | // Check if the EFLAGS register is marked as live-out. This shouldn't happen, |
| 698 | // because the calling convention defines the EFLAGS register as NOT |
| 699 | // preserved. |
| 700 | // |
// Unfortunately, EFLAGS can show up as live-out after branch folding. Assert
// on this in debug builds to track it, and clear the register afterwards to
// avoid unnecessary crashes during release builds.
| 704 | assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) && |
| 705 | "EFLAGS are not live-out from a patchpoint." ); |
| 706 | |
| 707 | // Also clean other registers that don't need preserving (IP). |
| 708 | for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP}) |
| 709 | Mask[Reg / 32] &= ~(1U << (Reg % 32)); |
| 710 | } |
| 711 | |
| 712 | //===----------------------------------------------------------------------===// |
| 713 | // Stack Frame Processing methods |
| 714 | //===----------------------------------------------------------------------===// |
| 715 | |
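// The stack pointer cannot be used to address frame objects when the frame
// contains variable-sized objects or opaque SP adjustments (e.g. inline asm
// or calls that modify SP by an unknown amount).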
| 716 | static bool CantUseSP(const MachineFrameInfo &MFI) { |
| 717 | return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment(); |
| 718 | } |
| 719 | |
| 720 | bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { |
| 721 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
// If the incoming stack pointer has been saved in a virtual register to
// reference arguments, we don't need a base pointer.
| 724 | if (X86FI->getStackPtrSaveMI() != nullptr) |
| 725 | return false; |
| 726 | |
| 727 | if (X86FI->hasPreallocatedCall()) |
| 728 | return true; |
| 729 | |
| 730 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 731 | |
| 732 | if (!EnableBasePointer) |
| 733 | return false; |
| 734 | |
| 735 | // When we need stack realignment, we can't address the stack from the frame |
| 736 | // pointer. When we have dynamic allocas or stack-adjusting inline asm, we |
| 737 | // can't address variables from the stack pointer. MS inline asm can |
| 738 | // reference locals while also adjusting the stack pointer. When we can't |
| 739 | // use both the SP and the FP, we need a separate base pointer register. |
| 740 | bool CantUseFP = hasStackRealignment(MF); |
| 741 | return CantUseFP && CantUseSP(MFI); |
| 742 | } |
| 743 | |
| 744 | bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { |
| 745 | if (!TargetRegisterInfo::canRealignStack(MF)) |
| 746 | return false; |
| 747 | |
| 748 | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 749 | const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
| 750 | |
| 751 | // Stack realignment requires a frame pointer. If we already started |
| 752 | // register allocation with frame pointer elimination, it is too late now. |
| 753 | if (!MRI->canReserveReg(PhysReg: FramePtr)) |
| 754 | return false; |
| 755 | |
// If a base pointer is necessary, check that it isn't too late to reserve it.
| 758 | if (CantUseSP(MFI)) |
| 759 | return MRI->canReserveReg(PhysReg: BasePtr); |
| 760 | return true; |
| 761 | } |
| 762 | |
| 763 | bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const { |
| 764 | if (TargetRegisterInfo::shouldRealignStack(MF)) |
| 765 | return true; |
| 766 | |
| 767 | return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR; |
| 768 | } |
| 769 | |
| 770 | // tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction |
| 771 | // of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'. |
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
| 774 | static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) { |
| 775 | MachineInstr &MI = *II; |
| 776 | unsigned Opc = II->getOpcode(); |
| 777 | // Check if this is a LEA of the form 'lea (%esp), %ebx' |
| 778 | if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) || |
| 779 | MI.getOperand(i: 2).getImm() != 1 || |
| 780 | MI.getOperand(i: 3).getReg() != X86::NoRegister || |
| 781 | MI.getOperand(i: 4).getImm() != 0 || |
| 782 | MI.getOperand(i: 5).getReg() != X86::NoRegister) |
| 783 | return false; |
| 784 | Register BasePtr = MI.getOperand(i: 1).getReg(); |
| 785 | // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will |
| 786 | // be replaced with a 32-bit operand MOV which will zero extend the upper |
| 787 | // 32-bits of the super register. |
| 788 | if (Opc == X86::LEA64_32r) |
| 789 | BasePtr = getX86SubSuperRegister(Reg: BasePtr, Size: 32); |
| 790 | Register NewDestReg = MI.getOperand(i: 0).getReg(); |
| 791 | const X86InstrInfo *TII = |
| 792 | MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo(); |
| 793 | TII->copyPhysReg(MBB&: *MI.getParent(), MI: II, DL: MI.getDebugLoc(), DestReg: NewDestReg, SrcReg: BasePtr, |
| 794 | KillSrc: MI.getOperand(i: 1).isKill()); |
| 795 | MI.eraseFromParent(); |
| 796 | return true; |
| 797 | } |
| 798 | |
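// Return true if MI is a funclet return pseudo-instruction
// (CATCHRET/CLEANUPRET).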
| 799 | static bool isFuncletReturnInstr(MachineInstr &MI) { |
| 800 | switch (MI.getOpcode()) { |
| 801 | case X86::CATCHRET: |
| 802 | case X86::CLEANUPRET: |
| 803 | return true; |
| 804 | default: |
| 805 | return false; |
| 806 | } |
| 807 | llvm_unreachable("impossible" ); |
| 808 | } |
| 809 | |
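// Rewrite the frame-index operand of MI using a base register and offset that
// the caller has already resolved. LOCAL_ESCAPE, STACKMAP and PATCHPOINT use
// a simpler operand layout than the usual X86 memory reference, so they are
// handled specially.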
| 810 | void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, |
| 811 | unsigned FIOperandNum, |
| 812 | Register BaseReg, |
| 813 | int FIOffset) const { |
| 814 | MachineInstr &MI = *II; |
| 815 | unsigned Opc = MI.getOpcode(); |
| 816 | if (Opc == TargetOpcode::LOCAL_ESCAPE) { |
| 817 | MachineOperand &FI = MI.getOperand(i: FIOperandNum); |
| 818 | FI.ChangeToImmediate(ImmVal: FIOffset); |
| 819 | return; |
| 820 | } |
| 821 | |
| 822 | MI.getOperand(i: FIOperandNum).ChangeToRegister(Reg: BaseReg, isDef: false); |
| 823 | |
| 824 | // The frame index format for stackmaps and patchpoints is different from the |
| 825 | // X86 format. It only has a FI and an offset. |
| 826 | if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { |
| 827 | assert(BasePtr == FramePtr && "Expected the FP as base register" ); |
| 828 | int64_t Offset = MI.getOperand(i: FIOperandNum + 1).getImm() + FIOffset; |
| 829 | MI.getOperand(i: FIOperandNum + 1).ChangeToImmediate(ImmVal: Offset); |
| 830 | return; |
| 831 | } |
| 832 | |
| 833 | if (MI.getOperand(i: FIOperandNum + 3).isImm()) { |
| 834 | // Offset is a 32-bit integer. |
| 835 | int Imm = (int)(MI.getOperand(i: FIOperandNum + 3).getImm()); |
| 836 | int Offset = FIOffset + Imm; |
| 837 | assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && |
| 838 | "Requesting 64-bit offset in 32-bit immediate!" ); |
| 839 | if (Offset != 0) |
| 840 | MI.getOperand(i: FIOperandNum + 3).ChangeToImmediate(ImmVal: Offset); |
| 841 | } else { |
| 842 | // Offset is symbolic. This is extremely rare. |
| 843 | uint64_t Offset = |
| 844 | FIOffset + (uint64_t)MI.getOperand(i: FIOperandNum + 3).getOffset(); |
| 845 | MI.getOperand(i: FIOperandNum + 3).setOffset(Offset); |
| 846 | } |
| 847 | } |
| 848 | |
| 849 | bool |
| 850 | X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, |
| 851 | int SPAdj, unsigned FIOperandNum, |
| 852 | RegScavenger *RS) const { |
| 853 | MachineInstr &MI = *II; |
| 854 | MachineBasicBlock &MBB = *MI.getParent(); |
| 855 | MachineFunction &MF = *MBB.getParent(); |
| 856 | MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); |
| 857 | bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false |
| 858 | : isFuncletReturnInstr(MI&: *MBBI); |
| 859 | const X86FrameLowering *TFI = getFrameLowering(MF); |
| 860 | int FrameIndex = MI.getOperand(i: FIOperandNum).getIndex(); |
| 861 | |
| 862 | // Determine base register and offset. |
| 863 | int64_t FIOffset; |
| 864 | Register BasePtr; |
| 865 | if (MI.isReturn()) { |
| 866 | assert((!hasStackRealignment(MF) || |
| 867 | MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) && |
| 868 | "Return instruction can only reference SP relative frame objects" ); |
| 869 | FIOffset = |
| 870 | TFI->getFrameIndexReferenceSP(MF, FI: FrameIndex, SPReg&: BasePtr, Adjustment: 0).getFixed(); |
| 871 | } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) { |
| 872 | FIOffset = TFI->getWin64EHFrameIndexRef(MF, FI: FrameIndex, SPReg&: BasePtr); |
| 873 | } else { |
| 874 | FIOffset = TFI->getFrameIndexReference(MF, FI: FrameIndex, FrameReg&: BasePtr).getFixed(); |
| 875 | } |
| 876 | |
| 877 | // LOCAL_ESCAPE uses a single offset, with no register. It only works in the |
| 878 | // simple FP case, and doesn't work with stack realignment. On 32-bit, the |
| 879 | // offset is from the traditional base pointer location. On 64-bit, the |
| 880 | // offset is from the SP at the end of the prologue, not the FP location. This |
| 881 | // matches the behavior of llvm.frameaddress. |
| 882 | unsigned Opc = MI.getOpcode(); |
| 883 | if (Opc == TargetOpcode::LOCAL_ESCAPE) { |
| 884 | MachineOperand &FI = MI.getOperand(i: FIOperandNum); |
| 885 | FI.ChangeToImmediate(ImmVal: FIOffset); |
| 886 | return false; |
| 887 | } |
| 888 | |
| 889 | // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit |
| 890 | // register as source operand, semantic is the same and destination is |
| 891 | // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided. |
| 892 | // Don't change BasePtr since it is used later for stack adjustment. |
| 893 | Register MachineBasePtr = BasePtr; |
| 894 | if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(Reg: BasePtr)) |
| 895 | MachineBasePtr = getX86SubSuperRegister(Reg: BasePtr, Size: 64); |
| 896 | |
| 897 | // This must be part of a four operand memory reference. Replace the |
| 898 | // FrameIndex with base register. Add an offset to the offset. |
| 899 | MI.getOperand(i: FIOperandNum).ChangeToRegister(Reg: MachineBasePtr, isDef: false); |
| 900 | |
| 901 | if (BasePtr == StackPtr) |
| 902 | FIOffset += SPAdj; |
| 903 | |
| 904 | // The frame index format for stackmaps and patchpoints is different from the |
| 905 | // X86 format. It only has a FI and an offset. |
| 906 | if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { |
| 907 | assert(BasePtr == FramePtr && "Expected the FP as base register" ); |
| 908 | int64_t Offset = MI.getOperand(i: FIOperandNum + 1).getImm() + FIOffset; |
| 909 | MI.getOperand(i: FIOperandNum + 1).ChangeToImmediate(ImmVal: Offset); |
| 910 | return false; |
| 911 | } |
| 912 | |
| 913 | if (MI.getOperand(i: FIOperandNum+3).isImm()) { |
| 914 | const X86InstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); |
| 915 | const DebugLoc &DL = MI.getDebugLoc(); |
| 916 | int64_t Imm = MI.getOperand(i: FIOperandNum + 3).getImm(); |
| 917 | int64_t Offset = FIOffset + Imm; |
| 918 | bool FitsIn32Bits = isInt<32>(x: Offset); |
| 919 | // If the offset will not fit in a 32-bit displacement, then for 64-bit |
| 920 | // targets, scavenge a register to hold it. Otherwise... |
| 921 | if (Is64Bit && !FitsIn32Bits) { |
| 922 | assert(RS && "RegisterScavenger was NULL" ); |
| 923 | |
| 924 | RS->enterBasicBlockEnd(MBB); |
| 925 | RS->backward(I: std::next(x: II)); |
| 926 | |
| 927 | Register ScratchReg = RS->scavengeRegisterBackwards( |
| 928 | RC: X86::GR64RegClass, To: II, /*RestoreAfter=*/false, /*SPAdj=*/0, |
| 929 | /*AllowSpill=*/true); |
| 930 | assert(ScratchReg != 0 && "scratch reg was 0" ); |
| 931 | RS->setRegUsed(Reg: ScratchReg); |
| 932 | |
| 933 | BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: TII->get(Opcode: X86::MOV64ri), DestReg: ScratchReg).addImm(Val: Offset); |
| 934 | |
| 935 | MI.getOperand(i: FIOperandNum + 3).setImm(0); |
| 936 | MI.getOperand(i: FIOperandNum + 2).setReg(ScratchReg); |
| 937 | |
| 938 | return false; |
| 939 | } |
| 940 | |
| 941 | // ... for 32-bit targets, this is a bug! |
| 942 | if (!Is64Bit && !FitsIn32Bits) { |
| 943 | MI.emitGenericError(ErrMsg: "64-bit offset calculated but target is 32-bit" ); |
| 944 | // Trap so that the instruction verification pass does not fail if run. |
| 945 | BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: X86::TRAP)); |
| 946 | return false; |
| 947 | } |
| 948 | |
| 949 | if (Offset != 0 || !tryOptimizeLEAtoMOV(II)) |
| 950 | MI.getOperand(i: FIOperandNum + 3).ChangeToImmediate(ImmVal: Offset); |
| 951 | } else { |
| 952 | // Offset is symbolic. This is extremely rare. |
| 953 | uint64_t Offset = FIOffset + |
| 954 | (uint64_t)MI.getOperand(i: FIOperandNum+3).getOffset(); |
| 955 | MI.getOperand(i: FIOperandNum + 3).setOffset(Offset); |
| 956 | } |
| 957 | return false; |
| 958 | } |
| 959 | |
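// Find a caller-saved GR32/GR64 register that is not live at the terminating
// return-like instruction of MBB (and not reserved), so it can be used as a
// scratch register in the epilogue; returns 0 if none is available.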
| 960 | unsigned X86RegisterInfo::findDeadCallerSavedReg( |
| 961 | MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const { |
| 962 | const MachineFunction *MF = MBB.getParent(); |
| 963 | const MachineRegisterInfo &MRI = MF->getRegInfo(); |
| 964 | if (MF->callsEHReturn()) |
| 965 | return 0; |
| 966 | |
| 967 | if (MBBI == MBB.end()) |
| 968 | return 0; |
| 969 | |
| 970 | switch (MBBI->getOpcode()) { |
| 971 | default: |
| 972 | return 0; |
| 973 | case TargetOpcode::PATCHABLE_RET: |
| 974 | case X86::RET: |
| 975 | case X86::RET32: |
| 976 | case X86::RET64: |
| 977 | case X86::RETI32: |
| 978 | case X86::RETI64: |
| 979 | case X86::TCRETURNdi: |
| 980 | case X86::TCRETURNri: |
| 981 | case X86::TCRETURN_WIN64ri: |
| 982 | case X86::TCRETURN_HIPE32ri: |
| 983 | case X86::TCRETURNmi: |
| 984 | case X86::TCRETURNdi64: |
| 985 | case X86::TCRETURNri64: |
| 986 | case X86::TCRETURNri64_ImpCall: |
| 987 | case X86::TCRETURNmi64: |
| 988 | case X86::TCRETURN_WINmi64: |
| 989 | case X86::EH_RETURN: |
| 990 | case X86::EH_RETURN64: { |
| 991 | LiveRegUnits LRU(*this); |
| 992 | LRU.addLiveOuts(MBB); |
| 993 | LRU.stepBackward(MI: *MBBI); |
| 994 | |
| 995 | const TargetRegisterClass &RC = |
| 996 | Is64Bit ? X86::GR64_NOSPRegClass : X86::GR32_NOSPRegClass; |
| 997 | for (MCRegister Reg : RC) { |
| 998 | if (LRU.available(Reg) && !MRI.isReserved(PhysReg: Reg)) |
| 999 | return Reg; |
| 1000 | } |
| 1001 | } |
| 1002 | } |
| 1003 | |
| 1004 | return 0; |
| 1005 | } |
| 1006 | |
| 1007 | Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { |
| 1008 | const X86FrameLowering *TFI = getFrameLowering(MF); |
| 1009 | return TFI->hasFP(MF) ? FramePtr : StackPtr; |
| 1010 | } |
| 1011 | |
| 1012 | Register |
| 1013 | X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { |
| 1014 | const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); |
| 1015 | Register FrameReg = getFrameRegister(MF); |
| 1016 | if (Subtarget.isTarget64BitILP32()) |
| 1017 | FrameReg = getX86SubSuperRegister(Reg: FrameReg, Size: 32); |
| 1018 | return FrameReg; |
| 1019 | } |
| 1020 | |
| 1021 | Register |
| 1022 | X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const { |
| 1023 | const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); |
| 1024 | Register StackReg = getStackRegister(); |
| 1025 | if (Subtarget.isTarget64BitILP32()) |
| 1026 | StackReg = getX86SubSuperRegister(Reg: StackReg, Size: 32); |
| 1027 | return StackReg; |
| 1028 | } |
| 1029 | |
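// Infer the AMX tile shape (row/column operands) of a virtual register by
// walking back to its defining instruction, following COPYs recursively, and
// cache the result in the VirtRegMap.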
| 1030 | static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM, |
| 1031 | const MachineRegisterInfo *MRI) { |
| 1032 | if (VRM->hasShape(virtReg: VirtReg)) |
| 1033 | return VRM->getShape(virtReg: VirtReg); |
| 1034 | |
| 1035 | const MachineOperand &Def = *MRI->def_begin(RegNo: VirtReg); |
| 1036 | MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent()); |
| 1037 | unsigned OpCode = MI->getOpcode(); |
| 1038 | switch (OpCode) { |
| 1039 | default: |
| 1040 | llvm_unreachable("Unexpected machine instruction on tile register!" ); |
| 1041 | break; |
| 1042 | case X86::COPY: { |
| 1043 | Register SrcReg = MI->getOperand(i: 1).getReg(); |
| 1044 | ShapeT Shape = getTileShape(VirtReg: SrcReg, VRM, MRI); |
| 1045 | VRM->assignVirt2Shape(virtReg: VirtReg, shape: Shape); |
| 1046 | return Shape; |
| 1047 | } |
// We only collect the tile shape from the instructions that define a tile.
| 1049 | case X86::PTILELOADDV: |
| 1050 | case X86::PTILELOADDT1V: |
| 1051 | case X86::PTDPBSSDV: |
| 1052 | case X86::PTDPBSUDV: |
| 1053 | case X86::PTDPBUSDV: |
| 1054 | case X86::PTDPBUUDV: |
| 1055 | case X86::PTILEZEROV: |
| 1056 | case X86::PTDPBF16PSV: |
| 1057 | case X86::PTDPFP16PSV: |
| 1058 | case X86::PTCMMIMFP16PSV: |
| 1059 | case X86::PTCMMRLFP16PSV: |
| 1060 | case X86::PTILELOADDRSV: |
| 1061 | case X86::PTILELOADDRST1V: |
| 1062 | case X86::PTMMULTF32PSV: |
| 1063 | case X86::PTDPBF8PSV: |
| 1064 | case X86::PTDPBHF8PSV: |
| 1065 | case X86::PTDPHBF8PSV: |
| 1066 | case X86::PTDPHF8PSV: { |
| 1067 | MachineOperand &MO1 = MI->getOperand(i: 1); |
| 1068 | MachineOperand &MO2 = MI->getOperand(i: 2); |
| 1069 | ShapeT Shape(&MO1, &MO2, MRI); |
| 1070 | VRM->assignVirt2Shape(virtReg: VirtReg, shape: Shape); |
| 1071 | return Shape; |
| 1072 | } |
| 1073 | } |
| 1074 | } |
| 1075 | |
| 1076 | bool X86RegisterInfo::getRegAllocationHints(Register VirtReg, |
| 1077 | ArrayRef<MCPhysReg> Order, |
| 1078 | SmallVectorImpl<MCPhysReg> &Hints, |
| 1079 | const MachineFunction &MF, |
| 1080 | const VirtRegMap *VRM, |
| 1081 | const LiveRegMatrix *Matrix) const { |
| 1082 | const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
| 1083 | const TargetRegisterClass &RC = *MRI->getRegClass(Reg: VirtReg); |
| 1084 | bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( |
| 1085 | VirtReg, Order, Hints, MF, VRM, Matrix); |
| 1086 | const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); |
| 1087 | const TargetRegisterInfo &TRI = *ST.getRegisterInfo(); |
| 1088 | |
| 1089 | unsigned ID = RC.getID(); |
| 1090 | |
| 1091 | if (!VRM) |
| 1092 | return BaseImplRetVal; |
| 1093 | |
| 1094 | if (ID != X86::TILERegClassID) { |
| 1095 | if (DisableRegAllocNDDHints || !ST.hasNDD() || |
| 1096 | !TRI.isGeneralPurposeRegisterClass(RC: &RC)) |
| 1097 | return BaseImplRetVal; |
| 1098 | |
| 1099 | // Add any two address hints after any copy hints. |
| 1100 | SmallSet<unsigned, 4> TwoAddrHints; |
| 1101 | |
| 1102 | auto TryAddNDDHint = [&](const MachineOperand &MO) { |
| 1103 | Register Reg = MO.getReg(); |
| 1104 | Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(virtReg: Reg)); |
| 1105 | if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Range&: Hints, Element: PhysReg)) |
| 1106 | TwoAddrHints.insert(V: PhysReg); |
| 1107 | }; |
| 1108 | |
// An NDD instruction is compressible when Op0 is allocated to the same
// physical register as Op1 (or Op2 if the instruction is commutable).
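// For example (illustrative), an NDD form such as
//   %dst = ADD64rr_ND %src1, %src2
// can be encoded with the shorter legacy ADD64rr when %dst and %src1 (or
// %src2, after commuting) end up in the same physical register.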
| 1111 | for (auto &MO : MRI->reg_nodbg_operands(Reg: VirtReg)) { |
| 1112 | const MachineInstr &MI = *MO.getParent(); |
| 1113 | if (!X86::getNonNDVariant(Opc: MI.getOpcode())) |
| 1114 | continue; |
| 1115 | unsigned OpIdx = MI.getOperandNo(I: &MO); |
| 1116 | if (OpIdx == 0) { |
| 1117 | assert(MI.getOperand(1).isReg()); |
| 1118 | TryAddNDDHint(MI.getOperand(i: 1)); |
| 1119 | if (MI.isCommutable()) { |
| 1120 | assert(MI.getOperand(2).isReg()); |
| 1121 | TryAddNDDHint(MI.getOperand(i: 2)); |
| 1122 | } |
| 1123 | } else if (OpIdx == 1) { |
| 1124 | TryAddNDDHint(MI.getOperand(i: 0)); |
| 1125 | } else if (MI.isCommutable() && OpIdx == 2) { |
| 1126 | TryAddNDDHint(MI.getOperand(i: 0)); |
| 1127 | } |
| 1128 | } |
| 1129 | |
| 1130 | for (MCPhysReg OrderReg : Order) |
| 1131 | if (TwoAddrHints.count(V: OrderReg)) |
| 1132 | Hints.push_back(Elt: OrderReg); |
| 1133 | |
| 1134 | return BaseImplRetVal; |
| 1135 | } |
| 1136 | |
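// This is an AMX tile register class. Hint only physical tile registers that
// are either not yet assigned or whose currently assigned virtual register
// has the same shape as VirtReg.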
| 1137 | ShapeT VirtShape = getTileShape(VirtReg, VRM: const_cast<VirtRegMap *>(VRM), MRI); |
| 1138 | auto AddHint = [&](MCPhysReg PhysReg) { |
| 1139 | Register VReg = Matrix->getOneVReg(PhysReg); |
| 1140 | if (VReg == MCRegister::NoRegister) { // Not allocated yet |
| 1141 | Hints.push_back(Elt: PhysReg); |
| 1142 | return; |
| 1143 | } |
| 1144 | ShapeT PhysShape = getTileShape(VirtReg: VReg, VRM: const_cast<VirtRegMap *>(VRM), MRI); |
| 1145 | if (PhysShape == VirtShape) |
| 1146 | Hints.push_back(Elt: PhysReg); |
| 1147 | }; |
| 1148 | |
| 1149 | SmallSet<MCPhysReg, 4> CopyHints(llvm::from_range, Hints); |
| 1150 | Hints.clear(); |
| 1151 | for (auto Hint : CopyHints) { |
| 1152 | if (RC.contains(Reg: Hint) && !MRI->isReserved(PhysReg: Hint)) |
| 1153 | AddHint(Hint); |
| 1154 | } |
| 1155 | for (MCPhysReg PhysReg : Order) { |
| 1156 | if (!CopyHints.count(V: PhysReg) && RC.contains(Reg: PhysReg) && |
| 1157 | !MRI->isReserved(PhysReg)) |
| 1158 | AddHint(PhysReg); |
| 1159 | } |
| 1160 | |
| 1161 | #define DEBUG_TYPE "tile-hint" |
| 1162 | LLVM_DEBUG({ |
| 1163 | dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n" ; |
| 1164 | for (auto Hint : Hints) { |
| 1165 | dbgs() << "tmm" << Hint << "," ; |
| 1166 | } |
| 1167 | dbgs() << "\n" ; |
| 1168 | }); |
| 1169 | #undef DEBUG_TYPE |
| 1170 | |
| 1171 | return true; |
| 1172 | } |
| 1173 | |
| 1174 | const TargetRegisterClass *X86RegisterInfo::constrainRegClassToNonRex2( |
| 1175 | const TargetRegisterClass *RC) const { |
| 1176 | switch (RC->getID()) { |
| 1177 | default: |
| 1178 | return RC; |
| 1179 | case X86::GR8RegClassID: |
| 1180 | return &X86::GR8_NOREX2RegClass; |
| 1181 | case X86::GR16RegClassID: |
| 1182 | return &X86::GR16_NOREX2RegClass; |
| 1183 | case X86::GR32RegClassID: |
| 1184 | return &X86::GR32_NOREX2RegClass; |
| 1185 | case X86::GR64RegClassID: |
| 1186 | return &X86::GR64_NOREX2RegClass; |
| 1187 | case X86::GR32_NOSPRegClassID: |
| 1188 | return &X86::GR32_NOREX2_NOSPRegClass; |
| 1189 | case X86::GR64_NOSPRegClassID: |
| 1190 | return &X86::GR64_NOREX2_NOSPRegClass; |
| 1191 | } |
| 1192 | } |
| 1193 | |
| 1194 | bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const { |
| 1195 | switch (RC->getID()) { |
| 1196 | default: |
| 1197 | return false; |
| 1198 | case X86::GR8_NOREX2RegClassID: |
| 1199 | case X86::GR16_NOREX2RegClassID: |
| 1200 | case X86::GR32_NOREX2RegClassID: |
| 1201 | case X86::GR64_NOREX2RegClassID: |
| 1202 | case X86::GR32_NOREX2_NOSPRegClassID: |
| 1203 | case X86::GR64_NOREX2_NOSPRegClassID: |
| 1204 | case X86::GR64_with_sub_16bit_in_GR16_NOREX2RegClassID: |
| 1205 | return true; |
| 1206 | } |
| 1207 | } |
| 1208 | |