1//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of the TargetRegisterInfo class.
10// This file is responsible for the frame pointer elimination optimization
11// on X86.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86RegisterInfo.h"
16#include "X86FrameLowering.h"
17#include "X86MachineFunctionInfo.h"
18#include "X86Subtarget.h"
19#include "llvm/ADT/BitVector.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/CodeGen/LiveRegMatrix.h"
23#include "llvm/CodeGen/MachineFrameInfo.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/RegisterScavenging.h"
26#include "llvm/CodeGen/TargetFrameLowering.h"
27#include "llvm/CodeGen/TargetInstrInfo.h"
28#include "llvm/CodeGen/TileShapeInfo.h"
29#include "llvm/CodeGen/VirtRegMap.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/Type.h"
32#include "llvm/MC/MCContext.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Target/TargetMachine.h"
36#include "llvm/Target/TargetOptions.h"
37
38using namespace llvm;
39
40#define GET_REGINFO_TARGET_DESC
41#include "X86GenRegisterInfo.inc"
42
// When set (the default), hasBasePointer() may elect a dedicated base-pointer
// register for frames where neither SP nor FP can address locals.
static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(Val: true),
          cl::desc("Enable use of a base pointer for complex stack frames"));

// Escape hatch to turn off two-address register-allocation hints for NDD
// (new data destination) instructions.
static cl::opt<bool>
    DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
                            cl::init(Val: false),
                            cl::desc("Disable two address hints for register "
                                     "allocation"));

// Defined elsewhere; when false, register-class inflation keeps non-REX2
// classes so APX (EGPR/NDD/NF) registers are avoided around relocations.
extern cl::opt<bool> X86EnableAPXForRelocation;
54
55X86RegisterInfo::X86RegisterInfo(const Triple &TT)
56 : X86GenRegisterInfo((TT.isX86_64() ? X86::RIP : X86::EIP),
57 X86_MC::getDwarfRegFlavour(TT, isEH: false),
58 X86_MC::getDwarfRegFlavour(TT, isEH: true),
59 (TT.isX86_64() ? X86::RIP : X86::EIP)) {
60 X86_MC::initLLVMToSEHAndCVRegMapping(MRI: this);
61
62 // Cache some information.
63 Is64Bit = TT.isX86_64();
64 IsTarget64BitLP64 = Is64Bit && !TT.isX32();
65 IsWin64 = Is64Bit && TT.isOSWindows();
66 IsUEFI64 = Is64Bit && TT.isUEFI();
67
68 // Use a callee-saved register as the base pointer. These registers must
69 // not conflict with any ABI requirements. For example, in 32-bit mode PIC
70 // requires GOT in the EBX register before function calls via PLT GOT pointer.
71 if (Is64Bit) {
72 SlotSize = 8;
73 // This matches the simplified 32-bit pointer code in the data layout
74 // computation.
75 // FIXME: Should use the data layout?
76 bool Use64BitReg = !TT.isX32();
77 StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
78 FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
79 BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
80 } else {
81 SlotSize = 4;
82 StackPtr = X86::ESP;
83 FramePtr = X86::EBP;
84 BasePtr = X86::ESI;
85 }
86}
87
88const TargetRegisterClass *
89X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
90 unsigned Idx) const {
91 // The sub_8bit sub-register index is more constrained in 32-bit mode.
92 // It behaves just like the sub_8bit_hi index.
93 if (!Is64Bit && Idx == X86::sub_8bit)
94 Idx = X86::sub_8bit_hi;
95
96 // Forward to TableGen's default version.
97 return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
98}
99
100const TargetRegisterClass *
101X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
102 const TargetRegisterClass *B,
103 unsigned SubIdx) const {
104 // The sub_8bit sub-register index is more constrained in 32-bit mode.
105 if (!Is64Bit && SubIdx == X86::sub_8bit) {
106 A = X86GenRegisterInfo::getSubClassWithSubReg(RC: A, Idx: X86::sub_8bit_hi);
107 if (!A)
108 return nullptr;
109 }
110 return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, Idx: SubIdx);
111}
112
// Walk RC's super-class list and return the largest super-class that is both
// legal on this subtarget and has the same spill size as RC, so that
// register-class inflation never changes spill-slot sizes or exposes
// registers the subtarget lacks. Returns RC itself when no such super-class
// exists.
const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  // Keep using non-rex2 register class when APX feature (EGPR/NDD/NF) is not
  // enabled for relocation.
  if (!X86EnableAPXForRelocation && isNonRex2RegClass(RC))
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  // Iterate over RC and its super-classes (superclasses() lists them from
  // closest to most general), returning the first acceptable one.
  const TargetRegisterClass *Super = RC;
  auto I = RC->superclasses().begin();
  auto E = RC->superclasses().end();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC))
        return Super;
    }
    if (I != E) {
      Super = getRegClass(i: *I);
      ++I;
    } else {
      Super = nullptr;
    }
  } while (Super);
  return RC;
}
194
195const TargetRegisterClass *
196X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
197 assert(Kind == 0 && "this should only be used for default cases");
198 if (IsTarget64BitLP64)
199 return &X86::GR64RegClass;
200 // If the target is 64bit but we have been told to use 32bit addresses,
201 // we can still use 64-bit register as long as we know the high bits
202 // are zeros.
203 // Reflect that in the returned register class.
204 return Is64Bit ? &X86::LOW32_ADDR_ACCESSRegClass : &X86::GR32RegClass;
205}
206
207const TargetRegisterClass *
208X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
209 if (RC == &X86::CCRRegClass) {
210 if (Is64Bit)
211 return &X86::GR64RegClass;
212 else
213 return &X86::GR32RegClass;
214 }
215 return RC;
216}
217
218unsigned
219X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
220 MachineFunction &MF) const {
221 const X86FrameLowering *TFI = getFrameLowering(MF);
222
223 unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
224 switch (RC->getID()) {
225 default:
226 return 0;
227 case X86::GR32RegClassID:
228 return 4 - FPDiff;
229 case X86::GR64RegClassID:
230 return 12 - FPDiff;
231 case X86::VR128RegClassID:
232 return Is64Bit ? 10 : 4;
233 case X86::VR64RegClassID:
234 return 4;
235 }
236}
237
// Return the list of callee-saved registers for MF, selected primarily by
// calling convention and refined by subtarget features (SSE/AVX/AVX-512),
// OS (Win64/UEFI64), EH requirements, and function attributes.
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If attribute NoCallerSavedRegisters exists then we set X86_INTR calling
  // convention because it has the CSR list.
  if (MF->getFunction().hasFnAttribute(Kind: "no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If attribute specified, override the CSRs normally specified by the
  // calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute(Kind: "no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    // Split-CSR functions save part of the CSRs via copies instead
    // (see getCalleeSavedRegsViaCopy).
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    // Interrupt handlers must preserve everything; pick the widest list the
    // subtarget's vector features require.
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  // Fallback for conventions not handled (or not fully handled) above.
  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64 || IsUEFI64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}
367
368const MCPhysReg *
369X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
370 return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
371}
372
373const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
374 const MachineFunction *MF) const {
375 assert(MF && "Invalid MachineFunction pointer.");
376 if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
377 MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
378 return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
379 return nullptr;
380}
381
// Return the register mask of registers preserved across a call with calling
// convention CC. Mirrors the convention/feature selection logic of
// getCalleeSavedRegs(), but yields masks instead of save lists.
const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return (IsWin64 || IsUEFI64) ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}
488
489const uint32_t*
490X86RegisterInfo::getNoPreservedMask() const {
491 return CSR_NoRegs_RegMask;
492}
493
494const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
495 return CSR_64_TLS_Darwin_RegMask;
496}
497
// Compute the set of registers that the register allocator must never use for
// MF: control/status registers, SP/FP/BP/IP and all their aliases, segment
// registers, the x87 stack, user-reserved registers, and any registers that
// do not exist on this subtarget or in this mode.
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(Reg: X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  auto &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.is64Bit() && ST.hasUserReservedRegisters()) {
    // Set r# as reserved register if user required
    for (unsigned Reg = X86::R8; Reg <= X86::R15; ++Reg)
      if (ST.isRegisterReservedByUser(i: Reg))
        for (const MCPhysReg &SubReg : subregs_inclusive(Reg))
          Reserved.set(SubReg);
    // With APX, the extended GPRs R16-R31 can be user-reserved too.
    if (ST.hasEGPR())
      for (unsigned Reg = X86::R16; Reg <= X86::R31; ++Reg)
        if (ST.isRegisterReservedByUser(i: Reg))
          for (const MCPhysReg &SubReg : subregs_inclusive(Reg))
            Reserved.set(SubReg);
  }

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(Reg: X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
      MF.getContext().reportError(
          L: SMLoc(),
          Msg: "Frame pointer clobbered by function invoke is not supported.");

    for (const MCPhysReg &SubReg : subregs_inclusive(Reg: X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
      MF.getContext().reportError(L: SMLoc(),
                                  Msg: "Stack realignment in presence of dynamic "
                                      "allocas is not supported with "
                                      "this calling convention.");

    // Reserve the full 64-bit super-register so every alias is covered.
    Register BasePtr = getX86SubSuperRegister(Reg: getBaseRegister(), Size: 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(Reg: BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are old 32-bits.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  // XMM16-31 (and their aliases) only exist with AVX-512 in 64-bit mode.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(I: X86::R16, E: X86::R31WH + 1);

  // The GRAAL convention treats R14/R15 as fixed (thread-local) registers.
  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}
619
// Return the number of target registers this subtarget actually supports,
// i.e. the shortest prefix of the generated register enum that covers every
// available register.
unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512 and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  // Registers before AVX512,
  // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  // AMX registers (TMM)
  // APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
  // Guard against the generated enum layout changing underneath this logic.
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  // Checks go from the largest feature set to the smallest, matching the
  // implication chain documented above.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}
649
650bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
651 MCRegister Reg) const {
652 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
653 const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
654 auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
655 return TRI.isSuperOrSubRegisterEq(RegA, RegB);
656 };
657
658 if (!ST.is64Bit())
659 return llvm::any_of(
660 Range: SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
661 P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
662 (ST.hasMMX() && X86::VR64RegClass.contains(Reg));
663
664 CallingConv::ID CC = MF.getFunction().getCallingConv();
665
666 if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
667 return true;
668
669 if (llvm::any_of(
670 Range: SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
671 P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
672 return true;
673
674 if (CC != CallingConv::Win64 &&
675 llvm::any_of(Range: SmallVector<MCRegister>{X86::RDI, X86::RSI},
676 P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
677 return true;
678
679 if (ST.hasSSE1() &&
680 llvm::any_of(Range: SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
681 X86::XMM3, X86::XMM4, X86::XMM5,
682 X86::XMM6, X86::XMM7},
683 P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
684 return true;
685
686 return X86GenRegisterInfo::isArgumentRegister(MF, PhysReg: Reg);
687}
688
689bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
690 MCRegister PhysReg) const {
691 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
692 const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
693
694 // Stack pointer.
695 if (TRI.isSuperOrSubRegisterEq(RegA: X86::RSP, RegB: PhysReg))
696 return true;
697
698 // Don't use the frame pointer if it's being used.
699 const X86FrameLowering &TFI = *getFrameLowering(MF);
700 if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(RegA: X86::RBP, RegB: PhysReg))
701 return true;
702
703 return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
704}
705
706bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
707 return RC->getID() == X86::TILERegClassID;
708}
709
710void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
711 // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
712 // because the calling convention defines the EFLAGS register as NOT
713 // preserved.
714 //
715 // Unfortunatelly the EFLAGS show up as live-out after branch folding. Adding
716 // an assert to track this and clear the register afterwards to avoid
717 // unnecessary crashes during release builds.
718 assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
719 "EFLAGS are not live-out from a patchpoint.");
720
721 // Also clean other registers that don't need preserving (IP).
722 for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
723 Mask[Reg / 32] &= ~(1U << (Reg % 32));
724}
725
726//===----------------------------------------------------------------------===//
727// Stack Frame Processing methods
728//===----------------------------------------------------------------------===//
729
730static bool CantUseSP(const MachineFrameInfo &MFI) {
731 return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
732}
733
734bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
735 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
736 // We have a virtual register to reference argument, and don't need base
737 // pointer.
738 if (X86FI->getStackPtrSaveMI() != nullptr)
739 return false;
740
741 if (X86FI->hasPreallocatedCall())
742 return true;
743
744 const MachineFrameInfo &MFI = MF.getFrameInfo();
745
746 if (!EnableBasePointer)
747 return false;
748
749 // When we need stack realignment, we can't address the stack from the frame
750 // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
751 // can't address variables from the stack pointer. MS inline asm can
752 // reference locals while also adjusting the stack pointer. When we can't
753 // use both the SP and the FP, we need a separate base pointer register.
754 bool CantUseFP = hasStackRealignment(MF);
755 return CantUseFP && CantUseSP(MFI);
756}
757
758bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
759 if (!TargetRegisterInfo::canRealignStack(MF))
760 return false;
761
762 const MachineFrameInfo &MFI = MF.getFrameInfo();
763 const MachineRegisterInfo *MRI = &MF.getRegInfo();
764
765 // Stack realignment requires a frame pointer. If we already started
766 // register allocation with frame pointer elimination, it is too late now.
767 if (!MRI->canReserveReg(PhysReg: FramePtr))
768 return false;
769
770 // If a base pointer is necessary. Check that it isn't too late to reserve
771 // it.
772 if (CantUseSP(MFI))
773 return MRI->canReserveReg(PhysReg: BasePtr);
774 return true;
775}
776
777bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
778 if (TargetRegisterInfo::shouldRealignStack(MF))
779 return true;
780
781 return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
782}
783
// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// Returns true (and erases the LEA) on success, false if the instruction is
// not a plain-copy LEA.
// TODO: In this case we should be really trying first to entirely eliminate
// this instruction which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx': the memory operand
  // must have scale 1 (operand 2), no index register (operand 3), zero
  // displacement (operand 4), and no segment override (operand 5).
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(i: 2).getImm() != 1 ||
      MI.getOperand(i: 3).getReg() != X86::NoRegister ||
      MI.getOperand(i: 4).getImm() != 0 ||
      MI.getOperand(i: 5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(i: 1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(Reg: BasePtr, Size: 32);
  Register NewDestReg = MI.getOperand(i: 0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  // Emit the equivalent register copy, preserving the kill flag of the source,
  // then delete the original LEA.
  TII->copyPhysReg(MBB&: *MI.getParent(), MI: II, DL: MI.getDebugLoc(), DestReg: NewDestReg, SrcReg: BasePtr,
                   KillSrc: MI.getOperand(i: 1).isKill());
  MI.eraseFromParent();
  return true;
}
812
813static bool isFuncletReturnInstr(MachineInstr &MI) {
814 switch (MI.getOpcode()) {
815 case X86::CATCHRET:
816 case X86::CLEANUPRET:
817 return true;
818 default:
819 return false;
820 }
821 llvm_unreachable("impossible");
822}
823
// Rewrite the frame-index operand of *II (at operand FIOperandNum) to use the
// already-chosen BaseReg plus FIOffset. This overload is used when the caller
// has resolved the base register and offset itself.
void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  // LOCAL_ESCAPE takes a bare offset instead of a register + offset pair.
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(i: FIOperandNum);
    FI.ChangeToImmediate(ImmVal: FIOffset);
    return;
  }

  MI.getOperand(i: FIOperandNum).ChangeToRegister(Reg: BaseReg, isDef: false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(i: FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(i: FIOperandNum + 1).ChangeToImmediate(ImmVal: Offset);
    return;
  }

  // Regular X86 memory operand: the displacement lives at FIOperandNum + 3.
  if (MI.getOperand(i: FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(i: FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    // Leave a zero displacement untouched to avoid a redundant operand write.
    if (Offset != 0)
      MI.getOperand(i: FIOperandNum + 3).ChangeToImmediate(ImmVal: Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(i: FIOperandNum + 3).getOffset();
    MI.getOperand(i: FIOperandNum + 3).setOffset(Offset);
  }
}
862
// Replace the abstract frame-index operand of MI with a concrete base
// register plus displacement. Always returns false: the instruction is
// rewritten in place and never deleted or re-queued.
bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  // A block terminated by a funclet return needs the Win64 EH frame-index
  // lowering selected below, just like a funclet entry block.
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                     : isFuncletReturnInstr(MI&: *MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(i: FIOperandNum).getIndex();

  // Determine base register and offset.
  int64_t FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    // Frame references in return instructions must be SP-relative: FP/BP may
    // already have been restored by the time the return executes.
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FI: FrameIndex, SPReg&: BasePtr, Adjustment: 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FI: FrameIndex, SPReg&: BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FI: FrameIndex, FrameReg&: BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location. This
  // matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(i: FIOperandNum);
    FI.ChangeToImmediate(ImmVal: FIOffset);
    return false;
  }

  // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
  // register as source operand, semantic is the same and destination is
  // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
  // Don't change BasePtr since it is used later for stack adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(Reg: BasePtr))
    MachineBasePtr = getX86SubSuperRegister(Reg: BasePtr, Size: 64);

  // This must be part of a four operand memory reference.  Replace the
  // FrameIndex with base register.  Add an offset to the offset.
  MI.getOperand(i: FIOperandNum).ChangeToRegister(Reg: MachineBasePtr, isDef: false);

  // When addressing relative to SP, fold in any pending stack adjustment
  // (e.g. outgoing call arguments already pushed).
  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(i: FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(i: FIOperandNum + 1).ChangeToImmediate(ImmVal: Offset);
    return false;
  }

  if (MI.getOperand(i: FIOperandNum+3).isImm()) {
    const X86InstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
    const DebugLoc &DL = MI.getDebugLoc();
    int64_t Imm = MI.getOperand(i: FIOperandNum + 3).getImm();
    int64_t Offset = FIOffset + Imm;
    bool FitsIn32Bits = isInt<32>(x: Offset);
    // If the offset will not fit in a 32-bit displacement, then for 64-bit
    // targets, scavenge a register to hold it. Otherwise...
    if (Is64Bit && !FitsIn32Bits) {
      assert(RS && "RegisterScavenger was NULL");

      // Position the scavenger just after II so it can search backwards for
      // a GR64 that is dead (or spillable) at this point.
      RS->enterBasicBlockEnd(MBB);
      RS->backward(I: std::next(x: II));

      Register ScratchReg = RS->scavengeRegisterBackwards(
          RC: X86::GR64RegClass, To: II, /*RestoreAfter=*/false, /*SPAdj=*/0,
          /*AllowSpill=*/true);
      assert(ScratchReg != 0 && "scratch reg was 0");
      RS->setRegUsed(Reg: ScratchReg);

      // Materialize the 64-bit offset into the scratch register and use it as
      // the index register with a zero displacement: base + scratch + 0.
      BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: TII->get(Opcode: X86::MOV64ri), DestReg: ScratchReg).addImm(Val: Offset);

      MI.getOperand(i: FIOperandNum + 3).setImm(0);
      MI.getOperand(i: FIOperandNum + 2).setReg(ScratchReg);

      return false;
    }

    // ... for 32-bit targets, this is a bug!
    if (!Is64Bit && !FitsIn32Bits) {
      MI.emitGenericError(ErrMsg: "64-bit offset calculated but target is 32-bit");
      // Trap so that the instruction verification pass does not fail if run.
      BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: X86::TRAP));
      return false;
    }

    // A zero combined displacement may allow folding an LEA into a plain MOV
    // of the base register; otherwise just write the displacement back.
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(i: FIOperandNum + 3).ChangeToImmediate(ImmVal: Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
                      (uint64_t)MI.getOperand(i: FIOperandNum+3).getOffset();
    MI.getOperand(i: FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}
973
974unsigned X86RegisterInfo::findDeadCallerSavedReg(
975 MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
976 const MachineFunction *MF = MBB.getParent();
977 const MachineRegisterInfo &MRI = MF->getRegInfo();
978 if (MF->callsEHReturn())
979 return 0;
980
981 if (MBBI == MBB.end())
982 return 0;
983
984 switch (MBBI->getOpcode()) {
985 default:
986 return 0;
987 case TargetOpcode::PATCHABLE_RET:
988 case X86::RET:
989 case X86::RET32:
990 case X86::RET64:
991 case X86::RETI32:
992 case X86::RETI64:
993 case X86::TCRETURNdi:
994 case X86::TCRETURNri:
995 case X86::TCRETURN_WIN64ri:
996 case X86::TCRETURN_HIPE32ri:
997 case X86::TCRETURNmi:
998 case X86::TCRETURNdi64:
999 case X86::TCRETURNri64:
1000 case X86::TCRETURNri64_ImpCall:
1001 case X86::TCRETURNmi64:
1002 case X86::TCRETURN_WINmi64:
1003 case X86::EH_RETURN:
1004 case X86::EH_RETURN64: {
1005 LiveRegUnits LRU(*this);
1006 LRU.addLiveOuts(MBB);
1007 LRU.stepBackward(MI: *MBBI);
1008
1009 const TargetRegisterClass &RC =
1010 Is64Bit ? X86::GR64_NOSPRegClass : X86::GR32_NOSPRegClass;
1011 for (MCRegister Reg : RC) {
1012 if (LRU.available(Reg) && !MRI.isReserved(PhysReg: Reg))
1013 return Reg;
1014 }
1015 }
1016 }
1017
1018 return 0;
1019}
1020
1021Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
1022 const X86FrameLowering *TFI = getFrameLowering(MF);
1023 return TFI->hasFP(MF) ? FramePtr : StackPtr;
1024}
1025
1026Register
1027X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
1028 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
1029 Register FrameReg = getFrameRegister(MF);
1030 if (Subtarget.isTarget64BitILP32())
1031 FrameReg = getX86SubSuperRegister(Reg: FrameReg, Size: 32);
1032 return FrameReg;
1033}
1034
1035Register
1036X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
1037 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
1038 Register StackReg = getStackRegister();
1039 if (Subtarget.isTarget64BitILP32())
1040 StackReg = getX86SubSuperRegister(Reg: StackReg, Size: 32);
1041 return StackReg;
1042}
1043
1044static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
1045 const MachineRegisterInfo *MRI) {
1046 if (VRM->hasShape(virtReg: VirtReg))
1047 return VRM->getShape(virtReg: VirtReg);
1048
1049 const MachineOperand &Def = *MRI->def_begin(RegNo: VirtReg);
1050 MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
1051 unsigned OpCode = MI->getOpcode();
1052 switch (OpCode) {
1053 default:
1054 llvm_unreachable("Unexpected machine instruction on tile register!");
1055 break;
1056 case X86::COPY: {
1057 Register SrcReg = MI->getOperand(i: 1).getReg();
1058 ShapeT Shape = getTileShape(VirtReg: SrcReg, VRM, MRI);
1059 VRM->assignVirt2Shape(virtReg: VirtReg, shape: Shape);
1060 return Shape;
1061 }
1062 // We only collect the tile shape that is defined.
1063 case X86::PTILELOADDV:
1064 case X86::PTILELOADDT1V:
1065 case X86::PTDPBSSDV:
1066 case X86::PTDPBSUDV:
1067 case X86::PTDPBUSDV:
1068 case X86::PTDPBUUDV:
1069 case X86::PTILEZEROV:
1070 case X86::PTDPBF16PSV:
1071 case X86::PTDPFP16PSV:
1072 case X86::PTCMMIMFP16PSV:
1073 case X86::PTCMMRLFP16PSV:
1074 case X86::PTILELOADDRSV:
1075 case X86::PTILELOADDRST1V:
1076 case X86::PTMMULTF32PSV:
1077 case X86::PTDPBF8PSV:
1078 case X86::PTDPBHF8PSV:
1079 case X86::PTDPHBF8PSV:
1080 case X86::PTDPHF8PSV: {
1081 MachineOperand &MO1 = MI->getOperand(i: 1);
1082 MachineOperand &MO2 = MI->getOperand(i: 2);
1083 ShapeT Shape(&MO1, &MO2, MRI);
1084 VRM->assignVirt2Shape(virtReg: VirtReg, shape: Shape);
1085 return Shape;
1086 }
1087 }
1088}
1089
// Provide target-specific allocation hints. For AMX tile registers, prefer
// physregs whose currently-assigned vreg has the same tile shape. For GPRs on
// NDD-capable (APX) targets, hint the destination toward a source operand so
// the new-data-destination form can later be compressed to the legacy
// two-address encoding.
bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(Reg: VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  unsigned ID = RC.getID();

  // Both the tile and the NDD paths need the virt->phys mapping.
  if (!VRM)
    return BaseImplRetVal;

  if (ID != X86::TILERegClassID) {
    if (DisableRegAllocNDDHints || !ST.hasNDD() ||
        !TRI.isGeneralPurposeRegisterClass(RC: &RC))
      return BaseImplRetVal;

    // Add any two address hints after any copy hints.
    SmallSet<unsigned, 4> TwoAddrHints;

    // Record MO's physreg (direct or via the current assignment) as a
    // candidate hint, unless reserved or already hinted.
    auto TryAddNDDHint = [&](const MachineOperand &MO) {
      Register Reg = MO.getReg();
      Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(virtReg: Reg));
      if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Range&: Hints, Element: PhysReg))
        TwoAddrHints.insert(V: PhysReg);
    };

    // An NDD instruction is compressible when Op0 is allocated to the same
    // physical register as Op1 (or Op2 if it's commutable).
    for (auto &MO : MRI->reg_nodbg_operands(Reg: VirtReg)) {
      const MachineInstr &MI = *MO.getParent();
      // Only instructions that have a legacy (non-ND) variant are relevant.
      if (!X86::getNonNDVariant(Opc: MI.getOpcode()))
        continue;
      unsigned OpIdx = MI.getOperandNo(I: &MO);
      if (OpIdx == 0) {
        // VirtReg is the destination: hint it toward the source(s).
        assert(MI.getOperand(1).isReg());
        TryAddNDDHint(MI.getOperand(i: 1));
        if (MI.isCommutable()) {
          assert(MI.getOperand(2).isReg());
          TryAddNDDHint(MI.getOperand(i: 2));
        }
      } else if (OpIdx == 1) {
        // VirtReg is a source: hint it toward the destination.
        TryAddNDDHint(MI.getOperand(i: 0));
      } else if (MI.isCommutable() && OpIdx == 2) {
        TryAddNDDHint(MI.getOperand(i: 0));
      }
    }

    // Emit the hints in allocation-order priority.
    for (MCPhysReg OrderReg : Order)
      if (TwoAddrHints.count(V: OrderReg))
        Hints.push_back(Elt: OrderReg);

    return BaseImplRetVal;
  }

  // Tile path: only offer physregs whose live assignment (if any) has the
  // same shape as VirtReg, so shapes never conflict within a physreg.
  ShapeT VirtShape = getTileShape(VirtReg, VRM: const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(Elt: PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VirtReg: VReg, VRM: const_cast<VirtRegMap *>(VRM), MRI);
    if (PhysShape == VirtShape)
      Hints.push_back(Elt: PhysReg);
  };

  // Re-filter the base implementation's copy hints first, then the remaining
  // allocation order, so copy hints keep their priority.
  SmallSet<MCPhysReg, 4> CopyHints(llvm::from_range, Hints);
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Reg: Hint) && !MRI->isReserved(PhysReg: Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(V: PhysReg) && RC.contains(Reg: PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  // Returning true tells the allocator the hint list is complete/ordered.
  return true;
}
1187
1188const TargetRegisterClass *X86RegisterInfo::constrainRegClassToNonRex2(
1189 const TargetRegisterClass *RC) const {
1190 switch (RC->getID()) {
1191 default:
1192 return RC;
1193 case X86::GR8RegClassID:
1194 return &X86::GR8_NOREX2RegClass;
1195 case X86::GR16RegClassID:
1196 return &X86::GR16_NOREX2RegClass;
1197 case X86::GR32RegClassID:
1198 return &X86::GR32_NOREX2RegClass;
1199 case X86::GR64RegClassID:
1200 return &X86::GR64_NOREX2RegClass;
1201 case X86::GR32_NOSPRegClassID:
1202 return &X86::GR32_NOREX2_NOSPRegClass;
1203 case X86::GR64_NOSPRegClassID:
1204 return &X86::GR64_NOREX2_NOSPRegClass;
1205 }
1206}
1207
1208bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const {
1209 switch (RC->getID()) {
1210 default:
1211 return false;
1212 case X86::GR8_NOREX2RegClassID:
1213 case X86::GR16_NOREX2RegClassID:
1214 case X86::GR32_NOREX2RegClassID:
1215 case X86::GR64_NOREX2RegClassID:
1216 case X86::GR32_NOREX2_NOSPRegClassID:
1217 case X86::GR64_NOREX2_NOSPRegClassID:
1218 case X86::GR64_with_sub_16bit_in_GR16_NOREX2RegClassID:
1219 return true;
1220 }
1221}
1222