//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//
14
15#include "X86RegisterInfo.h"
16#include "X86FrameLowering.h"
17#include "X86MachineFunctionInfo.h"
18#include "X86Subtarget.h"
19#include "llvm/ADT/BitVector.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/CodeGen/LiveRegMatrix.h"
23#include "llvm/CodeGen/MachineFrameInfo.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/RegisterScavenging.h"
26#include "llvm/CodeGen/TargetFrameLowering.h"
27#include "llvm/CodeGen/TargetInstrInfo.h"
28#include "llvm/CodeGen/TileShapeInfo.h"
29#include "llvm/CodeGen/VirtRegMap.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/Type.h"
32#include "llvm/MC/MCContext.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Target/TargetMachine.h"
36#include "llvm/Target/TargetOptions.h"
37
38using namespace llvm;
39
40#define GET_REGINFO_TARGET_DESC
41#include "X86GenRegisterInfo.inc"
42
static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
                  cl::desc("Enable use of a base pointer for complex stack frames"));

static cl::opt<bool>
    DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
                            cl::init(false),
                            cl::desc("Disable two address hints for register "
                                     "allocation"));

extern cl::opt<bool> X86EnableAPXForRelocation;
54
X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isX86_64() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, /*isEH=*/false),
                         X86_MC::getDwarfRegFlavour(TT, /*isEH=*/true),
                         (TT.isX86_64() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isX86_64();
  IsTarget64BitLP64 = Is64Bit && !TT.isX32();
  IsWin64 = Is64Bit && TT.isOSWindows();
  IsUEFI64 = Is64Bit && TT.isUEFI();

  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit mode PIC
  // requires the GOT pointer in EBX before calls to functions through the PLT.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}
87
88const TargetRegisterClass *
89X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
90 unsigned Idx) const {
91 // The sub_8bit sub-register index is more constrained in 32-bit mode.
92 // It behaves just like the sub_8bit_hi index.
93 if (!Is64Bit && Idx == X86::sub_8bit)
94 Idx = X86::sub_8bit_hi;
95
96 // Forward to TableGen's default version.
97 return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
98}
99
100const TargetRegisterClass *
101X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
102 const TargetRegisterClass *B,
103 unsigned SubIdx) const {
104 // The sub_8bit sub-register index is more constrained in 32-bit mode.
105 if (!Is64Bit && SubIdx == X86::sub_8bit) {
106 A = X86GenRegisterInfo::getSubClassWithSubReg(RC: A, Idx: X86::sub_8bit_hi);
107 if (!A)
108 return nullptr;
109 }
110 return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, Idx: SubIdx);
111}
112
113const TargetRegisterClass *
114X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
115 const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
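  // (In 64-bit mode an instruction carrying a REX prefix, e.g. one touching
  // SIL or R8B, cannot also encode AH/BH/CH/DH, which is why inflating to the
  // full GR8 class is unsafe here.)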
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;
126
  // Keep using the non-rex2 register class when the APX feature (EGPR/NDD/NF)
  // is not enabled for relocation.
  if (!X86EnableAPXForRelocation && isNonRex2RegClass(RC))
    return RC;
131
132 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
133
134 const TargetRegisterClass *Super = RC;
135 auto I = RC->superclasses().begin();
136 auto E = RC->superclasses().end();
137 do {
138 switch (Super->getID()) {
139 case X86::FR32RegClassID:
140 case X86::FR64RegClassID:
141 // If AVX-512 isn't supported we should only inflate to these classes.
142 if (!Subtarget.hasAVX512() &&
143 getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC))
144 return Super;
145 break;
146 case X86::VR128RegClassID:
147 case X86::VR256RegClassID:
148 // If VLX isn't supported we should only inflate to these classes.
149 if (!Subtarget.hasVLX() &&
150 getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC))
151 return Super;
152 break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
167 case X86::GR8RegClassID:
168 case X86::GR16RegClassID:
169 case X86::GR32RegClassID:
170 case X86::GR64RegClassID:
171 case X86::GR8_NOREX2RegClassID:
172 case X86::GR16_NOREX2RegClassID:
173 case X86::GR32_NOREX2RegClassID:
174 case X86::GR64_NOREX2RegClassID:
175 case X86::RFP32RegClassID:
176 case X86::RFP64RegClassID:
177 case X86::RFP80RegClassID:
178 case X86::VR512_0_15RegClassID:
179 case X86::VR512RegClassID:
180 // Don't return a super-class that would shrink the spill size.
181 // That can happen with the vector and float classes.
182 if (getRegSizeInBits(RC: *Super) == getRegSizeInBits(RC: *RC))
183 return Super;
184 }
185 if (I != E) {
186 Super = getRegClass(i: *I);
187 ++I;
188 } else {
189 Super = nullptr;
190 }
191 } while (Super);
192 return RC;
193}
194
195const TargetRegisterClass *
196X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
197 assert(Kind == 0 && "this should only be used for default cases");
198 if (IsTarget64BitLP64)
199 return &X86::GR64RegClass;
  // If the target is 64-bit but we have been told to use 32-bit addresses,
  // we can still use a 64-bit register as long as we know its high bits
  // are zero.
  // Reflect that in the returned register class.
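  // For example, in x32 a pointer produced by a 32-bit operation (such as
  // 'movl %edi, %eax') leaves the upper 32 bits of RAX zero, so RAX remains
  // safe to use in a 64-bit addressing mode.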
  return Is64Bit ? &X86::LOW32_ADDR_ACCESSRegClass : &X86::GR32RegClass;
205}
206
207const TargetRegisterClass *
208X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
209 if (RC == &X86::CCRRegClass) {
210 if (Is64Bit)
211 return &X86::GR64RegClass;
212 else
213 return &X86::GR32RegClass;
214 }
215 return RC;
216}
217
218unsigned
219X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
220 MachineFunction &MF) const {
221 const X86FrameLowering *TFI = getFrameLowering(MF);
222
223 unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
224 switch (RC->getID()) {
225 default:
226 return 0;
227 case X86::GR32RegClassID:
228 return 4 - FPDiff;
229 case X86::GR64RegClassID:
230 return 12 - FPDiff;
231 case X86::VR128RegClassID:
232 return Is64Bit ? 10 : 4;
233 case X86::VR64RegClassID:
234 return 4;
235 }
236}
237
238const MCPhysReg *
239X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
240 assert(MF && "MachineFunction required");
241
242 const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
243 const Function &F = MF->getFunction();
244 bool HasSSE = Subtarget.hasSSE1();
245 bool HasAVX = Subtarget.hasAVX();
246 bool HasAVX512 = Subtarget.hasAVX512();
247 bool CallsEHReturn = MF->callsEHReturn();
248
249 CallingConv::ID CC = F.getCallingConv();
250
  // If the function has the "no_caller_saved_registers" attribute, switch to
  // the X86_INTR calling convention, which has the appropriate CSR list.
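  // Such a function must preserve every register it clobbers, just as an
  // interrupt handler does, which is exactly what the X86_INTR CSR list
  // describes.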
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;
255
  // If the attribute is specified, override the CSRs normally specified by the
  // calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;
260
261 switch (CC) {
262 case CallingConv::GHC:
263 case CallingConv::HiPE:
264 return CSR_NoRegs_SaveList;
265 case CallingConv::AnyReg:
266 if (HasAVX)
267 return CSR_64_AllRegs_AVX_SaveList;
268 return CSR_64_AllRegs_SaveList;
269 case CallingConv::PreserveMost:
270 return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
271 : CSR_64_RT_MostRegs_SaveList;
272 case CallingConv::PreserveAll:
273 if (HasAVX)
274 return CSR_64_RT_AllRegs_AVX_SaveList;
275 return CSR_64_RT_AllRegs_SaveList;
276 case CallingConv::PreserveNone:
277 return CSR_64_NoneRegs_SaveList;
278 case CallingConv::CXX_FAST_TLS:
279 if (Is64Bit)
280 return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
281 CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
282 break;
283 case CallingConv::Intel_OCL_BI: {
284 if (HasAVX512 && IsWin64)
285 return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
286 if (HasAVX512 && Is64Bit)
287 return CSR_64_Intel_OCL_BI_AVX512_SaveList;
288 if (HasAVX && IsWin64)
289 return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
290 if (HasAVX && Is64Bit)
291 return CSR_64_Intel_OCL_BI_AVX_SaveList;
292 if (!HasAVX && !IsWin64 && Is64Bit)
293 return CSR_64_Intel_OCL_BI_SaveList;
294 break;
295 }
296 case CallingConv::X86_RegCall:
297 if (Is64Bit) {
298 if (IsWin64) {
299 return (HasSSE ? CSR_Win64_RegCall_SaveList :
300 CSR_Win64_RegCall_NoSSE_SaveList);
301 } else {
302 return (HasSSE ? CSR_SysV64_RegCall_SaveList :
303 CSR_SysV64_RegCall_NoSSE_SaveList);
304 }
305 } else {
306 return (HasSSE ? CSR_32_RegCall_SaveList :
307 CSR_32_RegCall_NoSSE_SaveList);
308 }
309 case CallingConv::CFGuard_Check:
310 assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
311 return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
312 : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
313 case CallingConv::Cold:
314 if (Is64Bit)
315 return CSR_64_MostRegs_SaveList;
316 break;
317 case CallingConv::Win64:
318 if (!HasSSE)
319 return CSR_Win64_NoSSE_SaveList;
320 return CSR_Win64_SaveList;
321 case CallingConv::SwiftTail:
322 if (!Is64Bit)
323 return CSR_32_SaveList;
324 return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
325 case CallingConv::X86_64_SysV:
326 if (CallsEHReturn)
327 return CSR_64EHRet_SaveList;
328 return CSR_64_SaveList;
329 case CallingConv::X86_INTR:
330 if (Is64Bit) {
331 if (HasAVX512)
332 return CSR_64_AllRegs_AVX512_SaveList;
333 if (HasAVX)
334 return CSR_64_AllRegs_AVX_SaveList;
335 if (HasSSE)
336 return CSR_64_AllRegs_SaveList;
337 return CSR_64_AllRegs_NoSSE_SaveList;
338 } else {
339 if (HasAVX512)
340 return CSR_32_AllRegs_AVX512_SaveList;
341 if (HasAVX)
342 return CSR_32_AllRegs_AVX_SaveList;
343 if (HasSSE)
344 return CSR_32_AllRegs_SSE_SaveList;
345 return CSR_32_AllRegs_SaveList;
346 }
347 default:
348 break;
349 }
350
351 if (Is64Bit) {
352 bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
353 F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError);
354 if (IsSwiftCC)
355 return IsWin64 ? CSR_Win64_SwiftError_SaveList
356 : CSR_64_SwiftError_SaveList;
357
358 if (IsWin64 || IsUEFI64)
359 return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
360 if (CallsEHReturn)
361 return CSR_64EHRet_SaveList;
362 return CSR_64_SaveList;
363 }
364
365 return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
366}
367
368const MCPhysReg *
369X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
370 return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
371}
372
373const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
374 const MachineFunction *MF) const {
375 assert(MF && "Invalid MachineFunction pointer.");
376 if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
377 MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
378 return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
379 return nullptr;
380}
381
382const uint32_t *
383X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
384 CallingConv::ID CC) const {
385 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
386 bool HasSSE = Subtarget.hasSSE1();
387 bool HasAVX = Subtarget.hasAVX();
388 bool HasAVX512 = Subtarget.hasAVX512();
389
390 switch (CC) {
391 case CallingConv::GHC:
392 case CallingConv::HiPE:
393 return CSR_NoRegs_RegMask;
394 case CallingConv::AnyReg:
395 if (HasAVX)
396 return CSR_64_AllRegs_AVX_RegMask;
397 return CSR_64_AllRegs_RegMask;
398 case CallingConv::PreserveMost:
399 return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
400 case CallingConv::PreserveAll:
401 if (HasAVX)
402 return CSR_64_RT_AllRegs_AVX_RegMask;
403 return CSR_64_RT_AllRegs_RegMask;
404 case CallingConv::PreserveNone:
405 return CSR_64_NoneRegs_RegMask;
406 case CallingConv::CXX_FAST_TLS:
407 if (Is64Bit)
408 return CSR_64_TLS_Darwin_RegMask;
409 break;
410 case CallingConv::Intel_OCL_BI: {
411 if (HasAVX512 && IsWin64)
412 return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
413 if (HasAVX512 && Is64Bit)
414 return CSR_64_Intel_OCL_BI_AVX512_RegMask;
415 if (HasAVX && IsWin64)
416 return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
417 if (HasAVX && Is64Bit)
418 return CSR_64_Intel_OCL_BI_AVX_RegMask;
419 if (!HasAVX && !IsWin64 && Is64Bit)
420 return CSR_64_Intel_OCL_BI_RegMask;
421 break;
422 }
423 case CallingConv::X86_RegCall:
424 if (Is64Bit) {
425 if (IsWin64) {
426 return (HasSSE ? CSR_Win64_RegCall_RegMask :
427 CSR_Win64_RegCall_NoSSE_RegMask);
428 } else {
429 return (HasSSE ? CSR_SysV64_RegCall_RegMask :
430 CSR_SysV64_RegCall_NoSSE_RegMask);
431 }
432 } else {
433 return (HasSSE ? CSR_32_RegCall_RegMask :
434 CSR_32_RegCall_NoSSE_RegMask);
435 }
436 case CallingConv::CFGuard_Check:
437 assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
438 return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
439 : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
440 case CallingConv::Cold:
441 if (Is64Bit)
442 return CSR_64_MostRegs_RegMask;
443 break;
444 case CallingConv::Win64:
445 return CSR_Win64_RegMask;
446 case CallingConv::SwiftTail:
447 if (!Is64Bit)
448 return CSR_32_RegMask;
449 return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
450 case CallingConv::X86_64_SysV:
451 return CSR_64_RegMask;
452 case CallingConv::X86_INTR:
453 if (Is64Bit) {
454 if (HasAVX512)
455 return CSR_64_AllRegs_AVX512_RegMask;
456 if (HasAVX)
457 return CSR_64_AllRegs_AVX_RegMask;
458 if (HasSSE)
459 return CSR_64_AllRegs_RegMask;
460 return CSR_64_AllRegs_NoSSE_RegMask;
461 } else {
462 if (HasAVX512)
463 return CSR_32_AllRegs_AVX512_RegMask;
464 if (HasAVX)
465 return CSR_32_AllRegs_AVX_RegMask;
466 if (HasSSE)
467 return CSR_32_AllRegs_SSE_RegMask;
468 return CSR_32_AllRegs_RegMask;
469 }
470 default:
471 break;
472 }
473
474 // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
475 // callsEHReturn().
476 if (Is64Bit) {
477 const Function &F = MF.getFunction();
478 bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
479 F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError);
480 if (IsSwiftCC)
481 return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;
482
483 return (IsWin64 || IsUEFI64) ? CSR_Win64_RegMask : CSR_64_RegMask;
484 }
485
486 return CSR_32_RegMask;
487}
488
489const uint32_t*
490X86RegisterInfo::getNoPreservedMask() const {
491 return CSR_NoRegs_RegMask;
492}
493
494const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
495 return CSR_64_TLS_Darwin_RegMask;
496}
497
498BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
499 BitVector Reserved(getNumRegs());
500 const X86FrameLowering *TFI = getFrameLowering(MF);
501
502 // Set the floating point control register as reserved.
503 Reserved.set(X86::FPCW);
504
505 // Set the floating point status register as reserved.
506 Reserved.set(X86::FPSW);
507
508 // Set the SIMD floating point control register as reserved.
509 Reserved.set(X86::MXCSR);
510
511 // Set the stack-pointer register and its aliases as reserved.
512 for (const MCPhysReg &SubReg : subregs_inclusive(Reg: X86::RSP))
513 Reserved.set(SubReg);
514
515 // Set the Shadow Stack Pointer as reserved.
516 Reserved.set(X86::SSP);
517
518 // Set the instruction pointer register and its aliases as reserved.
519 for (const MCPhysReg &SubReg : subregs_inclusive(Reg: X86::RIP))
520 Reserved.set(SubReg);
521
522 // Set the frame-pointer register and its aliases as reserved if needed.
523 if (TFI->hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF)) {
524 if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
525 MF.getContext().reportError(
526 L: SMLoc(),
527 Msg: "Frame pointer clobbered by function invoke is not supported.");
528
529 for (const MCPhysReg &SubReg : subregs_inclusive(Reg: X86::RBP))
530 Reserved.set(SubReg);
531 }
532
533 // Set the base-pointer register and its aliases as reserved if needed.
534 if (hasBasePointer(MF)) {
535 if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
536 MF.getContext().reportError(L: SMLoc(),
537 Msg: "Stack realignment in presence of dynamic "
538 "allocas is not supported with "
539 "this calling convention.");
540
541 Register BasePtr = getX86SubSuperRegister(Reg: getBaseRegister(), Size: 64);
542 for (const MCPhysReg &SubReg : subregs_inclusive(Reg: BasePtr))
543 Reserved.set(SubReg);
544 }
545
546 // Mark the segment registers as reserved.
547 Reserved.set(X86::CS);
548 Reserved.set(X86::SS);
549 Reserved.set(X86::DS);
550 Reserved.set(X86::ES);
551 Reserved.set(X86::FS);
552 Reserved.set(X86::GS);
553
554 // Mark the floating point stack registers as reserved.
555 for (unsigned n = 0; n != 8; ++n)
556 Reserved.set(X86::ST0 + n);
557
  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are the old 32-bit registers.
562 Reserved.set(X86::SIL);
563 Reserved.set(X86::DIL);
564 Reserved.set(X86::BPL);
565 Reserved.set(X86::SPL);
566 Reserved.set(X86::SIH);
567 Reserved.set(X86::DIH);
568 Reserved.set(X86::BPH);
569 Reserved.set(X86::SPH);
570
571 for (unsigned n = 0; n != 8; ++n) {
572 // R8, R9, ...
573 for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
574 Reserved.set(*AI);
575
576 // XMM8, XMM9, ...
577 for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
578 Reserved.set(*AI);
579 }
580 }
581 if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
582 for (unsigned n = 0; n != 16; ++n) {
583 for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
584 ++AI)
585 Reserved.set(*AI);
586 }
587 }
588
589 // Reserve the extended general purpose registers.
590 if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
591 Reserved.set(I: X86::R16, E: X86::R31WH + 1);
592
593 if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
594 for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
595 Reserved.set(*AI);
596 for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
597 Reserved.set(*AI);
598 }
599
600 assert(checkAllSuperRegsMarked(Reserved,
601 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
602 X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
603 return Reserved;
604}
605
606unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
607 // All existing Intel CPUs that support AMX support AVX512 and all existing
608 // Intel CPUs that support APX support AMX. AVX512 implies AVX.
609 //
610 // We enumerate the registers in X86GenRegisterInfo.inc in this order:
611 //
612 // Registers before AVX512,
613 // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
614 // AMX registers (TMM)
615 // APX registers (R16-R31)
616 //
617 // and try to return the minimum number of registers supported by the target.
618 static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
619 (X86::K6_K7 + 1 == X86::TMMCFG) &&
620 (X86::TMM7 + 1 == X86::R16) &&
621 (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
622 "Register number may be incorrect");
623
624 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
625 if (ST.hasEGPR())
626 return X86::NUM_TARGET_REGS;
627 if (ST.hasAMXTILE())
628 return X86::TMM7 + 1;
629 if (ST.hasAVX512())
630 return X86::K6_K7 + 1;
631 if (ST.hasAVX())
632 return X86::YMM15 + 1;
633 return X86::R15WH + 1;
634}
635
636bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
637 MCRegister Reg) const {
638 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
639 const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
640 auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
641 return TRI.isSuperOrSubRegisterEq(RegA, RegB);
642 };
643
644 if (!ST.is64Bit())
645 return llvm::any_of(
646 Range: SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
647 P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
648 (ST.hasMMX() && X86::VR64RegClass.contains(Reg));
649
650 CallingConv::ID CC = MF.getFunction().getCallingConv();
651
652 if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
653 return true;
654
655 if (llvm::any_of(
656 Range: SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
657 P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
658 return true;
659
660 if (CC != CallingConv::Win64 &&
661 llvm::any_of(Range: SmallVector<MCRegister>{X86::RDI, X86::RSI},
662 P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
663 return true;
664
665 if (ST.hasSSE1() &&
666 llvm::any_of(Range: SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
667 X86::XMM3, X86::XMM4, X86::XMM5,
668 X86::XMM6, X86::XMM7},
669 P: [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
670 return true;
671
672 return X86GenRegisterInfo::isArgumentRegister(MF, PhysReg: Reg);
673}
674
675bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
676 MCRegister PhysReg) const {
677 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
678 const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
679
  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // The frame pointer is fixed if it is currently being used as the frame
  // pointer.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;
688
689 return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
690}
691
692bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
693 return RC->getID() == X86::TILERegClassID;
694}
695
void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately, EFLAGS can show up as live-out after branch folding. Add an
  // assert to track this, and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}
711
712//===----------------------------------------------------------------------===//
713// Stack Frame Processing methods
714//===----------------------------------------------------------------------===//
715
716static bool CantUseSP(const MachineFrameInfo &MFI) {
717 return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
718}
719
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // We have a virtual register to reference arguments, and don't need a base
  // pointer.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;
729
730 const MachineFrameInfo &MFI = MF.getFrameInfo();
731
732 if (!EnableBasePointer)
733 return false;
734
  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
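  // For example, a function with an over-aligned local (forcing realignment)
  // that also calls alloca (making SP adjustments dynamic) has to address its
  // fixed-size locals through the base pointer.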
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}
743
744bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
745 if (!TargetRegisterInfo::canRealignStack(MF))
746 return false;
747
748 const MachineFrameInfo &MFI = MF.getFrameInfo();
749 const MachineRegisterInfo *MRI = &MF.getRegInfo();
750
  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
761}
762
763bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
764 if (TargetRegisterInfo::shouldRealignStack(MF))
765 return true;
766
767 return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
768}
769
// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really try first to entirely eliminate this
// instruction, which is a plain copy.
774static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
775 MachineInstr &MI = *II;
776 unsigned Opc = II->getOpcode();
777 // Check if this is a LEA of the form 'lea (%esp), %ebx'
778 if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
779 MI.getOperand(i: 2).getImm() != 1 ||
780 MI.getOperand(i: 3).getReg() != X86::NoRegister ||
781 MI.getOperand(i: 4).getImm() != 0 ||
782 MI.getOperand(i: 5).getReg() != X86::NoRegister)
783 return false;
784 Register BasePtr = MI.getOperand(i: 1).getReg();
  // In X32 mode, ensure the base pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV, which will zero-extend the upper
  // 32 bits of the super-register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
790 Register NewDestReg = MI.getOperand(i: 0).getReg();
791 const X86InstrInfo *TII =
792 MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
793 TII->copyPhysReg(MBB&: *MI.getParent(), MI: II, DL: MI.getDebugLoc(), DestReg: NewDestReg, SrcReg: BasePtr,
794 KillSrc: MI.getOperand(i: 1).isKill());
795 MI.eraseFromParent();
796 return true;
797}
798
799static bool isFuncletReturnInstr(MachineInstr &MI) {
800 switch (MI.getOpcode()) {
801 case X86::CATCHRET:
802 case X86::CLEANUPRET:
803 return true;
804 default:
805 return false;
806 }
807 llvm_unreachable("impossible");
808}
809
810void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
811 unsigned FIOperandNum,
812 Register BaseReg,
813 int FIOffset) const {
814 MachineInstr &MI = *II;
815 unsigned Opc = MI.getOpcode();
816 if (Opc == TargetOpcode::LOCAL_ESCAPE) {
817 MachineOperand &FI = MI.getOperand(i: FIOperandNum);
818 FI.ChangeToImmediate(ImmVal: FIOffset);
819 return;
820 }
821
822 MI.getOperand(i: FIOperandNum).ChangeToRegister(Reg: BaseReg, isDef: false);
823
824 // The frame index format for stackmaps and patchpoints is different from the
825 // X86 format. It only has a FI and an offset.
826 if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
827 assert(BasePtr == FramePtr && "Expected the FP as base register");
828 int64_t Offset = MI.getOperand(i: FIOperandNum + 1).getImm() + FIOffset;
829 MI.getOperand(i: FIOperandNum + 1).ChangeToImmediate(ImmVal: Offset);
830 return;
831 }
832
833 if (MI.getOperand(i: FIOperandNum + 3).isImm()) {
834 // Offset is a 32-bit integer.
835 int Imm = (int)(MI.getOperand(i: FIOperandNum + 3).getImm());
836 int Offset = FIOffset + Imm;
837 assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
838 "Requesting 64-bit offset in 32-bit immediate!");
839 if (Offset != 0)
840 MI.getOperand(i: FIOperandNum + 3).ChangeToImmediate(ImmVal: Offset);
841 } else {
842 // Offset is symbolic. This is extremely rare.
843 uint64_t Offset =
844 FIOffset + (uint64_t)MI.getOperand(i: FIOperandNum + 3).getOffset();
845 MI.getOperand(i: FIOperandNum + 3).setOffset(Offset);
846 }
847}
848
849bool
850X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
851 int SPAdj, unsigned FIOperandNum,
852 RegScavenger *RS) const {
853 MachineInstr &MI = *II;
854 MachineBasicBlock &MBB = *MI.getParent();
855 MachineFunction &MF = *MBB.getParent();
856 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
857 bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
858 : isFuncletReturnInstr(MI&: *MBBI);
859 const X86FrameLowering *TFI = getFrameLowering(MF);
860 int FrameIndex = MI.getOperand(i: FIOperandNum).getIndex();
861
862 // Determine base register and offset.
863 int64_t FIOffset;
864 Register BasePtr;
865 if (MI.isReturn()) {
866 assert((!hasStackRealignment(MF) ||
867 MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
868 "Return instruction can only reference SP relative frame objects");
869 FIOffset =
870 TFI->getFrameIndexReferenceSP(MF, FI: FrameIndex, SPReg&: BasePtr, Adjustment: 0).getFixed();
871 } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
872 FIOffset = TFI->getWin64EHFrameIndexRef(MF, FI: FrameIndex, SPReg&: BasePtr);
873 } else {
874 FIOffset = TFI->getFrameIndexReference(MF, FI: FrameIndex, FrameReg&: BasePtr).getFixed();
875 }
876
  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location.
  // This matches the behavior of llvm.frameaddress.
882 unsigned Opc = MI.getOpcode();
883 if (Opc == TargetOpcode::LOCAL_ESCAPE) {
884 MachineOperand &FI = MI.getOperand(i: FIOperandNum);
885 FI.ChangeToImmediate(ImmVal: FIOffset);
886 return false;
887 }
888
  // For LEA64_32r, when BasePtr is a 32-bit register (X32) we can use the
  // full-size 64-bit register as the source operand; the semantics are the
  // same and the destination is still 32 bits. This saves one byte per LEA
  // since the 0x67 address-size prefix is avoided.
  // Don't change BasePtr since it is used later for stack adjustment.
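  // E.g. 'lea 16(%rbp), %eax' encodes one byte shorter than
  // 'lea 16(%ebp), %eax', which would need the 0x67 address-size override.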
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);
896
  // This must be part of a four-operand memory reference. Replace the
  // FrameIndex with the base register and add FIOffset to the displacement.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);
900
901 if (BasePtr == StackPtr)
902 FIOffset += SPAdj;
903
904 // The frame index format for stackmaps and patchpoints is different from the
905 // X86 format. It only has a FI and an offset.
906 if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
907 assert(BasePtr == FramePtr && "Expected the FP as base register");
908 int64_t Offset = MI.getOperand(i: FIOperandNum + 1).getImm() + FIOffset;
909 MI.getOperand(i: FIOperandNum + 1).ChangeToImmediate(ImmVal: Offset);
910 return false;
911 }
912
913 if (MI.getOperand(i: FIOperandNum+3).isImm()) {
914 const X86InstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
915 const DebugLoc &DL = MI.getDebugLoc();
916 int64_t Imm = MI.getOperand(i: FIOperandNum + 3).getImm();
917 int64_t Offset = FIOffset + Imm;
918 bool FitsIn32Bits = isInt<32>(x: Offset);
919 // If the offset will not fit in a 32-bit displacement, then for 64-bit
920 // targets, scavenge a register to hold it. Otherwise...
921 if (Is64Bit && !FitsIn32Bits) {
922 assert(RS && "RegisterScavenger was NULL");
923
924 RS->enterBasicBlockEnd(MBB);
925 RS->backward(I: std::next(x: II));
926
927 Register ScratchReg = RS->scavengeRegisterBackwards(
928 RC: X86::GR64RegClass, To: II, /*RestoreAfter=*/false, /*SPAdj=*/0,
929 /*AllowSpill=*/true);
930 assert(ScratchReg != 0 && "scratch reg was 0");
931 RS->setRegUsed(Reg: ScratchReg);
932
933 BuildMI(BB&: MBB, I: II, MIMD: DL, MCID: TII->get(Opcode: X86::MOV64ri), DestReg: ScratchReg).addImm(Val: Offset);
934
935 MI.getOperand(i: FIOperandNum + 3).setImm(0);
936 MI.getOperand(i: FIOperandNum + 2).setReg(ScratchReg);
937
938 return false;
939 }
940
941 // ... for 32-bit targets, this is a bug!
942 if (!Is64Bit && !FitsIn32Bits) {
943 MI.emitGenericError(ErrMsg: "64-bit offset calculated but target is 32-bit");
944 // Trap so that the instruction verification pass does not fail if run.
945 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: X86::TRAP));
946 return false;
947 }
948
949 if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
950 MI.getOperand(i: FIOperandNum + 3).ChangeToImmediate(ImmVal: Offset);
951 } else {
952 // Offset is symbolic. This is extremely rare.
953 uint64_t Offset = FIOffset +
954 (uint64_t)MI.getOperand(i: FIOperandNum+3).getOffset();
955 MI.getOperand(i: FIOperandNum + 3).setOffset(Offset);
956 }
957 return false;
958}
959
960unsigned X86RegisterInfo::findDeadCallerSavedReg(
961 MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
962 const MachineFunction *MF = MBB.getParent();
963 const MachineRegisterInfo &MRI = MF->getRegInfo();
964 if (MF->callsEHReturn())
965 return 0;
966
967 if (MBBI == MBB.end())
968 return 0;
969
970 switch (MBBI->getOpcode()) {
971 default:
972 return 0;
973 case TargetOpcode::PATCHABLE_RET:
974 case X86::RET:
975 case X86::RET32:
976 case X86::RET64:
977 case X86::RETI32:
978 case X86::RETI64:
979 case X86::TCRETURNdi:
980 case X86::TCRETURNri:
981 case X86::TCRETURN_WIN64ri:
982 case X86::TCRETURN_HIPE32ri:
983 case X86::TCRETURNmi:
984 case X86::TCRETURNdi64:
985 case X86::TCRETURNri64:
986 case X86::TCRETURNri64_ImpCall:
987 case X86::TCRETURNmi64:
988 case X86::TCRETURN_WINmi64:
989 case X86::EH_RETURN:
990 case X86::EH_RETURN64: {
991 LiveRegUnits LRU(*this);
992 LRU.addLiveOuts(MBB);
993 LRU.stepBackward(MI: *MBBI);
994
995 const TargetRegisterClass &RC =
996 Is64Bit ? X86::GR64_NOSPRegClass : X86::GR32_NOSPRegClass;
997 for (MCRegister Reg : RC) {
998 if (LRU.available(Reg) && !MRI.isReserved(PhysReg: Reg))
999 return Reg;
1000 }
1001 }
1002 }
1003
1004 return 0;
1005}
1006
1007Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
1008 const X86FrameLowering *TFI = getFrameLowering(MF);
1009 return TFI->hasFP(MF) ? FramePtr : StackPtr;
1010}
1011
1012Register
1013X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
1014 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
1015 Register FrameReg = getFrameRegister(MF);
1016 if (Subtarget.isTarget64BitILP32())
1017 FrameReg = getX86SubSuperRegister(Reg: FrameReg, Size: 32);
1018 return FrameReg;
1019}
1020
1021Register
1022X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
1023 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
1024 Register StackReg = getStackRegister();
1025 if (Subtarget.isTarget64BitILP32())
1026 StackReg = getX86SubSuperRegister(Reg: StackReg, Size: 32);
1027 return StackReg;
1028}
1029
1030static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
1031 const MachineRegisterInfo *MRI) {
1032 if (VRM->hasShape(virtReg: VirtReg))
1033 return VRM->getShape(virtReg: VirtReg);
1034
1035 const MachineOperand &Def = *MRI->def_begin(RegNo: VirtReg);
1036 MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
1037 unsigned OpCode = MI->getOpcode();
1038 switch (OpCode) {
1039 default:
1040 llvm_unreachable("Unexpected machine instruction on tile register!");
1041 break;
1042 case X86::COPY: {
1043 Register SrcReg = MI->getOperand(i: 1).getReg();
1044 ShapeT Shape = getTileShape(VirtReg: SrcReg, VRM, MRI);
1045 VRM->assignVirt2Shape(virtReg: VirtReg, shape: Shape);
1046 return Shape;
1047 }
1048 // We only collect the tile shape that is defined.
1049 case X86::PTILELOADDV:
1050 case X86::PTILELOADDT1V:
1051 case X86::PTDPBSSDV:
1052 case X86::PTDPBSUDV:
1053 case X86::PTDPBUSDV:
1054 case X86::PTDPBUUDV:
1055 case X86::PTILEZEROV:
1056 case X86::PTDPBF16PSV:
1057 case X86::PTDPFP16PSV:
1058 case X86::PTCMMIMFP16PSV:
1059 case X86::PTCMMRLFP16PSV:
1060 case X86::PTILELOADDRSV:
1061 case X86::PTILELOADDRST1V:
1062 case X86::PTMMULTF32PSV:
1063 case X86::PTDPBF8PSV:
1064 case X86::PTDPBHF8PSV:
1065 case X86::PTDPHBF8PSV:
1066 case X86::PTDPHF8PSV: {
1067 MachineOperand &MO1 = MI->getOperand(i: 1);
1068 MachineOperand &MO2 = MI->getOperand(i: 2);
1069 ShapeT Shape(&MO1, &MO2, MRI);
1070 VRM->assignVirt2Shape(virtReg: VirtReg, shape: Shape);
1071 return Shape;
1072 }
1073 }
1074}
1075
1076bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
1077 ArrayRef<MCPhysReg> Order,
1078 SmallVectorImpl<MCPhysReg> &Hints,
1079 const MachineFunction &MF,
1080 const VirtRegMap *VRM,
1081 const LiveRegMatrix *Matrix) const {
1082 const MachineRegisterInfo *MRI = &MF.getRegInfo();
1083 const TargetRegisterClass &RC = *MRI->getRegClass(Reg: VirtReg);
1084 bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
1085 VirtReg, Order, Hints, MF, VRM, Matrix);
1086 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
1087 const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
1088
1089 unsigned ID = RC.getID();
1090
1091 if (!VRM)
1092 return BaseImplRetVal;
1093
1094 if (ID != X86::TILERegClassID) {
1095 if (DisableRegAllocNDDHints || !ST.hasNDD() ||
1096 !TRI.isGeneralPurposeRegisterClass(RC: &RC))
1097 return BaseImplRetVal;
1098
1099 // Add any two address hints after any copy hints.
1100 SmallSet<unsigned, 4> TwoAddrHints;
1101
1102 auto TryAddNDDHint = [&](const MachineOperand &MO) {
1103 Register Reg = MO.getReg();
1104 Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(virtReg: Reg));
1105 if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Range&: Hints, Element: PhysReg))
1106 TwoAddrHints.insert(V: PhysReg);
1107 };
1108
    // An NDD instruction is compressible when Op0 is allocated to the same
    // physical register as Op1 (or Op2 if the instruction is commutable).
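    // For example, '%rax = ADD64rr_ND %rax, %rbx' can later be compressed to
    // the shorter legacy ADD64rr encoding because the destination matches the
    // first source.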
    for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
      const MachineInstr &MI = *MO.getParent();
      if (!X86::getNonNDVariant(MI.getOpcode()))
        continue;
      unsigned OpIdx = MI.getOperandNo(&MO);
      if (OpIdx == 0) {
        assert(MI.getOperand(1).isReg());
        TryAddNDDHint(MI.getOperand(1));
        if (MI.isCommutable()) {
          assert(MI.getOperand(2).isReg());
          TryAddNDDHint(MI.getOperand(2));
        }
      } else if (OpIdx == 1) {
        TryAddNDDHint(MI.getOperand(0));
      } else if (MI.isCommutable() && OpIdx == 2) {
        TryAddNDDHint(MI.getOperand(0));
      }
    }
1129
1130 for (MCPhysReg OrderReg : Order)
1131 if (TwoAddrHints.count(V: OrderReg))
1132 Hints.push_back(Elt: OrderReg);
1133
1134 return BaseImplRetVal;
1135 }
1136
1137 ShapeT VirtShape = getTileShape(VirtReg, VRM: const_cast<VirtRegMap *>(VRM), MRI);
1138 auto AddHint = [&](MCPhysReg PhysReg) {
1139 Register VReg = Matrix->getOneVReg(PhysReg);
1140 if (VReg == MCRegister::NoRegister) { // Not allocated yet
1141 Hints.push_back(Elt: PhysReg);
1142 return;
1143 }
1144 ShapeT PhysShape = getTileShape(VirtReg: VReg, VRM: const_cast<VirtRegMap *>(VRM), MRI);
1145 if (PhysShape == VirtShape)
1146 Hints.push_back(Elt: PhysReg);
1147 };
1148
1149 SmallSet<MCPhysReg, 4> CopyHints(llvm::from_range, Hints);
1150 Hints.clear();
1151 for (auto Hint : CopyHints) {
1152 if (RC.contains(Reg: Hint) && !MRI->isReserved(PhysReg: Hint))
1153 AddHint(Hint);
1154 }
1155 for (MCPhysReg PhysReg : Order) {
1156 if (!CopyHints.count(V: PhysReg) && RC.contains(Reg: PhysReg) &&
1157 !MRI->isReserved(PhysReg))
1158 AddHint(PhysReg);
1159 }
1160
1161#define DEBUG_TYPE "tile-hint"
1162 LLVM_DEBUG({
1163 dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
1164 for (auto Hint : Hints) {
1165 dbgs() << "tmm" << Hint << ",";
1166 }
1167 dbgs() << "\n";
1168 });
1169#undef DEBUG_TYPE
1170
1171 return true;
1172}
1173
1174const TargetRegisterClass *X86RegisterInfo::constrainRegClassToNonRex2(
1175 const TargetRegisterClass *RC) const {
1176 switch (RC->getID()) {
1177 default:
1178 return RC;
1179 case X86::GR8RegClassID:
1180 return &X86::GR8_NOREX2RegClass;
1181 case X86::GR16RegClassID:
1182 return &X86::GR16_NOREX2RegClass;
1183 case X86::GR32RegClassID:
1184 return &X86::GR32_NOREX2RegClass;
1185 case X86::GR64RegClassID:
1186 return &X86::GR64_NOREX2RegClass;
1187 case X86::GR32_NOSPRegClassID:
1188 return &X86::GR32_NOREX2_NOSPRegClass;
1189 case X86::GR64_NOSPRegClassID:
1190 return &X86::GR64_NOREX2_NOSPRegClass;
1191 }
1192}
1193
1194bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const {
1195 switch (RC->getID()) {
1196 default:
1197 return false;
1198 case X86::GR8_NOREX2RegClassID:
1199 case X86::GR16_NOREX2RegClassID:
1200 case X86::GR32_NOREX2RegClassID:
1201 case X86::GR64_NOREX2RegClassID:
1202 case X86::GR32_NOREX2_NOSPRegClassID:
1203 case X86::GR64_NOREX2_NOSPRegClassID:
1204 case X86::GR64_with_sub_16bit_in_GR16_NOREX2RegClassID:
1205 return true;
1206 }
1207}
1208