//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
          cl::desc("Enable use of a base pointer for complex stack frames"));

static cl::opt<bool>
    DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
                            cl::init(false),
                            cl::desc("Disable two address hints for register "
                                     "allocation"));

extern cl::opt<bool> X86EnableAPXForRelocation;

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isX86_64() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isX86_64() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isX86_64();
  IsTarget64BitLP64 = Is64Bit && !TT.isX32();
  IsWin64 = Is64Bit && TT.isOSWindows();
  IsUEFI64 = Is64Bit && TT.isUEFI();

  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit mode PIC
  // requires the GOT address in EBX before function calls via the PLT.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
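  // For example, GR32 constrains to GR32_ABCD here: in 32-bit mode only
  // EAX/EBX/ECX/EDX have addressable low-8-bit sub-registers, the same set
  // reachable through sub_8bit_hi.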
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow register
  // class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  // Keep using the non-rex2 register class when the APX feature (EGPR/NDD/NF)
  // is not enabled for relocation.
  if (!X86EnableAPXForRelocation && isNonRex2RegClass(RC))
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  auto I = RC->superclasses().begin();
  auto E = RC->superclasses().end();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    if (I != E) {
      Super = getRegClass(*I);
      ++I;
    } else {
      Super = nullptr;
    }
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
  assert(Kind == 0 && "this should only be used for default cases");
  if (IsTarget64BitLP64)
    return &X86::GR64RegClass;
  // If the target is 64-bit but we have been told to use 32-bit addresses,
  // we can still use a 64-bit register as long as we know the high bits
  // are zeros.
  // Reflect that in the returned register class.
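  // For example, on x32 a pointer lives in the low 32 bits of a 64-bit
  // register with the upper bits known zero, so addressing through the full
  // 64-bit register is safe and avoids an extra zero-extension.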
  return Is64Bit ? &X86::LOW32_ADDR_ACCESSRegClass : &X86::GR32RegClass;
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the NoCallerSavedRegisters attribute exists, switch to the X86_INTR
  // calling convention because it has the matching CSR list.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the attribute is specified, override the CSRs normally specified by
  // the calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64 || IsUEFI64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

const MCPhysReg *
X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
  return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return (IsWin64 || IsUEFI64) ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  auto &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasUserReservedRegisters()) {
    if (ST.is64Bit()) {
      // Set r# as reserved if the user requested it.
      for (unsigned Reg = X86::R8; Reg <= X86::R15; ++Reg)
        if (ST.isRegisterReservedByUser(Reg))
          for (const MCPhysReg &SubReg : subregs_inclusive(Reg))
            Reserved.set(SubReg);
      if (ST.hasEGPR())
        for (unsigned Reg = X86::R16; Reg <= X86::R31; ++Reg)
          if (ST.isRegisterReservedByUser(Reg))
            for (const MCPhysReg &SubReg : subregs_inclusive(Reg))
              Reserved.set(SubReg);
    } else {
      if (ST.isRegisterReservedByUser(X86::EDI))
        for (const MCPhysReg &SubReg : sub_and_superregs_inclusive(X86::EDI))
          Reserved.set(SubReg);
    }
  }

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
      MF.getContext().reportError(
          SMLoc(),
          "Frame pointer clobbered by function invoke is not supported.");

    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
      MF.getContext().reportError(SMLoc(),
                                  "Stack realignment in presence of dynamic "
                                  "allocas is not supported with "
                                  "this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are the old 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(X86::R16, X86::R31WH + 1);

  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512 and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  // Registers before AVX512,
  // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  // AMX registers (TMM)
  // APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}

bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // The frame pointer is fixed while it is in use as the frame pointer.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately, EFLAGS shows up as live-out after branch folding. We add an
  // assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

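// CantUseSP - Return true when frame objects can't be addressed relative to
// the stack pointer: variable-sized objects or opaque SP adjustments (e.g.
// stack-adjusting inline asm) make SP-relative offsets unknown at compile
// time.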
static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // When a stack-pointer-save instruction exists, arguments are referenced
  // through a virtual register, and no base pointer is needed.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
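  // For example, a function that over-aligns a local with alignas(32) and
  // also calls alloca() can use neither FP-relative nor SP-relative
  // addressing, so it gets a base pointer (RBX/EBX in 64-bit mode, ESI in
  // 32-bit mode).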
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really try first to entirely eliminate this
// instruction, which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'.
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}

bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int64_t FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location. This
  // matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r, when BasePtr is 32 bits (X32) we can use the full-size
  // 64-bit register as the source operand; the semantics are the same and the
  // destination is 32 bits. It saves one byte per lea in code since the 0x67
  // prefix is avoided. Don't change BasePtr since it is used later for stack
  // adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with base register. Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    const X86InstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
    const DebugLoc &DL = MI.getDebugLoc();
    int64_t Imm = MI.getOperand(FIOperandNum + 3).getImm();
    int64_t Offset = FIOffset + Imm;
    bool FitsIn32Bits = isInt<32>(Offset);
    // If the offset will not fit in a 32-bit displacement, then for 64-bit
    // targets, scavenge a register to hold it. Otherwise...
    if (Is64Bit && !FitsIn32Bits) {
      assert(RS && "RegisterScavenger was NULL");

      RS->enterBasicBlockEnd(MBB);
      RS->backward(std::next(II));

      Register ScratchReg = RS->scavengeRegisterBackwards(
          X86::GR64RegClass, II, /*RestoreAfter=*/false, /*SPAdj=*/0,
          /*AllowSpill=*/true);
      assert(ScratchReg != 0 && "scratch reg was 0");
      RS->setRegUsed(ScratchReg);

      BuildMI(MBB, II, DL, TII->get(X86::MOV64ri), ScratchReg).addImm(Offset);

      MI.getOperand(FIOperandNum + 3).setImm(0);
      MI.getOperand(FIOperandNum + 2).setReg(ScratchReg);

      return false;
    }

    // ... for 32-bit targets, this is a bug!
    if (!Is64Bit && !FitsIn32Bits) {
      MI.emitGenericError("64-bit offset calculated but target is 32-bit");
      // Trap so that the instruction verification pass does not fail if run.
      BuildMI(MBB, MBBI, DL, TII->get(X86::TRAP));
      return false;
    }

    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
                      (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}

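// findDeadCallerSavedReg - Look for a caller-saved GR32/GR64 register that is
// dead at the return instruction MBBI and can therefore be clobbered, e.g. to
// hold an address or a stack adjustment. Returns 0 if the function calls
// eh.return or no such register is available.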
unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  if (MF->callsEHReturn())
    return 0;

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURN_WIN64ri:
  case X86::TCRETURN_HIPE32ri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNri64_ImpCall:
  case X86::TCRETURNmi64:
  case X86::TCRETURN_WINmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    LiveRegUnits LRU(*this);
    LRU.addLiveOuts(MBB);
    LRU.stepBackward(*MBBI);

    const TargetRegisterClass &RC =
        Is64Bit ? X86::GR64_NOSPRegClass : X86::GR32_NOSPRegClass;
    for (MCRegister Reg : RC) {
      if (LRU.available(Reg) && !MRI.isReserved(Reg))
        return Reg;
    }
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

Register
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

Register
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

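// getTileShape - Return the (row, column) shape of an AMX tile virtual
// register, caching it in the VirtRegMap on first computation. Shapes are
// propagated through COPYs back to the defining tile instruction, whose
// operands 1 and 2 carry the row and column.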
static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTILELOADDRSV:
  case X86::PTILELOADDRST1V:
  case X86::PTMMULTF32PSV:
  case X86::PTDPBF8PSV:
  case X86::PTDPBHF8PSV:
  case X86::PTDPHBF8PSV:
  case X86::PTDPHF8PSV: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  }
}

bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  unsigned ID = RC.getID();

  if (!VRM)
    return BaseImplRetVal;

  if (ID != X86::TILERegClassID) {
    if (DisableRegAllocNDDHints || !ST.hasNDD() ||
        !TRI.isGeneralPurposeRegisterClass(&RC))
      return BaseImplRetVal;

    // Add any two address hints after any copy hints.
    SmallSet<unsigned, 4> TwoAddrHints;

    auto TryAddNDDHint = [&](const MachineOperand &MO) {
      Register Reg = MO.getReg();
      Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(Reg));
      if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
        TwoAddrHints.insert(PhysReg);
    };

    // An NDD instruction is compressible when Op0 is allocated to the same
    // physical register as Op1 (or Op2 if the instruction is commutable).
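    // For example, an NDD add computing Op0 = Op1 + Op2 can use the shorter
    // legacy two-operand encoding once Op0 and Op1 share a physical register,
    // so we hint the allocator toward such an assignment.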
    for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
      const MachineInstr &MI = *MO.getParent();
      if (!X86::getNonNDVariant(MI.getOpcode()))
        continue;
      unsigned OpIdx = MI.getOperandNo(&MO);
      if (OpIdx == 0) {
        assert(MI.getOperand(1).isReg());
        TryAddNDDHint(MI.getOperand(1));
        if (MI.isCommutable()) {
          assert(MI.getOperand(2).isReg());
          TryAddNDDHint(MI.getOperand(2));
        }
      } else if (OpIdx == 1) {
        TryAddNDDHint(MI.getOperand(0));
      } else if (MI.isCommutable() && OpIdx == 2) {
        TryAddNDDHint(MI.getOperand(0));
      }
    }

    for (MCPhysReg OrderReg : Order)
      if (TwoAddrHints.count(OrderReg))
        Hints.push_back(OrderReg);

    return BaseImplRetVal;
  }

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (PhysShape == VirtShape)
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints(llvm::from_range, Hints);
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}

const TargetRegisterClass *X86RegisterInfo::constrainRegClassToNonRex2(
    const TargetRegisterClass *RC) const {
  switch (RC->getID()) {
  default:
    return RC;
  case X86::GR8RegClassID:
    return &X86::GR8_NOREX2RegClass;
  case X86::GR16RegClassID:
    return &X86::GR16_NOREX2RegClass;
  case X86::GR32RegClassID:
    return &X86::GR32_NOREX2RegClass;
  case X86::GR64RegClassID:
    return &X86::GR64_NOREX2RegClass;
  case X86::GR32_NOSPRegClassID:
    return &X86::GR32_NOREX2_NOSPRegClass;
  case X86::GR64_NOSPRegClassID:
    return &X86::GR64_NOREX2_NOSPRegClass;
  }
}

bool X86RegisterInfo::isNonRex2RegClass(const TargetRegisterClass *RC) const {
  switch (RC->getID()) {
  default:
    return false;
  case X86::GR8_NOREX2RegClassID:
  case X86::GR16_NOREX2RegClassID:
  case X86::GR32_NOREX2RegClassID:
  case X86::GR64_NOREX2RegClassID:
  case X86::GR32_NOREX2_NOSPRegClassID:
  case X86::GR64_NOREX2_NOSPRegClassID:
  case X86::GR64_with_sub_16bit_in_GR16_NOREX2RegClassID:
    return true;
  }
}