1 | //===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Interface definition for SIRegisterInfo |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H |
15 | #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H |
16 | |
17 | #include "llvm/ADT/BitVector.h" |
18 | |
19 | #define |
20 | #include "AMDGPUGenRegisterInfo.inc" |
21 | |
22 | #include "SIDefines.h" |
23 | |
24 | namespace llvm { |
25 | |
// Forward declarations: these types are only used by pointer or reference in
// this header, so their full definitions are not required here.
class GCNSubtarget;
class LiveIntervals;
class LiveRegUnits;
class RegisterBank;
struct SGPRSpillBuilder;
31 | |
32 | class SIRegisterInfo final : public AMDGPUGenRegisterInfo { |
33 | private: |
34 | const GCNSubtarget &ST; |
35 | bool SpillSGPRToVGPR; |
36 | bool isWave32; |
37 | BitVector RegPressureIgnoredUnits; |
38 | |
39 | /// Sub reg indexes for getRegSplitParts. |
40 | /// First index represents subreg size from 1 to 16 DWORDs. |
41 | /// The inner vector is sorted by bit offset. |
42 | /// Provided a register can be fully split with given subregs, |
43 | /// all elements of the inner vector combined give a full lane mask. |
44 | static std::array<std::vector<int16_t>, 16> RegSplitParts; |
45 | |
46 | // Table representing sub reg of given width and offset. |
47 | // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512. |
48 | // Second index is 32 different dword offsets. |
49 | static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable; |
50 | |
51 | void reserveRegisterTuples(BitVector &, MCRegister Reg) const; |
52 | |
53 | public: |
54 | SIRegisterInfo(const GCNSubtarget &ST); |
55 | |
56 | struct SpilledReg { |
57 | Register VGPR; |
58 | int Lane = -1; |
59 | |
60 | SpilledReg() = default; |
61 | SpilledReg(Register R, int L) : VGPR(R), Lane(L) {} |
62 | |
63 | bool hasLane() { return Lane != -1; } |
64 | bool hasReg() { return VGPR != 0; } |
65 | }; |
66 | |
67 | /// \returns the sub reg enum value for the given \p Channel |
68 | /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) |
69 | static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1); |
70 | |
71 | bool spillSGPRToVGPR() const { |
72 | return SpillSGPRToVGPR; |
73 | } |
74 | |
75 | /// Return the largest available SGPR aligned to \p Align for the register |
76 | /// class \p RC. |
77 | MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, |
78 | const unsigned Align, |
79 | const TargetRegisterClass *RC) const; |
80 | |
81 | /// Return the end register initially reserved for the scratch buffer in case |
82 | /// spilling is needed. |
83 | MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const; |
84 | |
85 | BitVector getReservedRegs(const MachineFunction &MF) const override; |
86 | bool isAsmClobberable(const MachineFunction &MF, |
87 | MCRegister PhysReg) const override; |
88 | |
89 | const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; |
90 | const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const; |
91 | const uint32_t *getCallPreservedMask(const MachineFunction &MF, |
92 | CallingConv::ID) const override; |
93 | const uint32_t *getNoPreservedMask() const override; |
94 | |
95 | // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling |
96 | // conventions are free to use certain VGPRs without saving and restoring any |
97 | // lanes (not even inactive ones). |
98 | static bool isChainScratchRegister(Register VGPR); |
99 | |
100 | // Stack access is very expensive. CSRs are also the high registers, and we |
101 | // want to minimize the number of used registers. |
102 | unsigned getCSRFirstUseCost() const override { |
103 | return 100; |
104 | } |
105 | |
106 | const TargetRegisterClass * |
107 | getLargestLegalSuperClass(const TargetRegisterClass *RC, |
108 | const MachineFunction &MF) const override; |
109 | |
110 | Register getFrameRegister(const MachineFunction &MF) const override; |
111 | |
112 | bool hasBasePointer(const MachineFunction &MF) const; |
113 | Register getBaseRegister() const; |
114 | |
115 | bool shouldRealignStack(const MachineFunction &MF) const override; |
116 | bool requiresRegisterScavenging(const MachineFunction &Fn) const override; |
117 | |
118 | bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; |
119 | bool requiresFrameIndexReplacementScavenging( |
120 | const MachineFunction &MF) const override; |
121 | bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override; |
122 | |
123 | int64_t getScratchInstrOffset(const MachineInstr *MI) const; |
124 | |
125 | int64_t getFrameIndexInstrOffset(const MachineInstr *MI, |
126 | int Idx) const override; |
127 | |
128 | bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; |
129 | |
130 | Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, |
131 | int64_t Offset) const override; |
132 | |
133 | void resolveFrameIndex(MachineInstr &MI, Register BaseReg, |
134 | int64_t Offset) const override; |
135 | |
136 | bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, |
137 | int64_t Offset) const override; |
138 | |
139 | const TargetRegisterClass *getPointerRegClass( |
140 | const MachineFunction &MF, unsigned Kind = 0) const override; |
141 | |
142 | /// Returns a legal register class to copy a register in the specified class |
143 | /// to or from. If it is possible to copy the register directly without using |
144 | /// a cross register class copy, return the specified RC. Returns NULL if it |
145 | /// is not possible to copy between two registers of the specified class. |
146 | const TargetRegisterClass * |
147 | getCrossCopyRegClass(const TargetRegisterClass *RC) const override; |
148 | |
149 | void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, |
150 | bool IsLoad, bool IsKill = true) const; |
151 | |
152 | /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a |
153 | /// free VGPR lane to spill. |
154 | bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, |
155 | SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, |
156 | bool OnlyToVGPR = false, |
157 | bool SpillToPhysVGPRLane = false) const; |
158 | |
159 | bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, |
160 | SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, |
161 | bool OnlyToVGPR = false, |
162 | bool SpillToPhysVGPRLane = false) const; |
163 | |
164 | bool spillEmergencySGPR(MachineBasicBlock::iterator MI, |
165 | MachineBasicBlock &RestoreMBB, Register SGPR, |
166 | RegScavenger *RS) const; |
167 | |
168 | bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, |
169 | unsigned FIOperandNum, |
170 | RegScavenger *RS) const override; |
171 | |
172 | bool eliminateSGPRToVGPRSpillFrameIndex( |
173 | MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, |
174 | SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, |
175 | bool SpillToPhysVGPRLane = false) const; |
176 | |
177 | StringRef getRegAsmName(MCRegister Reg) const override; |
178 | |
179 | // Pseudo regs are not allowed |
180 | unsigned getHWRegIndex(MCRegister Reg) const { |
181 | return getEncodingValue(RegNo: Reg) & 0xff; |
182 | } |
183 | |
184 | LLVM_READONLY |
185 | const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const; |
186 | |
187 | LLVM_READONLY |
188 | const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const; |
189 | |
190 | LLVM_READONLY |
191 | const TargetRegisterClass * |
192 | getVectorSuperClassForBitWidth(unsigned BitWidth) const; |
193 | |
194 | LLVM_READONLY |
195 | static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth); |
196 | |
197 | /// \returns true if this class contains only SGPR registers |
198 | static bool isSGPRClass(const TargetRegisterClass *RC) { |
199 | return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC); |
200 | } |
201 | |
202 | /// \returns true if this class ID contains only SGPR registers |
203 | bool isSGPRClassID(unsigned RCID) const { |
204 | return isSGPRClass(RC: getRegClass(RCID)); |
205 | } |
206 | |
207 | bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const; |
208 | |
209 | /// \returns true if this class contains only VGPR registers |
210 | static bool isVGPRClass(const TargetRegisterClass *RC) { |
211 | return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC); |
212 | } |
213 | |
214 | /// \returns true if this class contains only AGPR registers |
215 | static bool isAGPRClass(const TargetRegisterClass *RC) { |
216 | return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC); |
217 | } |
218 | |
219 | /// \returns true only if this class contains both VGPR and AGPR registers |
220 | bool isVectorSuperClass(const TargetRegisterClass *RC) const { |
221 | return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC); |
222 | } |
223 | |
224 | /// \returns true only if this class contains both VGPR and SGPR registers |
225 | bool isVSSuperClass(const TargetRegisterClass *RC) const { |
226 | return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC); |
227 | } |
228 | |
229 | /// \returns true if this class contains VGPR registers. |
230 | static bool hasVGPRs(const TargetRegisterClass *RC) { |
231 | return RC->TSFlags & SIRCFlags::HasVGPR; |
232 | } |
233 | |
234 | /// \returns true if this class contains AGPR registers. |
235 | static bool hasAGPRs(const TargetRegisterClass *RC) { |
236 | return RC->TSFlags & SIRCFlags::HasAGPR; |
237 | } |
238 | |
239 | /// \returns true if this class contains SGPR registers. |
240 | static bool hasSGPRs(const TargetRegisterClass *RC) { |
241 | return RC->TSFlags & SIRCFlags::HasSGPR; |
242 | } |
243 | |
244 | /// \returns true if this class contains any vector registers. |
245 | static bool hasVectorRegisters(const TargetRegisterClass *RC) { |
246 | return hasVGPRs(RC) || hasAGPRs(RC); |
247 | } |
248 | |
249 | /// \returns A VGPR reg class with the same width as \p SRC |
250 | const TargetRegisterClass * |
251 | getEquivalentVGPRClass(const TargetRegisterClass *SRC) const; |
252 | |
253 | /// \returns An AGPR reg class with the same width as \p SRC |
254 | const TargetRegisterClass * |
255 | getEquivalentAGPRClass(const TargetRegisterClass *SRC) const; |
256 | |
257 | /// \returns A SGPR reg class with the same width as \p SRC |
258 | const TargetRegisterClass * |
259 | getEquivalentSGPRClass(const TargetRegisterClass *VRC) const; |
260 | |
261 | /// Returns a register class which is compatible with \p SuperRC, such that a |
262 | /// subregister exists with class \p SubRC with subregister index \p |
263 | /// SubIdx. If this is impossible (e.g., an unaligned subregister index within |
264 | /// a register tuple), return null. |
265 | const TargetRegisterClass * |
266 | getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, |
267 | const TargetRegisterClass *SubRC, |
268 | unsigned SubIdx) const; |
269 | |
270 | bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, |
271 | unsigned DefSubReg, |
272 | const TargetRegisterClass *SrcRC, |
273 | unsigned SrcSubReg) const override; |
274 | |
275 | /// \returns True if operands defined with this operand type can accept |
276 | /// a literal constant (i.e. any 32-bit immediate). |
277 | bool opCanUseLiteralConstant(unsigned OpType) const; |
278 | |
279 | /// \returns True if operands defined with this operand type can accept |
280 | /// an inline constant. i.e. An integer value in the range (-16, 64) or |
281 | /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f. |
282 | bool opCanUseInlineConstant(unsigned OpType) const; |
283 | |
284 | MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, |
285 | const TargetRegisterClass *RC, |
286 | const MachineFunction &MF, |
287 | bool ReserveHighestVGPR = false) const; |
288 | |
289 | const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI, |
290 | Register Reg) const; |
291 | const TargetRegisterClass * |
292 | getRegClassForOperandReg(const MachineRegisterInfo &MRI, |
293 | const MachineOperand &MO) const; |
294 | |
295 | bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const; |
296 | bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const; |
297 | bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const { |
298 | return isVGPR(MRI, Reg) || isAGPR(MRI, Reg); |
299 | } |
300 | |
301 | // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs |
302 | // (such as VCC) which hold a wave-wide vector of boolean values. Examining |
303 | // just the register class is not suffcient; it needs to be combined with a |
304 | // value type. The next predicate isUniformReg() does this correctly. |
305 | bool isDivergentRegClass(const TargetRegisterClass *RC) const override { |
306 | return !isSGPRClass(RC); |
307 | } |
308 | |
309 | bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, |
310 | Register Reg) const override; |
311 | |
312 | ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC, |
313 | unsigned EltSize) const; |
314 | |
315 | bool shouldCoalesce(MachineInstr *MI, |
316 | const TargetRegisterClass *SrcRC, |
317 | unsigned SubReg, |
318 | const TargetRegisterClass *DstRC, |
319 | unsigned DstSubReg, |
320 | const TargetRegisterClass *NewRC, |
321 | LiveIntervals &LIS) const override; |
322 | |
323 | unsigned getRegPressureLimit(const TargetRegisterClass *RC, |
324 | MachineFunction &MF) const override; |
325 | |
326 | unsigned getRegPressureSetLimit(const MachineFunction &MF, |
327 | unsigned Idx) const override; |
328 | |
329 | const int *getRegUnitPressureSets(unsigned RegUnit) const override; |
330 | |
331 | MCRegister getReturnAddressReg(const MachineFunction &MF) const; |
332 | |
333 | const TargetRegisterClass * |
334 | getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const; |
335 | |
336 | const TargetRegisterClass * |
337 | getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const { |
338 | return getRegClassForSizeOnBank(Size: Ty.getSizeInBits(), Bank); |
339 | } |
340 | |
341 | const TargetRegisterClass * |
342 | getConstrainedRegClassForOperand(const MachineOperand &MO, |
343 | const MachineRegisterInfo &MRI) const override; |
344 | |
345 | const TargetRegisterClass *getBoolRC() const { |
346 | return isWave32 ? &AMDGPU::SReg_32RegClass |
347 | : &AMDGPU::SReg_64RegClass; |
348 | } |
349 | |
350 | const TargetRegisterClass *getWaveMaskRegClass() const { |
351 | return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass |
352 | : &AMDGPU::SReg_64_XEXECRegClass; |
353 | } |
354 | |
355 | // Return the appropriate register class to use for 64-bit VGPRs for the |
356 | // subtarget. |
357 | const TargetRegisterClass *getVGPR64Class() const; |
358 | |
359 | MCRegister getVCC() const; |
360 | |
361 | MCRegister getExec() const; |
362 | |
363 | const TargetRegisterClass *getRegClass(unsigned RCID) const; |
364 | |
365 | // Find reaching register definition |
366 | MachineInstr *findReachingDef(Register Reg, unsigned SubReg, |
367 | MachineInstr &Use, |
368 | MachineRegisterInfo &MRI, |
369 | LiveIntervals *LIS) const; |
370 | |
371 | const uint32_t *getAllVGPRRegMask() const; |
372 | const uint32_t *getAllAGPRRegMask() const; |
373 | const uint32_t *getAllVectorRegMask() const; |
374 | const uint32_t *getAllAllocatableSRegMask() const; |
375 | |
376 | // \returns number of 32 bit registers covered by a \p LM |
377 | static unsigned getNumCoveredRegs(LaneBitmask LM) { |
378 | // The assumption is that every lo16 subreg is an even bit and every hi16 |
379 | // is an adjacent odd bit or vice versa. |
380 | uint64_t Mask = LM.getAsInteger(); |
381 | uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL; |
382 | Mask = (Even >> 1) | Mask; |
383 | uint64_t Odd = Mask & 0x5555555555555555ULL; |
384 | return llvm::popcount(Value: Odd); |
385 | } |
386 | |
387 | // \returns a DWORD offset of a \p SubReg |
388 | unsigned getChannelFromSubReg(unsigned SubReg) const { |
389 | return SubReg ? (getSubRegIdxOffset(Idx: SubReg) + 31) / 32 : 0; |
390 | } |
391 | |
392 | // \returns a DWORD size of a \p SubReg |
393 | unsigned getNumChannelsFromSubReg(unsigned SubReg) const { |
394 | return getNumCoveredRegs(LM: getSubRegIndexLaneMask(SubIdx: SubReg)); |
395 | } |
396 | |
397 | // For a given 16 bit \p Reg \returns a 32 bit register holding it. |
398 | // \returns \p Reg otherwise. |
399 | MCPhysReg get32BitRegister(MCPhysReg Reg) const; |
400 | |
401 | // Returns true if a given register class is properly aligned for |
402 | // the subtarget. |
403 | bool isProperlyAlignedRC(const TargetRegisterClass &RC) const; |
404 | |
405 | // Given \p RC returns corresponding aligned register class if required |
406 | // by the subtarget. |
407 | const TargetRegisterClass * |
408 | getProperlyAlignedRC(const TargetRegisterClass *RC) const; |
409 | |
410 | /// Return all SGPR128 which satisfy the waves per execution unit requirement |
411 | /// of the subtarget. |
412 | ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const; |
413 | |
414 | /// Return all SGPR64 which satisfy the waves per execution unit requirement |
415 | /// of the subtarget. |
416 | ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const; |
417 | |
418 | /// Return all SGPR32 which satisfy the waves per execution unit requirement |
419 | /// of the subtarget. |
420 | ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const; |
421 | |
422 | // Insert spill or restore instructions. |
423 | // When lowering spill pseudos, the RegScavenger should be set. |
424 | // For creating spill instructions during frame lowering, where no scavenger |
425 | // is available, LiveUnits can be used. |
426 | void buildSpillLoadStore(MachineBasicBlock &MBB, |
427 | MachineBasicBlock::iterator MI, const DebugLoc &DL, |
428 | unsigned LoadStoreOp, int Index, Register ValueReg, |
429 | bool ValueIsKill, MCRegister ScratchOffsetReg, |
430 | int64_t InstrOffset, MachineMemOperand *MMO, |
431 | RegScavenger *RS, |
432 | LiveRegUnits *LiveUnits = nullptr) const; |
433 | |
434 | // Return alignment in register file of first register in a register tuple. |
435 | unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const { |
436 | return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32; |
437 | } |
438 | |
439 | // Check if register class RC has required alignment. |
440 | bool isRegClassAligned(const TargetRegisterClass *RC, |
441 | unsigned AlignNumBits) const { |
442 | assert(AlignNumBits != 0); |
443 | unsigned RCAlign = getRegClassAlignmentNumBits(RC); |
444 | return RCAlign == AlignNumBits || |
445 | (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0); |
446 | } |
447 | |
448 | // Return alignment of a SubReg relative to start of a register in RC class. |
449 | // No check if the subreg is supported by the current RC is made. |
450 | unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, |
451 | unsigned SubReg) const; |
452 | }; |
453 | |
namespace AMDGPU {
/// Get the size in bits of a register from the register class \p RC.
///
/// \param RC the register class to query.
/// \returns the register size, in bits.
unsigned getRegBitWidth(const TargetRegisterClass &RC);
} // namespace AMDGPU
458 | |
459 | } // End namespace llvm |
460 | |
461 | #endif |
462 | |