//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class SIMachineFunctionInfo;
class SIRegisterInfo;
class TargetRegisterClass;

class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
  enum AMDGPUPSVKind : unsigned {
    PSVImage = PseudoSourceValue::TargetCustom,
    GWSResource
  };

protected:
  AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
      : PseudoSourceValue(Kind, TM) {}

public:
  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

class AMDGPUGWSResourcePseudoSourceValue final
    : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
      : AMDGPUPseudoSourceValue(GWSResource, TM) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == GWSResource;
  }
  // This memory is inaccessible from IR.
  bool isAliased(const MachineFrameInfo *) const override {
    return false;
  }

  // This memory is inaccessible from IR.
  bool mayAlias(const MachineFrameInfo *) const override {
    return false;
  }

  void printCustom(raw_ostream &OS) const override {
    OS << "GWSResource";
  }
};
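
// Illustrative only: a pseudo source value like the one above is typically
// attached to a MachineMemOperand so alias analysis can reason about GWS
// operations. A minimal sketch (variable names and the operand size are
// assumptions, not taken from this header):
//
//   MachineMemOperand *MMO = MF.getMachineMemOperand(
//       MachinePointerInfo(MFI->getGWSPSV(TM)), MachineMemOperand::MOStore,
//       4, Align(4));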

namespace yaml {

struct SIArgument {
  bool IsRegister;
  union {
    StringValue RegisterName;
    unsigned StackOffset;
  };
  std::optional<unsigned> Mask;

  // Default constructor, which creates a stack argument.
  SIArgument() : IsRegister(false), StackOffset(0) {}
  SIArgument(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister) {
      ::new ((void *)std::addressof(RegisterName))
          StringValue(Other.RegisterName);
    } else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
  }
  SIArgument &operator=(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister) {
      ::new ((void *)std::addressof(RegisterName))
          StringValue(Other.RegisterName);
    } else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
    return *this;
  }
  ~SIArgument() {
    if (IsRegister)
      RegisterName.~StringValue();
  }

  // Helper to create a register or stack argument.
  static inline SIArgument createArgument(bool IsReg) {
    if (IsReg)
      return SIArgument(IsReg);
    return SIArgument();
  }

private:
  // Construct a register argument.
  SIArgument(bool) : IsRegister(true), RegisterName() {}
};

template <> struct MappingTraits<SIArgument> {
  static void mapping(IO &YamlIO, SIArgument &A) {
    if (YamlIO.outputting()) {
      if (A.IsRegister)
        YamlIO.mapRequired("reg", A.RegisterName);
      else
        YamlIO.mapRequired("offset", A.StackOffset);
    } else {
      auto Keys = YamlIO.keys();
      if (is_contained(Keys, "reg")) {
        A = SIArgument::createArgument(true);
        YamlIO.mapRequired("reg", A.RegisterName);
      } else if (is_contained(Keys, "offset"))
        YamlIO.mapRequired("offset", A.StackOffset);
      else
        YamlIO.setError("missing required key 'reg' or 'offset'");
    }
    YamlIO.mapOptional("mask", A.Mask);
  }
  static const bool flow = true;
};
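
// A sketch of the flow syntax this mapping produces in MIR; the register
// names, offset, and mask below are illustrative values, not defaults:
//
//   dispatchPtr:  { reg: '$sgpr4_sgpr5' }
//   workItemIDY:  { reg: '$vgpr31', mask: 1047552 }
//   workGroupIDX: { offset: 8 }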

struct SIArgumentInfo {
  std::optional<SIArgument> PrivateSegmentBuffer;
  std::optional<SIArgument> DispatchPtr;
  std::optional<SIArgument> QueuePtr;
  std::optional<SIArgument> KernargSegmentPtr;
  std::optional<SIArgument> DispatchID;
  std::optional<SIArgument> FlatScratchInit;
  std::optional<SIArgument> PrivateSegmentSize;

  std::optional<SIArgument> WorkGroupIDX;
  std::optional<SIArgument> WorkGroupIDY;
  std::optional<SIArgument> WorkGroupIDZ;
  std::optional<SIArgument> WorkGroupInfo;
  std::optional<SIArgument> LDSKernelId;
  std::optional<SIArgument> PrivateSegmentWaveByteOffset;

  std::optional<SIArgument> ImplicitArgPtr;
  std::optional<SIArgument> ImplicitBufferPtr;

  std::optional<SIArgument> WorkItemIDX;
  std::optional<SIArgument> WorkItemIDY;
  std::optional<SIArgument> WorkItemIDZ;
};

template <> struct MappingTraits<SIArgumentInfo> {
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);

    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId);
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
                       AI.PrivateSegmentWaveByteOffset);

    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);

    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
  }
};

// Default to default mode for default calling convention.
struct SIMode {
  bool IEEE = true;
  bool DX10Clamp = true;
  bool FP32InputDenormals = true;
  bool FP32OutputDenormals = true;
  bool FP64FP16InputDenormals = true;
  bool FP64FP16OutputDenormals = true;

  SIMode() = default;

  SIMode(const SIModeRegisterDefaults &Mode) {
    IEEE = Mode.IEEE;
    DX10Clamp = Mode.DX10Clamp;
    FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign;
    FP32OutputDenormals =
        Mode.FP32Denormals.Output != DenormalMode::PreserveSign;
    FP64FP16InputDenormals =
        Mode.FP64FP16Denormals.Input != DenormalMode::PreserveSign;
    FP64FP16OutputDenormals =
        Mode.FP64FP16Denormals.Output != DenormalMode::PreserveSign;
  }

  bool operator==(const SIMode Other) const {
    return IEEE == Other.IEEE &&
           DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }
};

template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
    YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
    YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals,
                       true);
    YamlIO.mapOptional("fp64-fp16-output-denormals",
                       Mode.FP64FP16OutputDenormals, true);
  }
};
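
// As an illustration (values chosen arbitrarily, not defaults), a function
// compiled without IEEE semantics and with FP32 denormals flushed would
// round-trip through MIR as:
//
//   mode:
//     ieee: false
//     fp32-input-denormals: false
//     fp32-output-denormals: false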

struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  Align MaxKernArgAlign;
  uint32_t LDSSize = 0;
  uint32_t GDSSize = 0;
  Align DynLDSAlign;
  bool IsEntryFunction = false;
  bool IsChainFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;
  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  uint32_t HighBitsOf32BitAddress = 0;

  // TODO: 10 may be a better default since it's the maximum.
  unsigned Occupancy = 0;

  SmallVector<StringValue> WWMReservedRegs;

  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  unsigned BytesInStackArgArea = 0;
  bool ReturnsVoid = true;

  std::optional<SIArgumentInfo> ArgInfo;

  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  SIMode Mode;
  std::optional<FrameIndex> ScavengeFI;
  StringValue VGPRForAGPRCopy;
  StringValue SGPRForEXECCopy;
  StringValue LongBranchReservedReg;

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI,
                        const llvm::MachineFunction &MF);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};

template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
    YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("isChainFunction", MFI.IsChainFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
    YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
    YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u);
    YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true);
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    YamlIO.mapOptional("psInputAddr", MFI.PSInputAddr, 0u);
    YamlIO.mapOptional("psInputEnable", MFI.PSInputEnable, 0u);
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
    YamlIO.mapOptional("highBitsOf32BitAddress",
                       MFI.HighBitsOf32BitAddress, 0u);
    YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
    YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs);
    YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
    YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy,
                       StringValue()); // Don't print out when it's empty.
    YamlIO.mapOptional("sgprForEXECCopy", MFI.SGPRForEXECCopy,
                       StringValue()); // Don't print out when it's empty.
    YamlIO.mapOptional("longBranchReservedReg", MFI.LongBranchReservedReg,
                       StringValue());
  }
};
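
// A minimal sketch of how this serializes inside a MIR document; the field
// values are illustrative, not defaults from this mapping:
//
//   machineFunctionInfo:
//     explicitKernArgSize: 8
//     isEntryFunction:     true
//     scratchRSrcReg:      '$sgpr0_sgpr1_sgpr2_sgpr3'
//     stackPtrOffsetReg:   '$sgpr32'
//     occupancy:           8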

} // end namespace yaml

// A CSR SGPR value can be preserved inside a callee using one of the following
// methods.
// 1. Copy to an unused scratch SGPR.
// 2. Spill to a VGPR lane.
// 3. Spill to memory via a scratch VGPR.
// The PrologEpilogSGPRSaveRestoreInfo class below represents the save/restore
// method used for an SGPR at function prolog/epilog.
enum class SGPRSaveKind : uint8_t {
  COPY_TO_SCRATCH_SGPR,
  SPILL_TO_VGPR_LANE,
  SPILL_TO_MEM
};

class PrologEpilogSGPRSaveRestoreInfo {
  SGPRSaveKind Kind;
  union {
    int Index;
    Register Reg;
  };

public:
  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {}
  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R)
      : Kind(K), Reg(R) {}
  Register getReg() const { return Reg; }
  int getIndex() const { return Index; }
  SGPRSaveKind getKind() const { return Kind; }
};
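
// Illustrative only: frame lowering picks a kind based on what is available,
// then records it against the SGPR being preserved. The register and frame
// index below are made-up values:
//
//   MFI->addToPrologEpilogSGPRSpills(
//       Reg, PrologEpilogSGPRSaveRestoreInfo(
//                SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));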

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
                                    private MachineRegisterInfo::Delegate {
  friend class GCNTargetMachine;

  // State of MODE register, assumed FP mode.
  SIModeRegisterDefaults Mode;

  // Registers that may be reserved for spilling purposes. These may be the
  // same as the input registers.
  Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;

  // This is the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the current function's frame.
  Register FrameOffsetReg = AMDGPU::FP_REG;

  // This is an ABI register used in the non-entry calling convention to
  // communicate the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the new function's frame.
  Register StackPtrOffsetReg = AMDGPU::SP_REG;

  // Registers that may be reserved when RA doesn't allocate enough
  // registers to plan for the case where an indirect branch ends up
  // being needed during branch relaxation.
  Register LongBranchReservedReg;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the
  /// callee is expected to restore the argument stack this should be a
  /// multiple of 16, all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;

  // Default/requested number of work groups for the function.
  SmallVector<unsigned> MaxNumWorkGroups = {0, 0, 0};

private:
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Tracks information about user SGPRs that will be set up by hardware and
  // will apply to all wavefronts of the grid.
  GCNUserSGPRUsageInfo UserSGPRInfo;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool LDSKernelId : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from
  // the user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  bool MayNeedAGPRs : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  // Flags associated with the virtual registers.
  IndexedMap<uint8_t, VirtReg2IndexFunctor> VRegFlags;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  mutable std::optional<bool> UsesAGPRs;

  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

  // MachineRegisterInfo callback functions to notify events.
  void MRI_NoteNewVirtualRegister(Register Reg) override;
  void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override;

public:
  struct VGPRSpillToAGPR {
    SmallVector<MCPhysReg, 32> Lanes;
    bool FullyAllocated = false;
    bool IsDead = false;
  };

private:
  // Maps the frame index of an SGPR spill to the virtual VGPR + lane index
  // assigned to each of its subregisters during the SILowerSGPRSpills pass.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
      SGPRSpillsToVirtualVGPRLanes;
  // Maps the frame index to the physical VGPR + lane index for CSR SGPR
  // spills and special SGPRs, like the Frame Pointer, identified during
  // PrologEpilogInserter.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
      SGPRSpillsToPhysicalVGPRLanes;
  unsigned NumVirtualVGPRSpillLanes = 0;
  unsigned NumPhysicalVGPRSpillLanes = 0;
  SmallVector<Register, 2> SpillVGPRs;
  SmallVector<Register, 2> SpillPhysVGPRs;
  using WWMSpillsMap = MapVector<Register, int>;
  // To track the registers used in instructions that can potentially modify
  // the inactive lanes. The WWM instructions and the writelane instructions
  // for spilling SGPRs to VGPRs fall into this category of operations. The
  // VGPRs modified by them should be spilled/restored at function
  // prolog/epilog to avoid any undesired outcome. Each entry in this map
  // holds a pair of values: the VGPR and its stack slot index.
  WWMSpillsMap WWMSpills;

  using ReservedRegSet = SmallSetVector<Register, 8>;
  // To track the VGPRs reserved for WWM instructions. They get stack slots
  // later during PrologEpilogInserter and get added into the superset
  // WWMSpills for actual spilling. A separate set makes register reservation
  // and serialization easier.
  ReservedRegSet WWMReservedRegs;

  using PrologEpilogSGPRSpill =
      std::pair<Register, PrologEpilogSGPRSaveRestoreInfo>;
  // To track the SGPR spill method used for a CSR SGPR register during
  // frame lowering. Even though the SGPR spills are handled during the
  // SILowerSGPRSpills pass, some special handling is still needed later
  // during PrologEpilogInserter.
  SmallVector<PrologEpilogSGPRSpill, 3> PrologEpilogSGPRSpills;

  // To save/restore EXEC MASK around WWM spills and copies.
  Register SGPRForEXECCopy;

  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;

  // AGPRs used for VGPR spills.
  SmallVector<MCPhysReg, 32> SpillAGPR;

  // VGPRs used for AGPR spills.
  SmallVector<MCPhysReg, 32> SpillVGPR;

  // Emergency stack slot. Sometimes, we create this before finalizing the
  // stack frame, so save it here and add it to the RegScavenger later.
  std::optional<int> ScavengeFI;

private:
  Register VGPRForAGPRCopy;

  bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI,
                                        unsigned LaneIndex);
  bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI,
                                         unsigned LaneIndex,
                                         bool IsPrologEpilog);

public:
  Register getVGPRForAGPRCopy() const {
    return VGPRForAGPRCopy;
  }

  void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) {
    VGPRForAGPRCopy = NewVGPRForAGPRCopy;
  }

  bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const;

public:
  SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default;
  SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI);

  MachineFunctionInfo *
  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
      const override;

  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
                                const MachineFunction &MF,
                                PerFunctionMIParsingState &PFS,
                                SMDiagnostic &Error, SMRange &SourceRange);

  void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }

  SIModeRegisterDefaults getMode() const { return Mode; }

  ArrayRef<SIRegisterInfo::SpilledReg>
  getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const {
    auto I = SGPRSpillsToVirtualVGPRLanes.find(FrameIndex);
    return (I == SGPRSpillsToVirtualVGPRLanes.end())
               ? ArrayRef<SIRegisterInfo::SpilledReg>()
               : ArrayRef(I->second);
  }

  ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }

  const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
  const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }

  ArrayRef<PrologEpilogSGPRSpill> getPrologEpilogSGPRSpills() const {
    assert(is_sorted(PrologEpilogSGPRSpills, llvm::less_first()));
    return PrologEpilogSGPRSpills;
  }

  GCNUserSGPRUsageInfo &getUserSGPRInfo() { return UserSGPRInfo; }

  const GCNUserSGPRUsageInfo &getUserSGPRInfo() const { return UserSGPRInfo; }

  void addToPrologEpilogSGPRSpills(Register Reg,
                                   PrologEpilogSGPRSaveRestoreInfo SI) {
    assert(!hasPrologEpilogSGPRSpillEntry(Reg));

    // Insert a new entry in the right place to keep the vector in sorted
    // order. This should be cheap since the vector is expected to be very
    // short.
    PrologEpilogSGPRSpills.insert(
        upper_bound(
            PrologEpilogSGPRSpills, Reg,
            [](const auto &LHS, const auto &RHS) { return LHS < RHS.first; }),
        std::make_pair(Reg, SI));
  }

  // Check if an entry has been created for \p Reg in PrologEpilogSGPRSpills.
  // Return true on success and false otherwise.
  bool hasPrologEpilogSGPRSpillEntry(Register Reg) const {
    auto I = find_if(PrologEpilogSGPRSpills,
                     [&Reg](const auto &Spill) { return Spill.first == Reg; });
    return I != PrologEpilogSGPRSpills.end();
  }

  // Get the scratch SGPR if allocated to save/restore \p Reg.
  Register getScratchSGPRCopyDstReg(Register Reg) const {
    auto I = find_if(PrologEpilogSGPRSpills,
                     [&Reg](const auto &Spill) { return Spill.first == Reg; });
    if (I != PrologEpilogSGPRSpills.end() &&
        I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
      return I->second.getReg();

    return AMDGPU::NoRegister;
  }

  // Get all scratch SGPRs allocated to copy/restore the SGPR spills.
  void getAllScratchSGPRCopyDstRegs(SmallVectorImpl<Register> &Regs) const {
    for (const auto &SI : PrologEpilogSGPRSpills) {
      if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
        Regs.push_back(SI.second.getReg());
    }
  }

  // Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI.
  bool checkIndexInPrologEpilogSGPRSpills(int FI) const {
    return find_if(PrologEpilogSGPRSpills,
                   [FI](const std::pair<Register,
                                        PrologEpilogSGPRSaveRestoreInfo> &SI) {
                     return SI.second.getKind() ==
                                SGPRSaveKind::SPILL_TO_VGPR_LANE &&
                            SI.second.getIndex() == FI;
                   }) != PrologEpilogSGPRSpills.end();
  }

  const PrologEpilogSGPRSaveRestoreInfo &
  getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const {
    auto I = find_if(PrologEpilogSGPRSpills,
                     [&Reg](const auto &Spill) { return Spill.first == Reg; });
    assert(I != PrologEpilogSGPRSpills.end());

    return I->second;
  }

  ArrayRef<SIRegisterInfo::SpilledReg>
  getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const {
    auto I = SGPRSpillsToPhysicalVGPRLanes.find(FrameIndex);
    return (I == SGPRSpillsToPhysicalVGPRLanes.end())
               ? ArrayRef<SIRegisterInfo::SpilledReg>()
               : ArrayRef(I->second);
  }

  void setFlag(Register Reg, uint8_t Flag) {
    assert(Reg.isVirtual());
    if (VRegFlags.inBounds(Reg))
      VRegFlags[Reg] |= Flag;
  }

  bool checkFlag(Register Reg, uint8_t Flag) const {
    if (Reg.isPhysical())
      return false;

    return VRegFlags.inBounds(Reg) && VRegFlags[Reg] & Flag;
  }
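
  // Illustrative only: passes tag virtual registers with target flags and
  // query them later. The flag value here assumes a bitmask such as the
  // WWM_REG flag defined elsewhere in the backend:
  //
  //   MFI->setFlag(VReg, AMDGPU::VirtRegFlag::WWM_REG);
  //   if (MFI->checkFlag(VReg, AMDGPU::VirtRegFlag::WWM_REG))
  //     ; // treat VReg as a WWM register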

  bool hasVRegFlags() { return VRegFlags.size(); }

  void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4,
                        Align Alignment = Align(4));

  void splitWWMSpillRegisters(
      MachineFunction &MF,
      SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
      SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const;

  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    return SpillAGPR;
  }

  Register getSGPRForEXECCopy() const { return SGPRForEXECCopy; }

  void setSGPRForEXECCopy(Register Reg) { SGPRForEXECCopy = Reg; }

  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    return SpillVGPR;
  }

  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
                                         : I->second.Lanes[Lane];
  }

  void setVGPRToAGPRSpillDead(int FrameIndex) {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    if (I != VGPRToAGPRSpills.end())
      I->second.IsDead = true;
  }
  // Shift the physical VGPRs allocated in the highest range for CSR SGPR
  // spilling down into the lowest available range.
  void shiftSpillPhysVGPRsToLowestRange(MachineFunction &MF);

  bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI,
                                   bool SpillToPhysVGPRLane = false,
                                   bool IsPrologEpilog = false);
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);

  /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
  /// to the default stack.
  bool removeDeadFrameIndices(MachineFrameInfo &MFI,
                              bool ResetSGPRSpillStackIDs);

  int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
  std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs.
  Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  Register addDispatchPtr(const SIRegisterInfo &TRI);
  Register addQueuePtr(const SIRegisterInfo &TRI);
  Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
  Register addDispatchID(const SIRegisterInfo &TRI);
  Register addFlatScratchInit(const SIRegisterInfo &TRI);
  Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
  Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
  Register addLDSKernelId();
  SmallVectorImpl<MCRegister> *
  addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
                      unsigned AllocSizeDWord, int KernArgIdx,
                      int PaddingSGPRs);

  /// Increment user SGPRs used for padding the argument list only.
  Register addReservedUserSGPR() {
    Register Next = getNextUserSGPR();
    ++NumUserSGPRs;
    return Next;
  }

  // Add system SGPRs.
  Register addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  Register addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  Register addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  Register addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  bool hasLDSKernelId() const { return LDSKernelId; }

  // Add special VGPR inputs.
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  Register addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset =
        ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(Register Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
    return Arg ? Arg->getRegister() : MCRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  Register getGITPtrLoReg(const MachineFunction &MF) const;

  uint32_t get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getNumKernargPreloadedSGPRs() const {
    return UserSGPRInfo.getNumKernargPreloadSGPRs();
  }

  Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  Register getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  Register getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setFrameOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    FrameOffsetReg = Reg;
  }

  void setStackPtrOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  void setLongBranchReservedReg(Register Reg) { LongBranchReservedReg = Reg; }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in
  // serialized MIR.
  Register getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  Register getLongBranchReservedReg() const { return LongBranchReservedReg; }

  Register getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  Register getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }
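
  // For illustration: each bit position corresponds to one interpolation
  // input in the SPI_PS_INPUT_ADDR / SPI_PS_INPUT_ENA config registers (bit 0
  // is assumed here to be the first perspective-interpolation input):
  //
  //   MFI->markPSInputAllocated(0);             // PSInputAddr   |= 0b1
  //   MFI->markPSInputEnabled(0);               // PSInputEnable |= 0b1
  //   bool Alloc = MFI->isPSInputAllocated(0);  // true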

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group
  /// sizes for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this
  /// function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this
  /// function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  const AMDGPUGWSResourcePseudoSourceValue *
  getGWSPSV(const AMDGPUTargetMachine &TM) {
    return &GWSResourcePSV;
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }

  bool mayNeedAGPRs() const {
    return MayNeedAGPRs;
  }

  // \returns true if a function has a use of AGPRs via inline asm or
  // has a call which may use them.
  bool mayUseAGPRs(const Function &F) const;

  // \returns true if a function needs or may need AGPRs.
  bool usesAGPRs(const MachineFunction &MF) const;

  /// \returns Default/requested number of work groups for this function.
  SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }

  unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; }
  unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; }
  unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H