//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H

#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

namespace llvm {

class MachineFrameInfo;
class MachineFunction;
class SIMachineFunctionInfo;
class SIRegisterInfo;
class TargetRegisterClass;

class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
  enum AMDGPUPSVKind : unsigned {
    PSVImage = PseudoSourceValue::TargetCustom,
    GWSResource
  };

protected:
  AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
      : PseudoSourceValue(Kind, TM) {}

public:
  bool isConstant(const MachineFrameInfo *) const override {
    // This should probably be true for most images, but we will start by being
    // conservative.
    return false;
  }

  bool isAliased(const MachineFrameInfo *) const override {
    return true;
  }

  bool mayAlias(const MachineFrameInfo *) const override {
    return true;
  }
};

class AMDGPUGWSResourcePseudoSourceValue final
    : public AMDGPUPseudoSourceValue {
public:
  explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
      : AMDGPUPseudoSourceValue(GWSResource, TM) {}

  static bool classof(const PseudoSourceValue *V) {
    return V->kind() == GWSResource;
  }

  // GWS resource memory is inaccessible from IR, so it never aliases frame
  // objects.
  bool isAliased(const MachineFrameInfo *) const override {
    return false;
  }

  // GWS resource memory is inaccessible from IR, so it never aliases other
  // memory operations.
  bool mayAlias(const MachineFrameInfo *) const override {
    return false;
  }

  void printCustom(raw_ostream &OS) const override {
    OS << "GWSResource";
  }
};
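
// Because these pseudo source values participate in LLVM-style RTTI through
// classof(), alias queries can narrow a generic PseudoSourceValue with isa<>
// or dyn_cast<>. A minimal sketch (the helper below is hypothetical):
//
//   static bool mayAliasIRMemory(const PseudoSourceValue *PSV) {
//     if (isa<AMDGPUGWSResourcePseudoSourceValue>(PSV))
//       return false; // GWS resources never alias IR-visible memory.
//     return true;
//   }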

namespace yaml {

struct SIArgument {
  bool IsRegister;
  union {
    StringValue RegisterName;
    unsigned StackOffset;
  };
  std::optional<unsigned> Mask;

  // Default constructor, which creates a stack argument.
  SIArgument() : IsRegister(false), StackOffset(0) {}
  SIArgument(const SIArgument &Other) {
    IsRegister = Other.IsRegister;
    if (IsRegister)
      new (&RegisterName) StringValue(Other.RegisterName);
    else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
  }
  SIArgument &operator=(const SIArgument &Other) {
    // Default-construct or destroy the old RegisterName when switching union
    // members.
    if (IsRegister != Other.IsRegister) {
      if (Other.IsRegister)
        new (&RegisterName) StringValue();
      else
        RegisterName.~StringValue();
    }
    IsRegister = Other.IsRegister;
    if (IsRegister)
      RegisterName = Other.RegisterName;
    else
      StackOffset = Other.StackOffset;
    Mask = Other.Mask;
    return *this;
  }
  ~SIArgument() {
    if (IsRegister)
      RegisterName.~StringValue();
  }

  // Helper to create a register or stack argument.
  static inline SIArgument createArgument(bool IsReg) {
    if (IsReg)
      return SIArgument(IsReg);
    return SIArgument();
  }

private:
  // Construct a register argument.
  SIArgument(bool) : IsRegister(true), RegisterName() {}
};

template <> struct MappingTraits<SIArgument> {
  static void mapping(IO &YamlIO, SIArgument &A) {
    if (YamlIO.outputting()) {
      if (A.IsRegister)
        YamlIO.mapRequired("reg", A.RegisterName);
      else
        YamlIO.mapRequired("offset", A.StackOffset);
    } else {
      auto Keys = YamlIO.keys();
      if (is_contained(Keys, "reg")) {
        A = SIArgument::createArgument(true);
        YamlIO.mapRequired("reg", A.RegisterName);
      } else if (is_contained(Keys, "offset"))
        YamlIO.mapRequired("offset", A.StackOffset);
      else
        YamlIO.setError("missing required key 'reg' or 'offset'");
    }
    YamlIO.mapOptional("mask", A.Mask);
  }
  static const bool flow = true;
};
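
// Since the mapping uses flow style, each SIArgument serializes as a one-line
// YAML mapping with either a 'reg' or an 'offset' key. A minimal sketch of the
// accepted forms (register names and values are hypothetical):
//
//   kernargSegmentPtr:  {reg: '$sgpr4_sgpr5'}
//   workItemIDY:        {reg: '$vgpr31', mask: 1047552}
//   privateSegmentSize: {offset: 16}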

struct SIArgumentInfo {
  std::optional<SIArgument> PrivateSegmentBuffer;
  std::optional<SIArgument> DispatchPtr;
  std::optional<SIArgument> QueuePtr;
  std::optional<SIArgument> KernargSegmentPtr;
  std::optional<SIArgument> DispatchID;
  std::optional<SIArgument> FlatScratchInit;
  std::optional<SIArgument> PrivateSegmentSize;

  std::optional<SIArgument> WorkGroupIDX;
  std::optional<SIArgument> WorkGroupIDY;
  std::optional<SIArgument> WorkGroupIDZ;
  std::optional<SIArgument> WorkGroupInfo;
  std::optional<SIArgument> LDSKernelId;
  std::optional<SIArgument> PrivateSegmentWaveByteOffset;

  std::optional<SIArgument> ImplicitArgPtr;
  std::optional<SIArgument> ImplicitBufferPtr;

  std::optional<SIArgument> WorkItemIDX;
  std::optional<SIArgument> WorkItemIDY;
  std::optional<SIArgument> WorkItemIDZ;
};

template <> struct MappingTraits<SIArgumentInfo> {
  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
    YamlIO.mapOptional("dispatchID", AI.DispatchID);
    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);

    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
    YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId);
    YamlIO.mapOptional("privateSegmentWaveByteOffset",
                       AI.PrivateSegmentWaveByteOffset);

    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);

    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
  }
};

// Default to default mode for default calling convention.
struct SIMode {
  bool IEEE = true;
  bool DX10Clamp = true;
  bool FP32InputDenormals = true;
  bool FP32OutputDenormals = true;
  bool FP64FP16InputDenormals = true;
  bool FP64FP16OutputDenormals = true;

  SIMode() = default;

  SIMode(const SIModeRegisterDefaults &Mode) {
    IEEE = Mode.IEEE;
    DX10Clamp = Mode.DX10Clamp;
    FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign;
    FP32OutputDenormals =
        Mode.FP32Denormals.Output != DenormalMode::PreserveSign;
    FP64FP16InputDenormals =
        Mode.FP64FP16Denormals.Input != DenormalMode::PreserveSign;
    FP64FP16OutputDenormals =
        Mode.FP64FP16Denormals.Output != DenormalMode::PreserveSign;
  }

  bool operator ==(const SIMode Other) const {
    return IEEE == Other.IEEE &&
           DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }
};

template <> struct MappingTraits<SIMode> {
  static void mapping(IO &YamlIO, SIMode &Mode) {
    YamlIO.mapOptional("ieee", Mode.IEEE, true);
    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
    YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
    YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-input-denormals",
                       Mode.FP64FP16InputDenormals, true);
    YamlIO.mapOptional("fp64-fp16-output-denormals",
                       Mode.FP64FP16OutputDenormals, true);
  }
};
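
// On output, YAML IO omits keys whose values still match the defaults above,
// so a fully default mode prints nothing. A minimal sketch of a non-default
// mode block (values hypothetical):
//
//   mode:
//     ieee: false
//     fp32-input-denormals: false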

struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  Align MaxKernArgAlign;
  uint32_t LDSSize = 0;
  uint32_t GDSSize = 0;
  Align DynLDSAlign;
  bool IsEntryFunction = false;
  bool IsChainFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;
  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  uint32_t HighBitsOf32BitAddress = 0;

  // TODO: 10 may be a better default since it's the maximum.
  unsigned Occupancy = 0;

  SmallVector<StringValue, 2> SpillPhysVGPRS;
  SmallVector<StringValue> WWMReservedRegs;

  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  unsigned BytesInStackArgArea = 0;
  bool ReturnsVoid = true;

  std::optional<SIArgumentInfo> ArgInfo;

  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;
  unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;

  SIMode Mode;
  std::optional<FrameIndex> ScavengeFI;
  StringValue VGPRForAGPRCopy;
  StringValue SGPRForEXECCopy;
  StringValue LongBranchReservedReg;

  bool HasInitWholeWave = false;

  unsigned DynamicVGPRBlockSize = 0;
  unsigned ScratchReservedForDynamicVGPRs = 0;

  SIMachineFunctionInfo() = default;
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI,
                        const llvm::MachineFunction &MF);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};

template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
    YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("isChainFunction", MFI.IsChainFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
    YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
    YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u);
    YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true);
    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
    YamlIO.mapOptional("psInputAddr", MFI.PSInputAddr, 0u);
    YamlIO.mapOptional("psInputEnable", MFI.PSInputEnable, 0u);
    YamlIO.mapOptional("maxMemoryClusterDWords", MFI.MaxMemoryClusterDWords,
                       DefaultMemoryClusterDWordsLimit);
    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
    YamlIO.mapOptional("highBitsOf32BitAddress",
                       MFI.HighBitsOf32BitAddress, 0u);
    YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
    YamlIO.mapOptional("spillPhysVGPRs", MFI.SpillPhysVGPRS);
    YamlIO.mapOptional("wwmReservedRegs", MFI.WWMReservedRegs);
    YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
    YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy,
                       StringValue()); // Don't print out when it's empty.
    YamlIO.mapOptional("sgprForEXECCopy", MFI.SGPRForEXECCopy,
                       StringValue()); // Don't print out when it's empty.
    YamlIO.mapOptional("longBranchReservedReg", MFI.LongBranchReservedReg,
                       StringValue());
    YamlIO.mapOptional("hasInitWholeWave", MFI.HasInitWholeWave, false);
    YamlIO.mapOptional("dynamicVGPRBlockSize", MFI.DynamicVGPRBlockSize, 0u);
    YamlIO.mapOptional("scratchReservedForDynamicVGPRs",
                       MFI.ScratchReservedForDynamicVGPRs, 0u);
  }
};
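
// A minimal sketch of the resulting 'machineFunctionInfo' block in serialized
// MIR for an entry function (register choices and values are hypothetical):
//
//   machineFunctionInfo:
//     explicitKernArgSize: 8
//     maxKernArgAlign: 8
//     isEntryFunction: true
//     scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
//     stackPtrOffsetReg: '$sgpr32'
//     occupancy: 10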

} // end namespace yaml

// A CSR SGPR value can be preserved inside a callee using one of the following
// methods.
//   1. Copy to an unused scratch SGPR.
//   2. Spill to a VGPR lane.
//   3. Spill to memory via a scratch VGPR.
// Class PrologEpilogSGPRSaveRestoreInfo represents the save/restore method
// used for an SGPR at function prolog/epilog.
enum class SGPRSaveKind : uint8_t {
  COPY_TO_SCRATCH_SGPR,
  SPILL_TO_VGPR_LANE,
  SPILL_TO_MEM
};

class PrologEpilogSGPRSaveRestoreInfo {
  SGPRSaveKind Kind;
  union {
    int Index;
    Register Reg;
  };

public:
  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {}
  PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R)
      : Kind(K), Reg(R) {}
  Register getReg() const { return Reg; }
  int getIndex() const { return Index; }
  SGPRSaveKind getKind() const { return Kind; }
};
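
// A minimal sketch of recording the two save methods (the register and frame
// index values are hypothetical):
//
//   PrologEpilogSGPRSaveRestoreInfo CopyInfo(
//       SGPRSaveKind::COPY_TO_SCRATCH_SGPR, Register(AMDGPU::SGPR40));
//   PrologEpilogSGPRSaveRestoreInfo LaneInfo(
//       SGPRSaveKind::SPILL_TO_VGPR_LANE, /*FI=*/2);
//
// Only the accessor matching the active union member is meaningful: getReg()
// for copies, getIndex() for spills.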

struct VGPRBlock2IndexFunctor {
  using argument_type = Register;
  unsigned operator()(Register Reg) const {
    assert(AMDGPU::VReg_1024RegClass.contains(Reg) && "Expecting a VGPR block");

    const MCRegister FirstVGPRBlock = AMDGPU::VReg_1024RegClass.getRegister(0);
    return Reg - FirstVGPRBlock;
  }
};
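
// The functor gives IndexedMap a dense, zero-based key for 1024-bit VGPR
// blocks. A minimal sketch, assuming the blocks are numbered consecutively in
// the generated register enum:
//
//   VGPRBlock2IndexFunctor ToIndex;
//   // Dense index of the fourth block relative to the first one.
//   unsigned Idx = ToIndex(AMDGPU::VReg_1024RegClass.getRegister(3));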

/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
                                    private MachineRegisterInfo::Delegate {
  friend class GCNTargetMachine;

  // State of MODE register, assumed FP mode.
  SIModeRegisterDefaults Mode;

  // Registers that may be reserved for spilling purposes. These may be the
  // same as the input registers.
  Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;

  // This is the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the current function's frame.
  Register FrameOffsetReg = AMDGPU::FP_REG;

  // This is an ABI register used in the non-entry calling convention to
  // communicate the unswizzled offset from the current dispatch's scratch wave
  // base to the beginning of the new function's frame.
  Register StackPtrOffsetReg = AMDGPU::SP_REG;

  // Registers that may be reserved when RA doesn't allocate enough
  // registers to plan for the case where an indirect branch ends up
  // being needed during branch relaxation.
  Register LongBranchReservedReg;

  AMDGPUFunctionArgInfo ArgInfo;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the
  /// callee is expected to restore the argument stack this should be a
  /// multiple of 16, all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;

  // Default/requested number of work groups for the function.
  SmallVector<unsigned> MaxNumWorkGroups = {0, 0, 0};

private:
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  unsigned DynamicVGPRBlockSize = 0;

  // The size in bytes of the scratch space reserved for the CWSR trap handler
  // to spill some of the dynamic VGPRs.
  unsigned ScratchReservedForDynamicVGPRs = 0;

  // Tracks information about user SGPRs that will be set up by hardware and
  // that will apply to all wavefronts of the grid.
  GCNUserSGPRUsageInfo UserSGPRInfo;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool LDSKernelId : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from
  // the user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  bool MayNeedAGPRs : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  // Flags associated with the virtual registers.
  IndexedMap<uint8_t, VirtReg2IndexFunctor> VRegFlags;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  // Maximum number of dwords that can be clustered during the instruction
  // scheduling stage.
  unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;

  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

  // MachineRegisterInfo callback functions to notify events.
  void MRI_NoteNewVirtualRegister(Register Reg) override;
  void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override;

public:
  struct VGPRSpillToAGPR {
    SmallVector<MCPhysReg, 32> Lanes;
    bool FullyAllocated = false;
    bool IsDead = false;
  };

private:
  // To track the virtual VGPR + lane index for each subregister of an SGPR
  // spilled to a frame index key during the SILowerSGPRSpills pass.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
      SGPRSpillsToVirtualVGPRLanes;
  // To track the physical VGPR + lane index for CSR SGPR spills and special
  // SGPRs like the Frame Pointer, identified during PrologEpilogInserter.
  DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
      SGPRSpillsToPhysicalVGPRLanes;
  unsigned NumVirtualVGPRSpillLanes = 0;
  unsigned NumPhysicalVGPRSpillLanes = 0;
  SmallVector<Register, 2> SpillVGPRs;
  SmallVector<Register, 2> SpillPhysVGPRs;
  using WWMSpillsMap = MapVector<Register, int>;
  // To track the registers used in instructions that can potentially modify
  // the inactive lanes. WWM instructions and the writelane instructions used
  // for spilling SGPRs to VGPRs fall into this category. The VGPRs modified
  // by them should be spilled/restored at function prolog/epilog to avoid any
  // undesired outcome. Each entry in this map holds a pair of values, the
  // VGPR and its stack slot index.
  WWMSpillsMap WWMSpills;

  // Before allocation, the VGPR registers are partitioned into two distinct
  // sets, the first one for WWM-values and the second set for non-WWM values.
  // The latter set should be reserved during WWM-regalloc.
  BitVector NonWWMRegMask;

  using ReservedRegSet = SmallSetVector<Register, 8>;
  // To track the VGPRs reserved for WWM instructions. They get stack slots
  // later during PrologEpilogInserter and get added into the superset
  // WWMSpills for actual spilling. A separate set makes the register
  // reservation and the serialization easier.
  ReservedRegSet WWMReservedRegs;

  using PrologEpilogSGPRSpill =
      std::pair<Register, PrologEpilogSGPRSaveRestoreInfo>;
  // To track the SGPR spill method used for a CSR SGPR register during
  // frame lowering. Even though the SGPR spills are handled during the
  // SILowerSGPRSpills pass, some special handling is still needed later,
  // during PrologEpilogInserter.
  SmallVector<PrologEpilogSGPRSpill, 3> PrologEpilogSGPRSpills;

  // To save/restore the EXEC mask around WWM spills and copies.
  Register SGPRForEXECCopy;

  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;

  // AGPRs used for VGPR spills.
  SmallVector<MCPhysReg, 32> SpillAGPR;

  // VGPRs used for AGPR spills.
  SmallVector<MCPhysReg, 32> SpillVGPR;

  // Emergency stack slot. Sometimes, we create this before finalizing the
  // stack frame, so save it here and add it to the RegScavenger later.
  std::optional<int> ScavengeFI;

  // Map each VGPR CSR to the mask needed to save and restore it using block
  // load/store instructions. Only used if the subtarget feature for VGPR block
  // load/store is enabled.
  IndexedMap<uint32_t, VGPRBlock2IndexFunctor> MaskForVGPRBlockOps;

private:
  Register VGPRForAGPRCopy;

  bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI,
                                        unsigned LaneIndex);
  bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI,
                                         unsigned LaneIndex,
                                         bool IsPrologEpilog);

public:
  Register getVGPRForAGPRCopy() const {
    return VGPRForAGPRCopy;
  }

  void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) {
    VGPRForAGPRCopy = NewVGPRForAGPRCopy;
  }

  bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const;

  void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask) {
    MaskForVGPRBlockOps.grow(RegisterBlock);
    MaskForVGPRBlockOps[RegisterBlock] = Mask;
  }

  uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const {
    return MaskForVGPRBlockOps[RegisterBlock];
  }

  bool hasMaskForVGPRBlockOps(Register RegisterBlock) const {
    return MaskForVGPRBlockOps.inBounds(RegisterBlock);
  }

public:
  SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default;
  SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI);

  MachineFunctionInfo *
  clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
        const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
      const override;

  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
                                const MachineFunction &MF,
                                PerFunctionMIParsingState &PFS,
                                SMDiagnostic &Error, SMRange &SourceRange);

  void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
  bool isWWMReg(Register Reg) const {
    return Reg.isVirtual() ? checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG)
                           : WWMReservedRegs.contains(Reg);
  }

  void updateNonWWMRegMask(BitVector &RegMask) { NonWWMRegMask = RegMask; }
  BitVector getNonWWMRegMask() const { return NonWWMRegMask; }
  void clearNonWWMRegAllocMask() { NonWWMRegMask.clear(); }

  SIModeRegisterDefaults getMode() const { return Mode; }

  ArrayRef<SIRegisterInfo::SpilledReg>
  getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const {
    auto I = SGPRSpillsToVirtualVGPRLanes.find(FrameIndex);
    return (I == SGPRSpillsToVirtualVGPRLanes.end())
               ? ArrayRef<SIRegisterInfo::SpilledReg>()
               : ArrayRef(I->second);
  }

  ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }
  ArrayRef<Register> getSGPRSpillPhysVGPRs() const { return SpillPhysVGPRs; }

  const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
  const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }

  bool isWWMReservedRegister(Register Reg) const {
    return WWMReservedRegs.contains(Reg);
  }

  ArrayRef<PrologEpilogSGPRSpill> getPrologEpilogSGPRSpills() const {
    assert(is_sorted(PrologEpilogSGPRSpills, llvm::less_first()));
    return PrologEpilogSGPRSpills;
  }

  GCNUserSGPRUsageInfo &getUserSGPRInfo() { return UserSGPRInfo; }

  const GCNUserSGPRUsageInfo &getUserSGPRInfo() const { return UserSGPRInfo; }

  void addToPrologEpilogSGPRSpills(Register Reg,
                                   PrologEpilogSGPRSaveRestoreInfo SI) {
    assert(!hasPrologEpilogSGPRSpillEntry(Reg));

    // Insert a new entry in the right place to keep the vector in sorted
    // order. This should be cheap since the vector is expected to be very
    // short.
    PrologEpilogSGPRSpills.insert(
        upper_bound(
            PrologEpilogSGPRSpills, Reg,
            [](const auto &LHS, const auto &RHS) { return LHS < RHS.first; }),
        std::make_pair(Reg, SI));
  }
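
  // A minimal sketch of recording and later querying a prolog/epilog SGPR
  // save (the registers are hypothetical):
  //
  //   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  //   FuncInfo->addToPrologEpilogSGPRSpills(
  //       AMDGPU::SGPR30_SGPR31,
  //       PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::COPY_TO_SCRATCH_SGPR,
  //                                       Register(AMDGPU::SGPR40_SGPR41)));
  //   // The vector stays sorted by register, so getPrologEpilogSGPRSpills()
  //   // yields a deterministic iteration order for later passes.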

  // Check if an entry was created for \p Reg in PrologEpilogSGPRSpills. Return
  // true if one exists and false otherwise.
  bool hasPrologEpilogSGPRSpillEntry(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    return I != PrologEpilogSGPRSpills.end();
  }

  // Get the scratch SGPR if allocated to save/restore \p Reg.
  Register getScratchSGPRCopyDstReg(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    if (I != PrologEpilogSGPRSpills.end() &&
        I->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
      return I->second.getReg();

    return AMDGPU::NoRegister;
  }

  // Get all scratch SGPRs allocated to copy/restore the SGPR spills.
  void getAllScratchSGPRCopyDstRegs(SmallVectorImpl<Register> &Regs) const {
    for (const auto &SI : PrologEpilogSGPRSpills) {
      if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
        Regs.push_back(SI.second.getReg());
    }
  }

  // Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI.
  bool checkIndexInPrologEpilogSGPRSpills(int FI) const {
    return find_if(PrologEpilogSGPRSpills,
                   [FI](const std::pair<Register,
                                        PrologEpilogSGPRSaveRestoreInfo> &SI) {
                     return SI.second.getKind() ==
                                SGPRSaveKind::SPILL_TO_VGPR_LANE &&
                            SI.second.getIndex() == FI;
                   }) != PrologEpilogSGPRSpills.end();
  }

  const PrologEpilogSGPRSaveRestoreInfo &
  getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const {
    const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
      return Spill.first == Reg;
    });
    assert(I != PrologEpilogSGPRSpills.end());

    return I->second;
  }

  ArrayRef<SIRegisterInfo::SpilledReg>
  getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const {
    auto I = SGPRSpillsToPhysicalVGPRLanes.find(FrameIndex);
    return (I == SGPRSpillsToPhysicalVGPRLanes.end())
               ? ArrayRef<SIRegisterInfo::SpilledReg>()
               : ArrayRef(I->second);
  }

  void setFlag(Register Reg, uint8_t Flag) {
    assert(Reg.isVirtual());
    if (VRegFlags.inBounds(Reg))
      VRegFlags[Reg] |= Flag;
  }

  bool checkFlag(Register Reg, uint8_t Flag) const {
    if (Reg.isPhysical())
      return false;

    return VRegFlags.inBounds(Reg) && VRegFlags[Reg] & Flag;
  }
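
  // A minimal sketch of tagging a virtual register as a WWM value and testing
  // it later; assumes this function info is registered as the MRI delegate so
  // VRegFlags grows with each new virtual register:
  //
  //   Register VReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  //   FuncInfo->setFlag(VReg, AMDGPU::VirtRegFlag::WWM_REG);
  //   bool IsWWM = FuncInfo->isWWMReg(VReg); // true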

  bool hasVRegFlags() { return VRegFlags.size(); }

  void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4,
                        Align Alignment = Align(4));

  void splitWWMSpillRegisters(
      MachineFunction &MF,
      SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
      SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const;

  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
    return SpillAGPR;
  }

  Register getSGPRForEXECCopy() const { return SGPRForEXECCopy; }

  void setSGPRForEXECCopy(Register Reg) { SGPRForEXECCopy = Reg; }

  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
    return SpillVGPR;
  }

  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
                                         : I->second.Lanes[Lane];
  }

  void setVGPRToAGPRSpillDead(int FrameIndex) {
    auto I = VGPRToAGPRSpills.find(FrameIndex);
    if (I != VGPRToAGPRSpills.end())
      I->second.IsDead = true;
  }

  // Shift the allocated WWM registers in \p WWMVGPRs to the lowest available
  // range.
  void shiftWwmVGPRsToLowestRange(MachineFunction &MF,
                                  SmallVectorImpl<Register> &WWMVGPRs,
                                  BitVector &SavedVGPRs);

  bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI,
                                   bool SpillToPhysVGPRLane = false,
                                   bool IsPrologEpilog = false);
  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);

  /// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
  /// to the default stack.
  bool removeDeadFrameIndices(MachineFrameInfo &MFI,
                              bool ResetSGPRSpillStackIDs);

  int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
  std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  bool isDynamicVGPREnabled() const { return DynamicVGPRBlockSize != 0; }
  unsigned getDynamicVGPRBlockSize() const { return DynamicVGPRBlockSize; }

  // This is only used if we need to save any dynamic VGPRs in scratch.
  unsigned getScratchReservedForDynamicVGPRs() const {
    return ScratchReservedForDynamicVGPRs;
  }

  void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes) {
    ScratchReservedForDynamicVGPRs = SizeInBytes;
  }

  // Add user SGPRs.
  Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  Register addDispatchPtr(const SIRegisterInfo &TRI);
  Register addQueuePtr(const SIRegisterInfo &TRI);
  Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
  Register addDispatchID(const SIRegisterInfo &TRI);
  Register addFlatScratchInit(const SIRegisterInfo &TRI);
  Register addPrivateSegmentSize(const SIRegisterInfo &TRI);
  Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
  Register addLDSKernelId();
  SmallVectorImpl<MCRegister> *
  addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
                      unsigned AllocSizeDWord, int KernArgIdx,
                      int PaddingSGPRs);

  /// Increment user SGPRs used for padding the argument list only.
  Register addReservedUserSGPR() {
    Register Next = getNextUserSGPR();
    ++NumUserSGPRs;
    return Next;
  }

  // Add system SGPRs.
  Register addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  Register addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  Register addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  Register addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  bool hasLDSKernelId() const { return LDSKernelId; }

  // Add special VGPR inputs.
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  Register addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
        = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  void setPrivateSegmentWaveByteOffset(Register Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  MCRegister
  getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    const auto *Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
    return Arg ? Arg->getRegister() : MCRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  Register getGITPtrLoReg(const MachineFunction &MF) const;

  uint32_t get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getNumKernargPreloadedSGPRs() const {
    return UserSGPRInfo.getNumKernargPreloadSGPRs();
  }

  Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  Register getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  Register getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setFrameOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    FrameOffsetReg = Reg;
  }

  void setStackPtrOffsetReg(Register Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  void setLongBranchReservedReg(Register Reg) { LongBranchReservedReg = Reg; }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in
  // serialized MIR.
  Register getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  Register getLongBranchReservedReg() const { return LongBranchReservedReg; }

  Register getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  Register getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }
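
  // PSInputAddr and PSInputEnable are bit masks indexed by pixel-shader input
  // slot. A minimal sketch (the slot number is hypothetical):
  //
  //   FuncInfo->markPSInputAllocated(0);     // PSInputAddr   |= 1 << 0
  //   FuncInfo->markPSInputEnabled(0);       // PSInputEnable |= 1 << 0
  //   bool Slot0 = FuncInfo->isPSInputAllocated(0); // true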

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group
  /// sizes for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this
  /// function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this
  /// function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  const AMDGPUGWSResourcePseudoSourceValue *
  getGWSPSV(const AMDGPUTargetMachine &TM) {
    return &GWSResourcePSV;
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  void limitOccupancy(const MachineFunction &MF);

  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }

  unsigned getMaxMemoryClusterDWords() const { return MaxMemoryClusterDWords; }

  bool mayNeedAGPRs() const {
    return MayNeedAGPRs;
  }

  // \returns true if the function uses AGPRs via inline asm or has a call
  // which may use them.
  bool mayUseAGPRs(const Function &F) const;

  /// \returns Default/requested number of work groups for this function.
  SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }

  unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; }
  unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; }
  unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H