1 | //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "AMDGPUBaseInfo.h" |
10 | #include "AMDGPU.h" |
11 | #include "AMDGPUAsmUtils.h" |
12 | #include "AMDKernelCodeT.h" |
13 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
14 | #include "Utils/AMDKernelCodeTUtils.h" |
15 | #include "llvm/ADT/StringExtras.h" |
16 | #include "llvm/BinaryFormat/ELF.h" |
17 | #include "llvm/IR/Attributes.h" |
18 | #include "llvm/IR/Constants.h" |
19 | #include "llvm/IR/Function.h" |
20 | #include "llvm/IR/GlobalValue.h" |
21 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
22 | #include "llvm/IR/IntrinsicsR600.h" |
23 | #include "llvm/IR/LLVMContext.h" |
24 | #include "llvm/MC/MCInstrInfo.h" |
25 | #include "llvm/MC/MCRegisterInfo.h" |
26 | #include "llvm/MC/MCSubtargetInfo.h" |
27 | #include "llvm/Support/AMDHSAKernelDescriptor.h" |
28 | #include "llvm/Support/CommandLine.h" |
29 | #include "llvm/TargetParser/TargetParser.h" |
30 | #include <optional> |
31 | |
32 | #define GET_INSTRINFO_NAMED_OPS |
33 | #define GET_INSTRMAP_INFO |
34 | #include "AMDGPUGenInstrInfo.inc" |
35 | |
static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));
41 | |
42 | namespace { |
43 | |
44 | /// \returns Bit mask for given bit \p Shift and bit \p Width. |
45 | unsigned getBitMask(unsigned Shift, unsigned Width) { |
46 | return ((1 << Width) - 1) << Shift; |
47 | } |
48 | |
49 | /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width. |
50 | /// |
51 | /// \returns Packed \p Dst. |
52 | unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { |
53 | unsigned Mask = getBitMask(Shift, Width); |
54 | return ((Src << Shift) & Mask) | (Dst & ~Mask); |
55 | } |
56 | |
57 | /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width. |
58 | /// |
59 | /// \returns Unpacked bits. |
60 | unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { |
61 | return (Src & getBitMask(Shift, Width)) >> Shift; |
62 | } |
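
// Worked example (illustrative): with Shift = 4 and Width = 3,
// getBitMask(4, 3) == 0x70. packBits(5, 0, 4, 3) places the value 5 into that
// field, giving 0x50, and unpackBits(0x50, 4, 3) recovers 5. The waitcnt
// helpers below combine several such fields into one immediate.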
63 | |
64 | /// \returns Vmcnt bit shift (lower bits). |
65 | unsigned getVmcntBitShiftLo(unsigned VersionMajor) { |
66 | return VersionMajor >= 11 ? 10 : 0; |
67 | } |
68 | |
69 | /// \returns Vmcnt bit width (lower bits). |
70 | unsigned getVmcntBitWidthLo(unsigned VersionMajor) { |
71 | return VersionMajor >= 11 ? 6 : 4; |
72 | } |
73 | |
74 | /// \returns Expcnt bit shift. |
75 | unsigned getExpcntBitShift(unsigned VersionMajor) { |
76 | return VersionMajor >= 11 ? 0 : 4; |
77 | } |
78 | |
79 | /// \returns Expcnt bit width. |
80 | unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; } |
81 | |
82 | /// \returns Lgkmcnt bit shift. |
83 | unsigned getLgkmcntBitShift(unsigned VersionMajor) { |
84 | return VersionMajor >= 11 ? 4 : 8; |
85 | } |
86 | |
87 | /// \returns Lgkmcnt bit width. |
88 | unsigned getLgkmcntBitWidth(unsigned VersionMajor) { |
89 | return VersionMajor >= 10 ? 6 : 4; |
90 | } |
91 | |
92 | /// \returns Vmcnt bit shift (higher bits). |
93 | unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; } |
94 | |
95 | /// \returns Vmcnt bit width (higher bits). |
96 | unsigned getVmcntBitWidthHi(unsigned VersionMajor) { |
97 | return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0; |
98 | } |
99 | |
100 | /// \returns Loadcnt bit width |
101 | unsigned getLoadcntBitWidth(unsigned VersionMajor) { |
102 | return VersionMajor >= 12 ? 6 : 0; |
103 | } |
104 | |
105 | /// \returns Samplecnt bit width. |
106 | unsigned getSamplecntBitWidth(unsigned VersionMajor) { |
107 | return VersionMajor >= 12 ? 6 : 0; |
108 | } |
109 | |
110 | /// \returns Bvhcnt bit width. |
111 | unsigned getBvhcntBitWidth(unsigned VersionMajor) { |
112 | return VersionMajor >= 12 ? 3 : 0; |
113 | } |
114 | |
115 | /// \returns Dscnt bit width. |
116 | unsigned getDscntBitWidth(unsigned VersionMajor) { |
117 | return VersionMajor >= 12 ? 6 : 0; |
118 | } |
119 | |
120 | /// \returns Dscnt bit shift in combined S_WAIT instructions. |
121 | unsigned getDscntBitShift(unsigned VersionMajor) { return 0; } |
122 | |
123 | /// \returns Storecnt or Vscnt bit width, depending on VersionMajor. |
124 | unsigned getStorecntBitWidth(unsigned VersionMajor) { |
125 | return VersionMajor >= 10 ? 6 : 0; |
126 | } |
127 | |
128 | /// \returns Kmcnt bit width. |
129 | unsigned getKmcntBitWidth(unsigned VersionMajor) { |
130 | return VersionMajor >= 12 ? 5 : 0; |
131 | } |
132 | |
133 | /// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions. |
134 | unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) { |
135 | return VersionMajor >= 12 ? 8 : 0; |
136 | } |
137 | |
138 | /// \returns VmVsrc bit width |
139 | inline unsigned getVmVsrcBitWidth() { return 3; } |
140 | |
141 | /// \returns VmVsrc bit shift |
142 | inline unsigned getVmVsrcBitShift() { return 2; } |
143 | |
144 | /// \returns VaVdst bit width |
145 | inline unsigned getVaVdstBitWidth() { return 4; } |
146 | |
147 | /// \returns VaVdst bit shift |
148 | inline unsigned getVaVdstBitShift() { return 12; } |
149 | |
150 | /// \returns SaSdst bit width |
151 | inline unsigned getSaSdstBitWidth() { return 1; } |
152 | |
153 | /// \returns SaSdst bit shift |
154 | inline unsigned getSaSdstBitShift() { return 0; } |
155 | |
156 | } // end anonymous namespace |
157 | |
158 | namespace llvm { |
159 | |
160 | namespace AMDGPU { |
161 | |
162 | /// \returns true if the target supports signed immediate offset for SMRD |
163 | /// instructions. |
164 | bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) { |
165 | return isGFX9Plus(STI: ST); |
166 | } |
167 | |
168 | /// \returns True if \p STI is AMDHSA. |
169 | bool isHsaAbi(const MCSubtargetInfo &STI) { |
170 | return STI.getTargetTriple().getOS() == Triple::AMDHSA; |
171 | } |
172 | |
173 | unsigned getAMDHSACodeObjectVersion(const Module &M) { |
174 | if (auto Ver = mdconst::extract_or_null<ConstantInt>( |
          M.getModuleFlag("amdhsa_code_object_version"))) {
176 | return (unsigned)Ver->getZExtValue() / 100; |
177 | } |
178 | |
179 | return getDefaultAMDHSACodeObjectVersion(); |
180 | } |
181 | |
182 | unsigned getDefaultAMDHSACodeObjectVersion() { |
183 | return DefaultAMDHSACodeObjectVersion; |
184 | } |
185 | |
186 | unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) { |
187 | switch (ABIVersion) { |
188 | case ELF::ELFABIVERSION_AMDGPU_HSA_V4: |
189 | return 4; |
190 | case ELF::ELFABIVERSION_AMDGPU_HSA_V5: |
191 | return 5; |
192 | case ELF::ELFABIVERSION_AMDGPU_HSA_V6: |
193 | return 6; |
194 | default: |
195 | return getDefaultAMDHSACodeObjectVersion(); |
196 | } |
197 | } |
198 | |
199 | uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) { |
200 | if (T.getOS() != Triple::AMDHSA) |
201 | return 0; |
202 | |
203 | switch (CodeObjectVersion) { |
204 | case 4: |
205 | return ELF::ELFABIVERSION_AMDGPU_HSA_V4; |
206 | case 5: |
207 | return ELF::ELFABIVERSION_AMDGPU_HSA_V5; |
208 | case 6: |
209 | return ELF::ELFABIVERSION_AMDGPU_HSA_V6; |
210 | default: |
211 | report_fatal_error(reason: "Unsupported AMDHSA Code Object Version " + |
212 | Twine(CodeObjectVersion)); |
213 | } |
214 | } |
215 | |
216 | unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { |
217 | switch (CodeObjectVersion) { |
218 | case AMDHSA_COV4: |
219 | return 48; |
220 | case AMDHSA_COV5: |
221 | case AMDHSA_COV6: |
222 | default: |
223 | return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET; |
224 | } |
225 | } |
226 | |
227 | |
228 | // FIXME: All such magic numbers about the ABI should be in a |
229 | // central TD file. |
230 | unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { |
231 | switch (CodeObjectVersion) { |
232 | case AMDHSA_COV4: |
233 | return 24; |
234 | case AMDHSA_COV5: |
235 | case AMDHSA_COV6: |
236 | default: |
237 | return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET; |
238 | } |
239 | } |
240 | |
241 | unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { |
242 | switch (CodeObjectVersion) { |
243 | case AMDHSA_COV4: |
244 | return 32; |
245 | case AMDHSA_COV5: |
246 | case AMDHSA_COV6: |
247 | default: |
248 | return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET; |
249 | } |
250 | } |
251 | |
252 | unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) { |
253 | switch (CodeObjectVersion) { |
254 | case AMDHSA_COV4: |
255 | return 40; |
256 | case AMDHSA_COV5: |
257 | case AMDHSA_COV6: |
258 | default: |
259 | return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET; |
260 | } |
261 | } |
262 | |
263 | #define GET_MIMGBaseOpcodesTable_IMPL |
264 | #define GET_MIMGDimInfoTable_IMPL |
265 | #define GET_MIMGInfoTable_IMPL |
266 | #define GET_MIMGLZMappingTable_IMPL |
267 | #define GET_MIMGMIPMappingTable_IMPL |
268 | #define GET_MIMGBiasMappingTable_IMPL |
269 | #define GET_MIMGOffsetMappingTable_IMPL |
270 | #define GET_MIMGG16MappingTable_IMPL |
271 | #define GET_MAIInstInfoTable_IMPL |
272 | #include "AMDGPUGenSearchableTables.inc" |
273 | |
274 | int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, |
275 | unsigned VDataDwords, unsigned VAddrDwords) { |
276 | const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, |
277 | VDataDwords, VAddrDwords); |
278 | return Info ? Info->Opcode : -1; |
279 | } |
280 | |
281 | const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) { |
282 | const MIMGInfo *Info = getMIMGInfo(Opcode: Opc); |
283 | return Info ? getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode) : nullptr; |
284 | } |
285 | |
286 | int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) { |
287 | const MIMGInfo *OrigInfo = getMIMGInfo(Opcode: Opc); |
288 | const MIMGInfo *NewInfo = |
289 | getMIMGOpcodeHelper(BaseOpcode: OrigInfo->BaseOpcode, MIMGEncoding: OrigInfo->MIMGEncoding, |
290 | VDataDwords: NewChannels, VAddrDwords: OrigInfo->VAddrDwords); |
291 | return NewInfo ? NewInfo->Opcode : -1; |
292 | } |
293 | |
294 | unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, |
295 | const MIMGDimInfo *Dim, bool IsA16, |
296 | bool IsG16Supported) { |
297 | unsigned AddrWords = BaseOpcode->NumExtraArgs; |
298 | unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + |
299 | (BaseOpcode->LodOrClampOrMip ? 1 : 0); |
300 | if (IsA16) |
301 | AddrWords += divideCeil(Numerator: AddrComponents, Denominator: 2); |
302 | else |
303 | AddrWords += AddrComponents; |
304 | |
305 | // Note: For subtargets that support A16 but not G16, enabling A16 also |
306 | // enables 16 bit gradients. |
307 | // For subtargets that support A16 (operand) and G16 (done with a different |
308 | // instruction encoding), they are independent. |
309 | |
310 | if (BaseOpcode->Gradients) { |
311 | if ((IsA16 && !IsG16Supported) || BaseOpcode->G16) |
312 | // There are two gradients per coordinate, we pack them separately. |
313 | // For the 3d case, |
314 | // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv) |
315 | AddrWords += alignTo<2>(Value: Dim->NumGradients / 2); |
316 | else |
317 | AddrWords += Dim->NumGradients; |
318 | } |
319 | return AddrWords; |
320 | } |
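
// Illustrative example (assuming a 2-D dim with 2 coordinates and 4 gradient
// components): a derivative sample with no extra args and no LOD/clamp/mip
// needs 2 coordinate dwords plus 4 gradient dwords = 6 address dwords; with
// A16 on a target without G16 support, both are packed into halves, giving
// ceil(2/2) + alignTo<2>(4/2) = 3 dwords.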
321 | |
322 | struct MUBUFInfo { |
323 | uint16_t Opcode; |
324 | uint16_t BaseOpcode; |
325 | uint8_t elements; |
326 | bool has_vaddr; |
327 | bool has_srsrc; |
328 | bool has_soffset; |
329 | bool IsBufferInv; |
330 | bool tfe; |
331 | }; |
332 | |
333 | struct MTBUFInfo { |
334 | uint16_t Opcode; |
335 | uint16_t BaseOpcode; |
336 | uint8_t elements; |
337 | bool has_vaddr; |
338 | bool has_srsrc; |
339 | bool has_soffset; |
340 | }; |
341 | |
342 | struct SMInfo { |
343 | uint16_t Opcode; |
344 | bool IsBuffer; |
345 | }; |
346 | |
347 | struct VOPInfo { |
348 | uint16_t Opcode; |
349 | bool IsSingle; |
350 | }; |
351 | |
352 | struct VOPC64DPPInfo { |
353 | uint16_t Opcode; |
354 | }; |
355 | |
356 | struct VOPCDPPAsmOnlyInfo { |
357 | uint16_t Opcode; |
358 | }; |
359 | |
360 | struct VOP3CDPPAsmOnlyInfo { |
361 | uint16_t Opcode; |
362 | }; |
363 | |
364 | struct VOPDComponentInfo { |
365 | uint16_t BaseVOP; |
366 | uint16_t VOPDOp; |
367 | bool CanBeVOPDX; |
368 | }; |
369 | |
370 | struct VOPDInfo { |
371 | uint16_t Opcode; |
372 | uint16_t OpX; |
373 | uint16_t OpY; |
374 | uint16_t Subtarget; |
375 | }; |
376 | |
377 | struct VOPTrue16Info { |
378 | uint16_t Opcode; |
379 | bool IsTrue16; |
380 | }; |
381 | |
382 | struct SingleUseExceptionInfo { |
383 | uint16_t Opcode; |
384 | bool IsInvalidSingleUseConsumer; |
385 | bool IsInvalidSingleUseProducer; |
386 | }; |
387 | |
388 | #define GET_MTBUFInfoTable_DECL |
389 | #define GET_MTBUFInfoTable_IMPL |
390 | #define GET_MUBUFInfoTable_DECL |
391 | #define GET_MUBUFInfoTable_IMPL |
392 | #define GET_SingleUseExceptionTable_DECL |
393 | #define GET_SingleUseExceptionTable_IMPL |
394 | #define GET_SMInfoTable_DECL |
395 | #define GET_SMInfoTable_IMPL |
396 | #define GET_VOP1InfoTable_DECL |
397 | #define GET_VOP1InfoTable_IMPL |
398 | #define GET_VOP2InfoTable_DECL |
399 | #define GET_VOP2InfoTable_IMPL |
400 | #define GET_VOP3InfoTable_DECL |
401 | #define GET_VOP3InfoTable_IMPL |
402 | #define GET_VOPC64DPPTable_DECL |
403 | #define GET_VOPC64DPPTable_IMPL |
404 | #define GET_VOPC64DPP8Table_DECL |
405 | #define GET_VOPC64DPP8Table_IMPL |
406 | #define GET_VOPCAsmOnlyInfoTable_DECL |
407 | #define GET_VOPCAsmOnlyInfoTable_IMPL |
408 | #define GET_VOP3CAsmOnlyInfoTable_DECL |
409 | #define GET_VOP3CAsmOnlyInfoTable_IMPL |
410 | #define GET_VOPDComponentTable_DECL |
411 | #define GET_VOPDComponentTable_IMPL |
412 | #define GET_VOPDPairs_DECL |
413 | #define GET_VOPDPairs_IMPL |
414 | #define GET_VOPTrue16Table_DECL |
415 | #define GET_VOPTrue16Table_IMPL |
416 | #define GET_WMMAOpcode2AddrMappingTable_DECL |
417 | #define GET_WMMAOpcode2AddrMappingTable_IMPL |
418 | #define GET_WMMAOpcode3AddrMappingTable_DECL |
419 | #define GET_WMMAOpcode3AddrMappingTable_IMPL |
420 | #include "AMDGPUGenSearchableTables.inc" |
421 | |
422 | int getMTBUFBaseOpcode(unsigned Opc) { |
423 | const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opcode: Opc); |
424 | return Info ? Info->BaseOpcode : -1; |
425 | } |
426 | |
427 | int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) { |
428 | const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpcode: BaseOpc, elements: Elements); |
429 | return Info ? Info->Opcode : -1; |
430 | } |
431 | |
432 | int getMTBUFElements(unsigned Opc) { |
433 | const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc); |
434 | return Info ? Info->elements : 0; |
435 | } |
436 | |
437 | bool getMTBUFHasVAddr(unsigned Opc) { |
438 | const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc); |
439 | return Info ? Info->has_vaddr : false; |
440 | } |
441 | |
442 | bool getMTBUFHasSrsrc(unsigned Opc) { |
443 | const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc); |
444 | return Info ? Info->has_srsrc : false; |
445 | } |
446 | |
447 | bool getMTBUFHasSoffset(unsigned Opc) { |
448 | const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc); |
449 | return Info ? Info->has_soffset : false; |
450 | } |
451 | |
452 | int getMUBUFBaseOpcode(unsigned Opc) { |
453 | const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opcode: Opc); |
454 | return Info ? Info->BaseOpcode : -1; |
455 | } |
456 | |
457 | int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) { |
458 | const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpcode: BaseOpc, elements: Elements); |
459 | return Info ? Info->Opcode : -1; |
460 | } |
461 | |
462 | int getMUBUFElements(unsigned Opc) { |
463 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc); |
464 | return Info ? Info->elements : 0; |
465 | } |
466 | |
467 | bool getMUBUFHasVAddr(unsigned Opc) { |
468 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc); |
469 | return Info ? Info->has_vaddr : false; |
470 | } |
471 | |
472 | bool getMUBUFHasSrsrc(unsigned Opc) { |
473 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc); |
474 | return Info ? Info->has_srsrc : false; |
475 | } |
476 | |
477 | bool getMUBUFHasSoffset(unsigned Opc) { |
478 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc); |
479 | return Info ? Info->has_soffset : false; |
480 | } |
481 | |
482 | bool getMUBUFIsBufferInv(unsigned Opc) { |
483 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc); |
484 | return Info ? Info->IsBufferInv : false; |
485 | } |
486 | |
487 | bool getMUBUFTfe(unsigned Opc) { |
488 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc); |
489 | return Info ? Info->tfe : false; |
490 | } |
491 | |
492 | bool getSMEMIsBuffer(unsigned Opc) { |
493 | const SMInfo *Info = getSMEMOpcodeHelper(Opcode: Opc); |
494 | return Info ? Info->IsBuffer : false; |
495 | } |
496 | |
497 | bool getVOP1IsSingle(unsigned Opc) { |
498 | const VOPInfo *Info = getVOP1OpcodeHelper(Opcode: Opc); |
499 | return Info ? Info->IsSingle : true; |
500 | } |
501 | |
502 | bool getVOP2IsSingle(unsigned Opc) { |
503 | const VOPInfo *Info = getVOP2OpcodeHelper(Opcode: Opc); |
504 | return Info ? Info->IsSingle : true; |
505 | } |
506 | |
507 | bool getVOP3IsSingle(unsigned Opc) { |
508 | const VOPInfo *Info = getVOP3OpcodeHelper(Opcode: Opc); |
509 | return Info ? Info->IsSingle : true; |
510 | } |
511 | |
512 | bool isVOPC64DPP(unsigned Opc) { |
513 | return isVOPC64DPPOpcodeHelper(Opcode: Opc) || isVOPC64DPP8OpcodeHelper(Opcode: Opc); |
514 | } |
515 | |
516 | bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opcode: Opc); } |
517 | |
518 | bool getMAIIsDGEMM(unsigned Opc) { |
519 | const MAIInstInfo *Info = getMAIInstInfoHelper(Opcode: Opc); |
520 | return Info ? Info->is_dgemm : false; |
521 | } |
522 | |
523 | bool getMAIIsGFX940XDL(unsigned Opc) { |
524 | const MAIInstInfo *Info = getMAIInstInfoHelper(Opcode: Opc); |
525 | return Info ? Info->is_gfx940_xdl : false; |
526 | } |
527 | |
528 | unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) { |
529 | if (ST.hasFeature(Feature: AMDGPU::FeatureGFX12Insts)) |
530 | return SIEncodingFamily::GFX12; |
531 | if (ST.hasFeature(Feature: AMDGPU::FeatureGFX11Insts)) |
532 | return SIEncodingFamily::GFX11; |
  llvm_unreachable("Subtarget generation does not support VOPD!");
534 | } |
535 | |
536 | CanBeVOPD getCanBeVOPD(unsigned Opc) { |
537 | const VOPDComponentInfo *Info = getVOPDComponentHelper(BaseVOP: Opc); |
538 | if (Info) |
539 | return {.X: Info->CanBeVOPDX, .Y: true}; |
540 | return {.X: false, .Y: false}; |
541 | } |
542 | |
543 | unsigned getVOPDOpcode(unsigned Opc) { |
544 | const VOPDComponentInfo *Info = getVOPDComponentHelper(BaseVOP: Opc); |
545 | return Info ? Info->VOPDOp : ~0u; |
546 | } |
547 | |
548 | bool isVOPD(unsigned Opc) { |
549 | return AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0X); |
550 | } |
551 | |
552 | bool isMAC(unsigned Opc) { |
553 | return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || |
554 | Opc == AMDGPU::V_MAC_F32_e64_gfx10 || |
555 | Opc == AMDGPU::V_MAC_F32_e64_vi || |
556 | Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || |
557 | Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || |
558 | Opc == AMDGPU::V_MAC_F16_e64_vi || |
559 | Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || |
560 | Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || |
561 | Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || |
562 | Opc == AMDGPU::V_FMAC_F32_e64_gfx12 || |
563 | Opc == AMDGPU::V_FMAC_F32_e64_vi || |
564 | Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || |
565 | Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || |
566 | Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || |
567 | Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 || |
568 | Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 || |
569 | Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi || |
570 | Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi || |
571 | Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi || |
572 | Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi; |
573 | } |
574 | |
575 | bool isPermlane16(unsigned Opc) { |
576 | return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || |
577 | Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 || |
578 | Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 || |
579 | Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 || |
580 | Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 || |
581 | Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 || |
582 | Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 || |
583 | Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12; |
584 | } |
585 | |
586 | bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) { |
587 | return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 || |
588 | Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 || |
589 | Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 || |
590 | Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 || |
591 | Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 || |
592 | Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 || |
593 | Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 || |
594 | Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12; |
595 | } |
596 | |
597 | bool isGenericAtomic(unsigned Opc) { |
598 | return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP || |
599 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD || |
600 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB || |
601 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN || |
602 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN || |
603 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX || |
604 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX || |
605 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND || |
606 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR || |
607 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR || |
608 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC || |
609 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC || |
610 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD || |
611 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN || |
612 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX || |
613 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP || |
614 | Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG; |
615 | } |
616 | |
617 | bool isTrue16Inst(unsigned Opc) { |
618 | const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opcode: Opc); |
619 | return Info ? Info->IsTrue16 : false; |
620 | } |
621 | |
622 | bool isInvalidSingleUseConsumerInst(unsigned Opc) { |
623 | const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opcode: Opc); |
624 | return Info && Info->IsInvalidSingleUseConsumer; |
625 | } |
626 | |
627 | bool isInvalidSingleUseProducerInst(unsigned Opc) { |
628 | const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opcode: Opc); |
629 | return Info && Info->IsInvalidSingleUseProducer; |
630 | } |
631 | |
632 | unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { |
633 | const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opcode2Addr: Opc); |
634 | return Info ? Info->Opcode3Addr : ~0u; |
635 | } |
636 | |
637 | unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) { |
638 | const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opcode3Addr: Opc); |
639 | return Info ? Info->Opcode2Addr : ~0u; |
640 | } |
641 | |
642 | // Wrapper for Tablegen'd function. enum Subtarget is not defined in any |
643 | // header files, so we need to wrap it in a function that takes unsigned |
644 | // instead. |
645 | int getMCOpcode(uint16_t Opcode, unsigned Gen) { |
646 | return getMCOpcodeGen(Opcode, inSubtarget: static_cast<Subtarget>(Gen)); |
647 | } |
648 | |
649 | int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) { |
650 | const VOPDInfo *Info = |
651 | getVOPDInfoFromComponentOpcodes(OpX, OpY, SubTgt: EncodingFamily); |
652 | return Info ? Info->Opcode : -1; |
653 | } |
654 | |
655 | std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) { |
656 | const VOPDInfo *Info = getVOPDOpcodeHelper(Opcode: VOPDOpcode); |
657 | assert(Info); |
658 | auto OpX = getVOPDBaseFromComponent(VOPDOp: Info->OpX); |
659 | auto OpY = getVOPDBaseFromComponent(VOPDOp: Info->OpY); |
660 | assert(OpX && OpY); |
661 | return {OpX->BaseVOP, OpY->BaseVOP}; |
662 | } |
663 | |
664 | namespace VOPD { |
665 | |
666 | ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) { |
667 | assert(OpDesc.getNumDefs() == Component::DST_NUM); |
668 | |
669 | assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1); |
670 | assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1); |
671 | auto TiedIdx = OpDesc.getOperandConstraint(OpNum: Component::SRC2, Constraint: MCOI::TIED_TO); |
672 | assert(TiedIdx == -1 || TiedIdx == Component::DST); |
673 | HasSrc2Acc = TiedIdx != -1; |
674 | |
675 | SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs(); |
676 | assert(SrcOperandsNum <= Component::MAX_SRC_NUM); |
677 | |
678 | auto OperandsNum = OpDesc.getNumOperands(); |
679 | unsigned CompOprIdx; |
680 | for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) { |
681 | if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) { |
682 | MandatoryLiteralIdx = CompOprIdx; |
683 | break; |
684 | } |
685 | } |
686 | } |
687 | |
688 | unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const { |
689 | assert(CompOprIdx < Component::MAX_OPR_NUM); |
690 | |
691 | if (CompOprIdx == Component::DST) |
692 | return getIndexOfDstInParsedOperands(); |
693 | |
694 | auto CompSrcIdx = CompOprIdx - Component::DST_NUM; |
695 | if (CompSrcIdx < getCompParsedSrcOperandsNum()) |
696 | return getIndexOfSrcInParsedOperands(CompSrcIdx); |
697 | |
698 | // The specified operand does not exist. |
699 | return 0; |
700 | } |
701 | |
702 | std::optional<unsigned> InstInfo::getInvalidCompOperandIndex( |
703 | std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const { |
704 | |
705 | auto OpXRegs = getRegIndices(ComponentIdx: ComponentIndex::X, GetRegIdx); |
706 | auto OpYRegs = getRegIndices(ComponentIdx: ComponentIndex::Y, GetRegIdx); |
707 | |
708 | const unsigned CompOprNum = |
709 | SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM; |
710 | unsigned CompOprIdx; |
711 | for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) { |
712 | unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx]; |
713 | if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] && |
714 | ((OpXRegs[CompOprIdx] & BanksMasks) == |
715 | (OpYRegs[CompOprIdx] & BanksMasks))) |
716 | return CompOprIdx; |
717 | } |
718 | |
719 | return {}; |
720 | } |
721 | |
722 | // Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used |
723 | // by the specified component. If an operand is unused |
724 | // or is not a VGPR, the corresponding value is 0. |
725 | // |
726 | // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index |
727 | // for the specified component and MC operand. The callback must return 0 |
728 | // if the operand is not a register or not a VGPR. |
729 | InstInfo::RegIndices InstInfo::getRegIndices( |
730 | unsigned CompIdx, |
731 | std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { |
732 | assert(CompIdx < COMPONENTS_NUM); |
733 | |
734 | const auto &Comp = CompInfo[CompIdx]; |
735 | InstInfo::RegIndices RegIndices; |
736 | |
737 | RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands()); |
738 | |
739 | for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) { |
740 | unsigned CompSrcIdx = CompOprIdx - DST_NUM; |
741 | RegIndices[CompOprIdx] = |
742 | Comp.hasRegSrcOperand(CompSrcIdx) |
743 | ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx)) |
744 | : 0; |
745 | } |
746 | return RegIndices; |
747 | } |
748 | |
749 | } // namespace VOPD |
750 | |
751 | VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) { |
752 | return VOPD::InstInfo(OpX, OpY); |
753 | } |
754 | |
755 | VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode, |
756 | const MCInstrInfo *InstrInfo) { |
757 | auto [OpX, OpY] = getVOPDComponents(VOPDOpcode); |
758 | const auto &OpXDesc = InstrInfo->get(Opcode: OpX); |
759 | const auto &OpYDesc = InstrInfo->get(Opcode: OpY); |
760 | VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X); |
761 | VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo); |
762 | return VOPD::InstInfo(OpXInfo, OpYInfo); |
763 | } |
764 | |
765 | namespace IsaInfo { |
766 | |
767 | AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) |
768 | : STI(STI), XnackSetting(TargetIDSetting::Any), |
769 | SramEccSetting(TargetIDSetting::Any) { |
770 | if (!STI.getFeatureBits().test(I: FeatureSupportsXNACK)) |
771 | XnackSetting = TargetIDSetting::Unsupported; |
772 | if (!STI.getFeatureBits().test(I: FeatureSupportsSRAMECC)) |
773 | SramEccSetting = TargetIDSetting::Unsupported; |
774 | } |
775 | |
776 | void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) { |
777 | // Check if xnack or sramecc is explicitly enabled or disabled. In the |
778 | // absence of the target features we assume we must generate code that can run |
779 | // in any environment. |
780 | SubtargetFeatures Features(FS); |
781 | std::optional<bool> XnackRequested; |
782 | std::optional<bool> SramEccRequested; |
783 | |
784 | for (const std::string &Feature : Features.getFeatures()) { |
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
793 | } |
794 | |
795 | bool XnackSupported = isXnackSupported(); |
796 | bool SramEccSupported = isSramEccSupported(); |
797 | |
798 | if (XnackRequested) { |
799 | if (XnackSupported) { |
800 | XnackSetting = |
801 | *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off; |
802 | } else { |
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting will remain "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
811 | } |
812 | } |
813 | } |
814 | |
815 | if (SramEccRequested) { |
816 | if (SramEccSupported) { |
817 | SramEccSetting = |
818 | *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off; |
819 | } else { |
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting will remain
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
829 | } |
830 | } |
831 | } |
832 | } |
833 | |
834 | static TargetIDSetting |
835 | getTargetIDSettingFromFeatureString(StringRef FeatureString) { |
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
842 | } |
843 | |
844 | void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { |
845 | SmallVector<StringRef, 3> TargetIDSplit; |
846 | TargetID.split(A&: TargetIDSplit, Separator: ':'); |
847 | |
848 | for (const auto &FeatureString : TargetIDSplit) { |
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
852 | SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); |
853 | } |
854 | } |
855 | |
856 | std::string AMDGPUTargetID::toString() const { |
857 | std::string StringRep; |
858 | raw_string_ostream StreamRep(StringRep); |
859 | |
860 | auto TargetTriple = STI.getTargetTriple(); |
861 | auto Version = getIsaVersion(GPU: STI.getCPU()); |
862 | |
863 | StreamRep << TargetTriple.getArchName() << '-' |
864 | << TargetTriple.getVendorName() << '-' |
865 | << TargetTriple.getOSName() << '-' |
866 | << TargetTriple.getEnvironmentName() << '-'; |
867 | |
868 | std::string Processor; |
  // TODO: The following else statement is present because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();
878 | |
879 | std::string Features; |
880 | if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { |
881 | // sramecc. |
882 | if (getSramEccSetting() == TargetIDSetting::Off) |
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
891 | } |
892 | |
893 | StreamRep << Processor << Features; |
894 | |
895 | StreamRep.flush(); |
896 | return StringRep; |
897 | } |
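
// On an AMDHSA target this typically yields a target-id string of the form
// "amdgcn-amd-amdhsa--gfx90a:sramecc+:xnack-", where the sramecc/xnack
// suffixes appear only when the corresponding setting is On or Off.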
898 | |
899 | unsigned getWavefrontSize(const MCSubtargetInfo *STI) { |
900 | if (STI->getFeatureBits().test(I: FeatureWavefrontSize16)) |
901 | return 16; |
902 | if (STI->getFeatureBits().test(I: FeatureWavefrontSize32)) |
903 | return 32; |
904 | |
905 | return 64; |
906 | } |
907 | |
908 | unsigned getLocalMemorySize(const MCSubtargetInfo *STI) { |
909 | unsigned BytesPerCU = 0; |
910 | if (STI->getFeatureBits().test(I: FeatureLocalMemorySize32768)) |
911 | BytesPerCU = 32768; |
912 | if (STI->getFeatureBits().test(I: FeatureLocalMemorySize65536)) |
913 | BytesPerCU = 65536; |
914 | |
915 | // "Per CU" really means "per whatever functional block the waves of a |
916 | // workgroup must share". So the effective local memory size is doubled in |
917 | // WGP mode on gfx10. |
918 | if (isGFX10Plus(STI: *STI) && !STI->getFeatureBits().test(I: FeatureCuMode)) |
919 | BytesPerCU *= 2; |
920 | |
921 | return BytesPerCU; |
922 | } |
923 | |
924 | unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) { |
925 | if (STI->getFeatureBits().test(I: FeatureLocalMemorySize32768)) |
926 | return 32768; |
927 | if (STI->getFeatureBits().test(I: FeatureLocalMemorySize65536)) |
928 | return 65536; |
929 | return 0; |
930 | } |
931 | |
932 | unsigned getEUsPerCU(const MCSubtargetInfo *STI) { |
933 | // "Per CU" really means "per whatever functional block the waves of a |
934 | // workgroup must share". For gfx10 in CU mode this is the CU, which contains |
935 | // two SIMDs. |
936 | if (isGFX10Plus(STI: *STI) && STI->getFeatureBits().test(I: FeatureCuMode)) |
937 | return 2; |
938 | // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains |
939 | // two CUs, so a total of four SIMDs. |
940 | return 4; |
941 | } |
942 | |
943 | unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, |
944 | unsigned FlatWorkGroupSize) { |
945 | assert(FlatWorkGroupSize != 0); |
946 | if (STI->getTargetTriple().getArch() != Triple::amdgcn) |
947 | return 8; |
948 | unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI); |
949 | unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); |
950 | if (N == 1) { |
951 | // Single-wave workgroups don't consume barrier resources. |
952 | return MaxWaves; |
953 | } |
954 | |
955 | unsigned MaxBarriers = 16; |
956 | if (isGFX10Plus(STI: *STI) && !STI->getFeatureBits().test(I: FeatureCuMode)) |
957 | MaxBarriers = 32; |
958 | |
959 | return std::min(a: MaxWaves / N, b: MaxBarriers); |
960 | } |
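
// Illustrative example: assuming a wave64 target with 10 waves per EU and
// 4 EUs per CU, a flat workgroup size of 256 needs 4 waves per group, so at
// most min(40 / 4, 16) = 10 workgroups can share a CU's barrier resources.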
961 | |
962 | unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { |
963 | return 1; |
964 | } |
965 | |
966 | unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { |
967 | // FIXME: Need to take scratch memory into account. |
968 | if (isGFX90A(STI: *STI)) |
969 | return 8; |
970 | if (!isGFX10Plus(STI: *STI)) |
971 | return 10; |
972 | return hasGFX10_3Insts(STI: *STI) ? 16 : 20; |
973 | } |
974 | |
975 | unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, |
976 | unsigned FlatWorkGroupSize) { |
977 | return divideCeil(Numerator: getWavesPerWorkGroup(STI, FlatWorkGroupSize), |
978 | Denominator: getEUsPerCU(STI)); |
979 | } |
980 | |
981 | unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { |
982 | return 1; |
983 | } |
984 | |
985 | unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { |
986 | // Some subtargets allow encoding 2048, but this isn't tested or supported. |
987 | return 1024; |
988 | } |
989 | |
990 | unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, |
991 | unsigned FlatWorkGroupSize) { |
992 | return divideCeil(Numerator: FlatWorkGroupSize, Denominator: getWavefrontSize(STI)); |
993 | } |
994 | |
995 | unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) { |
996 | IsaVersion Version = getIsaVersion(GPU: STI->getCPU()); |
997 | if (Version.Major >= 10) |
998 | return getAddressableNumSGPRs(STI); |
999 | if (Version.Major >= 8) |
1000 | return 16; |
1001 | return 8; |
1002 | } |
1003 | |
1004 | unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { |
1005 | return 8; |
1006 | } |
1007 | |
1008 | unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) { |
1009 | IsaVersion Version = getIsaVersion(GPU: STI->getCPU()); |
1010 | if (Version.Major >= 8) |
1011 | return 800; |
1012 | return 512; |
1013 | } |
1014 | |
1015 | unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) { |
1016 | if (STI->getFeatureBits().test(I: FeatureSGPRInitBug)) |
1017 | return FIXED_NUM_SGPRS_FOR_INIT_BUG; |
1018 | |
1019 | IsaVersion Version = getIsaVersion(GPU: STI->getCPU()); |
1020 | if (Version.Major >= 10) |
1021 | return 106; |
1022 | if (Version.Major >= 8) |
1023 | return 102; |
1024 | return 104; |
1025 | } |
1026 | |
1027 | unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { |
1028 | assert(WavesPerEU != 0); |
1029 | |
1030 | IsaVersion Version = getIsaVersion(GPU: STI->getCPU()); |
1031 | if (Version.Major >= 10) |
1032 | return 0; |
1033 | |
1034 | if (WavesPerEU >= getMaxWavesPerEU(STI)) |
1035 | return 0; |
1036 | |
1037 | unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); |
1038 | if (STI->getFeatureBits().test(I: FeatureTrapHandler)) |
1039 | MinNumSGPRs -= std::min(a: MinNumSGPRs, b: (unsigned)TRAP_NUM_SGPRS); |
1040 | MinNumSGPRs = alignDown(Value: MinNumSGPRs, Align: getSGPRAllocGranule(STI)) + 1; |
1041 | return std::min(a: MinNumSGPRs, b: getAddressableNumSGPRs(STI)); |
1042 | } |
1043 | |
1044 | unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, |
1045 | bool Addressable) { |
1046 | assert(WavesPerEU != 0); |
1047 | |
1048 | unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI); |
1049 | IsaVersion Version = getIsaVersion(GPU: STI->getCPU()); |
1050 | if (Version.Major >= 10) |
1051 | return Addressable ? AddressableNumSGPRs : 108; |
1052 | if (Version.Major >= 8 && !Addressable) |
1053 | AddressableNumSGPRs = 112; |
1054 | unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU; |
1055 | if (STI->getFeatureBits().test(I: FeatureTrapHandler)) |
1056 | MaxNumSGPRs -= std::min(a: MaxNumSGPRs, b: (unsigned)TRAP_NUM_SGPRS); |
1057 | MaxNumSGPRs = alignDown(Value: MaxNumSGPRs, Align: getSGPRAllocGranule(STI)); |
1058 | return std::min(a: MaxNumSGPRs, b: AddressableNumSGPRs); |
1059 | } |
1060 | |
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
1064 | if (VCCUsed) |
1065 | ExtraSGPRs = 2; |
1066 | |
1067 | IsaVersion Version = getIsaVersion(GPU: STI->getCPU()); |
1068 | if (Version.Major >= 10) |
1069 | return ExtraSGPRs; |
1070 | |
1071 | if (Version.Major < 8) { |
1072 | if (FlatScrUsed) |
1073 | ExtraSGPRs = 4; |
1074 | } else { |
1075 | if (XNACKUsed) |
1076 | ExtraSGPRs = 4; |
1077 | |
1078 | if (FlatScrUsed || |
1079 | STI->getFeatureBits().test(I: AMDGPU::FeatureArchitectedFlatScratch)) |
1080 | ExtraSGPRs = 6; |
1081 | } |
1082 | |
1083 | return ExtraSGPRs; |
1084 | } |
1085 | |
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
1088 | return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed, |
1089 | XNACKUsed: STI->getFeatureBits().test(I: AMDGPU::FeatureXNACK)); |
1090 | } |
1091 | |
1092 | static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, |
1093 | unsigned Granule) { |
1094 | return divideCeil(Numerator: std::max(a: 1u, b: NumRegs), Denominator: Granule); |
1095 | } |
1096 | |
1097 | unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { |
1098 | // SGPRBlocks is actual number of SGPR blocks minus 1. |
1099 | return getGranulatedNumRegisterBlocks(NumRegs: NumSGPRs, Granule: getSGPREncodingGranule(STI)) - |
1100 | 1; |
1101 | } |
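
// Worked example: with the SGPR encoding granule of 8, NumSGPRs = 35 occupies
// ceil(35 / 8) = 5 blocks and is encoded as 4, since the field stores the
// block count minus 1.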
1102 | |
1103 | unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, |
1104 | std::optional<bool> EnableWavefrontSize32) { |
1105 | if (STI->getFeatureBits().test(I: FeatureGFX90AInsts)) |
1106 | return 8; |
1107 | |
1108 | bool IsWave32 = EnableWavefrontSize32 ? |
1109 | *EnableWavefrontSize32 : |
1110 | STI->getFeatureBits().test(I: FeatureWavefrontSize32); |
1111 | |
1112 | if (STI->getFeatureBits().test(I: Feature1_5xVGPRs)) |
1113 | return IsWave32 ? 24 : 12; |
1114 | |
1115 | if (hasGFX10_3Insts(STI: *STI)) |
1116 | return IsWave32 ? 16 : 8; |
1117 | |
1118 | return IsWave32 ? 8 : 4; |
1119 | } |
1120 | |
1121 | unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, |
1122 | std::optional<bool> EnableWavefrontSize32) { |
1123 | if (STI->getFeatureBits().test(I: FeatureGFX90AInsts)) |
1124 | return 8; |
1125 | |
1126 | bool IsWave32 = EnableWavefrontSize32 ? |
1127 | *EnableWavefrontSize32 : |
1128 | STI->getFeatureBits().test(I: FeatureWavefrontSize32); |
1129 | |
1130 | return IsWave32 ? 8 : 4; |
1131 | } |
1132 | |
1133 | unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { |
1134 | if (STI->getFeatureBits().test(I: FeatureGFX90AInsts)) |
1135 | return 512; |
1136 | if (!isGFX10Plus(STI: *STI)) |
1137 | return 256; |
1138 | bool IsWave32 = STI->getFeatureBits().test(I: FeatureWavefrontSize32); |
1139 | if (STI->getFeatureBits().test(I: Feature1_5xVGPRs)) |
1140 | return IsWave32 ? 1536 : 768; |
1141 | return IsWave32 ? 1024 : 512; |
1142 | } |
1143 | |
1144 | unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; } |
1145 | |
1146 | unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { |
1147 | if (STI->getFeatureBits().test(I: FeatureGFX90AInsts)) |
1148 | return 512; |
1149 | return getAddressableNumArchVGPRs(STI); |
1150 | } |
1151 | |
1152 | unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, |
1153 | unsigned NumVGPRs) { |
1154 | return getNumWavesPerEUWithNumVGPRs(NumVGPRs, Granule: getVGPRAllocGranule(STI), |
1155 | MaxWaves: getMaxWavesPerEU(STI), |
1156 | TotalNumVGPRs: getTotalNumVGPRs(STI)); |
1157 | } |
1158 | |
1159 | unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule, |
1160 | unsigned MaxWaves, |
1161 | unsigned TotalNumVGPRs) { |
1162 | if (NumVGPRs < Granule) |
1163 | return MaxWaves; |
1164 | unsigned RoundedRegs = alignTo(Value: NumVGPRs, Align: Granule); |
1165 | return std::min(a: std::max(a: TotalNumVGPRs / RoundedRegs, b: 1u), b: MaxWaves); |
1166 | } |
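
// Illustrative example: assuming Granule = 4, TotalNumVGPRs = 256 and
// MaxWaves = 10 (typical pre-gfx10 values), NumVGPRs = 70 rounds up to 72,
// allowing min(max(256 / 72, 1), 10) = 3 waves per EU.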
1167 | |
1168 | unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, |
1169 | AMDGPUSubtarget::Generation Gen) { |
1170 | if (Gen >= AMDGPUSubtarget::GFX10) |
1171 | return MaxWaves; |
1172 | |
1173 | if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { |
1174 | if (SGPRs <= 80) |
1175 | return 10; |
1176 | if (SGPRs <= 88) |
1177 | return 9; |
1178 | if (SGPRs <= 100) |
1179 | return 8; |
1180 | return 7; |
1181 | } |
1182 | if (SGPRs <= 48) |
1183 | return 10; |
1184 | if (SGPRs <= 56) |
1185 | return 9; |
1186 | if (SGPRs <= 64) |
1187 | return 8; |
1188 | if (SGPRs <= 72) |
1189 | return 7; |
1190 | if (SGPRs <= 80) |
1191 | return 6; |
1192 | return 5; |
1193 | } |
1194 | |
1195 | unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { |
1196 | assert(WavesPerEU != 0); |
1197 | |
1198 | unsigned MaxWavesPerEU = getMaxWavesPerEU(STI); |
1199 | if (WavesPerEU >= MaxWavesPerEU) |
1200 | return 0; |
1201 | |
1202 | unsigned TotNumVGPRs = getTotalNumVGPRs(STI); |
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
1204 | unsigned Granule = getVGPRAllocGranule(STI); |
1205 | unsigned MaxNumVGPRs = alignDown(Value: TotNumVGPRs / WavesPerEU, Align: Granule); |
1206 | |
1207 | if (MaxNumVGPRs == alignDown(Value: TotNumVGPRs / MaxWavesPerEU, Align: Granule)) |
1208 | return 0; |
1209 | |
  unsigned MinWavesPerEU =
      getNumWavesPerEUWithNumVGPRs(STI, AddressableNumVGPRs);
1211 | if (WavesPerEU < MinWavesPerEU) |
1212 | return getMinNumVGPRs(STI, WavesPerEU: MinWavesPerEU); |
1213 | |
1214 | unsigned MaxNumVGPRsNext = alignDown(Value: TotNumVGPRs / (WavesPerEU + 1), Align: Granule); |
1215 | unsigned MinNumVGPRs = 1 + std::min(a: MaxNumVGPRs - Granule, b: MaxNumVGPRsNext); |
  return std::min(MinNumVGPRs, AddressableNumVGPRs);
1217 | } |
1218 | |
1219 | unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { |
1220 | assert(WavesPerEU != 0); |
1221 | |
1222 | unsigned MaxNumVGPRs = alignDown(Value: getTotalNumVGPRs(STI) / WavesPerEU, |
1223 | Align: getVGPRAllocGranule(STI)); |
1224 | unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI); |
1225 | return std::min(a: MaxNumVGPRs, b: AddressableNumVGPRs); |
1226 | } |
1227 | |
1228 | unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, |
1229 | std::optional<bool> EnableWavefrontSize32) { |
1230 | return getGranulatedNumRegisterBlocks( |
1231 | NumRegs: NumVGPRs, Granule: getVGPREncodingGranule(STI, EnableWavefrontSize32)) - |
1232 | 1; |
1233 | } |
1234 | |
1235 | unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, |
1236 | unsigned NumVGPRs, |
1237 | std::optional<bool> EnableWavefrontSize32) { |
1238 | return getGranulatedNumRegisterBlocks( |
1239 | NumRegs: NumVGPRs, Granule: getVGPRAllocGranule(STI, EnableWavefrontSize32)); |
1240 | } |
1241 | } // end namespace IsaInfo |
1242 | |
1243 | void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, |
1244 | const MCSubtargetInfo *STI) { |
1245 | IsaVersion Version = getIsaVersion(GPU: STI->getCPU()); |
1246 | KernelCode.amd_kernel_code_version_major = 1; |
1247 | KernelCode.amd_kernel_code_version_minor = 2; |
1248 | KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU |
1249 | KernelCode.amd_machine_version_major = Version.Major; |
1250 | KernelCode.amd_machine_version_minor = Version.Minor; |
1251 | KernelCode.amd_machine_version_stepping = Version.Stepping; |
1252 | KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t); |
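  // amd_kernel_code_t stores wavefront_size as a log2 value: 5 selects wave32
  // and 6 selects wave64.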
1253 | if (STI->getFeatureBits().test(I: FeatureWavefrontSize32)) { |
1254 | KernelCode.wavefront_size = 5; |
1255 | KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; |
1256 | } else { |
1257 | KernelCode.wavefront_size = 6; |
1258 | } |
1259 | |
1260 | // If the code object does not support indirect functions, then the value must |
1261 | // be 0xffffffff. |
1262 | KernelCode.call_convention = -1; |
1263 | |
1264 | // These alignment values are specified in powers of two, so alignment = |
1265 | // 2^n. The minimum alignment is 2^4 = 16. |
1266 | KernelCode.kernarg_segment_alignment = 4; |
1267 | KernelCode.group_segment_alignment = 4; |
1268 | KernelCode.private_segment_alignment = 4; |
1269 | |
1270 | if (Version.Major >= 10) { |
1271 | KernelCode.compute_pgm_resource_registers |= |
1272 | S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) | |
1273 | S_00B848_MEM_ORDERED(1); |
1274 | } |
1275 | } |
1276 | |
1277 | bool isGroupSegment(const GlobalValue *GV) { |
1278 | return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; |
1279 | } |
1280 | |
1281 | bool isGlobalSegment(const GlobalValue *GV) { |
1282 | return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; |
1283 | } |
1284 | |
1285 | bool isReadOnlySegment(const GlobalValue *GV) { |
1286 | unsigned AS = GV->getAddressSpace(); |
1287 | return AS == AMDGPUAS::CONSTANT_ADDRESS || |
1288 | AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT; |
1289 | } |
1290 | |
1291 | bool shouldEmitConstantsToTextSection(const Triple &TT) { |
1292 | return TT.getArch() == Triple::r600; |
1293 | } |
1294 | |
1295 | std::pair<unsigned, unsigned> |
1296 | getIntegerPairAttribute(const Function &F, StringRef Name, |
1297 | std::pair<unsigned, unsigned> Default, |
1298 | bool OnlyFirstRequired) { |
1299 | Attribute A = F.getFnAttribute(Kind: Name); |
1300 | if (!A.isStringAttribute()) |
1301 | return Default; |
1302 | |
1303 | LLVMContext &Ctx = F.getContext(); |
1304 | std::pair<unsigned, unsigned> Ints = Default; |
1305 | std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(Separator: ','); |
1306 | if (Strs.first.trim().getAsInteger(Radix: 0, Result&: Ints.first)) { |
1307 | Ctx.emitError(ErrorStr: "can't parse first integer attribute " + Name); |
1308 | return Default; |
1309 | } |
1310 | if (Strs.second.trim().getAsInteger(Radix: 0, Result&: Ints.second)) { |
1311 | if (!OnlyFirstRequired || !Strs.second.trim().empty()) { |
1312 | Ctx.emitError(ErrorStr: "can't parse second integer attribute " + Name); |
1313 | return Default; |
1314 | } |
1315 | } |
1316 | |
1317 | return Ints; |
1318 | } |
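
// For example (assuming the common "amdgpu-flat-work-group-size"="1,256"
// string attribute), this parses to the pair {1, 256}; a missing or malformed
// attribute falls back to the supplied Default.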
1319 | |
1320 | SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name, |
1321 | unsigned Size) { |
1322 | assert(Size > 2); |
1323 | SmallVector<unsigned> Default(Size, 0); |
1324 | |
1325 | Attribute A = F.getFnAttribute(Kind: Name); |
1326 | if (!A.isStringAttribute()) |
1327 | return Default; |
1328 | |
1329 | SmallVector<unsigned> Vals(Size, 0); |
1330 | |
1331 | LLVMContext &Ctx = F.getContext(); |
1332 | |
1333 | StringRef S = A.getValueAsString(); |
1334 | unsigned i = 0; |
1335 | for (; !S.empty() && i < Size; i++) { |
1336 | std::pair<StringRef, StringRef> Strs = S.split(Separator: ','); |
1337 | unsigned IntVal; |
1338 | if (Strs.first.trim().getAsInteger(Radix: 0, Result&: IntVal)) { |
1339 | Ctx.emitError(ErrorStr: "can't parse integer attribute " + Strs.first + " in " + |
1340 | Name); |
1341 | return Default; |
1342 | } |
1343 | Vals[i] = IntVal; |
1344 | S = Strs.second; |
1345 | } |
1346 | |
1347 | if (!S.empty() || i < Size) { |
1348 | Ctx.emitError(ErrorStr: "attribute " + Name + |
1349 | " has incorrect number of integers; expected " + |
1350 | llvm::utostr(X: Size)); |
1351 | return Default; |
1352 | } |
1353 | return Vals; |
1354 | } |
1355 | |
1356 | unsigned getVmcntBitMask(const IsaVersion &Version) { |
1357 | return (1 << (getVmcntBitWidthLo(VersionMajor: Version.Major) + |
1358 | getVmcntBitWidthHi(VersionMajor: Version.Major))) - |
1359 | 1; |
1360 | } |
1361 | |
1362 | unsigned getLoadcntBitMask(const IsaVersion &Version) { |
1363 | return (1 << getLoadcntBitWidth(VersionMajor: Version.Major)) - 1; |
1364 | } |
1365 | |
1366 | unsigned getSamplecntBitMask(const IsaVersion &Version) { |
1367 | return (1 << getSamplecntBitWidth(VersionMajor: Version.Major)) - 1; |
1368 | } |
1369 | |
1370 | unsigned getBvhcntBitMask(const IsaVersion &Version) { |
1371 | return (1 << getBvhcntBitWidth(VersionMajor: Version.Major)) - 1; |
1372 | } |
1373 | |
1374 | unsigned getExpcntBitMask(const IsaVersion &Version) { |
1375 | return (1 << getExpcntBitWidth(VersionMajor: Version.Major)) - 1; |
1376 | } |
1377 | |
1378 | unsigned getLgkmcntBitMask(const IsaVersion &Version) { |
1379 | return (1 << getLgkmcntBitWidth(VersionMajor: Version.Major)) - 1; |
1380 | } |
1381 | |
1382 | unsigned getDscntBitMask(const IsaVersion &Version) { |
1383 | return (1 << getDscntBitWidth(VersionMajor: Version.Major)) - 1; |
1384 | } |
1385 | |
1386 | unsigned getKmcntBitMask(const IsaVersion &Version) { |
1387 | return (1 << getKmcntBitWidth(VersionMajor: Version.Major)) - 1; |
1388 | } |
1389 | |
1390 | unsigned getStorecntBitMask(const IsaVersion &Version) { |
1391 | return (1 << getStorecntBitWidth(VersionMajor: Version.Major)) - 1; |
1392 | } |
1393 | |
1394 | unsigned getWaitcntBitMask(const IsaVersion &Version) { |
1395 | unsigned VmcntLo = getBitMask(Shift: getVmcntBitShiftLo(VersionMajor: Version.Major), |
1396 | Width: getVmcntBitWidthLo(VersionMajor: Version.Major)); |
1397 | unsigned Expcnt = getBitMask(Shift: getExpcntBitShift(VersionMajor: Version.Major), |
1398 | Width: getExpcntBitWidth(VersionMajor: Version.Major)); |
1399 | unsigned Lgkmcnt = getBitMask(Shift: getLgkmcntBitShift(VersionMajor: Version.Major), |
1400 | Width: getLgkmcntBitWidth(VersionMajor: Version.Major)); |
1401 | unsigned VmcntHi = getBitMask(Shift: getVmcntBitShiftHi(VersionMajor: Version.Major), |
1402 | Width: getVmcntBitWidthHi(VersionMajor: Version.Major)); |
1403 | return VmcntLo | Expcnt | Lgkmcnt | VmcntHi; |
1404 | } |
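
// Field layout implied by the shift/width helpers above: on gfx9, vmcnt
// occupies bits [3:0] plus [15:14], expcnt bits [6:4] and lgkmcnt bits [11:8];
// on gfx11, vmcnt moves to bits [15:10], expcnt to [2:0] and lgkmcnt to [9:4].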
1405 | |
1406 | unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { |
1407 | unsigned VmcntLo = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major), |
1408 | Width: getVmcntBitWidthLo(VersionMajor: Version.Major)); |
1409 | unsigned VmcntHi = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftHi(VersionMajor: Version.Major), |
1410 | Width: getVmcntBitWidthHi(VersionMajor: Version.Major)); |
1411 | return VmcntLo | VmcntHi << getVmcntBitWidthLo(VersionMajor: Version.Major); |
1412 | } |
1413 | |
1414 | unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { |
1415 | return unpackBits(Src: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major), |
1416 | Width: getExpcntBitWidth(VersionMajor: Version.Major)); |
1417 | } |
1418 | |
1419 | unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { |
1420 | return unpackBits(Src: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major), |
1421 | Width: getLgkmcntBitWidth(VersionMajor: Version.Major)); |
1422 | } |
1423 | |
1424 | void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, |
1425 | unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) { |
1426 | Vmcnt = decodeVmcnt(Version, Waitcnt); |
1427 | Expcnt = decodeExpcnt(Version, Waitcnt); |
1428 | Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); |
1429 | } |
1430 | |
1431 | Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) { |
1432 | Waitcnt Decoded; |
1433 | Decoded.LoadCnt = decodeVmcnt(Version, Waitcnt: Encoded); |
1434 | Decoded.ExpCnt = decodeExpcnt(Version, Waitcnt: Encoded); |
1435 | Decoded.DsCnt = decodeLgkmcnt(Version, Waitcnt: Encoded); |
1436 | return Decoded; |
1437 | } |
1438 | |
1439 | unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, |
1440 | unsigned Vmcnt) { |
1441 | Waitcnt = packBits(Src: Vmcnt, Dst: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major), |
1442 | Width: getVmcntBitWidthLo(VersionMajor: Version.Major)); |
1443 | return packBits(Src: Vmcnt >> getVmcntBitWidthLo(VersionMajor: Version.Major), Dst: Waitcnt, |
1444 | Shift: getVmcntBitShiftHi(VersionMajor: Version.Major), |
1445 | Width: getVmcntBitWidthHi(VersionMajor: Version.Major)); |
1446 | } |
1447 | |
1448 | unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, |
1449 | unsigned Expcnt) { |
1450 | return packBits(Src: Expcnt, Dst: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major), |
1451 | Width: getExpcntBitWidth(VersionMajor: Version.Major)); |
1452 | } |
1453 | |
1454 | unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, |
1455 | unsigned Lgkmcnt) { |
1456 | return packBits(Src: Lgkmcnt, Dst: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major), |
1457 | Width: getLgkmcntBitWidth(VersionMajor: Version.Major)); |
1458 | } |
1459 | |
1460 | unsigned encodeWaitcnt(const IsaVersion &Version, |
1461 | unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) { |
1462 | unsigned Waitcnt = getWaitcntBitMask(Version); |
1463 | Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); |
1464 | Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); |
1465 | Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); |
1466 | return Waitcnt; |
1467 | } |
1468 | |
1469 | unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) { |
1470 | return encodeWaitcnt(Version, Vmcnt: Decoded.LoadCnt, Expcnt: Decoded.ExpCnt, Lgkmcnt: Decoded.DsCnt); |
1471 | } |
1472 | |
1473 | static unsigned getCombinedCountBitMask(const IsaVersion &Version, |
1474 | bool IsStore) { |
1475 | unsigned Dscnt = getBitMask(Shift: getDscntBitShift(VersionMajor: Version.Major), |
1476 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1477 | if (IsStore) { |
1478 | unsigned Storecnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1479 | Width: getStorecntBitWidth(VersionMajor: Version.Major)); |
1480 | return Dscnt | Storecnt; |
1481 | } |
1482 | unsigned Loadcnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1483 | Width: getLoadcntBitWidth(VersionMajor: Version.Major)); |
1484 | return Dscnt | Loadcnt; |
1485 | } |
1486 | |
1487 | Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) { |
1488 | Waitcnt Decoded; |
1489 | Decoded.LoadCnt = |
1490 | unpackBits(Src: LoadcntDscnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1491 | Width: getLoadcntBitWidth(VersionMajor: Version.Major)); |
1492 | Decoded.DsCnt = unpackBits(Src: LoadcntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major), |
1493 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1494 | return Decoded; |
1495 | } |
1496 | |
1497 | Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) { |
1498 | Waitcnt Decoded; |
1499 | Decoded.StoreCnt = |
1500 | unpackBits(Src: StorecntDscnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1501 | Width: getStorecntBitWidth(VersionMajor: Version.Major)); |
1502 | Decoded.DsCnt = unpackBits(Src: StorecntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major), |
1503 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1504 | return Decoded; |
1505 | } |
1506 | |
1507 | static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, |
1508 | unsigned Loadcnt) { |
1509 | return packBits(Src: Loadcnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1510 | Width: getLoadcntBitWidth(VersionMajor: Version.Major)); |
1511 | } |
1512 | |
1513 | static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, |
1514 | unsigned Storecnt) { |
1515 | return packBits(Src: Storecnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1516 | Width: getStorecntBitWidth(VersionMajor: Version.Major)); |
1517 | } |
1518 | |
1519 | static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, |
1520 | unsigned Dscnt) { |
1521 | return packBits(Src: Dscnt, Dst: Waitcnt, Shift: getDscntBitShift(VersionMajor: Version.Major), |
1522 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1523 | } |
1524 | |
1525 | static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, |
1526 | unsigned Dscnt) { |
1527 | unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: false); |
1528 | Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt); |
1529 | Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); |
1530 | return Waitcnt; |
1531 | } |
1532 | |
1533 | unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) { |
1534 | return encodeLoadcntDscnt(Version, Loadcnt: Decoded.LoadCnt, Dscnt: Decoded.DsCnt); |
1535 | } |
1536 | |
1537 | static unsigned encodeStorecntDscnt(const IsaVersion &Version, |
1538 | unsigned Storecnt, unsigned Dscnt) { |
1539 | unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: true); |
1540 | Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt); |
1541 | Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); |
1542 | return Waitcnt; |
1543 | } |
1544 | |
1545 | unsigned encodeStorecntDscnt(const IsaVersion &Version, |
1546 | const Waitcnt &Decoded) { |
1547 | return encodeStorecntDscnt(Version, Storecnt: Decoded.StoreCnt, Dscnt: Decoded.DsCnt); |
1548 | } |
1549 | |
1550 | //===----------------------------------------------------------------------===// |
1551 | // Custom Operand Values |
1552 | //===----------------------------------------------------------------------===// |
1553 | |
1554 | static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, |
1555 | int Size, |
1556 | const MCSubtargetInfo &STI) { |
1557 | unsigned Enc = 0; |
1558 | for (int Idx = 0; Idx < Size; ++Idx) { |
1559 | const auto &Op = Opr[Idx]; |
1560 | if (Op.isSupported(STI)) |
1561 | Enc |= Op.encode(Val: Op.Default); |
1562 | } |
1563 | return Enc; |
1564 | } |
1565 | |
1566 | static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, |
1567 | int Size, unsigned Code, |
1568 | bool &HasNonDefaultVal, |
1569 | const MCSubtargetInfo &STI) { |
1570 | unsigned UsedOprMask = 0; |
1571 | HasNonDefaultVal = false; |
1572 | for (int Idx = 0; Idx < Size; ++Idx) { |
1573 | const auto &Op = Opr[Idx]; |
1574 | if (!Op.isSupported(STI)) |
1575 | continue; |
1576 | UsedOprMask |= Op.getMask(); |
1577 | unsigned Val = Op.decode(Code); |
1578 | if (!Op.isValid(Val)) |
1579 | return false; |
1580 | HasNonDefaultVal |= (Val != Op.Default); |
1581 | } |
1582 | return (Code & ~UsedOprMask) == 0; |
1583 | } |
1584 | |
1585 | static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, |
1586 | unsigned Code, int &Idx, StringRef &Name, |
1587 | unsigned &Val, bool &IsDefault, |
1588 | const MCSubtargetInfo &STI) { |
1589 | while (Idx < Size) { |
1590 | const auto &Op = Opr[Idx++]; |
1591 | if (Op.isSupported(STI)) { |
1592 | Name = Op.Name; |
1593 | Val = Op.decode(Code); |
1594 | IsDefault = (Val == Op.Default); |
1595 | return true; |
1596 | } |
1597 | } |
1598 | |
1599 | return false; |
1600 | } |
1601 | |
1602 | static int encodeCustomOperandVal(const CustomOperandVal &Op, |
1603 | int64_t InputVal) { |
1604 | if (InputVal < 0 || InputVal > Op.Max) |
1605 | return OPR_VAL_INVALID; |
1606 | return Op.encode(Val: InputVal); |
1607 | } |
1608 | |
1609 | static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, |
1610 | const StringRef Name, int64_t InputVal, |
1611 | unsigned &UsedOprMask, |
1612 | const MCSubtargetInfo &STI) { |
1613 | int InvalidId = OPR_ID_UNKNOWN; |
1614 | for (int Idx = 0; Idx < Size; ++Idx) { |
1615 | const auto &Op = Opr[Idx]; |
1616 | if (Op.Name == Name) { |
1617 | if (!Op.isSupported(STI)) { |
1618 | InvalidId = OPR_ID_UNSUPPORTED; |
1619 | continue; |
1620 | } |
1621 | auto OprMask = Op.getMask(); |
1622 | if (OprMask & UsedOprMask) |
1623 | return OPR_ID_DUPLICATE; |
1624 | UsedOprMask |= OprMask; |
1625 | return encodeCustomOperandVal(Op, InputVal); |
1626 | } |
1627 | } |
1628 | return InvalidId; |
1629 | } |
1630 | |
1631 | //===----------------------------------------------------------------------===// |
1632 | // DepCtr |
1633 | //===----------------------------------------------------------------------===// |
1634 | |
1635 | namespace DepCtr { |
1636 | |
1637 | int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) { |
1638 | static int Default = -1; |
1639 | if (Default == -1) |
1640 | Default = getDefaultCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, STI); |
1641 | return Default; |
1642 | } |
1643 | |
1644 | bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, |
1645 | const MCSubtargetInfo &STI) { |
1646 | return isSymbolicCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code, |
1647 | HasNonDefaultVal, STI); |
1648 | } |
1649 | |
1650 | bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, |
1651 | bool &IsDefault, const MCSubtargetInfo &STI) { |
1652 | return decodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code, Idx&: Id, Name, Val, |
1653 | IsDefault, STI); |
1654 | } |
1655 | |
1656 | int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, |
1657 | const MCSubtargetInfo &STI) { |
1658 | return encodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Name, InputVal: Val, UsedOprMask, |
1659 | STI); |
1660 | } |
1661 | |
1662 | unsigned decodeFieldVmVsrc(unsigned Encoded) { |
1663 | return unpackBits(Src: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth()); |
1664 | } |
1665 | |
1666 | unsigned decodeFieldVaVdst(unsigned Encoded) { |
1667 | return unpackBits(Src: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth()); |
1668 | } |
1669 | |
1670 | unsigned decodeFieldSaSdst(unsigned Encoded) { |
1671 | return unpackBits(Src: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth()); |
1672 | } |
1673 | |
1674 | unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { |
1675 | return packBits(Src: VmVsrc, Dst: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth()); |
1676 | } |
1677 | |
1678 | unsigned encodeFieldVmVsrc(unsigned VmVsrc) { |
1679 | return encodeFieldVmVsrc(Encoded: 0xffff, VmVsrc); |
1680 | } |
1681 | |
1682 | unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) { |
1683 | return packBits(Src: VaVdst, Dst: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth()); |
1684 | } |
1685 | |
1686 | unsigned encodeFieldVaVdst(unsigned VaVdst) { |
1687 | return encodeFieldVaVdst(Encoded: 0xffff, VaVdst); |
1688 | } |
1689 | |
1690 | unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) { |
1691 | return packBits(Src: SaSdst, Dst: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth()); |
1692 | } |
1693 | |
1694 | unsigned encodeFieldSaSdst(unsigned SaSdst) { |
1695 | return encodeFieldSaSdst(Encoded: 0xffff, SaSdst); |
1696 | } |
1697 | |
1698 | } // namespace DepCtr |
1699 | |
1700 | //===----------------------------------------------------------------------===// |
1701 | // exp tgt |
1702 | //===----------------------------------------------------------------------===// |
1703 | |
1704 | namespace Exp { |
1705 | |
1706 | struct ExpTgt { |
1707 | StringLiteral Name; |
1708 | unsigned Tgt; |
1709 | unsigned MaxIndex; |
1710 | }; |
1711 | |
1712 | static constexpr ExpTgt ExpTgtInfo[] = { |
1713 | {.Name: {"null" }, .Tgt: ET_NULL, .MaxIndex: ET_NULL_MAX_IDX}, |
1714 | {.Name: {"mrtz" }, .Tgt: ET_MRTZ, .MaxIndex: ET_MRTZ_MAX_IDX}, |
1715 | {.Name: {"prim" }, .Tgt: ET_PRIM, .MaxIndex: ET_PRIM_MAX_IDX}, |
1716 | {.Name: {"mrt" }, .Tgt: ET_MRT0, .MaxIndex: ET_MRT_MAX_IDX}, |
1717 | {.Name: {"pos" }, .Tgt: ET_POS0, .MaxIndex: ET_POS_MAX_IDX}, |
1718 | {.Name: {"dual_src_blend" }, .Tgt: ET_DUAL_SRC_BLEND0, .MaxIndex: ET_DUAL_SRC_BLEND_MAX_IDX}, |
1719 | {.Name: {"param" }, .Tgt: ET_PARAM0, .MaxIndex: ET_PARAM_MAX_IDX}, |
1720 | }; |
1721 | |
1722 | bool getTgtName(unsigned Id, StringRef &Name, int &Index) { |
1723 | for (const ExpTgt &Val : ExpTgtInfo) { |
1724 | if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) { |
1725 | Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt); |
1726 | Name = Val.Name; |
1727 | return true; |
1728 | } |
1729 | } |
1730 | return false; |
1731 | } |
1732 | |
1733 | unsigned getTgtId(const StringRef Name) { |
1734 | |
1735 | for (const ExpTgt &Val : ExpTgtInfo) { |
1736 | if (Val.MaxIndex == 0 && Name == Val.Name) |
1737 | return Val.Tgt; |
1738 | |
1739 | if (Val.MaxIndex > 0 && Name.starts_with(Prefix: Val.Name)) { |
1740 | StringRef Suffix = Name.drop_front(N: Val.Name.size()); |
1741 | |
1742 | unsigned Id; |
1743 | if (Suffix.getAsInteger(Radix: 10, Result&: Id) || Id > Val.MaxIndex) |
1744 | return ET_INVALID; |
1745 | |
      // Disallow leading zeroes.
1747 | if (Suffix.size() > 1 && Suffix[0] == '0') |
1748 | return ET_INVALID; |
1749 | |
1750 | return Val.Tgt + Id; |
1751 | } |
1752 | } |
1753 | return ET_INVALID; |
1754 | } |
1755 | |
1756 | bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { |
1757 | switch (Id) { |
1758 | case ET_NULL: |
1759 | return !isGFX11Plus(STI); |
1760 | case ET_POS4: |
1761 | case ET_PRIM: |
1762 | return isGFX10Plus(STI); |
1763 | case ET_DUAL_SRC_BLEND0: |
1764 | case ET_DUAL_SRC_BLEND1: |
1765 | return isGFX11Plus(STI); |
1766 | default: |
1767 | if (Id >= ET_PARAM0 && Id <= ET_PARAM31) |
1768 | return !isGFX11Plus(STI); |
1769 | return true; |
1770 | } |
1771 | } |
1772 | |
1773 | } // namespace Exp |
1774 | |
1775 | //===----------------------------------------------------------------------===// |
1776 | // MTBUF Format |
1777 | //===----------------------------------------------------------------------===// |
1778 | |
1779 | namespace MTBUFFormat { |
1780 | |
1781 | int64_t getDfmt(const StringRef Name) { |
1782 | for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) { |
1783 | if (Name == DfmtSymbolic[Id]) |
1784 | return Id; |
1785 | } |
1786 | return DFMT_UNDEF; |
1787 | } |
1788 | |
1789 | StringRef getDfmtName(unsigned Id) { |
1790 | assert(Id <= DFMT_MAX); |
1791 | return DfmtSymbolic[Id]; |
1792 | } |
1793 | |
1794 | static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) { |
1795 | if (isSI(STI) || isCI(STI)) |
1796 | return NfmtSymbolicSICI; |
1797 | if (isVI(STI) || isGFX9(STI)) |
1798 | return NfmtSymbolicVI; |
1799 | return NfmtSymbolicGFX10; |
1800 | } |
1801 | |
1802 | int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) { |
1803 | auto lookupTable = getNfmtLookupTable(STI); |
1804 | for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) { |
1805 | if (Name == lookupTable[Id]) |
1806 | return Id; |
1807 | } |
1808 | return NFMT_UNDEF; |
1809 | } |
1810 | |
1811 | StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) { |
1812 | assert(Id <= NFMT_MAX); |
1813 | return getNfmtLookupTable(STI)[Id]; |
1814 | } |
1815 | |
1816 | bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) { |
1817 | unsigned Dfmt; |
1818 | unsigned Nfmt; |
1819 | decodeDfmtNfmt(Format: Id, Dfmt, Nfmt); |
1820 | return isValidNfmt(Val: Nfmt, STI); |
1821 | } |
1822 | |
1823 | bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) { |
1824 | return !getNfmtName(Id, STI).empty(); |
1825 | } |
1826 | |
1827 | int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { |
1828 | return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); |
1829 | } |
1830 | |
1831 | void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { |
1832 | Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; |
1833 | Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; |
1834 | } |
1835 | |
1836 | int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { |
1837 | if (isGFX11Plus(STI)) { |
1838 | for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { |
1839 | if (Name == UfmtSymbolicGFX11[Id]) |
1840 | return Id; |
1841 | } |
1842 | } else { |
1843 | for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { |
1844 | if (Name == UfmtSymbolicGFX10[Id]) |
1845 | return Id; |
1846 | } |
1847 | } |
1848 | return UFMT_UNDEF; |
1849 | } |
1850 | |
1851 | StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { |
  if (isValidUnifiedFormat(Val: Id, STI))
    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
  return "";
1855 | } |
1856 | |
1857 | bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) { |
1858 | return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST; |
1859 | } |
1860 | |
1861 | int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, |
1862 | const MCSubtargetInfo &STI) { |
1863 | int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); |
1864 | if (isGFX11Plus(STI)) { |
1865 | for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { |
1866 | if (Fmt == DfmtNfmt2UFmtGFX11[Id]) |
1867 | return Id; |
1868 | } |
1869 | } else { |
1870 | for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { |
1871 | if (Fmt == DfmtNfmt2UFmtGFX10[Id]) |
1872 | return Id; |
1873 | } |
1874 | } |
1875 | return UFMT_UNDEF; |
1876 | } |
1877 | |
1878 | bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) { |
1879 | return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX); |
1880 | } |
1881 | |
1882 | unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { |
1883 | if (isGFX10Plus(STI)) |
1884 | return UFMT_DEFAULT; |
1885 | return DFMT_NFMT_DEFAULT; |
1886 | } |
1887 | |
1888 | } // namespace MTBUFFormat |
1889 | |
1890 | //===----------------------------------------------------------------------===// |
1891 | // SendMsg |
1892 | //===----------------------------------------------------------------------===// |
1893 | |
1894 | namespace SendMsg { |
1895 | |
1896 | static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { |
1897 | return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; |
1898 | } |
1899 | |
1900 | bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) { |
1901 | return (MsgId & ~(getMsgIdMask(STI))) == 0; |
1902 | } |
1903 | |
1904 | bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, |
1905 | bool Strict) { |
1906 | assert(isValidMsgId(MsgId, STI)); |
1907 | |
1908 | if (!Strict) |
1909 | return 0 <= OpId && isUInt<OP_WIDTH_>(x: OpId); |
1910 | |
1911 | if (msgRequiresOp(MsgId, STI)) { |
1912 | if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP) |
1913 | return false; |
1914 | |
1915 | return !getMsgOpName(MsgId, Encoding: OpId, STI).empty(); |
1916 | } |
1917 | |
1918 | return OpId == OP_NONE_; |
1919 | } |
1920 | |
1921 | bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, |
1922 | const MCSubtargetInfo &STI, bool Strict) { |
1923 | assert(isValidMsgOp(MsgId, OpId, STI, Strict)); |
1924 | |
1925 | if (!Strict) |
1926 | return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(x: StreamId); |
1927 | |
1928 | if (!isGFX11Plus(STI)) { |
1929 | switch (MsgId) { |
1930 | case ID_GS_PreGFX11: |
1931 | return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; |
1932 | case ID_GS_DONE_PreGFX11: |
1933 | return (OpId == OP_GS_NOP) ? |
1934 | (StreamId == STREAM_ID_NONE_) : |
1935 | (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); |
1936 | } |
1937 | } |
1938 | return StreamId == STREAM_ID_NONE_; |
1939 | } |
1940 | |
1941 | bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { |
1942 | return MsgId == ID_SYSMSG || |
1943 | (!isGFX11Plus(STI) && |
1944 | (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); |
1945 | } |
1946 | |
1947 | bool msgSupportsStream(int64_t MsgId, int64_t OpId, |
1948 | const MCSubtargetInfo &STI) { |
1949 | return !isGFX11Plus(STI) && |
1950 | (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && |
1951 | OpId != OP_GS_NOP; |
1952 | } |
1953 | |
1954 | void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, |
1955 | uint16_t &StreamId, const MCSubtargetInfo &STI) { |
1956 | MsgId = Val & getMsgIdMask(STI); |
1957 | if (isGFX11Plus(STI)) { |
1958 | OpId = 0; |
1959 | StreamId = 0; |
1960 | } else { |
1961 | OpId = (Val & OP_MASK_) >> OP_SHIFT_; |
1962 | StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; |
1963 | } |
1964 | } |
1965 | |
1966 | uint64_t encodeMsg(uint64_t MsgId, |
1967 | uint64_t OpId, |
1968 | uint64_t StreamId) { |
1969 | return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); |
1970 | } |
1971 | |
1972 | } // namespace SendMsg |
1973 | |
1974 | //===----------------------------------------------------------------------===// |
1975 | // |
1976 | //===----------------------------------------------------------------------===// |
1977 | |
1978 | unsigned getInitialPSInputAddr(const Function &F) { |
1979 | return F.getFnAttributeAsParsedInteger(Kind: "InitialPSInputAddr" , Default: 0); |
1980 | } |
1981 | |
1982 | bool getHasColorExport(const Function &F) { |
  // As a safe default, always respond as if PS has color exports.
1984 | return F.getFnAttributeAsParsedInteger( |
1985 | Kind: "amdgpu-color-export" , |
1986 | Default: F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0; |
1987 | } |
1988 | |
1989 | bool getHasDepthExport(const Function &F) { |
1990 | return F.getFnAttributeAsParsedInteger(Kind: "amdgpu-depth-export" , Default: 0) != 0; |
1991 | } |
1992 | |
1993 | bool isShader(CallingConv::ID cc) { |
1994 | switch(cc) { |
1995 | case CallingConv::AMDGPU_VS: |
1996 | case CallingConv::AMDGPU_LS: |
1997 | case CallingConv::AMDGPU_HS: |
1998 | case CallingConv::AMDGPU_ES: |
1999 | case CallingConv::AMDGPU_GS: |
2000 | case CallingConv::AMDGPU_PS: |
2001 | case CallingConv::AMDGPU_CS_Chain: |
2002 | case CallingConv::AMDGPU_CS_ChainPreserve: |
2003 | case CallingConv::AMDGPU_CS: |
2004 | return true; |
2005 | default: |
2006 | return false; |
2007 | } |
2008 | } |
2009 | |
2010 | bool isGraphics(CallingConv::ID cc) { |
2011 | return isShader(cc) || cc == CallingConv::AMDGPU_Gfx; |
2012 | } |
2013 | |
2014 | bool isCompute(CallingConv::ID cc) { |
2015 | return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS; |
2016 | } |
2017 | |
2018 | bool isEntryFunctionCC(CallingConv::ID CC) { |
2019 | switch (CC) { |
2020 | case CallingConv::AMDGPU_KERNEL: |
2021 | case CallingConv::SPIR_KERNEL: |
2022 | case CallingConv::AMDGPU_VS: |
2023 | case CallingConv::AMDGPU_GS: |
2024 | case CallingConv::AMDGPU_PS: |
2025 | case CallingConv::AMDGPU_CS: |
2026 | case CallingConv::AMDGPU_ES: |
2027 | case CallingConv::AMDGPU_HS: |
2028 | case CallingConv::AMDGPU_LS: |
2029 | return true; |
2030 | default: |
2031 | return false; |
2032 | } |
2033 | } |
2034 | |
2035 | bool isModuleEntryFunctionCC(CallingConv::ID CC) { |
2036 | switch (CC) { |
2037 | case CallingConv::AMDGPU_Gfx: |
2038 | return true; |
2039 | default: |
2040 | return isEntryFunctionCC(CC) || isChainCC(CC); |
2041 | } |
2042 | } |
2043 | |
2044 | bool isChainCC(CallingConv::ID CC) { |
2045 | switch (CC) { |
2046 | case CallingConv::AMDGPU_CS_Chain: |
2047 | case CallingConv::AMDGPU_CS_ChainPreserve: |
2048 | return true; |
2049 | default: |
2050 | return false; |
2051 | } |
2052 | } |
2053 | |
2054 | bool isKernelCC(const Function *Func) { |
2055 | return AMDGPU::isModuleEntryFunctionCC(CC: Func->getCallingConv()); |
2056 | } |
2057 | |
2058 | bool hasXNACK(const MCSubtargetInfo &STI) { |
2059 | return STI.hasFeature(Feature: AMDGPU::FeatureXNACK); |
2060 | } |
2061 | |
2062 | bool hasSRAMECC(const MCSubtargetInfo &STI) { |
2063 | return STI.hasFeature(Feature: AMDGPU::FeatureSRAMECC); |
2064 | } |
2065 | |
2066 | bool hasMIMG_R128(const MCSubtargetInfo &STI) { |
2067 | return STI.hasFeature(Feature: AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(Feature: AMDGPU::FeatureR128A16); |
2068 | } |
2069 | |
2070 | bool hasA16(const MCSubtargetInfo &STI) { |
2071 | return STI.hasFeature(Feature: AMDGPU::FeatureA16); |
2072 | } |
2073 | |
2074 | bool hasG16(const MCSubtargetInfo &STI) { |
2075 | return STI.hasFeature(Feature: AMDGPU::FeatureG16); |
2076 | } |
2077 | |
2078 | bool hasPackedD16(const MCSubtargetInfo &STI) { |
2079 | return !STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) && |
2080 | !isSI(STI); |
2081 | } |
2082 | |
2083 | bool hasGDS(const MCSubtargetInfo &STI) { |
2084 | return STI.hasFeature(Feature: AMDGPU::FeatureGDS); |
2085 | } |
2086 | |
2087 | unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) { |
2088 | auto Version = getIsaVersion(GPU: STI.getCPU()); |
2089 | if (Version.Major == 10) |
2090 | return Version.Minor >= 3 ? 13 : 5; |
2091 | if (Version.Major == 11) |
2092 | return 5; |
2093 | if (Version.Major >= 12) |
2094 | return HasSampler ? 4 : 5; |
2095 | return 0; |
2096 | } |
2097 | |
2098 | unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; } |
2099 | |
2100 | bool isSI(const MCSubtargetInfo &STI) { |
2101 | return STI.hasFeature(Feature: AMDGPU::FeatureSouthernIslands); |
2102 | } |
2103 | |
2104 | bool isCI(const MCSubtargetInfo &STI) { |
2105 | return STI.hasFeature(Feature: AMDGPU::FeatureSeaIslands); |
2106 | } |
2107 | |
2108 | bool isVI(const MCSubtargetInfo &STI) { |
2109 | return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands); |
2110 | } |
2111 | |
2112 | bool isGFX9(const MCSubtargetInfo &STI) { |
2113 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX9); |
2114 | } |
2115 | |
2116 | bool isGFX9_GFX10(const MCSubtargetInfo &STI) { |
2117 | return isGFX9(STI) || isGFX10(STI); |
2118 | } |
2119 | |
2120 | bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) { |
2121 | return isGFX9(STI) || isGFX10(STI) || isGFX11(STI); |
2122 | } |
2123 | |
2124 | bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { |
2125 | return isVI(STI) || isGFX9(STI) || isGFX10(STI); |
2126 | } |
2127 | |
2128 | bool isGFX8Plus(const MCSubtargetInfo &STI) { |
2129 | return isVI(STI) || isGFX9Plus(STI); |
2130 | } |
2131 | |
2132 | bool isGFX9Plus(const MCSubtargetInfo &STI) { |
2133 | return isGFX9(STI) || isGFX10Plus(STI); |
2134 | } |
2135 | |
2136 | bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); } |
2137 | |
2138 | bool isGFX10(const MCSubtargetInfo &STI) { |
2139 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX10); |
2140 | } |
2141 | |
2142 | bool isGFX10_GFX11(const MCSubtargetInfo &STI) { |
2143 | return isGFX10(STI) || isGFX11(STI); |
2144 | } |
2145 | |
2146 | bool isGFX10Plus(const MCSubtargetInfo &STI) { |
2147 | return isGFX10(STI) || isGFX11Plus(STI); |
2148 | } |
2149 | |
2150 | bool isGFX11(const MCSubtargetInfo &STI) { |
2151 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX11); |
2152 | } |
2153 | |
2154 | bool isGFX11Plus(const MCSubtargetInfo &STI) { |
2155 | return isGFX11(STI) || isGFX12Plus(STI); |
2156 | } |
2157 | |
2158 | bool isGFX12(const MCSubtargetInfo &STI) { |
2159 | return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; |
2160 | } |
2161 | |
2162 | bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); } |
2163 | |
2164 | bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); } |
2165 | |
2166 | bool isNotGFX11Plus(const MCSubtargetInfo &STI) { |
2167 | return !isGFX11Plus(STI); |
2168 | } |
2169 | |
2170 | bool isNotGFX10Plus(const MCSubtargetInfo &STI) { |
2171 | return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); |
2172 | } |
2173 | |
2174 | bool isGFX10Before1030(const MCSubtargetInfo &STI) { |
2175 | return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); |
2176 | } |
2177 | |
2178 | bool isGCN3Encoding(const MCSubtargetInfo &STI) { |
2179 | return STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding); |
2180 | } |
2181 | |
2182 | bool isGFX10_AEncoding(const MCSubtargetInfo &STI) { |
2183 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_AEncoding); |
2184 | } |
2185 | |
2186 | bool isGFX10_BEncoding(const MCSubtargetInfo &STI) { |
2187 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding); |
2188 | } |
2189 | |
2190 | bool hasGFX10_3Insts(const MCSubtargetInfo &STI) { |
2191 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_3Insts); |
2192 | } |
2193 | |
2194 | bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) { |
2195 | return isGFX10_BEncoding(STI) && !isGFX12Plus(STI); |
2196 | } |
2197 | |
2198 | bool isGFX90A(const MCSubtargetInfo &STI) { |
2199 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts); |
2200 | } |
2201 | |
2202 | bool isGFX940(const MCSubtargetInfo &STI) { |
2203 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts); |
2204 | } |
2205 | |
2206 | bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { |
2207 | return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch); |
2208 | } |
2209 | |
2210 | bool hasMAIInsts(const MCSubtargetInfo &STI) { |
2211 | return STI.hasFeature(Feature: AMDGPU::FeatureMAIInsts); |
2212 | } |
2213 | |
2214 | bool hasVOPD(const MCSubtargetInfo &STI) { |
2215 | return STI.hasFeature(Feature: AMDGPU::FeatureVOPD); |
2216 | } |
2217 | |
2218 | bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) { |
2219 | return STI.hasFeature(Feature: AMDGPU::FeatureDPPSrc1SGPR); |
2220 | } |
2221 | |
2222 | unsigned hasKernargPreload(const MCSubtargetInfo &STI) { |
2223 | return STI.hasFeature(Feature: AMDGPU::FeatureKernargPreload); |
2224 | } |
2225 | |
2226 | int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, |
2227 | int32_t ArgNumVGPR) { |
2228 | if (has90AInsts && ArgNumAGPR) |
2229 | return alignTo(Value: ArgNumVGPR, Align: 4) + ArgNumAGPR; |
2230 | return std::max(a: ArgNumVGPR, b: ArgNumAGPR); |
2231 | } |
2232 | |
2233 | bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { |
2234 | const MCRegisterClass SGPRClass = TRI->getRegClass(i: AMDGPU::SReg_32RegClassID); |
2235 | const unsigned FirstSubReg = TRI->getSubReg(Reg, Idx: AMDGPU::sub0); |
2236 | return SGPRClass.contains(Reg: FirstSubReg != 0 ? FirstSubReg : Reg) || |
2237 | Reg == AMDGPU::SCC; |
2238 | } |
2239 | |
2240 | bool isHi(unsigned Reg, const MCRegisterInfo &MRI) { |
2241 | return MRI.getEncodingValue(RegNo: Reg) & AMDGPU::HWEncoding::IS_HI; |
2242 | } |
2243 | |
2244 | #define MAP_REG2REG \ |
2245 | using namespace AMDGPU; \ |
2246 | switch(Reg) { \ |
2247 | default: return Reg; \ |
2248 | CASE_CI_VI(FLAT_SCR) \ |
2249 | CASE_CI_VI(FLAT_SCR_LO) \ |
2250 | CASE_CI_VI(FLAT_SCR_HI) \ |
2251 | CASE_VI_GFX9PLUS(TTMP0) \ |
2252 | CASE_VI_GFX9PLUS(TTMP1) \ |
2253 | CASE_VI_GFX9PLUS(TTMP2) \ |
2254 | CASE_VI_GFX9PLUS(TTMP3) \ |
2255 | CASE_VI_GFX9PLUS(TTMP4) \ |
2256 | CASE_VI_GFX9PLUS(TTMP5) \ |
2257 | CASE_VI_GFX9PLUS(TTMP6) \ |
2258 | CASE_VI_GFX9PLUS(TTMP7) \ |
2259 | CASE_VI_GFX9PLUS(TTMP8) \ |
2260 | CASE_VI_GFX9PLUS(TTMP9) \ |
2261 | CASE_VI_GFX9PLUS(TTMP10) \ |
2262 | CASE_VI_GFX9PLUS(TTMP11) \ |
2263 | CASE_VI_GFX9PLUS(TTMP12) \ |
2264 | CASE_VI_GFX9PLUS(TTMP13) \ |
2265 | CASE_VI_GFX9PLUS(TTMP14) \ |
2266 | CASE_VI_GFX9PLUS(TTMP15) \ |
2267 | CASE_VI_GFX9PLUS(TTMP0_TTMP1) \ |
2268 | CASE_VI_GFX9PLUS(TTMP2_TTMP3) \ |
2269 | CASE_VI_GFX9PLUS(TTMP4_TTMP5) \ |
2270 | CASE_VI_GFX9PLUS(TTMP6_TTMP7) \ |
2271 | CASE_VI_GFX9PLUS(TTMP8_TTMP9) \ |
2272 | CASE_VI_GFX9PLUS(TTMP10_TTMP11) \ |
2273 | CASE_VI_GFX9PLUS(TTMP12_TTMP13) \ |
2274 | CASE_VI_GFX9PLUS(TTMP14_TTMP15) \ |
2275 | CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \ |
2276 | CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \ |
2277 | CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \ |
2278 | CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \ |
2279 | CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ |
2280 | CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ |
2281 | CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ |
2282 | CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ |
2283 | CASE_GFXPRE11_GFX11PLUS(M0) \ |
2284 | CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \ |
2285 | CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \ |
2286 | } |
2287 | |
2288 | #define CASE_CI_VI(node) \ |
2289 | assert(!isSI(STI)); \ |
2290 | case node: return isCI(STI) ? node##_ci : node##_vi; |
2291 | |
2292 | #define CASE_VI_GFX9PLUS(node) \ |
2293 | case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; |
2294 | |
2295 | #define CASE_GFXPRE11_GFX11PLUS(node) \ |
2296 | case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11; |
2297 | |
2298 | #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \ |
2299 | case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11; |
2300 | |
2301 | unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { |
2302 | if (STI.getTargetTriple().getArch() == Triple::r600) |
2303 | return Reg; |
2304 | MAP_REG2REG |
2305 | } |
2306 | |
2307 | #undef CASE_CI_VI |
2308 | #undef CASE_VI_GFX9PLUS |
2309 | #undef CASE_GFXPRE11_GFX11PLUS |
2310 | #undef CASE_GFXPRE11_GFX11PLUS_TO |
2311 | |
2312 | #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; |
2313 | #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node; |
2314 | #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node; |
2315 | #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) |
2316 | |
2317 | unsigned mc2PseudoReg(unsigned Reg) { |
2318 | MAP_REG2REG |
2319 | } |
2320 | |
2321 | bool isInlineValue(unsigned Reg) { |
2322 | switch (Reg) { |
2323 | case AMDGPU::SRC_SHARED_BASE_LO: |
2324 | case AMDGPU::SRC_SHARED_BASE: |
2325 | case AMDGPU::SRC_SHARED_LIMIT_LO: |
2326 | case AMDGPU::SRC_SHARED_LIMIT: |
2327 | case AMDGPU::SRC_PRIVATE_BASE_LO: |
2328 | case AMDGPU::SRC_PRIVATE_BASE: |
2329 | case AMDGPU::SRC_PRIVATE_LIMIT_LO: |
2330 | case AMDGPU::SRC_PRIVATE_LIMIT: |
2331 | case AMDGPU::SRC_POPS_EXITING_WAVE_ID: |
2332 | return true; |
2333 | case AMDGPU::SRC_VCCZ: |
2334 | case AMDGPU::SRC_EXECZ: |
2335 | case AMDGPU::SRC_SCC: |
2336 | return true; |
2337 | case AMDGPU::SGPR_NULL: |
2338 | return true; |
2339 | default: |
2340 | return false; |
2341 | } |
2342 | } |
2343 | |
2344 | #undef CASE_CI_VI |
2345 | #undef CASE_VI_GFX9PLUS |
2346 | #undef CASE_GFXPRE11_GFX11PLUS |
2347 | #undef CASE_GFXPRE11_GFX11PLUS_TO |
2348 | #undef MAP_REG2REG |
2349 | |
2350 | bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2351 | assert(OpNo < Desc.NumOperands); |
2352 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2353 | return OpType >= AMDGPU::OPERAND_SRC_FIRST && |
2354 | OpType <= AMDGPU::OPERAND_SRC_LAST; |
2355 | } |
2356 | |
2357 | bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2358 | assert(OpNo < Desc.NumOperands); |
2359 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2360 | return OpType >= AMDGPU::OPERAND_KIMM_FIRST && |
2361 | OpType <= AMDGPU::OPERAND_KIMM_LAST; |
2362 | } |
2363 | |
2364 | bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2365 | assert(OpNo < Desc.NumOperands); |
2366 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2367 | switch (OpType) { |
2368 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2369 | case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: |
2370 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2371 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2372 | case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: |
2373 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2374 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2375 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2376 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2377 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2378 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2379 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
2380 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: |
2381 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2382 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
2383 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2384 | return true; |
2385 | default: |
2386 | return false; |
2387 | } |
2388 | } |
2389 | |
2390 | bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2391 | assert(OpNo < Desc.NumOperands); |
2392 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2393 | return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && |
2394 | OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) || |
2395 | (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && |
2396 | OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST); |
2397 | } |
2398 | |
2399 | // Avoid using MCRegisterClass::getSize, since that function will go away |
2400 | // (move from MC* level to Target* level). Return size in bits. |
2401 | unsigned getRegBitWidth(unsigned RCID) { |
2402 | switch (RCID) { |
2403 | case AMDGPU::SGPR_LO16RegClassID: |
2404 | case AMDGPU::AGPR_LO16RegClassID: |
2405 | return 16; |
2406 | case AMDGPU::SGPR_32RegClassID: |
2407 | case AMDGPU::VGPR_32RegClassID: |
2408 | case AMDGPU::VRegOrLds_32RegClassID: |
2409 | case AMDGPU::AGPR_32RegClassID: |
2410 | case AMDGPU::VS_32RegClassID: |
2411 | case AMDGPU::AV_32RegClassID: |
2412 | case AMDGPU::SReg_32RegClassID: |
2413 | case AMDGPU::SReg_32_XM0RegClassID: |
2414 | case AMDGPU::SRegOrLds_32RegClassID: |
2415 | return 32; |
2416 | case AMDGPU::SGPR_64RegClassID: |
2417 | case AMDGPU::VS_64RegClassID: |
2418 | case AMDGPU::SReg_64RegClassID: |
2419 | case AMDGPU::VReg_64RegClassID: |
2420 | case AMDGPU::AReg_64RegClassID: |
2421 | case AMDGPU::SReg_64_XEXECRegClassID: |
2422 | case AMDGPU::VReg_64_Align2RegClassID: |
2423 | case AMDGPU::AReg_64_Align2RegClassID: |
2424 | case AMDGPU::AV_64RegClassID: |
2425 | case AMDGPU::AV_64_Align2RegClassID: |
2426 | return 64; |
2427 | case AMDGPU::SGPR_96RegClassID: |
2428 | case AMDGPU::SReg_96RegClassID: |
2429 | case AMDGPU::VReg_96RegClassID: |
2430 | case AMDGPU::AReg_96RegClassID: |
2431 | case AMDGPU::VReg_96_Align2RegClassID: |
2432 | case AMDGPU::AReg_96_Align2RegClassID: |
2433 | case AMDGPU::AV_96RegClassID: |
2434 | case AMDGPU::AV_96_Align2RegClassID: |
2435 | return 96; |
2436 | case AMDGPU::SGPR_128RegClassID: |
2437 | case AMDGPU::SReg_128RegClassID: |
2438 | case AMDGPU::VReg_128RegClassID: |
2439 | case AMDGPU::AReg_128RegClassID: |
2440 | case AMDGPU::VReg_128_Align2RegClassID: |
2441 | case AMDGPU::AReg_128_Align2RegClassID: |
2442 | case AMDGPU::AV_128RegClassID: |
2443 | case AMDGPU::AV_128_Align2RegClassID: |
2444 | return 128; |
2445 | case AMDGPU::SGPR_160RegClassID: |
2446 | case AMDGPU::SReg_160RegClassID: |
2447 | case AMDGPU::VReg_160RegClassID: |
2448 | case AMDGPU::AReg_160RegClassID: |
2449 | case AMDGPU::VReg_160_Align2RegClassID: |
2450 | case AMDGPU::AReg_160_Align2RegClassID: |
2451 | case AMDGPU::AV_160RegClassID: |
2452 | case AMDGPU::AV_160_Align2RegClassID: |
2453 | return 160; |
2454 | case AMDGPU::SGPR_192RegClassID: |
2455 | case AMDGPU::SReg_192RegClassID: |
2456 | case AMDGPU::VReg_192RegClassID: |
2457 | case AMDGPU::AReg_192RegClassID: |
2458 | case AMDGPU::VReg_192_Align2RegClassID: |
2459 | case AMDGPU::AReg_192_Align2RegClassID: |
2460 | case AMDGPU::AV_192RegClassID: |
2461 | case AMDGPU::AV_192_Align2RegClassID: |
2462 | return 192; |
2463 | case AMDGPU::SGPR_224RegClassID: |
2464 | case AMDGPU::SReg_224RegClassID: |
2465 | case AMDGPU::VReg_224RegClassID: |
2466 | case AMDGPU::AReg_224RegClassID: |
2467 | case AMDGPU::VReg_224_Align2RegClassID: |
2468 | case AMDGPU::AReg_224_Align2RegClassID: |
2469 | case AMDGPU::AV_224RegClassID: |
2470 | case AMDGPU::AV_224_Align2RegClassID: |
2471 | return 224; |
2472 | case AMDGPU::SGPR_256RegClassID: |
2473 | case AMDGPU::SReg_256RegClassID: |
2474 | case AMDGPU::VReg_256RegClassID: |
2475 | case AMDGPU::AReg_256RegClassID: |
2476 | case AMDGPU::VReg_256_Align2RegClassID: |
2477 | case AMDGPU::AReg_256_Align2RegClassID: |
2478 | case AMDGPU::AV_256RegClassID: |
2479 | case AMDGPU::AV_256_Align2RegClassID: |
2480 | return 256; |
2481 | case AMDGPU::SGPR_288RegClassID: |
2482 | case AMDGPU::SReg_288RegClassID: |
2483 | case AMDGPU::VReg_288RegClassID: |
2484 | case AMDGPU::AReg_288RegClassID: |
2485 | case AMDGPU::VReg_288_Align2RegClassID: |
2486 | case AMDGPU::AReg_288_Align2RegClassID: |
2487 | case AMDGPU::AV_288RegClassID: |
2488 | case AMDGPU::AV_288_Align2RegClassID: |
2489 | return 288; |
2490 | case AMDGPU::SGPR_320RegClassID: |
2491 | case AMDGPU::SReg_320RegClassID: |
2492 | case AMDGPU::VReg_320RegClassID: |
2493 | case AMDGPU::AReg_320RegClassID: |
2494 | case AMDGPU::VReg_320_Align2RegClassID: |
2495 | case AMDGPU::AReg_320_Align2RegClassID: |
2496 | case AMDGPU::AV_320RegClassID: |
2497 | case AMDGPU::AV_320_Align2RegClassID: |
2498 | return 320; |
2499 | case AMDGPU::SGPR_352RegClassID: |
2500 | case AMDGPU::SReg_352RegClassID: |
2501 | case AMDGPU::VReg_352RegClassID: |
2502 | case AMDGPU::AReg_352RegClassID: |
2503 | case AMDGPU::VReg_352_Align2RegClassID: |
2504 | case AMDGPU::AReg_352_Align2RegClassID: |
2505 | case AMDGPU::AV_352RegClassID: |
2506 | case AMDGPU::AV_352_Align2RegClassID: |
2507 | return 352; |
2508 | case AMDGPU::SGPR_384RegClassID: |
2509 | case AMDGPU::SReg_384RegClassID: |
2510 | case AMDGPU::VReg_384RegClassID: |
2511 | case AMDGPU::AReg_384RegClassID: |
2512 | case AMDGPU::VReg_384_Align2RegClassID: |
2513 | case AMDGPU::AReg_384_Align2RegClassID: |
2514 | case AMDGPU::AV_384RegClassID: |
2515 | case AMDGPU::AV_384_Align2RegClassID: |
2516 | return 384; |
2517 | case AMDGPU::SGPR_512RegClassID: |
2518 | case AMDGPU::SReg_512RegClassID: |
2519 | case AMDGPU::VReg_512RegClassID: |
2520 | case AMDGPU::AReg_512RegClassID: |
2521 | case AMDGPU::VReg_512_Align2RegClassID: |
2522 | case AMDGPU::AReg_512_Align2RegClassID: |
2523 | case AMDGPU::AV_512RegClassID: |
2524 | case AMDGPU::AV_512_Align2RegClassID: |
2525 | return 512; |
2526 | case AMDGPU::SGPR_1024RegClassID: |
2527 | case AMDGPU::SReg_1024RegClassID: |
2528 | case AMDGPU::VReg_1024RegClassID: |
2529 | case AMDGPU::AReg_1024RegClassID: |
2530 | case AMDGPU::VReg_1024_Align2RegClassID: |
2531 | case AMDGPU::AReg_1024_Align2RegClassID: |
2532 | case AMDGPU::AV_1024RegClassID: |
2533 | case AMDGPU::AV_1024_Align2RegClassID: |
2534 | return 1024; |
2535 | default: |
2536 | llvm_unreachable("Unexpected register class" ); |
2537 | } |
2538 | } |
2539 | |
2540 | unsigned getRegBitWidth(const MCRegisterClass &RC) { |
2541 | return getRegBitWidth(RCID: RC.getID()); |
2542 | } |
2543 | |
2544 | unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, |
2545 | unsigned OpNo) { |
2546 | assert(OpNo < Desc.NumOperands); |
2547 | unsigned RCID = Desc.operands()[OpNo].RegClass; |
2548 | return getRegBitWidth(RCID) / 8; |
2549 | } |
2550 | |
2551 | bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { |
2552 | if (isInlinableIntLiteral(Literal)) |
2553 | return true; |
2554 | |
2555 | uint64_t Val = static_cast<uint64_t>(Literal); |
2556 | return (Val == llvm::bit_cast<uint64_t>(from: 0.0)) || |
2557 | (Val == llvm::bit_cast<uint64_t>(from: 1.0)) || |
2558 | (Val == llvm::bit_cast<uint64_t>(from: -1.0)) || |
2559 | (Val == llvm::bit_cast<uint64_t>(from: 0.5)) || |
2560 | (Val == llvm::bit_cast<uint64_t>(from: -0.5)) || |
2561 | (Val == llvm::bit_cast<uint64_t>(from: 2.0)) || |
2562 | (Val == llvm::bit_cast<uint64_t>(from: -2.0)) || |
2563 | (Val == llvm::bit_cast<uint64_t>(from: 4.0)) || |
2564 | (Val == llvm::bit_cast<uint64_t>(from: -4.0)) || |
2565 | (Val == 0x3fc45f306dc9c882 && HasInv2Pi); |
2566 | } |
2567 | |
2568 | bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { |
2569 | if (isInlinableIntLiteral(Literal)) |
2570 | return true; |
2571 | |
2572 | // The actual type of the operand does not seem to matter as long |
2573 | // as the bits match one of the inline immediate values. For example: |
2574 | // |
  // A -nan bit pattern such as 0xfffffffe reads as -2 when taken as a signed
  // integer, so it is a legal inline immediate.
2577 | // |
2578 | // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in |
2579 | // floating-point, so it is a legal inline immediate. |
2580 | |
2581 | uint32_t Val = static_cast<uint32_t>(Literal); |
2582 | return (Val == llvm::bit_cast<uint32_t>(from: 0.0f)) || |
2583 | (Val == llvm::bit_cast<uint32_t>(from: 1.0f)) || |
2584 | (Val == llvm::bit_cast<uint32_t>(from: -1.0f)) || |
2585 | (Val == llvm::bit_cast<uint32_t>(from: 0.5f)) || |
2586 | (Val == llvm::bit_cast<uint32_t>(from: -0.5f)) || |
2587 | (Val == llvm::bit_cast<uint32_t>(from: 2.0f)) || |
2588 | (Val == llvm::bit_cast<uint32_t>(from: -2.0f)) || |
2589 | (Val == llvm::bit_cast<uint32_t>(from: 4.0f)) || |
2590 | (Val == llvm::bit_cast<uint32_t>(from: -4.0f)) || |
2591 | (Val == 0x3e22f983 && HasInv2Pi); |
2592 | } |
2593 | |
2594 | bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) { |
2595 | if (!HasInv2Pi) |
2596 | return false; |
2597 | if (isInlinableIntLiteral(Literal)) |
2598 | return true; |
2599 | uint16_t Val = static_cast<uint16_t>(Literal); |
2600 | return Val == 0x3F00 || // 0.5 |
2601 | Val == 0xBF00 || // -0.5 |
2602 | Val == 0x3F80 || // 1.0 |
2603 | Val == 0xBF80 || // -1.0 |
2604 | Val == 0x4000 || // 2.0 |
2605 | Val == 0xC000 || // -2.0 |
2606 | Val == 0x4080 || // 4.0 |
2607 | Val == 0xC080 || // -4.0 |
2608 | Val == 0x3E22; // 1.0 / (2.0 * pi) |
2609 | } |
2610 | |
2611 | bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) { |
2612 | return isInlinableLiteral32(Literal, HasInv2Pi); |
2613 | } |
2614 | |
2615 | bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) { |
2616 | if (!HasInv2Pi) |
2617 | return false; |
2618 | if (isInlinableIntLiteral(Literal)) |
2619 | return true; |
2620 | uint16_t Val = static_cast<uint16_t>(Literal); |
2621 | return Val == 0x3C00 || // 1.0 |
2622 | Val == 0xBC00 || // -1.0 |
2623 | Val == 0x3800 || // 0.5 |
2624 | Val == 0xB800 || // -0.5 |
2625 | Val == 0x4000 || // 2.0 |
2626 | Val == 0xC000 || // -2.0 |
2627 | Val == 0x4400 || // 4.0 |
2628 | Val == 0xC400 || // -4.0 |
2629 | Val == 0x3118; // 1/2pi |
2630 | } |
2631 | |
2632 | std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) { |
2633 | // Unfortunately, the Instruction Set Architecture Reference Guide is |
2634 | // misleading about how the inline operands work for (packed) 16-bit |
2635 | // instructions. In a nutshell, the actual HW behavior is: |
2636 | // |
2637 | // - integer encodings (-16 .. 64) are always produced as sign-extended |
2638 | // 32-bit values |
2639 | // - float encodings are produced as: |
2640 | // - for F16 instructions: corresponding half-precision float values in |
2641 | // the LSBs, 0 in the MSBs |
2642 | // - for UI16 instructions: corresponding single-precision float value |
2643 | int32_t Signed = static_cast<int32_t>(Literal); |
2644 | if (Signed >= 0 && Signed <= 64) |
2645 | return 128 + Signed; |
2646 | |
2647 | if (Signed >= -16 && Signed <= -1) |
2648 | return 192 + std::abs(x: Signed); |
2649 | |
2650 | if (IsFloat) { |
2651 | // clang-format off |
2652 | switch (Literal) { |
2653 | case 0x3800: return 240; // 0.5 |
2654 | case 0xB800: return 241; // -0.5 |
2655 | case 0x3C00: return 242; // 1.0 |
2656 | case 0xBC00: return 243; // -1.0 |
2657 | case 0x4000: return 244; // 2.0 |
2658 | case 0xC000: return 245; // -2.0 |
2659 | case 0x4400: return 246; // 4.0 |
2660 | case 0xC400: return 247; // -4.0 |
2661 | case 0x3118: return 248; // 1.0 / (2.0 * pi) |
2662 | default: break; |
2663 | } |
2664 | // clang-format on |
2665 | } else { |
2666 | // clang-format off |
2667 | switch (Literal) { |
2668 | case 0x3F000000: return 240; // 0.5 |
2669 | case 0xBF000000: return 241; // -0.5 |
2670 | case 0x3F800000: return 242; // 1.0 |
2671 | case 0xBF800000: return 243; // -1.0 |
2672 | case 0x40000000: return 244; // 2.0 |
2673 | case 0xC0000000: return 245; // -2.0 |
2674 | case 0x40800000: return 246; // 4.0 |
2675 | case 0xC0800000: return 247; // -4.0 |
2676 | case 0x3E22F983: return 248; // 1.0 / (2.0 * pi) |
2677 | default: break; |
2678 | } |
2679 | // clang-format on |
2680 | } |
2681 | |
2682 | return {}; |
2683 | } |
2684 | |
2685 | // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction |
2686 | // or nullopt. |
2687 | std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) { |
2688 | return getInlineEncodingV216(IsFloat: false, Literal); |
2689 | } |
2690 | |
2691 | // Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction |
2692 | // or nullopt. |
2693 | std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) { |
2694 | int32_t Signed = static_cast<int32_t>(Literal); |
2695 | if (Signed >= 0 && Signed <= 64) |
2696 | return 128 + Signed; |
2697 | |
2698 | if (Signed >= -16 && Signed <= -1) |
2699 | return 192 + std::abs(x: Signed); |
2700 | |
2701 | // clang-format off |
2702 | switch (Literal) { |
2703 | case 0x3F00: return 240; // 0.5 |
2704 | case 0xBF00: return 241; // -0.5 |
2705 | case 0x3F80: return 242; // 1.0 |
2706 | case 0xBF80: return 243; // -1.0 |
2707 | case 0x4000: return 244; // 2.0 |
2708 | case 0xC000: return 245; // -2.0 |
2709 | case 0x4080: return 246; // 4.0 |
2710 | case 0xC080: return 247; // -4.0 |
2711 | case 0x3E22: return 248; // 1.0 / (2.0 * pi) |
2712 | default: break; |
2713 | } |
2714 | // clang-format on |
2715 | |
2716 | return std::nullopt; |
2717 | } |
2718 | |
2719 | // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction |
2720 | // or nullopt. |
2721 | std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) { |
2722 | return getInlineEncodingV216(IsFloat: true, Literal); |
2723 | } |
2724 | |
2725 | // Whether the given literal can be inlined for a V_PK_* instruction. |
2726 | bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { |
2727 | switch (OpType) { |
2728 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2729 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
2730 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: |
2731 | return getInlineEncodingV216(IsFloat: false, Literal).has_value(); |
2732 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2733 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2734 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: |
2735 | return getInlineEncodingV216(IsFloat: true, Literal).has_value(); |
2736 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2737 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
2738 | case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: |
2739 | return isInlinableLiteralV2BF16(Literal); |
2740 | default: |
2741 | llvm_unreachable("bad packed operand type" ); |
2742 | } |
2743 | } |
2744 | |
2745 | // Whether the given literal can be inlined for a V_PK_*_IU16 instruction. |
2746 | bool isInlinableLiteralV2I16(uint32_t Literal) { |
2747 | return getInlineEncodingV2I16(Literal).has_value(); |
2748 | } |
2749 | |
2750 | // Whether the given literal can be inlined for a V_PK_*_BF16 instruction. |
2751 | bool isInlinableLiteralV2BF16(uint32_t Literal) { |
2752 | return getInlineEncodingV2BF16(Literal).has_value(); |
2753 | } |
2754 | |
2755 | // Whether the given literal can be inlined for a V_PK_*_F16 instruction. |
2756 | bool isInlinableLiteralV2F16(uint32_t Literal) { |
2757 | return getInlineEncodingV2F16(Literal).has_value(); |
2758 | } |
2759 | |
2760 | bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { |
2761 | if (IsFP64) |
2762 | return !(Val & 0xffffffffu); |
2763 | |
2764 | return isUInt<32>(x: Val) || isInt<32>(x: Val); |
2765 | } |
2766 | |
2767 | bool isArgPassedInSGPR(const Argument *A) { |
2768 | const Function *F = A->getParent(); |
2769 | |
2770 | // Arguments to compute shaders are never a source of divergence. |
2771 | CallingConv::ID CC = F->getCallingConv(); |
2772 | switch (CC) { |
2773 | case CallingConv::AMDGPU_KERNEL: |
2774 | case CallingConv::SPIR_KERNEL: |
2775 | return true; |
2776 | case CallingConv::AMDGPU_VS: |
2777 | case CallingConv::AMDGPU_LS: |
2778 | case CallingConv::AMDGPU_HS: |
2779 | case CallingConv::AMDGPU_ES: |
2780 | case CallingConv::AMDGPU_GS: |
2781 | case CallingConv::AMDGPU_PS: |
2782 | case CallingConv::AMDGPU_CS: |
2783 | case CallingConv::AMDGPU_Gfx: |
2784 | case CallingConv::AMDGPU_CS_Chain: |
2785 | case CallingConv::AMDGPU_CS_ChainPreserve: |
2786 | // For non-compute shaders, SGPR inputs are marked with either inreg or |
2787 | // byval. Everything else is in VGPRs. |
2788 | return A->hasAttribute(Kind: Attribute::InReg) || |
2789 | A->hasAttribute(Kind: Attribute::ByVal); |
2790 | default: |
2791 | // TODO: treat i1 as divergent? |
2792 | return A->hasAttribute(Kind: Attribute::InReg); |
2793 | } |
2794 | } |
2795 | |
2796 | bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) { |
2797 | // Arguments to compute shaders are never a source of divergence. |
2798 | CallingConv::ID CC = CB->getCallingConv(); |
2799 | switch (CC) { |
2800 | case CallingConv::AMDGPU_KERNEL: |
2801 | case CallingConv::SPIR_KERNEL: |
2802 | return true; |
2803 | case CallingConv::AMDGPU_VS: |
2804 | case CallingConv::AMDGPU_LS: |
2805 | case CallingConv::AMDGPU_HS: |
2806 | case CallingConv::AMDGPU_ES: |
2807 | case CallingConv::AMDGPU_GS: |
2808 | case CallingConv::AMDGPU_PS: |
2809 | case CallingConv::AMDGPU_CS: |
2810 | case CallingConv::AMDGPU_Gfx: |
2811 | case CallingConv::AMDGPU_CS_Chain: |
2812 | case CallingConv::AMDGPU_CS_ChainPreserve: |
2813 | // For non-compute shaders, SGPR inputs are marked with either inreg or |
2814 | // byval. Everything else is in VGPRs. |
2815 | return CB->paramHasAttr(ArgNo, Kind: Attribute::InReg) || |
2816 | CB->paramHasAttr(ArgNo, Kind: Attribute::ByVal); |
2817 | default: |
2818 | return CB->paramHasAttr(ArgNo, Kind: Attribute::InReg); |
2819 | } |
2820 | } |
2821 | |
2822 | static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) { |
2823 | return isGCN3Encoding(STI: ST) || isGFX10Plus(STI: ST); |
2824 | } |
2825 | |
2826 | bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, |
2827 | int64_t EncodedOffset) { |
2828 | if (isGFX12Plus(STI: ST)) |
2829 | return isUInt<23>(x: EncodedOffset); |
2830 | |
2831 | return hasSMEMByteOffset(ST) ? isUInt<20>(x: EncodedOffset) |
2832 | : isUInt<8>(x: EncodedOffset); |
2833 | } |
2834 | |
2835 | bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, |
2836 | int64_t EncodedOffset, |
2837 | bool IsBuffer) { |
2838 | if (isGFX12Plus(STI: ST)) |
2839 | return isInt<24>(x: EncodedOffset); |
2840 | |
2841 | return !IsBuffer && |
2842 | hasSMRDSignedImmOffset(ST) && |
2843 | isInt<21>(x: EncodedOffset); |
2844 | } |
2845 | |
2846 | static bool isDwordAligned(uint64_t ByteOffset) { |
2847 | return (ByteOffset & 3) == 0; |
2848 | } |
2849 | |
2850 | uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, |
2851 | uint64_t ByteOffset) { |
2852 | if (hasSMEMByteOffset(ST)) |
2853 | return ByteOffset; |
2854 | |
2855 | assert(isDwordAligned(ByteOffset)); |
2856 | return ByteOffset >> 2; |
2857 | } |
2858 | |
2859 | std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, |
2860 | int64_t ByteOffset, bool IsBuffer, |
2861 | bool HasSOffset) { |
  // For unbuffered smem loads, it is illegal for the Immediate Offset to be
  // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
  // Handle the case where SOffset is not present.
2865 | if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST)) |
2866 | return std::nullopt; |
2867 | |
  if (isGFX12Plus(STI: ST)) // 24-bit signed offsets
2869 | return isInt<24>(x: ByteOffset) ? std::optional<int64_t>(ByteOffset) |
2870 | : std::nullopt; |
2871 | |
2872 | // The signed version is always a byte offset. |
2873 | if (!IsBuffer && hasSMRDSignedImmOffset(ST)) { |
2874 | assert(hasSMEMByteOffset(ST)); |
2875 | return isInt<20>(x: ByteOffset) ? std::optional<int64_t>(ByteOffset) |
2876 | : std::nullopt; |
2877 | } |
2878 | |
2879 | if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) |
2880 | return std::nullopt; |
2881 | |
2882 | int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); |
2883 | return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset) |
2884 | ? std::optional<int64_t>(EncodedOffset) |
2885 | : std::nullopt; |
2886 | } |
2887 | |
2888 | std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, |
2889 | int64_t ByteOffset) { |
2890 | if (!isCI(STI: ST) || !isDwordAligned(ByteOffset)) |
2891 | return std::nullopt; |
2892 | |
2893 | int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); |
2894 | return isUInt<32>(x: EncodedOffset) ? std::optional<int64_t>(EncodedOffset) |
2895 | : std::nullopt; |
2896 | } |
2897 | |
2898 | unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) { |
2899 | if (AMDGPU::isGFX10(STI: ST)) |
2900 | return 12; |
2901 | |
2902 | if (AMDGPU::isGFX12(STI: ST)) |
2903 | return 24; |
2904 | return 13; |
2905 | } |
2906 | |
2907 | namespace { |
2908 | |
2909 | struct SourceOfDivergence { |
2910 | unsigned Intr; |
2911 | }; |
2912 | const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); |
2913 | |
2914 | struct AlwaysUniform { |
2915 | unsigned Intr; |
2916 | }; |
2917 | const AlwaysUniform *lookupAlwaysUniform(unsigned Intr); |
2918 | |
2919 | #define GET_SourcesOfDivergence_IMPL |
2920 | #define GET_UniformIntrinsics_IMPL |
2921 | #define GET_Gfx9BufferFormat_IMPL |
2922 | #define GET_Gfx10BufferFormat_IMPL |
2923 | #define GET_Gfx11PlusBufferFormat_IMPL |
2924 | #include "AMDGPUGenSearchableTables.inc" |
2925 | |
2926 | } // end anonymous namespace |
2927 | |
2928 | bool isIntrinsicSourceOfDivergence(unsigned IntrID) { |
2929 | return lookupSourceOfDivergence(Intr: IntrID); |
2930 | } |
2931 | |
2932 | bool isIntrinsicAlwaysUniform(unsigned IntrID) { |
2933 | return lookupAlwaysUniform(Intr: IntrID); |
2934 | } |
2935 | |
2936 | const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, |
2937 | uint8_t NumComponents, |
2938 | uint8_t NumFormat, |
2939 | const MCSubtargetInfo &STI) { |
2940 | return isGFX11Plus(STI) |
2941 | ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents, |
2942 | NumFormat) |
2943 | : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp, |
2944 | NumComponents, NumFormat) |
2945 | : getGfx9BufferFormatInfo(BitsPerComp, |
2946 | NumComponents, NumFormat); |
2947 | } |
2948 | |
2949 | const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, |
2950 | const MCSubtargetInfo &STI) { |
2951 | return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format) |
2952 | : isGFX10(STI) ? getGfx10BufferFormatInfo(Format) |
2953 | : getGfx9BufferFormatInfo(Format); |
2954 | } |
2955 | |
2956 | bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) { |
2957 | for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1, |
2958 | OpName::src2 }) { |
2959 | int Idx = getNamedOperandIdx(Opcode: OpDesc.getOpcode(), NamedIdx: OpName); |
2960 | if (Idx == -1) |
2961 | continue; |
2962 | |
2963 | if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID || |
2964 | OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID) |
2965 | return true; |
2966 | } |
2967 | |
2968 | return false; |
2969 | } |
2970 | |
2971 | bool isDPALU_DPP(const MCInstrDesc &OpDesc) { |
2972 | return hasAny64BitVGPROperands(OpDesc); |
2973 | } |
2974 | |
2975 | unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) { |
2976 | // Currently this is 128 for all subtargets |
2977 | return 128; |
2978 | } |
2979 | |
2980 | } // namespace AMDGPU |
2981 | |
2982 | raw_ostream &operator<<(raw_ostream &OS, |
2983 | const AMDGPU::IsaInfo::TargetIDSetting S) { |
2984 | switch (S) { |
2985 | case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported): |
2986 | OS << "Unsupported" ; |
2987 | break; |
2988 | case (AMDGPU::IsaInfo::TargetIDSetting::Any): |
2989 | OS << "Any" ; |
2990 | break; |
2991 | case (AMDGPU::IsaInfo::TargetIDSetting::Off): |
2992 | OS << "Off" ; |
2993 | break; |
2994 | case (AMDGPU::IsaInfo::TargetIDSetting::On): |
2995 | OS << "On" ; |
2996 | break; |
2997 | } |
2998 | return OS; |
2999 | } |
3000 | |
3001 | } // namespace llvm |
3002 | |