1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
10#include "MCTargetDesc/AMDGPUInstPrinter.h"
11#include "MCTargetDesc/AMDGPUMCAsmInfo.h"
12#include "MCTargetDesc/AMDGPUMCExpr.h"
13#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
14#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15#include "MCTargetDesc/AMDGPUTargetStreamer.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
18#include "TargetInfo/AMDGPUTargetInfo.h"
19#include "Utils/AMDGPUAsmUtils.h"
20#include "Utils/AMDGPUBaseInfo.h"
21#include "Utils/AMDKernelCodeTUtils.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
26#include "llvm/BinaryFormat/ELF.h"
27#include "llvm/CodeGenTypes/MachineValueType.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/MC/MCParser/AsmLexer.h"
34#include "llvm/MC/MCParser/MCAsmParser.h"
35#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
36#include "llvm/MC/MCParser/MCTargetAsmParser.h"
37#include "llvm/MC/MCRegisterInfo.h"
38#include "llvm/MC/MCSymbol.h"
39#include "llvm/MC/TargetRegistry.h"
40#include "llvm/Support/AMDGPUMetadata.h"
41#include "llvm/Support/AMDHSAKernelDescriptor.h"
42#include "llvm/Support/Casting.h"
43#include "llvm/Support/Compiler.h"
44#include "llvm/Support/MathExtras.h"
45#include "llvm/TargetParser/TargetParser.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 bool Lit = false;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
114
115 enum ImmTy {
116 ImmTyNone,
117 ImmTyGDS,
118 ImmTyLDS,
119 ImmTyOffen,
120 ImmTyIdxen,
121 ImmTyAddr64,
122 ImmTyOffset,
123 ImmTyInstOffset,
124 ImmTyOffset0,
125 ImmTyOffset1,
126 ImmTySMEMOffsetMod,
127 ImmTyCPol,
128 ImmTyTFE,
129 ImmTyD16,
130 ImmTyClamp,
131 ImmTyOModSI,
132 ImmTySDWADstSel,
133 ImmTySDWASrc0Sel,
134 ImmTySDWASrc1Sel,
135 ImmTySDWADstUnused,
136 ImmTyDMask,
137 ImmTyDim,
138 ImmTyUNorm,
139 ImmTyDA,
140 ImmTyR128A16,
141 ImmTyA16,
142 ImmTyLWE,
143 ImmTyExpTgt,
144 ImmTyExpCompr,
145 ImmTyExpVM,
146 ImmTyFORMAT,
147 ImmTyHwreg,
148 ImmTyOff,
149 ImmTySendMsg,
150 ImmTyInterpSlot,
151 ImmTyInterpAttr,
152 ImmTyInterpAttrChan,
153 ImmTyOpSel,
154 ImmTyOpSelHi,
155 ImmTyNegLo,
156 ImmTyNegHi,
157 ImmTyIndexKey8bit,
158 ImmTyIndexKey16bit,
159 ImmTyDPP8,
160 ImmTyDppCtrl,
161 ImmTyDppRowMask,
162 ImmTyDppBankMask,
163 ImmTyDppBoundCtrl,
164 ImmTyDppFI,
165 ImmTySwizzle,
166 ImmTyGprIdxMode,
167 ImmTyHigh,
168 ImmTyBLGP,
169 ImmTyCBSZ,
170 ImmTyABID,
171 ImmTyEndpgm,
172 ImmTyWaitVDST,
173 ImmTyWaitEXP,
174 ImmTyWaitVAVDst,
175 ImmTyWaitVMVSrc,
176 ImmTyByteSel,
177 ImmTyBitOp3,
178 };
179
180 // Immediate operand kind.
181 // It helps to identify the location of an offending operand after an error.
182 // Note that regular literals and mandatory literals (KImm) must be handled
183 // differently. When looking for an offending operand, we should usually
184 // ignore mandatory literals because they are part of the instruction and
185 // cannot be changed. Report location of mandatory operands only for VOPD,
186 // when both OpX and OpY have a KImm and there are no other literals.
187 enum ImmKindTy {
188 ImmKindTyNone,
189 ImmKindTyLiteral,
190 ImmKindTyMandatoryLiteral,
191 ImmKindTyConst,
192 };
193
194private:
195 struct TokOp {
196 const char *Data;
197 unsigned Length;
198 };
199
200 struct ImmOp {
201 int64_t Val;
202 ImmTy Type;
203 bool IsFPImm;
204 mutable ImmKindTy Kind;
205 Modifiers Mods;
206 };
207
208 struct RegOp {
209 MCRegister RegNo;
210 Modifiers Mods;
211 };
212
213 union {
214 TokOp Tok;
215 ImmOp Imm;
216 RegOp Reg;
217 const MCExpr *Expr;
218 };
219
220public:
221 bool isToken() const override { return Kind == Token; }
222
223 bool isSymbolRefExpr() const {
224 return isExpr() && Expr && isa<MCSymbolRefExpr>(Val: Expr);
225 }
226
227 bool isImm() const override {
228 return Kind == Immediate;
229 }
230
231 void setImmKindNone() const {
232 assert(isImm());
233 Imm.Kind = ImmKindTyNone;
234 }
235
236 void setImmKindLiteral() const {
237 assert(isImm());
238 Imm.Kind = ImmKindTyLiteral;
239 }
240
241 void setImmKindMandatoryLiteral() const {
242 assert(isImm());
243 Imm.Kind = ImmKindTyMandatoryLiteral;
244 }
245
246 void setImmKindConst() const {
247 assert(isImm());
248 Imm.Kind = ImmKindTyConst;
249 }
250
251 bool IsImmKindLiteral() const {
252 return isImm() && Imm.Kind == ImmKindTyLiteral;
253 }
254
255 bool IsImmKindMandatoryLiteral() const {
256 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
257 }
258
259 bool isImmKindConst() const {
260 return isImm() && Imm.Kind == ImmKindTyConst;
261 }
262
263 bool isInlinableImm(MVT type) const;
264 bool isLiteralImm(MVT type) const;
265
266 bool isRegKind() const {
267 return Kind == Register;
268 }
269
270 bool isReg() const override {
271 return isRegKind() && !hasModifiers();
272 }
273
274 bool isRegOrInline(unsigned RCID, MVT type) const {
275 return isRegClass(RCID) || isInlinableImm(type);
276 }
277
278 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
279 return isRegOrInline(RCID, type) || isLiteralImm(type);
280 }
281
282 bool isRegOrImmWithInt16InputMods() const {
283 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
284 }
285
286 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
287 return isRegOrImmWithInputMods(
288 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::i16);
289 }
290
291 bool isRegOrImmWithInt32InputMods() const {
292 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
293 }
294
295 bool isRegOrInlineImmWithInt16InputMods() const {
296 return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
297 }
298
299 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
300 return isRegOrInline(
301 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::i16);
302 }
303
304 bool isRegOrInlineImmWithInt32InputMods() const {
305 return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
306 }
307
308 bool isRegOrImmWithInt64InputMods() const {
309 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::i64);
310 }
311
312 bool isRegOrImmWithFP16InputMods() const {
313 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f16);
314 }
315
316 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
317 return isRegOrImmWithInputMods(
318 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::f16);
319 }
320
321 bool isRegOrImmWithFP32InputMods() const {
322 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
323 }
324
325 bool isRegOrImmWithFP64InputMods() const {
326 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
327 }
328
329 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
330 return isRegOrInline(
331 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::f16);
332 }
333
334 bool isRegOrInlineImmWithFP32InputMods() const {
335 return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
336 }
337
338 bool isPackedFP16InputMods() const {
339 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::v2f16);
340 }
341
342 bool isPackedVGPRFP32InputMods() const {
343 return isRegOrImmWithInputMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::v2f32);
344 }
345
346 bool isVReg() const {
347 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) ||
348 isRegClass(RCID: AMDGPU::VReg_64RegClassID) ||
349 isRegClass(RCID: AMDGPU::VReg_96RegClassID) ||
350 isRegClass(RCID: AMDGPU::VReg_128RegClassID) ||
351 isRegClass(RCID: AMDGPU::VReg_160RegClassID) ||
352 isRegClass(RCID: AMDGPU::VReg_192RegClassID) ||
353 isRegClass(RCID: AMDGPU::VReg_256RegClassID) ||
354 isRegClass(RCID: AMDGPU::VReg_512RegClassID) ||
355 isRegClass(RCID: AMDGPU::VReg_1024RegClassID);
356 }
357
358 bool isVReg32() const {
359 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID);
360 }
361
362 bool isVReg32OrOff() const {
363 return isOff() || isVReg32();
364 }
365
366 bool isNull() const {
367 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
368 }
369
370 bool isVRegWithInputMods() const;
371 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
372 template <bool IsFake16> bool isT16VRegWithInputMods() const;
373
374 bool isSDWAOperand(MVT type) const;
375 bool isSDWAFP16Operand() const;
376 bool isSDWAFP32Operand() const;
377 bool isSDWAInt16Operand() const;
378 bool isSDWAInt32Operand() const;
379
380 bool isImmTy(ImmTy ImmT) const {
381 return isImm() && Imm.Type == ImmT;
382 }
383
384 template <ImmTy Ty> bool isImmTy() const { return isImmTy(ImmT: Ty); }
385
386 bool isImmLiteral() const { return isImmTy(ImmT: ImmTyNone); }
387
388 bool isImmModifier() const {
389 return isImm() && Imm.Type != ImmTyNone;
390 }
391
392 bool isOModSI() const { return isImmTy(ImmT: ImmTyOModSI); }
393 bool isDim() const { return isImmTy(ImmT: ImmTyDim); }
394 bool isR128A16() const { return isImmTy(ImmT: ImmTyR128A16); }
395 bool isOff() const { return isImmTy(ImmT: ImmTyOff); }
396 bool isExpTgt() const { return isImmTy(ImmT: ImmTyExpTgt); }
397 bool isOffen() const { return isImmTy(ImmT: ImmTyOffen); }
398 bool isIdxen() const { return isImmTy(ImmT: ImmTyIdxen); }
399 bool isAddr64() const { return isImmTy(ImmT: ImmTyAddr64); }
400 bool isSMEMOffsetMod() const { return isImmTy(ImmT: ImmTySMEMOffsetMod); }
401 bool isFlatOffset() const { return isImmTy(ImmT: ImmTyOffset) || isImmTy(ImmT: ImmTyInstOffset); }
402 bool isGDS() const { return isImmTy(ImmT: ImmTyGDS); }
403 bool isLDS() const { return isImmTy(ImmT: ImmTyLDS); }
404 bool isCPol() const { return isImmTy(ImmT: ImmTyCPol); }
405 bool isIndexKey8bit() const { return isImmTy(ImmT: ImmTyIndexKey8bit); }
406 bool isIndexKey16bit() const { return isImmTy(ImmT: ImmTyIndexKey16bit); }
407 bool isTFE() const { return isImmTy(ImmT: ImmTyTFE); }
408 bool isFORMAT() const { return isImmTy(ImmT: ImmTyFORMAT) && isUInt<7>(x: getImm()); }
409 bool isDppFI() const { return isImmTy(ImmT: ImmTyDppFI); }
410 bool isSDWADstSel() const { return isImmTy(ImmT: ImmTySDWADstSel); }
411 bool isSDWASrc0Sel() const { return isImmTy(ImmT: ImmTySDWASrc0Sel); }
412 bool isSDWASrc1Sel() const { return isImmTy(ImmT: ImmTySDWASrc1Sel); }
413 bool isSDWADstUnused() const { return isImmTy(ImmT: ImmTySDWADstUnused); }
414 bool isInterpSlot() const { return isImmTy(ImmT: ImmTyInterpSlot); }
415 bool isInterpAttr() const { return isImmTy(ImmT: ImmTyInterpAttr); }
416 bool isInterpAttrChan() const { return isImmTy(ImmT: ImmTyInterpAttrChan); }
417 bool isOpSel() const { return isImmTy(ImmT: ImmTyOpSel); }
418 bool isOpSelHi() const { return isImmTy(ImmT: ImmTyOpSelHi); }
419 bool isNegLo() const { return isImmTy(ImmT: ImmTyNegLo); }
420 bool isNegHi() const { return isImmTy(ImmT: ImmTyNegHi); }
421 bool isBitOp3() const { return isImmTy(ImmT: ImmTyBitOp3) && isUInt<8>(x: getImm()); }
422
423 bool isRegOrImm() const {
424 return isReg() || isImm();
425 }
426
427 bool isRegClass(unsigned RCID) const;
428
429 bool isInlineValue() const;
430
431 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
432 return isRegOrInline(RCID, type) && !hasModifiers();
433 }
434
435 bool isSCSrcB16() const {
436 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::i16);
437 }
438
439 bool isSCSrcV2B16() const {
440 return isSCSrcB16();
441 }
442
443 bool isSCSrc_b32() const {
444 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::i32);
445 }
446
447 bool isSCSrc_b64() const {
448 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_64RegClassID, type: MVT::i64);
449 }
450
451 bool isBoolReg() const;
452
453 bool isSCSrcF16() const {
454 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::f16);
455 }
456
457 bool isSCSrcV2F16() const {
458 return isSCSrcF16();
459 }
460
461 bool isSCSrcF32() const {
462 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::f32);
463 }
464
465 bool isSCSrcF64() const {
466 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_64RegClassID, type: MVT::f64);
467 }
468
469 bool isSSrc_b32() const {
470 return isSCSrc_b32() || isLiteralImm(type: MVT::i32) || isExpr();
471 }
472
473 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(type: MVT::i16); }
474
475 bool isSSrcV2B16() const {
476 llvm_unreachable("cannot happen");
477 return isSSrc_b16();
478 }
479
480 bool isSSrc_b64() const {
481 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
482 // See isVSrc64().
483 return isSCSrc_b64() || isLiteralImm(type: MVT::i64);
484 }
485
486 bool isSSrc_f32() const {
487 return isSCSrc_b32() || isLiteralImm(type: MVT::f32) || isExpr();
488 }
489
490 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(type: MVT::f64); }
491
492 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(type: MVT::bf16); }
493
494 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(type: MVT::f16); }
495
496 bool isSSrcV2F16() const {
497 llvm_unreachable("cannot happen");
498 return isSSrc_f16();
499 }
500
501 bool isSSrcV2FP32() const {
502 llvm_unreachable("cannot happen");
503 return isSSrc_f32();
504 }
505
506 bool isSCSrcV2FP32() const {
507 llvm_unreachable("cannot happen");
508 return isSCSrcF32();
509 }
510
511 bool isSSrcV2INT32() const {
512 llvm_unreachable("cannot happen");
513 return isSSrc_b32();
514 }
515
516 bool isSCSrcV2INT32() const {
517 llvm_unreachable("cannot happen");
518 return isSCSrc_b32();
519 }
520
521 bool isSSrcOrLds_b32() const {
522 return isRegOrInlineNoMods(RCID: AMDGPU::SRegOrLds_32RegClassID, type: MVT::i32) ||
523 isLiteralImm(type: MVT::i32) || isExpr();
524 }
525
526 bool isVCSrc_b32() const {
527 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
528 }
529
530 bool isVCSrcB64() const {
531 return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::i64);
532 }
533
534 bool isVCSrcT_b16() const {
535 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::i16);
536 }
537
538 bool isVCSrcTB16_Lo128() const {
539 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::i16);
540 }
541
542 bool isVCSrcFake16B16_Lo128() const {
543 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::i16);
544 }
545
546 bool isVCSrc_b16() const {
547 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
548 }
549
550 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
551
552 bool isVCSrc_f32() const {
553 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
554 }
555
556 bool isVCSrcF64() const {
557 return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
558 }
559
560 bool isVCSrcTBF16() const {
561 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::bf16);
562 }
563
564 bool isVCSrcT_f16() const {
565 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::f16);
566 }
567
568 bool isVCSrcT_bf16() const {
569 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::f16);
570 }
571
572 bool isVCSrcTBF16_Lo128() const {
573 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::bf16);
574 }
575
576 bool isVCSrcTF16_Lo128() const {
577 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::f16);
578 }
579
580 bool isVCSrcFake16BF16_Lo128() const {
581 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::bf16);
582 }
583
584 bool isVCSrcFake16F16_Lo128() const {
585 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::f16);
586 }
587
588 bool isVCSrc_bf16() const {
589 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::bf16);
590 }
591
592 bool isVCSrc_f16() const {
593 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f16);
594 }
595
596 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
597
598 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
599
600 bool isVSrc_b32() const {
601 return isVCSrc_f32() || isLiteralImm(type: MVT::i32) || isExpr();
602 }
603
604 bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(type: MVT::i64); }
605
606 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(type: MVT::i16); }
607
608 bool isVSrcT_b16_Lo128() const {
609 return isVCSrcTB16_Lo128() || isLiteralImm(type: MVT::i16);
610 }
611
612 bool isVSrcFake16_b16_Lo128() const {
613 return isVCSrcFake16B16_Lo128() || isLiteralImm(type: MVT::i16);
614 }
615
616 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(type: MVT::i16); }
617
618 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(type: MVT::v2i16); }
619
620 bool isVCSrcV2FP32() const {
621 return isVCSrcF64();
622 }
623
624 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(type: MVT::v2f32); }
625
626 bool isVCSrcV2INT32() const {
627 return isVCSrcB64();
628 }
629
630 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(type: MVT::v2i32); }
631
632 bool isVSrc_f32() const {
633 return isVCSrc_f32() || isLiteralImm(type: MVT::f32) || isExpr();
634 }
635
636 bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(type: MVT::f64); }
637
638 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(type: MVT::bf16); }
639
640 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(type: MVT::f16); }
641
642 bool isVSrcT_bf16_Lo128() const {
643 return isVCSrcTBF16_Lo128() || isLiteralImm(type: MVT::bf16);
644 }
645
646 bool isVSrcT_f16_Lo128() const {
647 return isVCSrcTF16_Lo128() || isLiteralImm(type: MVT::f16);
648 }
649
650 bool isVSrcFake16_bf16_Lo128() const {
651 return isVCSrcFake16BF16_Lo128() || isLiteralImm(type: MVT::bf16);
652 }
653
654 bool isVSrcFake16_f16_Lo128() const {
655 return isVCSrcFake16F16_Lo128() || isLiteralImm(type: MVT::f16);
656 }
657
658 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(type: MVT::bf16); }
659
660 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(type: MVT::f16); }
661
662 bool isVSrc_v2bf16() const {
663 return isVSrc_bf16() || isLiteralImm(type: MVT::v2bf16);
664 }
665
666 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(type: MVT::v2f16); }
667
668 bool isVISrcB32() const {
669 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i32);
670 }
671
672 bool isVISrcB16() const {
673 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i16);
674 }
675
676 bool isVISrcV2B16() const {
677 return isVISrcB16();
678 }
679
680 bool isVISrcF32() const {
681 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f32);
682 }
683
684 bool isVISrcF16() const {
685 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f16);
686 }
687
688 bool isVISrcV2F16() const {
689 return isVISrcF16() || isVISrcB32();
690 }
691
692 bool isVISrc_64_bf16() const {
693 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::bf16);
694 }
695
696 bool isVISrc_64_f16() const {
697 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f16);
698 }
699
700 bool isVISrc_64_b32() const {
701 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32);
702 }
703
704 bool isVISrc_64B64() const {
705 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i64);
706 }
707
708 bool isVISrc_64_f64() const {
709 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f64);
710 }
711
712 bool isVISrc_64V2FP32() const {
713 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f32);
714 }
715
716 bool isVISrc_64V2INT32() const {
717 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32);
718 }
719
720 bool isVISrc_256_b32() const {
721 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32);
722 }
723
724 bool isVISrc_256_f32() const {
725 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32);
726 }
727
728 bool isVISrc_256B64() const {
729 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i64);
730 }
731
732 bool isVISrc_256_f64() const {
733 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f64);
734 }
735
736 bool isVISrc_128B16() const {
737 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i16);
738 }
739
740 bool isVISrc_128V2B16() const {
741 return isVISrc_128B16();
742 }
743
744 bool isVISrc_128_b32() const {
745 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i32);
746 }
747
748 bool isVISrc_128_f32() const {
749 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f32);
750 }
751
752 bool isVISrc_256V2FP32() const {
753 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32);
754 }
755
756 bool isVISrc_256V2INT32() const {
757 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32);
758 }
759
760 bool isVISrc_512_b32() const {
761 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i32);
762 }
763
764 bool isVISrc_512B16() const {
765 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i16);
766 }
767
768 bool isVISrc_512V2B16() const {
769 return isVISrc_512B16();
770 }
771
772 bool isVISrc_512_f32() const {
773 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f32);
774 }
775
776 bool isVISrc_512F16() const {
777 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f16);
778 }
779
780 bool isVISrc_512V2F16() const {
781 return isVISrc_512F16() || isVISrc_512_b32();
782 }
783
784 bool isVISrc_1024_b32() const {
785 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i32);
786 }
787
788 bool isVISrc_1024B16() const {
789 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i16);
790 }
791
792 bool isVISrc_1024V2B16() const {
793 return isVISrc_1024B16();
794 }
795
796 bool isVISrc_1024_f32() const {
797 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f32);
798 }
799
800 bool isVISrc_1024F16() const {
801 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f16);
802 }
803
804 bool isVISrc_1024V2F16() const {
805 return isVISrc_1024F16() || isVISrc_1024_b32();
806 }
807
808 bool isAISrcB32() const {
809 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i32);
810 }
811
812 bool isAISrcB16() const {
813 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i16);
814 }
815
816 bool isAISrcV2B16() const {
817 return isAISrcB16();
818 }
819
820 bool isAISrcF32() const {
821 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f32);
822 }
823
824 bool isAISrcF16() const {
825 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f16);
826 }
827
828 bool isAISrcV2F16() const {
829 return isAISrcF16() || isAISrcB32();
830 }
831
832 bool isAISrc_64B64() const {
833 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::i64);
834 }
835
836 bool isAISrc_64_f64() const {
837 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::f64);
838 }
839
840 bool isAISrc_128_b32() const {
841 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i32);
842 }
843
844 bool isAISrc_128B16() const {
845 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i16);
846 }
847
848 bool isAISrc_128V2B16() const {
849 return isAISrc_128B16();
850 }
851
852 bool isAISrc_128_f32() const {
853 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f32);
854 }
855
856 bool isAISrc_128F16() const {
857 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f16);
858 }
859
860 bool isAISrc_128V2F16() const {
861 return isAISrc_128F16() || isAISrc_128_b32();
862 }
863
864 bool isVISrc_128_bf16() const {
865 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::bf16);
866 }
867
868 bool isVISrc_128_f16() const {
869 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f16);
870 }
871
872 bool isVISrc_128V2F16() const {
873 return isVISrc_128_f16() || isVISrc_128_b32();
874 }
875
876 bool isAISrc_256B64() const {
877 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::i64);
878 }
879
880 bool isAISrc_256_f64() const {
881 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::f64);
882 }
883
884 bool isAISrc_512_b32() const {
885 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i32);
886 }
887
888 bool isAISrc_512B16() const {
889 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i16);
890 }
891
892 bool isAISrc_512V2B16() const {
893 return isAISrc_512B16();
894 }
895
896 bool isAISrc_512_f32() const {
897 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f32);
898 }
899
900 bool isAISrc_512F16() const {
901 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f16);
902 }
903
904 bool isAISrc_512V2F16() const {
905 return isAISrc_512F16() || isAISrc_512_b32();
906 }
907
908 bool isAISrc_1024_b32() const {
909 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i32);
910 }
911
912 bool isAISrc_1024B16() const {
913 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i16);
914 }
915
916 bool isAISrc_1024V2B16() const {
917 return isAISrc_1024B16();
918 }
919
920 bool isAISrc_1024_f32() const {
921 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f32);
922 }
923
924 bool isAISrc_1024F16() const {
925 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f16);
926 }
927
928 bool isAISrc_1024V2F16() const {
929 return isAISrc_1024F16() || isAISrc_1024_b32();
930 }
931
932 bool isKImmFP32() const {
933 return isLiteralImm(type: MVT::f32);
934 }
935
936 bool isKImmFP16() const {
937 return isLiteralImm(type: MVT::f16);
938 }
939
940 bool isMem() const override {
941 return false;
942 }
943
944 bool isExpr() const {
945 return Kind == Expression;
946 }
947
948 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
949
950 bool isSWaitCnt() const;
951 bool isDepCtr() const;
952 bool isSDelayALU() const;
953 bool isHwreg() const;
954 bool isSendMsg() const;
955 bool isSplitBarrier() const;
956 bool isSwizzle() const;
957 bool isSMRDOffset8() const;
958 bool isSMEMOffset() const;
959 bool isSMRDLiteralOffset() const;
960 bool isDPP8() const;
961 bool isDPPCtrl() const;
962 bool isBLGP() const;
963 bool isGPRIdxMode() const;
964 bool isS16Imm() const;
965 bool isU16Imm() const;
966 bool isEndpgm() const;
967
968 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
969 return [=](){ return P(*this); };
970 }
971
972 StringRef getToken() const {
973 assert(isToken());
974 return StringRef(Tok.Data, Tok.Length);
975 }
976
977 int64_t getImm() const {
978 assert(isImm());
979 return Imm.Val;
980 }
981
982 void setImm(int64_t Val) {
983 assert(isImm());
984 Imm.Val = Val;
985 }
986
987 ImmTy getImmTy() const {
988 assert(isImm());
989 return Imm.Type;
990 }
991
992 MCRegister getReg() const override {
993 assert(isRegKind());
994 return Reg.RegNo;
995 }
996
997 SMLoc getStartLoc() const override {
998 return StartLoc;
999 }
1000
1001 SMLoc getEndLoc() const override {
1002 return EndLoc;
1003 }
1004
1005 SMRange getLocRange() const {
1006 return SMRange(StartLoc, EndLoc);
1007 }
1008
1009 Modifiers getModifiers() const {
1010 assert(isRegKind() || isImmTy(ImmTyNone));
1011 return isRegKind() ? Reg.Mods : Imm.Mods;
1012 }
1013
1014 void setModifiers(Modifiers Mods) {
1015 assert(isRegKind() || isImmTy(ImmTyNone));
1016 if (isRegKind())
1017 Reg.Mods = Mods;
1018 else
1019 Imm.Mods = Mods;
1020 }
1021
1022 bool hasModifiers() const {
1023 return getModifiers().hasModifiers();
1024 }
1025
1026 bool hasFPModifiers() const {
1027 return getModifiers().hasFPModifiers();
1028 }
1029
1030 bool hasIntModifiers() const {
1031 return getModifiers().hasIntModifiers();
1032 }
1033
1034 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1035
1036 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1037
1038 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1039
1040 void addRegOperands(MCInst &Inst, unsigned N) const;
1041
1042 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1043 if (isRegKind())
1044 addRegOperands(Inst, N);
1045 else
1046 addImmOperands(Inst, N);
1047 }
1048
1049 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1050 Modifiers Mods = getModifiers();
1051 Inst.addOperand(Op: MCOperand::createImm(Val: Mods.getModifiersOperand()));
1052 if (isRegKind()) {
1053 addRegOperands(Inst, N);
1054 } else {
1055 addImmOperands(Inst, N, ApplyModifiers: false);
1056 }
1057 }
1058
1059 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1060 assert(!hasIntModifiers());
1061 addRegOrImmWithInputModsOperands(Inst, N);
1062 }
1063
1064 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1065 assert(!hasFPModifiers());
1066 addRegOrImmWithInputModsOperands(Inst, N);
1067 }
1068
1069 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1070 Modifiers Mods = getModifiers();
1071 Inst.addOperand(Op: MCOperand::createImm(Val: Mods.getModifiersOperand()));
1072 assert(isRegKind());
1073 addRegOperands(Inst, N);
1074 }
1075
1076 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1077 assert(!hasIntModifiers());
1078 addRegWithInputModsOperands(Inst, N);
1079 }
1080
1081 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1082 assert(!hasFPModifiers());
1083 addRegWithInputModsOperands(Inst, N);
1084 }
1085
1086 static void printImmTy(raw_ostream& OS, ImmTy Type) {
1087 // clang-format off
1088 switch (Type) {
1089 case ImmTyNone: OS << "None"; break;
1090 case ImmTyGDS: OS << "GDS"; break;
1091 case ImmTyLDS: OS << "LDS"; break;
1092 case ImmTyOffen: OS << "Offen"; break;
1093 case ImmTyIdxen: OS << "Idxen"; break;
1094 case ImmTyAddr64: OS << "Addr64"; break;
1095 case ImmTyOffset: OS << "Offset"; break;
1096 case ImmTyInstOffset: OS << "InstOffset"; break;
1097 case ImmTyOffset0: OS << "Offset0"; break;
1098 case ImmTyOffset1: OS << "Offset1"; break;
1099 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1100 case ImmTyCPol: OS << "CPol"; break;
1101 case ImmTyIndexKey8bit: OS << "index_key"; break;
1102 case ImmTyIndexKey16bit: OS << "index_key"; break;
1103 case ImmTyTFE: OS << "TFE"; break;
1104 case ImmTyD16: OS << "D16"; break;
1105 case ImmTyFORMAT: OS << "FORMAT"; break;
1106 case ImmTyClamp: OS << "Clamp"; break;
1107 case ImmTyOModSI: OS << "OModSI"; break;
1108 case ImmTyDPP8: OS << "DPP8"; break;
1109 case ImmTyDppCtrl: OS << "DppCtrl"; break;
1110 case ImmTyDppRowMask: OS << "DppRowMask"; break;
1111 case ImmTyDppBankMask: OS << "DppBankMask"; break;
1112 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1113 case ImmTyDppFI: OS << "DppFI"; break;
1114 case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1115 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1116 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1117 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1118 case ImmTyDMask: OS << "DMask"; break;
1119 case ImmTyDim: OS << "Dim"; break;
1120 case ImmTyUNorm: OS << "UNorm"; break;
1121 case ImmTyDA: OS << "DA"; break;
1122 case ImmTyR128A16: OS << "R128A16"; break;
1123 case ImmTyA16: OS << "A16"; break;
1124 case ImmTyLWE: OS << "LWE"; break;
1125 case ImmTyOff: OS << "Off"; break;
1126 case ImmTyExpTgt: OS << "ExpTgt"; break;
1127 case ImmTyExpCompr: OS << "ExpCompr"; break;
1128 case ImmTyExpVM: OS << "ExpVM"; break;
1129 case ImmTyHwreg: OS << "Hwreg"; break;
1130 case ImmTySendMsg: OS << "SendMsg"; break;
1131 case ImmTyInterpSlot: OS << "InterpSlot"; break;
1132 case ImmTyInterpAttr: OS << "InterpAttr"; break;
1133 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1134 case ImmTyOpSel: OS << "OpSel"; break;
1135 case ImmTyOpSelHi: OS << "OpSelHi"; break;
1136 case ImmTyNegLo: OS << "NegLo"; break;
1137 case ImmTyNegHi: OS << "NegHi"; break;
1138 case ImmTySwizzle: OS << "Swizzle"; break;
1139 case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1140 case ImmTyHigh: OS << "High"; break;
1141 case ImmTyBLGP: OS << "BLGP"; break;
1142 case ImmTyCBSZ: OS << "CBSZ"; break;
1143 case ImmTyABID: OS << "ABID"; break;
1144 case ImmTyEndpgm: OS << "Endpgm"; break;
1145 case ImmTyWaitVDST: OS << "WaitVDST"; break;
1146 case ImmTyWaitEXP: OS << "WaitEXP"; break;
1147 case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1148 case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1149 case ImmTyByteSel: OS << "ByteSel" ; break;
1150 case ImmTyBitOp3: OS << "BitOp3"; break;
1151 }
1152 // clang-format on
1153 }
1154
1155 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1156 switch (Kind) {
1157 case Register:
1158 OS << "<register " << AMDGPUInstPrinter::getRegisterName(Reg: getReg())
1159 << " mods: " << Reg.Mods << '>';
1160 break;
1161 case Immediate:
1162 OS << '<' << getImm();
1163 if (getImmTy() != ImmTyNone) {
1164 OS << " type: "; printImmTy(OS, Type: getImmTy());
1165 }
1166 OS << " mods: " << Imm.Mods << '>';
1167 break;
1168 case Token:
1169 OS << '\'' << getToken() << '\'';
1170 break;
1171 case Expression:
1172 OS << "<expr ";
1173 MAI.printExpr(OS, *Expr);
1174 OS << '>';
1175 break;
1176 }
1177 }
1178
1179 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1180 int64_t Val, SMLoc Loc,
1181 ImmTy Type = ImmTyNone,
1182 bool IsFPImm = false) {
1183 auto Op = std::make_unique<AMDGPUOperand>(args: Immediate, args&: AsmParser);
1184 Op->Imm.Val = Val;
1185 Op->Imm.IsFPImm = IsFPImm;
1186 Op->Imm.Kind = ImmKindTyNone;
1187 Op->Imm.Type = Type;
1188 Op->Imm.Mods = Modifiers();
1189 Op->StartLoc = Loc;
1190 Op->EndLoc = Loc;
1191 return Op;
1192 }
1193
1194 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1195 StringRef Str, SMLoc Loc,
1196 bool HasExplicitEncodingSize = true) {
1197 auto Res = std::make_unique<AMDGPUOperand>(args: Token, args&: AsmParser);
1198 Res->Tok.Data = Str.data();
1199 Res->Tok.Length = Str.size();
1200 Res->StartLoc = Loc;
1201 Res->EndLoc = Loc;
1202 return Res;
1203 }
1204
1205 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1206 MCRegister Reg, SMLoc S, SMLoc E) {
1207 auto Op = std::make_unique<AMDGPUOperand>(args: Register, args&: AsmParser);
1208 Op->Reg.RegNo = Reg;
1209 Op->Reg.Mods = Modifiers();
1210 Op->StartLoc = S;
1211 Op->EndLoc = E;
1212 return Op;
1213 }
1214
1215 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1216 const class MCExpr *Expr, SMLoc S) {
1217 auto Op = std::make_unique<AMDGPUOperand>(args: Expression, args&: AsmParser);
1218 Op->Expr = Expr;
1219 Op->StartLoc = S;
1220 Op->EndLoc = S;
1221 return Op;
1222 }
1223};
1224
1225raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1226 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1227 return OS;
1228}
1229
1230//===----------------------------------------------------------------------===//
1231// AsmParser
1232//===----------------------------------------------------------------------===//
1233
1234// Holds info related to the current kernel, e.g. count of SGPRs used.
1235// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1236// .amdgpu_hsa_kernel or at EOF.
1237class KernelScopeInfo {
1238 int SgprIndexUnusedMin = -1;
1239 int VgprIndexUnusedMin = -1;
1240 int AgprIndexUnusedMin = -1;
1241 MCContext *Ctx = nullptr;
1242 MCSubtargetInfo const *MSTI = nullptr;
1243
1244 void usesSgprAt(int i) {
1245 if (i >= SgprIndexUnusedMin) {
1246 SgprIndexUnusedMin = ++i;
1247 if (Ctx) {
1248 MCSymbol* const Sym =
1249 Ctx->getOrCreateSymbol(Name: Twine(".kernel.sgpr_count"));
1250 Sym->setVariableValue(MCConstantExpr::create(Value: SgprIndexUnusedMin, Ctx&: *Ctx));
1251 }
1252 }
1253 }
1254
1255 void usesVgprAt(int i) {
1256 if (i >= VgprIndexUnusedMin) {
1257 VgprIndexUnusedMin = ++i;
1258 if (Ctx) {
1259 MCSymbol* const Sym =
1260 Ctx->getOrCreateSymbol(Name: Twine(".kernel.vgpr_count"));
1261 int totalVGPR = getTotalNumVGPRs(has90AInsts: isGFX90A(STI: *MSTI), ArgNumAGPR: AgprIndexUnusedMin,
1262 ArgNumVGPR: VgprIndexUnusedMin);
1263 Sym->setVariableValue(MCConstantExpr::create(Value: totalVGPR, Ctx&: *Ctx));
1264 }
1265 }
1266 }
1267
1268 void usesAgprAt(int i) {
1269 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1270 if (!hasMAIInsts(STI: *MSTI))
1271 return;
1272
1273 if (i >= AgprIndexUnusedMin) {
1274 AgprIndexUnusedMin = ++i;
1275 if (Ctx) {
1276 MCSymbol* const Sym =
1277 Ctx->getOrCreateSymbol(Name: Twine(".kernel.agpr_count"));
1278 Sym->setVariableValue(MCConstantExpr::create(Value: AgprIndexUnusedMin, Ctx&: *Ctx));
1279
1280 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1281 MCSymbol* const vSym =
1282 Ctx->getOrCreateSymbol(Name: Twine(".kernel.vgpr_count"));
1283 int totalVGPR = getTotalNumVGPRs(has90AInsts: isGFX90A(STI: *MSTI), ArgNumAGPR: AgprIndexUnusedMin,
1284 ArgNumVGPR: VgprIndexUnusedMin);
1285 vSym->setVariableValue(MCConstantExpr::create(Value: totalVGPR, Ctx&: *Ctx));
1286 }
1287 }
1288 }
1289
1290public:
1291 KernelScopeInfo() = default;
1292
1293 void initialize(MCContext &Context) {
1294 Ctx = &Context;
1295 MSTI = Ctx->getSubtargetInfo();
1296
1297 usesSgprAt(i: SgprIndexUnusedMin = -1);
1298 usesVgprAt(i: VgprIndexUnusedMin = -1);
1299 if (hasMAIInsts(STI: *MSTI)) {
1300 usesAgprAt(i: AgprIndexUnusedMin = -1);
1301 }
1302 }
1303
1304 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1305 unsigned RegWidth) {
1306 switch (RegKind) {
1307 case IS_SGPR:
1308 usesSgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1);
1309 break;
1310 case IS_AGPR:
1311 usesAgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1);
1312 break;
1313 case IS_VGPR:
1314 usesVgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1);
1315 break;
1316 default:
1317 break;
1318 }
1319 }
1320};
1321
1322class AMDGPUAsmParser : public MCTargetAsmParser {
1323 MCAsmParser &Parser;
1324
1325 unsigned ForcedEncodingSize = 0;
1326 bool ForcedDPP = false;
1327 bool ForcedSDWA = false;
1328 KernelScopeInfo KernelScope;
1329
1330 /// @name Auto-generated Match Functions
1331 /// {
1332
1333#define GET_ASSEMBLER_HEADER
1334#include "AMDGPUGenAsmMatcher.inc"
1335
1336 /// }
1337
1338private:
1339 void createConstantSymbol(StringRef Id, int64_t Val);
1340
1341 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1342 bool OutOfRangeError(SMRange Range);
1343 /// Calculate VGPR/SGPR blocks required for given target, reserved
1344 /// registers, and user-specified NextFreeXGPR values.
1345 ///
1346 /// \param Features [in] Target features, used for bug corrections.
1347 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1348 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1349 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1350 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1351 /// descriptor field, if valid.
1352 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1353 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1354 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1355 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1356 /// \param VGPRBlocks [out] Result VGPR block count.
1357 /// \param SGPRBlocks [out] Result SGPR block count.
1358 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1359 const MCExpr *FlatScrUsed, bool XNACKUsed,
1360 std::optional<bool> EnableWavefrontSize32,
1361 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1362 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1363 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1364 bool ParseDirectiveAMDGCNTarget();
1365 bool ParseDirectiveAMDHSACodeObjectVersion();
1366 bool ParseDirectiveAMDHSAKernel();
1367 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1368 bool ParseDirectiveAMDKernelCodeT();
1369 // TODO: Possibly make subtargetHasRegister const.
1370 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1371 bool ParseDirectiveAMDGPUHsaKernel();
1372
1373 bool ParseDirectiveISAVersion();
1374 bool ParseDirectiveHSAMetadata();
1375 bool ParseDirectivePALMetadataBegin();
1376 bool ParseDirectivePALMetadata();
1377 bool ParseDirectiveAMDGPULDS();
1378
1379 /// Common code to parse out a block of text (typically YAML) between start and
1380 /// end directives.
1381 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1382 const char *AssemblerDirectiveEnd,
1383 std::string &CollectString);
1384
1385 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1386 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1387 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1388 unsigned &RegNum, unsigned &RegWidth,
1389 bool RestoreOnFailure = false);
1390 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1391 unsigned &RegNum, unsigned &RegWidth,
1392 SmallVectorImpl<AsmToken> &Tokens);
1393 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1394 unsigned &RegWidth,
1395 SmallVectorImpl<AsmToken> &Tokens);
1396 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1397 unsigned &RegWidth,
1398 SmallVectorImpl<AsmToken> &Tokens);
1399 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1400 unsigned &RegWidth,
1401 SmallVectorImpl<AsmToken> &Tokens);
1402 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1403 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1404 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1405
1406 bool isRegister();
1407 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1408 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1409 void initializeGprCountSymbol(RegisterKind RegKind);
1410 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1411 unsigned RegWidth);
1412 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1413 bool IsAtomic);
1414
1415public:
1416 enum OperandMode {
1417 OperandMode_Default,
1418 OperandMode_NSA,
1419 };
1420
1421 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1422
1423 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1424 const MCInstrInfo &MII,
1425 const MCTargetOptions &Options)
1426 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1427 MCAsmParserExtension::Initialize(Parser);
1428
1429 if (getFeatureBits().none()) {
1430 // Set default features.
1431 copySTI().ToggleFeature(FS: "southern-islands");
1432 }
1433
1434 FeatureBitset FB = getFeatureBits();
1435 if (!FB[AMDGPU::FeatureWavefrontSize64] &&
1436 !FB[AMDGPU::FeatureWavefrontSize32]) {
1437 // If there is no default wave size it must be a generation before gfx10,
1438 // these have FeatureWavefrontSize64 in their definition already. For
1439 // gfx10+ set wave32 as a default.
1440 copySTI().ToggleFeature(FB: AMDGPU::FeatureWavefrontSize32);
1441 }
1442
1443 setAvailableFeatures(ComputeAvailableFeatures(FB: getFeatureBits()));
1444
1445 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
1446 if (ISA.Major >= 6 && isHsaAbi(STI: getSTI())) {
1447 createConstantSymbol(Id: ".amdgcn.gfx_generation_number", Val: ISA.Major);
1448 createConstantSymbol(Id: ".amdgcn.gfx_generation_minor", Val: ISA.Minor);
1449 createConstantSymbol(Id: ".amdgcn.gfx_generation_stepping", Val: ISA.Stepping);
1450 } else {
1451 createConstantSymbol(Id: ".option.machine_version_major", Val: ISA.Major);
1452 createConstantSymbol(Id: ".option.machine_version_minor", Val: ISA.Minor);
1453 createConstantSymbol(Id: ".option.machine_version_stepping", Val: ISA.Stepping);
1454 }
1455 if (ISA.Major >= 6 && isHsaAbi(STI: getSTI())) {
1456 initializeGprCountSymbol(RegKind: IS_VGPR);
1457 initializeGprCountSymbol(RegKind: IS_SGPR);
1458 } else
1459 KernelScope.initialize(Context&: getContext());
1460
1461 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1462 createConstantSymbol(Id: Symbol, Val: Code);
1463
1464 createConstantSymbol(Id: "UC_VERSION_W64_BIT", Val: 0x2000);
1465 createConstantSymbol(Id: "UC_VERSION_W32_BIT", Val: 0x4000);
1466 createConstantSymbol(Id: "UC_VERSION_MDP_BIT", Val: 0x8000);
1467 }
1468
1469 bool hasMIMG_R128() const {
1470 return AMDGPU::hasMIMG_R128(STI: getSTI());
1471 }
1472
1473 bool hasPackedD16() const {
1474 return AMDGPU::hasPackedD16(STI: getSTI());
1475 }
1476
1477 bool hasA16() const { return AMDGPU::hasA16(STI: getSTI()); }
1478
1479 bool hasG16() const { return AMDGPU::hasG16(STI: getSTI()); }
1480
1481 bool hasGDS() const { return AMDGPU::hasGDS(STI: getSTI()); }
1482
1483 bool isSI() const {
1484 return AMDGPU::isSI(STI: getSTI());
1485 }
1486
1487 bool isCI() const {
1488 return AMDGPU::isCI(STI: getSTI());
1489 }
1490
1491 bool isVI() const {
1492 return AMDGPU::isVI(STI: getSTI());
1493 }
1494
1495 bool isGFX9() const {
1496 return AMDGPU::isGFX9(STI: getSTI());
1497 }
1498
1499 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1500 bool isGFX90A() const {
1501 return AMDGPU::isGFX90A(STI: getSTI());
1502 }
1503
1504 bool isGFX940() const {
1505 return AMDGPU::isGFX940(STI: getSTI());
1506 }
1507
1508 bool isGFX9Plus() const {
1509 return AMDGPU::isGFX9Plus(STI: getSTI());
1510 }
1511
1512 bool isGFX10() const {
1513 return AMDGPU::isGFX10(STI: getSTI());
1514 }
1515
1516 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(STI: getSTI()); }
1517
1518 bool isGFX11() const {
1519 return AMDGPU::isGFX11(STI: getSTI());
1520 }
1521
1522 bool isGFX11Plus() const {
1523 return AMDGPU::isGFX11Plus(STI: getSTI());
1524 }
1525
1526 bool isGFX12() const { return AMDGPU::isGFX12(STI: getSTI()); }
1527
1528 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(STI: getSTI()); }
1529
1530 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(STI: getSTI()); }
1531
1532 bool isGFX10_BEncoding() const {
1533 return AMDGPU::isGFX10_BEncoding(STI: getSTI());
1534 }
1535
1536 bool hasInv2PiInlineImm() const {
1537 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1538 }
1539
1540 bool hasFlatOffsets() const {
1541 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1542 }
1543
1544 bool hasTrue16Insts() const {
1545 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1546 }
1547
1548 bool hasArchitectedFlatScratch() const {
1549 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1550 }
1551
1552 bool hasSGPR102_SGPR103() const {
1553 return !isVI() && !isGFX9();
1554 }
1555
1556 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1557
1558 bool hasIntClamp() const {
1559 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1560 }
1561
1562 bool hasPartialNSAEncoding() const {
1563 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1564 }
1565
1566 unsigned getNSAMaxSize(bool HasSampler = false) const {
1567 return AMDGPU::getNSAMaxSize(STI: getSTI(), HasSampler);
1568 }
1569
1570 unsigned getMaxNumUserSGPRs() const {
1571 return AMDGPU::getMaxNumUserSGPRs(STI: getSTI());
1572 }
1573
1574 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(STI: getSTI()); }
1575
1576 AMDGPUTargetStreamer &getTargetStreamer() {
1577 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1578 return static_cast<AMDGPUTargetStreamer &>(TS);
1579 }
1580
1581 const MCRegisterInfo *getMRI() const {
1582 // We need this const_cast because for some reason getContext() is not const
1583 // in MCAsmParser.
1584 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1585 }
1586
1587 const MCInstrInfo *getMII() const {
1588 return &MII;
1589 }
1590
1591 const FeatureBitset &getFeatureBits() const {
1592 return getSTI().getFeatureBits();
1593 }
1594
1595 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1596 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1597 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1598
1599 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1600 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1601 bool isForcedDPP() const { return ForcedDPP; }
1602 bool isForcedSDWA() const { return ForcedSDWA; }
1603 ArrayRef<unsigned> getMatchedVariants() const;
1604 StringRef getMatchedVariantName() const;
1605
1606 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1607 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1608 bool RestoreOnFailure);
1609 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1610 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1611 SMLoc &EndLoc) override;
1612 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1613 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1614 unsigned Kind) override;
1615 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1616 OperandVector &Operands, MCStreamer &Out,
1617 uint64_t &ErrorInfo,
1618 bool MatchingInlineAsm) override;
1619 bool ParseDirective(AsmToken DirectiveID) override;
1620 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1621 OperandMode Mode = OperandMode_Default);
1622 StringRef parseMnemonicSuffix(StringRef Name);
1623 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1624 SMLoc NameLoc, OperandVector &Operands) override;
1625 //bool ProcessInstruction(MCInst &Inst);
1626
1627 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1628
1629 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1630
1631 ParseStatus
1632 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1633 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1634 std::function<bool(int64_t &)> ConvertResult = nullptr);
1635
1636 ParseStatus parseOperandArrayWithPrefix(
1637 const char *Prefix, OperandVector &Operands,
1638 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1639 bool (*ConvertResult)(int64_t &) = nullptr);
1640
1641 ParseStatus
1642 parseNamedBit(StringRef Name, OperandVector &Operands,
1643 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1644 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1645 ParseStatus parseCPol(OperandVector &Operands);
1646 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1647 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1648 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1649 SMLoc &StringLoc);
1650 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1651 StringRef Name,
1652 ArrayRef<const char *> Ids,
1653 int64_t &IntVal);
1654 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1655 StringRef Name,
1656 ArrayRef<const char *> Ids,
1657 AMDGPUOperand::ImmTy Type);
1658
1659 bool isModifier();
1660 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1661 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1662 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1663 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1664 bool parseSP3NegModifier();
1665 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1666 bool HasLit = false);
1667 ParseStatus parseReg(OperandVector &Operands);
1668 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1669 bool HasLit = false);
1670 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1671 bool AllowImm = true);
1672 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1673 bool AllowImm = true);
1674 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1675 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1676 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1677 ParseStatus tryParseIndexKey(OperandVector &Operands,
1678 AMDGPUOperand::ImmTy ImmTy);
1679 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1680 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1681
1682 ParseStatus parseDfmtNfmt(int64_t &Format);
1683 ParseStatus parseUfmt(int64_t &Format);
1684 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1685 int64_t &Format);
1686 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1687 int64_t &Format);
1688 ParseStatus parseFORMAT(OperandVector &Operands);
1689 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1690 ParseStatus parseNumericFormat(int64_t &Format);
1691 ParseStatus parseFlatOffset(OperandVector &Operands);
1692 ParseStatus parseR128A16(OperandVector &Operands);
1693 ParseStatus parseBLGP(OperandVector &Operands);
1694 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1695 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1696
1697 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1698
1699 bool parseCnt(int64_t &IntVal);
1700 ParseStatus parseSWaitCnt(OperandVector &Operands);
1701
1702 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1703 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1704 ParseStatus parseDepCtr(OperandVector &Operands);
1705
1706 bool parseDelay(int64_t &Delay);
1707 ParseStatus parseSDelayALU(OperandVector &Operands);
1708
1709 ParseStatus parseHwreg(OperandVector &Operands);
1710
1711private:
1712 struct OperandInfoTy {
1713 SMLoc Loc;
1714 int64_t Val;
1715 bool IsSymbolic = false;
1716 bool IsDefined = false;
1717
1718 OperandInfoTy(int64_t Val) : Val(Val) {}
1719 };
1720
1721 struct StructuredOpField : OperandInfoTy {
1722 StringLiteral Id;
1723 StringLiteral Desc;
1724 unsigned Width;
1725 bool IsDefined = false;
1726
1727 StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width,
1728 int64_t Default)
1729 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1730 virtual ~StructuredOpField() = default;
1731
1732 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1733 Parser.Error(L: Loc, Msg: "invalid " + Desc + ": " + Err);
1734 return false;
1735 }
1736
1737 virtual bool validate(AMDGPUAsmParser &Parser) const {
1738 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1739 return Error(Parser, Err: "not supported on this GPU");
1740 if (!isUIntN(N: Width, x: Val))
1741 return Error(Parser, Err: "only " + Twine(Width) + "-bit values are legal");
1742 return true;
1743 }
1744 };
1745
1746 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1747 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1748
1749 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1750 bool validateSendMsg(const OperandInfoTy &Msg,
1751 const OperandInfoTy &Op,
1752 const OperandInfoTy &Stream);
1753
1754 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1755 OperandInfoTy &Width);
1756
1757 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1758 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1759 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1760
1761 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1762 const OperandVector &Operands) const;
1763 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1764 SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const;
1765 SMLoc getLitLoc(const OperandVector &Operands,
1766 bool SearchMandatoryLiterals = false) const;
1767 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1768 SMLoc getConstLoc(const OperandVector &Operands) const;
1769 SMLoc getInstLoc(const OperandVector &Operands) const;
1770
1771 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1772 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1773 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1774 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1775 bool validateSOPLiteral(const MCInst &Inst) const;
1776 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1777 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1778 const OperandVector &Operands);
1779 bool validateIntClampSupported(const MCInst &Inst);
1780 bool validateMIMGAtomicDMask(const MCInst &Inst);
1781 bool validateMIMGGatherDMask(const MCInst &Inst);
1782 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1783 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1784 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1785 bool validateMIMGD16(const MCInst &Inst);
1786 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1787 bool validateTensorR128(const MCInst &Inst);
1788 bool validateMIMGMSAA(const MCInst &Inst);
1789 bool validateOpSel(const MCInst &Inst);
1790 bool validateTrue16OpSel(const MCInst &Inst);
1791 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1792 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1793 bool validateVccOperand(MCRegister Reg) const;
1794 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1795 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1796 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1797 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1798 bool validateAGPRLdSt(const MCInst &Inst) const;
1799 bool validateVGPRAlign(const MCInst &Inst) const;
1800 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1801 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1802 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1803 bool validateDivScale(const MCInst &Inst);
1804 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1805 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1806 const SMLoc &IDLoc);
1807 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1808 const unsigned CPol);
1809 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1810 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1811 unsigned getConstantBusLimit(unsigned Opcode) const;
1812 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1813 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1814 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1815
1816 bool isSupportedMnemo(StringRef Mnemo,
1817 const FeatureBitset &FBS);
1818 bool isSupportedMnemo(StringRef Mnemo,
1819 const FeatureBitset &FBS,
1820 ArrayRef<unsigned> Variants);
1821 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1822
1823 bool isId(const StringRef Id) const;
1824 bool isId(const AsmToken &Token, const StringRef Id) const;
1825 bool isToken(const AsmToken::TokenKind Kind) const;
1826 StringRef getId() const;
1827 bool trySkipId(const StringRef Id);
1828 bool trySkipId(const StringRef Pref, const StringRef Id);
1829 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1830 bool trySkipToken(const AsmToken::TokenKind Kind);
1831 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1832 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1833 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1834
1835 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1836 AsmToken::TokenKind getTokenKind() const;
1837 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1838 bool parseExpr(OperandVector &Operands);
1839 StringRef getTokenStr() const;
1840 AsmToken peekToken(bool ShouldSkipSpace = true);
1841 AsmToken getToken() const;
1842 SMLoc getLoc() const;
1843 void lex();
1844
1845public:
1846 void onBeginOfFile() override;
1847 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1848
1849 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1850
1851 ParseStatus parseExpTgt(OperandVector &Operands);
1852 ParseStatus parseSendMsg(OperandVector &Operands);
1853 ParseStatus parseInterpSlot(OperandVector &Operands);
1854 ParseStatus parseInterpAttr(OperandVector &Operands);
1855 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1856 ParseStatus parseBoolReg(OperandVector &Operands);
1857
1858 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1859 const unsigned MaxVal, const Twine &ErrMsg,
1860 SMLoc &Loc);
1861 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1862 const unsigned MinVal,
1863 const unsigned MaxVal,
1864 const StringRef ErrMsg);
1865 ParseStatus parseSwizzle(OperandVector &Operands);
1866 bool parseSwizzleOffset(int64_t &Imm);
1867 bool parseSwizzleMacro(int64_t &Imm);
1868 bool parseSwizzleQuadPerm(int64_t &Imm);
1869 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1870 bool parseSwizzleBroadcast(int64_t &Imm);
1871 bool parseSwizzleSwap(int64_t &Imm);
1872 bool parseSwizzleReverse(int64_t &Imm);
1873 bool parseSwizzleFFT(int64_t &Imm);
1874 bool parseSwizzleRotate(int64_t &Imm);
1875
1876 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1877 int64_t parseGPRIdxMacro();
1878
1879 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: false); }
1880 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: true); }
1881
1882 ParseStatus parseOModSI(OperandVector &Operands);
1883
1884 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1885 OptionalImmIndexMap &OptionalIdx);
1886 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1887 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1888 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1889 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1890 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1891
1892 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1893 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1894 OptionalImmIndexMap &OptionalIdx);
1895 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1896 OptionalImmIndexMap &OptionalIdx);
1897
1898 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1899 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1900
1901 bool parseDimId(unsigned &Encoding);
1902 ParseStatus parseDim(OperandVector &Operands);
1903 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1904 ParseStatus parseDPP8(OperandVector &Operands);
1905 ParseStatus parseDPPCtrl(OperandVector &Operands);
1906 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1907 int64_t parseDPPCtrlSel(StringRef Ctrl);
1908 int64_t parseDPPCtrlPerm();
1909 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1910 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1911 cvtDPP(Inst, Operands, IsDPP8: true);
1912 }
1913 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1914 bool IsDPP8 = false);
1915 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1916 cvtVOP3DPP(Inst, Operands, IsDPP8: true);
1917 }
1918
1919 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1920 AMDGPUOperand::ImmTy Type);
1921 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1922 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1923 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1924 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1925 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1926 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1927 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1928 uint64_t BasicInstType,
1929 bool SkipDstVcc = false,
1930 bool SkipSrcVcc = false);
1931
1932 ParseStatus parseEndpgm(OperandVector &Operands);
1933
1934 ParseStatus parseVOPD(OperandVector &Operands);
1935};
1936
1937} // end anonymous namespace
1938
1939// May be called with an integer type of equivalent bitwidth.
1940static const fltSemantics *getFltSemantics(unsigned Size) {
1941 switch (Size) {
1942 case 4:
1943 return &APFloat::IEEEsingle();
1944 case 8:
1945 return &APFloat::IEEEdouble();
1946 case 2:
1947 return &APFloat::IEEEhalf();
1948 default:
1949 llvm_unreachable("unsupported fp type");
1950 }
1951}
1952
1953static const fltSemantics *getFltSemantics(MVT VT) {
1954 return getFltSemantics(Size: VT.getSizeInBits() / 8);
1955}
1956
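// Return the fp semantics used to interpret an fp literal for the given
// operand type (single, double, half, or bfloat).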
1957static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1958 switch (OperandType) {
1959  // When a floating-point immediate is used as an operand of type i16, the
1960  // 32-bit representation of the constant truncated to the 16 LSBs should be used.
1961 case AMDGPU::OPERAND_REG_IMM_INT16:
1962 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1963 case AMDGPU::OPERAND_REG_IMM_INT32:
1964 case AMDGPU::OPERAND_REG_IMM_FP32:
1965 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1966 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1967 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1968 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1969 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1970 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1971 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1972 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1973 case AMDGPU::OPERAND_KIMM32:
1974 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1975 return &APFloat::IEEEsingle();
1976 case AMDGPU::OPERAND_REG_IMM_INT64:
1977 case AMDGPU::OPERAND_REG_IMM_FP64:
1978 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1979 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1980 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1981 return &APFloat::IEEEdouble();
1982 case AMDGPU::OPERAND_REG_IMM_FP16:
1983 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1984 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1985 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1986 case AMDGPU::OPERAND_KIMM16:
1987 return &APFloat::IEEEhalf();
1988 case AMDGPU::OPERAND_REG_IMM_BF16:
1989 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1990 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1991 case AMDGPU::OPERAND_REG_IMM_V2BF16:
1992 return &APFloat::BFloat();
1993 default:
1994 llvm_unreachable("unsupported fp type");
1995 }
1996}
1997
1998//===----------------------------------------------------------------------===//
1999// Operand
2000//===----------------------------------------------------------------------===//
2001
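// Check whether an fp literal (held as an f64) can be converted to the given
// type without overflow or underflow; plain precision loss is acceptable.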
2002static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2003 bool Lost;
2004
2005  // Convert the literal to the target type's fp semantics
2006 APFloat::opStatus Status = FPLiteral.convert(ToSemantics: *getFltSemantics(VT),
2007 RM: APFloat::rmNearestTiesToEven,
2008 losesInfo: &Lost);
2009  // We allow precision loss but not overflow or underflow
2010 if (Status != APFloat::opOK &&
2011 Lost &&
2012 ((Status & APFloat::opOverflow) != 0 ||
2013 (Status & APFloat::opUnderflow) != 0)) {
2014 return false;
2015 }
2016
2017 return true;
2018}
2019
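// Truncation to Size bits preserves the value if it fits either as an
// unsigned or as a signed Size-bit integer.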
2020static bool isSafeTruncation(int64_t Val, unsigned Size) {
2021 return isUIntN(N: Size, x: Val) || isIntN(N: Size, x: Val);
2022}
2023
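// For 16-bit operands, i16 reuses the 32-bit integer inline-constant range,
// while f16 and bf16 have their own fp inline-constant checks.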
2024static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2025 if (VT.getScalarType() == MVT::i16)
2026 return isInlinableLiteral32(Literal: Val, HasInv2Pi);
2027
2028 if (VT.getScalarType() == MVT::f16)
2029 return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi);
2030
2031 assert(VT.getScalarType() == MVT::bf16);
2032
2033 return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi);
2034}
2035
2036bool AMDGPUOperand::isInlinableImm(MVT type) const {
2037
2038 // This is a hack to enable named inline values like
2039 // shared_base with both 32-bit and 64-bit operands.
2040 // Note that these values are defined as
2041 // 32-bit operands only.
2042 if (isInlineValue()) {
2043 return true;
2044 }
2045
2046 if (!isImmTy(ImmT: ImmTyNone)) {
2047 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2048 return false;
2049 }
2050 // TODO: We should avoid using host float here. It would be better to
2051 // check the float bit values which is what a few other places do.
2052 // We've had bot failures before due to weird NaN support on mips hosts.
2053
2054 APInt Literal(64, Imm.Val);
2055
2056 if (Imm.IsFPImm) { // We got fp literal token
2057 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2058 return AMDGPU::isInlinableLiteral64(Literal: Imm.Val,
2059 HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2060 }
2061
2062 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2063 if (!canLosslesslyConvertToFPType(FPLiteral, VT: type))
2064 return false;
2065
2066 if (type.getScalarSizeInBits() == 16) {
2067 bool Lost = false;
2068 switch (type.getScalarType().SimpleTy) {
2069 default:
2070 llvm_unreachable("unknown 16-bit type");
2071 case MVT::bf16:
2072 FPLiteral.convert(ToSemantics: APFloatBase::BFloat(), RM: APFloat::rmNearestTiesToEven,
2073 losesInfo: &Lost);
2074 break;
2075 case MVT::f16:
2076 FPLiteral.convert(ToSemantics: APFloatBase::IEEEhalf(), RM: APFloat::rmNearestTiesToEven,
2077 losesInfo: &Lost);
2078 break;
2079 case MVT::i16:
2080 FPLiteral.convert(ToSemantics: APFloatBase::IEEEsingle(),
2081 RM: APFloat::rmNearestTiesToEven, losesInfo: &Lost);
2082 break;
2083 }
2084      // We need to use the 32-bit representation here because when a
2085      // floating-point inline constant is used as an i16 operand, its 32-bit
2086      // representation will be used. We will need the 32-bit value to check
2087      // whether it is an FP inline constant.
2088 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2089 return isInlineableLiteralOp16(Val: ImmVal, VT: type,
2090 HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2091 }
2092
2093 // Check if single precision literal is inlinable
2094 return AMDGPU::isInlinableLiteral32(
2095 Literal: static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2096 HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2097 }
2098
2099 // We got int literal token.
2100 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2101 return AMDGPU::isInlinableLiteral64(Literal: Imm.Val,
2102 HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2103 }
2104
2105 if (!isSafeTruncation(Val: Imm.Val, Size: type.getScalarSizeInBits())) {
2106 return false;
2107 }
2108
2109 if (type.getScalarSizeInBits() == 16) {
2110 return isInlineableLiteralOp16(
2111 Val: static_cast<int16_t>(Literal.getLoBits(numBits: 16).getSExtValue()),
2112 VT: type, HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2113 }
2114
2115 return AMDGPU::isInlinableLiteral32(
2116 Literal: static_cast<int32_t>(Literal.getLoBits(numBits: 32).getZExtValue()),
2117 HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2118}
2119
2120bool AMDGPUOperand::isLiteralImm(MVT type) const {
2121 // Check that this immediate can be added as literal
2122 if (!isImmTy(ImmT: ImmTyNone)) {
2123 return false;
2124 }
2125
2126 if (!Imm.IsFPImm) {
2127 // We got int literal token.
2128
2129 if (type == MVT::f64 && hasFPModifiers()) {
2130      // Cannot apply fp modifiers to int literals while preserving the same
2131      // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
2132      // ambiguity, disable these cases.
2133 return false;
2134 }
2135
2136 unsigned Size = type.getSizeInBits();
2137 if (Size == 64)
2138 Size = 32;
2139
2140    // FIXME: 64-bit operands can zero-extend, sign-extend, or pad with zeroes
2141    // for FP types.
2142 return isSafeTruncation(Val: Imm.Val, Size);
2143 }
2144
2145 // We got fp literal token
2146 if (type == MVT::f64) { // Expected 64-bit fp operand
2147    // We would set the low 32 bits of the literal to zeroes, but we accept such literals
2148 return true;
2149 }
2150
2151 if (type == MVT::i64) { // Expected 64-bit int operand
2152 // We don't allow fp literals in 64-bit integer instructions. It is
2153 // unclear how we should encode them.
2154 return false;
2155 }
2156
2157 // We allow fp literals with f16x2 operands assuming that the specified
2158 // literal goes into the lower half and the upper half is zero. We also
2159 // require that the literal may be losslessly converted to f16.
2160 //
2161 // For i16x2 operands, we assume that the specified literal is encoded as a
2162 // single-precision float. This is pretty odd, but it matches SP3 and what
2163 // happens in hardware.
2164 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2165 : (type == MVT::v2i16) ? MVT::f32
2166 : (type == MVT::v2f32) ? MVT::f32
2167 : type;
2168
2169 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2170 return canLosslesslyConvertToFPType(FPLiteral, VT: ExpectedType);
2171}
2172
2173bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2174 return isRegKind() && AsmParser->getMRI()->getRegClass(i: RCID).contains(Reg: getReg());
2175}
2176
2177bool AMDGPUOperand::isVRegWithInputMods() const {
2178 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) ||
2179 // GFX90A allows DPP on 64-bit operands.
2180 (isRegClass(RCID: AMDGPU::VReg_64RegClassID) &&
2181 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2182}
2183
2184template <bool IsFake16>
2185bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2186 return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2187 : AMDGPU::VGPR_16_Lo128RegClassID);
2188}
2189
2190template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2191 return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32RegClassID
2192 : AMDGPU::VGPR_16RegClassID);
2193}
2194
2195bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2196 if (AsmParser->isVI())
2197 return isVReg32();
2198 if (AsmParser->isGFX9Plus())
2199 return isRegClass(RCID: AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2200 return false;
2201}
2202
2203bool AMDGPUOperand::isSDWAFP16Operand() const {
2204 return isSDWAOperand(type: MVT::f16);
2205}
2206
2207bool AMDGPUOperand::isSDWAFP32Operand() const {
2208 return isSDWAOperand(type: MVT::f32);
2209}
2210
2211bool AMDGPUOperand::isSDWAInt16Operand() const {
2212 return isSDWAOperand(type: MVT::i16);
2213}
2214
2215bool AMDGPUOperand::isSDWAInt32Operand() const {
2216 return isSDWAOperand(type: MVT::i32);
2217}
2218
2219bool AMDGPUOperand::isBoolReg() const {
2220 auto FB = AsmParser->getFeatureBits();
2221 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
2222 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
2223}
2224
2225uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2226{
2227 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2228 assert(Size == 2 || Size == 4 || Size == 8);
2229
2230 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2231
2232 if (Imm.Mods.Abs) {
2233 Val &= ~FpSignMask;
2234 }
2235 if (Imm.Mods.Neg) {
2236 Val ^= FpSignMask;
2237 }
2238
2239 return Val;
2240}
2241
2242void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2243 if (isExpr()) {
2244 Inst.addOperand(Op: MCOperand::createExpr(Val: Expr));
2245 return;
2246 }
2247
2248 if (AMDGPU::isSISrcOperand(Desc: AsmParser->getMII()->get(Opcode: Inst.getOpcode()),
2249 OpNo: Inst.getNumOperands())) {
2250 addLiteralImmOperand(Inst, Val: Imm.Val,
2251 ApplyModifiers: ApplyModifiers &
2252 isImmTy(ImmT: ImmTyNone) && Imm.Mods.hasFPModifiers());
2253 } else {
2254 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2255 Inst.addOperand(Op: MCOperand::createImm(Val: Imm.Val));
2256 setImmKindNone();
2257 }
2258}
2259
2260void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2261 const auto& InstDesc = AsmParser->getMII()->get(Opcode: Inst.getOpcode());
2262 auto OpNum = Inst.getNumOperands();
2263 // Check that this operand accepts literals
2264 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2265
2266 if (ApplyModifiers) {
2267 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2268 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(Desc: InstDesc, OpNo: OpNum);
2269 Val = applyInputFPModifiers(Val, Size);
2270 }
2271
2272 APInt Literal(64, Val);
2273 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2274
2275 if (Imm.IsFPImm) { // We got fp literal token
2276 switch (OpTy) {
2277 case AMDGPU::OPERAND_REG_IMM_INT64:
2278 case AMDGPU::OPERAND_REG_IMM_FP64:
2279 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2280 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2281 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2282 if (AMDGPU::isInlinableLiteral64(Literal: Literal.getZExtValue(),
2283 HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
2284 Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getZExtValue()));
2285 setImmKindConst();
2286 return;
2287 }
2288
2289 // Non-inlineable
2290 if (AMDGPU::isSISrcFPOperand(Desc: InstDesc, OpNo: OpNum)) { // Expected 64-bit fp operand
2291      // For fp operands we check whether the low 32 bits are zeros
2292 if (Literal.getLoBits(numBits: 32) != 0) {
2293 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(L: Inst.getLoc(),
2294 Msg: "Can't encode literal as exact 64-bit floating-point operand. "
2295 "Low 32-bits will be set to zero");
2296 Val &= 0xffffffff00000000u;
2297 }
2298
2299 Inst.addOperand(Op: MCOperand::createImm(Val));
2300 setImmKindLiteral();
2301 return;
2302 }
2303
2304 // We don't allow fp literals in 64-bit integer instructions. It is
2305 // unclear how we should encode them. This case should be checked earlier
2306 // in predicate methods (isLiteralImm())
2307 llvm_unreachable("fp literal in 64-bit integer instruction.");
2308
2309 case AMDGPU::OPERAND_REG_IMM_BF16:
2310 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2311 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2312 case AMDGPU::OPERAND_REG_IMM_V2BF16:
2313 if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2314      // This is 1/(2*pi), which is going to be truncated to bf16 with a loss
2315      // of precision. The constant represents the idiomatic fp32 value of
2316      // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits
2317      // cleared. Prevent the rounding below.
2318 Inst.addOperand(Op: MCOperand::createImm(Val: 0x3e22));
2319 setImmKindLiteral();
2320 return;
2321 }
2322 [[fallthrough]];
2323
2324 case AMDGPU::OPERAND_REG_IMM_INT32:
2325 case AMDGPU::OPERAND_REG_IMM_FP32:
2326 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2327 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2328 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2329 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2330 case AMDGPU::OPERAND_REG_IMM_INT16:
2331 case AMDGPU::OPERAND_REG_IMM_FP16:
2332 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2333 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2334 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2335 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2336 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2337 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2338 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2339 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2340 case AMDGPU::OPERAND_KIMM32:
2341 case AMDGPU::OPERAND_KIMM16:
2342 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
2343 bool lost;
2344 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2345      // Convert the literal to the operand's fp semantics
2346 FPLiteral.convert(ToSemantics: *getOpFltSemantics(OperandType: OpTy),
2347 RM: APFloat::rmNearestTiesToEven, losesInfo: &lost);
2348      // We allow precision loss but not overflow or underflow. This should be
2349      // checked earlier in isLiteralImm()
2350
2351 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2352 Inst.addOperand(Op: MCOperand::createImm(Val: ImmVal));
2353 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2354 setImmKindMandatoryLiteral();
2355 } else {
2356 setImmKindLiteral();
2357 }
2358 return;
2359 }
2360 default:
2361 llvm_unreachable("invalid operand size");
2362 }
2363
2364 return;
2365 }
2366
2367 // We got int literal token.
2368 // Only sign extend inline immediates.
2369 switch (OpTy) {
2370 case AMDGPU::OPERAND_REG_IMM_INT32:
2371 case AMDGPU::OPERAND_REG_IMM_FP32:
2372 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2373 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2374 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2375 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2376 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2377 case AMDGPU::OPERAND_REG_IMM_V2BF16:
2378 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2379 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2380 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2381 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2382 if (isSafeTruncation(Val, Size: 32) &&
2383 AMDGPU::isInlinableLiteral32(Literal: static_cast<int32_t>(Val),
2384 HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
2385 Inst.addOperand(Op: MCOperand::createImm(Val));
2386 setImmKindConst();
2387 return;
2388 }
2389
2390 Inst.addOperand(Op: MCOperand::createImm(Val: Lo_32(Value: Val)));
2391 setImmKindLiteral();
2392 return;
2393
2394 case AMDGPU::OPERAND_REG_IMM_INT64:
2395 case AMDGPU::OPERAND_REG_IMM_FP64:
2396 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2397 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2398 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2399 if (AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
2400 Inst.addOperand(Op: MCOperand::createImm(Val));
2401 setImmKindConst();
2402 return;
2403 }
2404
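    // A 64-bit fp operand encodes its 32-bit literal in the high half of the
    // double (the low 32 bits are implicitly zero), so move the value up;
    // integer operands keep the low 32 bits.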
2405 Val = AMDGPU::isSISrcFPOperand(Desc: InstDesc, OpNo: OpNum) ? (uint64_t)Val << 32
2406 : Lo_32(Value: Val);
2407
2408 Inst.addOperand(Op: MCOperand::createImm(Val));
2409 setImmKindLiteral();
2410 return;
2411
2412 case AMDGPU::OPERAND_REG_IMM_INT16:
2413 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2414 if (isSafeTruncation(Val, Size: 16) &&
2415 AMDGPU::isInlinableIntLiteral(Literal: static_cast<int16_t>(Val))) {
2416 Inst.addOperand(Op: MCOperand::createImm(Val: Lo_32(Value: Val)));
2417 setImmKindConst();
2418 return;
2419 }
2420
2421 Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff));
2422 setImmKindLiteral();
2423 return;
2424
2425 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2426 case AMDGPU::OPERAND_REG_IMM_FP16:
2427 if (isSafeTruncation(Val, Size: 16) &&
2428 AMDGPU::isInlinableLiteralFP16(Literal: static_cast<int16_t>(Val),
2429 HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
2430 Inst.addOperand(Op: MCOperand::createImm(Val));
2431 setImmKindConst();
2432 return;
2433 }
2434
2435 Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff));
2436 setImmKindLiteral();
2437 return;
2438
2439 case AMDGPU::OPERAND_REG_IMM_BF16:
2440 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2441 if (isSafeTruncation(Val, Size: 16) &&
2442 AMDGPU::isInlinableLiteralBF16(Literal: static_cast<int16_t>(Val),
2443 HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
2444 Inst.addOperand(Op: MCOperand::createImm(Val));
2445 setImmKindConst();
2446 return;
2447 }
2448
2449 Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff));
2450 setImmKindLiteral();
2451 return;
2452
2453 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: {
2454 assert(isSafeTruncation(Val, 16));
2455 assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
2456 Inst.addOperand(Op: MCOperand::createImm(Val));
2457 return;
2458 }
2459 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
2460 assert(isSafeTruncation(Val, 16));
2461 assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
2462 AsmParser->hasInv2PiInlineImm()));
2463
2464 Inst.addOperand(Op: MCOperand::createImm(Val));
2465 return;
2466 }
2467
2468 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: {
2469 assert(isSafeTruncation(Val, 16));
2470 assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val),
2471 AsmParser->hasInv2PiInlineImm()));
2472
2473 Inst.addOperand(Op: MCOperand::createImm(Val));
2474 return;
2475 }
2476
2477 case AMDGPU::OPERAND_KIMM32:
2478 Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getLoBits(numBits: 32).getZExtValue()));
2479 setImmKindMandatoryLiteral();
2480 return;
2481 case AMDGPU::OPERAND_KIMM16:
2482 Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getLoBits(numBits: 16).getZExtValue()));
2483 setImmKindMandatoryLiteral();
2484 return;
2485 default:
2486 llvm_unreachable("invalid operand size");
2487 }
2488}
2489
2490void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2491 Inst.addOperand(Op: MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: getReg(), STI: AsmParser->getSTI())));
2492}
2493
2494bool AMDGPUOperand::isInlineValue() const {
2495 return isRegKind() && ::isInlineValue(Reg: getReg());
2496}
2497
2498//===----------------------------------------------------------------------===//
2499// AsmParser
2500//===----------------------------------------------------------------------===//
2501
2502void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2503  // TODO: make these pre-defined variables read-only.
2504  // Currently there is no suitable machinery in the core llvm-mc for this.
2505  // MCSymbol::isRedefinable is intended for another purpose, and
2506  // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
2507 MCContext &Ctx = getContext();
2508 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id);
2509 Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx));
2510}
2511
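// Map a register kind and width in bits to the corresponding register class
// ID, or -1 if the combination is not supported.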
2512static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2513 if (Is == IS_VGPR) {
2514 switch (RegWidth) {
2515 default: return -1;
2516 case 32:
2517 return AMDGPU::VGPR_32RegClassID;
2518 case 64:
2519 return AMDGPU::VReg_64RegClassID;
2520 case 96:
2521 return AMDGPU::VReg_96RegClassID;
2522 case 128:
2523 return AMDGPU::VReg_128RegClassID;
2524 case 160:
2525 return AMDGPU::VReg_160RegClassID;
2526 case 192:
2527 return AMDGPU::VReg_192RegClassID;
2528 case 224:
2529 return AMDGPU::VReg_224RegClassID;
2530 case 256:
2531 return AMDGPU::VReg_256RegClassID;
2532 case 288:
2533 return AMDGPU::VReg_288RegClassID;
2534 case 320:
2535 return AMDGPU::VReg_320RegClassID;
2536 case 352:
2537 return AMDGPU::VReg_352RegClassID;
2538 case 384:
2539 return AMDGPU::VReg_384RegClassID;
2540 case 512:
2541 return AMDGPU::VReg_512RegClassID;
2542 case 1024:
2543 return AMDGPU::VReg_1024RegClassID;
2544 }
2545 } else if (Is == IS_TTMP) {
2546 switch (RegWidth) {
2547 default: return -1;
2548 case 32:
2549 return AMDGPU::TTMP_32RegClassID;
2550 case 64:
2551 return AMDGPU::TTMP_64RegClassID;
2552 case 128:
2553 return AMDGPU::TTMP_128RegClassID;
2554 case 256:
2555 return AMDGPU::TTMP_256RegClassID;
2556 case 512:
2557 return AMDGPU::TTMP_512RegClassID;
2558 }
2559 } else if (Is == IS_SGPR) {
2560 switch (RegWidth) {
2561 default: return -1;
2562 case 32:
2563 return AMDGPU::SGPR_32RegClassID;
2564 case 64:
2565 return AMDGPU::SGPR_64RegClassID;
2566 case 96:
2567 return AMDGPU::SGPR_96RegClassID;
2568 case 128:
2569 return AMDGPU::SGPR_128RegClassID;
2570 case 160:
2571 return AMDGPU::SGPR_160RegClassID;
2572 case 192:
2573 return AMDGPU::SGPR_192RegClassID;
2574 case 224:
2575 return AMDGPU::SGPR_224RegClassID;
2576 case 256:
2577 return AMDGPU::SGPR_256RegClassID;
2578 case 288:
2579 return AMDGPU::SGPR_288RegClassID;
2580 case 320:
2581 return AMDGPU::SGPR_320RegClassID;
2582 case 352:
2583 return AMDGPU::SGPR_352RegClassID;
2584 case 384:
2585 return AMDGPU::SGPR_384RegClassID;
2586 case 512:
2587 return AMDGPU::SGPR_512RegClassID;
2588 }
2589 } else if (Is == IS_AGPR) {
2590 switch (RegWidth) {
2591 default: return -1;
2592 case 32:
2593 return AMDGPU::AGPR_32RegClassID;
2594 case 64:
2595 return AMDGPU::AReg_64RegClassID;
2596 case 96:
2597 return AMDGPU::AReg_96RegClassID;
2598 case 128:
2599 return AMDGPU::AReg_128RegClassID;
2600 case 160:
2601 return AMDGPU::AReg_160RegClassID;
2602 case 192:
2603 return AMDGPU::AReg_192RegClassID;
2604 case 224:
2605 return AMDGPU::AReg_224RegClassID;
2606 case 256:
2607 return AMDGPU::AReg_256RegClassID;
2608 case 288:
2609 return AMDGPU::AReg_288RegClassID;
2610 case 320:
2611 return AMDGPU::AReg_320RegClassID;
2612 case 352:
2613 return AMDGPU::AReg_352RegClassID;
2614 case 384:
2615 return AMDGPU::AReg_384RegClassID;
2616 case 512:
2617 return AMDGPU::AReg_512RegClassID;
2618 case 1024:
2619 return AMDGPU::AReg_1024RegClassID;
2620 }
2621 }
2622 return -1;
2623}
2624
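// Map a special register name (including aliases such as "src_shared_base")
// to its MCRegister, or NoRegister if the name is not recognized.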
2625static MCRegister getSpecialRegForName(StringRef RegName) {
2626 return StringSwitch<unsigned>(RegName)
2627 .Case(S: "exec", Value: AMDGPU::EXEC)
2628 .Case(S: "vcc", Value: AMDGPU::VCC)
2629 .Case(S: "flat_scratch", Value: AMDGPU::FLAT_SCR)
2630 .Case(S: "xnack_mask", Value: AMDGPU::XNACK_MASK)
2631 .Case(S: "shared_base", Value: AMDGPU::SRC_SHARED_BASE)
2632 .Case(S: "src_shared_base", Value: AMDGPU::SRC_SHARED_BASE)
2633 .Case(S: "shared_limit", Value: AMDGPU::SRC_SHARED_LIMIT)
2634 .Case(S: "src_shared_limit", Value: AMDGPU::SRC_SHARED_LIMIT)
2635 .Case(S: "private_base", Value: AMDGPU::SRC_PRIVATE_BASE)
2636 .Case(S: "src_private_base", Value: AMDGPU::SRC_PRIVATE_BASE)
2637 .Case(S: "private_limit", Value: AMDGPU::SRC_PRIVATE_LIMIT)
2638 .Case(S: "src_private_limit", Value: AMDGPU::SRC_PRIVATE_LIMIT)
2639 .Case(S: "pops_exiting_wave_id", Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2640 .Case(S: "src_pops_exiting_wave_id", Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2641 .Case(S: "lds_direct", Value: AMDGPU::LDS_DIRECT)
2642 .Case(S: "src_lds_direct", Value: AMDGPU::LDS_DIRECT)
2643 .Case(S: "m0", Value: AMDGPU::M0)
2644 .Case(S: "vccz", Value: AMDGPU::SRC_VCCZ)
2645 .Case(S: "src_vccz", Value: AMDGPU::SRC_VCCZ)
2646 .Case(S: "execz", Value: AMDGPU::SRC_EXECZ)
2647 .Case(S: "src_execz", Value: AMDGPU::SRC_EXECZ)
2648 .Case(S: "scc", Value: AMDGPU::SRC_SCC)
2649 .Case(S: "src_scc", Value: AMDGPU::SRC_SCC)
2650 .Case(S: "tba", Value: AMDGPU::TBA)
2651 .Case(S: "tma", Value: AMDGPU::TMA)
2652 .Case(S: "flat_scratch_lo", Value: AMDGPU::FLAT_SCR_LO)
2653 .Case(S: "flat_scratch_hi", Value: AMDGPU::FLAT_SCR_HI)
2654 .Case(S: "xnack_mask_lo", Value: AMDGPU::XNACK_MASK_LO)
2655 .Case(S: "xnack_mask_hi", Value: AMDGPU::XNACK_MASK_HI)
2656 .Case(S: "vcc_lo", Value: AMDGPU::VCC_LO)
2657 .Case(S: "vcc_hi", Value: AMDGPU::VCC_HI)
2658 .Case(S: "exec_lo", Value: AMDGPU::EXEC_LO)
2659 .Case(S: "exec_hi", Value: AMDGPU::EXEC_HI)
2660 .Case(S: "tma_lo", Value: AMDGPU::TMA_LO)
2661 .Case(S: "tma_hi", Value: AMDGPU::TMA_HI)
2662 .Case(S: "tba_lo", Value: AMDGPU::TBA_LO)
2663 .Case(S: "tba_hi", Value: AMDGPU::TBA_HI)
2664 .Case(S: "pc", Value: AMDGPU::PC_REG)
2665 .Case(S: "null", Value: AMDGPU::SGPR_NULL)
2666 .Default(Value: AMDGPU::NoRegister);
2667}
2668
2669bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2670 SMLoc &EndLoc, bool RestoreOnFailure) {
2671 auto R = parseRegister();
2672 if (!R) return true;
2673 assert(R->isReg());
2674 RegNo = R->getReg();
2675 StartLoc = R->getStartLoc();
2676 EndLoc = R->getEndLoc();
2677 return false;
2678}
2679
2680bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2681 SMLoc &EndLoc) {
2682 return ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2683}
2684
2685ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2686 SMLoc &EndLoc) {
2687 bool Result = ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2688 bool PendingErrors = getParser().hasPendingError();
2689 getParser().clearPendingErrors();
2690 if (PendingErrors)
2691 return ParseStatus::Failure;
2692 if (Result)
2693 return ParseStatus::NoMatch;
2694 return ParseStatus::Success;
2695}
2696
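// Append Reg1 to the register list being parsed. Special registers may only
// form known lo/hi pairs (e.g. vcc_lo, vcc_hi -> vcc); regular registers must
// have consecutive indices, extending the list width by 32 bits.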
2697bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2698 RegisterKind RegKind,
2699 MCRegister Reg1, SMLoc Loc) {
2700 switch (RegKind) {
2701 case IS_SPECIAL:
2702 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2703 Reg = AMDGPU::EXEC;
2704 RegWidth = 64;
2705 return true;
2706 }
2707 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2708 Reg = AMDGPU::FLAT_SCR;
2709 RegWidth = 64;
2710 return true;
2711 }
2712 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2713 Reg = AMDGPU::XNACK_MASK;
2714 RegWidth = 64;
2715 return true;
2716 }
2717 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2718 Reg = AMDGPU::VCC;
2719 RegWidth = 64;
2720 return true;
2721 }
2722 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2723 Reg = AMDGPU::TBA;
2724 RegWidth = 64;
2725 return true;
2726 }
2727 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2728 Reg = AMDGPU::TMA;
2729 RegWidth = 64;
2730 return true;
2731 }
2732 Error(L: Loc, Msg: "register does not fit in the list");
2733 return false;
2734 case IS_VGPR:
2735 case IS_SGPR:
2736 case IS_AGPR:
2737 case IS_TTMP:
2738 if (Reg1 != Reg + RegWidth / 32) {
2739 Error(L: Loc, Msg: "registers in a list must have consecutive indices");
2740 return false;
2741 }
2742 RegWidth += 32;
2743 return true;
2744 default:
2745 llvm_unreachable("unexpected register kind");
2746 }
2747}
2748
2749struct RegInfo {
2750 StringLiteral Name;
2751 RegisterKind Kind;
2752};
2753
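// Prefixes of regular (numbered) register names. Order matters: prefixes
// sharing a leading letter ("acc" vs. "a") must be listed longest first.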
2754static constexpr RegInfo RegularRegisters[] = {
2755 {.Name: {"v"}, .Kind: IS_VGPR},
2756 {.Name: {"s"}, .Kind: IS_SGPR},
2757 {.Name: {"ttmp"}, .Kind: IS_TTMP},
2758 {.Name: {"acc"}, .Kind: IS_AGPR},
2759 {.Name: {"a"}, .Kind: IS_AGPR},
2760};
2761
2762static bool isRegularReg(RegisterKind Kind) {
2763 return Kind == IS_VGPR ||
2764 Kind == IS_SGPR ||
2765 Kind == IS_TTMP ||
2766 Kind == IS_AGPR;
2767}
2768
2769static const RegInfo* getRegularRegInfo(StringRef Str) {
2770 for (const RegInfo &Reg : RegularRegisters)
2771 if (Str.starts_with(Prefix: Reg.Name))
2772 return &Reg;
2773 return nullptr;
2774}
2775
2776static bool getRegNum(StringRef Str, unsigned& Num) {
2777 return !Str.getAsInteger(Radix: 10, Result&: Num);
2778}
2779
2780bool
2781AMDGPUAsmParser::isRegister(const AsmToken &Token,
2782 const AsmToken &NextToken) const {
2783
2784 // A list of consecutive registers: [s0,s1,s2,s3]
2785 if (Token.is(K: AsmToken::LBrac))
2786 return true;
2787
2788 if (!Token.is(K: AsmToken::Identifier))
2789 return false;
2790
2791 // A single register like s0 or a range of registers like s[0:1]
2792
2793 StringRef Str = Token.getString();
2794 const RegInfo *Reg = getRegularRegInfo(Str);
2795 if (Reg) {
2796 StringRef RegName = Reg->Name;
2797 StringRef RegSuffix = Str.substr(Start: RegName.size());
2798 if (!RegSuffix.empty()) {
2799 RegSuffix.consume_back(Suffix: ".l");
2800 RegSuffix.consume_back(Suffix: ".h");
2801 unsigned Num;
2802 // A single register with an index: rXX
2803 if (getRegNum(Str: RegSuffix, Num))
2804 return true;
2805 } else {
2806 // A range of registers: r[XX:YY].
2807 if (NextToken.is(K: AsmToken::LBrac))
2808 return true;
2809 }
2810 }
2811
2812 return getSpecialRegForName(RegName: Str).isValid();
2813}
2814
2815bool
2816AMDGPUAsmParser::isRegister()
2817{
2818 return isRegister(Token: getToken(), NextToken: peekToken());
2819}
2820
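// Build the MCRegister for a regular register from its kind, first index and
// width in bits. SGPR and TTMP tuples must have aligned indices (up to 4
// dwords); an optional subregister index selects a 16-bit half.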
2821MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2822 unsigned SubReg, unsigned RegWidth,
2823 SMLoc Loc) {
2824 assert(isRegularReg(RegKind));
2825
2826 unsigned AlignSize = 1;
2827 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2828 // SGPR and TTMP registers must be aligned.
2829 // Max required alignment is 4 dwords.
2830 AlignSize = std::min(a: llvm::bit_ceil(Value: RegWidth / 32), b: 4u);
2831 }
2832
2833 if (RegNum % AlignSize != 0) {
2834 Error(L: Loc, Msg: "invalid register alignment");
2835 return MCRegister();
2836 }
2837
2838 unsigned RegIdx = RegNum / AlignSize;
2839 int RCID = getRegClass(Is: RegKind, RegWidth);
2840 if (RCID == -1) {
2841 Error(L: Loc, Msg: "invalid or unsupported register size");
2842 return MCRegister();
2843 }
2844
2845 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2846 const MCRegisterClass RC = TRI->getRegClass(i: RCID);
2847 if (RegIdx >= RC.getNumRegs()) {
2848 Error(L: Loc, Msg: "register index is out of range");
2849 return MCRegister();
2850 }
2851
2852 MCRegister Reg = RC.getRegister(i: RegIdx);
2853
2854 if (SubReg) {
2855 Reg = TRI->getSubReg(Reg, Idx: SubReg);
2856
2857 // Currently all regular registers have their .l and .h subregisters, so
2858 // we should never need to generate an error here.
2859 assert(Reg && "Invalid subregister!");
2860 }
2861
2862 return Reg;
2863}
2864
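// Parse a register index range of the form "[lo]" or "[lo:hi]"; a single-index
// range may be followed by a .l or .h half selector. On success Num holds the
// first index and RegWidth is 32 * (number of registers).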
2865bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2866 unsigned &SubReg) {
2867 int64_t RegLo, RegHi;
2868 if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "missing register index"))
2869 return false;
2870
2871 SMLoc FirstIdxLoc = getLoc();
2872 SMLoc SecondIdxLoc;
2873
2874 if (!parseExpr(Imm&: RegLo))
2875 return false;
2876
2877 if (trySkipToken(Kind: AsmToken::Colon)) {
2878 SecondIdxLoc = getLoc();
2879 if (!parseExpr(Imm&: RegHi))
2880 return false;
2881 } else {
2882 RegHi = RegLo;
2883 }
2884
2885 if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
2886 return false;
2887
2888 if (!isUInt<32>(x: RegLo)) {
2889 Error(L: FirstIdxLoc, Msg: "invalid register index");
2890 return false;
2891 }
2892
2893 if (!isUInt<32>(x: RegHi)) {
2894 Error(L: SecondIdxLoc, Msg: "invalid register index");
2895 return false;
2896 }
2897
2898 if (RegLo > RegHi) {
2899 Error(L: FirstIdxLoc, Msg: "first register index should not exceed second index");
2900 return false;
2901 }
2902
2903 if (RegHi == RegLo) {
2904 StringRef RegSuffix = getTokenStr();
2905 if (RegSuffix == ".l") {
2906 SubReg = AMDGPU::lo16;
2907 lex();
2908 } else if (RegSuffix == ".h") {
2909 SubReg = AMDGPU::hi16;
2910 lex();
2911 }
2912 }
2913
2914 Num = static_cast<unsigned>(RegLo);
2915 RegWidth = 32 * ((RegHi - RegLo) + 1);
2916
2917 return true;
2918}
2919
2920MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2921 unsigned &RegNum,
2922 unsigned &RegWidth,
2923 SmallVectorImpl<AsmToken> &Tokens) {
2924 assert(isToken(AsmToken::Identifier));
2925 MCRegister Reg = getSpecialRegForName(RegName: getTokenStr());
2926 if (Reg) {
2927 RegNum = 0;
2928 RegWidth = 32;
2929 RegKind = IS_SPECIAL;
2930 Tokens.push_back(Elt: getToken());
2931 lex(); // skip register name
2932 }
2933 return Reg;
2934}
2935
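// Parse a regular register reference: a prefix ("v", "s", "ttmp", "acc"/"a")
// followed by either a single index (optionally with a .l/.h half selector) or
// an index range in square brackets.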
2936MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2937 unsigned &RegNum,
2938 unsigned &RegWidth,
2939 SmallVectorImpl<AsmToken> &Tokens) {
2940 assert(isToken(AsmToken::Identifier));
2941 StringRef RegName = getTokenStr();
2942 auto Loc = getLoc();
2943
2944 const RegInfo *RI = getRegularRegInfo(Str: RegName);
2945 if (!RI) {
2946 Error(L: Loc, Msg: "invalid register name");
2947 return MCRegister();
2948 }
2949
2950 Tokens.push_back(Elt: getToken());
2951 lex(); // skip register name
2952
2953 RegKind = RI->Kind;
2954 StringRef RegSuffix = RegName.substr(Start: RI->Name.size());
2955 unsigned SubReg = NoSubRegister;
2956 if (!RegSuffix.empty()) {
2957 if (RegSuffix.consume_back(Suffix: ".l"))
2958 SubReg = AMDGPU::lo16;
2959 else if (RegSuffix.consume_back(Suffix: ".h"))
2960 SubReg = AMDGPU::hi16;
2961
2962 // Single 32-bit register: vXX.
2963 if (!getRegNum(Str: RegSuffix, Num&: RegNum)) {
2964 Error(L: Loc, Msg: "invalid register index");
2965 return MCRegister();
2966 }
2967 RegWidth = 32;
2968 } else {
2969 // Range of registers: v[XX:YY]. ":YY" is optional.
2970 if (!ParseRegRange(Num&: RegNum, RegWidth, SubReg))
2971 return MCRegister();
2972 }
2973
2974 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2975}
2976
2977MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2978 unsigned &RegNum, unsigned &RegWidth,
2979 SmallVectorImpl<AsmToken> &Tokens) {
2980 MCRegister Reg;
2981 auto ListLoc = getLoc();
2982
2983 if (!skipToken(Kind: AsmToken::LBrac,
2984 ErrMsg: "expected a register or a list of registers")) {
2985 return MCRegister();
2986 }
2987
2988 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2989
2990 auto Loc = getLoc();
2991 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2992 return MCRegister();
2993 if (RegWidth != 32) {
2994 Error(L: Loc, Msg: "expected a single 32-bit register");
2995 return MCRegister();
2996 }
2997
2998 for (; trySkipToken(Kind: AsmToken::Comma); ) {
2999 RegisterKind NextRegKind;
3000 MCRegister NextReg;
3001 unsigned NextRegNum, NextRegWidth;
3002 Loc = getLoc();
3003
3004 if (!ParseAMDGPURegister(RegKind&: NextRegKind, Reg&: NextReg,
3005 RegNum&: NextRegNum, RegWidth&: NextRegWidth,
3006 Tokens)) {
3007 return MCRegister();
3008 }
3009 if (NextRegWidth != 32) {
3010 Error(L: Loc, Msg: "expected a single 32-bit register");
3011 return MCRegister();
3012 }
3013 if (NextRegKind != RegKind) {
3014 Error(L: Loc, Msg: "registers in a list must be of the same kind");
3015 return MCRegister();
3016 }
3017 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, Reg1: NextReg, Loc))
3018 return MCRegister();
3019 }
3020
3021 if (!skipToken(Kind: AsmToken::RBrac,
3022 ErrMsg: "expected a comma or a closing square bracket")) {
3023 return MCRegister();
3024 }
3025
3026 if (isRegularReg(Kind: RegKind))
3027 Reg = getRegularReg(RegKind, RegNum, SubReg: NoSubRegister, RegWidth, Loc: ListLoc);
3028
3029 return Reg;
3030}
3031
3032bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3033 MCRegister &Reg, unsigned &RegNum,
3034 unsigned &RegWidth,
3035 SmallVectorImpl<AsmToken> &Tokens) {
3036 auto Loc = getLoc();
3037 Reg = MCRegister();
3038
3039 if (isToken(Kind: AsmToken::Identifier)) {
3040 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3041 if (!Reg)
3042 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3043 } else {
3044 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3045 }
3046
3047 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3048 if (!Reg) {
3049 assert(Parser.hasPendingError());
3050 return false;
3051 }
3052
3053 if (!subtargetHasRegister(MRI: *TRI, Reg)) {
3054 if (Reg == AMDGPU::SGPR_NULL) {
3055 Error(L: Loc, Msg: "'null' operand is not supported on this GPU");
3056 } else {
3057 Error(L: Loc, Msg: Twine(AMDGPUInstPrinter::getRegisterName(Reg)) +
3058 " register not available on this GPU");
3059 }
3060 return false;
3061 }
3062
3063 return true;
3064}
3065
3066bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3067 MCRegister &Reg, unsigned &RegNum,
3068 unsigned &RegWidth,
3069 bool RestoreOnFailure /*=false*/) {
3070 Reg = MCRegister();
3071
3072 SmallVector<AsmToken, 1> Tokens;
3073 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3074 if (RestoreOnFailure) {
3075 while (!Tokens.empty()) {
3076 getLexer().UnLex(Token: Tokens.pop_back_val());
3077 }
3078 }
3079 return true;
3080 }
3081 return false;
3082}
3083
3084std::optional<StringRef>
3085AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3086 switch (RegKind) {
3087 case IS_VGPR:
3088 return StringRef(".amdgcn.next_free_vgpr");
3089 case IS_SGPR:
3090 return StringRef(".amdgcn.next_free_sgpr");
3091 default:
3092 return std::nullopt;
3093 }
3094}
3095
3096void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3097 auto SymbolName = getGprCountSymbolName(RegKind);
3098 assert(SymbolName && "initializing invalid register kind");
3099 MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName);
3100 Sym->setVariableValue(MCConstantExpr::create(Value: 0, Ctx&: getContext()));
3101 Sym->setRedefinable(true);
3102}
3103
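// Bump the .amdgcn.next_free_{v,s}gpr symbol so that it covers the register
// that was just parsed. Returns false only if the symbol exists but is not a
// variable holding an absolute expression.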
3104bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3105 unsigned DwordRegIndex,
3106 unsigned RegWidth) {
3107 // Symbols are only defined for GCN targets
3108 if (AMDGPU::getIsaVersion(GPU: getSTI().getCPU()).Major < 6)
3109 return true;
3110
3111 auto SymbolName = getGprCountSymbolName(RegKind);
3112 if (!SymbolName)
3113 return true;
3114 MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName);
3115
3116 int64_t NewMax = DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1;
3117 int64_t OldCount;
3118
3119 if (!Sym->isVariable())
3120 return !Error(L: getLoc(),
3121 Msg: ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3122 if (!Sym->getVariableValue()->evaluateAsAbsolute(Res&: OldCount))
3123 return !Error(
3124 L: getLoc(),
3125 Msg: ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3126
3127 if (OldCount <= NewMax)
3128 Sym->setVariableValue(MCConstantExpr::create(Value: NewMax + 1, Ctx&: getContext()));
3129
3130 return true;
3131}
3132
3133std::unique_ptr<AMDGPUOperand>
3134AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3135 const auto &Tok = getToken();
3136 SMLoc StartLoc = Tok.getLoc();
3137 SMLoc EndLoc = Tok.getEndLoc();
3138 RegisterKind RegKind;
3139 MCRegister Reg;
3140 unsigned RegNum, RegWidth;
3141
3142 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3143 return nullptr;
3144 }
3145 if (isHsaAbi(STI: getSTI())) {
3146 if (!updateGprCountSymbols(RegKind, DwordRegIndex: RegNum, RegWidth))
3147 return nullptr;
3148 } else
3149 KernelScope.usesRegister(RegKind, DwordRegIndex: RegNum, RegWidth);
3150 return AMDGPUOperand::CreateReg(AsmParser: this, Reg, S: StartLoc, E: EndLoc);
3151}
3152
3153ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3154 bool HasSP3AbsModifier, bool HasLit) {
3155 // TODO: add syntactic sugar for 1/(2*PI)
3156
3157 if (isRegister())
3158 return ParseStatus::NoMatch;
3159 assert(!isModifier());
3160
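  // lit(...) wraps an immediate to request a literal encoding; parse the
  // wrapped value recursively with HasLit set.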
3161 if (!HasLit) {
3162 HasLit = trySkipId(Id: "lit");
3163 if (HasLit) {
3164 if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit"))
3165 return ParseStatus::Failure;
3166 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
3167 if (S.isSuccess() &&
3168 !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3169 return ParseStatus::Failure;
3170 return S;
3171 }
3172 }
3173
3174 const auto& Tok = getToken();
3175 const auto& NextTok = peekToken();
3176 bool IsReal = Tok.is(K: AsmToken::Real);
3177 SMLoc S = getLoc();
3178 bool Negate = false;
3179
3180 if (!IsReal && Tok.is(K: AsmToken::Minus) && NextTok.is(K: AsmToken::Real)) {
3181 lex();
3182 IsReal = true;
3183 Negate = true;
3184 }
3185
3186 AMDGPUOperand::Modifiers Mods;
3187 Mods.Lit = HasLit;
3188
3189 if (IsReal) {
3190    // Floating-point expressions are not supported;
3191    // only floating-point literals with an
3192    // optional sign are allowed.
3193
3194 StringRef Num = getTokenStr();
3195 lex();
3196
3197 APFloat RealVal(APFloat::IEEEdouble());
3198 auto roundMode = APFloat::rmNearestTiesToEven;
3199 if (errorToBool(Err: RealVal.convertFromString(Num, roundMode).takeError()))
3200 return ParseStatus::Failure;
3201 if (Negate)
3202 RealVal.changeSign();
3203
3204 Operands.push_back(
3205 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: RealVal.bitcastToAPInt().getZExtValue(), Loc: S,
3206 Type: AMDGPUOperand::ImmTyNone, IsFPImm: true));
3207 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3208 Op.setModifiers(Mods);
3209
3210 return ParseStatus::Success;
3211
3212 } else {
3213 int64_t IntVal;
3214 const MCExpr *Expr;
3215 SMLoc S = getLoc();
3216
3217 if (HasSP3AbsModifier) {
3218      // This is a workaround for handling expressions
3219      // as arguments of the SP3 'abs' modifier, for example:
3220      //   |1.0|
3221      //   |-1|
3222      //   |1+x|
3223      // This syntax is not compatible with the syntax of standard
3224      // MC expressions (due to the trailing '|').
3225 SMLoc EndLoc;
3226 if (getParser().parsePrimaryExpr(Res&: Expr, EndLoc, TypeInfo: nullptr))
3227 return ParseStatus::Failure;
3228 } else {
3229 if (Parser.parseExpression(Res&: Expr))
3230 return ParseStatus::Failure;
3231 }
3232
3233 if (Expr->evaluateAsAbsolute(Res&: IntVal)) {
3234 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S));
3235 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3236 Op.setModifiers(Mods);
3237 } else {
3238 if (HasLit)
3239 return ParseStatus::NoMatch;
3240 Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S));
3241 }
3242
3243 return ParseStatus::Success;
3244 }
3245
3246 return ParseStatus::NoMatch;
3247}
3248
3249ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3250 if (!isRegister())
3251 return ParseStatus::NoMatch;
3252
3253 if (auto R = parseRegister()) {
3254 assert(R->isReg());
3255 Operands.push_back(Elt: std::move(R));
3256 return ParseStatus::Success;
3257 }
3258 return ParseStatus::Failure;
3259}
3260
3261ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3262 bool HasSP3AbsMod, bool HasLit) {
3263 ParseStatus Res = parseReg(Operands);
3264 if (!Res.isNoMatch())
3265 return Res;
3266 if (isModifier())
3267 return ParseStatus::NoMatch;
3268 return parseImm(Operands, HasSP3AbsModifier: HasSP3AbsMod, HasLit);
3269}
3270
3271bool
3272AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3273 if (Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::LParen)) {
3274 const auto &str = Token.getString();
3275 return str == "abs" || str == "neg" || str == "sext";
3276 }
3277 return false;
3278}
3279
3280bool
3281AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3282 return Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::Colon);
3283}
3284
3285bool
3286AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3287 return isNamedOperandModifier(Token, NextToken) || Token.is(K: AsmToken::Pipe);
3288}
3289
3290bool
3291AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3292 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3293}
3294
3295// Check if this is an operand modifier or an opcode modifier
3296// which may look like an expression but is not. We should
3297// avoid parsing these modifiers as expressions. Currently
3298// recognized sequences are:
3299// |...|
3300// abs(...)
3301// neg(...)
3302// sext(...)
3303// -reg
3304// -|...|
3305// -abs(...)
3306// name:...
3307//
3308bool
3309AMDGPUAsmParser::isModifier() {
3310
3311 AsmToken Tok = getToken();
3312 AsmToken NextToken[2];
3313 peekTokens(Tokens: NextToken);
3314
3315 return isOperandModifier(Token: Tok, NextToken: NextToken[0]) ||
3316 (Tok.is(K: AsmToken::Minus) && isRegOrOperandModifier(Token: NextToken[0], NextToken: NextToken[1])) ||
3317 isOpcodeModifierWithVal(Token: Tok, NextToken: NextToken[0]);
3318}
3319
3320// Check if the current token is an SP3 'neg' modifier.
3321// Currently this modifier is allowed in the following context:
3322//
3323// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3324// 2. Before an 'abs' modifier: -abs(...)
3325// 3. Before an SP3 'abs' modifier: -|...|
3326//
3327// In all other cases "-" is handled as part
3328// of an expression that follows the sign.
3329//
3330// Note: When "-" is followed by an integer literal N,
3331// this is interpreted as integer negation rather
3332// than a floating-point NEG modifier applied to N.
3333// Besides being counter-intuitive, such a use of the floating-point
3334// NEG modifier would result in different meanings
3335// for integer literals used with VOP1/2/C and VOP3,
3336// for example:
3337//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3338//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3339// Negative fp literals with a preceding "-" are
3340// handled likewise, for uniformity.
3341//
3342bool
3343AMDGPUAsmParser::parseSP3NegModifier() {
3344
3345 AsmToken NextToken[2];
3346 peekTokens(Tokens: NextToken);
3347
3348 if (isToken(Kind: AsmToken::Minus) &&
3349 (isRegister(Token: NextToken[0], NextToken: NextToken[1]) ||
3350 NextToken[0].is(K: AsmToken::Pipe) ||
3351 isId(Token: NextToken[0], Id: "abs"))) {
3352 lex();
3353 return true;
3354 }
3355
3356 return false;
3357}
3358
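// Parse a register or an immediate together with optional floating-point
// input modifiers: 'neg(...)', 'abs(...)', 'lit(...)' and the SP3 forms
// '-' and '|...|'. Mixing a named modifier with its SP3 equivalent is
// rejected.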
3359ParseStatus
3360AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3361 bool AllowImm) {
3362 bool Neg, SP3Neg;
3363 bool Abs, SP3Abs;
3364 bool Lit;
3365 SMLoc Loc;
3366
3367 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3368 if (isToken(Kind: AsmToken::Minus) && peekToken().is(K: AsmToken::Minus))
3369 return Error(L: getLoc(), Msg: "invalid syntax, expected 'neg' modifier");
3370
3371 SP3Neg = parseSP3NegModifier();
3372
3373 Loc = getLoc();
3374 Neg = trySkipId(Id: "neg");
3375 if (Neg && SP3Neg)
3376 return Error(L: Loc, Msg: "expected register or immediate");
3377 if (Neg && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after neg"))
3378 return ParseStatus::Failure;
3379
3380 Abs = trySkipId(Id: "abs");
3381 if (Abs && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after abs"))
3382 return ParseStatus::Failure;
3383
3384 Lit = trySkipId(Id: "lit");
3385 if (Lit && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit"))
3386 return ParseStatus::Failure;
3387
3388 Loc = getLoc();
3389 SP3Abs = trySkipToken(Kind: AsmToken::Pipe);
3390 if (Abs && SP3Abs)
3391 return Error(L: Loc, Msg: "expected register or immediate");
3392
3393 ParseStatus Res;
3394 if (AllowImm) {
3395 Res = parseRegOrImm(Operands, HasSP3AbsMod: SP3Abs, HasLit: Lit);
3396 } else {
3397 Res = parseReg(Operands);
3398 }
3399 if (!Res.isSuccess())
3400 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3401
3402 if (Lit && !Operands.back()->isImm())
3403 Error(L: Loc, Msg: "expected immediate with lit modifier");
3404
3405 if (SP3Abs && !skipToken(Kind: AsmToken::Pipe, ErrMsg: "expected vertical bar"))
3406 return ParseStatus::Failure;
3407 if (Abs && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3408 return ParseStatus::Failure;
3409 if (Neg && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3410 return ParseStatus::Failure;
3411 if (Lit && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3412 return ParseStatus::Failure;
3413
3414 AMDGPUOperand::Modifiers Mods;
3415 Mods.Abs = Abs || SP3Abs;
3416 Mods.Neg = Neg || SP3Neg;
3417 Mods.Lit = Lit;
3418
3419 if (Mods.hasFPModifiers() || Lit) {
3420 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3421 if (Op.isExpr())
3422 return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression");
3423 Op.setModifiers(Mods);
3424 }
3425 return ParseStatus::Success;
3426}
3427
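// Parse a register or an immediate optionally wrapped in the integer input
// modifier 'sext(...)'.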
3428ParseStatus
3429AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3430 bool AllowImm) {
3431 bool Sext = trySkipId(Id: "sext");
3432 if (Sext && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after sext"))
3433 return ParseStatus::Failure;
3434
3435 ParseStatus Res;
3436 if (AllowImm) {
3437 Res = parseRegOrImm(Operands);
3438 } else {
3439 Res = parseReg(Operands);
3440 }
3441 if (!Res.isSuccess())
3442 return Sext ? ParseStatus::Failure : Res;
3443
3444 if (Sext && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3445 return ParseStatus::Failure;
3446
3447 AMDGPUOperand::Modifiers Mods;
3448 Mods.Sext = Sext;
3449
3450 if (Mods.hasIntModifiers()) {
3451 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3452 if (Op.isExpr())
3453 return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression");
3454 Op.setModifiers(Mods);
3455 }
3456
3457 return ParseStatus::Success;
3458}
3459
3460ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3461 return parseRegOrImmWithFPInputMods(Operands, AllowImm: false);
3462}
3463
3464ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3465 return parseRegOrImmWithIntInputMods(Operands, AllowImm: false);
3466}
3467
3468ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3469 auto Loc = getLoc();
3470 if (trySkipId(Id: "off")) {
3471 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: 0, Loc,
3472 Type: AMDGPUOperand::ImmTyOff, IsFPImm: false));
3473 return ParseStatus::Success;
3474 }
3475
3476 if (!isRegister())
3477 return ParseStatus::NoMatch;
3478
3479 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3480 if (Reg) {
3481 Operands.push_back(Elt: std::move(Reg));
3482 return ParseStatus::Success;
3483 }
3484
3485 return ParseStatus::Failure;
3486}
3487
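// Reject matches that are inconsistent with an explicitly requested encoding
// (_e32/_e64/_dpp/_sdwa). Additionally, SDWA v_mac_f32/f16 only accept
// dst_sel == DWORD.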
3488unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3489 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
3490
3491 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3492 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3493 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3494 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3495 return Match_InvalidOperand;
3496
3497 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3498 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3499 // v_mac_f32/16 allow only dst_sel == DWORD;
3500 auto OpNum =
3501 AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::dst_sel);
3502 const auto &Op = Inst.getOperand(i: OpNum);
3503 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3504 return Match_InvalidOperand;
3505 }
3506 }
3507
3508 return Match_Success;
3509}
3510
3511static ArrayRef<unsigned> getAllVariants() {
3512 static const unsigned Variants[] = {
3513 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3514 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3515 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3516 };
3517
3518 return ArrayRef(Variants);
3519}
3520
// Which asm variants should be checked.
3522ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3523 if (isForcedDPP() && isForcedVOP3()) {
3524 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3525 return ArrayRef(Variants);
3526 }
3527 if (getForcedEncodingSize() == 32) {
3528 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3529 return ArrayRef(Variants);
3530 }
3531
3532 if (isForcedVOP3()) {
3533 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3534 return ArrayRef(Variants);
3535 }
3536
3537 if (isForcedSDWA()) {
3538 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3539 AMDGPUAsmVariants::SDWA9};
3540 return ArrayRef(Variants);
3541 }
3542
3543 if (isForcedDPP()) {
3544 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3545 return ArrayRef(Variants);
3546 }
3547
3548 return getAllVariants();
3549}
3550
3551StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3552 if (isForcedDPP() && isForcedVOP3())
3553 return "e64_dpp";
3554
3555 if (getForcedEncodingSize() == 32)
3556 return "e32";
3557
3558 if (isForcedVOP3())
3559 return "e64";
3560
3561 if (isForcedSDWA())
3562 return "sdwa";
3563
3564 if (isForcedDPP())
3565 return "dpp";
3566
3567 return "";
3568}
3569
3570unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3571 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
3572 for (MCPhysReg Reg : Desc.implicit_uses()) {
3573 switch (Reg) {
3574 case AMDGPU::FLAT_SCR:
3575 case AMDGPU::VCC:
3576 case AMDGPU::VCC_LO:
3577 case AMDGPU::VCC_HI:
3578 case AMDGPU::M0:
3579 return Reg;
3580 default:
3581 break;
3582 }
3583 }
3584 return AMDGPU::NoRegister;
3585}
3586
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
3591bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3592 unsigned OpIdx) const {
3593 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
3594
3595 if (!AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx) ||
3596 AMDGPU::isKImmOperand(Desc, OpNo: OpIdx)) {
3597 return false;
3598 }
3599
3600 const MCOperand &MO = Inst.getOperand(i: OpIdx);
3601
3602 int64_t Val = MO.getImm();
3603 auto OpSize = AMDGPU::getOperandSize(Desc, OpNo: OpIdx);
3604
3605 switch (OpSize) { // expected operand size
3606 case 8:
3607 return AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
3608 case 4:
3609 return AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
3610 case 2: {
3611 const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3612 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3613 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16)
3614 return AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
3615
3616 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3617 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3618 return AMDGPU::isInlinableLiteralV2I16(Literal: Val);
3619
3620 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3621 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3622 return AMDGPU::isInlinableLiteralV2F16(Literal: Val);
3623
3624 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
3625 OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
3626 return AMDGPU::isInlinableLiteralV2BF16(Literal: Val);
3627
3628 if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
3629 OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16)
3630 return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
3631
3632 if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
3633 OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
3634 return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
3635
3636 llvm_unreachable("invalid operand type");
3637 }
3638 default:
3639 llvm_unreachable("invalid operand size");
3640 }
3641}
3642
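// Pre-GFX10 targets allow a single scalar operand on the constant bus.
// GFX10+ allows two, except for 64-bit shifts, which are still limited to one.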
3643unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3644 if (!isGFX10Plus())
3645 return 1;
3646
3647 switch (Opcode) {
3648 // 64-bit shift instructions can use only one scalar value input
3649 case AMDGPU::V_LSHLREV_B64_e64:
3650 case AMDGPU::V_LSHLREV_B64_gfx10:
3651 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3652 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3653 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3654 case AMDGPU::V_LSHRREV_B64_e64:
3655 case AMDGPU::V_LSHRREV_B64_gfx10:
3656 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3657 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3658 case AMDGPU::V_ASHRREV_I64_e64:
3659 case AMDGPU::V_ASHRREV_I64_gfx10:
3660 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3661 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3662 case AMDGPU::V_LSHL_B64_e64:
3663 case AMDGPU::V_LSHR_B64_e64:
3664 case AMDGPU::V_ASHR_I64_e64:
3665 return 1;
3666 default:
3667 return 2;
3668 }
3669}
3670
3671constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3672using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3673
3674// Get regular operand indices in the same order as specified
3675// in the instruction (but append mandatory literals to the end).
3676static OperandIndices getSrcOperandIndices(unsigned Opcode,
3677 bool AddMandatoryLiterals = false) {
3678
3679 int16_t ImmIdx =
3680 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::imm) : -1;
3681
3682 if (isVOPD(Opc: Opcode)) {
3683 int16_t ImmXIdx =
3684 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::immX) : -1;
3685
3686 return {getNamedOperandIdx(Opcode, Name: OpName::src0X),
3687 getNamedOperandIdx(Opcode, Name: OpName::vsrc1X),
3688 getNamedOperandIdx(Opcode, Name: OpName::src0Y),
3689 getNamedOperandIdx(Opcode, Name: OpName::vsrc1Y),
3690 ImmXIdx,
3691 ImmIdx};
3692 }
3693
3694 return {getNamedOperandIdx(Opcode, Name: OpName::src0),
3695 getNamedOperandIdx(Opcode, Name: OpName::src1),
3696 getNamedOperandIdx(Opcode, Name: OpName::src2), ImmIdx};
3697}
3698
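// An operand occupies the constant bus if it is a non-inline literal, an
// expression, or an SGPR other than null.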
3699bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3700 const MCOperand &MO = Inst.getOperand(i: OpIdx);
3701 if (MO.isImm())
3702 return !isInlineConstant(Inst, OpIdx);
3703 if (MO.isReg()) {
3704 auto Reg = MO.getReg();
3705 if (!Reg)
3706 return false;
3707 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3708 auto PReg = mc2PseudoReg(Reg);
3709 return isSGPR(Reg: PReg, TRI) && PReg != SGPR_NULL;
3710 }
3711 return true;
3712}
3713
// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
// Writelane is special in that it can use an SGPR and M0 (which would normally
// count as using the constant bus twice - but in this case it is allowed since
// the lane selector doesn't count as a use of the constant bus). However, it is
// still required to abide by the 1 SGPR rule.
3719static bool checkWriteLane(const MCInst &Inst) {
3720 const unsigned Opcode = Inst.getOpcode();
3721 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3722 return false;
3723 const MCOperand &LaneSelOp = Inst.getOperand(i: 2);
3724 if (!LaneSelOp.isReg())
3725 return false;
3726 auto LaneSelReg = mc2PseudoReg(Reg: LaneSelOp.getReg());
3727 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3728}
3729
3730bool AMDGPUAsmParser::validateConstantBusLimitations(
3731 const MCInst &Inst, const OperandVector &Operands) {
3732 const unsigned Opcode = Inst.getOpcode();
3733 const MCInstrDesc &Desc = MII.get(Opcode);
3734 MCRegister LastSGPR;
3735 unsigned ConstantBusUseCount = 0;
3736 unsigned NumLiterals = 0;
3737 unsigned LiteralSize;
3738
3739 if (!(Desc.TSFlags &
3740 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3741 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3742 !isVOPD(Opc: Opcode))
3743 return true;
3744
3745 if (checkWriteLane(Inst))
3746 return true;
3747
3748 // Check special imm operands (used by madmk, etc)
3749 if (AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::imm)) {
3750 ++NumLiterals;
3751 LiteralSize = 4;
3752 }
3753
3754 SmallDenseSet<unsigned> SGPRsUsed;
3755 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3756 if (SGPRUsed != AMDGPU::NoRegister) {
3757 SGPRsUsed.insert(V: SGPRUsed);
3758 ++ConstantBusUseCount;
3759 }
3760
3761 OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3762
3763 for (int OpIdx : OpIndices) {
3764 if (OpIdx == -1)
3765 continue;
3766
3767 const MCOperand &MO = Inst.getOperand(i: OpIdx);
3768 if (usesConstantBus(Inst, OpIdx)) {
3769 if (MO.isReg()) {
3770 LastSGPR = mc2PseudoReg(Reg: MO.getReg());
        // Pairs of registers with a partial intersection like these
        //   s0, s[0:1]
        //   flat_scratch_lo, flat_scratch
        //   flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction
3777 if (SGPRsUsed.insert(V: LastSGPR).second) {
3778 ++ConstantBusUseCount;
3779 }
3780 } else { // Expression or a literal
3781
3782 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3783 continue; // special operand like VINTERP attr_chan
3784
3785 // An instruction may use only one literal.
3786 // This has been validated on the previous step.
3787 // See validateVOPLiteral.
3788 // This literal may be used as more than one operand.
3789 // If all these operands are of the same size,
3790 // this literal counts as one scalar value.
3791 // Otherwise it counts as 2 scalar values.
3792 // See "GFX10 Shader Programming", section 3.6.2.3.
3793
3794 unsigned Size = AMDGPU::getOperandSize(Desc, OpNo: OpIdx);
3795 if (Size < 4)
3796 Size = 4;
3797
3798 if (NumLiterals == 0) {
3799 NumLiterals = 1;
3800 LiteralSize = Size;
3801 } else if (LiteralSize != Size) {
3802 NumLiterals = 2;
3803 }
3804 }
3805 }
3806 }
3807 ConstantBusUseCount += NumLiterals;
3808
3809 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3810 return true;
3811
3812 SMLoc LitLoc = getLitLoc(Operands);
3813 SMLoc RegLoc = getRegLoc(Reg: LastSGPR, Operands);
3814 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3815 Error(L: Loc, Msg: "invalid operand (violates constant bus restrictions)");
3816 return false;
3817}
3818
3819bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3820 const MCInst &Inst, const OperandVector &Operands) {
3821
3822 const unsigned Opcode = Inst.getOpcode();
3823 if (!isVOPD(Opc: Opcode))
3824 return true;
3825
3826 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3827
3828 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3829 const MCOperand &Opr = Inst.getOperand(i: OperandIdx);
3830 return (Opr.isReg() && !isSGPR(Reg: mc2PseudoReg(Reg: Opr.getReg()), TRI))
3831 ? Opr.getReg()
3832 : MCRegister();
3833 };
3834
  // On GFX12, if both OpX and OpY are V_MOV_B32 then OpY uses the SRC2 source cache.
3836 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3837
3838 const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII);
3839 auto InvalidCompOprIdx =
3840 InstInfo.getInvalidCompOperandIndex(GetRegIdx: getVRegIdx, SkipSrc);
3841 if (!InvalidCompOprIdx)
3842 return true;
3843
3844 auto CompOprIdx = *InvalidCompOprIdx;
3845 auto ParsedIdx =
3846 std::max(a: InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3847 b: InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3848 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3849
3850 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3851 if (CompOprIdx == VOPD::Component::DST) {
3852 Error(L: Loc, Msg: "one dst register must be even and the other odd");
3853 } else {
3854 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3855 Error(L: Loc, Msg: Twine("src") + Twine(CompSrcIdx) +
3856 " operands must use different VGPR banks");
3857 }
3858
3859 return false;
3860}
3861
3862bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3863
3864 const unsigned Opc = Inst.getOpcode();
3865 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
3866
3867 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3868 int ClampIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::clamp);
3869 assert(ClampIdx != -1);
3870 return Inst.getOperand(i: ClampIdx).getImm() == 0;
3871 }
3872
3873 return true;
3874}
3875
3876constexpr uint64_t MIMGFlags =
3877 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3878
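// Check that the vdata register size agrees with what dmask, tfe and
// packed d16 imply.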
3879bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3880 const SMLoc &IDLoc) {
3881
3882 const unsigned Opc = Inst.getOpcode();
3883 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
3884
3885 if ((Desc.TSFlags & MIMGFlags) == 0)
3886 return true;
3887
3888 int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdata);
3889 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
3890 int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::tfe);
3891
3892 if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
3893 return true;
3894
3895 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3896 return true;
3897
3898 unsigned VDataSize = AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VDataIdx);
3899 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(i: TFEIdx).getImm()) ? 1 : 0;
3900 unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf;
3901 if (DMask == 0)
3902 DMask = 1;
3903
3904 bool IsPackedD16 = false;
3905 unsigned DataSize =
3906 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(Value: DMask);
3907 if (hasPackedD16()) {
3908 int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16);
3909 IsPackedD16 = D16Idx >= 0;
3910 if (IsPackedD16 && Inst.getOperand(i: D16Idx).getImm())
3911 DataSize = (DataSize + 1) / 2;
3912 }
3913
3914 if ((VDataSize / 4) == DataSize + TFESize)
3915 return true;
3916
3917 StringRef Modifiers;
3918 if (isGFX90A())
3919 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3920 else
3921 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3922
3923 Error(L: IDLoc, Msg: Twine("image data size does not match ") + Modifiers);
3924 return false;
3925}
3926
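// Check that the number of address registers matches what dim, a16 and the
// (partial) NSA encoding require.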
3927bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3928 const SMLoc &IDLoc) {
3929 const unsigned Opc = Inst.getOpcode();
3930 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
3931
3932 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3933 return true;
3934
3935 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3936
3937 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3938 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
3939 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vaddr0);
3940 AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
3941 ? AMDGPU::OpName::srsrc
3942 : AMDGPU::OpName::rsrc;
3943 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: RSrcOpName);
3944 int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim);
3945 int A16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::a16);
3946
3947 assert(VAddr0Idx != -1);
3948 assert(SrsrcIdx != -1);
3949 assert(SrsrcIdx > VAddr0Idx);
3950
3951 bool IsA16 = (A16Idx != -1 && Inst.getOperand(i: A16Idx).getImm());
3952 if (BaseOpcode->BVH) {
3953 if (IsA16 == BaseOpcode->A16)
3954 return true;
3955 Error(L: IDLoc, Msg: "image address size does not match a16");
3956 return false;
3957 }
3958
3959 unsigned Dim = Inst.getOperand(i: DimIdx).getImm();
3960 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim);
3961 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3962 unsigned ActualAddrSize =
3963 IsNSA ? SrsrcIdx - VAddr0Idx
3964 : AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VAddr0Idx) / 4;
3965
3966 unsigned ExpectedAddrSize =
3967 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim: DimInfo, IsA16, IsG16Supported: hasG16());
3968
3969 if (IsNSA) {
3970 if (hasPartialNSAEncoding() &&
3971 ExpectedAddrSize >
3972 getNSAMaxSize(HasSampler: Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3973 int VAddrLastIdx = SrsrcIdx - 1;
3974 unsigned VAddrLastSize =
3975 AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VAddrLastIdx) / 4;
3976
3977 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3978 }
3979 } else {
3980 if (ExpectedAddrSize > 12)
3981 ExpectedAddrSize = 16;
3982
3983 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3984 // This provides backward compatibility for assembly created
3985 // before 160b/192b/224b types were directly supported.
3986 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3987 return true;
3988 }
3989
3990 if (ActualAddrSize == ExpectedAddrSize)
3991 return true;
3992
3993 Error(L: IDLoc, Msg: "image address size does not match dim and a16");
3994 return false;
3995}
3996
3997bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3998
3999 const unsigned Opc = Inst.getOpcode();
4000 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4001
4002 if ((Desc.TSFlags & MIMGFlags) == 0)
4003 return true;
4004 if (!Desc.mayLoad() || !Desc.mayStore())
4005 return true; // Not atomic
4006
4007 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
4008 unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf;
4009
  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However, these limitations are
  // verified when we check that dmask matches the dst size.
4014 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4015}
4016
4017bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4018
4019 const unsigned Opc = Inst.getOpcode();
4020 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4021
4022 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4023 return true;
4024
4025 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
4026 unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf;
4027
4028 // GATHER4 instructions use dmask in a different fashion compared to
4029 // other MIMG instructions. The only useful DMASK values are
4030 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4031 // (red,red,red,red) etc.) The ISA document doesn't mention
4032 // this.
4033 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4034}
4035
4036bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4037 const OperandVector &Operands) {
4038 if (!isGFX10Plus())
4039 return true;
4040
4041 const unsigned Opc = Inst.getOpcode();
4042 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4043
4044 if ((Desc.TSFlags & MIMGFlags) == 0)
4045 return true;
4046
4047 // image_bvh_intersect_ray instructions do not have dim
4048 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4049 return true;
4050
4051 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4052 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4053 if (Op.isDim())
4054 return true;
4055 }
4056 return false;
4057}
4058
4059bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4060 const unsigned Opc = Inst.getOpcode();
4061 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4062
4063 if ((Desc.TSFlags & MIMGFlags) == 0)
4064 return true;
4065
4066 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4067 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4068 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
4069
4070 if (!BaseOpcode->MSAA)
4071 return true;
4072
4073 int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim);
4074 assert(DimIdx != -1);
4075
4076 unsigned Dim = Inst.getOperand(i: DimIdx).getImm();
4077 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim);
4078
4079 return DimInfo->MSAA;
4080}
4081
4082static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4083{
4084 switch (Opcode) {
4085 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4086 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4087 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4088 return true;
4089 default:
4090 return false;
4091 }
4092}
4093
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td descriptions for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
4097bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4098 const OperandVector &Operands) {
4099
4100 const unsigned Opc = Inst.getOpcode();
4101 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4102
4103 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opcode: Opc))
4104 return true;
4105
4106 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
4107 assert(Src0Idx != -1);
4108
4109 SMLoc ErrLoc;
4110 const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
4111 if (Src0.isReg()) {
4112 auto Reg = mc2PseudoReg(Reg: Src0.getReg());
4113 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4114 if (!isSGPR(Reg, TRI))
4115 return true;
4116 ErrLoc = getRegLoc(Reg, Operands);
4117 } else {
4118 ErrLoc = getConstLoc(Operands);
4119 }
4120
4121 Error(L: ErrLoc, Msg: "source operand must be a VGPR");
4122 return false;
4123}
4124
4125bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4126 const OperandVector &Operands) {
4127
4128 const unsigned Opc = Inst.getOpcode();
4129
4130 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4131 return true;
4132
4133 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
4134 assert(Src0Idx != -1);
4135
4136 const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
4137 if (!Src0.isReg())
4138 return true;
4139
4140 auto Reg = mc2PseudoReg(Reg: Src0.getReg());
4141 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4142 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4143 Error(L: getRegLoc(Reg, Operands),
4144 Msg: "source operand must be either a VGPR or an inline constant");
4145 return false;
4146 }
4147
4148 return true;
4149}
4150
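// On subtargets with the MFMA inline literal bug, src2 of an MAI instruction
// must not be an inline constant.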
4151bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4152 const OperandVector &Operands) {
4153 unsigned Opcode = Inst.getOpcode();
4154 const MCInstrDesc &Desc = MII.get(Opcode);
4155
4156 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4157 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4158 return true;
4159
4160 const int Src2Idx = getNamedOperandIdx(Opcode, Name: OpName::src2);
4161 if (Src2Idx == -1)
4162 return true;
4163
4164 if (Inst.getOperand(i: Src2Idx).isImm() && isInlineConstant(Inst, OpIdx: Src2Idx)) {
4165 Error(L: getConstLoc(Operands),
4166 Msg: "inline constants are not allowed for this operand");
4167 return false;
4168 }
4169
4170 return true;
4171}
4172
4173bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4174 const OperandVector &Operands) {
4175 const unsigned Opc = Inst.getOpcode();
4176 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4177
4178 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
4179 return true;
4180
4181 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
4182 if (BlgpIdx != -1) {
4183 if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opcode: Opc)) {
4184 int CbszIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz);
4185
4186 unsigned CBSZ = Inst.getOperand(i: CbszIdx).getImm();
4187 unsigned BLGP = Inst.getOperand(i: BlgpIdx).getImm();
4188
      // Validate that the correct register tuple size was used for the
      // floating-point format operands.
4191
4192 bool Success = true;
4193 if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: CBSZ)) {
4194 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
4195 Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg()),
4196 Operands),
4197 Msg: "wrong register tuple size for cbsz value " + Twine(CBSZ));
4198 Success = false;
4199 }
4200
4201 if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: BLGP)) {
4202 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
4203 Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Inst.getOperand(i: Src1Idx).getReg()),
4204 Operands),
4205 Msg: "wrong register tuple size for blgp value " + Twine(BLGP));
4206 Success = false;
4207 }
4208
4209 return Success;
4210 }
4211 }
4212
4213 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2);
4214 if (Src2Idx == -1)
4215 return true;
4216
4217 const MCOperand &Src2 = Inst.getOperand(i: Src2Idx);
4218 if (!Src2.isReg())
4219 return true;
4220
4221 MCRegister Src2Reg = Src2.getReg();
4222 MCRegister DstReg = Inst.getOperand(i: 0).getReg();
4223 if (Src2Reg == DstReg)
4224 return true;
4225
4226 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4227 if (TRI->getRegClass(i: Desc.operands()[0].RegClass).getSizeInBits() <= 128)
4228 return true;
4229
4230 if (TRI->regsOverlap(RegA: Src2Reg, RegB: DstReg)) {
4231 Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Src2Reg), Operands),
4232 Msg: "source 2 operand must not partially overlap with dst");
4233 return false;
4234 }
4235
4236 return true;
4237}
4238
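// V_DIV_SCALE_F32/F64 do not accept the ABS modifier on any of their source
// operands.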
4239bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4240 switch (Inst.getOpcode()) {
4241 default:
4242 return true;
4243 case V_DIV_SCALE_F32_gfx6_gfx7:
4244 case V_DIV_SCALE_F32_vi:
4245 case V_DIV_SCALE_F32_gfx10:
4246 case V_DIV_SCALE_F64_gfx6_gfx7:
4247 case V_DIV_SCALE_F64_vi:
4248 case V_DIV_SCALE_F64_gfx10:
4249 break;
4250 }
4251
4252 // TODO: Check that src0 = src1 or src2.
4253
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
4257 if (Inst.getOperand(i: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name))
4258 .getImm() &
4259 SISrcMods::ABS) {
4260 return false;
4261 }
4262 }
4263
4264 return true;
4265}
4266
4267bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4268
4269 const unsigned Opc = Inst.getOpcode();
4270 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4271
4272 if ((Desc.TSFlags & MIMGFlags) == 0)
4273 return true;
4274
4275 int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16);
4276 if (D16Idx >= 0 && Inst.getOperand(i: D16Idx).getImm()) {
4277 if (isCI() || isSI())
4278 return false;
4279 }
4280
4281 return true;
4282}
4283
4284bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4285 const unsigned Opc = Inst.getOpcode();
4286 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4287
4288 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4289 return true;
4290
4291 int R128Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::r128);
4292
4293 return R128Idx < 0 || !Inst.getOperand(i: R128Idx).getImm();
4294}
4295
4296static bool IsRevOpcode(const unsigned Opcode)
4297{
4298 switch (Opcode) {
4299 case AMDGPU::V_SUBREV_F32_e32:
4300 case AMDGPU::V_SUBREV_F32_e64:
4301 case AMDGPU::V_SUBREV_F32_e32_gfx10:
4302 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4303 case AMDGPU::V_SUBREV_F32_e32_vi:
4304 case AMDGPU::V_SUBREV_F32_e64_gfx10:
4305 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4306 case AMDGPU::V_SUBREV_F32_e64_vi:
4307
4308 case AMDGPU::V_SUBREV_CO_U32_e32:
4309 case AMDGPU::V_SUBREV_CO_U32_e64:
4310 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4311 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4312
4313 case AMDGPU::V_SUBBREV_U32_e32:
4314 case AMDGPU::V_SUBBREV_U32_e64:
4315 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4316 case AMDGPU::V_SUBBREV_U32_e32_vi:
4317 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4318 case AMDGPU::V_SUBBREV_U32_e64_vi:
4319
4320 case AMDGPU::V_SUBREV_U32_e32:
4321 case AMDGPU::V_SUBREV_U32_e64:
4322 case AMDGPU::V_SUBREV_U32_e32_gfx9:
4323 case AMDGPU::V_SUBREV_U32_e32_vi:
4324 case AMDGPU::V_SUBREV_U32_e64_gfx9:
4325 case AMDGPU::V_SUBREV_U32_e64_vi:
4326
4327 case AMDGPU::V_SUBREV_F16_e32:
4328 case AMDGPU::V_SUBREV_F16_e64:
4329 case AMDGPU::V_SUBREV_F16_e32_gfx10:
4330 case AMDGPU::V_SUBREV_F16_e32_vi:
4331 case AMDGPU::V_SUBREV_F16_e64_gfx10:
4332 case AMDGPU::V_SUBREV_F16_e64_vi:
4333
4334 case AMDGPU::V_SUBREV_U16_e32:
4335 case AMDGPU::V_SUBREV_U16_e64:
4336 case AMDGPU::V_SUBREV_U16_e32_vi:
4337 case AMDGPU::V_SUBREV_U16_e64_vi:
4338
4339 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4340 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4341 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4342
4343 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4344 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4345
4346 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4347 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4348
4349 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4350 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4351
4352 case AMDGPU::V_LSHRREV_B32_e32:
4353 case AMDGPU::V_LSHRREV_B32_e64:
4354 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4355 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4356 case AMDGPU::V_LSHRREV_B32_e32_vi:
4357 case AMDGPU::V_LSHRREV_B32_e64_vi:
4358 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4359 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4360
4361 case AMDGPU::V_ASHRREV_I32_e32:
4362 case AMDGPU::V_ASHRREV_I32_e64:
4363 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4364 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4365 case AMDGPU::V_ASHRREV_I32_e32_vi:
4366 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4367 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4368 case AMDGPU::V_ASHRREV_I32_e64_vi:
4369
4370 case AMDGPU::V_LSHLREV_B32_e32:
4371 case AMDGPU::V_LSHLREV_B32_e64:
4372 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4373 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4374 case AMDGPU::V_LSHLREV_B32_e32_vi:
4375 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4376 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4377 case AMDGPU::V_LSHLREV_B32_e64_vi:
4378
4379 case AMDGPU::V_LSHLREV_B16_e32:
4380 case AMDGPU::V_LSHLREV_B16_e64:
4381 case AMDGPU::V_LSHLREV_B16_e32_vi:
4382 case AMDGPU::V_LSHLREV_B16_e64_vi:
4383 case AMDGPU::V_LSHLREV_B16_gfx10:
4384
4385 case AMDGPU::V_LSHRREV_B16_e32:
4386 case AMDGPU::V_LSHRREV_B16_e64:
4387 case AMDGPU::V_LSHRREV_B16_e32_vi:
4388 case AMDGPU::V_LSHRREV_B16_e64_vi:
4389 case AMDGPU::V_LSHRREV_B16_gfx10:
4390
4391 case AMDGPU::V_ASHRREV_I16_e32:
4392 case AMDGPU::V_ASHRREV_I16_e64:
4393 case AMDGPU::V_ASHRREV_I16_e32_vi:
4394 case AMDGPU::V_ASHRREV_I16_e64_vi:
4395 case AMDGPU::V_ASHRREV_I16_gfx10:
4396
4397 case AMDGPU::V_LSHLREV_B64_e64:
4398 case AMDGPU::V_LSHLREV_B64_gfx10:
4399 case AMDGPU::V_LSHLREV_B64_vi:
4400
4401 case AMDGPU::V_LSHRREV_B64_e64:
4402 case AMDGPU::V_LSHRREV_B64_gfx10:
4403 case AMDGPU::V_LSHRREV_B64_vi:
4404
4405 case AMDGPU::V_ASHRREV_I64_e64:
4406 case AMDGPU::V_ASHRREV_I64_gfx10:
4407 case AMDGPU::V_ASHRREV_I64_vi:
4408
4409 case AMDGPU::V_PK_LSHLREV_B16:
4410 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4411 case AMDGPU::V_PK_LSHLREV_B16_vi:
4412
4413 case AMDGPU::V_PK_LSHRREV_B16:
4414 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4415 case AMDGPU::V_PK_LSHRREV_B16_vi:
4416 case AMDGPU::V_PK_ASHRREV_I16:
4417 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4418 case AMDGPU::V_PK_ASHRREV_I16_vi:
4419 return true;
4420 default:
4421 return false;
4422 }
4423}
4424
4425std::optional<StringRef>
4426AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4427
4428 using namespace SIInstrFlags;
4429 const unsigned Opcode = Inst.getOpcode();
4430 const MCInstrDesc &Desc = MII.get(Opcode);
4431
4432 // lds_direct register is defined so that it can be used
4433 // with 9-bit operands only. Ignore encodings which do not accept these.
4434 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4435 if ((Desc.TSFlags & Enc) == 0)
4436 return std::nullopt;
4437
4438 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4439 auto SrcIdx = getNamedOperandIdx(Opcode, Name: SrcName);
4440 if (SrcIdx == -1)
4441 break;
4442 const auto &Src = Inst.getOperand(i: SrcIdx);
4443 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4444
4445 if (isGFX90A() || isGFX11Plus())
4446 return StringRef("lds_direct is not supported on this GPU");
4447
4448 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4449 return StringRef("lds_direct cannot be used with this instruction");
4450
4451 if (SrcName != OpName::src0)
4452 return StringRef("lds_direct may be used as src0 only");
4453 }
4454 }
4455
4456 return std::nullopt;
4457}
4458
4459SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4460 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4461 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4462 if (Op.isFlatOffset())
4463 return Op.getStartLoc();
4464 }
4465 return getLoc();
4466}
4467
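// Validate the offset operand. FLAT and SMEM offsets have dedicated checks;
// for the remaining cases the offset must fit into 24 bits (signed) for
// GFX12+ buffer instructions or into 16 bits (unsigned) otherwise.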
4468bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4469 const OperandVector &Operands) {
4470 auto Opcode = Inst.getOpcode();
4471 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
4472 if (OpNum == -1)
4473 return true;
4474
4475 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4476 if ((TSFlags & SIInstrFlags::FLAT))
4477 return validateFlatOffset(Inst, Operands);
4478
4479 if ((TSFlags & SIInstrFlags::SMRD))
4480 return validateSMEMOffset(Inst, Operands);
4481
4482 const auto &Op = Inst.getOperand(i: OpNum);
4483 if (isGFX12Plus() &&
4484 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4485 const unsigned OffsetSize = 24;
4486 if (!isIntN(N: OffsetSize, x: Op.getImm())) {
4487 Error(L: getFlatOffsetLoc(Operands),
4488 Msg: Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4489 return false;
4490 }
4491 } else {
4492 const unsigned OffsetSize = 16;
4493 if (!isUIntN(N: OffsetSize, x: Op.getImm())) {
4494 Error(L: getFlatOffsetLoc(Operands),
4495 Msg: Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4496 return false;
4497 }
4498 }
4499 return true;
4500}
4501
4502bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4503 const OperandVector &Operands) {
4504 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4505 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4506 return true;
4507
4508 auto Opcode = Inst.getOpcode();
4509 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
4510 assert(OpNum != -1);
4511
4512 const auto &Op = Inst.getOperand(i: OpNum);
4513 if (!hasFlatOffsets() && Op.getImm() != 0) {
4514 Error(L: getFlatOffsetLoc(Operands),
4515 Msg: "flat offset modifier is not supported on this GPU");
4516 return false;
4517 }
4518
4519 // For pre-GFX12 FLAT instructions the offset must be positive;
4520 // MSB is ignored and forced to zero.
4521 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(ST: getSTI());
4522 bool AllowNegative =
4523 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4524 isGFX12Plus();
4525 if (!isIntN(N: OffsetSize, x: Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4526 Error(L: getFlatOffsetLoc(Operands),
4527 Msg: Twine("expected a ") +
4528 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4529 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4530 return false;
4531 }
4532
4533 return true;
4534}
4535
4536SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4537 // Start with second operand because SMEM Offset cannot be dst or src0.
4538 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4539 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4540 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4541 return Op.getStartLoc();
4542 }
4543 return getLoc();
4544}
4545
4546bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4547 const OperandVector &Operands) {
4548 if (isCI() || isSI())
4549 return true;
4550
4551 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4552 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4553 return true;
4554
4555 auto Opcode = Inst.getOpcode();
4556 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
4557 if (OpNum == -1)
4558 return true;
4559
4560 const auto &Op = Inst.getOperand(i: OpNum);
4561 if (!Op.isImm())
4562 return true;
4563
4564 uint64_t Offset = Op.getImm();
4565 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opc: Opcode);
4566 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(ST: getSTI(), EncodedOffset: Offset) ||
4567 AMDGPU::isLegalSMRDEncodedSignedOffset(ST: getSTI(), EncodedOffset: Offset, IsBuffer))
4568 return true;
4569
4570 Error(L: getSMEMOffsetLoc(Operands),
4571 Msg: isGFX12Plus() ? "expected a 24-bit signed offset"
4572 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4573 : "expected a 21-bit signed offset");
4574
4575 return false;
4576}
4577
4578bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4579 unsigned Opcode = Inst.getOpcode();
4580 const MCInstrDesc &Desc = MII.get(Opcode);
4581 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4582 return true;
4583
4584 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src0);
4585 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src1);
4586
4587 const int OpIndices[] = { Src0Idx, Src1Idx };
4588
4589 unsigned NumExprs = 0;
4590 unsigned NumLiterals = 0;
4591 uint32_t LiteralValue;
4592
4593 for (int OpIdx : OpIndices) {
4594 if (OpIdx == -1) break;
4595
4596 const MCOperand &MO = Inst.getOperand(i: OpIdx);
4597 // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4598 if (AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx)) {
4599 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4600 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4601 if (NumLiterals == 0 || LiteralValue != Value) {
4602 LiteralValue = Value;
4603 ++NumLiterals;
4604 }
4605 } else if (MO.isExpr()) {
4606 ++NumExprs;
4607 }
4608 }
4609 }
4610
4611 return NumLiterals + NumExprs <= 1;
4612}
4613
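// Check op_sel restrictions: permlane16 may only use the two low bits,
// GFX940 dot instructions require the default op_sel/op_sel_hi values, and
// VOP3 (non-VOP3P) dot instructions on GFX11+ require op_sel[0:1] == 0.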
4614bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4615 const unsigned Opc = Inst.getOpcode();
4616 if (isPermlane16(Opc)) {
4617 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
4618 unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
4619
4620 if (OpSel & ~3)
4621 return false;
4622 }
4623
4624 uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags;
4625
4626 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4627 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
4628 if (OpSelIdx != -1) {
4629 if (Inst.getOperand(i: OpSelIdx).getImm() != 0)
4630 return false;
4631 }
4632 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);
4633 if (OpSelHiIdx != -1) {
4634 if (Inst.getOperand(i: OpSelHiIdx).getImm() != -1)
4635 return false;
4636 }
4637 }
4638
4639 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4640 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4641 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4642 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
4643 unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
4644 if (OpSel & 3)
4645 return false;
4646 }
4647
4648 return true;
4649}
4650
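// For true16 instructions, each op_sel bit must agree with the high/low half
// selected by the corresponding 16-bit VGPR operand.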
4651bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4652 if (!hasTrue16Insts())
4653 return true;
4654 const MCRegisterInfo *MRI = getMRI();
4655 const unsigned Opc = Inst.getOpcode();
4656 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
4657 if (OpSelIdx == -1)
4658 return true;
4659 unsigned OpSelOpValue = Inst.getOperand(i: OpSelIdx).getImm();
4660 // If the value is 0 we could have a default OpSel Operand, so conservatively
4661 // allow it.
4662 if (OpSelOpValue == 0)
4663 return true;
4664 unsigned OpCount = 0;
4665 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4666 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4667 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: OpName);
4668 if (OpIdx == -1)
4669 continue;
4670 const MCOperand &Op = Inst.getOperand(i: OpIdx);
4671 if (Op.isReg() &&
4672 MRI->getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: Op.getReg())) {
4673 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Reg: Op.getReg(), MRI: *MRI);
4674 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4675 if (OpSelOpIsHi != VGPRSuffixIsHi)
4676 return false;
4677 }
4678 ++OpCount;
4679 }
4680
4681 return true;
4682}
4683
4684bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
4685 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4686
4687 const unsigned Opc = Inst.getOpcode();
4688 uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags;
4689
4690 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4691 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4692 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4693 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4694 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4695 !(TSFlags & SIInstrFlags::IsSWMMAC))
4696 return true;
4697
4698 int NegIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName);
4699 if (NegIdx == -1)
4700 return true;
4701
4702 unsigned Neg = Inst.getOperand(i: NegIdx).getImm();
4703
  // Some instructions have a neg_lo or neg_hi operand, but the neg modifier
  // is allowed only on some of their src operands.
  // Conveniently, such instructions don't have a src_modifiers operand for
  // src operands that don't allow neg, because those operands also don't
  // allow opsel.
4708
4709 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4710 AMDGPU::OpName::src1_modifiers,
4711 AMDGPU::OpName::src2_modifiers};
4712
4713 for (unsigned i = 0; i < 3; ++i) {
4714 if (!AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: SrcMods[i])) {
4715 if (Neg & (1 << i))
4716 return false;
4717 }
4718 }
4719
4720 return true;
4721}
4722
4723bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4724 const OperandVector &Operands) {
4725 const unsigned Opc = Inst.getOpcode();
4726 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp_ctrl);
4727 if (DppCtrlIdx >= 0) {
4728 unsigned DppCtrl = Inst.getOperand(i: DppCtrlIdx).getImm();
4729
4730 if (!AMDGPU::isLegalDPALU_DPPControl(DC: DppCtrl) &&
4731 AMDGPU::isDPALU_DPP(OpDesc: MII.get(Opcode: Opc))) {
4732 // DP ALU DPP is supported for row_newbcast only on GFX9*
4733 SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyDppCtrl, Operands);
4734 Error(L: S, Msg: "DP ALU dpp only supports row_newbcast");
4735 return false;
4736 }
4737 }
4738
4739 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp8);
4740 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4741
4742 if (IsDPP && !hasDPPSrc1SGPR(STI: getSTI())) {
4743 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
4744 if (Src1Idx >= 0) {
4745 const MCOperand &Src1 = Inst.getOperand(i: Src1Idx);
4746 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4747 if (Src1.isReg() && isSGPR(Reg: mc2PseudoReg(Reg: Src1.getReg()), TRI)) {
4748 auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src1Idx).getReg());
4749 SMLoc S = getRegLoc(Reg, Operands);
4750 Error(L: S, Msg: "invalid operand for instruction");
4751 return false;
4752 }
4753 if (Src1.isImm()) {
4754 Error(L: getInstLoc(Operands),
4755 Msg: "src1 immediate operand invalid for instruction");
4756 return false;
4757 }
4758 }
4759 }
4760
4761 return true;
4762}
4763
4764// Check if VCC register matches wavefront size
4765bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
4766 auto FB = getFeatureBits();
4767 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4768 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4769}
4770
// Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4772bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4773 const OperandVector &Operands) {
4774 unsigned Opcode = Inst.getOpcode();
4775 const MCInstrDesc &Desc = MII.get(Opcode);
4776 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, Name: OpName::imm) != -1;
4777 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4778 !HasMandatoryLiteral && !isVOPD(Opc: Opcode))
4779 return true;
4780
4781 OperandIndices OpIndices = getSrcOperandIndices(Opcode, AddMandatoryLiterals: HasMandatoryLiteral);
4782
4783 unsigned NumExprs = 0;
4784 unsigned NumLiterals = 0;
4785 uint32_t LiteralValue;
4786
4787 for (int OpIdx : OpIndices) {
4788 if (OpIdx == -1)
4789 continue;
4790
4791 const MCOperand &MO = Inst.getOperand(i: OpIdx);
4792 if (!MO.isImm() && !MO.isExpr())
4793 continue;
4794 if (!isSISrcOperand(Desc, OpNo: OpIdx))
4795 continue;
4796
4797 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4798 uint64_t Value = static_cast<uint64_t>(MO.getImm());
4799 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpNo: OpIdx) &&
4800 AMDGPU::getOperandSize(OpInfo: Desc.operands()[OpIdx]) == 8;
4801 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Val: Value, IsFP64);
4802
4803 if (!IsValid32Op && !isInt<32>(x: Value) && !isUInt<32>(x: Value)) {
4804 Error(L: getLitLoc(Operands), Msg: "invalid operand for instruction");
4805 return false;
4806 }
4807
4808 if (IsFP64 && IsValid32Op)
4809 Value = Hi_32(Value);
4810
4811 if (NumLiterals == 0 || LiteralValue != Value) {
4812 LiteralValue = Value;
4813 ++NumLiterals;
4814 }
4815 } else if (MO.isExpr()) {
4816 ++NumExprs;
4817 }
4818 }
4819 NumLiterals += NumExprs;
4820
4821 if (!NumLiterals)
4822 return true;
4823
4824 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4825 Error(L: getLitLoc(Operands), Msg: "literal operands are not supported");
4826 return false;
4827 }
4828
4829 if (NumLiterals > 1) {
4830 Error(L: getLitLoc(Operands, SearchMandatoryLiterals: true), Msg: "only one unique literal operand is allowed");
4831 return false;
4832 }
4833
4834 return true;
4835}
4836
4837// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4838static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
4839 const MCRegisterInfo *MRI) {
4840 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name);
4841 if (OpIdx < 0)
4842 return -1;
4843
4844 const MCOperand &Op = Inst.getOperand(i: OpIdx);
4845 if (!Op.isReg())
4846 return -1;
4847
4848 MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
4849 auto Reg = Sub ? Sub : Op.getReg();
4850 const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID);
4851 return AGPR32.contains(Reg) ? 1 : 0;
4852}
4853
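// For FLAT, MUBUF, MTBUF, MIMG and DS memory instructions, check that AGPR
// and VGPR data/dst operands are not mixed. Targets without gfx90a
// instructions do not accept AGPR operands here at all.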
4854bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4855 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4856 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4857 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4858 SIInstrFlags::DS)) == 0)
4859 return true;
4860
4861 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
4862 ? AMDGPU::OpName::data0
4863 : AMDGPU::OpName::vdata;
4864
4865 const MCRegisterInfo *MRI = getMRI();
4866 int DstAreg = IsAGPROperand(Inst, Name: AMDGPU::OpName::vdst, MRI);
4867 int DataAreg = IsAGPROperand(Inst, Name: DataName, MRI);
4868
4869 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4870 int Data2Areg = IsAGPROperand(Inst, Name: AMDGPU::OpName::data1, MRI);
4871 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4872 return false;
4873 }
4874
4875 auto FB = getFeatureBits();
4876 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4877 if (DataAreg < 0 || DstAreg < 0)
4878 return true;
4879 return DstAreg == DataAreg;
4880 }
4881
4882 return DstAreg < 1 && DataAreg < 1;
4883}
4884
4885bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4886 auto FB = getFeatureBits();
4887 unsigned Opc = Inst.getOpcode();
  // DS_READ_B96_TR_B6 is the only GFX950 DS instruction that allows an
  // unaligned VGPR. All others allow only even-aligned VGPRs.
4890 if (!(FB[AMDGPU::FeatureGFX90AInsts]) || Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
4891 return true;
4892
4893 const MCRegisterInfo *MRI = getMRI();
4894 const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID);
4895 const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID);
4896 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4897 const MCOperand &Op = Inst.getOperand(i: I);
4898 if (!Op.isReg())
4899 continue;
4900
4901 MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
4902 if (!Sub)
4903 continue;
4904
4905 if (VGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4906 return false;
4907 if (AGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4908 return false;
4909 }
4910
4911 return true;
4912}
4913
4914SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4915 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4916 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4917 if (Op.isBLGP())
4918 return Op.getStartLoc();
4919 }
4920 return SMLoc();
4921}
4922
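// On gfx940, the listed F64 MFMA opcodes accept a 'neg' modifier in place of
// 'blgp'; make sure the spelling used in the source matches the opcode.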
4923bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4924 const OperandVector &Operands) {
4925 unsigned Opc = Inst.getOpcode();
4926 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
4927 if (BlgpIdx == -1)
4928 return true;
4929 SMLoc BLGPLoc = getBLGPLoc(Operands);
4930 if (!BLGPLoc.isValid())
4931 return true;
4932 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with(Prefix: "neg:");
4933 auto FB = getFeatureBits();
4934 bool UsesNeg = false;
4935 if (FB[AMDGPU::FeatureGFX940Insts]) {
4936 switch (Opc) {
4937 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4938 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4939 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4940 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4941 UsesNeg = true;
4942 }
4943 }
4944
4945 if (IsNeg == UsesNeg)
4946 return true;
4947
4948 Error(L: BLGPLoc,
4949 Msg: UsesNeg ? "invalid modifier: blgp is not supported"
4950 : "invalid modifier: neg is not supported");
4951
4952 return false;
4953}
4954
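// GFX11 S_WAITCNT_EXPCNT/LGKMCNT/VMCNT/VSCNT require 'null' as their
// register operand.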
4955bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4956 const OperandVector &Operands) {
4957 if (!isGFX11Plus())
4958 return true;
4959
4960 unsigned Opc = Inst.getOpcode();
4961 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4962 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4963 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4964 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4965 return true;
4966
4967 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::sdst);
4968 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4969 auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg());
4970 if (Reg == AMDGPU::SGPR_NULL)
4971 return true;
4972
4973 SMLoc RegLoc = getRegLoc(Reg, Operands);
4974 Error(L: RegLoc, Msg: "src0 must be null");
4975 return false;
4976}
4977
4978bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4979 const OperandVector &Operands) {
4980 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4981 if ((TSFlags & SIInstrFlags::DS) == 0)
4982 return true;
4983 if (TSFlags & SIInstrFlags::GWS)
4984 return validateGWS(Inst, Operands);
4985 // Only validate GDS for non-GWS instructions.
4986 if (hasGDS())
4987 return true;
4988 int GDSIdx =
4989 AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::gds);
4990 if (GDSIdx < 0)
4991 return true;
4992 unsigned GDS = Inst.getOperand(i: GDSIdx).getImm();
4993 if (GDS) {
4994 SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyGDS, Operands);
4995 Error(L: S, Msg: "gds modifier is not supported on this GPU");
4996 return false;
4997 }
4998 return true;
4999}
5000
5001// gfx90a has an undocumented limitation:
5002// DS_GWS opcodes must use even aligned registers.
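// For example (illustrative), a ds_gws_init whose data operand is v1 is
// rejected with "vgpr must be even aligned" on gfx90a; v0, v2, etc. are fine.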
5003bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5004 const OperandVector &Operands) {
5005 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5006 return true;
5007
5008 int Opc = Inst.getOpcode();
5009 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5010 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5011 return true;
5012
5013 const MCRegisterInfo *MRI = getMRI();
5014 const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID);
5015 int Data0Pos =
5016 AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::data0);
5017 assert(Data0Pos != -1);
5018 auto Reg = Inst.getOperand(i: Data0Pos).getReg();
5019 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5020 if (RegIdx & 1) {
5021 SMLoc RegLoc = getRegLoc(Reg, Operands);
5022 Error(L: RegLoc, Msg: "vgpr must be even aligned");
5023 return false;
5024 }
5025
5026 return true;
5027}
5028
5029bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5030 const OperandVector &Operands,
5031 const SMLoc &IDLoc) {
5032 int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(),
5033 Name: AMDGPU::OpName::cpol);
5034 if (CPolPos == -1)
5035 return true;
5036
5037 unsigned CPol = Inst.getOperand(i: CPolPos).getImm();
5038
5039 if (isGFX12Plus())
5040 return validateTHAndScopeBits(Inst, Operands, CPol);
5041
5042 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
5043 if (TSFlags & SIInstrFlags::SMRD) {
5044 if (CPol && (isSI() || isCI())) {
5045 SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5046 Error(L: S, Msg: "cache policy is not supported for SMRD instructions");
5047 return false;
5048 }
5049 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
5050 Error(L: IDLoc, Msg: "invalid cache policy for SMEM instruction");
5051 return false;
5052 }
5053 }
5054
5055 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5056 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
5057 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5058 SIInstrFlags::FLAT;
5059 if (!(TSFlags & AllowSCCModifier)) {
5060 SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5061 StringRef CStr(S.getPointer());
5062 S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scc")]);
5063 Error(L: S,
5064 Msg: "scc modifier is not supported for this instruction on this GPU");
5065 return false;
5066 }
5067 }
5068
5069 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
5070 return true;
5071
5072 if (TSFlags & SIInstrFlags::IsAtomicRet) {
5073 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5074 Error(L: IDLoc, Msg: isGFX940() ? "instruction must use sc0"
5075 : "instruction must use glc");
5076 return false;
5077 }
5078 } else {
5079 if (CPol & CPol::GLC) {
5080 SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5081 StringRef CStr(S.getPointer());
5082 S = SMLoc::getFromPointer(
5083 Ptr: &CStr.data()[CStr.find(Str: isGFX940() ? "sc0" : "glc")]);
5084 Error(L: S, Msg: isGFX940() ? "instruction must not use sc0"
5085 : "instruction must not use glc");
5086 return false;
5087 }
5088 }
5089
5090 return true;
5091}
5092
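// GFX12+ expresses cache policy as a temporal hint (th:...) plus a scope
// (scope:...). The checks below reject combinations the hardware does not
// encode, e.g. a store-type th value on a load, or th:TH_BYPASS paired with a
// scope it is not defined for.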
5093bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5094 const OperandVector &Operands,
5095 const unsigned CPol) {
5096 const unsigned TH = CPol & AMDGPU::CPol::TH;
5097 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5098
5099 const unsigned Opcode = Inst.getOpcode();
5100 const MCInstrDesc &TID = MII.get(Opcode);
5101
5102 auto PrintError = [&](StringRef Msg) {
5103 SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5104 Error(L: S, Msg);
5105 return false;
5106 };
5107
5108 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5109 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
5110 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
5111 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
5112
5113 if (TH == 0)
5114 return true;
5115
5116 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5117 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
5118 (TH == AMDGPU::CPol::TH_NT_HT)))
5119 return PrintError("invalid th value for SMEM instruction");
5120
5121 if (TH == AMDGPU::CPol::TH_BYPASS) {
5122 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5123 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
5124 (Scope == AMDGPU::CPol::SCOPE_SYS &&
5125 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5126 return PrintError("scope and th combination is not valid");
5127 }
5128
5129 unsigned THType = AMDGPU::getTemporalHintType(TID);
5130 if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5131 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5132 return PrintError("invalid th value for atomic instructions");
5133 } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5134 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5135 return PrintError("invalid th value for store instructions");
5136 } else {
5137 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5138 return PrintError("invalid th value for load instructions");
5139 }
5140
5141 return true;
5142}
5143
5144bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5145 const OperandVector &Operands) {
5146 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
5147 if (Desc.mayStore() &&
5148 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5149 SMLoc Loc = getImmLoc(Type: AMDGPUOperand::ImmTyTFE, Operands);
5150 if (Loc != getInstLoc(Operands)) {
5151 Error(L: Loc, Msg: "TFE modifier has no meaning for store instructions");
5152 return false;
5153 }
5154 }
5155
5156 return true;
5157}
5158
5159bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
5160 const SMLoc &IDLoc,
5161 const OperandVector &Operands) {
5162 if (auto ErrMsg = validateLdsDirect(Inst)) {
5163 Error(L: getRegLoc(Reg: LDS_DIRECT, Operands), Msg: *ErrMsg);
5164 return false;
5165 }
5166 if (!validateTrue16OpSel(Inst)) {
5167 Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands),
5168 Msg: "op_sel operand conflicts with 16-bit operand suffix");
5169 return false;
5170 }
5171 if (!validateSOPLiteral(Inst)) {
5172 Error(L: getLitLoc(Operands),
5173 Msg: "only one unique literal operand is allowed");
5174 return false;
5175 }
5176 if (!validateVOPLiteral(Inst, Operands)) {
5177 return false;
5178 }
5179 if (!validateConstantBusLimitations(Inst, Operands)) {
5180 return false;
5181 }
5182 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
5183 return false;
5184 }
5185 if (!validateIntClampSupported(Inst)) {
5186 Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyClamp, Operands),
5187 Msg: "integer clamping is not supported on this GPU");
5188 return false;
5189 }
5190 if (!validateOpSel(Inst)) {
5191 Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands),
5192 Msg: "invalid op_sel operand");
5193 return false;
5194 }
5195 if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_lo)) {
5196 Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegLo, Operands),
5197 Msg: "invalid neg_lo operand");
5198 return false;
5199 }
5200 if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_hi)) {
5201 Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegHi, Operands),
5202 Msg: "invalid neg_hi operand");
5203 return false;
5204 }
5205 if (!validateDPP(Inst, Operands)) {
5206 return false;
5207 }
5208  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
5209 if (!validateMIMGD16(Inst)) {
5210 Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands),
5211 Msg: "d16 modifier is not supported on this GPU");
5212 return false;
5213 }
5214 if (!validateMIMGDim(Inst, Operands)) {
5215 Error(L: IDLoc, Msg: "missing dim operand");
5216 return false;
5217 }
5218 if (!validateTensorR128(Inst)) {
5219 Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands),
5220 Msg: "instruction must set modifier r128=0");
5221 return false;
5222 }
5223 if (!validateMIMGMSAA(Inst)) {
5224 Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDim, Operands),
5225 Msg: "invalid dim; must be MSAA type");
5226 return false;
5227 }
5228 if (!validateMIMGDataSize(Inst, IDLoc)) {
5229 return false;
5230 }
5231 if (!validateMIMGAddrSize(Inst, IDLoc))
5232 return false;
5233 if (!validateMIMGAtomicDMask(Inst)) {
5234 Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands),
5235 Msg: "invalid atomic image dmask");
5236 return false;
5237 }
5238 if (!validateMIMGGatherDMask(Inst)) {
5239 Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands),
5240 Msg: "invalid image_gather dmask: only one bit must be set");
5241 return false;
5242 }
5243 if (!validateMovrels(Inst, Operands)) {
5244 return false;
5245 }
5246 if (!validateOffset(Inst, Operands)) {
5247 return false;
5248 }
5249 if (!validateMAIAccWrite(Inst, Operands)) {
5250 return false;
5251 }
5252 if (!validateMAISrc2(Inst, Operands)) {
5253 return false;
5254 }
5255 if (!validateMFMA(Inst, Operands)) {
5256 return false;
5257 }
5258 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5259 return false;
5260 }
5261
5262 if (!validateAGPRLdSt(Inst)) {
5263 Error(L: IDLoc, Msg: getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5264 ? "invalid register class: data and dst should be all VGPR or AGPR"
5265 : "invalid register class: agpr loads and stores not supported on this GPU"
5266 );
5267 return false;
5268 }
5269 if (!validateVGPRAlign(Inst)) {
5270 Error(L: IDLoc,
5271 Msg: "invalid register class: vgpr tuples must be 64 bit aligned");
5272 return false;
5273 }
5274 if (!validateDS(Inst, Operands)) {
5275 return false;
5276 }
5277
5278 if (!validateBLGP(Inst, Operands)) {
5279 return false;
5280 }
5281
5282 if (!validateDivScale(Inst)) {
5283 Error(L: IDLoc, Msg: "ABS not allowed in VOP3B instructions");
5284 return false;
5285 }
5286 if (!validateWaitCnt(Inst, Operands)) {
5287 return false;
5288 }
5289 if (!validateTFE(Inst, Operands)) {
5290 return false;
5291 }
5292
5293 return true;
5294}
5295
5296static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5297 const FeatureBitset &FBS,
5298 unsigned VariantID = 0);
5299
5300static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5301 const FeatureBitset &AvailableFeatures,
5302 unsigned VariantID);
5303
5304bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5305 const FeatureBitset &FBS) {
5306 return isSupportedMnemo(Mnemo, FBS, Variants: getAllVariants());
5307}
5308
5309bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5310 const FeatureBitset &FBS,
5311 ArrayRef<unsigned> Variants) {
5312 for (auto Variant : Variants) {
5313 if (AMDGPUCheckMnemonic(Mnemonic: Mnemo, AvailableFeatures: FBS, VariantID: Variant))
5314 return true;
5315 }
5316
5317 return false;
5318}
5319
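// Diagnose an unsupported mnemonic, preferring the most specific explanation:
// the requested encoding variant is unsupported, the instruction requires
// wavesize=32, the instruction exists only on other GPUs, or (failing all of
// those) the mnemonic is probably a typo and gets a spelling suggestion.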
5320bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5321 const SMLoc &IDLoc) {
5322 FeatureBitset FBS = ComputeAvailableFeatures(FB: getFeatureBits());
5323
5324 // Check if requested instruction variant is supported.
5325 if (isSupportedMnemo(Mnemo, FBS, Variants: getMatchedVariants()))
5326 return false;
5327
5328 // This instruction is not supported.
5329 // Clear any other pending errors because they are no longer relevant.
5330 getParser().clearPendingErrors();
5331
5332 // Requested instruction variant is not supported.
5333 // Check if any other variants are supported.
5334 StringRef VariantName = getMatchedVariantName();
5335 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5336 return Error(L: IDLoc,
5337 Msg: Twine(VariantName,
5338 " variant of this instruction is not supported"));
5339 }
5340
5341 // Check if this instruction may be used with a different wavesize.
5342 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5343 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5344
5345 FeatureBitset FeaturesWS32 = getFeatureBits();
5346 FeaturesWS32.flip(I: AMDGPU::FeatureWavefrontSize64)
5347 .flip(I: AMDGPU::FeatureWavefrontSize32);
5348 FeatureBitset AvailableFeaturesWS32 =
5349 ComputeAvailableFeatures(FB: FeaturesWS32);
5350
5351 if (isSupportedMnemo(Mnemo, FBS: AvailableFeaturesWS32, Variants: getMatchedVariants()))
5352 return Error(L: IDLoc, Msg: "instruction requires wavesize=32");
5353 }
5354
5355 // Finally check if this instruction is supported on any other GPU.
5356 if (isSupportedMnemo(Mnemo, FBS: FeatureBitset().set())) {
5357 return Error(L: IDLoc, Msg: "instruction not supported on this GPU");
5358 }
5359
5360 // Instruction not supported on any GPU. Probably a typo.
5361 std::string Suggestion = AMDGPUMnemonicSpellCheck(S: Mnemo, FBS);
5362 return Error(L: IDLoc, Msg: "invalid instruction" + Suggestion);
5363}
5364
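// In VOPD assembly the X and Y halves are separated by a '::' token. If the
// operand the matcher flagged is the token immediately following '::', the
// failure belongs to the VOPDY (second) half, which gets its own diagnostic.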
5365static bool isInvalidVOPDY(const OperandVector &Operands,
5366 uint64_t InvalidOprIdx) {
5367 assert(InvalidOprIdx < Operands.size());
5368 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5369 if (Op.isToken() && InvalidOprIdx > 1) {
5370 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5371 return PrevOp.isToken() && PrevOp.getToken() == "::";
5372 }
5373 return false;
5374}
5375
5376bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5377 OperandVector &Operands,
5378 MCStreamer &Out,
5379 uint64_t &ErrorInfo,
5380 bool MatchingInlineAsm) {
5381 MCInst Inst;
5382 unsigned Result = Match_Success;
5383 for (auto Variant : getMatchedVariants()) {
5384 uint64_t EI;
5385 auto R = MatchInstructionImpl(Operands, Inst, ErrorInfo&: EI, matchingInlineAsm: MatchingInlineAsm,
5386 VariantID: Variant);
5387    // Match statuses are ordered from least to most specific; keep the most
5388    // specific status seen so far as the result:
5389    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5390 if (R == Match_Success || R == Match_MissingFeature ||
5391 (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
5392 (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5393 Result != Match_MissingFeature)) {
5394 Result = R;
5395 ErrorInfo = EI;
5396 }
5397 if (R == Match_Success)
5398 break;
5399 }
5400
5401 if (Result == Match_Success) {
5402 if (!validateInstruction(Inst, IDLoc, Operands)) {
5403 return true;
5404 }
5405 Inst.setLoc(IDLoc);
5406 Out.emitInstruction(Inst, STI: getSTI());
5407 return false;
5408 }
5409
5410 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5411 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5412 return true;
5413 }
5414
5415 switch (Result) {
5416 default: break;
5417 case Match_MissingFeature:
5418 // It has been verified that the specified instruction
5419 // mnemonic is valid. A match was found but it requires
5420 // features which are not supported on this GPU.
5421 return Error(L: IDLoc, Msg: "operands are not valid for this GPU or mode");
5422
5423 case Match_InvalidOperand: {
5424 SMLoc ErrorLoc = IDLoc;
5425 if (ErrorInfo != ~0ULL) {
5426 if (ErrorInfo >= Operands.size()) {
5427 return Error(L: IDLoc, Msg: "too few operands for instruction");
5428 }
5429 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5430 if (ErrorLoc == SMLoc())
5431 ErrorLoc = IDLoc;
5432
5433 if (isInvalidVOPDY(Operands, InvalidOprIdx: ErrorInfo))
5434 return Error(L: ErrorLoc, Msg: "invalid VOPDY instruction");
5435 }
5436 return Error(L: ErrorLoc, Msg: "invalid operand for instruction");
5437 }
5438
5439 case Match_MnemonicFail:
5440 llvm_unreachable("Invalid instructions should have been handled already");
5441 }
5442 llvm_unreachable("Implement any new match types added!");
5443}
5444
5445bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5446 int64_t Tmp = -1;
5447 if (!isToken(Kind: AsmToken::Integer) && !isToken(Kind: AsmToken::Identifier)) {
5448 return true;
5449 }
5450 if (getParser().parseAbsoluteExpression(Res&: Tmp)) {
5451 return true;
5452 }
5453 Ret = static_cast<uint32_t>(Tmp);
5454 return false;
5455}
5456
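// Illustrative usage (the exact target id depends on the subtarget and the
// enabled target features):
//   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"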
5457bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5458 if (!getSTI().getTargetTriple().isAMDGCN())
5459 return TokError(Msg: "directive only supported for amdgcn architecture");
5460
5461 std::string TargetIDDirective;
5462 SMLoc TargetStart = getTok().getLoc();
5463 if (getParser().parseEscapedString(Data&: TargetIDDirective))
5464 return true;
5465
5466 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5467 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5468 return getParser().Error(L: TargetRange.Start,
5469 Msg: (Twine(".amdgcn_target directive's target id ") +
5470 Twine(TargetIDDirective) +
5471 Twine(" does not match the specified target id ") +
5472 Twine(getTargetStreamer().getTargetID()->toString())).str());
5473
5474 return false;
5475}
5476
5477bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5478 return Error(L: Range.Start, Msg: "value out of range", Range);
5479}
5480
5481bool AMDGPUAsmParser::calculateGPRBlocks(
5482 const FeatureBitset &Features, const MCExpr *VCCUsed,
5483 const MCExpr *FlatScrUsed, bool XNACKUsed,
5484 std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
5485 SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
5486 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
5487 // TODO(scott.linder): These calculations are duplicated from
5488 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5489 IsaVersion Version = getIsaVersion(GPU: getSTI().getCPU());
5490 MCContext &Ctx = getContext();
5491
5492 const MCExpr *NumSGPRs = NextFreeSGPR;
5493 int64_t EvaluatedSGPRs;
5494
5495 if (Version.Major >= 10)
5496 NumSGPRs = MCConstantExpr::create(Value: 0, Ctx);
5497 else {
5498 unsigned MaxAddressableNumSGPRs =
5499 IsaInfo::getAddressableNumSGPRs(STI: &getSTI());
5500
5501 if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && Version.Major >= 8 &&
5502 !Features.test(I: FeatureSGPRInitBug) &&
5503 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5504 return OutOfRangeError(Range: SGPRRange);
5505
5506 const MCExpr *ExtraSGPRs =
5507 AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
5508 NumSGPRs = MCBinaryExpr::createAdd(LHS: NumSGPRs, RHS: ExtraSGPRs, Ctx);
5509
5510 if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) &&
5511 (Version.Major <= 7 || Features.test(I: FeatureSGPRInitBug)) &&
5512 static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
5513 return OutOfRangeError(Range: SGPRRange);
5514
5515 if (Features.test(I: FeatureSGPRInitBug))
5516 NumSGPRs =
5517 MCConstantExpr::create(Value: IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
5518 }
5519
5520 // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
5521 // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
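  // For example, with NumGPR = 37 and a granule of 4:
  //   alignTo(max(1, 37), 4) / 4 - 1 = 40 / 4 - 1 = 9.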
5522 auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
5523 unsigned Granule) -> const MCExpr * {
5524 const MCExpr *OneConst = MCConstantExpr::create(Value: 1ul, Ctx);
5525 const MCExpr *GranuleConst = MCConstantExpr::create(Value: Granule, Ctx);
5526 const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax(Args: {NumGPR, OneConst}, Ctx);
5527 const MCExpr *AlignToGPR =
5528 AMDGPUMCExpr::createAlignTo(Value: MaxNumGPR, Align: GranuleConst, Ctx);
5529 const MCExpr *DivGPR =
5530 MCBinaryExpr::createDiv(LHS: AlignToGPR, RHS: GranuleConst, Ctx);
5531 const MCExpr *SubGPR = MCBinaryExpr::createSub(LHS: DivGPR, RHS: OneConst, Ctx);
5532 return SubGPR;
5533 };
5534
5535 VGPRBlocks = GetNumGPRBlocks(
5536 NextFreeVGPR,
5537 IsaInfo::getVGPREncodingGranule(STI: &getSTI(), EnableWavefrontSize32));
5538 SGPRBlocks =
5539 GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(STI: &getSTI()));
5540
5541 return false;
5542}
5543
5544bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5545 if (!getSTI().getTargetTriple().isAMDGCN())
5546 return TokError(Msg: "directive only supported for amdgcn architecture");
5547
5548 if (!isHsaAbi(STI: getSTI()))
5549 return TokError(Msg: "directive only supported for amdhsa OS");
5550
5551 StringRef KernelName;
5552 if (getParser().parseIdentifier(Res&: KernelName))
5553 return true;
5554
5555 AMDGPU::MCKernelDescriptor KD =
5556 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5557 STI: &getSTI(), Ctx&: getContext());
5558
5559 StringSet<> Seen;
5560
5561 IsaVersion IVersion = getIsaVersion(GPU: getSTI().getCPU());
5562
5563 const MCExpr *ZeroExpr = MCConstantExpr::create(Value: 0, Ctx&: getContext());
5564 const MCExpr *OneExpr = MCConstantExpr::create(Value: 1, Ctx&: getContext());
5565
5566 SMRange VGPRRange;
5567 const MCExpr *NextFreeVGPR = ZeroExpr;
5568 const MCExpr *AccumOffset = MCConstantExpr::create(Value: 0, Ctx&: getContext());
5569 uint64_t SharedVGPRCount = 0;
5570 uint64_t PreloadLength = 0;
5571 uint64_t PreloadOffset = 0;
5572 SMRange SGPRRange;
5573 const MCExpr *NextFreeSGPR = ZeroExpr;
5574
5575 // Count the number of user SGPRs implied from the enabled feature bits.
5576 unsigned ImpliedUserSGPRCount = 0;
5577
5578 // Track if the asm explicitly contains the directive for the user SGPR
5579 // count.
5580 std::optional<unsigned> ExplicitUserSGPRCount;
5581 const MCExpr *ReserveVCC = OneExpr;
5582 const MCExpr *ReserveFlatScr = OneExpr;
5583 std::optional<bool> EnableWavefrontSize32;
5584
5585 while (true) {
5586 while (trySkipToken(Kind: AsmToken::EndOfStatement));
5587
5588 StringRef ID;
5589 SMRange IDRange = getTok().getLocRange();
5590 if (!parseId(Val&: ID, ErrMsg: "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5591 return true;
5592
5593 if (ID == ".end_amdhsa_kernel")
5594 break;
5595
5596 if (!Seen.insert(key: ID).second)
5597 return TokError(Msg: ".amdhsa_ directives cannot be repeated");
5598
5599 SMLoc ValStart = getLoc();
5600 const MCExpr *ExprVal;
5601 if (getParser().parseExpression(Res&: ExprVal))
5602 return true;
5603 SMLoc ValEnd = getLoc();
5604 SMRange ValRange = SMRange(ValStart, ValEnd);
5605
5606 int64_t IVal = 0;
5607 uint64_t Val = IVal;
5608 bool EvaluatableExpr;
5609 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(Res&: IVal))) {
5610 if (IVal < 0)
5611 return OutOfRangeError(Range: ValRange);
5612 Val = IVal;
5613 }
5614
5615#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5616 if (!isUInt<ENTRY##_WIDTH>(Val)) \
5617 return OutOfRangeError(RANGE); \
5618 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
5619 getContext());
5620
5621// Some fields use the parsed value immediately, which requires the expression
5622// to be resolvable.
5623#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
5624 if (!(RESOLVED)) \
5625 return Error(IDRange.Start, "directive should have resolvable expression", \
5626 IDRange);
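// As an illustration, PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, SOME_ENTRY,
// ExprVal, ValRange) expands to roughly:
//   if (!isUInt<SOME_ENTRY_WIDTH>(Val))
//     return OutOfRangeError(ValRange);
//   AMDGPU::MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc2, ExprVal,
//                                        SOME_ENTRY_SHIFT, SOME_ENTRY,
//                                        getContext());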
5627
5628 if (ID == ".amdhsa_group_segment_fixed_size") {
5629 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
5630 CHAR_BIT>(x: Val))
5631 return OutOfRangeError(Range: ValRange);
5632 KD.group_segment_fixed_size = ExprVal;
5633 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5634 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
5635 CHAR_BIT>(x: Val))
5636 return OutOfRangeError(Range: ValRange);
5637 KD.private_segment_fixed_size = ExprVal;
5638 } else if (ID == ".amdhsa_kernarg_size") {
5639 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(x: Val))
5640 return OutOfRangeError(Range: ValRange);
5641 KD.kernarg_size = ExprVal;
5642 } else if (ID == ".amdhsa_user_sgpr_count") {
5643 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5644 ExplicitUserSGPRCount = Val;
5645 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5646 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5647 if (hasArchitectedFlatScratch())
5648 return Error(L: IDRange.Start,
5649 Msg: "directive is not supported with architected flat scratch",
5650 Range: IDRange);
5651 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5652 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5653 ExprVal, ValRange);
5654 if (Val)
5655 ImpliedUserSGPRCount += 4;
5656 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5657 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5658 if (!hasKernargPreload())
5659 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
5660
5661 if (Val > getMaxNumUserSGPRs())
5662 return OutOfRangeError(Range: ValRange);
5663 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
5664 ValRange);
5665 if (Val) {
5666 ImpliedUserSGPRCount += Val;
5667 PreloadLength = Val;
5668 }
5669 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5670 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5671 if (!hasKernargPreload())
5672 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
5673
5674 if (Val >= 1024)
5675 return OutOfRangeError(Range: ValRange);
5676 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
5677 ValRange);
5678 if (Val)
5679 PreloadOffset = Val;
5680 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5681 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5682 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5683 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
5684 ValRange);
5685 if (Val)
5686 ImpliedUserSGPRCount += 2;
5687 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5688 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5689 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5690 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
5691 ValRange);
5692 if (Val)
5693 ImpliedUserSGPRCount += 2;
5694 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5695 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5696 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5697 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5698 ExprVal, ValRange);
5699 if (Val)
5700 ImpliedUserSGPRCount += 2;
5701 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5702 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5703 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5704 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
5705 ValRange);
5706 if (Val)
5707 ImpliedUserSGPRCount += 2;
5708 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5709 if (hasArchitectedFlatScratch())
5710 return Error(L: IDRange.Start,
5711 Msg: "directive is not supported with architected flat scratch",
5712 Range: IDRange);
5713 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5714 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5715 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
5716 ExprVal, ValRange);
5717 if (Val)
5718 ImpliedUserSGPRCount += 2;
5719 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5720 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5721 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5722 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5723 ExprVal, ValRange);
5724 if (Val)
5725 ImpliedUserSGPRCount += 1;
5726 } else if (ID == ".amdhsa_wavefront_size32") {
5727 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5728 if (IVersion.Major < 10)
5729 return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
5730 EnableWavefrontSize32 = Val;
5731 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5732 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
5733 ValRange);
5734 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5735 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5736 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
5737 ValRange);
5738 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5739 if (hasArchitectedFlatScratch())
5740 return Error(L: IDRange.Start,
5741 Msg: "directive is not supported with architected flat scratch",
5742 Range: IDRange);
5743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5744 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5745 ValRange);
5746 } else if (ID == ".amdhsa_enable_private_segment") {
5747 if (!hasArchitectedFlatScratch())
5748 return Error(
5749 L: IDRange.Start,
5750 Msg: "directive is not supported without architected flat scratch",
5751 Range: IDRange);
5752 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5753 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
5754 ValRange);
5755 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5756 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5757 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
5758 ValRange);
5759 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5760 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5761 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
5762 ValRange);
5763 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5764 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5765 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
5766 ValRange);
5767 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5768 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5769 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
5770 ValRange);
5771 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5772 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5773 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
5774 ValRange);
5775 } else if (ID == ".amdhsa_next_free_vgpr") {
5776 VGPRRange = ValRange;
5777 NextFreeVGPR = ExprVal;
5778 } else if (ID == ".amdhsa_next_free_sgpr") {
5779 SGPRRange = ValRange;
5780 NextFreeSGPR = ExprVal;
5781 } else if (ID == ".amdhsa_accum_offset") {
5782 if (!isGFX90A())
5783 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
5784 AccumOffset = ExprVal;
5785 } else if (ID == ".amdhsa_reserve_vcc") {
5786 if (EvaluatableExpr && !isUInt<1>(x: Val))
5787 return OutOfRangeError(Range: ValRange);
5788 ReserveVCC = ExprVal;
5789 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5790 if (IVersion.Major < 7)
5791 return Error(L: IDRange.Start, Msg: "directive requires gfx7+", Range: IDRange);
5792 if (hasArchitectedFlatScratch())
5793 return Error(L: IDRange.Start,
5794 Msg: "directive is not supported with architected flat scratch",
5795 Range: IDRange);
5796 if (EvaluatableExpr && !isUInt<1>(x: Val))
5797 return OutOfRangeError(Range: ValRange);
5798 ReserveFlatScr = ExprVal;
5799 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5800 if (IVersion.Major < 8)
5801 return Error(L: IDRange.Start, Msg: "directive requires gfx8+", Range: IDRange);
5802 if (!isUInt<1>(x: Val))
5803 return OutOfRangeError(Range: ValRange);
5804 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5805 return getParser().Error(L: IDRange.Start, Msg: ".amdhsa_reserve_xnack_mask does not match target id",
5806 Range: IDRange);
5807 } else if (ID == ".amdhsa_float_round_mode_32") {
5808 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5809 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
5810 ValRange);
5811 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5812 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5813 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
5814 ValRange);
5815 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5816 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5817 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
5818 ValRange);
5819 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5820 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5821 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
5822 ValRange);
5823 } else if (ID == ".amdhsa_dx10_clamp") {
5824 if (IVersion.Major >= 12)
5825 return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+", Range: IDRange);
5826 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5827 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
5828 ValRange);
5829 } else if (ID == ".amdhsa_ieee_mode") {
5830 if (IVersion.Major >= 12)
5831 return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+", Range: IDRange);
5832 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5833 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
5834 ValRange);
5835 } else if (ID == ".amdhsa_fp16_overflow") {
5836 if (IVersion.Major < 9)
5837 return Error(L: IDRange.Start, Msg: "directive requires gfx9+", Range: IDRange);
5838 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5839 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
5840 ValRange);
5841 } else if (ID == ".amdhsa_tg_split") {
5842 if (!isGFX90A())
5843 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
5844 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
5845 ExprVal, ValRange);
5846 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5847 if (IVersion.Major < 10)
5848 return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
5849 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5850 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
5851 ValRange);
5852 } else if (ID == ".amdhsa_memory_ordered") {
5853 if (IVersion.Major < 10)
5854 return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
5855 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5856 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
5857 ValRange);
5858 } else if (ID == ".amdhsa_forward_progress") {
5859 if (IVersion.Major < 10)
5860 return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
5861 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5862 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
5863 ValRange);
5864 } else if (ID == ".amdhsa_shared_vgpr_count") {
5865 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
5866 if (IVersion.Major < 10 || IVersion.Major >= 12)
5867 return Error(L: IDRange.Start, Msg: "directive requires gfx10 or gfx11",
5868 Range: IDRange);
5869 SharedVGPRCount = Val;
5870 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5871 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
5872 ValRange);
5873 } else if (ID == ".amdhsa_inst_pref_size") {
5874 if (IVersion.Major < 11)
5875 return Error(L: IDRange.Start, Msg: "directive requires gfx11+", Range: IDRange);
5876 if (IVersion.Major == 11) {
5877 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5878 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
5879 ValRange);
5880 } else {
5881 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5882 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
5883 ValRange);
5884 }
5885 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5886 PARSE_BITS_ENTRY(
5887 KD.compute_pgm_rsrc2,
5888 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
5889 ExprVal, ValRange);
5890 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5891 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5892 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5893 ExprVal, ValRange);
5894 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5895 PARSE_BITS_ENTRY(
5896 KD.compute_pgm_rsrc2,
5897 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
5898 ExprVal, ValRange);
5899 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5900 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5901 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5902 ExprVal, ValRange);
5903 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5904 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5905 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5906 ExprVal, ValRange);
5907 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5908 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5909 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5910 ExprVal, ValRange);
5911 } else if (ID == ".amdhsa_exception_int_div_zero") {
5912 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5913 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5914 ExprVal, ValRange);
5915 } else if (ID == ".amdhsa_round_robin_scheduling") {
5916 if (IVersion.Major < 12)
5917 return Error(L: IDRange.Start, Msg: "directive requires gfx12+", Range: IDRange);
5918 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5919 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
5920 ValRange);
5921 } else {
5922 return Error(L: IDRange.Start, Msg: "unknown .amdhsa_kernel directive", Range: IDRange);
5923 }
5924
5925#undef PARSE_BITS_ENTRY
5926 }
5927
5928 if (!Seen.contains(key: ".amdhsa_next_free_vgpr"))
5929 return TokError(Msg: ".amdhsa_next_free_vgpr directive is required");
5930
5931 if (!Seen.contains(key: ".amdhsa_next_free_sgpr"))
5932 return TokError(Msg: ".amdhsa_next_free_sgpr directive is required");
5933
5934 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(u&: ImpliedUserSGPRCount);
5935
5936  // Consider the case where the total number of UserSGPRs, including the
5937  // trailing allocated preload SGPRs, is greater than the number of
5938  // explicitly referenced SGPRs.
5939 if (PreloadLength) {
5940 MCContext &Ctx = getContext();
5941 NextFreeSGPR = AMDGPUMCExpr::createMax(
5942 Args: {NextFreeSGPR, MCConstantExpr::create(Value: UserSGPRCount, Ctx)}, Ctx);
5943 }
5944
5945 const MCExpr *VGPRBlocks;
5946 const MCExpr *SGPRBlocks;
5947 if (calculateGPRBlocks(Features: getFeatureBits(), VCCUsed: ReserveVCC, FlatScrUsed: ReserveFlatScr,
5948 XNACKUsed: getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5949 EnableWavefrontSize32, NextFreeVGPR,
5950 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5951 SGPRBlocks))
5952 return true;
5953
5954 int64_t EvaluatedVGPRBlocks;
5955 bool VGPRBlocksEvaluatable =
5956 VGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedVGPRBlocks);
5957 if (VGPRBlocksEvaluatable &&
5958 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5959 x: static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
5960 return OutOfRangeError(Range: VGPRRange);
5961 }
5962 AMDGPU::MCKernelDescriptor::bits_set(
5963 Dst&: KD.compute_pgm_rsrc1, Value: VGPRBlocks,
5964 Shift: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
5965 Mask: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, Ctx&: getContext());
5966
5967 int64_t EvaluatedSGPRBlocks;
5968 if (SGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedSGPRBlocks) &&
5969 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5970 x: static_cast<uint64_t>(EvaluatedSGPRBlocks)))
5971 return OutOfRangeError(Range: SGPRRange);
5972 AMDGPU::MCKernelDescriptor::bits_set(
5973 Dst&: KD.compute_pgm_rsrc1, Value: SGPRBlocks,
5974 Shift: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
5975 Mask: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, Ctx&: getContext());
5976
5977 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5978    return TokError(Msg: ".amdhsa_user_sgpr_count is smaller than the count "
5979                    "implied by the enabled user SGPRs");
5980
5981 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(x: UserSGPRCount))
5982 return TokError(Msg: "too many user SGPRs enabled");
5983 AMDGPU::MCKernelDescriptor::bits_set(
5984 Dst&: KD.compute_pgm_rsrc2, Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()),
5985 Shift: COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
5986 Mask: COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, Ctx&: getContext());
5987
5988 int64_t IVal = 0;
5989 if (!KD.kernarg_size->evaluateAsAbsolute(Res&: IVal))
5990 return TokError(Msg: "Kernarg size should be resolvable");
5991 uint64_t kernarg_size = IVal;
5992 if (PreloadLength && kernarg_size &&
5993 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
5994 return TokError(Msg: "Kernarg preload length + offset is larger than the "
5995 "kernarg segment size");
5996
5997 if (isGFX90A()) {
5998 if (!Seen.contains(key: ".amdhsa_accum_offset"))
5999 return TokError(Msg: ".amdhsa_accum_offset directive is required");
6000 int64_t EvaluatedAccum;
6001 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(Res&: EvaluatedAccum);
6002 uint64_t UEvaluatedAccum = EvaluatedAccum;
6003 if (AccumEvaluatable &&
6004 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6005 return TokError(Msg: "accum_offset should be in range [4..256] in "
6006 "increments of 4");
6007
6008 int64_t EvaluatedNumVGPR;
6009 if (NextFreeVGPR->evaluateAsAbsolute(Res&: EvaluatedNumVGPR) &&
6010 AccumEvaluatable &&
6011 UEvaluatedAccum >
6012 alignTo(Value: std::max(a: (uint64_t)1, b: (uint64_t)EvaluatedNumVGPR), Align: 4))
6013 return TokError(Msg: "accum_offset exceeds total VGPR allocation");
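    // The descriptor field holds (accum_offset / 4) - 1, so, for example, an
    // .amdhsa_accum_offset of 8 is encoded as 1.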
6014 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6015 LHS: MCBinaryExpr::createDiv(
6016 LHS: AccumOffset, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext()),
6017 RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext());
6018 MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: AdjustedAccum,
6019 Shift: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6020 Mask: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6021 Ctx&: getContext());
6022 }
6023
6024 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6025    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
6026 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6027 return TokError(Msg: "shared_vgpr_count directive not valid on "
6028 "wavefront size 32");
6029 }
6030
6031 if (VGPRBlocksEvaluatable &&
6032 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6033 63)) {
6034 return TokError(Msg: "shared_vgpr_count*2 + "
6035 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6036                      "exceed 63");
6037 }
6038 }
6039
6040 getTargetStreamer().EmitAmdhsaKernelDescriptor(STI: getSTI(), KernelName, KernelDescriptor: KD,
6041 NextVGPR: NextFreeVGPR, NextSGPR: NextFreeSGPR,
6042 ReserveVCC, ReserveFlatScr);
6043 return false;
6044}
6045
6046bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6047 uint32_t Version;
6048 if (ParseAsAbsoluteExpression(Ret&: Version))
6049 return true;
6050
6051 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(COV: Version);
6052 return false;
6053}
6054
6055bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6056 AMDGPUMCKernelCodeT &C) {
6057 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6058 // assembly for backwards compatibility.
6059 if (ID == "max_scratch_backing_memory_byte_size") {
6060 Parser.eatToEndOfStatement();
6061 return false;
6062 }
6063
6064 SmallString<40> ErrStr;
6065 raw_svector_ostream Err(ErrStr);
6066 if (!C.ParseKernelCodeT(ID, MCParser&: getParser(), Err)) {
6067 return TokError(Msg: Err.str());
6068 }
6069 Lex();
6070
6071 if (ID == "enable_wavefront_size32") {
6072 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6073 if (!isGFX10Plus())
6074 return TokError(Msg: "enable_wavefront_size32=1 is only allowed on GFX10+");
6075 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6076 return TokError(Msg: "enable_wavefront_size32=1 requires +WavefrontSize32");
6077 } else {
6078 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6079 return TokError(Msg: "enable_wavefront_size32=0 requires +WavefrontSize64");
6080 }
6081 }
6082
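  // The checks below treat wavefront_size as log2 of the wave size:
  // 5 => wave32, 6 => wave64.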
6083 if (ID == "wavefront_size") {
6084 if (C.wavefront_size == 5) {
6085 if (!isGFX10Plus())
6086 return TokError(Msg: "wavefront_size=5 is only allowed on GFX10+");
6087 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
6088 return TokError(Msg: "wavefront_size=5 requires +WavefrontSize32");
6089 } else if (C.wavefront_size == 6) {
6090 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
6091 return TokError(Msg: "wavefront_size=6 requires +WavefrontSize64");
6092 }
6093 }
6094
6095 return false;
6096}
6097
6098bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6099 AMDGPUMCKernelCodeT KernelCode;
6100 KernelCode.initDefault(STI: &getSTI(), Ctx&: getContext());
6101
6102 while (true) {
6103    // Lex EndOfStatement. This is in a while loop because lexing a comment
6104    // will set the current token to EndOfStatement.
6105    while (trySkipToken(Kind: AsmToken::EndOfStatement));
6106
6107 StringRef ID;
6108 if (!parseId(Val&: ID, ErrMsg: "expected value identifier or .end_amd_kernel_code_t"))
6109 return true;
6110
6111 if (ID == ".end_amd_kernel_code_t")
6112 break;
6113
6114 if (ParseAMDKernelCodeTValue(ID, C&: KernelCode))
6115 return true;
6116 }
6117
6118 KernelCode.validate(STI: &getSTI(), Ctx&: getContext());
6119 getTargetStreamer().EmitAMDKernelCodeT(Header&: KernelCode);
6120
6121 return false;
6122}
6123
6124bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6125 StringRef KernelName;
6126 if (!parseId(Val&: KernelName, ErrMsg: "expected symbol name"))
6127 return true;
6128
6129 getTargetStreamer().EmitAMDGPUSymbolType(SymbolName: KernelName,
6130 Type: ELF::STT_AMDGPU_HSA_KERNEL);
6131
6132 KernelScope.initialize(Context&: getContext());
6133 return false;
6134}
6135
6136bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6137 if (!getSTI().getTargetTriple().isAMDGCN()) {
6138 return Error(L: getLoc(),
6139 Msg: ".amd_amdgpu_isa directive is not available on non-amdgcn "
6140 "architectures");
6141 }
6142
6143 auto TargetIDDirective = getLexer().getTok().getStringContents();
6144 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6145 return Error(L: getParser().getTok().getLoc(), Msg: "target id must match options");
6146
6147 getTargetStreamer().EmitISAVersion();
6148 Lex();
6149
6150 return false;
6151}
6152
6153bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6154 assert(isHsaAbi(getSTI()));
6155
6156 std::string HSAMetadataString;
6157 if (ParseToEndDirective(AssemblerDirectiveBegin: HSAMD::V3::AssemblerDirectiveBegin,
6158 AssemblerDirectiveEnd: HSAMD::V3::AssemblerDirectiveEnd, CollectString&: HSAMetadataString))
6159 return true;
6160
6161 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6162 return Error(L: getLoc(), Msg: "invalid HSA metadata");
6163
6164 return false;
6165}
6166
6167/// Common code to parse out a block of text (typically YAML) between start and
6168/// end directives.
6169bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6170 const char *AssemblerDirectiveEnd,
6171 std::string &CollectString) {
6172
6173 raw_string_ostream CollectStream(CollectString);
6174
6175 getLexer().setSkipSpace(false);
6176
6177 bool FoundEnd = false;
6178 while (!isToken(Kind: AsmToken::Eof)) {
6179 while (isToken(Kind: AsmToken::Space)) {
6180 CollectStream << getTokenStr();
6181 Lex();
6182 }
6183
6184 if (trySkipId(Id: AssemblerDirectiveEnd)) {
6185 FoundEnd = true;
6186 break;
6187 }
6188
6189 CollectStream << Parser.parseStringToEndOfStatement()
6190 << getContext().getAsmInfo()->getSeparatorString();
6191
6192 Parser.eatToEndOfStatement();
6193 }
6194
6195 getLexer().setSkipSpace(true);
6196
6197 if (isToken(Kind: AsmToken::Eof) && !FoundEnd) {
6198 return TokError(Msg: Twine("expected directive ") +
6199 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
6200 }
6201
6202 return false;
6203}
6204
6205/// Parse the assembler directive for new MsgPack-format PAL metadata.
6206bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6207 std::string String;
6208 if (ParseToEndDirective(AssemblerDirectiveBegin: AMDGPU::PALMD::AssemblerDirectiveBegin,
6209 AssemblerDirectiveEnd: AMDGPU::PALMD::AssemblerDirectiveEnd, CollectString&: String))
6210 return true;
6211
6212 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6213 if (!PALMetadata->setFromString(String))
6214 return Error(L: getLoc(), Msg: "invalid PAL metadata");
6215 return false;
6216}
6217
6218/// Parse the assembler directive for old linear-format PAL metadata.
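/// The legacy form is a flat, comma-separated list of register/value pairs, so
/// an odd number of values is rejected below.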
6219bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6220 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6221 return Error(L: getLoc(),
6222 Msg: (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6223 "not available on non-amdpal OSes")).str());
6224 }
6225
6226 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6227 PALMetadata->setLegacy();
6228 for (;;) {
6229 uint32_t Key, Value;
6230 if (ParseAsAbsoluteExpression(Ret&: Key)) {
6231 return TokError(Msg: Twine("invalid value in ") +
6232 Twine(PALMD::AssemblerDirective));
6233 }
6234 if (!trySkipToken(Kind: AsmToken::Comma)) {
6235 return TokError(Msg: Twine("expected an even number of values in ") +
6236 Twine(PALMD::AssemblerDirective));
6237 }
6238 if (ParseAsAbsoluteExpression(Ret&: Value)) {
6239 return TokError(Msg: Twine("invalid value in ") +
6240 Twine(PALMD::AssemblerDirective));
6241 }
6242 PALMetadata->setRegister(Reg: Key, Val: Value);
6243 if (!trySkipToken(Kind: AsmToken::Comma))
6244 break;
6245 }
6246 return false;
6247}
6248
6249/// ParseDirectiveAMDGPULDS
6250/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
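/// e.g. (illustrative):
///   .amdgpu_lds shared_data, 4096, 16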
6251bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6252 if (getParser().checkForValidSection())
6253 return true;
6254
6255 StringRef Name;
6256 SMLoc NameLoc = getLoc();
6257 if (getParser().parseIdentifier(Res&: Name))
6258 return TokError(Msg: "expected identifier in directive");
6259
6260 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6261 if (getParser().parseComma())
6262 return true;
6263
6264 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(STI: &getSTI());
6265
6266 int64_t Size;
6267 SMLoc SizeLoc = getLoc();
6268 if (getParser().parseAbsoluteExpression(Res&: Size))
6269 return true;
6270 if (Size < 0)
6271 return Error(L: SizeLoc, Msg: "size must be non-negative");
6272 if (Size > LocalMemorySize)
6273 return Error(L: SizeLoc, Msg: "size is too large");
6274
6275 int64_t Alignment = 4;
6276 if (trySkipToken(Kind: AsmToken::Comma)) {
6277 SMLoc AlignLoc = getLoc();
6278 if (getParser().parseAbsoluteExpression(Res&: Alignment))
6279 return true;
6280 if (Alignment < 0 || !isPowerOf2_64(Value: Alignment))
6281 return Error(L: AlignLoc, Msg: "alignment must be a power of two");
6282
6283 // Alignment larger than the size of LDS is possible in theory, as long
6284    // as the linker manages to place the symbol at address 0, but we do want
6285 // to make sure the alignment fits nicely into a 32-bit integer.
6286 if (Alignment >= 1u << 31)
6287 return Error(L: AlignLoc, Msg: "alignment is too large");
6288 }
6289
6290 if (parseEOL())
6291 return true;
6292
6293 Symbol->redefineIfPossible();
6294 if (!Symbol->isUndefined())
6295 return Error(L: NameLoc, Msg: "invalid symbol redefinition");
6296
6297 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Alignment: Align(Alignment));
6298 return false;
6299}
6300
6301bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6302 StringRef IDVal = DirectiveID.getString();
6303
6304 if (isHsaAbi(STI: getSTI())) {
6305 if (IDVal == ".amdhsa_kernel")
6306 return ParseDirectiveAMDHSAKernel();
6307
6308 if (IDVal == ".amdhsa_code_object_version")
6309 return ParseDirectiveAMDHSACodeObjectVersion();
6310
6311 // TODO: Restructure/combine with PAL metadata directive.
6312 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
6313 return ParseDirectiveHSAMetadata();
6314 } else {
6315 if (IDVal == ".amd_kernel_code_t")
6316 return ParseDirectiveAMDKernelCodeT();
6317
6318 if (IDVal == ".amdgpu_hsa_kernel")
6319 return ParseDirectiveAMDGPUHsaKernel();
6320
6321 if (IDVal == ".amd_amdgpu_isa")
6322 return ParseDirectiveISAVersion();
6323
6324 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
6325 return Error(L: getLoc(), Msg: (Twine(HSAMD::AssemblerDirectiveBegin) +
6326 Twine(" directive is "
6327 "not available on non-amdhsa OSes"))
6328 .str());
6329 }
6330 }
6331
6332 if (IDVal == ".amdgcn_target")
6333 return ParseDirectiveAMDGCNTarget();
6334
6335 if (IDVal == ".amdgpu_lds")
6336 return ParseDirectiveAMDGPULDS();
6337
6338 if (IDVal == PALMD::AssemblerDirectiveBegin)
6339 return ParseDirectivePALMetadataBegin();
6340
6341 if (IDVal == PALMD::AssemblerDirective)
6342 return ParseDirectivePALMetadata();
6343
6344 return true;
6345}
6346
6347bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
6348 MCRegister Reg) {
6349 if (MRI.regsOverlap(RegA: TTMP12_TTMP13_TTMP14_TTMP15, RegB: Reg))
6350 return isGFX9Plus();
6351
6352  // GFX10+ has 2 more SGPRs, 104 and 105.
6353 if (MRI.regsOverlap(RegA: SGPR104_SGPR105, RegB: Reg))
6354 return hasSGPR104_SGPR105();
6355
6356 switch (Reg.id()) {
6357 case SRC_SHARED_BASE_LO:
6358 case SRC_SHARED_BASE:
6359 case SRC_SHARED_LIMIT_LO:
6360 case SRC_SHARED_LIMIT:
6361 case SRC_PRIVATE_BASE_LO:
6362 case SRC_PRIVATE_BASE:
6363 case SRC_PRIVATE_LIMIT_LO:
6364 case SRC_PRIVATE_LIMIT:
6365 return isGFX9Plus();
6366 case SRC_POPS_EXITING_WAVE_ID:
6367 return isGFX9Plus() && !isGFX11Plus();
6368 case TBA:
6369 case TBA_LO:
6370 case TBA_HI:
6371 case TMA:
6372 case TMA_LO:
6373 case TMA_HI:
6374 return !isGFX9Plus();
6375 case XNACK_MASK:
6376 case XNACK_MASK_LO:
6377 case XNACK_MASK_HI:
6378 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6379 case SGPR_NULL:
6380 return isGFX10Plus();
6381 case SRC_EXECZ:
6382 case SRC_VCCZ:
6383 return !isGFX11Plus();
6384 default:
6385 break;
6386 }
6387
6388 if (isCI())
6389 return true;
6390
6391 if (isSI() || isGFX10Plus()) {
6392 // No flat_scr on SI.
6393 // On GFX10Plus flat scratch is not a valid register operand and can only be
6394 // accessed with s_setreg/s_getreg.
6395 switch (Reg.id()) {
6396 case FLAT_SCR:
6397 case FLAT_SCR_LO:
6398 case FLAT_SCR_HI:
6399 return false;
6400 default:
6401 return true;
6402 }
6403 }
6404
6405 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6406 // SI/CI have.
6407 if (MRI.regsOverlap(RegA: SGPR102_SGPR103, RegB: Reg))
6408 return hasSGPR102_SGPR103();
6409
6410 return true;
6411}
6412
6413ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6414 StringRef Mnemonic,
6415 OperandMode Mode) {
6416 ParseStatus Res = parseVOPD(Operands);
6417 if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement))
6418 return Res;
6419
6420 // Try to parse with a custom parser
6421 Res = MatchOperandParserImpl(Operands, Mnemonic);
6422
6423  // If we successfully parsed the operand or if there was an error parsing,
6424 // we are done.
6425 //
6426 // If we are parsing after we reach EndOfStatement then this means we
6427 // are appending default values to the Operands list. This is only done
6428  // by the custom parser, so we shouldn't continue on to the generic parsing.
6429 if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement))
6430 return Res;
6431
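  // In NSA (non-sequential address) form the image address VGPRs are written
  // as a bracketed list, e.g. (illustrative) [v4, v6, v8] instead of a
  // contiguous v[4:6] tuple; the loop below collects those registers.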
6432 SMLoc RBraceLoc;
6433 SMLoc LBraceLoc = getLoc();
6434 if (Mode == OperandMode_NSA && trySkipToken(Kind: AsmToken::LBrac)) {
6435 unsigned Prefix = Operands.size();
6436
6437 for (;;) {
6438 auto Loc = getLoc();
6439 Res = parseReg(Operands);
6440 if (Res.isNoMatch())
6441 Error(L: Loc, Msg: "expected a register");
6442 if (!Res.isSuccess())
6443 return ParseStatus::Failure;
6444
6445 RBraceLoc = getLoc();
6446 if (trySkipToken(Kind: AsmToken::RBrac))
6447 break;
6448
6449 if (!skipToken(Kind: AsmToken::Comma,
6450 ErrMsg: "expected a comma or a closing square bracket"))
6451 return ParseStatus::Failure;
6452 }
6453
6454 if (Operands.size() - Prefix > 1) {
6455 Operands.insert(I: Operands.begin() + Prefix,
6456 Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "[", Loc: LBraceLoc));
6457 Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "]", Loc: RBraceLoc));
6458 }
6459
6460 return ParseStatus::Success;
6461 }
6462
6463 return parseRegOrImm(Operands);
6464}
6465
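// Illustrative suffix handling (not an exhaustive list):
//   "v_add_f32_e64_dpp" -> forces the 64-bit DPP encoding, returns "v_add_f32"
//   "v_mov_b32_sdwa"    -> forces the SDWA encoding,       returns "v_mov_b32"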
6466StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6467 // Clear any forced encodings from the previous instruction.
6468 setForcedEncodingSize(0);
6469 setForcedDPP(false);
6470 setForcedSDWA(false);
6471
6472 if (Name.consume_back(Suffix: "_e64_dpp")) {
6473 setForcedDPP(true);
6474 setForcedEncodingSize(64);
6475 return Name;
6476 }
6477 if (Name.consume_back(Suffix: "_e64")) {
6478 setForcedEncodingSize(64);
6479 return Name;
6480 }
6481 if (Name.consume_back(Suffix: "_e32")) {
6482 setForcedEncodingSize(32);
6483 return Name;
6484 }
6485 if (Name.consume_back(Suffix: "_dpp")) {
6486 setForcedDPP(true);
6487 return Name;
6488 }
6489 if (Name.consume_back(Suffix: "_sdwa")) {
6490 setForcedSDWA(true);
6491 return Name;
6492 }
6493 return Name;
6494}
6495
6496static void applyMnemonicAliases(StringRef &Mnemonic,
6497 const FeatureBitset &Features,
6498 unsigned VariantID);
6499
6500bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6501 StringRef Name, SMLoc NameLoc,
6502 OperandVector &Operands) {
6503 // Add the instruction mnemonic
6504 Name = parseMnemonicSuffix(Name);
6505
6506 // If the target architecture uses MnemonicAlias, call it here to parse
6507 // operands correctly.
6508 applyMnemonicAliases(Mnemonic&: Name, Features: getAvailableFeatures(), VariantID: 0);
6509
6510 Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: NameLoc));
6511
6512 bool IsMIMG = Name.starts_with(Prefix: "image_");
6513
6514 while (!trySkipToken(Kind: AsmToken::EndOfStatement)) {
6515 OperandMode Mode = OperandMode_Default;
6516 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6517 Mode = OperandMode_NSA;
6518 ParseStatus Res = parseOperand(Operands, Mnemonic: Name, Mode);
6519
6520 if (!Res.isSuccess()) {
6521 checkUnsupportedInstruction(Mnemo: Name, IDLoc: NameLoc);
6522 if (!Parser.hasPendingError()) {
6523 // FIXME: use real operand location rather than the current location.
6524 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6525 : "not a valid operand.";
6526 Error(L: getLoc(), Msg);
6527 }
6528 while (!trySkipToken(Kind: AsmToken::EndOfStatement)) {
6529 lex();
6530 }
6531 return true;
6532 }
6533
6534 // Eat the comma or space if there is one.
6535 trySkipToken(Kind: AsmToken::Comma);
6536 }
6537
6538 return false;
6539}
6540
6541//===----------------------------------------------------------------------===//
6542// Utility functions
6543//===----------------------------------------------------------------------===//
6544
6545ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6546 OperandVector &Operands) {
6547 SMLoc S = getLoc();
6548 if (!trySkipId(Id: Name))
6549 return ParseStatus::NoMatch;
6550
6551 Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: S));
6552 return ParseStatus::Success;
6553}
6554
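// Parse an integer operand of the form "<prefix>:<expression>",
// e.g. "offset:16".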
6555ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6556 int64_t &IntVal) {
6557
6558 if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
6559 return ParseStatus::NoMatch;
6560
6561 return parseExpr(Imm&: IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6562}
6563
6564ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6565 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6566 std::function<bool(int64_t &)> ConvertResult) {
6567 SMLoc S = getLoc();
6568 int64_t Value = 0;
6569
6570 ParseStatus Res = parseIntWithPrefix(Prefix, IntVal&: Value);
6571 if (!Res.isSuccess())
6572 return Res;
6573
6574 if (ConvertResult && !ConvertResult(Value)) {
6575 Error(L: S, Msg: "invalid " + StringRef(Prefix) + " value.");
6576 }
6577
6578 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Value, Loc: S, Type: ImmTy));
6579 return ParseStatus::Success;
6580}
6581
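// Parse an operand of the form "<prefix>:[v0,v1,...]" where each element
// must be 0 or 1, packing element I into bit I of the resulting immediate.
// At most MaxSize (4) elements are accepted, e.g. "neg:[0,1,1,0]".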
6582ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6583 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6584 bool (*ConvertResult)(int64_t &)) {
6585 SMLoc S = getLoc();
6586 if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
6587 return ParseStatus::NoMatch;
6588
6589 if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected a left square bracket"))
6590 return ParseStatus::Failure;
6591
6592 unsigned Val = 0;
6593 const unsigned MaxSize = 4;
6594
6595 // FIXME: How to verify the number of elements matches the number of src
6596 // operands?
6597 for (int I = 0; ; ++I) {
6598 int64_t Op;
6599 SMLoc Loc = getLoc();
6600 if (!parseExpr(Imm&: Op))
6601 return ParseStatus::Failure;
6602
6603 if (Op != 0 && Op != 1)
6604 return Error(L: Loc, Msg: "invalid " + StringRef(Prefix) + " value.");
6605
6606 Val |= (Op << I);
6607
6608 if (trySkipToken(Kind: AsmToken::RBrac))
6609 break;
6610
6611 if (I + 1 == MaxSize)
6612 return Error(L: getLoc(), Msg: "expected a closing square bracket");
6613
6614 if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
6615 return ParseStatus::Failure;
6616 }
6617
6618 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: ImmTy));
6619 return ParseStatus::Success;
6620}
6621
6622ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6623 OperandVector &Operands,
6624 AMDGPUOperand::ImmTy ImmTy) {
6625 int64_t Bit;
6626 SMLoc S = getLoc();
6627
6628 if (trySkipId(Id: Name)) {
6629 Bit = 1;
6630 } else if (trySkipId(Pref: "no", Id: Name)) {
6631 Bit = 0;
6632 } else {
6633 return ParseStatus::NoMatch;
6634 }
6635
6636 if (Name == "r128" && !hasMIMG_R128())
6637 return Error(L: S, Msg: "r128 modifier is not supported on this GPU");
6638 if (Name == "a16" && !hasA16())
6639 return Error(L: S, Msg: "a16 modifier is not supported on this GPU");
6640
6641 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6642 ImmTy = AMDGPUOperand::ImmTyR128A16;
6643
6644 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Bit, Loc: S, Type: ImmTy));
6645 return ParseStatus::Success;
6646}
6647
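// Map a cache policy modifier token (optionally prefixed with "no") to its
// CPol bit. On GFX940, instructions other than s_* use nt/sc0/sc1; other
// targets use dlc/glc/scc/slc. Returns 0 if the token is not a cache policy
// modifier.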
6648unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6649 bool &Disabling) const {
6650 Disabling = Id.consume_front(Prefix: "no");
6651
6652 if (isGFX940() && !Mnemo.starts_with(Prefix: "s_")) {
6653 return StringSwitch<unsigned>(Id)
6654 .Case(S: "nt", Value: AMDGPU::CPol::NT)
6655 .Case(S: "sc0", Value: AMDGPU::CPol::SC0)
6656 .Case(S: "sc1", Value: AMDGPU::CPol::SC1)
6657 .Default(Value: 0);
6658 }
6659
6660 return StringSwitch<unsigned>(Id)
6661 .Case(S: "dlc", Value: AMDGPU::CPol::DLC)
6662 .Case(S: "glc", Value: AMDGPU::CPol::GLC)
6663 .Case(S: "scc", Value: AMDGPU::CPol::SCC)
6664 .Case(S: "slc", Value: AMDGPU::CPol::SLC)
6665 .Default(Value: 0);
6666}
6667
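// Parse cache policy modifiers. On GFX12+ the policy is built from the th
// and scope modifiers and combined into a single CPol immediate; on earlier
// targets it is a set of individual dlc/glc/scc/slc bits, each of which may
// be prefixed with "no" to disable it.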
6668ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6669 if (isGFX12Plus()) {
6670 SMLoc StringLoc = getLoc();
6671
6672 int64_t CPolVal = 0;
6673 ParseStatus ResTH = ParseStatus::NoMatch;
6674 ParseStatus ResScope = ParseStatus::NoMatch;
6675
6676 for (;;) {
6677 if (ResTH.isNoMatch()) {
6678 int64_t TH;
6679 ResTH = parseTH(Operands, TH);
6680 if (ResTH.isFailure())
6681 return ResTH;
6682 if (ResTH.isSuccess()) {
6683 CPolVal |= TH;
6684 continue;
6685 }
6686 }
6687
6688 if (ResScope.isNoMatch()) {
6689 int64_t Scope;
6690 ResScope = parseScope(Operands, Scope);
6691 if (ResScope.isFailure())
6692 return ResScope;
6693 if (ResScope.isSuccess()) {
6694 CPolVal |= Scope;
6695 continue;
6696 }
6697 }
6698
6699 break;
6700 }
6701
6702 if (ResTH.isNoMatch() && ResScope.isNoMatch())
6703 return ParseStatus::NoMatch;
6704
6705 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: CPolVal, Loc: StringLoc,
6706 Type: AMDGPUOperand::ImmTyCPol));
6707 return ParseStatus::Success;
6708 }
6709
6710 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6711 SMLoc OpLoc = getLoc();
6712 unsigned Enabled = 0, Seen = 0;
6713 for (;;) {
6714 SMLoc S = getLoc();
6715 bool Disabling;
6716 unsigned CPol = getCPolKind(Id: getId(), Mnemo, Disabling);
6717 if (!CPol)
6718 break;
6719
6720 lex();
6721
6722 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6723 return Error(L: S, Msg: "dlc modifier is not supported on this GPU");
6724
6725 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6726 return Error(L: S, Msg: "scc modifier is not supported on this GPU");
6727
6728 if (Seen & CPol)
6729 return Error(L: S, Msg: "duplicate cache policy modifier");
6730
6731 if (!Disabling)
6732 Enabled |= CPol;
6733
6734 Seen |= CPol;
6735 }
6736
6737 if (!Seen)
6738 return ParseStatus::NoMatch;
6739
6740 Operands.push_back(
6741 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Enabled, Loc: OpLoc, Type: AMDGPUOperand::ImmTyCPol));
6742 return ParseStatus::Success;
6743}
6744
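// Parse a GFX12+ "scope:SCOPE_*" modifier and translate the symbolic name
// or numeric index into its CPol scope encoding.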
6745ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6746 int64_t &Scope) {
6747 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
6748 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
6749
6750 ParseStatus Res = parseStringOrIntWithPrefix(
6751 Operands, Name: "scope", Ids: {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
6752 IntVal&: Scope);
6753
6754 if (Res.isSuccess())
6755 Scope = Scopes[Scope];
6756
6757 return Res;
6758}
6759
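// Parse a GFX12+ temporal hint "th:TH_*". The hint is encoded as a type
// (load, store or atomic) combined with a per-type value; "TH_DEFAULT"
// maps to TH_RT.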
6760ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6761 TH = AMDGPU::CPol::TH_RT; // default
6762
6763 StringRef Value;
6764 SMLoc StringLoc;
6765 ParseStatus Res = parseStringWithPrefix(Prefix: "th", Value, StringLoc);
6766 if (!Res.isSuccess())
6767 return Res;
6768
6769 if (Value == "TH_DEFAULT")
6770 TH = AMDGPU::CPol::TH_RT;
6771 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
6772 Value == "TH_LOAD_NT_WB") {
6773 return Error(L: StringLoc, Msg: "invalid th value");
6774 } else if (Value.consume_front(Prefix: "TH_ATOMIC_")) {
6775 TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6776 } else if (Value.consume_front(Prefix: "TH_LOAD_")) {
6777 TH = AMDGPU::CPol::TH_TYPE_LOAD;
6778 } else if (Value.consume_front(Prefix: "TH_STORE_")) {
6779 TH = AMDGPU::CPol::TH_TYPE_STORE;
6780 } else {
6781 return Error(L: StringLoc, Msg: "invalid th value");
6782 }
6783
6784 if (Value == "BYPASS")
6785 TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6786
6787 if (TH != 0) {
6788 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6789 TH |= StringSwitch<int64_t>(Value)
6790 .Case(S: "RETURN", Value: AMDGPU::CPol::TH_ATOMIC_RETURN)
6791 .Case(S: "RT", Value: AMDGPU::CPol::TH_RT)
6792 .Case(S: "RT_RETURN", Value: AMDGPU::CPol::TH_ATOMIC_RETURN)
6793 .Case(S: "NT", Value: AMDGPU::CPol::TH_ATOMIC_NT)
6794 .Case(S: "NT_RETURN", Value: AMDGPU::CPol::TH_ATOMIC_NT |
6795 AMDGPU::CPol::TH_ATOMIC_RETURN)
6796 .Case(S: "CASCADE_RT", Value: AMDGPU::CPol::TH_ATOMIC_CASCADE)
6797 .Case(S: "CASCADE_NT", Value: AMDGPU::CPol::TH_ATOMIC_CASCADE |
6798 AMDGPU::CPol::TH_ATOMIC_NT)
6799 .Default(Value: 0xffffffff);
6800 else
6801 TH |= StringSwitch<int64_t>(Value)
6802 .Case(S: "RT", Value: AMDGPU::CPol::TH_RT)
6803 .Case(S: "NT", Value: AMDGPU::CPol::TH_NT)
6804 .Case(S: "HT", Value: AMDGPU::CPol::TH_HT)
6805 .Case(S: "LU", Value: AMDGPU::CPol::TH_LU)
6806 .Case(S: "WB", Value: AMDGPU::CPol::TH_WB)
6807 .Case(S: "NT_RT", Value: AMDGPU::CPol::TH_NT_RT)
6808 .Case(S: "RT_NT", Value: AMDGPU::CPol::TH_RT_NT)
6809 .Case(S: "NT_HT", Value: AMDGPU::CPol::TH_NT_HT)
6810 .Case(S: "NT_WB", Value: AMDGPU::CPol::TH_NT_WB)
6811 .Case(S: "BYPASS", Value: AMDGPU::CPol::TH_BYPASS)
6812 .Default(Value: 0xffffffff);
6813 }
6814
6815 if (TH == 0xffffffff)
6816 return Error(L: StringLoc, Msg: "invalid th value");
6817
6818 return ParseStatus::Success;
6819}
6820
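// Add an optional immediate operand to Inst: use the value parsed into
// OptionalIdx if present, otherwise the supplied default. If InsertAt is
// provided, the operand is inserted at that position instead of appended.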
6821static void
6822addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
6823 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
6824 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
6825 std::optional<unsigned> InsertAt = std::nullopt) {
6826 auto i = OptionalIdx.find(x: ImmT);
6827 if (i != OptionalIdx.end()) {
6828 unsigned Idx = i->second;
6829 const AMDGPUOperand &Op =
6830 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
6831 if (InsertAt)
6832 Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Op.getImm()));
6833 else
6834 Op.addImmOperands(Inst, N: 1);
6835 } else {
6836 if (InsertAt.has_value())
6837 Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Default));
6838 else
6839 Inst.addOperand(Op: MCOperand::createImm(Val: Default));
6840 }
6841}
6842
6843ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6844 StringRef &Value,
6845 SMLoc &StringLoc) {
6846 if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
6847 return ParseStatus::NoMatch;
6848
6849 StringLoc = getLoc();
6850 return parseId(Val&: Value, ErrMsg: "expected an identifier") ? ParseStatus::Success
6851 : ParseStatus::Failure;
6852}
6853
6854ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
6855 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
6856 int64_t &IntVal) {
6857 if (!trySkipId(Id: Name, Kind: AsmToken::Colon))
6858 return ParseStatus::NoMatch;
6859
6860 SMLoc StringLoc = getLoc();
6861
6862 StringRef Value;
6863 if (isToken(Kind: AsmToken::Identifier)) {
6864 Value = getTokenStr();
6865 lex();
6866
6867 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
6868 if (Value == Ids[IntVal])
6869 break;
6870 } else if (!parseExpr(Imm&: IntVal))
6871 return ParseStatus::Failure;
6872
6873 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
6874 return Error(L: StringLoc, Msg: "invalid " + Twine(Name) + " value");
6875
6876 return ParseStatus::Success;
6877}
6878
6879ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
6880 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
6881 AMDGPUOperand::ImmTy Type) {
6882 SMLoc S = getLoc();
6883 int64_t IntVal;
6884
6885 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
6886 if (Res.isSuccess())
6887 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S, Type));
6888
6889 return Res;
6890}
6891
6892//===----------------------------------------------------------------------===//
6893// MTBUF format
6894//===----------------------------------------------------------------------===//
6895
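// Parse an optional "<pref>:<value>" format field and range-check the
// value. Returns false only on a hard error; if the field is absent, Fmt is
// left unchanged and true is returned.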
6896bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6897 int64_t MaxVal,
6898 int64_t &Fmt) {
6899 int64_t Val;
6900 SMLoc Loc = getLoc();
6901
6902 auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: Val);
6903 if (Res.isFailure())
6904 return false;
6905 if (Res.isNoMatch())
6906 return true;
6907
6908 if (Val < 0 || Val > MaxVal) {
6909 Error(L: Loc, Msg: Twine("out of range ", StringRef(Pref)));
6910 return false;
6911 }
6912
6913 Fmt = Val;
6914 return true;
6915}
6916
6917ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6918 AMDGPUOperand::ImmTy ImmTy) {
6919 const char *Pref = "index_key";
6920 int64_t ImmVal = 0;
6921 SMLoc Loc = getLoc();
6922 auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: ImmVal);
6923 if (!Res.isSuccess())
6924 return Res;
6925
6926 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6927 return Error(L: Loc, Msg: Twine("out of range ", StringRef(Pref)));
6928
6929 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6930 return Error(L: Loc, Msg: Twine("out of range ", StringRef(Pref)));
6931
6932 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: ImmTy));
6933 return ParseStatus::Success;
6934}
6935
6936ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6937 return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey8bit);
6938}
6939
6940ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6941 return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey16bit);
6942}
6943
6944// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6945// values to live in a joint format operand in the MCInst encoding.
6946ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6947 using namespace llvm::AMDGPU::MTBUFFormat;
6948
6949 int64_t Dfmt = DFMT_UNDEF;
6950 int64_t Nfmt = NFMT_UNDEF;
6951
6952 // dfmt and nfmt can appear in either order, and each is optional.
6953 for (int I = 0; I < 2; ++I) {
6954 if (Dfmt == DFMT_UNDEF && !tryParseFmt(Pref: "dfmt", MaxVal: DFMT_MAX, Fmt&: Dfmt))
6955 return ParseStatus::Failure;
6956
6957 if (Nfmt == NFMT_UNDEF && !tryParseFmt(Pref: "nfmt", MaxVal: NFMT_MAX, Fmt&: Nfmt))
6958 return ParseStatus::Failure;
6959
6960 // Skip optional comma between dfmt/nfmt
6961 // but guard against 2 commas following each other.
6962 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6963 !peekToken().is(K: AsmToken::Comma)) {
6964 trySkipToken(Kind: AsmToken::Comma);
6965 }
6966 }
6967
6968 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6969 return ParseStatus::NoMatch;
6970
6971 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6972 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6973
6974 Format = encodeDfmtNfmt(Dfmt, Nfmt);
6975 return ParseStatus::Success;
6976}
6977
6978ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6979 using namespace llvm::AMDGPU::MTBUFFormat;
6980
6981 int64_t Fmt = UFMT_UNDEF;
6982
6983 if (!tryParseFmt(Pref: "format", MaxVal: UFMT_MAX, Fmt))
6984 return ParseStatus::Failure;
6985
6986 if (Fmt == UFMT_UNDEF)
6987 return ParseStatus::NoMatch;
6988
6989 Format = Fmt;
6990 return ParseStatus::Success;
6991}
6992
6993bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6994 int64_t &Nfmt,
6995 StringRef FormatStr,
6996 SMLoc Loc) {
6997 using namespace llvm::AMDGPU::MTBUFFormat;
6998 int64_t Format;
6999
7000 Format = getDfmt(Name: FormatStr);
7001 if (Format != DFMT_UNDEF) {
7002 Dfmt = Format;
7003 return true;
7004 }
7005
7006 Format = getNfmt(Name: FormatStr, STI: getSTI());
7007 if (Format != NFMT_UNDEF) {
7008 Nfmt = Format;
7009 return true;
7010 }
7011
7012 Error(L: Loc, Msg: "unsupported format");
7013 return false;
7014}
7015
7016ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7017 SMLoc FormatLoc,
7018 int64_t &Format) {
7019 using namespace llvm::AMDGPU::MTBUFFormat;
7020
7021 int64_t Dfmt = DFMT_UNDEF;
7022 int64_t Nfmt = NFMT_UNDEF;
7023 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, Loc: FormatLoc))
7024 return ParseStatus::Failure;
7025
7026 if (trySkipToken(Kind: AsmToken::Comma)) {
7027 StringRef Str;
7028 SMLoc Loc = getLoc();
7029 if (!parseId(Val&: Str, ErrMsg: "expected a format string") ||
7030 !matchDfmtNfmt(Dfmt, Nfmt, FormatStr: Str, Loc))
7031 return ParseStatus::Failure;
7032 if (Dfmt == DFMT_UNDEF)
7033 return Error(L: Loc, Msg: "duplicate numeric format");
7034 if (Nfmt == NFMT_UNDEF)
7035 return Error(L: Loc, Msg: "duplicate data format");
7036 }
7037
7038 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7039 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7040
7041 if (isGFX10Plus()) {
7042 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, STI: getSTI());
7043 if (Ufmt == UFMT_UNDEF)
7044 return Error(L: FormatLoc, Msg: "unsupported format");
7045 Format = Ufmt;
7046 } else {
7047 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7048 }
7049
7050 return ParseStatus::Success;
7051}
7052
7053ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7054 SMLoc Loc,
7055 int64_t &Format) {
7056 using namespace llvm::AMDGPU::MTBUFFormat;
7057
7058 auto Id = getUnifiedFormat(Name: FormatStr, STI: getSTI());
7059 if (Id == UFMT_UNDEF)
7060 return ParseStatus::NoMatch;
7061
7062 if (!isGFX10Plus())
7063 return Error(L: Loc, Msg: "unified format is not supported on this GPU");
7064
7065 Format = Id;
7066 return ParseStatus::Success;
7067}
7068
7069ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7070 using namespace llvm::AMDGPU::MTBUFFormat;
7071 SMLoc Loc = getLoc();
7072
7073 if (!parseExpr(Imm&: Format))
7074 return ParseStatus::Failure;
7075 if (!isValidFormatEncoding(Val: Format, STI: getSTI()))
7076 return Error(L: Loc, Msg: "out of range format");
7077
7078 return ParseStatus::Success;
7079}
7080
7081ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7082 using namespace llvm::AMDGPU::MTBUFFormat;
7083
7084 if (!trySkipId(Id: "format", Kind: AsmToken::Colon))
7085 return ParseStatus::NoMatch;
7086
7087 if (trySkipToken(Kind: AsmToken::LBrac)) {
7088 StringRef FormatStr;
7089 SMLoc Loc = getLoc();
7090 if (!parseId(Val&: FormatStr, ErrMsg: "expected a format string"))
7091 return ParseStatus::Failure;
7092
7093 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7094 if (Res.isNoMatch())
7095 Res = parseSymbolicSplitFormat(FormatStr, FormatLoc: Loc, Format);
7096 if (!Res.isSuccess())
7097 return Res;
7098
7099 if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
7100 return ParseStatus::Failure;
7101
7102 return ParseStatus::Success;
7103 }
7104
7105 return parseNumericFormat(Format);
7106}
7107
7108ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
7109 using namespace llvm::AMDGPU::MTBUFFormat;
7110
7111 int64_t Format = getDefaultFormatEncoding(STI: getSTI());
7112 ParseStatus Res;
7113 SMLoc Loc = getLoc();
7114
7115 // Parse legacy format syntax.
7116 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
7117 if (Res.isFailure())
7118 return Res;
7119
7120 bool FormatFound = Res.isSuccess();
7121
7122 Operands.push_back(
7123 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Format, Loc, Type: AMDGPUOperand::ImmTyFORMAT));
7124
7125 if (FormatFound)
7126 trySkipToken(Kind: AsmToken::Comma);
7127
7128 if (isToken(Kind: AsmToken::EndOfStatement)) {
7129 // We are expecting an soffset operand,
7130    // but let the matcher handle the error.
7131 return ParseStatus::Success;
7132 }
7133
7134 // Parse soffset.
7135 Res = parseRegOrImm(Operands);
7136 if (!Res.isSuccess())
7137 return Res;
7138
7139 trySkipToken(Kind: AsmToken::Comma);
7140
7141 if (!FormatFound) {
7142 Res = parseSymbolicOrNumericFormat(Format);
7143 if (Res.isFailure())
7144 return Res;
7145 if (Res.isSuccess()) {
7146 auto Size = Operands.size();
7147 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
7148 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
7149 Op.setImm(Format);
7150 }
7151 return ParseStatus::Success;
7152 }
7153
7154 if (isId(Id: "format") && peekToken().is(K: AsmToken::Colon))
7155 return Error(L: getLoc(), Msg: "duplicate format");
7156 return ParseStatus::Success;
7157}
7158
7159ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7160 ParseStatus Res =
7161 parseIntWithPrefix(Prefix: "offset", Operands, ImmTy: AMDGPUOperand::ImmTyOffset);
7162 if (Res.isNoMatch()) {
7163 Res = parseIntWithPrefix(Prefix: "inst_offset", Operands,
7164 ImmTy: AMDGPUOperand::ImmTyInstOffset);
7165 }
7166 return Res;
7167}
7168
7169ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7170 ParseStatus Res =
7171 parseNamedBit(Name: "r128", Operands, ImmTy: AMDGPUOperand::ImmTyR128A16);
7172 if (Res.isNoMatch())
7173 Res = parseNamedBit(Name: "a16", Operands, ImmTy: AMDGPUOperand::ImmTyA16);
7174 return Res;
7175}
7176
7177ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7178 ParseStatus Res =
7179 parseIntWithPrefix(Prefix: "blgp", Operands, ImmTy: AMDGPUOperand::ImmTyBLGP);
7180 if (Res.isNoMatch()) {
7181 Res =
7182 parseOperandArrayWithPrefix(Prefix: "neg", Operands, ImmTy: AMDGPUOperand::ImmTyBLGP);
7183 }
7184 return Res;
7185}
7186
7187//===----------------------------------------------------------------------===//
7188// Exp
7189//===----------------------------------------------------------------------===//
7190
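// Convert parsed export operands into an MCInst: record up to four source
// registers (an "off" source becomes a null register), add the optional vm
// and compr flags, and compute the enable mask from the sources that are
// actually present; in compressed form each present source enables two
// components.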
7191void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
7192 OptionalImmIndexMap OptionalIdx;
7193
7194 unsigned OperandIdx[4];
7195 unsigned EnMask = 0;
7196 int SrcIdx = 0;
7197
7198 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7199 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7200
7201 // Add the register arguments
7202 if (Op.isReg()) {
7203 assert(SrcIdx < 4);
7204 OperandIdx[SrcIdx] = Inst.size();
7205 Op.addRegOperands(Inst, N: 1);
7206 ++SrcIdx;
7207 continue;
7208 }
7209
7210 if (Op.isOff()) {
7211 assert(SrcIdx < 4);
7212 OperandIdx[SrcIdx] = Inst.size();
7213 Inst.addOperand(Op: MCOperand::createReg(Reg: MCRegister()));
7214 ++SrcIdx;
7215 continue;
7216 }
7217
7218 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
7219 Op.addImmOperands(Inst, N: 1);
7220 continue;
7221 }
7222
7223 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
7224 continue;
7225
7226 // Handle optional arguments
7227 OptionalIdx[Op.getImmTy()] = i;
7228 }
7229
7230 assert(SrcIdx == 4);
7231
7232 bool Compr = false;
7233 if (OptionalIdx.find(x: AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
7234 Compr = true;
7235 Inst.getOperand(i: OperandIdx[1]) = Inst.getOperand(i: OperandIdx[2]);
7236 Inst.getOperand(i: OperandIdx[2]).setReg(MCRegister());
7237 Inst.getOperand(i: OperandIdx[3]).setReg(MCRegister());
7238 }
7239
7240 for (auto i = 0; i < SrcIdx; ++i) {
7241 if (Inst.getOperand(i: OperandIdx[i]).getReg()) {
7242      EnMask |= Compr ? (0x3 << (i * 2)) : (0x1 << i);
7243 }
7244 }
7245
7246 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpVM);
7247 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpCompr);
7248
7249 Inst.addOperand(Op: MCOperand::createImm(Val: EnMask));
7250}
7251
7252//===----------------------------------------------------------------------===//
7253// s_waitcnt
7254//===----------------------------------------------------------------------===//
7255
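// Encode a single counter value into the packed s_waitcnt immediate. A
// value that does not survive an encode/decode round trip is out of range;
// the "_sat" counter forms saturate it to the maximum instead of failing.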
7256static bool
7257encodeCnt(
7258 const AMDGPU::IsaVersion ISA,
7259 int64_t &IntVal,
7260 int64_t CntVal,
7261 bool Saturate,
7262 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7263 unsigned (*decode)(const IsaVersion &Version, unsigned))
7264{
7265 bool Failed = false;
7266
7267 IntVal = encode(ISA, IntVal, CntVal);
7268 if (CntVal != decode(ISA, IntVal)) {
7269 if (Saturate) {
7270 IntVal = encode(ISA, IntVal, -1);
7271 } else {
7272 Failed = true;
7273 }
7274 }
7275 return Failed;
7276}
7277
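// Parse one "name(value)" term of an s_waitcnt operand, e.g. "vmcnt(0)".
// Terms may be separated by '&' or ','.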
7278bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7279
7280 SMLoc CntLoc = getLoc();
7281 StringRef CntName = getTokenStr();
7282
7283 if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name") ||
7284 !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
7285 return false;
7286
7287 int64_t CntVal;
7288 SMLoc ValLoc = getLoc();
7289 if (!parseExpr(Imm&: CntVal))
7290 return false;
7291
7292 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
7293
7294 bool Failed = true;
7295 bool Sat = CntName.ends_with(Suffix: "_sat");
7296
7297 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7298 Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeVmcnt, decode: decodeVmcnt);
7299 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7300 Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeExpcnt, decode: decodeExpcnt);
7301 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7302 Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeLgkmcnt, decode: decodeLgkmcnt);
7303 } else {
7304 Error(L: CntLoc, Msg: "invalid counter name " + CntName);
7305 return false;
7306 }
7307
7308 if (Failed) {
7309 Error(L: ValLoc, Msg: "too large value for " + CntName);
7310 return false;
7311 }
7312
7313 if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
7314 return false;
7315
7316 if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) {
7317 if (isToken(Kind: AsmToken::EndOfStatement)) {
7318 Error(L: getLoc(), Msg: "expected a counter name");
7319 return false;
7320 }
7321 }
7322
7323 return true;
7324}
7325
7326ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7327 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
7328 int64_t Waitcnt = getWaitcntBitMask(Version: ISA);
7329 SMLoc S = getLoc();
7330
7331 if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
7332 while (!isToken(Kind: AsmToken::EndOfStatement)) {
7333 if (!parseCnt(IntVal&: Waitcnt))
7334 return ParseStatus::Failure;
7335 }
7336 } else {
7337 if (!parseExpr(Imm&: Waitcnt))
7338 return ParseStatus::Failure;
7339 }
7340
7341 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Waitcnt, Loc: S));
7342 return ParseStatus::Success;
7343}
7344
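// Parse one "field(VALUE)" term of an s_delay_alu operand, e.g.
// "instid0(VALU_DEP_1)", and merge it into the packed delay encoding
// (instid0 at bit 0, instskip at bit 4, instid1 at bit 7).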
7345bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7346 SMLoc FieldLoc = getLoc();
7347 StringRef FieldName = getTokenStr();
7348 if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a field name") ||
7349 !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
7350 return false;
7351
7352 SMLoc ValueLoc = getLoc();
7353 StringRef ValueName = getTokenStr();
7354 if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a value name") ||
7355 !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a right parenthesis"))
7356 return false;
7357
7358 unsigned Shift;
7359 if (FieldName == "instid0") {
7360 Shift = 0;
7361 } else if (FieldName == "instskip") {
7362 Shift = 4;
7363 } else if (FieldName == "instid1") {
7364 Shift = 7;
7365 } else {
7366 Error(L: FieldLoc, Msg: "invalid field name " + FieldName);
7367 return false;
7368 }
7369
7370 int Value;
7371 if (Shift == 4) {
7372 // Parse values for instskip.
7373 Value = StringSwitch<int>(ValueName)
7374 .Case(S: "SAME", Value: 0)
7375 .Case(S: "NEXT", Value: 1)
7376 .Case(S: "SKIP_1", Value: 2)
7377 .Case(S: "SKIP_2", Value: 3)
7378 .Case(S: "SKIP_3", Value: 4)
7379 .Case(S: "SKIP_4", Value: 5)
7380 .Default(Value: -1);
7381 } else {
7382 // Parse values for instid0 and instid1.
7383 Value = StringSwitch<int>(ValueName)
7384 .Case(S: "NO_DEP", Value: 0)
7385 .Case(S: "VALU_DEP_1", Value: 1)
7386 .Case(S: "VALU_DEP_2", Value: 2)
7387 .Case(S: "VALU_DEP_3", Value: 3)
7388 .Case(S: "VALU_DEP_4", Value: 4)
7389 .Case(S: "TRANS32_DEP_1", Value: 5)
7390 .Case(S: "TRANS32_DEP_2", Value: 6)
7391 .Case(S: "TRANS32_DEP_3", Value: 7)
7392 .Case(S: "FMA_ACCUM_CYCLE_1", Value: 8)
7393 .Case(S: "SALU_CYCLE_1", Value: 9)
7394 .Case(S: "SALU_CYCLE_2", Value: 10)
7395 .Case(S: "SALU_CYCLE_3", Value: 11)
7396 .Default(Value: -1);
7397 }
7398 if (Value < 0) {
7399 Error(L: ValueLoc, Msg: "invalid value name " + ValueName);
7400 return false;
7401 }
7402
7403 Delay |= Value << Shift;
7404 return true;
7405}
7406
7407ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7408 int64_t Delay = 0;
7409 SMLoc S = getLoc();
7410
7411 if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
7412 do {
7413 if (!parseDelay(Delay))
7414 return ParseStatus::Failure;
7415 } while (trySkipToken(Kind: AsmToken::Pipe));
7416 } else {
7417 if (!parseExpr(Imm&: Delay))
7418 return ParseStatus::Failure;
7419 }
7420
7421 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Delay, Loc: S));
7422 return ParseStatus::Success;
7423}
7424
7425bool
7426AMDGPUOperand::isSWaitCnt() const {
7427 return isImm();
7428}
7429
7430bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7431
7432//===----------------------------------------------------------------------===//
7433// DepCtr
7434//===----------------------------------------------------------------------===//
7435
7436void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7437 StringRef DepCtrName) {
7438 switch (ErrorId) {
7439 case OPR_ID_UNKNOWN:
7440 Error(L: Loc, Msg: Twine("invalid counter name ", DepCtrName));
7441 return;
7442 case OPR_ID_UNSUPPORTED:
7443 Error(L: Loc, Msg: Twine(DepCtrName, " is not supported on this GPU"));
7444 return;
7445 case OPR_ID_DUPLICATE:
7446 Error(L: Loc, Msg: Twine("duplicate counter name ", DepCtrName));
7447 return;
7448 case OPR_VAL_INVALID:
7449 Error(L: Loc, Msg: Twine("invalid value for ", DepCtrName));
7450 return;
7451 default:
7452 assert(false);
7453 }
7454}
7455
7456bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7457
7458 using namespace llvm::AMDGPU::DepCtr;
7459
7460 SMLoc DepCtrLoc = getLoc();
7461 StringRef DepCtrName = getTokenStr();
7462
7463 if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name") ||
7464 !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
7465 return false;
7466
7467 int64_t ExprVal;
7468 if (!parseExpr(Imm&: ExprVal))
7469 return false;
7470
7471 unsigned PrevOprMask = UsedOprMask;
7472 int CntVal = encodeDepCtr(Name: DepCtrName, Val: ExprVal, UsedOprMask, STI: getSTI());
7473
7474 if (CntVal < 0) {
7475 depCtrError(Loc: DepCtrLoc, ErrorId: CntVal, DepCtrName);
7476 return false;
7477 }
7478
7479 if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
7480 return false;
7481
7482 if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) {
7483 if (isToken(Kind: AsmToken::EndOfStatement)) {
7484 Error(L: getLoc(), Msg: "expected a counter name");
7485 return false;
7486 }
7487 }
7488
7489 unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7490 DepCtr = (DepCtr & ~CntValMask) | CntVal;
7491 return true;
7492}
7493
7494ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7495 using namespace llvm::AMDGPU::DepCtr;
7496
7497 int64_t DepCtr = getDefaultDepCtrEncoding(STI: getSTI());
7498 SMLoc Loc = getLoc();
7499
7500 if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
7501 unsigned UsedOprMask = 0;
7502 while (!isToken(Kind: AsmToken::EndOfStatement)) {
7503 if (!parseDepCtr(DepCtr, UsedOprMask))
7504 return ParseStatus::Failure;
7505 }
7506 } else {
7507 if (!parseExpr(Imm&: DepCtr))
7508 return ParseStatus::Failure;
7509 }
7510
7511 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DepCtr, Loc));
7512 return ParseStatus::Success;
7513}
7514
7515bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7516
7517//===----------------------------------------------------------------------===//
7518// hwreg
7519//===----------------------------------------------------------------------===//
7520
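// Parse the "hwreg(reg[, offset, size])" form of a hardware register
// operand. The register may be given by name or as an expression; the bit
// offset and field width are optional but must be given together.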
7521ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
7522 OperandInfoTy &Offset,
7523 OperandInfoTy &Width) {
7524 using namespace llvm::AMDGPU::Hwreg;
7525
7526 if (!trySkipId(Id: "hwreg", Kind: AsmToken::LParen))
7527 return ParseStatus::NoMatch;
7528
7529 // The register may be specified by name or using a numeric code
7530 HwReg.Loc = getLoc();
7531 if (isToken(Kind: AsmToken::Identifier) &&
7532 (HwReg.Val = getHwregId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) {
7533 HwReg.IsSymbolic = true;
7534 lex(); // skip register name
7535 } else if (!parseExpr(Imm&: HwReg.Val, Expected: "a register name")) {
7536 return ParseStatus::Failure;
7537 }
7538
7539 if (trySkipToken(Kind: AsmToken::RParen))
7540 return ParseStatus::Success;
7541
7542 // parse optional params
7543 if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma or a closing parenthesis"))
7544 return ParseStatus::Failure;
7545
7546 Offset.Loc = getLoc();
7547 if (!parseExpr(Imm&: Offset.Val))
7548 return ParseStatus::Failure;
7549
7550 if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
7551 return ParseStatus::Failure;
7552
7553 Width.Loc = getLoc();
7554 if (!parseExpr(Imm&: Width.Val) ||
7555 !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
7556 return ParseStatus::Failure;
7557
7558 return ParseStatus::Success;
7559}
7560
7561ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7562 using namespace llvm::AMDGPU::Hwreg;
7563
7564 int64_t ImmVal = 0;
7565 SMLoc Loc = getLoc();
7566
7567 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
7568 HwregId::Default);
7569 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
7570 HwregOffset::Default);
7571 struct : StructuredOpField {
7572 using StructuredOpField::StructuredOpField;
7573 bool validate(AMDGPUAsmParser &Parser) const override {
7574 if (!isUIntN(N: Width, x: Val - 1))
7575 return Error(Parser, Err: "only values from 1 to 32 are legal");
7576 return true;
7577 }
7578 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
7579 ParseStatus Res = parseStructuredOpFields(Fields: {&HwReg, &Offset, &Width});
7580
7581 if (Res.isNoMatch())
7582 Res = parseHwregFunc(HwReg, Offset, Width);
7583
7584 if (Res.isSuccess()) {
7585 if (!validateStructuredOpFields(Fields: {&HwReg, &Offset, &Width}))
7586 return ParseStatus::Failure;
7587 ImmVal = HwregEncoding::encode(Values: HwReg.Val, Values: Offset.Val, Values: Width.Val);
7588 }
7589
7590 if (Res.isNoMatch() &&
7591 parseExpr(Imm&: ImmVal, Expected: "a hwreg macro, structured immediate"))
7592 Res = ParseStatus::Success;
7593
7594 if (!Res.isSuccess())
7595 return ParseStatus::Failure;
7596
7597 if (!isUInt<16>(x: ImmVal))
7598 return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal");
7599 Operands.push_back(
7600 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTyHwreg));
7601 return ParseStatus::Success;
7602}
7603
7604bool AMDGPUOperand::isHwreg() const {
7605 return isImmTy(ImmT: ImmTyHwreg);
7606}
7607
7608//===----------------------------------------------------------------------===//
7609// sendmsg
7610//===----------------------------------------------------------------------===//
7611
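// Parse the body of a "sendmsg(msg[, op[, stream]])" operand. The message
// and operation may be given symbolically or as expressions; the stream id
// is always an expression.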
7612bool
7613AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7614 OperandInfoTy &Op,
7615 OperandInfoTy &Stream) {
7616 using namespace llvm::AMDGPU::SendMsg;
7617
7618 Msg.Loc = getLoc();
7619 if (isToken(Kind: AsmToken::Identifier) &&
7620 (Msg.Val = getMsgId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) {
7621 Msg.IsSymbolic = true;
7622 lex(); // skip message name
7623 } else if (!parseExpr(Imm&: Msg.Val, Expected: "a message name")) {
7624 return false;
7625 }
7626
7627 if (trySkipToken(Kind: AsmToken::Comma)) {
7628 Op.IsDefined = true;
7629 Op.Loc = getLoc();
7630 if (isToken(Kind: AsmToken::Identifier) &&
7631 (Op.Val = getMsgOpId(MsgId: Msg.Val, Name: getTokenStr(), STI: getSTI())) !=
7632 OPR_ID_UNKNOWN) {
7633 lex(); // skip operation name
7634 } else if (!parseExpr(Imm&: Op.Val, Expected: "an operation name")) {
7635 return false;
7636 }
7637
7638 if (trySkipToken(Kind: AsmToken::Comma)) {
7639 Stream.IsDefined = true;
7640 Stream.Loc = getLoc();
7641 if (!parseExpr(Imm&: Stream.Val))
7642 return false;
7643 }
7644 }
7645
7646 return skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis");
7647}
7648
7649bool
7650AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7651 const OperandInfoTy &Op,
7652 const OperandInfoTy &Stream) {
7653 using namespace llvm::AMDGPU::SendMsg;
7654
7655  // Validation strictness depends on whether the message is specified
7656  // in symbolic or in numeric form. In the latter case,
7657  // only the possibility of encoding is checked.
7658 bool Strict = Msg.IsSymbolic;
7659
7660 if (Strict) {
7661 if (Msg.Val == OPR_ID_UNSUPPORTED) {
7662 Error(L: Msg.Loc, Msg: "specified message id is not supported on this GPU");
7663 return false;
7664 }
7665 } else {
7666 if (!isValidMsgId(MsgId: Msg.Val, STI: getSTI())) {
7667 Error(L: Msg.Loc, Msg: "invalid message id");
7668 return false;
7669 }
7670 }
7671 if (Strict && (msgRequiresOp(MsgId: Msg.Val, STI: getSTI()) != Op.IsDefined)) {
7672 if (Op.IsDefined) {
7673 Error(L: Op.Loc, Msg: "message does not support operations");
7674 } else {
7675 Error(L: Msg.Loc, Msg: "missing message operation");
7676 }
7677 return false;
7678 }
7679 if (!isValidMsgOp(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI(), Strict)) {
7680 if (Op.Val == OPR_ID_UNSUPPORTED)
7681 Error(L: Op.Loc, Msg: "specified operation id is not supported on this GPU");
7682 else
7683 Error(L: Op.Loc, Msg: "invalid operation id");
7684 return false;
7685 }
7686 if (Strict && !msgSupportsStream(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI()) &&
7687 Stream.IsDefined) {
7688 Error(L: Stream.Loc, Msg: "message operation does not support streams");
7689 return false;
7690 }
7691 if (!isValidMsgStream(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val, STI: getSTI(), Strict)) {
7692 Error(L: Stream.Loc, Msg: "invalid message stream id");
7693 return false;
7694 }
7695 return true;
7696}
7697
7698ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7699 using namespace llvm::AMDGPU::SendMsg;
7700
7701 int64_t ImmVal = 0;
7702 SMLoc Loc = getLoc();
7703
7704 if (trySkipId(Id: "sendmsg", Kind: AsmToken::LParen)) {
7705 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7706 OperandInfoTy Op(OP_NONE_);
7707 OperandInfoTy Stream(STREAM_ID_NONE_);
7708 if (parseSendMsgBody(Msg, Op, Stream) &&
7709 validateSendMsg(Msg, Op, Stream)) {
7710 ImmVal = encodeMsg(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val);
7711 } else {
7712 return ParseStatus::Failure;
7713 }
7714 } else if (parseExpr(Imm&: ImmVal, Expected: "a sendmsg macro")) {
7715 if (ImmVal < 0 || !isUInt<16>(x: ImmVal))
7716 return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal");
7717 } else {
7718 return ParseStatus::Failure;
7719 }
7720
7721 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTySendMsg));
7722 return ParseStatus::Success;
7723}
7724
7725bool AMDGPUOperand::isSendMsg() const {
7726 return isImmTy(ImmT: ImmTySendMsg);
7727}
7728
7729//===----------------------------------------------------------------------===//
7730// v_interp
7731//===----------------------------------------------------------------------===//
7732
7733ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7734 StringRef Str;
7735 SMLoc S = getLoc();
7736
7737 if (!parseId(Val&: Str))
7738 return ParseStatus::NoMatch;
7739
7740 int Slot = StringSwitch<int>(Str)
7741 .Case(S: "p10", Value: 0)
7742 .Case(S: "p20", Value: 1)
7743 .Case(S: "p0", Value: 2)
7744 .Default(Value: -1);
7745
7746 if (Slot == -1)
7747 return Error(L: S, Msg: "invalid interpolation slot");
7748
7749 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Slot, Loc: S,
7750 Type: AMDGPUOperand::ImmTyInterpSlot));
7751 return ParseStatus::Success;
7752}
7753
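// Parse a v_interp attribute of the form "attr<N>.<chan>", e.g. "attr0.x",
// producing separate attribute number and channel operands.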
7754ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7755 StringRef Str;
7756 SMLoc S = getLoc();
7757
7758 if (!parseId(Val&: Str))
7759 return ParseStatus::NoMatch;
7760
7761 if (!Str.starts_with(Prefix: "attr"))
7762 return Error(L: S, Msg: "invalid interpolation attribute");
7763
7764 StringRef Chan = Str.take_back(N: 2);
7765 int AttrChan = StringSwitch<int>(Chan)
7766 .Case(S: ".x", Value: 0)
7767 .Case(S: ".y", Value: 1)
7768 .Case(S: ".z", Value: 2)
7769 .Case(S: ".w", Value: 3)
7770 .Default(Value: -1);
7771 if (AttrChan == -1)
7772 return Error(L: S, Msg: "invalid or missing interpolation attribute channel");
7773
7774 Str = Str.drop_back(N: 2).drop_front(N: 4);
7775
7776 uint8_t Attr;
7777 if (Str.getAsInteger(Radix: 10, Result&: Attr))
7778 return Error(L: S, Msg: "invalid or missing interpolation attribute number");
7779
7780 if (Attr > 32)
7781 return Error(L: S, Msg: "out of bounds interpolation attribute number");
7782
7783 SMLoc SChan = SMLoc::getFromPointer(Ptr: Chan.data());
7784
7785 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Attr, Loc: S,
7786 Type: AMDGPUOperand::ImmTyInterpAttr));
7787 Operands.push_back(Elt: AMDGPUOperand::CreateImm(
7788 AsmParser: this, Val: AttrChan, Loc: SChan, Type: AMDGPUOperand::ImmTyInterpAttrChan));
7789 return ParseStatus::Success;
7790}
7791
7792//===----------------------------------------------------------------------===//
7793// exp
7794//===----------------------------------------------------------------------===//
7795
7796ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7797 using namespace llvm::AMDGPU::Exp;
7798
7799 StringRef Str;
7800 SMLoc S = getLoc();
7801
7802 if (!parseId(Val&: Str))
7803 return ParseStatus::NoMatch;
7804
7805 unsigned Id = getTgtId(Name: Str);
7806 if (Id == ET_INVALID || !isSupportedTgtId(Id, STI: getSTI()))
7807 return Error(L: S, Msg: (Id == ET_INVALID)
7808 ? "invalid exp target"
7809 : "exp target is not supported on this GPU");
7810
7811 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Id, Loc: S,
7812 Type: AMDGPUOperand::ImmTyExpTgt));
7813 return ParseStatus::Success;
7814}
7815
7816//===----------------------------------------------------------------------===//
7817// parser helpers
7818//===----------------------------------------------------------------------===//
7819
7820bool
7821AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7822 return Token.is(K: AsmToken::Identifier) && Token.getString() == Id;
7823}
7824
7825bool
7826AMDGPUAsmParser::isId(const StringRef Id) const {
7827 return isId(Token: getToken(), Id);
7828}
7829
7830bool
7831AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7832 return getTokenKind() == Kind;
7833}
7834
7835StringRef AMDGPUAsmParser::getId() const {
7836 return isToken(Kind: AsmToken::Identifier) ? getTokenStr() : StringRef();
7837}
7838
7839bool
7840AMDGPUAsmParser::trySkipId(const StringRef Id) {
7841 if (isId(Id)) {
7842 lex();
7843 return true;
7844 }
7845 return false;
7846}
7847
7848bool
7849AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7850 if (isToken(Kind: AsmToken::Identifier)) {
7851 StringRef Tok = getTokenStr();
7852 if (Tok.starts_with(Prefix: Pref) && Tok.drop_front(N: Pref.size()) == Id) {
7853 lex();
7854 return true;
7855 }
7856 }
7857 return false;
7858}
7859
7860bool
7861AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7862 if (isId(Id) && peekToken().is(K: Kind)) {
7863 lex();
7864 lex();
7865 return true;
7866 }
7867 return false;
7868}
7869
7870bool
7871AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7872 if (isToken(Kind)) {
7873 lex();
7874 return true;
7875 }
7876 return false;
7877}
7878
7879bool
7880AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7881 const StringRef ErrMsg) {
7882 if (!trySkipToken(Kind)) {
7883 Error(L: getLoc(), Msg: ErrMsg);
7884 return false;
7885 }
7886 return true;
7887}
7888
7889bool
7890AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7891 SMLoc S = getLoc();
7892
7893 const MCExpr *Expr;
7894 if (Parser.parseExpression(Res&: Expr))
7895 return false;
7896
7897 if (Expr->evaluateAsAbsolute(Res&: Imm))
7898 return true;
7899
7900 if (Expected.empty()) {
7901 Error(L: S, Msg: "expected absolute expression");
7902 } else {
7903 Error(L: S, Msg: Twine("expected ", Expected) +
7904 Twine(" or an absolute expression"));
7905 }
7906 return false;
7907}
7908
7909bool
7910AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7911 SMLoc S = getLoc();
7912
7913 const MCExpr *Expr;
7914 if (Parser.parseExpression(Res&: Expr))
7915 return false;
7916
7917 int64_t IntVal;
7918 if (Expr->evaluateAsAbsolute(Res&: IntVal)) {
7919 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S));
7920 } else {
7921 Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S));
7922 }
7923 return true;
7924}
7925
7926bool
7927AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7928 if (isToken(Kind: AsmToken::String)) {
7929 Val = getToken().getStringContents();
7930 lex();
7931 return true;
7932 }
7933 Error(L: getLoc(), Msg: ErrMsg);
7934 return false;
7935}
7936
7937bool
7938AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7939 if (isToken(Kind: AsmToken::Identifier)) {
7940 Val = getTokenStr();
7941 lex();
7942 return true;
7943 }
7944 if (!ErrMsg.empty())
7945 Error(L: getLoc(), Msg: ErrMsg);
7946 return false;
7947}
7948
7949AsmToken
7950AMDGPUAsmParser::getToken() const {
7951 return Parser.getTok();
7952}
7953
7954AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7955 return isToken(Kind: AsmToken::EndOfStatement)
7956 ? getToken()
7957 : getLexer().peekTok(ShouldSkipSpace);
7958}
7959
7960void
7961AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7962 auto TokCount = getLexer().peekTokens(Buf: Tokens);
7963
7964 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7965 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7966}
7967
7968AsmToken::TokenKind
7969AMDGPUAsmParser::getTokenKind() const {
7970 return getLexer().getKind();
7971}
7972
7973SMLoc
7974AMDGPUAsmParser::getLoc() const {
7975 return getToken().getLoc();
7976}
7977
7978StringRef
7979AMDGPUAsmParser::getTokenStr() const {
7980 return getToken().getString();
7981}
7982
7983void
7984AMDGPUAsmParser::lex() {
7985 Parser.Lex();
7986}
7987
7988SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7989 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7990}
7991
7992SMLoc
7993AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7994 const OperandVector &Operands) const {
7995 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7996 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7997 if (Test(Op))
7998 return Op.getStartLoc();
7999 }
8000 return getInstLoc(Operands);
8001}
8002
8003SMLoc
8004AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8005 const OperandVector &Operands) const {
8006 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(ImmT: Type); };
8007 return getOperandLoc(Test, Operands);
8008}
8009
8010SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg,
8011 const OperandVector &Operands) const {
8012 auto Test = [=](const AMDGPUOperand& Op) {
8013 return Op.isRegKind() && Op.getReg() == Reg;
8014 };
8015 return getOperandLoc(Test, Operands);
8016}
8017
8018SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
8019 bool SearchMandatoryLiterals) const {
8020 auto Test = [](const AMDGPUOperand& Op) {
8021 return Op.IsImmKindLiteral() || Op.isExpr();
8022 };
8023 SMLoc Loc = getOperandLoc(Test, Operands);
8024 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
8025 Loc = getMandatoryLitLoc(Operands);
8026 return Loc;
8027}
8028
8029SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
8030 auto Test = [](const AMDGPUOperand &Op) {
8031 return Op.IsImmKindMandatoryLiteral();
8032 };
8033 return getOperandLoc(Test, Operands);
8034}
8035
8036SMLoc
8037AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
8038 auto Test = [](const AMDGPUOperand& Op) {
8039 return Op.isImmKindConst();
8040 };
8041 return getOperandLoc(Test, Operands);
8042}
8043
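// Parse a structured operand of the form "{id: value, ...}", matching each
// field name against Fields and recording its value; unknown or duplicate
// fields are rejected.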
8044ParseStatus
8045AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8046 if (!trySkipToken(Kind: AsmToken::LCurly))
8047 return ParseStatus::NoMatch;
8048
8049 bool First = true;
8050 while (!trySkipToken(Kind: AsmToken::RCurly)) {
8051 if (!First &&
8052 !skipToken(Kind: AsmToken::Comma, ErrMsg: "comma or closing brace expected"))
8053 return ParseStatus::Failure;
8054
8055 StringRef Id = getTokenStr();
8056 SMLoc IdLoc = getLoc();
8057 if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "field name expected") ||
8058 !skipToken(Kind: AsmToken::Colon, ErrMsg: "colon expected"))
8059 return ParseStatus::Failure;
8060
8061 const auto *I =
8062 find_if(Range&: Fields, P: [Id](StructuredOpField *F) { return F->Id == Id; });
8063 if (I == Fields.end())
8064 return Error(L: IdLoc, Msg: "unknown field");
8065 if ((*I)->IsDefined)
8066 return Error(L: IdLoc, Msg: "duplicate field");
8067
8068 // TODO: Support symbolic values.
8069 (*I)->Loc = getLoc();
8070 if (!parseExpr(Imm&: (*I)->Val))
8071 return ParseStatus::Failure;
8072 (*I)->IsDefined = true;
8073
8074 First = false;
8075 }
8076 return ParseStatus::Success;
8077}
8078
8079bool AMDGPUAsmParser::validateStructuredOpFields(
8080 ArrayRef<const StructuredOpField *> Fields) {
8081 return all_of(Range&: Fields, P: [this](const StructuredOpField *F) {
8082 return F->validate(Parser&: *this);
8083 });
8084}
8085
8086//===----------------------------------------------------------------------===//
8087// swizzle
8088//===----------------------------------------------------------------------===//
8089
8090LLVM_READNONE
8091static unsigned
8092encodeBitmaskPerm(const unsigned AndMask,
8093 const unsigned OrMask,
8094 const unsigned XorMask) {
8095 using namespace llvm::AMDGPU::Swizzle;
8096
8097 return BITMASK_PERM_ENC |
8098 (AndMask << BITMASK_AND_SHIFT) |
8099 (OrMask << BITMASK_OR_SHIFT) |
8100 (XorMask << BITMASK_XOR_SHIFT);
8101}
8102
8103bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8104 const unsigned MaxVal,
8105 const Twine &ErrMsg, SMLoc &Loc) {
8106 if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma")) {
8107 return false;
8108 }
8109 Loc = getLoc();
8110 if (!parseExpr(Imm&: Op)) {
8111 return false;
8112 }
8113 if (Op < MinVal || Op > MaxVal) {
8114 Error(L: Loc, Msg: ErrMsg);
8115 return false;
8116 }
8117
8118 return true;
8119}
8120
8121bool
8122AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8123 const unsigned MinVal,
8124 const unsigned MaxVal,
8125 const StringRef ErrMsg) {
8126 SMLoc Loc;
8127 for (unsigned i = 0; i < OpNum; ++i) {
8128 if (!parseSwizzleOperand(Op&: Op[i], MinVal, MaxVal, ErrMsg, Loc))
8129 return false;
8130 }
8131
8132 return true;
8133}
8134
8135bool
8136AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8137 using namespace llvm::AMDGPU::Swizzle;
8138
8139 int64_t Lane[LANE_NUM];
8140 if (parseSwizzleOperands(OpNum: LANE_NUM, Op: Lane, MinVal: 0, MaxVal: LANE_MAX,
8141 ErrMsg: "expected a 2-bit lane id")) {
8142 Imm = QUAD_PERM_ENC;
8143 for (unsigned I = 0; I < LANE_NUM; ++I) {
8144 Imm |= Lane[I] << (LANE_SHIFT * I);
8145 }
8146 return true;
8147 }
8148 return false;
8149}
8150
8151bool
8152AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8153 using namespace llvm::AMDGPU::Swizzle;
8154
8155 SMLoc Loc;
8156 int64_t GroupSize;
8157 int64_t LaneIdx;
8158
8159 if (!parseSwizzleOperand(Op&: GroupSize,
8160 MinVal: 2, MaxVal: 32,
8161 ErrMsg: "group size must be in the interval [2,32]",
8162 Loc)) {
8163 return false;
8164 }
8165 if (!isPowerOf2_64(Value: GroupSize)) {
8166 Error(L: Loc, Msg: "group size must be a power of two");
8167 return false;
8168 }
8169 if (parseSwizzleOperand(Op&: LaneIdx,
8170 MinVal: 0, MaxVal: GroupSize - 1,
8171 ErrMsg: "lane id must be in the interval [0,group size - 1]",
8172 Loc)) {
8173 Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX - GroupSize + 1, OrMask: LaneIdx, XorMask: 0);
8174 return true;
8175 }
8176 return false;
8177}
8178
8179bool
8180AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8181 using namespace llvm::AMDGPU::Swizzle;
8182
8183 SMLoc Loc;
8184 int64_t GroupSize;
8185
8186 if (!parseSwizzleOperand(Op&: GroupSize,
8187 MinVal: 2, MaxVal: 32,
8188 ErrMsg: "group size must be in the interval [2,32]",
8189 Loc)) {
8190 return false;
8191 }
8192 if (!isPowerOf2_64(Value: GroupSize)) {
8193 Error(L: Loc, Msg: "group size must be a power of two");
8194 return false;
8195 }
8196
8197 Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: 0, XorMask: GroupSize - 1);
8198 return true;
8199}
8200
8201bool
8202AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8203 using namespace llvm::AMDGPU::Swizzle;
8204
8205 SMLoc Loc;
8206 int64_t GroupSize;
8207
8208 if (!parseSwizzleOperand(Op&: GroupSize,
8209 MinVal: 1, MaxVal: 16,
8210 ErrMsg: "group size must be in the interval [1,16]",
8211 Loc)) {
8212 return false;
8213 }
8214 if (!isPowerOf2_64(Value: GroupSize)) {
8215 Error(L: Loc, Msg: "group size must be a power of two");
8216 return false;
8217 }
8218
8219 Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: 0, XorMask: GroupSize);
8220 return true;
8221}
8222
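// Parse the BITMASK_PERM control string: a 5-character mask where each
// position is '0' (force the bit to 0), '1' (force it to 1), 'p' (preserve
// the lane id bit) or 'i' (invert the lane id bit).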
8223bool
8224AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8225 using namespace llvm::AMDGPU::Swizzle;
8226
8227 if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma")) {
8228 return false;
8229 }
8230
8231 StringRef Ctl;
8232 SMLoc StrLoc = getLoc();
8233 if (!parseString(Val&: Ctl)) {
8234 return false;
8235 }
8236 if (Ctl.size() != BITMASK_WIDTH) {
8237 Error(L: StrLoc, Msg: "expected a 5-character mask");
8238 return false;
8239 }
8240
8241 unsigned AndMask = 0;
8242 unsigned OrMask = 0;
8243 unsigned XorMask = 0;
8244
8245 for (size_t i = 0; i < Ctl.size(); ++i) {
8246 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8247 switch(Ctl[i]) {
8248 default:
8249 Error(L: StrLoc, Msg: "invalid mask");
8250 return false;
8251 case '0':
8252 break;
8253 case '1':
8254 OrMask |= Mask;
8255 break;
8256 case 'p':
8257 AndMask |= Mask;
8258 break;
8259 case 'i':
8260 AndMask |= Mask;
8261 XorMask |= Mask;
8262 break;
8263 }
8264 }
8265
8266 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8267 return true;
8268}
8269
8270bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8271 using namespace llvm::AMDGPU::Swizzle;
8272
8273 if (!AMDGPU::isGFX9Plus(STI: getSTI())) {
8274 Error(L: getLoc(), Msg: "FFT mode swizzle not supported on this GPU");
8275 return false;
8276 }
8277
8278 int64_t Swizzle;
8279 SMLoc Loc;
8280 if (!parseSwizzleOperand(Op&: Swizzle, MinVal: 0, MaxVal: FFT_SWIZZLE_MAX,
8281 ErrMsg: "FFT swizzle must be in the interval [0," +
8282 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8283 Loc))
8284 return false;
8285
8286 Imm = FFT_MODE_ENC | Swizzle;
8287 return true;
8288}
8289
8290bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8291 using namespace llvm::AMDGPU::Swizzle;
8292
8293 if (!AMDGPU::isGFX9Plus(STI: getSTI())) {
8294 Error(L: getLoc(), Msg: "Rotate mode swizzle not supported on this GPU");
8295 return false;
8296 }
8297
8298 SMLoc Loc;
8299 int64_t Direction;
8300
8301 if (!parseSwizzleOperand(Op&: Direction, MinVal: 0, MaxVal: 1,
8302 ErrMsg: "direction must be 0 (left) or 1 (right)", Loc))
8303 return false;
8304
8305 int64_t RotateSize;
8306 if (!parseSwizzleOperand(
8307 Op&: RotateSize, MinVal: 0, MaxVal: ROTATE_MAX_SIZE,
8308 ErrMsg: "number of threads to rotate must be in the interval [0," +
8309 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8310 Loc))
8311 return false;
8312
8313 Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
8314 (RotateSize << ROTATE_SIZE_SHIFT);
8315 return true;
8316}
8317
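// Parse a raw swizzle offset given as an absolute expression; it must fit in
// 16 bits.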
8318bool
8319AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8320
8321 SMLoc OffsetLoc = getLoc();
8322
8323 if (!parseExpr(Imm, Expected: "a swizzle macro")) {
8324 return false;
8325 }
8326 if (!isUInt<16>(x: Imm)) {
8327 Error(L: OffsetLoc, Msg: "expected a 16-bit offset");
8328 return false;
8329 }
8330 return true;
8331}
8332
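// Parse a swizzle macro of the form swizzle(<MODE>, ...), where <MODE> is one
// of the names in Swizzle::IdSymbolic (quad-perm, bitmask-perm, broadcast,
// swap, reverse, FFT or rotate).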
8333bool
8334AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
8335 using namespace llvm::AMDGPU::Swizzle;
8336
  if (skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis")) {
8338
8339 SMLoc ModeLoc = getLoc();
8340 bool Ok = false;
8341
8342 if (trySkipId(Id: IdSymbolic[ID_QUAD_PERM])) {
8343 Ok = parseSwizzleQuadPerm(Imm);
8344 } else if (trySkipId(Id: IdSymbolic[ID_BITMASK_PERM])) {
8345 Ok = parseSwizzleBitmaskPerm(Imm);
8346 } else if (trySkipId(Id: IdSymbolic[ID_BROADCAST])) {
8347 Ok = parseSwizzleBroadcast(Imm);
8348 } else if (trySkipId(Id: IdSymbolic[ID_SWAP])) {
8349 Ok = parseSwizzleSwap(Imm);
8350 } else if (trySkipId(Id: IdSymbolic[ID_REVERSE])) {
8351 Ok = parseSwizzleReverse(Imm);
8352 } else if (trySkipId(Id: IdSymbolic[ID_FFT])) {
8353 Ok = parseSwizzleFFT(Imm);
8354 } else if (trySkipId(Id: IdSymbolic[ID_ROTATE])) {
8355 Ok = parseSwizzleRotate(Imm);
8356 } else {
8357 Error(L: ModeLoc, Msg: "expected a swizzle mode");
8358 }
8359
    return Ok && skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis");
8361 }
8362
8363 return false;
8364}
8365
8366ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8367 SMLoc S = getLoc();
8368 int64_t Imm = 0;
8369
8370 if (trySkipId(Id: "offset")) {
8371
8372 bool Ok = false;
8373 if (skipToken(Kind: AsmToken::Colon, ErrMsg: "expected a colon")) {
8374 if (trySkipId(Id: "swizzle")) {
8375 Ok = parseSwizzleMacro(Imm);
8376 } else {
8377 Ok = parseSwizzleOffset(Imm);
8378 }
8379 }
8380
8381 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTySwizzle));
8382
8383 return Ok ? ParseStatus::Success : ParseStatus::Failure;
8384 }
8385 return ParseStatus::NoMatch;
8386}
8387
8388bool
8389AMDGPUOperand::isSwizzle() const {
8390 return isImmTy(ImmT: ImmTySwizzle);
8391}
8392
8393//===----------------------------------------------------------------------===//
8394// VGPR Index Mode
8395//===----------------------------------------------------------------------===//
8396
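// Parse the body of a gpr_idx(...) operand after the opening parenthesis: a
// comma-separated list of distinct VGPR index modes terminated by ')'.
// Returns OFF for an empty list, a bitmask of the listed modes otherwise, or
// UNDEF on a parse error.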
8397int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
8398
8399 using namespace llvm::AMDGPU::VGPRIndexMode;
8400
8401 if (trySkipToken(Kind: AsmToken::RParen)) {
8402 return OFF;
8403 }
8404
8405 int64_t Imm = 0;
8406
8407 while (true) {
8408 unsigned Mode = 0;
8409 SMLoc S = getLoc();
8410
8411 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
8412 if (trySkipId(Id: IdSymbolic[ModeId])) {
8413 Mode = 1 << ModeId;
8414 break;
8415 }
8416 }
8417
8418 if (Mode == 0) {
8419 Error(L: S, Msg: (Imm == 0)?
8420 "expected a VGPR index mode or a closing parenthesis" :
8421 "expected a VGPR index mode");
8422 return UNDEF;
8423 }
8424
8425 if (Imm & Mode) {
8426 Error(L: S, Msg: "duplicate VGPR index mode");
8427 return UNDEF;
8428 }
8429 Imm |= Mode;
8430
8431 if (trySkipToken(Kind: AsmToken::RParen))
8432 break;
8433 if (!skipToken(Kind: AsmToken::Comma,
8434 ErrMsg: "expected a comma or a closing parenthesis"))
8435 return UNDEF;
8436 }
8437
8438 return Imm;
8439}
8440
8441ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8442
8443 using namespace llvm::AMDGPU::VGPRIndexMode;
8444
8445 int64_t Imm = 0;
8446 SMLoc S = getLoc();
8447
8448 if (trySkipId(Id: "gpr_idx", Kind: AsmToken::LParen)) {
8449 Imm = parseGPRIdxMacro();
8450 if (Imm == UNDEF)
8451 return ParseStatus::Failure;
8452 } else {
8453 if (getParser().parseAbsoluteExpression(Res&: Imm))
8454 return ParseStatus::Failure;
8455 if (Imm < 0 || !isUInt<4>(x: Imm))
8456 return Error(L: S, Msg: "invalid immediate: only 4-bit values are legal");
8457 }
8458
8459 Operands.push_back(
8460 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTyGprIdxMode));
8461 return ParseStatus::Success;
8462}
8463
8464bool AMDGPUOperand::isGPRIdxMode() const {
8465 return isImmTy(ImmT: ImmTyGprIdxMode);
8466}
8467
8468//===----------------------------------------------------------------------===//
8469// sopp branch targets
8470//===----------------------------------------------------------------------===//
8471
8472ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
8473
8474 // Make sure we are not parsing something
8475 // that looks like a label or an expression but is not.
8476 // This will improve error messages.
8477 if (isRegister() || isModifier())
8478 return ParseStatus::NoMatch;
8479
8480 if (!parseExpr(Operands))
8481 return ParseStatus::Failure;
8482
8483 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
8484 assert(Opr.isImm() || Opr.isExpr());
8485 SMLoc Loc = Opr.getStartLoc();
8486
8487 // Currently we do not support arbitrary expressions as branch targets.
8488 // Only labels and absolute expressions are accepted.
8489 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
8490 Error(L: Loc, Msg: "expected an absolute expression or a label");
8491 } else if (Opr.isImm() && !Opr.isS16Imm()) {
8492 Error(L: Loc, Msg: "expected a 16-bit signed jump offset");
8493 }
8494
8495 return ParseStatus::Success;
8496}
8497
8498//===----------------------------------------------------------------------===//
8499// Boolean holding registers
8500//===----------------------------------------------------------------------===//
8501
8502ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8503 return parseReg(Operands);
8504}
8505
8506//===----------------------------------------------------------------------===//
8507// mubuf
8508//===----------------------------------------------------------------------===//
8509
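// Convert parsed MUBUF operands into MCInst operands. For atomic instructions
// with a return value, a tied copy of the dst register is inserted right
// after it; the optional offset and cache policy immediates are added at the
// end.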
8510void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8511 const OperandVector &Operands,
8512 bool IsAtomic) {
8513 OptionalImmIndexMap OptionalIdx;
8514 unsigned FirstOperandIdx = 1;
8515 bool IsAtomicReturn = false;
8516
8517 if (IsAtomic) {
8518 IsAtomicReturn = MII.get(Opcode: Inst.getOpcode()).TSFlags &
8519 SIInstrFlags::IsAtomicRet;
8520 }
8521
8522 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8523 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8524
8525 // Add the register arguments
8526 if (Op.isReg()) {
8527 Op.addRegOperands(Inst, N: 1);
8528 // Insert a tied src for atomic return dst.
8529 // This cannot be postponed as subsequent calls to
8530 // addImmOperands rely on correct number of MC operands.
8531 if (IsAtomicReturn && i == FirstOperandIdx)
8532 Op.addRegOperands(Inst, N: 1);
8533 continue;
8534 }
8535
8536 // Handle the case where soffset is an immediate
8537 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8538 Op.addImmOperands(Inst, N: 1);
8539 continue;
8540 }
8541
8542 // Handle tokens like 'offen' which are sometimes hard-coded into the
8543 // asm string. There are no MCInst operands for these.
8544 if (Op.isToken()) {
8545 continue;
8546 }
8547 assert(Op.isImm());
8548
8549 // Handle optional arguments
8550 OptionalIdx[Op.getImmTy()] = i;
8551 }
8552
8553 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOffset);
8554 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyCPol, Default: 0);
8555}
8556
8557//===----------------------------------------------------------------------===//
8558// smrd
8559//===----------------------------------------------------------------------===//
8560
8561bool AMDGPUOperand::isSMRDOffset8() const {
8562 return isImmLiteral() && isUInt<8>(x: getImm());
8563}
8564
8565bool AMDGPUOperand::isSMEMOffset() const {
8566 // Offset range is checked later by validator.
8567 return isImmLiteral();
8568}
8569
8570bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset does not fit in 8 bits.
8573 return isImmLiteral() && !isUInt<8>(x: getImm()) && isUInt<32>(x: getImm());
8574}
8575
8576//===----------------------------------------------------------------------===//
8577// vop3
8578//===----------------------------------------------------------------------===//
8579
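// Convert omod values from their assembly form to the hardware encoding:
// mul:1 -> 0, mul:2 -> 1, mul:4 -> 2, div:1 -> 0 and div:2 -> 3.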
8580static bool ConvertOmodMul(int64_t &Mul) {
8581 if (Mul != 1 && Mul != 2 && Mul != 4)
8582 return false;
8583
8584 Mul >>= 1;
8585 return true;
8586}
8587
8588static bool ConvertOmodDiv(int64_t &Div) {
8589 if (Div == 1) {
8590 Div = 0;
8591 return true;
8592 }
8593
8594 if (Div == 2) {
8595 Div = 3;
8596 return true;
8597 }
8598
8599 return false;
8600}
8601
8602// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8603// This is intentional and ensures compatibility with sp3.
8604// See bug 35397 for details.
8605bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8606 if (BoundCtrl == 0 || BoundCtrl == 1) {
8607 if (!isGFX11Plus())
8608 BoundCtrl = 1;
8609 return true;
8610 }
8611 return false;
8612}
8613
8614void AMDGPUAsmParser::onBeginOfFile() {
8615 if (!getParser().getStreamer().getTargetStreamer() ||
8616 getSTI().getTargetTriple().getArch() == Triple::r600)
8617 return;
8618
8619 if (!getTargetStreamer().getTargetID())
8620 getTargetStreamer().initializeTargetID(STI: getSTI(),
8621 FeatureString: getSTI().getFeatureString());
8622
8623 if (isHsaAbi(STI: getSTI()))
8624 getTargetStreamer().EmitDirectiveAMDGCNTarget();
8625}
8626
8627/// Parse AMDGPU specific expressions.
8628///
/// expr ::= or(expr, ...) |
///          max(expr, ...) |
///          extrasgprs(expr, ...) |
///          totalnumvgprs(expr, ...) |
///          alignto(expr, ...) |
///          occupancy(expr, ...)
8631///
8632bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
8633 using AGVK = AMDGPUMCExpr::VariantKind;
8634
8635 if (isToken(Kind: AsmToken::Identifier)) {
8636 StringRef TokenId = getTokenStr();
8637 AGVK VK = StringSwitch<AGVK>(TokenId)
8638 .Case(S: "max", Value: AGVK::AGVK_Max)
8639 .Case(S: "or", Value: AGVK::AGVK_Or)
8640 .Case(S: "extrasgprs", Value: AGVK::AGVK_ExtraSGPRs)
8641 .Case(S: "totalnumvgprs", Value: AGVK::AGVK_TotalNumVGPRs)
8642 .Case(S: "alignto", Value: AGVK::AGVK_AlignTo)
8643 .Case(S: "occupancy", Value: AGVK::AGVK_Occupancy)
8644 .Default(Value: AGVK::AGVK_None);
8645
8646 if (VK != AGVK::AGVK_None && peekToken().is(K: AsmToken::LParen)) {
8647 SmallVector<const MCExpr *, 4> Exprs;
8648 uint64_t CommaCount = 0;
8649 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
8650 lex(); // Eat '('
8651 while (true) {
8652 if (trySkipToken(Kind: AsmToken::RParen)) {
8653 if (Exprs.empty()) {
8654 Error(L: getToken().getLoc(),
8655 Msg: "empty " + Twine(TokenId) + " expression");
8656 return true;
8657 }
8658 if (CommaCount + 1 != Exprs.size()) {
8659 Error(L: getToken().getLoc(),
8660 Msg: "mismatch of commas in " + Twine(TokenId) + " expression");
8661 return true;
8662 }
8663 Res = AMDGPUMCExpr::create(Kind: VK, Args: Exprs, Ctx&: getContext());
8664 return false;
8665 }
8666 const MCExpr *Expr;
8667 if (getParser().parseExpression(Res&: Expr, EndLoc))
8668 return true;
8669 Exprs.push_back(Elt: Expr);
8670 bool LastTokenWasComma = trySkipToken(Kind: AsmToken::Comma);
8671 if (LastTokenWasComma)
8672 CommaCount++;
8673 if (!LastTokenWasComma && !isToken(Kind: AsmToken::RParen)) {
8674 Error(L: getToken().getLoc(),
8675 Msg: "unexpected token in " + Twine(TokenId) + " expression");
8676 return true;
8677 }
8678 }
8679 }
8680 }
8681 return getParser().parsePrimaryExpr(Res, EndLoc, TypeInfo: nullptr);
8682}
8683
8684ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8685 StringRef Name = getTokenStr();
8686 if (Name == "mul") {
8687 return parseIntWithPrefix(Prefix: "mul", Operands,
8688 ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodMul);
8689 }
8690
8691 if (Name == "div") {
8692 return parseIntWithPrefix(Prefix: "div", Operands,
8693 ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodDiv);
8694 }
8695
8696 return ParseStatus::NoMatch;
8697}
8698
8699// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8700// the number of src operands present, then copies that bit into src0_modifiers.
8701static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
8702 int Opc = Inst.getOpcode();
8703 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
8704 if (OpSelIdx == -1)
8705 return;
8706
8707 int SrcNum;
8708 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
8709 AMDGPU::OpName::src2};
8710 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: Ops[SrcNum]);
8711 ++SrcNum)
8712 ;
8713 assert(SrcNum > 0);
8714
8715 unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
8716
8717 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst);
8718 if (DstIdx == -1)
8719 return;
8720
8721 const MCOperand &DstOp = Inst.getOperand(i: DstIdx);
8722 int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0_modifiers);
8723 uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm();
8724 if (DstOp.isReg() &&
8725 MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: DstOp.getReg())) {
8726 if (AMDGPU::isHi16Reg(Reg: DstOp.getReg(), MRI))
8727 ModVal |= SISrcMods::DST_OP_SEL;
8728 } else {
8729 if ((OpSel & (1 << SrcNum)) != 0)
8730 ModVal |= SISrcMods::DST_OP_SEL;
8731 }
8732 Inst.getOperand(i: ModIdx).setImm(ModVal);
8733}
8734
8735void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8736 const OperandVector &Operands) {
8737 cvtVOP3P(Inst, Operands);
8738 cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI());
8739}
8740
8741void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8742 OptionalImmIndexMap &OptionalIdx) {
8743 cvtVOP3P(Inst, Operands, OptionalIdx);
8744 cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI());
8745}
8746
8747static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8748 return
      // 1. This operand is an input-modifiers operand
      Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class
      && Desc.operands()[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand
8756 && Desc.getOperandConstraint(OpNum: OpNum + 1,
8757 Constraint: MCOI::OperandConstraint::TIED_TO) == -1;
8758}
8759
8760void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8761{
8762 OptionalImmIndexMap OptionalIdx;
8763 unsigned Opc = Inst.getOpcode();
8764
8765 unsigned I = 1;
8766 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
8767 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8768 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
8769 }
8770
8771 for (unsigned E = Operands.size(); I != E; ++I) {
8772 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8773 if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
8774 Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
8775 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8776 Op.isInterpAttrChan()) {
8777 Inst.addOperand(Op: MCOperand::createImm(Val: Op.getImm()));
8778 } else if (Op.isImmModifier()) {
8779 OptionalIdx[Op.getImmTy()] = I;
8780 } else {
8781 llvm_unreachable("unhandled operand type");
8782 }
8783 }
8784
8785 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::high))
8786 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8787 ImmT: AMDGPUOperand::ImmTyHigh);
8788
8789 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
8790 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8791 ImmT: AMDGPUOperand::ImmTyClamp);
8792
8793 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
8794 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8795 ImmT: AMDGPUOperand::ImmTyOModSI);
8796}
8797
8798void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8799{
8800 OptionalImmIndexMap OptionalIdx;
8801 unsigned Opc = Inst.getOpcode();
8802
8803 unsigned I = 1;
8804 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
8805 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8806 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
8807 }
8808
8809 for (unsigned E = Operands.size(); I != E; ++I) {
8810 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8811 if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
8812 Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
8813 } else if (Op.isImmModifier()) {
8814 OptionalIdx[Op.getImmTy()] = I;
8815 } else {
8816 llvm_unreachable("unhandled operand type");
8817 }
8818 }
8819
8820 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyClamp);
8821
8822 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
8823 if (OpSelIdx != -1)
8824 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel);
8825
8826 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyWaitEXP);
8827
8828 if (OpSelIdx == -1)
8829 return;
8830
8831 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
8832 AMDGPU::OpName::src2};
8833 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
8834 AMDGPU::OpName::src1_modifiers,
8835 AMDGPU::OpName::src2_modifiers};
8836
8837 unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
8838
8839 for (int J = 0; J < 3; ++J) {
8840 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: Ops[J]);
8841 if (OpIdx == -1)
8842 break;
8843
8844 int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
8845 uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm();
8846
8847 if ((OpSel & (1 << J)) != 0)
8848 ModVal |= SISrcMods::OP_SEL_0;
8849 if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8850 (OpSel & (1 << 3)) != 0)
8851 ModVal |= SISrcMods::DST_OP_SEL;
8852
8853 Inst.getOperand(i: ModIdx).setImm(ModVal);
8854 }
8855}
8856void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
8857 const OperandVector &Operands) {
8858 OptionalImmIndexMap OptionalIdx;
8859 unsigned Opc = Inst.getOpcode();
8860 unsigned I = 1;
8861 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz);
8862
8863 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
8864
8865 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
8866 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, N: 1);
8867
8868 for (unsigned E = Operands.size(); I != E; ++I) {
8869 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
8870 int NumOperands = Inst.getNumOperands();
    // The order of operands in the MCInst differs from the parsed operand
    // order. Add dummy cbsz and blgp operands at the corresponding MCInst
    // operand indices so that the scale values are parsed correctly.
8874 if (NumOperands == CbszOpIdx) {
8875 Inst.addOperand(Op: MCOperand::createImm(Val: 0));
8876 Inst.addOperand(Op: MCOperand::createImm(Val: 0));
8877 }
8878 if (isRegOrImmWithInputMods(Desc, OpNum: NumOperands)) {
8879 Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
8880 } else if (Op.isImmModifier()) {
8881 OptionalIdx[Op.getImmTy()] = I;
8882 } else {
8883 Op.addRegOrImmOperands(Inst, N: 1);
8884 }
8885 }
8886
8887 // Insert CBSZ and BLGP operands for F8F6F4 variants
8888 auto CbszIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyCBSZ);
8889 if (CbszIdx != OptionalIdx.end()) {
8890 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
8891 Inst.getOperand(i: CbszOpIdx).setImm(CbszVal);
8892 }
8893
8894 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
8895 auto BlgpIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyBLGP);
8896 if (BlgpIdx != OptionalIdx.end()) {
8897 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
8898 Inst.getOperand(i: BlgpOpIdx).setImm(BlgpVal);
8899 }
8900
8901 // Add dummy src_modifiers
8902 Inst.addOperand(Op: MCOperand::createImm(Val: 0));
8903 Inst.addOperand(Op: MCOperand::createImm(Val: 0));
8904
8905 // Handle op_sel fields
8906
8907 unsigned OpSel = 0;
8908 auto OpselIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyOpSel);
8909 if (OpselIdx != OptionalIdx.end()) {
8910 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
8911 .getImm();
8912 }
8913
8914 unsigned OpSelHi = 0;
8915 auto OpselHiIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyOpSelHi);
8916 if (OpselHiIdx != OptionalIdx.end()) {
8917 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
8918 .getImm();
8919 }
8920 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
8921 AMDGPU::OpName::src1_modifiers};
8922
8923 for (unsigned J = 0; J < 2; ++J) {
8924 unsigned ModVal = 0;
8925 if (OpSel & (1 << J))
8926 ModVal |= SISrcMods::OP_SEL_0;
8927 if (OpSelHi & (1 << J))
8928 ModVal |= SISrcMods::OP_SEL_1;
8929
8930 const int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
8931 Inst.getOperand(i: ModIdx).setImm(ModVal);
8932 }
8933}
8934
8935void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8936 OptionalImmIndexMap &OptionalIdx) {
8937 unsigned Opc = Inst.getOpcode();
8938
8939 unsigned I = 1;
8940 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
8941 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8942 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
8943 }
8944
8945 for (unsigned E = Operands.size(); I != E; ++I) {
8946 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8947 if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
8948 Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
8949 } else if (Op.isImmModifier()) {
8950 OptionalIdx[Op.getImmTy()] = I;
8951 } else {
8952 Op.addRegOrImmOperands(Inst, N: 1);
8953 }
8954 }
8955
8956 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::byte_sel)) {
8957 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in))
8958 Inst.addOperand(Op: Inst.getOperand(i: 0));
8959 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8960 ImmT: AMDGPUOperand::ImmTyByteSel);
8961 }
8962
8963 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
8964 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8965 ImmT: AMDGPUOperand::ImmTyClamp);
8966
8967 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
8968 addOptionalImmOperand(Inst, Operands, OptionalIdx,
8969 ImmT: AMDGPUOperand::ImmTyOModSI);
8970
8971 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // The assembler does not allow modifiers for this operand, so src2_modifiers
  // must be 0.
8975 if (isMAC(Opc)) {
8976 auto *it = Inst.begin();
8977 std::advance(i&: it, n: AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2_modifiers));
8978 it = Inst.insert(I: it, Op: MCOperand::createImm(Val: 0)); // no modifiers for src2
8979 ++it;
8980 // Copy the operand to ensure it's not invalidated when Inst grows.
8981 Inst.insert(I: it, Op: MCOperand(Inst.getOperand(i: 0))); // src2 = dst
8982 }
8983}
8984
8985void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8986 OptionalImmIndexMap OptionalIdx;
8987 cvtVOP3(Inst, Operands, OptionalIdx);
8988}
8989
8990void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8991 OptionalImmIndexMap &OptIdx) {
8992 const int Opc = Inst.getOpcode();
8993 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
8994
8995 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8996
8997 if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
8998 Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
8999 Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
9000 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
9001 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
9002 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
9003 Inst.addOperand(Op: MCOperand::createImm(Val: 0)); // Placeholder for src2_mods
9004 Inst.addOperand(Op: Inst.getOperand(i: 0));
9005 }
9006
9007 // Adding vdst_in operand is already covered for these DPP instructions in
9008 // cvtVOP3DPP.
9009 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in) &&
9010 !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
9011 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
9012 Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
9013 Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
9014 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
9015 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
9016 Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
9017 Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
9018 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
9019 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9020 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9021 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) {
9022 Inst.addOperand(Op: Inst.getOperand(i: 0));
9023 }
9024
9025 int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::bitop3);
9026 if (BitOp3Idx != -1) {
9027 addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyBitOp3);
9028 }
9029
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
9032
9033 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
9034 if (OpSelIdx != -1) {
9035 addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSel);
9036 }
9037
9038 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);
9039 if (OpSelHiIdx != -1) {
9040 int DefaultVal = IsPacked ? -1 : 0;
9041 addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSelHi,
9042 Default: DefaultVal);
9043 }
9044
9045 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::neg_lo);
9046 if (NegLoIdx != -1)
9047 addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegLo);
9048
9049 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::neg_hi);
9050 if (NegHiIdx != -1)
9051 addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegHi);
9052
9053 const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9054 AMDGPU::OpName::src2};
9055 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9056 AMDGPU::OpName::src1_modifiers,
9057 AMDGPU::OpName::src2_modifiers};
9058
9059 unsigned OpSel = 0;
9060 unsigned OpSelHi = 0;
9061 unsigned NegLo = 0;
9062 unsigned NegHi = 0;
9063
9064 if (OpSelIdx != -1)
9065 OpSel = Inst.getOperand(i: OpSelIdx).getImm();
9066
9067 if (OpSelHiIdx != -1)
9068 OpSelHi = Inst.getOperand(i: OpSelHiIdx).getImm();
9069
9070 if (NegLoIdx != -1)
9071 NegLo = Inst.getOperand(i: NegLoIdx).getImm();
9072
9073 if (NegHiIdx != -1)
9074 NegHi = Inst.getOperand(i: NegHiIdx).getImm();
9075
9076 for (int J = 0; J < 3; ++J) {
9077 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: Ops[J]);
9078 if (OpIdx == -1)
9079 break;
9080
9081 int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
9082
9083 if (ModIdx == -1)
9084 continue;
9085
9086 uint32_t ModVal = 0;
9087
9088 const MCOperand &SrcOp = Inst.getOperand(i: OpIdx);
9089 if (SrcOp.isReg() && getMRI()
9090 ->getRegClass(i: AMDGPU::VGPR_16RegClassID)
9091 .contains(Reg: SrcOp.getReg())) {
9092 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Reg: SrcOp.getReg(), MRI: *getMRI());
9093 if (VGPRSuffixIsHi)
9094 ModVal |= SISrcMods::OP_SEL_0;
9095 } else {
9096 if ((OpSel & (1 << J)) != 0)
9097 ModVal |= SISrcMods::OP_SEL_0;
9098 }
9099
9100 if ((OpSelHi & (1 << J)) != 0)
9101 ModVal |= SISrcMods::OP_SEL_1;
9102
9103 if ((NegLo & (1 << J)) != 0)
9104 ModVal |= SISrcMods::NEG;
9105
9106 if ((NegHi & (1 << J)) != 0)
9107 ModVal |= SISrcMods::NEG_HI;
9108
9109 Inst.getOperand(i: ModIdx).setImm(Inst.getOperand(i: ModIdx).getImm() | ModVal);
9110 }
9111}
9112
9113void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9114 OptionalImmIndexMap OptIdx;
9115 cvtVOP3(Inst, Operands, OptionalIdx&: OptIdx);
9116 cvtVOP3P(Inst, Operands, OptIdx);
9117}
9118
9119static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9120 unsigned i, unsigned Opc,
9121 AMDGPU::OpName OpName) {
9122 if (AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName) != -1)
9123 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, N: 2);
9124 else
9125 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, N: 1);
9126}
9127
9128void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
9129 unsigned Opc = Inst.getOpcode();
9130
9131 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, N: 1);
9132 addSrcModifiersAndSrc(Inst, Operands, i: 2, Opc, OpName: AMDGPU::OpName::src0_modifiers);
9133 addSrcModifiersAndSrc(Inst, Operands, i: 3, Opc, OpName: AMDGPU::OpName::src1_modifiers);
9134 ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, N: 1); // srcTiedDef
9135 ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, N: 1); // src2
9136
9137 OptionalImmIndexMap OptIdx;
9138 for (unsigned i = 5; i < Operands.size(); ++i) {
9139 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9140 OptIdx[Op.getImmTy()] = i;
9141 }
9142
9143 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_8bit))
9144 addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
9145 ImmT: AMDGPUOperand::ImmTyIndexKey8bit);
9146
9147 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_16bit))
9148 addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
9149 ImmT: AMDGPUOperand::ImmTyIndexKey16bit);
9150
9151 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
9152 addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyClamp);
9153
9154 cvtVOP3P(Inst, Operands, OptIdx);
9155}
9156
9157//===----------------------------------------------------------------------===//
9158// VOPD
9159//===----------------------------------------------------------------------===//
9160
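// Parse the '::' separator and the VOPDY mnemonic that follows it in a
// dual-issue VOPD instruction.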
9161ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9162 if (!hasVOPD(STI: getSTI()))
9163 return ParseStatus::NoMatch;
9164
9165 if (isToken(Kind: AsmToken::Colon) && peekToken(ShouldSkipSpace: false).is(K: AsmToken::Colon)) {
9166 SMLoc S = getLoc();
9167 lex();
9168 lex();
9169 Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "::", Loc: S));
9170 SMLoc OpYLoc = getLoc();
9171 StringRef OpYName;
9172 if (isToken(Kind: AsmToken::Identifier) && !Parser.parseIdentifier(Res&: OpYName)) {
9173 Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: OpYName, Loc: OpYLoc));
9174 return ParseStatus::Success;
9175 }
9176 return Error(L: OpYLoc, Msg: "expected a VOPDY instruction after ::");
9177 }
9178 return ParseStatus::NoMatch;
9179}
9180
9181// Create VOPD MCInst operands using parsed assembler operands.
9182void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9183 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9184 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9185 if (Op.isReg()) {
9186 Op.addRegOperands(Inst, N: 1);
9187 return;
9188 }
9189 if (Op.isImm()) {
9190 Op.addImmOperands(Inst, N: 1);
9191 return;
9192 }
9193 llvm_unreachable("Unhandled operand type in cvtVOPD");
9194 };
9195
9196 const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Inst.getOpcode(), InstrInfo: &MII);
9197
9198 // MCInst operands are ordered as follows:
9199 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9200
9201 for (auto CompIdx : VOPD::COMPONENTS) {
9202 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9203 }
9204
9205 for (auto CompIdx : VOPD::COMPONENTS) {
9206 const auto &CInfo = InstInfo[CompIdx];
9207 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9208 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9209 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9210 if (CInfo.hasSrc2Acc())
9211 addOp(CInfo.getIndexOfDstInParsedOperands());
9212 }
9213}
9214
9215//===----------------------------------------------------------------------===//
9216// dpp
9217//===----------------------------------------------------------------------===//
9218
9219bool AMDGPUOperand::isDPP8() const {
9220 return isImmTy(ImmT: ImmTyDPP8);
9221}
9222
9223bool AMDGPUOperand::isDPPCtrl() const {
9224 using namespace AMDGPU::DPP;
9225
9226 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(x: getImm());
9227 if (result) {
9228 int64_t Imm = getImm();
9229 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9230 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9231 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9232 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9233 (Imm == DppCtrl::WAVE_SHL1) ||
9234 (Imm == DppCtrl::WAVE_ROL1) ||
9235 (Imm == DppCtrl::WAVE_SHR1) ||
9236 (Imm == DppCtrl::WAVE_ROR1) ||
9237 (Imm == DppCtrl::ROW_MIRROR) ||
9238 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9239 (Imm == DppCtrl::BCAST15) ||
9240 (Imm == DppCtrl::BCAST31) ||
9241 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9242 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9243 }
9244 return false;
9245}
9246
9247//===----------------------------------------------------------------------===//
9248// mAI
9249//===----------------------------------------------------------------------===//
9250
9251bool AMDGPUOperand::isBLGP() const {
9252 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(x: getImm());
9253}
9254
9255bool AMDGPUOperand::isS16Imm() const {
9256 return isImmLiteral() && (isInt<16>(x: getImm()) || isUInt<16>(x: getImm()));
9257}
9258
9259bool AMDGPUOperand::isU16Imm() const {
9260 return isImmLiteral() && isUInt<16>(x: getImm());
9261}
9262
9263//===----------------------------------------------------------------------===//
9264// dim
9265//===----------------------------------------------------------------------===//
9266
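// Parse the value of a dim:<value> operand. Both the full SQ_RSRC_IMG_* name
// and its short asm suffix (e.g. 1D, 2D_ARRAY) are accepted.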
9267bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9268 // We want to allow "dim:1D" etc.,
9269 // but the initial 1 is tokenized as an integer.
9270 std::string Token;
9271 if (isToken(Kind: AsmToken::Integer)) {
9272 SMLoc Loc = getToken().getEndLoc();
9273 Token = std::string(getTokenStr());
9274 lex();
9275 if (getLoc() != Loc)
9276 return false;
9277 }
9278
9279 StringRef Suffix;
9280 if (!parseId(Val&: Suffix))
9281 return false;
9282 Token += Suffix;
9283
9284 StringRef DimId = Token;
9285 DimId.consume_front(Prefix: "SQ_RSRC_IMG_");
9286
9287 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(AsmSuffix: DimId);
9288 if (!DimInfo)
9289 return false;
9290
9291 Encoding = DimInfo->Encoding;
9292 return true;
9293}
9294
9295ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9296 if (!isGFX10Plus())
9297 return ParseStatus::NoMatch;
9298
9299 SMLoc S = getLoc();
9300
9301 if (!trySkipId(Id: "dim", Kind: AsmToken::Colon))
9302 return ParseStatus::NoMatch;
9303
9304 unsigned Encoding;
9305 SMLoc Loc = getLoc();
9306 if (!parseDimId(Encoding))
9307 return Error(L: Loc, Msg: "invalid dim value");
9308
9309 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Encoding, Loc: S,
9310 Type: AMDGPUOperand::ImmTyDim));
9311 return ParseStatus::Success;
9312}
9313
9314//===----------------------------------------------------------------------===//
9315// dpp
9316//===----------------------------------------------------------------------===//
9317
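// Parse a dpp8:[...] selector: eight 3-bit lane selects, packed LSB-first
// into a single immediate (lane i occupies bits [3*i+2 : 3*i]).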
9318ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9319 SMLoc S = getLoc();
9320
9321 if (!isGFX10Plus() || !trySkipId(Id: "dpp8", Kind: AsmToken::Colon))
9322 return ParseStatus::NoMatch;
9323
9324 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
9325
9326 int64_t Sels[8];
9327
9328 if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected an opening square bracket"))
9329 return ParseStatus::Failure;
9330
9331 for (size_t i = 0; i < 8; ++i) {
9332 if (i > 0 && !skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
9333 return ParseStatus::Failure;
9334
9335 SMLoc Loc = getLoc();
9336 if (getParser().parseAbsoluteExpression(Res&: Sels[i]))
9337 return ParseStatus::Failure;
9338 if (0 > Sels[i] || 7 < Sels[i])
9339 return Error(L: Loc, Msg: "expected a 3-bit value");
9340 }
9341
9342 if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
9343 return ParseStatus::Failure;
9344
9345 unsigned DPP8 = 0;
9346 for (size_t i = 0; i < 8; ++i)
9347 DPP8 |= (Sels[i] << (i * 3));
9348
9349 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DPP8, Loc: S, Type: AMDGPUOperand::ImmTyDPP8));
9350 return ParseStatus::Success;
9351}
9352
9353bool
9354AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
9355 const OperandVector &Operands) {
9356 if (Ctrl == "row_newbcast")
9357 return isGFX90A();
9358
9359 if (Ctrl == "row_share" ||
9360 Ctrl == "row_xmask")
9361 return isGFX10Plus();
9362
9363 if (Ctrl == "wave_shl" ||
9364 Ctrl == "wave_shr" ||
9365 Ctrl == "wave_rol" ||
9366 Ctrl == "wave_ror" ||
9367 Ctrl == "row_bcast")
9368 return isVI() || isGFX9();
9369
9370 return Ctrl == "row_mirror" ||
9371 Ctrl == "row_half_mirror" ||
9372 Ctrl == "quad_perm" ||
9373 Ctrl == "row_shl" ||
9374 Ctrl == "row_shr" ||
9375 Ctrl == "row_ror";
9376}
9377
9378int64_t
9379AMDGPUAsmParser::parseDPPCtrlPerm() {
9380 // quad_perm:[%d,%d,%d,%d]
9381
9382 if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected an opening square bracket"))
9383 return -1;
9384
9385 int64_t Val = 0;
9386 for (int i = 0; i < 4; ++i) {
9387 if (i > 0 && !skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
9388 return -1;
9389
9390 int64_t Temp;
9391 SMLoc Loc = getLoc();
9392 if (getParser().parseAbsoluteExpression(Res&: Temp))
9393 return -1;
9394 if (Temp < 0 || Temp > 3) {
9395 Error(L: Loc, Msg: "expected a 2-bit value");
9396 return -1;
9397 }
9398
9399 Val += (Temp << i * 2);
9400 }
9401
9402 if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
9403 return -1;
9404
9405 return Val;
9406}
9407
9408int64_t
9409AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
9410 using namespace AMDGPU::DPP;
9411
9412 // sel:%d
9413
9414 int64_t Val;
9415 SMLoc Loc = getLoc();
9416
9417 if (getParser().parseAbsoluteExpression(Res&: Val))
9418 return -1;
9419
9420 struct DppCtrlCheck {
9421 int64_t Ctrl;
9422 int Lo;
9423 int Hi;
9424 };
9425
9426 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
9427 .Case(S: "wave_shl", Value: {.Ctrl: DppCtrl::WAVE_SHL1, .Lo: 1, .Hi: 1})
9428 .Case(S: "wave_rol", Value: {.Ctrl: DppCtrl::WAVE_ROL1, .Lo: 1, .Hi: 1})
9429 .Case(S: "wave_shr", Value: {.Ctrl: DppCtrl::WAVE_SHR1, .Lo: 1, .Hi: 1})
9430 .Case(S: "wave_ror", Value: {.Ctrl: DppCtrl::WAVE_ROR1, .Lo: 1, .Hi: 1})
9431 .Case(S: "row_shl", Value: {.Ctrl: DppCtrl::ROW_SHL0, .Lo: 1, .Hi: 15})
9432 .Case(S: "row_shr", Value: {.Ctrl: DppCtrl::ROW_SHR0, .Lo: 1, .Hi: 15})
9433 .Case(S: "row_ror", Value: {.Ctrl: DppCtrl::ROW_ROR0, .Lo: 1, .Hi: 15})
9434 .Case(S: "row_share", Value: {.Ctrl: DppCtrl::ROW_SHARE_FIRST, .Lo: 0, .Hi: 15})
9435 .Case(S: "row_xmask", Value: {.Ctrl: DppCtrl::ROW_XMASK_FIRST, .Lo: 0, .Hi: 15})
9436 .Case(S: "row_newbcast", Value: {.Ctrl: DppCtrl::ROW_NEWBCAST_FIRST, .Lo: 0, .Hi: 15})
9437 .Default(Value: {.Ctrl: -1, .Lo: 0, .Hi: 0});
9438
9439 bool Valid;
9440 if (Check.Ctrl == -1) {
9441 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
9442 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
9443 } else {
9444 Valid = Check.Lo <= Val && Val <= Check.Hi;
9445 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
9446 }
9447
9448 if (!Valid) {
9449 Error(L: Loc, Msg: Twine("invalid ", Ctrl) + Twine(" value"));
9450 return -1;
9451 }
9452
9453 return Val;
9454}
9455
9456ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
9457 using namespace AMDGPU::DPP;
9458
9459 if (!isToken(Kind: AsmToken::Identifier) ||
9460 !isSupportedDPPCtrl(Ctrl: getTokenStr(), Operands))
9461 return ParseStatus::NoMatch;
9462
9463 SMLoc S = getLoc();
9464 int64_t Val = -1;
9465 StringRef Ctrl;
9466
9467 parseId(Val&: Ctrl);
9468
9469 if (Ctrl == "row_mirror") {
9470 Val = DppCtrl::ROW_MIRROR;
9471 } else if (Ctrl == "row_half_mirror") {
9472 Val = DppCtrl::ROW_HALF_MIRROR;
9473 } else {
9474 if (skipToken(Kind: AsmToken::Colon, ErrMsg: "expected a colon")) {
9475 if (Ctrl == "quad_perm") {
9476 Val = parseDPPCtrlPerm();
9477 } else {
9478 Val = parseDPPCtrlSel(Ctrl);
9479 }
9480 }
9481 }
9482
9483 if (Val == -1)
9484 return ParseStatus::Failure;
9485
9486 Operands.push_back(
9487 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: AMDGPUOperand::ImmTyDppCtrl));
9488 return ParseStatus::Success;
9489}
9490
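// Convert parsed VOP3 DPP/DPP8 operands into MCInst operands, inserting the
// tied and dummy operands (old, src2_modifiers, vdst_in) that the untied MAC
// and convert forms require before the optional immediates are appended.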
9491void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
9492 bool IsDPP8) {
9493 OptionalImmIndexMap OptionalIdx;
9494 unsigned Opc = Inst.getOpcode();
9495 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
9496
  // MAC instructions are special because they have an 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have a dummy, unused src2_modifiers operand.
9500 int OldIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::old);
9501 int Src2ModIdx =
9502 AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2_modifiers);
9503 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
9504 Desc.getOperandConstraint(OpNum: OldIdx, Constraint: MCOI::TIED_TO) == -1;
9505
9506 unsigned I = 1;
9507 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9508 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
9509 }
9510
9511 int Fi = 0;
9512 int VdstInIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst_in);
9513 bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
9514 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
9515 Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
9516 Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;
9517
9518 for (unsigned E = Operands.size(); I != E; ++I) {
9519
9520 if (IsMAC) {
9521 int NumOperands = Inst.getNumOperands();
9522 if (OldIdx == NumOperands) {
9523 // Handle old operand
9524 constexpr int DST_IDX = 0;
9525 Inst.addOperand(Op: Inst.getOperand(i: DST_IDX));
9526 } else if (Src2ModIdx == NumOperands) {
9527 // Add unused dummy src2_modifiers
9528 Inst.addOperand(Op: MCOperand::createImm(Val: 0));
9529 }
9530 }
9531
9532 if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
9533 Inst.addOperand(Op: Inst.getOperand(i: 0));
9534 }
9535
9536 if (IsVOP3CvtSrDpp) {
9537 if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
9538 Inst.addOperand(Op: MCOperand::createImm(Val: 0));
9539 Inst.addOperand(Op: MCOperand::createReg(Reg: MCRegister()));
9540 }
9541 }
9542
9543 auto TiedTo = Desc.getOperandConstraint(OpNum: Inst.getNumOperands(),
9544 Constraint: MCOI::TIED_TO);
9545 if (TiedTo != -1) {
9546 assert((unsigned)TiedTo < Inst.getNumOperands());
9547 // handle tied old or src2 for MAC instructions
9548 Inst.addOperand(Op: Inst.getOperand(i: TiedTo));
9549 }
9550 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9551 // Add the register arguments
9552 if (IsDPP8 && Op.isDppFI()) {
9553 Fi = Op.getImm();
9554 } else if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
9555 Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
9556 } else if (Op.isReg()) {
9557 Op.addRegOperands(Inst, N: 1);
9558 } else if (Op.isImm() &&
9559 Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
9560 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
9561 Op.addImmOperands(Inst, N: 1);
9562 } else if (Op.isImm()) {
9563 OptionalIdx[Op.getImmTy()] = I;
9564 } else {
9565 llvm_unreachable("unhandled operand type");
9566 }
9567 }
9568
9569 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::byte_sel))
9570 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9571 ImmT: AMDGPUOperand::ImmTyByteSel);
9572
9573 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
9574 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9575 ImmT: AMDGPUOperand::ImmTyClamp);
9576
9577 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
9578 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOModSI);
9579
9580 if (Desc.TSFlags & SIInstrFlags::VOP3P)
9581 cvtVOP3P(Inst, Operands, OptIdx&: OptionalIdx);
9582 else if (Desc.TSFlags & SIInstrFlags::VOP3)
9583 cvtVOP3OpSel(Inst, Operands, OptionalIdx);
9584 else if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
9585 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel);
9586 }
9587
9588 if (IsDPP8) {
9589 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDPP8);
9590 using namespace llvm::AMDGPU::DPP;
9591 Inst.addOperand(Op: MCOperand::createImm(Val: Fi? DPP8_FI_1 : DPP8_FI_0));
9592 } else {
9593 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppCtrl, Default: 0xe4);
9594 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppRowMask, Default: 0xf);
9595 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBankMask, Default: 0xf);
9596 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBoundCtrl);
9597
9598 if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::fi))
9599 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9600 ImmT: AMDGPUOperand::ImmTyDppFI);
9601 }
9602}
9603
9604void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
9605 OptionalImmIndexMap OptionalIdx;
9606
9607 unsigned I = 1;
9608 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
9609 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9610 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
9611 }
9612
9613 int Fi = 0;
9614 for (unsigned E = Operands.size(); I != E; ++I) {
9615 auto TiedTo = Desc.getOperandConstraint(OpNum: Inst.getNumOperands(),
9616 Constraint: MCOI::TIED_TO);
9617 if (TiedTo != -1) {
9618 assert((unsigned)TiedTo < Inst.getNumOperands());
9619 // handle tied old or src2 for MAC instructions
9620 Inst.addOperand(Op: Inst.getOperand(i: TiedTo));
9621 }
9622 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9623 // Add the register arguments
9624 if (Op.isReg() && validateVccOperand(Reg: Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
9627 continue;
9628 }
9629
9630 if (IsDPP8) {
9631 if (Op.isDPP8()) {
9632 Op.addImmOperands(Inst, N: 1);
9633 } else if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
9634 Op.addRegWithFPInputModsOperands(Inst, N: 2);
9635 } else if (Op.isDppFI()) {
9636 Fi = Op.getImm();
9637 } else if (Op.isReg()) {
9638 Op.addRegOperands(Inst, N: 1);
9639 } else {
9640 llvm_unreachable("Invalid operand type");
9641 }
9642 } else {
9643 if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
9644 Op.addRegWithFPInputModsOperands(Inst, N: 2);
9645 } else if (Op.isReg()) {
9646 Op.addRegOperands(Inst, N: 1);
9647 } else if (Op.isDPPCtrl()) {
9648 Op.addImmOperands(Inst, N: 1);
9649 } else if (Op.isImm()) {
9650 // Handle optional arguments
9651 OptionalIdx[Op.getImmTy()] = I;
9652 } else {
9653 llvm_unreachable("Invalid operand type");
9654 }
9655 }
9656 }
9657
9658 if (IsDPP8) {
9659 using namespace llvm::AMDGPU::DPP;
9660 Inst.addOperand(Op: MCOperand::createImm(Val: Fi? DPP8_FI_1 : DPP8_FI_0));
9661 } else {
9662 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppRowMask, Default: 0xf);
9663 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBankMask, Default: 0xf);
9664 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBoundCtrl);
9665 if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::fi)) {
9666 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9667 ImmT: AMDGPUOperand::ImmTyDppFI);
9668 }
9669 }
9670}
9671
9672//===----------------------------------------------------------------------===//
9673// sdwa
9674//===----------------------------------------------------------------------===//
9675
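// Parse an SDWA sub-dword select such as dst_sel:WORD_1 or src0_sel:BYTE_2;
// either the symbolic name or the corresponding integer value is accepted.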
9676ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9677 StringRef Prefix,
9678 AMDGPUOperand::ImmTy Type) {
9679 return parseStringOrIntWithPrefix(
9680 Operands, Name: Prefix,
9681 Ids: {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
9682 Type);
9683}
9684
9685ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9686 return parseStringOrIntWithPrefix(
9687 Operands, Name: "dst_unused", Ids: {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
9688 Type: AMDGPUOperand::ImmTySDWADstUnused);
9689}
9690
9691void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9692 cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP1);
9693}
9694
9695void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9696 cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP2);
9697}
9698
9699void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9700 cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP2, SkipDstVcc: true, SkipSrcVcc: true);
9701}
9702
9703void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9704 cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP2, SkipDstVcc: false, SkipSrcVcc: true);
9705}
9706
9707void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9708 cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOPC, SkipDstVcc: isVI());
9709}
9710
9711void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9712 uint64_t BasicInstType,
9713 bool SkipDstVcc,
9714 bool SkipSrcVcc) {
9715 using namespace llvm::AMDGPU::SDWA;
9716
9717 OptionalImmIndexMap OptionalIdx;
9718 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9719 bool SkippedVcc = false;
9720
9721 unsigned I = 1;
9722 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
9723 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9724 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
9725 }
9726
9727 for (unsigned E = Operands.size(); I != E; ++I) {
9728 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9729 if (SkipVcc && !SkippedVcc && Op.isReg() &&
9730 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
      // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we did not skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
9736 if (BasicInstType == SIInstrFlags::VOP2 &&
9737 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9738 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9739 SkippedVcc = true;
9740 continue;
9741 }
9742 if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
9743 SkippedVcc = true;
9744 continue;
9745 }
9746 }
9747 if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
9748 Op.addRegOrImmWithInputModsOperands(Inst, N: 2);
9749 } else if (Op.isImm()) {
9750 // Handle optional arguments
9751 OptionalIdx[Op.getImmTy()] = I;
9752 } else {
9753 llvm_unreachable("Invalid operand type");
9754 }
9755 SkippedVcc = false;
9756 }
9757
9758 const unsigned Opc = Inst.getOpcode();
9759 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9760 Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
9762 switch (BasicInstType) {
9763 case SIInstrFlags::VOP1:
9764 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
9765 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9766 ImmT: AMDGPUOperand::ImmTyClamp, Default: 0);
9767
9768 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
9769 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9770 ImmT: AMDGPUOperand::ImmTyOModSI, Default: 0);
9771
9772 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::dst_sel))
9773 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9774 ImmT: AMDGPUOperand::ImmTySDWADstSel, Default: SdwaSel::DWORD);
9775
9776 if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::dst_unused))
9777 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9778 ImmT: AMDGPUOperand::ImmTySDWADstUnused,
9779 Default: DstUnused::UNUSED_PRESERVE);
9780
9781 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
9782 break;
9783
9784 case SIInstrFlags::VOP2:
9785 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9786 ImmT: AMDGPUOperand::ImmTyClamp, Default: 0);
9787
9788 if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::omod))
9789 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOModSI, Default: 0);
9790
9791 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWADstSel, Default: SdwaSel::DWORD);
9792 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWADstUnused, Default: DstUnused::UNUSED_PRESERVE);
9793 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
9794 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc1Sel, Default: SdwaSel::DWORD);
9795 break;
9796
9797 case SIInstrFlags::VOPC:
9798 if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::clamp))
9799 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9800 ImmT: AMDGPUOperand::ImmTyClamp, Default: 0);
9801 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
9802 addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc1Sel, Default: SdwaSel::DWORD);
9803 break;
9804
9805 default:
9806 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9807 }
9808 }
9809
  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
9812 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9813 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
9814 auto *it = Inst.begin();
9815 std::advance(
9816 i&: it, n: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::src2));
9817 Inst.insert(I: it, Op: Inst.getOperand(i: 0)); // src2 = dst
9818 }
9819}
9820
9821/// Force static initialization.
9822extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
9823LLVMInitializeAMDGPUAsmParser() {
9824 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9825 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9826}
9827
9828#define GET_REGISTER_MATCHER
9829#define GET_MATCHER_IMPLEMENTATION
9830#define GET_MNEMONIC_SPELL_CHECKER
9831#define GET_MNEMONIC_CHECKER
9832#include "AMDGPUGenAsmMatcher.inc"
9833
ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
                                                unsigned MCK) {
  switch (MCK) {
  case MCK_addr64:
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
  }
  return tryCustomParseOperand(Operands, MCK);
}


// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" are parsed as immediate operands in ParseOperand(),
  // but MatchInstructionImpl() expects a token there and fails to validate
  // the operand. This method checks whether we were given an immediate
  // operand where the matcher expects the corresponding token.
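  // For example, "gds" is parsed via parseNamedBit() into an ImmTyGDS
  // immediate, while the matcher class MCK_gds expects a token; isGDS()
  // below accepts the immediate form instead.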
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_tfe:
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrc_b32:
    // When an operand has an expression value, isToken returns true because
    // a token cannot be distinguished from an expression at parse time.
    // MatchInstructionImpl() always tries to match such an operand as a
    // token, and when the expression's name is not a valid token the match
    // fails, so handle it here.
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrc_f32:
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but it should also be accepted
    // for 64-bit and larger operands. The following code enables it for
    // SReg_64 and larger classes used as source and destination; remaining
    // source operands are handled in isInlinableImm().
  case MCK_SReg_96:
  case MCK_SReg_128:
  case MCK_SReg_256:
  case MCK_SReg_512:
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

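// Parse the optional 16-bit immediate operand of s_endpgm, e.g. "s_endpgm"
// or "s_endpgm 3"; when the immediate is omitted it defaults to 0.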
ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if it is not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return ParseStatus::Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// Split Barrier
//===----------------------------------------------------------------------===//

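// A split-barrier operand accepts any 32-bit inlinable immediate.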
bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
