1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
10#include "MCTargetDesc/AMDGPUInstPrinter.h"
11#include "MCTargetDesc/AMDGPUMCAsmInfo.h"
12#include "MCTargetDesc/AMDGPUMCExpr.h"
13#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
14#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15#include "MCTargetDesc/AMDGPUTargetStreamer.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
18#include "TargetInfo/AMDGPUTargetInfo.h"
19#include "Utils/AMDGPUAsmUtils.h"
20#include "Utils/AMDGPUBaseInfo.h"
21#include "Utils/AMDKernelCodeTUtils.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
26#include "llvm/BinaryFormat/ELF.h"
27#include "llvm/CodeGenTypes/MachineValueType.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/MC/MCParser/AsmLexer.h"
34#include "llvm/MC/MCParser/MCAsmParser.h"
35#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
36#include "llvm/MC/MCParser/MCTargetAsmParser.h"
37#include "llvm/MC/MCRegisterInfo.h"
38#include "llvm/MC/MCSymbol.h"
39#include "llvm/MC/TargetRegistry.h"
40#include "llvm/Support/AMDGPUMetadata.h"
41#include "llvm/Support/AMDHSAKernelDescriptor.h"
42#include "llvm/Support/Casting.h"
43#include "llvm/Support/Compiler.h"
44#include "llvm/Support/MathExtras.h"
45#include "llvm/TargetParser/TargetParser.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
// Classifies a parsed register operand by which register file it names.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
  // Source-operand modifiers parsed from assembly: abs()/|..| and neg() for
  // floating point, sext() for integer, plus an optional literal modifier.
  // FP and integer modifiers are mutually exclusive on a single operand.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    LitModifier Lit = LitModifier::None;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode the FP modifiers into an SISrcMods immediate word.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode the integer modifiers into an SISrcMods immediate word.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // Encode whichever modifier family is present; they cannot be mixed.
    // Note Lit is not part of the SISrcMods word produced here.
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
114
  // Distinguishes the many kinds of immediate-like operands the parser can
  // produce: plain literals (ImmTyNone) and every named modifier field
  // (offsets, selectors, DPP controls, matrix attributes, wait counts, ...).
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyIsAsync,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyDone,
    ImmTyRowEn,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyWaitEvent,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyIndexKey32bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyBitOp3,
    ImmTyMatrixAFMT,
    ImmTyMatrixBFMT,
    ImmTyMatrixAScale,
    ImmTyMatrixBScale,
    ImmTyMatrixAScaleFmt,
    ImmTyMatrixBScaleFmt,
    ImmTyMatrixAReuse,
    ImmTyMatrixBReuse,
    ImmTyScaleSel,
    ImmTyByteSel,
  };
193
194private:
  // Token operand payload: a non-owning view into the lexer's buffer.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Immediate operand payload. IsFPImm marks literals written in FP syntax
  // (presumably Val then holds the FP bit pattern — see the literal-emission
  // helpers for the authoritative interpretation).
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Register operand payload.
  struct RegOp {
    MCRegister RegNo;
    Modifiers Mods;
  };

  // Payload storage; the active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  // The index of the associated MCInst operand.
  mutable int MCOpIdx = -1;
221
222public:
223 bool isToken() const override { return Kind == Token; }
224
225 bool isSymbolRefExpr() const {
226 return isExpr() && Expr && isa<MCSymbolRefExpr>(Val: Expr);
227 }
228
229 bool isImm() const override {
230 return Kind == Immediate;
231 }
232
233 bool isInlinableImm(MVT type) const;
234 bool isLiteralImm(MVT type) const;
235
236 bool isRegKind() const {
237 return Kind == Register;
238 }
239
240 bool isReg() const override {
241 return isRegKind() && !hasModifiers();
242 }
243
244 bool isRegOrInline(unsigned RCID, MVT type) const {
245 return isRegClass(RCID) || isInlinableImm(type);
246 }
247
248 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
249 return isRegOrInline(RCID, type) || isLiteralImm(type);
250 }
251
252 bool isRegOrImmWithInt16InputMods() const {
253 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
254 }
255
256 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
257 return isRegOrImmWithInputMods(
258 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::i16);
259 }
260
261 bool isRegOrImmWithInt32InputMods() const {
262 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
263 }
264
265 bool isRegOrInlineImmWithInt16InputMods() const {
266 return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
267 }
268
269 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
270 return isRegOrInline(
271 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::i16);
272 }
273
274 bool isRegOrInlineImmWithInt32InputMods() const {
275 return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
276 }
277
278 bool isRegOrImmWithInt64InputMods() const {
279 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::i64);
280 }
281
282 bool isRegOrImmWithFP16InputMods() const {
283 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f16);
284 }
285
286 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
287 return isRegOrImmWithInputMods(
288 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::f16);
289 }
290
291 bool isRegOrImmWithFP32InputMods() const {
292 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
293 }
294
295 bool isRegOrImmWithFP64InputMods() const {
296 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
297 }
298
299 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
300 return isRegOrInline(
301 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::f16);
302 }
303
304 bool isRegOrInlineImmWithFP32InputMods() const {
305 return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
306 }
307
308 bool isRegOrInlineImmWithFP64InputMods() const {
309 return isRegOrInline(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
310 }
311
312 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
313
314 bool isVRegWithFP32InputMods() const {
315 return isVRegWithInputMods(RCID: AMDGPU::VGPR_32RegClassID);
316 }
317
318 bool isVRegWithFP64InputMods() const {
319 return isVRegWithInputMods(RCID: AMDGPU::VReg_64RegClassID);
320 }
321
322 bool isPackedFP16InputMods() const {
323 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::v2f16);
324 }
325
326 bool isPackedVGPRFP32InputMods() const {
327 return isRegOrImmWithInputMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::v2f32);
328 }
329
330 bool isVReg() const {
331 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) ||
332 isRegClass(RCID: AMDGPU::VReg_64RegClassID) ||
333 isRegClass(RCID: AMDGPU::VReg_96RegClassID) ||
334 isRegClass(RCID: AMDGPU::VReg_128RegClassID) ||
335 isRegClass(RCID: AMDGPU::VReg_160RegClassID) ||
336 isRegClass(RCID: AMDGPU::VReg_192RegClassID) ||
337 isRegClass(RCID: AMDGPU::VReg_256RegClassID) ||
338 isRegClass(RCID: AMDGPU::VReg_512RegClassID) ||
339 isRegClass(RCID: AMDGPU::VReg_1024RegClassID);
340 }
341
342 bool isVReg32() const {
343 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID);
344 }
345
346 bool isVReg32OrOff() const {
347 return isOff() || isVReg32();
348 }
349
350 bool isNull() const {
351 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
352 }
353
354 bool isAV_LdSt_32_Align2_RegOp() const {
355 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) ||
356 isRegClass(RCID: AMDGPU::AGPR_32RegClassID);
357 }
358
  // Defined out of line later in this file.
  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  // SDWA (sub-dword addressing) operand checks, defined out of line.
  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
368
369 bool isImmTy(ImmTy ImmT) const {
370 return isImm() && Imm.Type == ImmT;
371 }
372
373 template <ImmTy Ty> bool isImmTy() const { return isImmTy(ImmT: Ty); }
374
375 bool isImmLiteral() const { return isImmTy(ImmT: ImmTyNone); }
376
377 bool isImmModifier() const {
378 return isImm() && Imm.Type != ImmTyNone;
379 }
380
381 bool isOModSI() const { return isImmTy(ImmT: ImmTyOModSI); }
382 bool isDim() const { return isImmTy(ImmT: ImmTyDim); }
383 bool isR128A16() const { return isImmTy(ImmT: ImmTyR128A16); }
384 bool isOff() const { return isImmTy(ImmT: ImmTyOff); }
385 bool isExpTgt() const { return isImmTy(ImmT: ImmTyExpTgt); }
386 bool isOffen() const { return isImmTy(ImmT: ImmTyOffen); }
387 bool isIdxen() const { return isImmTy(ImmT: ImmTyIdxen); }
388 bool isAddr64() const { return isImmTy(ImmT: ImmTyAddr64); }
389 bool isSMEMOffsetMod() const { return isImmTy(ImmT: ImmTySMEMOffsetMod); }
390 bool isFlatOffset() const { return isImmTy(ImmT: ImmTyOffset) || isImmTy(ImmT: ImmTyInstOffset); }
391 bool isGDS() const { return isImmTy(ImmT: ImmTyGDS); }
392 bool isLDS() const { return isImmTy(ImmT: ImmTyLDS); }
393 bool isCPol() const { return isImmTy(ImmT: ImmTyCPol); }
394 bool isIndexKey8bit() const { return isImmTy(ImmT: ImmTyIndexKey8bit); }
395 bool isIndexKey16bit() const { return isImmTy(ImmT: ImmTyIndexKey16bit); }
396 bool isIndexKey32bit() const { return isImmTy(ImmT: ImmTyIndexKey32bit); }
397 bool isMatrixAFMT() const { return isImmTy(ImmT: ImmTyMatrixAFMT); }
398 bool isMatrixBFMT() const { return isImmTy(ImmT: ImmTyMatrixBFMT); }
399 bool isMatrixAScale() const { return isImmTy(ImmT: ImmTyMatrixAScale); }
400 bool isMatrixBScale() const { return isImmTy(ImmT: ImmTyMatrixBScale); }
401 bool isMatrixAScaleFmt() const { return isImmTy(ImmT: ImmTyMatrixAScaleFmt); }
402 bool isMatrixBScaleFmt() const { return isImmTy(ImmT: ImmTyMatrixBScaleFmt); }
403 bool isMatrixAReuse() const { return isImmTy(ImmT: ImmTyMatrixAReuse); }
404 bool isMatrixBReuse() const { return isImmTy(ImmT: ImmTyMatrixBReuse); }
405 bool isTFE() const { return isImmTy(ImmT: ImmTyTFE); }
406 bool isFORMAT() const { return isImmTy(ImmT: ImmTyFORMAT) && isUInt<7>(x: getImm()); }
407 bool isDppFI() const { return isImmTy(ImmT: ImmTyDppFI); }
408 bool isSDWADstSel() const { return isImmTy(ImmT: ImmTySDWADstSel); }
409 bool isSDWASrc0Sel() const { return isImmTy(ImmT: ImmTySDWASrc0Sel); }
410 bool isSDWASrc1Sel() const { return isImmTy(ImmT: ImmTySDWASrc1Sel); }
411 bool isSDWADstUnused() const { return isImmTy(ImmT: ImmTySDWADstUnused); }
412 bool isInterpSlot() const { return isImmTy(ImmT: ImmTyInterpSlot); }
413 bool isInterpAttr() const { return isImmTy(ImmT: ImmTyInterpAttr); }
414 bool isInterpAttrChan() const { return isImmTy(ImmT: ImmTyInterpAttrChan); }
415 bool isOpSel() const { return isImmTy(ImmT: ImmTyOpSel); }
416 bool isOpSelHi() const { return isImmTy(ImmT: ImmTyOpSelHi); }
417 bool isNegLo() const { return isImmTy(ImmT: ImmTyNegLo); }
418 bool isNegHi() const { return isImmTy(ImmT: ImmTyNegHi); }
419 bool isBitOp3() const { return isImmTy(ImmT: ImmTyBitOp3) && isUInt<8>(x: getImm()); }
420 bool isDone() const { return isImmTy(ImmT: ImmTyDone); }
421 bool isRowEn() const { return isImmTy(ImmT: ImmTyRowEn); }
422
423 bool isRegOrImm() const {
424 return isReg() || isImm();
425 }
426
  // Defined out of line later in this file.
  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  // Register of class RCID, or inline constant of the given type, with no
  // abs/neg/sext source modifiers attached.
  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
434
435 bool isSCSrcB16() const {
436 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::i16);
437 }
438
439 bool isSCSrcV2B16() const {
440 return isSCSrcB16();
441 }
442
443 bool isSCSrc_b32() const {
444 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::i32);
445 }
446
447 bool isSCSrc_b64() const {
448 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_64RegClassID, type: MVT::i64);
449 }
450
451 bool isBoolReg() const;
452
453 bool isSCSrcF16() const {
454 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::f16);
455 }
456
457 bool isSCSrcV2F16() const {
458 return isSCSrcF16();
459 }
460
461 bool isSCSrcF32() const {
462 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::f32);
463 }
464
465 bool isSCSrcF64() const {
466 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_64RegClassID, type: MVT::f64);
467 }
468
469 bool isSSrc_b32() const {
470 return isSCSrc_b32() || isLiteralImm(type: MVT::i32) || isExpr();
471 }
472
473 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(type: MVT::i16); }
474
475 bool isSSrcV2B16() const {
476 llvm_unreachable("cannot happen");
477 return isSSrc_b16();
478 }
479
480 bool isSSrc_b64() const {
481 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
482 // See isVSrc64().
483 return isSCSrc_b64() || isLiteralImm(type: MVT::i64) ||
484 (((const MCTargetAsmParser *)AsmParser)
485 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
486 isExpr());
487 }
488
489 bool isSSrc_f32() const {
490 return isSCSrc_b32() || isLiteralImm(type: MVT::f32) || isExpr();
491 }
492
493 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(type: MVT::f64); }
494
495 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(type: MVT::bf16); }
496
497 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(type: MVT::f16); }
498
499 bool isSSrcV2F16() const {
500 llvm_unreachable("cannot happen");
501 return isSSrc_f16();
502 }
503
504 bool isSSrcV2FP32() const {
505 llvm_unreachable("cannot happen");
506 return isSSrc_f32();
507 }
508
509 bool isSCSrcV2FP32() const {
510 llvm_unreachable("cannot happen");
511 return isSCSrcF32();
512 }
513
514 bool isSSrcV2INT32() const {
515 llvm_unreachable("cannot happen");
516 return isSSrc_b32();
517 }
518
519 bool isSCSrcV2INT32() const {
520 llvm_unreachable("cannot happen");
521 return isSCSrc_b32();
522 }
523
524 bool isSSrcOrLds_b32() const {
525 return isRegOrInlineNoMods(RCID: AMDGPU::SRegOrLds_32RegClassID, type: MVT::i32) ||
526 isLiteralImm(type: MVT::i32) || isExpr();
527 }
528
529 bool isVCSrc_b32() const {
530 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
531 }
532
533 bool isVCSrc_b32_Lo256() const {
534 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo256RegClassID, type: MVT::i32);
535 }
536
537 bool isVCSrc_b64_Lo256() const {
538 return isRegOrInlineNoMods(RCID: AMDGPU::VS_64_Lo256RegClassID, type: MVT::i64);
539 }
540
541 bool isVCSrc_b64() const {
542 return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::i64);
543 }
544
545 bool isVCSrcT_b16() const {
546 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::i16);
547 }
548
549 bool isVCSrcTB16_Lo128() const {
550 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::i16);
551 }
552
553 bool isVCSrcFake16B16_Lo128() const {
554 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::i16);
555 }
556
557 bool isVCSrc_b16() const {
558 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
559 }
560
561 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
562
563 bool isVCSrc_f32() const {
564 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
565 }
566
567 bool isVCSrc_f64() const {
568 return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
569 }
570
571 bool isVCSrcTBF16() const {
572 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::bf16);
573 }
574
575 bool isVCSrcT_f16() const {
576 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::f16);
577 }
578
579 bool isVCSrcT_bf16() const {
580 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::f16);
581 }
582
583 bool isVCSrcTBF16_Lo128() const {
584 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::bf16);
585 }
586
587 bool isVCSrcTF16_Lo128() const {
588 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::f16);
589 }
590
591 bool isVCSrcFake16BF16_Lo128() const {
592 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::bf16);
593 }
594
595 bool isVCSrcFake16F16_Lo128() const {
596 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::f16);
597 }
598
599 bool isVCSrc_bf16() const {
600 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::bf16);
601 }
602
603 bool isVCSrc_f16() const {
604 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f16);
605 }
606
607 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
608
609 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
610
611 bool isVSrc_b32() const {
612 return isVCSrc_f32() || isLiteralImm(type: MVT::i32) || isExpr();
613 }
614
615 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(type: MVT::i64); }
616
617 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(type: MVT::i16); }
618
619 bool isVSrcT_b16_Lo128() const {
620 return isVCSrcTB16_Lo128() || isLiteralImm(type: MVT::i16);
621 }
622
623 bool isVSrcFake16_b16_Lo128() const {
624 return isVCSrcFake16B16_Lo128() || isLiteralImm(type: MVT::i16);
625 }
626
627 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(type: MVT::i16); }
628
629 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(type: MVT::v2i16); }
630
631 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
632
633 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(type: MVT::v2f32); }
634
635 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
636
637 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(type: MVT::v2i32); }
638
639 bool isVSrc_f32() const {
640 return isVCSrc_f32() || isLiteralImm(type: MVT::f32) || isExpr();
641 }
642
643 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(type: MVT::f64); }
644
645 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(type: MVT::bf16); }
646
647 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(type: MVT::f16); }
648
649 bool isVSrcT_bf16_Lo128() const {
650 return isVCSrcTBF16_Lo128() || isLiteralImm(type: MVT::bf16);
651 }
652
653 bool isVSrcT_f16_Lo128() const {
654 return isVCSrcTF16_Lo128() || isLiteralImm(type: MVT::f16);
655 }
656
657 bool isVSrcFake16_bf16_Lo128() const {
658 return isVCSrcFake16BF16_Lo128() || isLiteralImm(type: MVT::bf16);
659 }
660
661 bool isVSrcFake16_f16_Lo128() const {
662 return isVCSrcFake16F16_Lo128() || isLiteralImm(type: MVT::f16);
663 }
664
665 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(type: MVT::bf16); }
666
667 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(type: MVT::f16); }
668
669 bool isVSrc_v2bf16() const {
670 return isVSrc_bf16() || isLiteralImm(type: MVT::v2bf16);
671 }
672
673 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(type: MVT::v2f16); }
674
675 bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
676
677 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
678
679 bool isVISrcB32() const {
680 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i32);
681 }
682
683 bool isVISrcB16() const {
684 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i16);
685 }
686
687 bool isVISrcV2B16() const {
688 return isVISrcB16();
689 }
690
691 bool isVISrcF32() const {
692 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f32);
693 }
694
695 bool isVISrcF16() const {
696 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f16);
697 }
698
699 bool isVISrcV2F16() const {
700 return isVISrcF16() || isVISrcB32();
701 }
702
703 bool isVISrc_64_bf16() const {
704 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::bf16);
705 }
706
707 bool isVISrc_64_f16() const {
708 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f16);
709 }
710
711 bool isVISrc_64_b32() const {
712 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32);
713 }
714
715 bool isVISrc_64B64() const {
716 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i64);
717 }
718
719 bool isVISrc_64_f64() const {
720 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f64);
721 }
722
723 bool isVISrc_64V2FP32() const {
724 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f32);
725 }
726
727 bool isVISrc_64V2INT32() const {
728 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32);
729 }
730
731 bool isVISrc_256_b32() const {
732 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32);
733 }
734
735 bool isVISrc_256_f32() const {
736 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32);
737 }
738
739 bool isVISrc_256B64() const {
740 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i64);
741 }
742
743 bool isVISrc_256_f64() const {
744 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f64);
745 }
746
747 bool isVISrc_512_f64() const {
748 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f64);
749 }
750
751 bool isVISrc_128B16() const {
752 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i16);
753 }
754
755 bool isVISrc_128V2B16() const {
756 return isVISrc_128B16();
757 }
758
759 bool isVISrc_128_b32() const {
760 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i32);
761 }
762
763 bool isVISrc_128_f32() const {
764 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f32);
765 }
766
767 bool isVISrc_256V2FP32() const {
768 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32);
769 }
770
771 bool isVISrc_256V2INT32() const {
772 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32);
773 }
774
775 bool isVISrc_512_b32() const {
776 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i32);
777 }
778
779 bool isVISrc_512B16() const {
780 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i16);
781 }
782
783 bool isVISrc_512V2B16() const {
784 return isVISrc_512B16();
785 }
786
787 bool isVISrc_512_f32() const {
788 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f32);
789 }
790
791 bool isVISrc_512F16() const {
792 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f16);
793 }
794
795 bool isVISrc_512V2F16() const {
796 return isVISrc_512F16() || isVISrc_512_b32();
797 }
798
799 bool isVISrc_1024_b32() const {
800 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i32);
801 }
802
803 bool isVISrc_1024B16() const {
804 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i16);
805 }
806
807 bool isVISrc_1024V2B16() const {
808 return isVISrc_1024B16();
809 }
810
811 bool isVISrc_1024_f32() const {
812 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f32);
813 }
814
815 bool isVISrc_1024F16() const {
816 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f16);
817 }
818
819 bool isVISrc_1024V2F16() const {
820 return isVISrc_1024F16() || isVISrc_1024_b32();
821 }
822
823 bool isAISrcB32() const {
824 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i32);
825 }
826
827 bool isAISrcB16() const {
828 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i16);
829 }
830
831 bool isAISrcV2B16() const {
832 return isAISrcB16();
833 }
834
835 bool isAISrcF32() const {
836 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f32);
837 }
838
839 bool isAISrcF16() const {
840 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f16);
841 }
842
843 bool isAISrcV2F16() const {
844 return isAISrcF16() || isAISrcB32();
845 }
846
847 bool isAISrc_64B64() const {
848 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::i64);
849 }
850
851 bool isAISrc_64_f64() const {
852 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::f64);
853 }
854
855 bool isAISrc_128_b32() const {
856 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i32);
857 }
858
859 bool isAISrc_128B16() const {
860 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i16);
861 }
862
863 bool isAISrc_128V2B16() const {
864 return isAISrc_128B16();
865 }
866
867 bool isAISrc_128_f32() const {
868 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f32);
869 }
870
871 bool isAISrc_128F16() const {
872 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f16);
873 }
874
875 bool isAISrc_128V2F16() const {
876 return isAISrc_128F16() || isAISrc_128_b32();
877 }
878
879 bool isVISrc_128_bf16() const {
880 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::bf16);
881 }
882
883 bool isVISrc_128_f16() const {
884 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f16);
885 }
886
887 bool isVISrc_128V2F16() const {
888 return isVISrc_128_f16() || isVISrc_128_b32();
889 }
890
891 bool isAISrc_256B64() const {
892 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::i64);
893 }
894
895 bool isAISrc_256_f64() const {
896 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::f64);
897 }
898
899 bool isAISrc_512_b32() const {
900 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i32);
901 }
902
903 bool isAISrc_512B16() const {
904 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i16);
905 }
906
907 bool isAISrc_512V2B16() const {
908 return isAISrc_512B16();
909 }
910
911 bool isAISrc_512_f32() const {
912 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f32);
913 }
914
915 bool isAISrc_512F16() const {
916 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f16);
917 }
918
919 bool isAISrc_512V2F16() const {
920 return isAISrc_512F16() || isAISrc_512_b32();
921 }
922
923 bool isAISrc_1024_b32() const {
924 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i32);
925 }
926
927 bool isAISrc_1024B16() const {
928 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i16);
929 }
930
931 bool isAISrc_1024V2B16() const {
932 return isAISrc_1024B16();
933 }
934
935 bool isAISrc_1024_f32() const {
936 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f32);
937 }
938
939 bool isAISrc_1024F16() const {
940 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f16);
941 }
942
943 bool isAISrc_1024V2F16() const {
944 return isAISrc_1024F16() || isAISrc_1024_b32();
945 }
946
947 bool isKImmFP32() const {
948 return isLiteralImm(type: MVT::f32);
949 }
950
951 bool isKImmFP16() const {
952 return isLiteralImm(type: MVT::f16);
953 }
954
955 bool isKImmFP64() const { return isLiteralImm(type: MVT::f64); }
956
957 bool isMem() const override {
958 return false;
959 }
960
961 bool isExpr() const {
962 return Kind == Expression;
963 }
964
  // Branch targets may be a resolved immediate or a relocatable expression.
  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  // Checks for specialized operand syntaxes, defined out of line.
  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isWaitEvent() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // Bind a predicate over this operand into a nullary callable; captures
  // this operand by pointer, so it must not outlive the operand.
  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [this, P]() { return P(*this); };
  }
989
  // Accessors below assert the matching Kind; callers must check first.
  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  int getMCOpIdx() const { return MCOpIdx; }

  // Modifiers live in the active union member: registers and plain literal
  // immediates (ImmTyNone) are the only kinds that can carry them.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
1053
1054 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1055
1056 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1057
1058 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1059
1060 void addRegOperands(MCInst &Inst, unsigned N) const;
1061
1062 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1063 if (isRegKind())
1064 addRegOperands(Inst, N);
1065 else
1066 addImmOperands(Inst, N);
1067 }
1068
1069 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1070 Modifiers Mods = getModifiers();
1071 Inst.addOperand(Op: MCOperand::createImm(Val: Mods.getModifiersOperand()));
1072 if (isRegKind()) {
1073 addRegOperands(Inst, N);
1074 } else {
1075 addImmOperands(Inst, N, ApplyModifiers: false);
1076 }
1077 }
1078
  // Same as addRegOrImmWithInputModsOperands, but asserts that only FP
  // (respectively integer) modifiers are present.

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }
1088
1089 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1090 Modifiers Mods = getModifiers();
1091 Inst.addOperand(Op: MCOperand::createImm(Val: Mods.getModifiersOperand()));
1092 assert(isRegKind());
1093 addRegOperands(Inst, N);
1094 }
1095
  // Register-only variants asserting that only FP (respectively integer)
  // modifiers are present.

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
1105
  // Debug helper: print a human-readable name for an immediate operand type.
  // Used by AMDGPUOperand::print() below.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    // All three index-key widths intentionally share the "index_key" name.
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyIsAsync: OS << "IsAsync"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyDone: OS << "Done"; break;
    case ImmTyRowEn: OS << "RowEn"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyWaitEvent: OS << "WaitEvent"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    // NOTE(review): the Matrix* entries print with an "ImmTy" prefix, unlike
    // every other entry — confirm whether this inconsistency is intentional.
    case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
    case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
    case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
    case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
    case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
    case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
    case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
    case ImmTyScaleSel: OS << "ScaleSel" ; break;
    case ImmTyByteSel: OS << "ByteSel" ; break;
    }
    // clang-format on
  }
1188
  // Debug-dump this operand: kind, payload, and modifiers.
  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(Reg: getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      // Only annotate the type when it is something more specific than None.
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, Type: getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }
1212
1213 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1214 int64_t Val, SMLoc Loc,
1215 ImmTy Type = ImmTyNone,
1216 bool IsFPImm = false) {
1217 auto Op = std::make_unique<AMDGPUOperand>(args: Immediate, args&: AsmParser);
1218 Op->Imm.Val = Val;
1219 Op->Imm.IsFPImm = IsFPImm;
1220 Op->Imm.Type = Type;
1221 Op->Imm.Mods = Modifiers();
1222 Op->StartLoc = Loc;
1223 Op->EndLoc = Loc;
1224 return Op;
1225 }
1226
1227 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1228 StringRef Str, SMLoc Loc,
1229 bool HasExplicitEncodingSize = true) {
1230 auto Res = std::make_unique<AMDGPUOperand>(args: Token, args&: AsmParser);
1231 Res->Tok.Data = Str.data();
1232 Res->Tok.Length = Str.size();
1233 Res->StartLoc = Loc;
1234 Res->EndLoc = Loc;
1235 return Res;
1236 }
1237
1238 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1239 MCRegister Reg, SMLoc S, SMLoc E) {
1240 auto Op = std::make_unique<AMDGPUOperand>(args: Register, args&: AsmParser);
1241 Op->Reg.RegNo = Reg;
1242 Op->Reg.Mods = Modifiers();
1243 Op->StartLoc = S;
1244 Op->EndLoc = E;
1245 return Op;
1246 }
1247
1248 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1249 const class MCExpr *Expr, SMLoc S) {
1250 auto Op = std::make_unique<AMDGPUOperand>(args: Expression, args&: AsmParser);
1251 Op->Expr = Expr;
1252 Op->StartLoc = S;
1253 Op->EndLoc = S;
1254 return Op;
1255 }
1256};
1257
1258raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1259 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1260 return OS;
1261}
1262
1263//===----------------------------------------------------------------------===//
1264// AsmParser
1265//===----------------------------------------------------------------------===//
1266
1267// TODO: define GET_SUBTARGET_FEATURE_NAME
1268#define GET_REGISTER_MATCHER
1269#include "AMDGPUGenAsmMatcher.inc"
1270#undef GET_REGISTER_MATCHER
1271#undef GET_SUBTARGET_FEATURE_NAME
1272
1273// Holds info related to the current kernel, e.g. count of SGPRs used.
1274// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1275// .amdgpu_hsa_kernel or at EOF.
1276class KernelScopeInfo {
1277 int SgprIndexUnusedMin = -1;
1278 int VgprIndexUnusedMin = -1;
1279 int AgprIndexUnusedMin = -1;
1280 MCContext *Ctx = nullptr;
1281 MCSubtargetInfo const *MSTI = nullptr;
1282
1283 void usesSgprAt(int i) {
1284 if (i >= SgprIndexUnusedMin) {
1285 SgprIndexUnusedMin = ++i;
1286 if (Ctx) {
1287 MCSymbol* const Sym =
1288 Ctx->getOrCreateSymbol(Name: Twine(".kernel.sgpr_count"));
1289 Sym->setVariableValue(MCConstantExpr::create(Value: SgprIndexUnusedMin, Ctx&: *Ctx));
1290 }
1291 }
1292 }
1293
1294 void usesVgprAt(int i) {
1295 if (i >= VgprIndexUnusedMin) {
1296 VgprIndexUnusedMin = ++i;
1297 if (Ctx) {
1298 MCSymbol* const Sym =
1299 Ctx->getOrCreateSymbol(Name: Twine(".kernel.vgpr_count"));
1300 int totalVGPR = getTotalNumVGPRs(has90AInsts: isGFX90A(STI: *MSTI), ArgNumAGPR: AgprIndexUnusedMin,
1301 ArgNumVGPR: VgprIndexUnusedMin);
1302 Sym->setVariableValue(MCConstantExpr::create(Value: totalVGPR, Ctx&: *Ctx));
1303 }
1304 }
1305 }
1306
1307 void usesAgprAt(int i) {
1308 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1309 if (!hasMAIInsts(STI: *MSTI))
1310 return;
1311
1312 if (i >= AgprIndexUnusedMin) {
1313 AgprIndexUnusedMin = ++i;
1314 if (Ctx) {
1315 MCSymbol* const Sym =
1316 Ctx->getOrCreateSymbol(Name: Twine(".kernel.agpr_count"));
1317 Sym->setVariableValue(MCConstantExpr::create(Value: AgprIndexUnusedMin, Ctx&: *Ctx));
1318
1319 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1320 MCSymbol* const vSym =
1321 Ctx->getOrCreateSymbol(Name: Twine(".kernel.vgpr_count"));
1322 int totalVGPR = getTotalNumVGPRs(has90AInsts: isGFX90A(STI: *MSTI), ArgNumAGPR: AgprIndexUnusedMin,
1323 ArgNumVGPR: VgprIndexUnusedMin);
1324 vSym->setVariableValue(MCConstantExpr::create(Value: totalVGPR, Ctx&: *Ctx));
1325 }
1326 }
1327 }
1328
1329public:
1330 KernelScopeInfo() = default;
1331
1332 void initialize(MCContext &Context) {
1333 Ctx = &Context;
1334 MSTI = Ctx->getSubtargetInfo();
1335
1336 usesSgprAt(i: SgprIndexUnusedMin = -1);
1337 usesVgprAt(i: VgprIndexUnusedMin = -1);
1338 if (hasMAIInsts(STI: *MSTI)) {
1339 usesAgprAt(i: AgprIndexUnusedMin = -1);
1340 }
1341 }
1342
1343 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1344 unsigned RegWidth) {
1345 switch (RegKind) {
1346 case IS_SGPR:
1347 usesSgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1);
1348 break;
1349 case IS_AGPR:
1350 usesAgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1);
1351 break;
1352 case IS_VGPR:
1353 usesVgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1);
1354 break;
1355 default:
1356 break;
1357 }
1358 }
1359};
1360
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Encoding constraints forced for the current instruction (e.g. a 64-bit
  // VOP3 encoding, DPP, or SDWA form); see the setForced*/isForced* accessors.
  // NOTE(review): presumably populated from mnemonic suffixes via
  // parseMnemonicSuffix — confirm.
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  // Hardware mode used to resolve per-mode register class IDs (see
  // getRegOperandSize below).
  const unsigned HwMode;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }
1378 /// Get size of register operand
1379 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1380 assert(OpNo < Desc.NumOperands);
1381 int16_t RCID = MII.getOpRegClassID(OpInfo: Desc.operands()[OpNo], HwModeId: HwMode);
1382 return getRegBitWidth(RCID) / 8;
1383 }
1384
private:
  // Directive and register-parsing helpers (defined out-of-line).

  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  // Register-list and register-name parsing machinery.
  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1469
  // Construct the parser, compute the available-feature set, and predefine
  // the assembler symbols that assembly sources may reference (ISA version,
  // per-kernel register counters, and microcode-version constants).
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
        HwMode(STI.getHwMode(type: MCSubtargetInfo::HwMode_RegInfo)) {
    MCAsmParserExtension::Initialize(Parser);

    setAvailableFeatures(ComputeAvailableFeatures(FB: getFeatureBits()));

    // The ISA-version symbol names differ between the HSA ABI and other ABIs.
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(STI: getSTI())) {
      createConstantSymbol(Id: ".amdgcn.gfx_generation_number", Val: ISA.Major);
      createConstantSymbol(Id: ".amdgcn.gfx_generation_minor", Val: ISA.Minor);
      createConstantSymbol(Id: ".amdgcn.gfx_generation_stepping", Val: ISA.Stepping);
    } else {
      createConstantSymbol(Id: ".option.machine_version_major", Val: ISA.Major);
      createConstantSymbol(Id: ".option.machine_version_minor", Val: ISA.Minor);
      createConstantSymbol(Id: ".option.machine_version_stepping", Val: ISA.Stepping);
    }
    // HSA ABI tracks register counts via dedicated symbols; otherwise the
    // KernelScopeInfo machinery maintains them.
    if (ISA.Major >= 6 && isHsaAbi(STI: getSTI())) {
      initializeGprCountSymbol(RegKind: IS_VGPR);
      initializeGprCountSymbol(RegKind: IS_SGPR);
    } else
      KernelScope.initialize(Context&: getContext());

    // Predefine UC_VERSION_* constants for conditional assembly.
    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Id: Symbol, Val: Code);

    createConstantSymbol(Id: "UC_VERSION_W64_BIT", Val: 0x2000);
    createConstantSymbol(Id: "UC_VERSION_W32_BIT", Val: 0x4000);
    createConstantSymbol(Id: "UC_VERSION_MDP_BIT", Val: 0x8000);
  }
1501
  // Subtarget capability and generation predicates, thin wrappers over
  // AMDGPU::* helpers and the feature bits.

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(STI: getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(STI: getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(STI: getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(STI: getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(STI: getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(STI: getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(STI: getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(STI: getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(STI: getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(STI: getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(STI: getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(STI: getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(STI: getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(STI: getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(STI: getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(STI: getSTI());
  }

  bool isGFX1170() const { return AMDGPU::isGFX1170(STI: getSTI()); }

  bool isGFX12() const { return AMDGPU::isGFX12(STI: getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(STI: getSTI()); }

  bool isGFX1250() const { return AMDGPU::isGFX1250(STI: getSTI()); }

  bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(STI: getSTI()); }

  bool isGFX13() const { return AMDGPU::isGFX13(STI: getSTI()); }

  bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(STI: getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(STI: getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(STI: getSTI());
  }

  bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }

  bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool has64BitLiterals() const {
    return getFeatureBits()[AMDGPU::Feature64BitLiterals];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  bool hasGloballyAddressableScratch() const {
    return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(STI: getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(STI: getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(STI: getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }
1635
  MCContext &getContext() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
  }

  const MCRegisterInfo *getMRI() const {
    return getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  // FIXME: This should not be used. Instead, should use queries derived from
  // getAvailableFeatures().
  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Forced-encoding state: constrains which encoding variants the matcher
  // may select for the current instruction.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;
1666
  // MCTargetAsmParser interface plus operand-parsing entry points (defined
  // out-of-line).

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  // Parsers for individual operand forms (prefixes, named bits, cache
  // policy, index keys, matrix formats/scales, etc.).

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                bool IgnoreNegative = false);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseCPol(OperandVector &Operands);
  ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
  ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                                    SMLoc &StringLoc);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         int64_t &IntVal);
  ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
                                         StringRef Name,
                                         ArrayRef<const char *> Ids,
                                         AMDGPUOperand::ImmTy Type);

  // Modifier detection and register/immediate-with-modifier parsing.
  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       LitModifier Lit = LitModifier::None);
  ParseStatus parseReg(OperandVector &Operands);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            LitModifier Lit = LitModifier::None);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus parseVReg32OrOff(OperandVector &Operands);
  ParseStatus tryParseIndexKey(OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy);
  ParseStatus parseIndexKey8bit(OperandVector &Operands);
  ParseStatus parseIndexKey16bit(OperandVector &Operands);
  ParseStatus parseIndexKey32bit(OperandVector &Operands);
  ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
                                AMDGPUOperand::ImmTy Type);
  ParseStatus parseMatrixAFMT(OperandVector &Operands);
  ParseStatus parseMatrixBFMT(OperandVector &Operands);
  ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
                                  AMDGPUOperand::ImmTy Type);
  ParseStatus parseMatrixAScale(OperandVector &Operands);
  ParseStatus parseMatrixBScale(OperandVector &Operands);
  ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
                                     AMDGPUOperand::ImmTy Type);
  ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
  ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);

  // Buffer/image format operand parsing.
  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                       int64_t &Format);
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                         int64_t &Format);
  ParseStatus parseFORMAT(OperandVector &Operands);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  ParseStatus parseFlatOffset(OperandVector &Operands);
  ParseStatus parseR128A16(OperandVector &Operands);
  ParseStatus parseBLGP(OperandVector &Operands);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  // Wait-count / dependency-counter operand parsing.
  bool parseCnt(int64_t &IntVal);
  ParseStatus parseSWaitCnt(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  ParseStatus parseDepCtr(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  ParseStatus parseSDelayALU(OperandVector &Operands);

  ParseStatus parseHwreg(OperandVector &Operands);
1786private:
  // Value of one parsed sub-field (e.g. of a sendmsg/hwreg operand) together
  // with its source location and how it was spelled.
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    // Set when the value was given by name rather than numerically — see the
    // OPR_ID_UNSUPPORTED check in StructuredOpField::validate.
    bool IsSymbolic = false;
    // Set once the field has been explicitly provided by the source.
    bool IsDefined = false;

    constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
  };
1795
  // One named, fixed-width field of a structured operand; carries its own
  // diagnostics metadata and a default validation policy.
  struct StructuredOpField : OperandInfoTy {
    StringLiteral Id;    // Field name as written in assembly.
    StringLiteral Desc;  // Human-readable description for diagnostics.
    unsigned Width;      // Field width in bits.
    // NOTE(review): this shadows OperandInfoTy::IsDefined; unqualified
    // accesses on a StructuredOpField touch this copy, not the base one —
    // confirm the duplication is intentional.
    bool IsDefined = false;

    constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
                                unsigned Width, int64_t Default)
        : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
    virtual ~StructuredOpField() = default;

    // Emit a field-specific parse error at the recorded location; always
    // returns false so callers can `return Error(...)`.
    bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
      Parser.Error(L: Loc, Msg: "invalid " + Desc + ": " + Err);
      return false;
    }

    // Default validation: reject unsupported symbolic values and values that
    // do not fit in Width bits.
    virtual bool validate(AMDGPUAsmParser &Parser) const {
      if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
        return Error(Parser, Err: "not supported on this GPU");
      if (!isUIntN(N: Width, x: Val))
        return Error(Parser, Err: "only " + Twine(Width) + "-bit values are legal");
      return true;
    }
  };
1820
  ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
  bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
                             OperandInfoTy &Width);

  static SMLoc getLaterLoc(SMLoc a, SMLoc b);

  // Source-location lookup helpers used when emitting diagnostics.
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
                  const OperandVector &Operands) const;
  SMLoc getInstLoc(const OperandVector &Operands) const;

  // Per-instruction semantic validation run after matching; each returns
  // false (after reporting) when the instruction is invalid.
  bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
                           const OperandVector &Operands);
  bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
                                                      bool AsVOPD3);
  bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
  bool tryVOPD(const MCInst &Inst);
  bool tryVOPD3(const MCInst &Inst);
  bool tryAnotherVOPDEncoding(const MCInst &Inst);

  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
  bool validateTensorR128(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateTrue16OpSel(const MCInst &Inst);
  bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(MCRegister Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateDS(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             SMLoc IDLoc);
  bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
                              const unsigned CPol);
  bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
  bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
  bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Mnemonic-availability queries for "unsupported instruction" diagnostics.
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);

  // Token-stream helpers.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  StringRef getId() const;
  bool trySkipId(const StringRef Id);
1909 bool trySkipId(const StringRef Pref, const StringRef Id);
1910 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1911 bool trySkipToken(const AsmToken::TokenKind Kind);
1912 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1913 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1914 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1915
1916 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1917 AsmToken::TokenKind getTokenKind() const;
1918 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1919 bool parseExpr(OperandVector &Operands);
1920 StringRef getTokenStr() const;
1921 AsmToken peekToken(bool ShouldSkipSpace = true);
1922 AsmToken getToken() const;
1923 SMLoc getLoc() const;
1924 void lex();
1925
1926public:
1927 void onBeginOfFile() override;
1928 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1929
1930 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1931
1932 ParseStatus parseExpTgt(OperandVector &Operands);
1933 ParseStatus parseSendMsg(OperandVector &Operands);
1934 ParseStatus parseWaitEvent(OperandVector &Operands);
1935 ParseStatus parseInterpSlot(OperandVector &Operands);
1936 ParseStatus parseInterpAttr(OperandVector &Operands);
1937 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1938 ParseStatus parseBoolReg(OperandVector &Operands);
1939
1940 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1941 const unsigned MaxVal, const Twine &ErrMsg,
1942 SMLoc &Loc);
1943 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1944 const unsigned MinVal,
1945 const unsigned MaxVal,
1946 const StringRef ErrMsg);
1947 ParseStatus parseSwizzle(OperandVector &Operands);
1948 bool parseSwizzleOffset(int64_t &Imm);
1949 bool parseSwizzleMacro(int64_t &Imm);
1950 bool parseSwizzleQuadPerm(int64_t &Imm);
1951 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1952 bool parseSwizzleBroadcast(int64_t &Imm);
1953 bool parseSwizzleSwap(int64_t &Imm);
1954 bool parseSwizzleReverse(int64_t &Imm);
1955 bool parseSwizzleFFT(int64_t &Imm);
1956 bool parseSwizzleRotate(int64_t &Imm);
1957
1958 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1959 int64_t parseGPRIdxMacro();
1960
1961 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: false); }
1962 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: true); }
1963
1964 ParseStatus parseOModSI(OperandVector &Operands);
1965
1966 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1967 OptionalImmIndexMap &OptionalIdx);
1968 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1969 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1970 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1971 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1972 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1973
1974 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1975 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1976 OptionalImmIndexMap &OptionalIdx);
1977 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1978 OptionalImmIndexMap &OptionalIdx);
1979
1980 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1981 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1982 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1983
1984 bool parseDimId(unsigned &Encoding);
1985 ParseStatus parseDim(OperandVector &Operands);
1986 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1987 ParseStatus parseDPP8(OperandVector &Operands);
1988 ParseStatus parseDPPCtrl(OperandVector &Operands);
1989 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1990 int64_t parseDPPCtrlSel(StringRef Ctrl);
1991 int64_t parseDPPCtrlPerm();
1992 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1993 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1994 cvtDPP(Inst, Operands, IsDPP8: true);
1995 }
1996 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1997 bool IsDPP8 = false);
1998 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1999 cvtVOP3DPP(Inst, Operands, IsDPP8: true);
2000 }
2001
2002 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2003 AMDGPUOperand::ImmTy Type);
2004 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2005 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2006 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2007 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2008 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2009 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2010 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2011 uint64_t BasicInstType,
2012 bool SkipDstVcc = false,
2013 bool SkipSrcVcc = false);
2014
2015 ParseStatus parseEndpgm(OperandVector &Operands);
2016
2017 ParseStatus parseVOPD(OperandVector &Operands);
2018};
2019
2020} // end anonymous namespace
2021
2022// May be called with integer type with equivalent bitwidth.
2023static const fltSemantics *getFltSemantics(unsigned Size) {
2024 switch (Size) {
2025 case 4:
2026 return &APFloat::IEEEsingle();
2027 case 8:
2028 return &APFloat::IEEEdouble();
2029 case 2:
2030 return &APFloat::IEEEhalf();
2031 default:
2032 llvm_unreachable("unsupported fp type");
2033 }
2034}
2035
2036static const fltSemantics *getFltSemantics(MVT VT) {
2037 return getFltSemantics(Size: VT.getSizeInBits() / 8);
2038}
2039
// Map an MC operand-type tag to the floating-point semantics used when
// encoding an FP literal for that operand.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // When floating-point immediate is used as operand of type i16, the 32-bit
  // representation of the constant truncated to the 16 LSBs should be used.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    // All 32-bit (and 16-bit integer) operand kinds encode via f32.
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
  case AMDGPU::OPERAND_KIMM64:
    // 64-bit operand kinds encode via f64.
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    // 16-bit FP operand kinds encode via f16.
    return &APFloat::IEEEhalf();
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
    // bfloat16 operand kinds.
    return &APFloat::BFloat();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
2083
2084//===----------------------------------------------------------------------===//
2085// Operand
2086//===----------------------------------------------------------------------===//
2087
2088static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2089 bool Lost;
2090
2091 // Convert literal to single precision
2092 APFloat::opStatus Status = FPLiteral.convert(ToSemantics: *getFltSemantics(VT),
2093 RM: APFloat::rmNearestTiesToEven,
2094 losesInfo: &Lost);
2095 // We allow precision lost but not overflow or underflow
2096 if (Status != APFloat::opOK &&
2097 Lost &&
2098 ((Status & APFloat::opOverflow) != 0 ||
2099 (Status & APFloat::opUnderflow) != 0)) {
2100 return false;
2101 }
2102
2103 return true;
2104}
2105
2106static bool isSafeTruncation(int64_t Val, unsigned Size) {
2107 return isUIntN(N: Size, x: Val) || isIntN(N: Size, x: Val);
2108}
2109
2110static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2111 if (VT.getScalarType() == MVT::i16)
2112 return isInlinableLiteral32(Literal: Val, HasInv2Pi);
2113
2114 if (VT.getScalarType() == MVT::f16)
2115 return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi);
2116
2117 assert(VT.getScalarType() == MVT::bf16);
2118
2119 return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi);
2120}
2121
/// Return true if this parsed immediate can be encoded as an inline
/// constant for an operand of the given machine value type.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmT: ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }

  // An explicit lit()/lit64() modifier forces literal encoding.
  if (getModifiers().Lit != LitModifier::None)
    return false;

  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Literal: Imm.Val,
                                          HasInv2Pi: AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: first make sure the f64 token converts to the
    // operand type without overflow/underflow.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, VT: type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      bool Lost = false;
      switch (type.getScalarType().SimpleTy) {
      default:
        llvm_unreachable("unknown 16-bit type");
      case MVT::bf16:
        FPLiteral.convert(ToSemantics: APFloatBase::BFloat(), RM: APFloat::rmNearestTiesToEven,
                          losesInfo: &Lost);
        break;
      case MVT::f16:
        FPLiteral.convert(ToSemantics: APFloatBase::IEEEhalf(), RM: APFloat::rmNearestTiesToEven,
                          losesInfo: &Lost);
        break;
      case MVT::i16:
        FPLiteral.convert(ToSemantics: APFloatBase::IEEEsingle(),
                          RM: APFloat::rmNearestTiesToEven, losesInfo: &Lost);
        break;
      }
      // We need to use 32-bit representation here because when a floating-point
      // inline constant is used as an i16 operand, its 32-bit representation
      // representation will be used. We will need the 32-bit value to check if
      // it is FP inline constant.
      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      return isInlineableLiteralOp16(Val: ImmVal, VT: type,
                                     HasInv2Pi: AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
        Literal: static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        HasInv2Pi: AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Literal: Imm.Val,
                                        HasInv2Pi: AsmParser->hasInv2PiInlineImm());
  }

  // The literal must fit the operand width (signed or unsigned).
  if (!isSafeTruncation(Val: Imm.Val, Size: type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
        Val: static_cast<int16_t>(Literal.getLoBits(numBits: 16).getSExtValue()),
        VT: type, HasInv2Pi: AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
      Literal: static_cast<int32_t>(Literal.getLoBits(numBits: 32).getZExtValue()),
      HasInv2Pi: AsmParser->hasInv2PiInlineImm());
}
2209
/// Return true if this parsed immediate may be emitted as a literal
/// (non-inline) constant for an operand of the given machine value type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmT: ImmTyNone)) {
    return false;
  }

  // True 64-bit literals are only valid for 64-bit operands on targets
  // that support them.
  bool Allow64Bit =
      (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64) {
      // A value that does not fit 32 bits may still be usable as a full
      // 64-bit literal; otherwise fall back to the 32-bit check.
      if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Val: Imm.Val, IsFP64: false))
        return true;
      Size = 32;
    }

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Val: Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  //
  // For i16x2 operands, we assume that the specified literal is encoded as a
  // single-precision float. This is pretty odd, but it matches SP3 and what
  // happens in hardware.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
                     : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, VT: ExpectedType);
}
2268
2269bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2270 return isRegKind() && AsmParser->getMRI()->getRegClass(i: RCID).contains(Reg: getReg());
2271}
2272
2273bool AMDGPUOperand::isVRegWithInputMods() const {
2274 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) ||
2275 // GFX90A allows DPP on 64-bit operands.
2276 (isRegClass(RCID: AMDGPU::VReg_64RegClassID) &&
2277 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2278}
2279
2280template <bool IsFake16>
2281bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2282 return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2283 : AMDGPU::VGPR_16_Lo128RegClassID);
2284}
2285
2286template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2287 return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32RegClassID
2288 : AMDGPU::VGPR_16RegClassID);
2289}
2290
2291bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2292 if (AsmParser->isVI())
2293 return isVReg32();
2294 if (AsmParser->isGFX9Plus())
2295 return isRegClass(RCID: AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2296 return false;
2297}
2298
2299bool AMDGPUOperand::isSDWAFP16Operand() const {
2300 return isSDWAOperand(type: MVT::f16);
2301}
2302
2303bool AMDGPUOperand::isSDWAFP32Operand() const {
2304 return isSDWAOperand(type: MVT::f32);
2305}
2306
2307bool AMDGPUOperand::isSDWAInt16Operand() const {
2308 return isSDWAOperand(type: MVT::i16);
2309}
2310
2311bool AMDGPUOperand::isSDWAInt32Operand() const {
2312 return isSDWAOperand(type: MVT::i32);
2313}
2314
2315bool AMDGPUOperand::isBoolReg() const {
2316 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2317 (AsmParser->isWave32() && isSCSrc_b32()));
2318}
2319
2320uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2321{
2322 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2323 assert(Size == 2 || Size == 4 || Size == 8);
2324
2325 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2326
2327 if (Imm.Mods.Abs) {
2328 Val &= ~FpSignMask;
2329 }
2330 if (Imm.Mods.Neg) {
2331 Val ^= FpSignMask;
2332 }
2333
2334 return Val;
2335}
2336
2337void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2338 MCOpIdx = Inst.getNumOperands();
2339
2340 if (isExpr()) {
2341 Inst.addOperand(Op: MCOperand::createExpr(Val: Expr));
2342 return;
2343 }
2344
2345 if (AMDGPU::isSISrcOperand(Desc: AsmParser->getMII()->get(Opcode: Inst.getOpcode()),
2346 OpNo: Inst.getNumOperands())) {
2347 addLiteralImmOperand(Inst, Val: Imm.Val,
2348 ApplyModifiers: ApplyModifiers &
2349 isImmTy(ImmT: ImmTyNone) && Imm.Mods.hasFPModifiers());
2350 } else {
2351 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2352 Inst.addOperand(Op: MCOperand::createImm(Val: Imm.Val));
2353 }
2354}
2355
/// Encode \p Val as a literal or inline constant and append it to \p Inst.
/// The encoding depends on the destination operand's type tag, on whether
/// the parsed token was FP or integer, and on any explicit lit()/lit64()
/// modifier.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Opcode: Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP tokens are kept as 64-bit doubles at this point, so the sign bit
    // manipulated by the modifiers is the f64 one.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(Desc: InstDesc, OpNo: OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  // VOP3/VOP3P encodings cannot carry a 64-bit literal even on targets
  // that otherwise support them.
  bool CanUse64BitLiterals =
      AsmParser->has64BitLiterals() &&
      !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
  LitModifier Lit = getModifiers().Lit;
  MCContext &Ctx = AsmParser->getContext();

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      // Inline constant wins unless an explicit lit modifier forbids it.
      if (Lit == LitModifier::None &&
          AMDGPU::isInlinableLiteral64(Literal: Literal.getZExtValue(),
                                       HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(Desc: InstDesc,
                                   OpNo: OpNum)) { // Expected 64-bit fp operand
        bool HasMandatoryLiteral =
            AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::imm);
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(numBits: 32) != 0 &&
            (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
            !HasMandatoryLiteral) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
              L: Inst.getLoc(),
              Msg: "Can't encode literal as exact 64-bit floating-point operand. "
              "Low 32-bits will be set to zero");
          Val &= 0xffffffff00000000u;
        }

        if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
             OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
             OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) {
          if (CanUse64BitLiterals && Lit == LitModifier::None &&
              (isInt<32>(x: Val) || isUInt<32>(x: Val))) {
            // The floating-point operand will be verbalized as an
            // integer one. If that integer happens to fit 32 bits, on
            // re-assembling it will be intepreted as the high half of
            // the actual value, so we have to wrap it into lit64().
            Lit = LitModifier::Lit64;
          } else if (Lit == LitModifier::Lit) {
            // For FP64 operands lit() specifies the high half of the value.
            Val = Hi_32(Value: Val);
          }
        }
        break;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_KIMM64:
      // A 32-bit-looking value would re-assemble as the high half; force
      // lit64() to preserve it exactly.
      if (CanUse64BitLiterals && Lit == LitModifier::None &&
          (isInt<32>(x: Val) || isUInt<32>(x: Val)))
        Lit = LitModifier::Lit64;
      break;

    case AMDGPU::OPERAND_REG_IMM_BF16:
    case AMDGPU::OPERAND_REG_INLINE_C_BF16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    case AMDGPU::OPERAND_REG_IMM_V2BF16:
      if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
          Literal == 0x3fc45f306725feed) {
        // This is the 1/(2*pi) which is going to be truncated to bf16 with the
        // loss of precision. The constant represents ideomatic fp32 value of
        // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
        // bits. Prevent rounding below.
        Inst.addOperand(Op: MCOperand::createImm(Val: 0x3e22));
        return;
      }
      [[fallthrough]];

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
    case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
    case AMDGPU::OPERAND_KIMM32:
    case AMDGPU::OPERAND_KIMM16:
    case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(ToSemantics: *getOpFltSemantics(OperandType: OpTy),
                        RM: APFloat::rmNearestTiesToEven, losesInfo: &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      Val = FPLiteral.bitcastToAPInt().getZExtValue();
      break;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    // An explicit lit()/lit64() survives as a wrapping expression so the
    // encoder emits the requested literal form.
    if (Lit != LitModifier::None) {
      Inst.addOperand(
          Op: MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(Lit, Value: Val, Ctx)));
    } else {
      Inst.addOperand(Op: MCOperand::createImm(Val));
    }
    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    // 32-bit (and packed) operands: value is used as-is.
    break;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    if (Lit == LitModifier::None &&
        AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(Op: MCOperand::createImm(Val));
      return;
    }

    // When the 32 MSBs are not zero (effectively means it can't be safely
    // truncated to uint32_t), if the target doesn't support 64-bit literals, or
    // the lit modifier is explicitly used, we need to truncate it to the 32
    // LSBs.
    if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
      Val = Lo_32(Value: Val);
    break;

  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (Lit == LitModifier::None &&
        AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(Op: MCOperand::createImm(Val));
      return;
    }

    // If the target doesn't support 64-bit literals, we need to use the
    // constant as the high 32 MSBs of a double-precision floating point value.
    if (!AsmParser->has64BitLiterals()) {
      Val = static_cast<uint64_t>(Val) << 32;
    } else {
      // Now the target does support 64-bit literals, there are two cases
      // where we still want to use src_literal encoding:
      // 1) explicitly forced by using lit modifier;
      // 2) the value is a valid 32-bit representation (signed or unsigned),
      // meanwhile not forced by lit64 modifier.
      if (Lit == LitModifier::Lit ||
          (Lit != LitModifier::Lit64 && (isInt<32>(x: Val) || isUInt<32>(x: Val))))
        Val = static_cast<uint64_t>(Val) << 32;
    }

    // For FP64 operands lit() specifies the high half of the value.
    if (Lit == LitModifier::Lit)
      Val = Hi_32(Value: Val);
    break;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16:
    // 16-bit operands and 16/32-bit mandatory literals: value used as-is.
    break;

  case AMDGPU::OPERAND_KIMM64:
    // Without an explicit lit64(), a 32-bit-looking value is stored in the
    // high half (it will re-assemble from there).
    if ((isInt<32>(x: Val) || isUInt<32>(x: Val)) && Lit != LitModifier::Lit64)
      Val <<= 32;
    break;

  default:
    llvm_unreachable("invalid operand type");
  }

  // An explicit lit()/lit64() survives as a wrapping expression so the
  // encoder emits the requested literal form.
  if (Lit != LitModifier::None) {
    Inst.addOperand(
        Op: MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(Lit, Value: Val, Ctx)));
  } else {
    Inst.addOperand(Op: MCOperand::createImm(Val));
  }
}
2589
2590void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2591 MCOpIdx = Inst.getNumOperands();
2592 Inst.addOperand(Op: MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: getReg(), STI: AsmParser->getSTI())));
2593}
2594
2595bool AMDGPUOperand::isInlineValue() const {
2596 return isRegKind() && ::isInlineValue(Reg: getReg());
2597}
2598
2599//===----------------------------------------------------------------------===//
2600// AsmParser
2601//===----------------------------------------------------------------------===//
2602
2603void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2604 // TODO: make those pre-defined variables read-only.
2605 // Currently there is none suitable machinery in the core llvm-mc for this.
2606 // MCSymbol::isRedefinable is intended for another purpose, and
2607 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2608 MCContext &Ctx = getContext();
2609 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id);
2610 Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx));
2611}
2612
// Map a register kind plus a width in bits to the corresponding target
// register class ID, or -1 when no class of that width exists for the kind.
// Note the supported width sets differ per kind (e.g. TTMPs only come in a
// few widths; only VGPRs and AGPRs have a 1024-bit class).
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32:
      return AMDGPU::VGPR_32RegClassID;
    case 64:
      return AMDGPU::VReg_64RegClassID;
    case 96:
      return AMDGPU::VReg_96RegClassID;
    case 128:
      return AMDGPU::VReg_128RegClassID;
    case 160:
      return AMDGPU::VReg_160RegClassID;
    case 192:
      return AMDGPU::VReg_192RegClassID;
    case 224:
      return AMDGPU::VReg_224RegClassID;
    case 256:
      return AMDGPU::VReg_256RegClassID;
    case 288:
      return AMDGPU::VReg_288RegClassID;
    case 320:
      return AMDGPU::VReg_320RegClassID;
    case 352:
      return AMDGPU::VReg_352RegClassID;
    case 384:
      return AMDGPU::VReg_384RegClassID;
    case 512:
      return AMDGPU::VReg_512RegClassID;
    case 1024:
      return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
    default: return -1;
    case 32:
      return AMDGPU::TTMP_32RegClassID;
    case 64:
      return AMDGPU::TTMP_64RegClassID;
    case 128:
      return AMDGPU::TTMP_128RegClassID;
    case 256:
      return AMDGPU::TTMP_256RegClassID;
    case 512:
      return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32:
      return AMDGPU::SGPR_32RegClassID;
    case 64:
      return AMDGPU::SGPR_64RegClassID;
    case 96:
      return AMDGPU::SGPR_96RegClassID;
    case 128:
      return AMDGPU::SGPR_128RegClassID;
    case 160:
      return AMDGPU::SGPR_160RegClassID;
    case 192:
      return AMDGPU::SGPR_192RegClassID;
    case 224:
      return AMDGPU::SGPR_224RegClassID;
    case 256:
      return AMDGPU::SGPR_256RegClassID;
    case 288:
      return AMDGPU::SGPR_288RegClassID;
    case 320:
      return AMDGPU::SGPR_320RegClassID;
    case 352:
      return AMDGPU::SGPR_352RegClassID;
    case 384:
      return AMDGPU::SGPR_384RegClassID;
    case 512:
      return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32:
      return AMDGPU::AGPR_32RegClassID;
    case 64:
      return AMDGPU::AReg_64RegClassID;
    case 96:
      return AMDGPU::AReg_96RegClassID;
    case 128:
      return AMDGPU::AReg_128RegClassID;
    case 160:
      return AMDGPU::AReg_160RegClassID;
    case 192:
      return AMDGPU::AReg_192RegClassID;
    case 224:
      return AMDGPU::AReg_224RegClassID;
    case 256:
      return AMDGPU::AReg_256RegClassID;
    case 288:
      return AMDGPU::AReg_288RegClassID;
    case 320:
      return AMDGPU::AReg_320RegClassID;
    case 352:
      return AMDGPU::AReg_352RegClassID;
    case 384:
      return AMDGPU::AReg_384RegClassID;
    case 512:
      return AMDGPU::AReg_512RegClassID;
    case 1024:
      return AMDGPU::AReg_1024RegClassID;
    }
  }
  // IS_UNKNOWN / IS_SPECIAL have no generic register class.
  return -1;
}
2725
// Map a special (non-indexed) register name to its MCRegister, or
// AMDGPU::NoRegister if the name is not a special register. Several names
// are accepted both with and without the "src_" prefix.
static MCRegister getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case(S: "exec", Value: AMDGPU::EXEC)
    .Case(S: "vcc", Value: AMDGPU::VCC)
    .Case(S: "flat_scratch", Value: AMDGPU::FLAT_SCR)
    .Case(S: "xnack_mask", Value: AMDGPU::XNACK_MASK)
    .Case(S: "shared_base", Value: AMDGPU::SRC_SHARED_BASE)
    .Case(S: "src_shared_base", Value: AMDGPU::SRC_SHARED_BASE)
    .Case(S: "shared_limit", Value: AMDGPU::SRC_SHARED_LIMIT)
    .Case(S: "src_shared_limit", Value: AMDGPU::SRC_SHARED_LIMIT)
    .Case(S: "private_base", Value: AMDGPU::SRC_PRIVATE_BASE)
    .Case(S: "src_private_base", Value: AMDGPU::SRC_PRIVATE_BASE)
    .Case(S: "private_limit", Value: AMDGPU::SRC_PRIVATE_LIMIT)
    .Case(S: "src_private_limit", Value: AMDGPU::SRC_PRIVATE_LIMIT)
    .Case(S: "src_flat_scratch_base_lo", Value: AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
    .Case(S: "src_flat_scratch_base_hi", Value: AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
    .Case(S: "pops_exiting_wave_id", Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case(S: "src_pops_exiting_wave_id", Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case(S: "lds_direct", Value: AMDGPU::LDS_DIRECT)
    .Case(S: "src_lds_direct", Value: AMDGPU::LDS_DIRECT)
    .Case(S: "m0", Value: AMDGPU::M0)
    .Case(S: "vccz", Value: AMDGPU::SRC_VCCZ)
    .Case(S: "src_vccz", Value: AMDGPU::SRC_VCCZ)
    .Case(S: "execz", Value: AMDGPU::SRC_EXECZ)
    .Case(S: "src_execz", Value: AMDGPU::SRC_EXECZ)
    .Case(S: "scc", Value: AMDGPU::SRC_SCC)
    .Case(S: "src_scc", Value: AMDGPU::SRC_SCC)
    .Case(S: "tba", Value: AMDGPU::TBA)
    .Case(S: "tma", Value: AMDGPU::TMA)
    .Case(S: "flat_scratch_lo", Value: AMDGPU::FLAT_SCR_LO)
    .Case(S: "flat_scratch_hi", Value: AMDGPU::FLAT_SCR_HI)
    .Case(S: "xnack_mask_lo", Value: AMDGPU::XNACK_MASK_LO)
    .Case(S: "xnack_mask_hi", Value: AMDGPU::XNACK_MASK_HI)
    .Case(S: "vcc_lo", Value: AMDGPU::VCC_LO)
    .Case(S: "vcc_hi", Value: AMDGPU::VCC_HI)
    .Case(S: "exec_lo", Value: AMDGPU::EXEC_LO)
    .Case(S: "exec_hi", Value: AMDGPU::EXEC_HI)
    .Case(S: "tma_lo", Value: AMDGPU::TMA_LO)
    .Case(S: "tma_hi", Value: AMDGPU::TMA_HI)
    .Case(S: "tba_lo", Value: AMDGPU::TBA_LO)
    .Case(S: "tba_hi", Value: AMDGPU::TBA_HI)
    .Case(S: "pc", Value: AMDGPU::PC_REG)
    .Case(S: "null", Value: AMDGPU::SGPR_NULL)
    .Default(Value: AMDGPU::NoRegister)
}
2771
// Generic MCTargetAsmParser register-parsing entry point. Parses a register
// operand and reports its value and source range. Returns true on failure
// (standard MC parser convention).
// NOTE(review): RestoreOnFailure is not forwarded to parseRegister() here,
// so this path performs no token restoration itself -- confirm intended.
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}
2782
// MCTargetAsmParser override: parse a register without token restoration.
// Returns true on failure.
bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
2787
2788ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2789 SMLoc &EndLoc) {
2790 bool Result = ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2791 bool PendingErrors = getParser().hasPendingError();
2792 getParser().clearPendingErrors();
2793 if (PendingErrors)
2794 return ParseStatus::Failure;
2795 if (Result)
2796 return ParseStatus::NoMatch;
2797 return ParseStatus::Success;
2798}
2799
2800bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2801 RegisterKind RegKind,
2802 MCRegister Reg1, SMLoc Loc) {
2803 switch (RegKind) {
2804 case IS_SPECIAL:
2805 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2806 Reg = AMDGPU::EXEC;
2807 RegWidth = 64;
2808 return true;
2809 }
2810 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2811 Reg = AMDGPU::FLAT_SCR;
2812 RegWidth = 64;
2813 return true;
2814 }
2815 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2816 Reg = AMDGPU::XNACK_MASK;
2817 RegWidth = 64;
2818 return true;
2819 }
2820 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2821 Reg = AMDGPU::VCC;
2822 RegWidth = 64;
2823 return true;
2824 }
2825 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2826 Reg = AMDGPU::TBA;
2827 RegWidth = 64;
2828 return true;
2829 }
2830 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2831 Reg = AMDGPU::TMA;
2832 RegWidth = 64;
2833 return true;
2834 }
2835 Error(L: Loc, Msg: "register does not fit in the list");
2836 return false;
2837 case IS_VGPR:
2838 case IS_SGPR:
2839 case IS_AGPR:
2840 case IS_TTMP:
2841 if (Reg1 != Reg + RegWidth / 32) {
2842 Error(L: Loc, Msg: "registers in a list must have consecutive indices");
2843 return false;
2844 }
2845 RegWidth += 32;
2846 return true;
2847 default:
2848 llvm_unreachable("unexpected register kind");
2849 }
2850}
2851
// Associates a regular register name prefix (e.g. "v", "ttmp") with the
// register file it designates.
struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};
2856
// Table of regular (indexable) register name prefixes. Lookup in
// getRegularRegInfo() uses starts_with() and returns the first match, so
// longer prefixes that share a leading character must come first
// ("acc" before "a").
static constexpr RegInfo RegularRegisters[] = {
  {.Name: {"v"}, .Kind: IS_VGPR},
  {.Name: {"s"}, .Kind: IS_SGPR},
  {.Name: {"ttmp"}, .Kind: IS_TTMP},
  {.Name: {"acc"}, .Kind: IS_AGPR},
  {.Name: {"a"}, .Kind: IS_AGPR},
};
2864
2865static bool isRegularReg(RegisterKind Kind) {
2866 return Kind == IS_VGPR ||
2867 Kind == IS_SGPR ||
2868 Kind == IS_TTMP ||
2869 Kind == IS_AGPR;
2870}
2871
2872static const RegInfo* getRegularRegInfo(StringRef Str) {
2873 for (const RegInfo &Reg : RegularRegisters)
2874 if (Str.starts_with(Prefix: Reg.Name))
2875 return &Reg;
2876 return nullptr;
2877}
2878
2879static bool getRegNum(StringRef Str, unsigned& Num) {
2880 return !Str.getAsInteger(Radix: 10, Result&: Num);
2881}
2882
// Lexical test: does the token (plus one token of lookahead) start a
// register operand? This only inspects syntax; it does not validate the
// register index or availability on the current subtarget.
bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(K: AsmToken::LBrac))
    return true;

  if (!Token.is(K: AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef Str = Token.getString();
  const RegInfo *Reg = getRegularRegInfo(Str);
  if (Reg) {
    StringRef RegName = Reg->Name;
    StringRef RegSuffix = Str.substr(Start: RegName.size());
    if (!RegSuffix.empty()) {
      // Strip an optional 16-bit half-register suffix (at most one of the
      // two consume_back calls can apply).
      RegSuffix.consume_back(Suffix: ".l");
      RegSuffix.consume_back(Suffix: ".h");
      unsigned Num;
      // A single register with an index: rXX
      if (getRegNum(Str: RegSuffix, Num))
        return true;
    } else {
      // A range of registers: r[XX:YY].
      if (NextToken.is(K: AsmToken::LBrac))
        return true;
    }
  }

  return getSpecialRegForName(RegName: Str).isValid();
}
2917
// Convenience overload: test the current token (with one token lookahead)
// against the register syntax.
bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(Token: getToken(), NextToken: peekToken());
}
2923
2924MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2925 unsigned SubReg, unsigned RegWidth,
2926 SMLoc Loc) {
2927 assert(isRegularReg(RegKind));
2928
2929 unsigned AlignSize = 1;
2930 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2931 // SGPR and TTMP registers must be aligned.
2932 // Max required alignment is 4 dwords.
2933 AlignSize = std::min(a: llvm::bit_ceil(Value: RegWidth / 32), b: 4u);
2934 }
2935
2936 if (RegNum % AlignSize != 0) {
2937 Error(L: Loc, Msg: "invalid register alignment");
2938 return MCRegister();
2939 }
2940
2941 unsigned RegIdx = RegNum / AlignSize;
2942 int RCID = getRegClass(Is: RegKind, RegWidth);
2943 if (RCID == -1) {
2944 Error(L: Loc, Msg: "invalid or unsupported register size");
2945 return MCRegister();
2946 }
2947
2948 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2949 const MCRegisterClass RC = TRI->getRegClass(i: RCID);
2950 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2951 Error(L: Loc, Msg: "register index is out of range");
2952 return AMDGPU::NoRegister;
2953 }
2954
2955 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2956 Error(L: Loc, Msg: "register index is out of range");
2957 return MCRegister();
2958 }
2959
2960 MCRegister Reg = RC.getRegister(i: RegIdx);
2961
2962 if (SubReg) {
2963 Reg = TRI->getSubReg(Reg, Idx: SubReg);
2964
2965 // Currently all regular registers have their .l and .h subregisters, so
2966 // we should never need to generate an error here.
2967 assert(Reg && "Invalid subregister!");
2968 }
2969
2970 return Reg;
2971}
2972
// Parse a bracketed register index or index range: "[XX]" or "[XX:YY]".
// On success, sets Num to the first index, RegWidth to the total width in
// bits, and, for a single-register range followed by ".l"/".h", sets SubReg
// to the corresponding 16-bit sub-register index. Returns false (with a
// diagnostic) on malformed input.
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
                                    unsigned &SubReg) {
  int64_t RegLo, RegHi;
  if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(Imm&: RegLo))
    return false;

  // ":YY" is optional; "[XX]" means a single register.
  if (trySkipToken(Kind: AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(Imm&: RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(x: RegLo)) {
    Error(L: FirstIdxLoc, Msg: "invalid register index");
    return false;
  }

  if (!isUInt<32>(x: RegHi)) {
    Error(L: SecondIdxLoc, Msg: "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(L: FirstIdxLoc, Msg: "first register index should not exceed second index");
    return false;
  }

  // A 16-bit half suffix is only meaningful on a single 32-bit register.
  if (RegHi == RegLo) {
    StringRef RegSuffix = getTokenStr();
    if (RegSuffix == ".l") {
      SubReg = AMDGPU::lo16;
      lex();
    } else if (RegSuffix == ".h") {
      SubReg = AMDGPU::hi16;
      lex();
    }
  }

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);

  return true;
}
3027
3028MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3029 unsigned &RegNum,
3030 unsigned &RegWidth,
3031 SmallVectorImpl<AsmToken> &Tokens) {
3032 assert(isToken(AsmToken::Identifier));
3033 MCRegister Reg = getSpecialRegForName(RegName: getTokenStr());
3034 if (Reg) {
3035 RegNum = 0;
3036 RegWidth = 32;
3037 RegKind = IS_SPECIAL;
3038 Tokens.push_back(Elt: getToken());
3039 lex(); // skip register name
3040 }
3041 return Reg;
3042}
3043
// Parse a regular register reference: a prefix (v/s/a/acc/ttmp) followed by
// either an inline index ("v12", optionally with a ".l"/".h" half suffix) or
// a bracketed range ("v[0:3]"). Consumed tokens are recorded in Tokens for
// possible restoration. Returns a null MCRegister (with a diagnostic) on
// error.
MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                            unsigned &RegNum,
                                            unsigned &RegWidth,
                                            SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  // Capture the location before consuming the name so diagnostics point at
  // the start of the register.
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(Str: RegName);
  if (!RI) {
    Error(L: Loc, Msg: "invalid register name");
    return MCRegister();
  }

  Tokens.push_back(Elt: getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(Start: RI->Name.size());
  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
    // An optional half-register suffix selects a 16-bit sub-register.
    if (RegSuffix.consume_back(Suffix: ".l"))
      SubReg = AMDGPU::lo16;
    else if (RegSuffix.consume_back(Suffix: ".h"))
      SubReg = AMDGPU::hi16;

    // Single 32-bit register: vXX.
    if (!getRegNum(Str: RegSuffix, Num&: RegNum)) {
      Error(L: Loc, Msg: "invalid register index");
      return MCRegister();
    }
    RegWidth = 32;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(Num&: RegNum, RegWidth, SubReg))
      return MCRegister();
  }

  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
}
3084
// Parse a bracketed list of consecutive single 32-bit registers, e.g.
// [s0,s1,s2,s3], and fold it into one wide register group. All elements
// must be 32-bit, of the same kind, and have consecutive indices. Returns a
// null MCRegister (with a diagnostic) on error.
MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
                                         unsigned &RegNum, unsigned &RegWidth,
                                         SmallVectorImpl<AsmToken> &Tokens) {
  MCRegister Reg;
  auto ListLoc = getLoc();

  if (!skipToken(Kind: AsmToken::LBrac,
                 ErrMsg: "expected a register or a list of registers")) {
    return MCRegister();
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return MCRegister();
  if (RegWidth != 32) {
    Error(L: Loc, Msg: "expected a single 32-bit register");
    return MCRegister();
  }

  // Each subsequent element must match the kind of the first and extend the
  // group by one dword (checked by AddNextRegisterToList).
  for (; trySkipToken(Kind: AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    MCRegister NextReg;
    unsigned NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(RegKind&: NextRegKind, Reg&: NextReg,
                             RegNum&: NextRegNum, RegWidth&: NextRegWidth,
                             Tokens)) {
      return MCRegister();
    }
    if (NextRegWidth != 32) {
      Error(L: Loc, Msg: "expected a single 32-bit register");
      return MCRegister();
    }
    if (NextRegKind != RegKind) {
      Error(L: Loc, Msg: "registers in a list must be of the same kind");
      return MCRegister();
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, Reg1: NextReg, Loc))
      return MCRegister();
  }

  if (!skipToken(Kind: AsmToken::RBrac,
                 ErrMsg: "expected a comma or a closing square bracket")) {
    return MCRegister();
  }

  // Re-resolve regular registers so the accumulated width picks the proper
  // wide register class (alignment is validated against the list start).
  if (isRegularReg(Kind: RegKind))
    Reg = getRegularReg(RegKind, RegNum, SubReg: NoSubRegister, RegWidth, Loc: ListLoc);

  return Reg;
}
3139
// Dispatch register parsing: an identifier is tried as a special register
// first, then as a regular register; anything else is parsed as a register
// list. Also validates availability on the current subtarget. Returns true
// on success; on failure a diagnostic has already been emitted.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          MCRegister &Reg, unsigned &RegNum,
                                          unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = MCRegister();

  if (isToken(Kind: AsmToken::Identifier)) {
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (!Reg)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (!Reg) {
    // The sub-parsers report their own diagnostics.
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(MRI: *TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(L: Loc, Msg: "'null' operand is not supported on this GPU");
    } else {
      Error(L: Loc, Msg: Twine(AMDGPUInstPrinter::getRegisterName(Reg)) +
                    " register not available on this GPU");
    }
    return false;
  }

  return true;
}
3173
3174bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3175 MCRegister &Reg, unsigned &RegNum,
3176 unsigned &RegWidth,
3177 bool RestoreOnFailure /*=false*/) {
3178 Reg = MCRegister();
3179
3180 SmallVector<AsmToken, 1> Tokens;
3181 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3182 if (RestoreOnFailure) {
3183 while (!Tokens.empty()) {
3184 getLexer().UnLex(Token: Tokens.pop_back_val());
3185 }
3186 }
3187 return true;
3188 }
3189 return false;
3190}
3191
3192std::optional<StringRef>
3193AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3194 switch (RegKind) {
3195 case IS_VGPR:
3196 return StringRef(".amdgcn.next_free_vgpr");
3197 case IS_SGPR:
3198 return StringRef(".amdgcn.next_free_sgpr");
3199 default:
3200 return std::nullopt;
3201 }
3202}
3203
// Create (or reset) the .amdgcn.next_free_{v,s}gpr symbol with an initial
// value of 0 so that subsequent register uses can raise it (see
// updateGprCountSymbols).
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(Value: 0, Ctx&: getContext()));
  // The symbol is bumped on every register use, so it must be redefinable.
  Sym->setRedefinable(true);
}
3211
// Raise the .amdgcn.next_free_{v,s}gpr symbol to cover a register use that
// starts at DwordRegIndex and spans RegWidth bits. Returns true on success
// (including when no symbol applies), false after emitting a diagnostic.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(GPU: getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName);

  // Highest dword index touched by this register use.
  int64_t NewMax = DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1;
  int64_t OldCount;

  // Error() always returns true, so "!Error(...)" reports failure (false)
  // after emitting the diagnostic.
  if (!Sym->isVariable())
    return !Error(L: getLoc(),
                  Msg: ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue()->evaluateAsAbsolute(Res&: OldCount))
    return !Error(
        L: getLoc(),
        Msg: ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // Only ever grow the count; the symbol holds "next free", i.e. max + 1.
  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(Value: NewMax + 1, Ctx&: getContext()));

  return true;
}
3240
// Parse a register and wrap it in an AMDGPUOperand, updating the GPR count
// bookkeeping appropriate for the current ABI. Returns nullptr on failure.
// NOTE(review): RestoreOnFailure is not forwarded to ParseAMDGPURegister()
// here, so no token restoration happens on this path -- confirm intended.
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  MCRegister Reg;
  unsigned RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  // HSA targets track register usage via symbols; other targets use the
  // kernel scope object.
  if (isHsaAbi(STI: getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex: RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex: RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(AsmParser: this, Reg, S: StartLoc, E: EndLoc);
}
3260
3261ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3262 bool HasSP3AbsModifier, LitModifier Lit) {
3263 // TODO: add syntactic sugar for 1/(2*PI)
3264
3265 if (isRegister() || isModifier())
3266 return ParseStatus::NoMatch;
3267
3268 if (Lit == LitModifier::None) {
3269 if (trySkipId(Id: "lit"))
3270 Lit = LitModifier::Lit;
3271 else if (trySkipId(Id: "lit64"))
3272 Lit = LitModifier::Lit64;
3273
3274 if (Lit != LitModifier::None) {
3275 if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit"))
3276 return ParseStatus::Failure;
3277 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3278 if (S.isSuccess() &&
3279 !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3280 return ParseStatus::Failure;
3281 return S;
3282 }
3283 }
3284
3285 const auto& Tok = getToken();
3286 const auto& NextTok = peekToken();
3287 bool IsReal = Tok.is(K: AsmToken::Real);
3288 SMLoc S = getLoc();
3289 bool Negate = false;
3290
3291 if (!IsReal && Tok.is(K: AsmToken::Minus) && NextTok.is(K: AsmToken::Real)) {
3292 lex();
3293 IsReal = true;
3294 Negate = true;
3295 }
3296
3297 AMDGPUOperand::Modifiers Mods;
3298 Mods.Lit = Lit;
3299
3300 if (IsReal) {
3301 // Floating-point expressions are not supported.
3302 // Can only allow floating-point literals with an
3303 // optional sign.
3304
3305 StringRef Num = getTokenStr();
3306 lex();
3307
3308 APFloat RealVal(APFloat::IEEEdouble());
3309 auto roundMode = APFloat::rmNearestTiesToEven;
3310 if (errorToBool(Err: RealVal.convertFromString(Num, roundMode).takeError()))
3311 return ParseStatus::Failure;
3312 if (Negate)
3313 RealVal.changeSign();
3314
3315 Operands.push_back(
3316 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: RealVal.bitcastToAPInt().getZExtValue(), Loc: S,
3317 Type: AMDGPUOperand::ImmTyNone, IsFPImm: true));
3318 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3319 Op.setModifiers(Mods);
3320
3321 return ParseStatus::Success;
3322
3323 } else {
3324 int64_t IntVal;
3325 const MCExpr *Expr;
3326 SMLoc S = getLoc();
3327
3328 if (HasSP3AbsModifier) {
3329 // This is a workaround for handling expressions
3330 // as arguments of SP3 'abs' modifier, for example:
3331 // |1.0|
3332 // |-1|
3333 // |1+x|
3334 // This syntax is not compatible with syntax of standard
3335 // MC expressions (due to the trailing '|').
3336 SMLoc EndLoc;
3337 if (getParser().parsePrimaryExpr(Res&: Expr, EndLoc, TypeInfo: nullptr))
3338 return ParseStatus::Failure;
3339 } else {
3340 if (Parser.parseExpression(Res&: Expr))
3341 return ParseStatus::Failure;
3342 }
3343
3344 if (Expr->evaluateAsAbsolute(Res&: IntVal)) {
3345 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S));
3346 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3347 Op.setModifiers(Mods);
3348 } else {
3349 if (Lit != LitModifier::None)
3350 return ParseStatus::NoMatch;
3351 Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S));
3352 }
3353
3354 return ParseStatus::Success;
3355 }
3356
3357 return ParseStatus::NoMatch;
3358}
3359
3360ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3361 if (!isRegister())
3362 return ParseStatus::NoMatch;
3363
3364 if (auto R = parseRegister()) {
3365 assert(R->isReg());
3366 Operands.push_back(Elt: std::move(R));
3367 return ParseStatus::Success;
3368 }
3369 return ParseStatus::Failure;
3370}
3371
3372ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3373 bool HasSP3AbsMod, LitModifier Lit) {
3374 ParseStatus Res = parseReg(Operands);
3375 if (!Res.isNoMatch())
3376 return Res;
3377 if (isModifier())
3378 return ParseStatus::NoMatch;
3379 return parseImm(Operands, HasSP3AbsModifier: HasSP3AbsMod, Lit);
3380}
3381
3382bool
3383AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3384 if (Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::LParen)) {
3385 const auto &str = Token.getString();
3386 return str == "abs" || str == "neg" || str == "sext";
3387 }
3388 return false;
3389}
3390
// True if Token/NextToken look like an opcode modifier with a value,
// i.e. "name:" (e.g. row_shl:1).
bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::Colon);
}
3395
// True if Token/NextToken start an operand modifier: a named one
// (abs/neg/sext) or the SP3 '|...|' abs syntax.
bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(K: AsmToken::Pipe);
}
3400
// True if Token/NextToken start either a register or an operand modifier.
bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}
3405
3406// Check if this is an operand modifier or an opcode modifier
3407// which may look like an expression but it is not. We should
3408// avoid parsing these modifiers as expressions. Currently
3409// recognized sequences are:
3410// |...|
3411// abs(...)
3412// neg(...)
3413// sext(...)
3414// -reg
3415// -|...|
3416// -abs(...)
3417// name:...
3418//
3419bool
3420AMDGPUAsmParser::isModifier() {
3421
3422 AsmToken Tok = getToken();
3423 AsmToken NextToken[2];
3424 peekTokens(Tokens: NextToken);
3425
3426 return isOperandModifier(Token: Tok, NextToken: NextToken[0]) ||
3427 (Tok.is(K: AsmToken::Minus) && isRegOrOperandModifier(Token: NextToken[0], NextToken: NextToken[1])) ||
3428 isOpcodeModifierWithVal(Token: Tok, NextToken: NextToken[0]);
3429}
3430
3431// Check if the current token is an SP3 'neg' modifier.
3432// Currently this modifier is allowed in the following context:
3433//
3434// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3435// 2. Before an 'abs' modifier: -abs(...)
3436// 3. Before an SP3 'abs' modifier: -|...|
3437//
3438// In all other cases "-" is handled as a part
3439// of an expression that follows the sign.
3440//
3441// Note: When "-" is followed by an integer literal,
3442// this is interpreted as integer negation rather
3443// than a floating-point NEG modifier applied to N.
3444// Beside being contr-intuitive, such use of floating-point
3445// NEG modifier would have resulted in different meaning
3446// of integer literals used with VOP1/2/C and VOP3,
3447// for example:
3448// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3449// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3450// Negative fp literals with preceding "-" are
3451// handled likewise for uniformity
3452//
3453bool
3454AMDGPUAsmParser::parseSP3NegModifier() {
3455
3456 AsmToken NextToken[2];
3457 peekTokens(Tokens: NextToken);
3458
3459 if (isToken(Kind: AsmToken::Minus) &&
3460 (isRegister(Token: NextToken[0], NextToken: NextToken[1]) ||
3461 NextToken[0].is(K: AsmToken::Pipe) ||
3462 isId(Token: NextToken[0], Id: "abs"))) {
3463 lex();
3464 return true;
3465 }
3466
3467 return false;
3468}
3469
// Parse a register or immediate together with its floating-point input
// modifiers in both named (neg(...), abs(...), lit(...)) and SP3
// (-..., |...|) forms, and attach the resulting Modifiers to the operand.
ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(Kind: AsmToken::Minus) && peekToken().is(K: AsmToken::Minus))
    return Error(L: getLoc(), Msg: "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId(Id: "neg");
  // The two negation syntaxes are mutually exclusive.
  if (Neg && SP3Neg)
    return Error(L: Loc, Msg: "expected register or immediate");
  if (Neg && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after neg"))
    return ParseStatus::Failure;

  Abs = trySkipId(Id: "abs");
  if (Abs && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after abs"))
    return ParseStatus::Failure;

  LitModifier Lit = LitModifier::None;
  if (trySkipId(Id: "lit")) {
    Lit = LitModifier::Lit;
    if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit"))
      return ParseStatus::Failure;
  } else if (trySkipId(Id: "lit64")) {
    Lit = LitModifier::Lit64;
    if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit64"))
      return ParseStatus::Failure;
    if (!has64BitLiterals())
      return Error(L: Loc, Msg: "lit64 is not supported on this GPU");
  }

  Loc = getLoc();
  SP3Abs = trySkipToken(Kind: AsmToken::Pipe);
  if (Abs && SP3Abs)
    return Error(L: Loc, Msg: "expected register or immediate");

  ParseStatus Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, HasSP3AbsMod: SP3Abs, Lit);
  } else {
    Res = parseReg(Operands);
  }
  // Once any modifier syntax has been consumed, a non-match becomes a hard
  // failure; otherwise propagate NoMatch so other parsers may try.
  if (!Res.isSuccess())
    return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
               ? ParseStatus::Failure
               : Res;

  // NOTE(review): this diagnostic does not return; parsing continues after
  // the Error -- confirm this fall-through is intended.
  if (Lit != LitModifier::None && !Operands.back()->isImm())
    Error(L: Loc, Msg: "expected immediate with lit modifier");

  // Consume the closing delimiters of whichever wrappers were opened.
  if (SP3Abs && !skipToken(Kind: AsmToken::Pipe, ErrMsg: "expected vertical bar"))
    return ParseStatus::Failure;
  if (Abs && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Neg && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Lit != LitModifier::None &&
      !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
    return ParseStatus::Failure;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  Mods.Lit = Lit;

  if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr())
      return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  return ParseStatus::Success;
}
3549
3550ParseStatus
3551AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3552 bool AllowImm) {
3553 bool Sext = trySkipId(Id: "sext");
3554 if (Sext && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after sext"))
3555 return ParseStatus::Failure;
3556
3557 ParseStatus Res;
3558 if (AllowImm) {
3559 Res = parseRegOrImm(Operands);
3560 } else {
3561 Res = parseReg(Operands);
3562 }
3563 if (!Res.isSuccess())
3564 return Sext ? ParseStatus::Failure : Res;
3565
3566 if (Sext && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3567 return ParseStatus::Failure;
3568
3569 AMDGPUOperand::Modifiers Mods;
3570 Mods.Sext = Sext;
3571
3572 if (Mods.hasIntModifiers()) {
3573 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3574 if (Op.isExpr())
3575 return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression");
3576 Op.setModifiers(Mods);
3577 }
3578
3579 return ParseStatus::Success;
3580}
3581
// Parse a register (no immediate allowed) with FP input modifiers.
ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, AllowImm: false);
}
3585
// Parse a register (no immediate allowed) with integer input modifiers.
ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, AllowImm: false);
}
3589
3590ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3591 auto Loc = getLoc();
3592 if (trySkipId(Id: "off")) {
3593 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: 0, Loc,
3594 Type: AMDGPUOperand::ImmTyOff, IsFPImm: false));
3595 return ParseStatus::Success;
3596 }
3597
3598 if (!isRegister())
3599 return ParseStatus::NoMatch;
3600
3601 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3602 if (Reg) {
3603 Operands.push_back(Elt: std::move(Reg));
3604 return ParseStatus::Success;
3605 }
3606
3607 return ParseStatus::Failure;
3608}
3609
// Post-match predicate: reject instructions that contradict a forced
// encoding suffix (_e32/_e64/_dpp/_sdwa), enforce SDWA dst_sel restrictions
// for v_mac, and force a retry for alternative VOPD encodings.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;

  // A forced 32-bit encoding must not match a VOP3 opcode and vice versa;
  // likewise for forced DPP/SDWA.
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(i: OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  // Asm can first try to match VOPD or VOPD3. By failing early here with
  // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
  // Checking later during validateInstruction does not give a chance to retry
  // parsing as a different encoding.
  if (tryAnotherVOPDEncoding(Inst))
    return Match_InvalidOperand;

  return Match_Success;
}
3639
3640static ArrayRef<unsigned> getAllVariants() {
3641 static const unsigned Variants[] = {
3642 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3643 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3644 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3645 };
3646
3647 return ArrayRef(Variants);
3648}
3649
3650// What asm variants we should check
// What asm variants we should check
// Note: the combined DPP+VOP3 case must be tested before the individual
// isForcedVOP3()/isForcedDPP() cases below.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
    return ArrayRef(Variants);
  }
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return ArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return ArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return ArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return ArrayRef(Variants);
  }

  // No forced encoding: try everything.
  return getAllVariants();
}
3679
// Human-readable name of the forced encoding, for diagnostics. The combined
// DPP+VOP3 case must be checked before the individual ones.
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (isForcedDPP() && isForcedVOP3())
    return "e64_dpp";

  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  // No encoding was forced.
  return "";
}
3698
3699MCRegister
3700AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3701 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
3702 for (MCPhysReg Reg : Desc.implicit_uses()) {
3703 switch (Reg) {
3704 case AMDGPU::FLAT_SCR:
3705 case AMDGPU::VCC:
3706 case AMDGPU::VCC_LO:
3707 case AMDGPU::VCC_HI:
3708 case AMDGPU::M0:
3709 return Reg;
3710 default:
3711 break;
3712 }
3713 }
3714 return MCRegister();
3715}
3716
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());

  // Only SI src operands can hold inline constants; KImm operands are
  // always mandatory literals.
  if (!AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx) ||
      AMDGPU::isKImmOperand(Desc, OpNo: OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(i: OpIdx);

  // Non-immediate operands are expressions; resolve their literal value.
  int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(Expr: MO.getExpr());
  auto OpSize = AMDGPU::getOperandSize(Desc, OpNo: OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
  case 2: {
    // 16-bit operands: inlinability depends on the exact operand type
    // (integer vs fp16 vs bf16, scalar vs packed).
    const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16)
      return AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableLiteralV2I16(Literal: Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV2F16(Literal: Val);

    if (OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT)
      return AMDGPU::isPKFMACF16InlineConstant(Literal: Val, IsGFX11Plus: isGFX11Plus());

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
      return AMDGPU::isInlinableLiteralV2BF16(Literal: Val);

    if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16)
      return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());

    if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
      return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());

    // This packed-fp16 operand type never accepts inline constants.
    if (OperandType == AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16)
      return false;

    llvm_unreachable("invalid operand type");
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
3778
// Maximum number of scalar values (SGPRs and/or literals) the given opcode
// may read through the constant bus.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  // Pre-GFX10 targets allow only one constant bus read per instruction.
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    // All other GFX10+ VALU instructions may read two scalar values.
    return 2;
  }
}
3806
// Inline capacity hint for per-instruction source operand index lists.
// VOPD instructions query up to 6 regular sources (see
// getSrcOperandIndices); mandatory literal indices may be appended beyond
// the inline capacity, in which case the SmallVector spills to the heap.
constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3809
3810// Get regular operand indices in the same order as specified
3811// in the instruction (but append mandatory literals to the end).
3812static OperandIndices getSrcOperandIndices(unsigned Opcode,
3813 bool AddMandatoryLiterals = false) {
3814
3815 int16_t ImmIdx =
3816 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::imm) : -1;
3817
3818 if (isVOPD(Opc: Opcode)) {
3819 int16_t ImmXIdx =
3820 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::immX) : -1;
3821
3822 return {getNamedOperandIdx(Opcode, Name: OpName::src0X),
3823 getNamedOperandIdx(Opcode, Name: OpName::vsrc1X),
3824 getNamedOperandIdx(Opcode, Name: OpName::vsrc2X),
3825 getNamedOperandIdx(Opcode, Name: OpName::src0Y),
3826 getNamedOperandIdx(Opcode, Name: OpName::vsrc1Y),
3827 getNamedOperandIdx(Opcode, Name: OpName::vsrc2Y),
3828 ImmXIdx,
3829 ImmIdx};
3830 }
3831
3832 return {getNamedOperandIdx(Opcode, Name: OpName::src0),
3833 getNamedOperandIdx(Opcode, Name: OpName::src1),
3834 getNamedOperandIdx(Opcode, Name: OpName::src2), ImmIdx};
3835}
3836
3837bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3838 const MCOperand &MO = Inst.getOperand(i: OpIdx);
3839 if (MO.isImm())
3840 return !isInlineConstant(Inst, OpIdx);
3841 if (MO.isReg()) {
3842 auto Reg = MO.getReg();
3843 if (!Reg)
3844 return false;
3845 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3846 auto PReg = mc2PseudoReg(Reg);
3847 return isSGPR(Reg: PReg, TRI) && PReg != SGPR_NULL;
3848 }
3849 return true;
3850}
3851
3852// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3853// Writelane is special in that it can use SGPR and M0 (which would normally
3854// count as using the constant bus twice - but in this case it is allowed since
3855// the lane selector doesn't count as a use of the constant bus). However, it is
3856// still required to abide by the 1 SGPR rule.
3857static bool checkWriteLane(const MCInst &Inst) {
3858 const unsigned Opcode = Inst.getOpcode();
3859 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3860 return false;
3861 const MCOperand &LaneSelOp = Inst.getOperand(i: 2);
3862 if (!LaneSelOp.isReg())
3863 return false;
3864 auto LaneSelReg = mc2PseudoReg(Reg: LaneSelOp.getReg());
3865 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3866}
3867
// Verify that the instruction does not read more scalar values (distinct
// SGPRs plus literals) through the constant bus than the target allows.
// Emits a diagnostic and returns false on violation.
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;  // Only meaningful while NumLiterals != 0.

  // Only VALU encodings (and VOPD) are subject to constant bus limits.
  if (!(Desc.TSFlags &
        (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
         SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
      !isVOPD(Opc: Opcode))
    return true;

  // v_writelane with an M0 lane selector is exempt (see checkWriteLane).
  if (checkWriteLane(Inst))
    return true;

  // Check special imm operands (used by madmk, etc)
  if (AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::imm)) {
    ++NumLiterals;
    LiteralSize = 4;
  }

  // Implicit special-SGPR reads (VCC, M0, FLAT_SCR) count toward the limit.
  SmallDenseSet<MCRegister> SGPRsUsed;
  MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed) {
    SGPRsUsed.insert(V: SGPRUsed);
    ++ConstantBusUseCount;
  }

  OperandIndices OpIndices = getSrcOperandIndices(Opcode);

  unsigned ConstantBusLimit = getConstantBusLimit(Opcode);

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(i: OpIdx);
    if (usesConstantBus(Inst, OpIdx)) {
      if (MO.isReg()) {
        LastSGPR = mc2PseudoReg(Reg: MO.getReg());
        // Pairs of registers with a partial intersections like these
        // s0, s[0:1]
        // flat_scratch_lo, flat_scratch
        // flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction
        if (SGPRsUsed.insert(V: LastSGPR).second) {
          ++ConstantBusUseCount;
        }
      } else { // Expression or a literal

        if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
          continue; // special operand like VINTERP attr_chan

        // An instruction may use only one literal.
        // This has been validated on the previous step.
        // See validateVOPLiteral.
        // This literal may be used as more than one operand.
        // If all these operands are of the same size,
        // this literal counts as one scalar value.
        // Otherwise it counts as 2 scalar values.
        // See "GFX10 Shader Programming", section 3.6.2.3.

        unsigned Size = AMDGPU::getOperandSize(Desc, OpNo: OpIdx);
        if (Size < 4)
          Size = 4;

        if (NumLiterals == 0) {
          NumLiterals = 1;
          LiteralSize = Size;
        } else if (LiteralSize != Size) {
          NumLiterals = 2;
        }
      }
    }

    // Report the first operand that pushes the count over the limit.
    if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
      Error(L: getOperandLoc(Operands, MCOpIdx: OpIdx),
            Msg: "invalid operand (violates constant bus restrictions)");
      return false;
    }
  }
  return true;
}
3955
// Check the register bank constraints of a VOPD (or, with \p AsVOPD3,
// VOPD3) instruction. \returns the component operand index of the first
// violating operand, or no value if all constraints are satisfied.
std::optional<unsigned>
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {

  const unsigned Opcode = Inst.getOpcode();
  if (!isVOPD(Opc: Opcode))
    return {};

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  // Resolve an operand to its VGPR, or an invalid register for SGPRs and
  // non-register operands (those do not participate in bank checks).
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    const MCOperand &Opr = Inst.getOperand(i: OperandIdx);
    return (Opr.isReg() && !isSGPR(Reg: mc2PseudoReg(Reg: Opr.getReg()), TRI))
               ? Opr.getReg()
               : MCRegister();
  };

  // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
  // source-cache.
  bool SkipSrc =
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
  bool AllowSameVGPR = isGFX1250Plus();

  if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
    // src0X/src0Y may still be inline constants.
    for (auto OpName : {OpName::src0X, OpName::src0Y}) {
      int I = getNamedOperandIdx(Opcode, Name: OpName);
      const MCOperand &Op = Inst.getOperand(i: I);
      if (!Op.isImm())
        continue;
      int64_t Imm = Op.getImm();
      if (!AMDGPU::isInlinableLiteral32(Literal: Imm, HasInv2Pi: hasInv2PiInlineImm()) &&
          !AMDGPU::isInlinableLiteral64(Literal: Imm, HasInv2Pi: hasInv2PiInlineImm()))
        return (unsigned)I;
    }

    // The remaining sources must not be immediates of any kind.
    for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
                        OpName::vsrc2Y, OpName::imm}) {
      int I = getNamedOperandIdx(Opcode, Name: OpName);
      if (I == -1)
        continue;
      const MCOperand &Op = Inst.getOperand(i: I);
      if (Op.isImm())
        return (unsigned)I;
    }
  }

  const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII);
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
      GetRegIdx: getVRegIdx, MRI: *TRI, SkipSrc, AllowSameVGPR, VOPD3: AsVOPD3);

  return InvalidCompOprIdx;
}
4011
// Validate VOPD/VOPD3 specific constraints: no ABS modifiers in VOPD3, and
// the X/Y components must satisfy the encoding's register bank rules.
bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
                                   const OperandVector &Operands) {

  unsigned Opcode = Inst.getOpcode();
  bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;

  if (AsVOPD3) {
    // VOPD3 supports NEG but not ABS source modifiers.
    for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
      AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
      if ((Op.isRegKind() || Op.isImmTy(ImmT: AMDGPUOperand::ImmTyNone)) &&
          (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
        Error(L: Op.getStartLoc(), Msg: "ABS not allowed in VOPD3 instructions");
    }
  }

  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
  if (!InvalidCompOprIdx.has_value())
    return true;

  // Map the failing component operand back to its parsed-operand position
  // so the diagnostic points at the offending source token.
  auto CompOprIdx = *InvalidCompOprIdx;
  const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII);
  auto ParsedIdx =
      std::max(a: InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               b: InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  assert(ParsedIdx > 0 && ParsedIdx < Operands.size());

  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    // VOPD requires one even and one odd dst; VOPD3 only distinct dsts.
    if (AsVOPD3)
      Error(L: Loc, Msg: "dst registers must be distinct");
    else
      Error(L: Loc, Msg: "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    Error(L: Loc, Msg: Twine("src") + Twine(CompSrcIdx) +
                 " operands must use different VGPR banks");
  }

  return false;
}
4052
4053// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4054// potentially used as VOPD3 with the same operands.
4055bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4056 // First check if it fits VOPD
4057 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3: false);
4058 if (!InvalidCompOprIdx.has_value())
4059 return false;
4060
4061 // Then if it fits VOPD3
4062 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3: true);
4063 if (InvalidCompOprIdx.has_value()) {
4064 // If failed operand is dst it is better to show error about VOPD3
4065 // instruction as it has more capabilities and error message will be
4066 // more informative. If the dst is not legal for VOPD3, then it is not
4067 // legal for VOPD either.
4068 if (*InvalidCompOprIdx == VOPD::Component::DST)
4069 return true;
4070
4071 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4072 // with a conflict in tied implicit src2 of fmac and no asm operand to
4073 // to point to.
4074 return false;
4075 }
4076 return true;
4077}
4078
// \returns true if a VOPD3 instruction can be also represented as a shorter
// VOPD encoding.
bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const auto &II = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII);
  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST: getSTI());
  // Both component opcodes must be encodable as plain VOPD.
  if (!getCanBeVOPD(Opc: II[VOPD::X].getOpcode(), EncodingFamily, VOPD3: false).X ||
      !getCanBeVOPD(Opc: II[VOPD::Y].getOpcode(), EncodingFamily, VOPD3: false).Y)
    return false;

  // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
  // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
  // be parsed as VOPD which does not accept src2.
  if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
      II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
    return false;

  // If any modifiers are set this cannot be VOPD.
  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
    int I = getNamedOperandIdx(Opcode, Name: OpName);
    if (I == -1)
      continue;
    if (Inst.getOperand(i: I).getImm())
      return false;
  }

  // Finally the operands must satisfy the plain-VOPD register constraints.
  return !tryVOPD3(Inst);
}
4109
4110// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4111// form but switch to VOPD3 otherwise.
4112bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4113 const unsigned Opcode = Inst.getOpcode();
4114 if (!isGFX1250Plus() || !isVOPD(Opc: Opcode))
4115 return false;
4116
4117 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4118 return tryVOPD(Inst);
4119 return tryVOPD3(Inst);
4120}
4121
4122bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4123
4124 const unsigned Opc = Inst.getOpcode();
4125 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4126
4127 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4128 int ClampIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::clamp);
4129 assert(ClampIdx != -1);
4130 return Inst.getOperand(i: ClampIdx).getImm() == 0;
4131 }
4132
4133 return true;
4134}
4135
// TSFlags mask covering all image-like instruction groups: legacy MIMG plus
// the newer VIMAGE and VSAMPLE encodings.
constexpr uint64_t MIMGFlags =
    SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4138
// Check that the vdata register tuple width matches the number of dwords
// implied by dmask, adjusted for tfe and packed d16.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::tfe);

  if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
    return true;

  if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
    return true;

  unsigned VDataSize = getRegOperandSize(Desc, OpNo: VDataIdx);
  // tfe adds one extra dword for the error/status return.
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(i: TFEIdx).getImm()) ? 1 : 0;
  unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf;
  // dmask of 0 behaves like dmask of 1.
  if (DMask == 0)
    DMask = 1;

  bool IsPackedD16 = false;
  // gather4 always returns four components regardless of dmask.
  unsigned DataSize =
      (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(Value: DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16);
    IsPackedD16 = D16Idx >= 0;
    // Packed d16 halves the number of result dwords, rounding up.
    if (IsPackedD16 && Inst.getOperand(i: D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  if ((VDataSize / 4) == DataSize + TFESize)
    return true;

  // Tailor the diagnostic to the modifiers this target actually supports.
  StringRef Modifiers;
  if (isGFX90A())
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  else
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(L: IDLoc, Msg: Twine("image data size does not match ") + Modifiers);
  return false;
}
4185
// Check that the number of address registers matches what the dim and a16
// modifiers require (GFX10+ only).
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vaddr0);
  // MIMG names its resource operand srsrc; VIMAGE/VSAMPLE use rsrc.
  AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: RSrcOpName);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  bool IsA16 = (A16Idx != -1 && Inst.getOperand(i: A16Idx).getImm());
  // BVH (ray intersection) ops only require a16 to match the base opcode.
  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
      return true;
    Error(L: IDLoc, Msg: "image address size does not match a16");
    return false;
  }

  unsigned Dim = Inst.getOperand(i: DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim);
  // NSA encodes each address in its own operand; otherwise one reg tuple.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, OpNo: VAddr0Idx) / 4;

  unsigned ExpectedAddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim: DimInfo, IsA16, IsG16Supported: hasG16());

  if (IsNSA) {
    // With partial NSA the trailing addresses are packed into the last
    // operand, so count the final tuple's registers explicitly.
    if (hasPartialNSAEncoding() &&
        ExpectedAddrSize >
            getNSAMaxSize(HasSampler: Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
      int VAddrLastIdx = SrsrcIdx - 1;
      unsigned VAddrLastSize = getRegOperandSize(Desc, OpNo: VAddrLastIdx) / 4;

      ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
    }
  } else {
    // Non-NSA address tuples beyond 12 dwords round up to 16.
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;

    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
    // This provides backward compatibility for assembly created
    // before 160b/192b/224b types were directly supported.
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      return true;
  }

  if (ActualAddrSize == ExpectedAddrSize)
    return true;

  Error(L: IDLoc, Msg: "image address size does not match dim and a16");
  return false;
}
4252
4253bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4254
4255 const unsigned Opc = Inst.getOpcode();
4256 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4257
4258 if ((Desc.TSFlags & MIMGFlags) == 0)
4259 return true;
4260 if (!Desc.mayLoad() || !Desc.mayStore())
4261 return true; // Not atomic
4262
4263 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
4264 unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf;
4265
4266 // This is an incomplete check because image_atomic_cmpswap
4267 // may only use 0x3 and 0xf while other atomic operations
4268 // may use 0x1 and 0x3. However these limitations are
4269 // verified when we check that dmask matches dst size.
4270 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4271}
4272
4273bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4274
4275 const unsigned Opc = Inst.getOpcode();
4276 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4277
4278 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4279 return true;
4280
4281 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
4282 unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf;
4283
4284 // GATHER4 instructions use dmask in a different fashion compared to
4285 // other MIMG instructions. The only useful DMASK values are
4286 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4287 // (red,red,red,red) etc.) The ISA document doesn't mention
4288 // this.
4289 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4290}
4291
4292bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4293 const OperandVector &Operands) {
4294 if (!isGFX10Plus())
4295 return true;
4296
4297 const unsigned Opc = Inst.getOpcode();
4298 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4299
4300 if ((Desc.TSFlags & MIMGFlags) == 0)
4301 return true;
4302
4303 // image_bvh_intersect_ray instructions do not have dim
4304 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4305 return true;
4306
4307 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4308 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4309 if (Op.isDim())
4310 return true;
4311 }
4312 return false;
4313}
4314
4315bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4316 const unsigned Opc = Inst.getOpcode();
4317 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4318
4319 if ((Desc.TSFlags & MIMGFlags) == 0)
4320 return true;
4321
4322 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4323 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4324 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
4325
4326 if (!BaseOpcode->MSAA)
4327 return true;
4328
4329 int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim);
4330 assert(DimIdx != -1);
4331
4332 unsigned Dim = Inst.getOperand(i: DimIdx).getImm();
4333 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim);
4334
4335 return DimInfo->MSAA;
4336}
4337
4338static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4339{
4340 switch (Opcode) {
4341 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4342 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4343 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4344 return true;
4345 default:
4346 return false;
4347 }
4348}
4349
4350// movrels* opcodes should only allow VGPRS as src0.
4351// This is specified in .td description for vop1/vop3,
4352// but sdwa is handled differently. See isSDWAOperand.
4353bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4354 const OperandVector &Operands) {
4355
4356 const unsigned Opc = Inst.getOpcode();
4357 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4358
4359 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opcode: Opc))
4360 return true;
4361
4362 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
4363 assert(Src0Idx != -1);
4364
4365 const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
4366 if (Src0.isReg()) {
4367 auto Reg = mc2PseudoReg(Reg: Src0.getReg());
4368 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4369 if (!isSGPR(Reg, TRI))
4370 return true;
4371 }
4372
4373 Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx), Msg: "source operand must be a VGPR");
4374 return false;
4375}
4376
4377bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4378 const OperandVector &Operands) {
4379
4380 const unsigned Opc = Inst.getOpcode();
4381
4382 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4383 return true;
4384
4385 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
4386 assert(Src0Idx != -1);
4387
4388 const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
4389 if (!Src0.isReg())
4390 return true;
4391
4392 auto Reg = mc2PseudoReg(Reg: Src0.getReg());
4393 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4394 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4395 Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx),
4396 Msg: "source operand must be either a VGPR or an inline constant");
4397 return false;
4398 }
4399
4400 return true;
4401}
4402
4403bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4404 const OperandVector &Operands) {
4405 unsigned Opcode = Inst.getOpcode();
4406 const MCInstrDesc &Desc = MII.get(Opcode);
4407
4408 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4409 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4410 return true;
4411
4412 const int Src2Idx = getNamedOperandIdx(Opcode, Name: OpName::src2);
4413 if (Src2Idx == -1)
4414 return true;
4415
4416 if (Inst.getOperand(i: Src2Idx).isImm() && isInlineConstant(Inst, OpIdx: Src2Idx)) {
4417 Error(L: getOperandLoc(Operands, MCOpIdx: Src2Idx),
4418 Msg: "inline constants are not allowed for this operand");
4419 return false;
4420 }
4421
4422 return true;
4423}
4424
// Validate MAI/MFMA specific constraints: F8F6F4 source register tuple
// sizes must match the cbsz/blgp format selectors, and for wide MFMAs src2
// must not partially overlap the dst register tuple.
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
    return true;

  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
  if (BlgpIdx != -1) {
    if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opcode: Opc)) {
      int CbszIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz);

      // cbsz selects the src0 format, blgp the src1 format.
      unsigned CBSZ = Inst.getOperand(i: CbszIdx).getImm();
      unsigned BLGP = Inst.getOperand(i: BlgpIdx).getImm();

      // Validate the correct register size was used for the floating point
      // format operands

      bool Success = true;
      if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: CBSZ)) {
        int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
        Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx),
              Msg: "wrong register tuple size for cbsz value " + Twine(CBSZ));
        Success = false;
      }

      if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: BLGP)) {
        int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
        Error(L: getOperandLoc(Operands, MCOpIdx: Src1Idx),
              Msg: "wrong register tuple size for blgp value " + Twine(BLGP));
        Success = false;
      }

      return Success;
    }
  }

  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2);
  if (Src2Idx == -1)
    return true;

  const MCOperand &Src2 = Inst.getOperand(i: Src2Idx);
  if (!Src2.isReg())
    return true;

  MCRegister Src2Reg = Src2.getReg();
  MCRegister DstReg = Inst.getOperand(i: 0).getReg();
  // Exact equality (full overlap) is allowed; only partial overlap is bad.
  if (Src2Reg == DstReg)
    return true;

  // The overlap restriction only applies to tuples wider than 128 bits.
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (TRI->getRegClass(i: MII.getOpRegClassID(OpInfo: Desc.operands()[0], HwModeId: HwMode))
          .getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(RegA: Src2Reg, RegB: DstReg)) {
    Error(L: getOperandLoc(Operands, MCOpIdx: Src2Idx),
          Msg: "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}
4489
4490bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4491 switch (Inst.getOpcode()) {
4492 default:
4493 return true;
4494 case V_DIV_SCALE_F32_gfx6_gfx7:
4495 case V_DIV_SCALE_F32_vi:
4496 case V_DIV_SCALE_F32_gfx10:
4497 case V_DIV_SCALE_F64_gfx6_gfx7:
4498 case V_DIV_SCALE_F64_vi:
4499 case V_DIV_SCALE_F64_gfx10:
4500 break;
4501 }
4502
4503 // TODO: Check that src0 = src1 or src2.
4504
4505 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4506 AMDGPU::OpName::src2_modifiers,
4507 AMDGPU::OpName::src2_modifiers}) {
4508 if (Inst.getOperand(i: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name))
4509 .getImm() &
4510 SISrcMods::ABS) {
4511 return false;
4512 }
4513 }
4514
4515 return true;
4516}
4517
4518bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4519
4520 const unsigned Opc = Inst.getOpcode();
4521 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4522
4523 if ((Desc.TSFlags & MIMGFlags) == 0)
4524 return true;
4525
4526 int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16);
4527 if (D16Idx >= 0 && Inst.getOperand(i: D16Idx).getImm()) {
4528 if (isCI() || isSI())
4529 return false;
4530 }
4531
4532 return true;
4533}
4534
4535bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4536 const unsigned Opc = Inst.getOpcode();
4537 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4538
4539 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4540 return true;
4541
4542 int R128Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::r128);
4543
4544 return R128Idx < 0 || !Inst.getOperand(i: R128Idx).getImm();
4545}
4546
// Returns true for "reversed" VALU opcodes (v_subrev*, v_subbrev*,
// v_lshlrev*, v_lshrrev*, v_ashrrev* and their packed forms), across all
// encodings (e32/e64/SDWA-era suffixed variants). Used by validateLdsDirect,
// which rejects lds_direct on these opcodes.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  // Reversed floating-point subtract (32-bit).
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  // Reversed integer subtract variants (with/without carry).
  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  // 16-bit reversed subtracts.
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  // Reversed 32-bit shifts.
  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  // Reversed 16-bit shifts.
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  // Reversed 64-bit shifts.
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  // Reversed packed 16-bit shifts.
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
4675
4676bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4677 const OperandVector &Operands) {
4678 using namespace SIInstrFlags;
4679 const unsigned Opcode = Inst.getOpcode();
4680 const MCInstrDesc &Desc = MII.get(Opcode);
4681
4682 // lds_direct register is defined so that it can be used
4683 // with 9-bit operands only. Ignore encodings which do not accept these.
4684 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4685 if ((Desc.TSFlags & Enc) == 0)
4686 return true;
4687
4688 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4689 auto SrcIdx = getNamedOperandIdx(Opcode, Name: SrcName);
4690 if (SrcIdx == -1)
4691 break;
4692 const auto &Src = Inst.getOperand(i: SrcIdx);
4693 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4694
4695 if (isGFX90A() || isGFX11Plus()) {
4696 Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
4697 Msg: "lds_direct is not supported on this GPU");
4698 return false;
4699 }
4700
4701 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4702 Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
4703 Msg: "lds_direct cannot be used with this instruction");
4704 return false;
4705 }
4706
4707 if (SrcName != OpName::src0) {
4708 Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
4709 Msg: "lds_direct may be used as src0 only");
4710 return false;
4711 }
4712 }
4713 }
4714
4715 return true;
4716}
4717
4718SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4719 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4720 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4721 if (Op.isFlatOffset())
4722 return Op.getStartLoc();
4723 }
4724 return getLoc();
4725}
4726
4727bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4728 const OperandVector &Operands) {
4729 auto Opcode = Inst.getOpcode();
4730 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
4731 if (OpNum == -1)
4732 return true;
4733
4734 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4735 if ((TSFlags & SIInstrFlags::FLAT))
4736 return validateFlatOffset(Inst, Operands);
4737
4738 if ((TSFlags & SIInstrFlags::SMRD))
4739 return validateSMEMOffset(Inst, Operands);
4740
4741 const auto &Op = Inst.getOperand(i: OpNum);
4742 // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4743 if (isGFX12Plus() &&
4744 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4745 const unsigned OffsetSize = 24;
4746 if (!isUIntN(N: OffsetSize - 1, x: Op.getImm())) {
4747 Error(L: getFlatOffsetLoc(Operands),
4748 Msg: Twine("expected a ") + Twine(OffsetSize - 1) +
4749 "-bit unsigned offset for buffer ops");
4750 return false;
4751 }
4752 } else {
4753 const unsigned OffsetSize = 16;
4754 if (!isUIntN(N: OffsetSize, x: Op.getImm())) {
4755 Error(L: getFlatOffsetLoc(Operands),
4756 Msg: Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4757 return false;
4758 }
4759 }
4760 return true;
4761}
4762
4763bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4764 const OperandVector &Operands) {
4765 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4766 if ((TSFlags & SIInstrFlags::FLAT) == 0)
4767 return true;
4768
4769 auto Opcode = Inst.getOpcode();
4770 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
4771 assert(OpNum != -1);
4772
4773 const auto &Op = Inst.getOperand(i: OpNum);
4774 if (!hasFlatOffsets() && Op.getImm() != 0) {
4775 Error(L: getFlatOffsetLoc(Operands),
4776 Msg: "flat offset modifier is not supported on this GPU");
4777 return false;
4778 }
4779
4780 // For pre-GFX12 FLAT instructions the offset must be positive;
4781 // MSB is ignored and forced to zero.
4782 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(ST: getSTI());
4783 bool AllowNegative =
4784 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4785 isGFX12Plus();
4786 if (!isIntN(N: OffsetSize, x: Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4787 Error(L: getFlatOffsetLoc(Operands),
4788 Msg: Twine("expected a ") +
4789 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4790 : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4791 return false;
4792 }
4793
4794 return true;
4795}
4796
4797SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4798 // Start with second operand because SMEM Offset cannot be dst or src0.
4799 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4800 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4801 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4802 return Op.getStartLoc();
4803 }
4804 return getLoc();
4805}
4806
4807bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4808 const OperandVector &Operands) {
4809 if (isCI() || isSI())
4810 return true;
4811
4812 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4813 if ((TSFlags & SIInstrFlags::SMRD) == 0)
4814 return true;
4815
4816 auto Opcode = Inst.getOpcode();
4817 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
4818 if (OpNum == -1)
4819 return true;
4820
4821 const auto &Op = Inst.getOperand(i: OpNum);
4822 if (!Op.isImm())
4823 return true;
4824
4825 uint64_t Offset = Op.getImm();
4826 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opc: Opcode);
4827 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(ST: getSTI(), EncodedOffset: Offset) ||
4828 AMDGPU::isLegalSMRDEncodedSignedOffset(ST: getSTI(), EncodedOffset: Offset, IsBuffer))
4829 return true;
4830
4831 Error(L: getSMEMOffsetLoc(Operands),
4832 Msg: isGFX12Plus() && IsBuffer
4833 ? "expected a 23-bit unsigned offset for buffer ops"
4834 : isGFX12Plus() ? "expected a 24-bit signed offset"
4835 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4836 : "expected a 21-bit signed offset");
4837
4838 return false;
4839}
4840
// SOP2/SOPC instructions can encode at most one 32-bit literal; verify that
// all non-inline constant sources collapse to a single unique value.
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;     // unresolved expressions (value unknown)
  unsigned NumLiterals = 0;  // distinct known literal values seen so far
  int64_t LiteralValue;      // last literal seen; valid iff NumLiterals > 0

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(i: OpIdx);
    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
    if (AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx)) {
      bool IsLit = false;
      std::optional<int64_t> Imm;
      if (MO.isImm()) {
        Imm = MO.getImm();
      } else if (MO.isExpr()) {
        if (isLitExpr(Expr: MO.getExpr())) {
          IsLit = true;
          Imm = getLitValue(Expr: MO.getExpr());
        }
      } else {
        continue;
      }

      // An unresolved expression needs a literal slot but cannot be
      // deduplicated, so count it separately.
      if (!Imm.has_value()) {
        ++NumExprs;
      } else if (!isInlineConstant(Inst, OpIdx)) {
        // Compare the encoded 32-bit forms so different spellings of the
        // same literal count as one.
        auto OpType = static_cast<AMDGPU::OperandType>(
            Desc.operands()[OpIdx].OperandType);
        int64_t Value = encode32BitLiteral(Imm: *Imm, Type: OpType, IsLit);
        if (NumLiterals == 0 || LiteralValue != Value) {
          LiteralValue = Value;
          ++NumLiterals;
        }
      }
    }
  }

  if (NumLiterals + NumExprs <= 1)
    return true;

  Error(L: getOperandLoc(Operands, MCOpIdx: Src1Idx),
        Msg: "only one unique literal operand is allowed");
  return false;
}
4897
// Validate op_sel / op_sel_hi constraints that vary per opcode and
// subtarget. Returns false (without emitting a diagnostic) when invalid.
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  // v_permlane16-style opcodes only use the two low op_sel bits.
  if (isPermlane16(Opc)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();

    if (OpSel & ~3)
      return false;
  }

  uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags;

  // On gfx940 DOT instructions, op_sel must be all zeros and op_sel_hi all
  // ones (the operands' default encodings).
  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      if (Inst.getOperand(i: OpSelIdx).getImm() != 0)
        return false;
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      if (Inst.getOperand(i: OpSelHiIdx).getImm() != -1)
        return false;
    }
  }

  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
      (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
    if (OpSel & 3)
      return false;
  }

  // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
  // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
  // the first SGPR and use it for both the low and high operations.
  if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);

    const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
    const MCOperand &Src1 = Inst.getOperand(i: Src1Idx);
    unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
    unsigned OpSelHi = Inst.getOperand(i: OpSelHiIdx).getImm();

    const MCRegisterInfo *TRI = getContext().getRegisterInfo();

    // An SGPR source is only valid if neither op_sel nor op_sel_hi selects
    // the high half for that source slot.
    auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
      unsigned Mask = 1U << Index;
      return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
    };

    if (Src0.isReg() && isSGPR(Reg: Src0.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/0))
      return false;
    if (Src1.isReg() && isSGPR(Reg: Src1.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/1))
      return false;

    int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2);
    if (Src2Idx != -1) {
      const MCOperand &Src2 = Inst.getOperand(i: Src2Idx);
      if (Src2.isReg() && isSGPR(Reg: Src2.getReg(), TRI) &&
          !VerifyOneSGPR(/*Index=*/2))
        return false;
    }
  }

  return true;
}
4971
4972bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4973 if (!hasTrue16Insts())
4974 return true;
4975 const MCRegisterInfo *MRI = getMRI();
4976 const unsigned Opc = Inst.getOpcode();
4977 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
4978 if (OpSelIdx == -1)
4979 return true;
4980 unsigned OpSelOpValue = Inst.getOperand(i: OpSelIdx).getImm();
4981 // If the value is 0 we could have a default OpSel Operand, so conservatively
4982 // allow it.
4983 if (OpSelOpValue == 0)
4984 return true;
4985 unsigned OpCount = 0;
4986 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4987 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4988 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: OpName);
4989 if (OpIdx == -1)
4990 continue;
4991 const MCOperand &Op = Inst.getOperand(i: OpIdx);
4992 if (Op.isReg() &&
4993 MRI->getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: Op.getReg())) {
4994 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Reg: Op.getReg(), MRI: *MRI);
4995 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4996 if (OpSelOpIsHi != VGPRSuffixIsHi)
4997 return false;
4998 }
4999 ++OpCount;
5000 }
5001
5002 return true;
5003}
5004
5005bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5006 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5007
5008 const unsigned Opc = Inst.getOpcode();
5009 uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags;
5010
5011 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5012 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5013 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5014 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5015 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5016 !(TSFlags & SIInstrFlags::IsSWMMAC))
5017 return true;
5018
5019 int NegIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName);
5020 if (NegIdx == -1)
5021 return true;
5022
5023 unsigned Neg = Inst.getOperand(i: NegIdx).getImm();
5024
5025 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
5026 // on some src operands but not allowed on other.
5027 // It is convenient that such instructions don't have src_modifiers operand
5028 // for src operands that don't allow neg because they also don't allow opsel.
5029
5030 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5031 AMDGPU::OpName::src1_modifiers,
5032 AMDGPU::OpName::src2_modifiers};
5033
5034 for (unsigned i = 0; i < 3; ++i) {
5035 if (!AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: SrcMods[i])) {
5036 if (Neg & (1 << i))
5037 return false;
5038 }
5039 }
5040
5041 return true;
5042}
5043
5044bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5045 const OperandVector &Operands) {
5046 const unsigned Opc = Inst.getOpcode();
5047 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp_ctrl);
5048 if (DppCtrlIdx >= 0) {
5049 unsigned DppCtrl = Inst.getOperand(i: DppCtrlIdx).getImm();
5050
5051 if (!AMDGPU::isLegalDPALU_DPPControl(ST: getSTI(), DC: DppCtrl) &&
5052 AMDGPU::isDPALU_DPP(OpDesc: MII.get(Opcode: Opc), MII, ST: getSTI())) {
5053 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5054 // only on GFX12.
5055 SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyDppCtrl, Operands);
5056 Error(L: S, Msg: isGFX12() ? "DP ALU dpp only supports row_share"
5057 : "DP ALU dpp only supports row_newbcast");
5058 return false;
5059 }
5060 }
5061
5062 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp8);
5063 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5064
5065 if (IsDPP && !hasDPPSrc1SGPR(STI: getSTI())) {
5066 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
5067 if (Src1Idx >= 0) {
5068 const MCOperand &Src1 = Inst.getOperand(i: Src1Idx);
5069 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5070 if (Src1.isReg() && isSGPR(Reg: mc2PseudoReg(Reg: Src1.getReg()), TRI)) {
5071 Error(L: getOperandLoc(Operands, MCOpIdx: Src1Idx),
5072 Msg: "invalid operand for instruction");
5073 return false;
5074 }
5075 if (Src1.isImm()) {
5076 Error(L: getInstLoc(Operands),
5077 Msg: "src1 immediate operand invalid for instruction");
5078 return false;
5079 }
5080 }
5081 }
5082
5083 return true;
5084}
5085
5086// Check if VCC register matches wavefront size
5087bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5088 return (Reg == AMDGPU::VCC && isWave64()) ||
5089 (Reg == AMDGPU::VCC_LO && isWave32());
5090}
5091
// One unique literal can be used. VOP3 literal is only allowed in GFX10+
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  // A named 'imm' operand means the encoding always carries a literal.
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, Name: OpName::imm) != -1;
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
      !HasMandatoryLiteral && !isVOPD(Opc: Opcode))
    return true;

  OperandIndices OpIndices = getSrcOperandIndices(Opcode, AddMandatoryLiterals: HasMandatoryLiteral);

  // Index of the first operand found to need a literal slot, and its value
  // when known; used to detect a second, different literal.
  std::optional<unsigned> LiteralOpIdx;
  std::optional<uint64_t> LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(i: OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!isSISrcOperand(Desc, OpNo: OpIdx))
      continue;

    std::optional<int64_t> Imm;
    if (MO.isImm())
      Imm = MO.getImm();
    else if (MO.isExpr() && isLitExpr(Expr: MO.getExpr()))
      Imm = getLitValue(Expr: MO.getExpr());

    bool IsAnotherLiteral = false;
    if (!Imm.has_value()) {
      // Literal value not known, so we conservatively assume it's different.
      IsAnotherLiteral = true;
    } else if (!isInlineConstant(Inst, OpIdx)) {
      uint64_t Value = *Imm;
      bool IsForcedFP64 =
          Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
          (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
           HasMandatoryLiteral);
      bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpNo: OpIdx)) &&
                    AMDGPU::getOperandSize(OpInfo: Desc.operands()[OpIdx]) == 8;
      bool IsValid32Op = AMDGPU::isValid32BitLiteral(Val: Value, IsFP64);

      // Reject values that cannot be encoded in 32 bits unless the
      // instruction supports a 64-bit literal.
      if (!IsValid32Op && !isInt<32>(x: Value) && !isUInt<32>(x: Value) &&
          !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
        Error(L: getOperandLoc(Operands, MCOpIdx: OpIdx),
              Msg: "invalid operand for instruction");
        return false;
      }

      // 64-bit FP literals encodable in 32 bits are compared by their
      // encoded (high) half.
      if (IsFP64 && IsValid32Op && !IsForcedFP64)
        Value = Hi_32(Value);

      IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
      LiteralValue = Value;
    }

    if (IsAnotherLiteral && !HasMandatoryLiteral &&
        !getFeatureBits()[FeatureVOP3Literal]) {
      Error(L: getOperandLoc(Operands, MCOpIdx: OpIdx),
            Msg: "literal operands are not supported");
      return false;
    }

    // A second distinct literal cannot be encoded.
    if (LiteralOpIdx && IsAnotherLiteral) {
      Error(L: getLaterLoc(a: getOperandLoc(Operands, MCOpIdx: OpIdx),
                         b: getOperandLoc(Operands, MCOpIdx: *LiteralOpIdx)),
            Msg: "only one unique literal operand is allowed");
      return false;
    }

    if (IsAnotherLiteral)
      LiteralOpIdx = OpIdx;
  }

  return true;
}
5171
5172// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5173static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5174 const MCRegisterInfo *MRI) {
5175 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name);
5176 if (OpIdx < 0)
5177 return -1;
5178
5179 const MCOperand &Op = Inst.getOperand(i: OpIdx);
5180 if (!Op.isReg())
5181 return -1;
5182
5183 MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
5184 auto Reg = Sub ? Sub : Op.getReg();
5185 const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID);
5186 return AGPR32.contains(Reg) ? 1 : 0;
5187}
5188
5189bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5190 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
5191 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5192 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
5193 SIInstrFlags::DS)) == 0)
5194 return true;
5195
5196 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5197 ? AMDGPU::OpName::data0
5198 : AMDGPU::OpName::vdata;
5199
5200 const MCRegisterInfo *MRI = getMRI();
5201 int DstAreg = IsAGPROperand(Inst, Name: AMDGPU::OpName::vdst, MRI);
5202 int DataAreg = IsAGPROperand(Inst, Name: DataName, MRI);
5203
5204 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5205 int Data2Areg = IsAGPROperand(Inst, Name: AMDGPU::OpName::data1, MRI);
5206 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5207 return false;
5208 }
5209
5210 auto FB = getFeatureBits();
5211 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5212 if (DataAreg < 0 || DstAreg < 0)
5213 return true;
5214 return DstAreg == DataAreg;
5215 }
5216
5217 return DstAreg < 1 && DataAreg < 1;
5218}
5219
// On subtargets with FeatureRequiresAlignedVGPRs, multi-dword VGPR/AGPR
// operands must begin at an even register index; the TR6/TR_B6 load opcodes
// below are exempt (fully, or for vdst only).
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
    return true;

  unsigned Opc = Inst.getOpcode();
  const MCRegisterInfo *MRI = getMRI();
  // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
  // unaligned VGPR. All others only allow even aligned VGPRs.
  if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
    return true;

  if (FB[AMDGPU::FeatureGFX1250Insts]) {
    switch (Opc) {
    default:
      break;
    case AMDGPU::DS_LOAD_TR6_B96:
    case AMDGPU::DS_LOAD_TR6_B96_gfx12:
      // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
      // allows unaligned VGPR. All others only allow even aligned VGPRs.
      return true;
    case AMDGPU::GLOBAL_LOAD_TR6_B96:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
      // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
      // allows unaligned VGPR for vdst, but other operands still only allow
      // even aligned VGPRs.
      int VAddrIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vaddr);
      if (VAddrIdx != -1) {
        const MCOperand &Op = Inst.getOperand(i: VAddrIdx);
        MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
        if ((Sub - AMDGPU::VGPR0) & 1)
          return false;
      }
      return true;
    }
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
      return true;
    }
  }

  const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID);
  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const MCOperand &Op = Inst.getOperand(i: I);
    if (!Op.isReg())
      continue;

    // Operands with no sub0 subregister are single 32-bit registers and
    // need no alignment.
    MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
    if (!Sub)
      continue;

    if (VGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  }

  return true;
}
5280
5281SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5282 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5283 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5284 if (Op.isBLGP())
5285 return Op.getStartLoc();
5286 }
5287 return SMLoc();
5288}
5289
5290bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5291 const OperandVector &Operands) {
5292 unsigned Opc = Inst.getOpcode();
5293 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
5294 if (BlgpIdx == -1)
5295 return true;
5296 SMLoc BLGPLoc = getBLGPLoc(Operands);
5297 if (!BLGPLoc.isValid())
5298 return true;
5299 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with(Prefix: "neg:");
5300 auto FB = getFeatureBits();
5301 bool UsesNeg = false;
5302 if (FB[AMDGPU::FeatureGFX940Insts]) {
5303 switch (Opc) {
5304 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5305 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5306 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5307 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5308 UsesNeg = true;
5309 }
5310 }
5311
5312 if (IsNeg == UsesNeg)
5313 return true;
5314
5315 Error(L: BLGPLoc,
5316 Msg: UsesNeg ? "invalid modifier: blgp is not supported"
5317 : "invalid modifier: neg is not supported");
5318
5319 return false;
5320}
5321
5322bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5323 const OperandVector &Operands) {
5324 if (!isGFX11Plus())
5325 return true;
5326
5327 unsigned Opc = Inst.getOpcode();
5328 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5329 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5330 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5331 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5332 return true;
5333
5334 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::sdst);
5335 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5336 auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg());
5337 if (Reg == AMDGPU::SGPR_NULL)
5338 return true;
5339
5340 Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx), Msg: "src0 must be null");
5341 return false;
5342}
5343
5344bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5345 const OperandVector &Operands) {
5346 uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
5347 if ((TSFlags & SIInstrFlags::DS) == 0)
5348 return true;
5349 if (TSFlags & SIInstrFlags::GWS)
5350 return validateGWS(Inst, Operands);
5351 // Only validate GDS for non-GWS instructions.
5352 if (hasGDS())
5353 return true;
5354 int GDSIdx =
5355 AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::gds);
5356 if (GDSIdx < 0)
5357 return true;
5358 unsigned GDS = Inst.getOperand(i: GDSIdx).getImm();
5359 if (GDS) {
5360 SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyGDS, Operands);
5361 Error(L: S, Msg: "gds modifier is not supported on this GPU");
5362 return false;
5363 }
5364 return true;
5365}
5366
5367// gfx90a has an undocumented limitation:
5368// DS_GWS opcodes must use even aligned registers.
5369bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5370 const OperandVector &Operands) {
5371 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5372 return true;
5373
5374 int Opc = Inst.getOpcode();
5375 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5376 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5377 return true;
5378
5379 const MCRegisterInfo *MRI = getMRI();
5380 const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID);
5381 int Data0Pos =
5382 AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::data0);
5383 assert(Data0Pos != -1);
5384 auto Reg = Inst.getOperand(i: Data0Pos).getReg();
5385 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5386 if (RegIdx & 1) {
5387 Error(L: getOperandLoc(Operands, MCOpIdx: Data0Pos), Msg: "vgpr must be even aligned");
5388 return false;
5389 }
5390
5391 return true;
5392}
5393
// Validate the cpol (cache policy) operand bits against the subtarget and
// instruction class. Error locations are narrowed to the offending keyword
// by scanning the source text from the cpol operand's start location.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            SMLoc IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(),
                                           Name: AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(i: CPolPos).getImm();

  // scale_offset and nv only exist from GFX1250 onwards.
  if (!isGFX1250Plus()) {
    if (CPol & CPol::SCAL) {
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scale_offset")]);
      Error(L: S, Msg: "scale_offset is not supported on this GPU");
    }
    if (CPol & CPol::NV) {
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "nv")]);
      Error(L: S, Msg: "nv is not supported on this GPU");
    }
  }

  if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Opcode: Inst.getOpcode())) {
    SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scale_offset")]);
    Error(L: S, Msg: "scale_offset is not supported for this instruction");
  }

  // GFX12+ replaces glc/slc/dlc-style bits with th/scope.
  if (isGFX12Plus())
    return validateTHAndScopeBits(Inst, Operands, CPol);

  uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
  if (TSFlags & SIInstrFlags::SMRD) {
    if (CPol && (isSI() || isCI())) {
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
      Error(L: S, Msg: "cache policy is not supported for SMRD instructions");
      return false;
    }
    // SMEM only accepts glc and dlc.
    if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
      Error(L: IDLoc, Msg: "invalid cache policy for SMEM instruction");
      return false;
    }
  }

  // gfx90a (but not gfx940) restricts scc to certain memory instruction
  // classes.
  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
    const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
                                      SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
                                      SIInstrFlags::FLAT;
    if (!(TSFlags & AllowSCCModifier)) {
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scc")]);
      Error(L: S,
            Msg: "scc modifier is not supported for this instruction on this GPU");
      return false;
    }
  }

  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
    return true;

  // Returning atomics (except MIMG) must set glc (sc0 on gfx940);
  // non-returning atomics must not.
  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(L: IDLoc, Msg: isGFX940() ? "instruction must use sc0"
                                    : "instruction must use glc");
      return false;
    }
  } else {
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(
          Ptr: &CStr.data()[CStr.find(Str: isGFX940() ? "sc0" : "glc")]);
      Error(L: S, Msg: isGFX940() ? "instruction must not use sc0"
                               : "instruction must not use glc");
      return false;
    }
  }

  return true;
}
5479
// Validate the temporal-hint (th) and scope fields of the cache policy.
// Only reached for GFX12+ encodings (see validateCoherencyBits). Returns
// false after emitting a diagnostic on any invalid combination.
bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const unsigned CPol) {
  // Split the cache-policy word into its th and scope bit-fields.
  const unsigned TH = CPol & AMDGPU::CPol::TH;
  const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;

  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &TID = MII.get(Opcode);

  // All diagnostics point at the cache-policy operand.
  auto PrintError = [&](StringRef Msg) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    Error(S, Msg);
    return false;
  };

  // TH_ATOMIC_RETURN is meaningless on the non-returning atomic variants.
  if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
      (TID.TSFlags & SIInstrFlags::IsAtomicNoRet))
    return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");

  // Conversely, FLAT/MUBUF returning atomics must request the return value.
  if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
      (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
      (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
    return PrintError("instruction must use th:TH_ATOMIC_RETURN");

  // No explicit th value: nothing further to check.
  if (TH == 0)
    return true;

  // SMEM supports only a subset of the th encodings.
  if ((TID.TSFlags & SIInstrFlags::SMRD) &&
      ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
       (TH == AMDGPU::CPol::TH_NT_HT)))
    return PrintError("invalid th value for SMEM instruction");

  // th:bypass requires the scope and the "real bypass" bit to agree:
  // both system scope with the bit set, or neither.
  if (TH == AMDGPU::CPol::TH_BYPASS) {
    if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
         CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
        (Scope == AMDGPU::CPol::SCOPE_SYS &&
         !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
      return PrintError("scope and th combination is not valid");
  }

  // Finally, the th value's class bit (atomic/store/load) must match the
  // instruction's temporal-hint type.
  unsigned THType = AMDGPU::getTemporalHintType(TID);
  if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
      return PrintError("invalid th value for atomic instructions");
  } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
      return PrintError("invalid th value for store instructions");
  } else {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
      return PrintError("invalid th value for load instructions");
  }

  return true;
}
5534
5535bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5536 const OperandVector &Operands) {
5537 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
5538 if (Desc.mayStore() &&
5539 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5540 SMLoc Loc = getImmLoc(Type: AMDGPUOperand::ImmTyTFE, Operands);
5541 if (Loc != getInstLoc(Operands)) {
5542 Error(L: Loc, Msg: "TFE modifier has no meaning for store instructions");
5543 return false;
5544 }
5545 }
5546
5547 return true;
5548}
5549
5550bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5551 const OperandVector &Operands) {
5552 unsigned Opc = Inst.getOpcode();
5553 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5554 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
5555
5556 auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
5557 int FmtIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: FmtOp);
5558 if (FmtIdx == -1)
5559 return true;
5560 unsigned Fmt = Inst.getOperand(i: FmtIdx).getImm();
5561 int SrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: SrcOp);
5562 unsigned RegSize =
5563 TRI->getRegClass(i: MII.getOpRegClassID(OpInfo: Desc.operands()[SrcIdx], HwModeId: HwMode))
5564 .getSizeInBits();
5565
5566 if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
5567 return true;
5568
5569 Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
5570 Msg: "wrong register tuple size for " +
5571 Twine(WMMAMods::ModMatrixFmt[Fmt]));
5572 return false;
5573 };
5574
5575 return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
5576 validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
5577}
5578
// Run all per-instruction semantic checks on a successfully matched MCInst.
// Each validate* helper checks one class of constraint; on the first failure
// a diagnostic is emitted (either here or inside the helper) and false is
// returned so the instruction is not emitted. The check order is significant
// only in that the first failing check determines the reported error.
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
                                          const OperandVector &Operands) {
  if (!validateLdsDirect(Inst, Operands))
    return false;
  if (!validateTrue16OpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "op_sel operand conflicts with 16-bit operand suffix");
    return false;
  }
  // Literal-operand and operand-encoding constraints.
  if (!validateSOPLiteral(Inst, Operands))
    return false;
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateVOPD(Inst, Operands)) {
    return false;
  }
  // Modifier validity (clamp, op_sel, neg_lo/neg_hi, DPP).
  if (!validateIntClampSupported(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
          "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "invalid op_sel operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
          "invalid neg_lo operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
          "invalid neg_hi operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  // Image (MIMG) constraints.
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst, Operands)) {
    Error(IDLoc, "missing dim operand");
    return false;
  }
  if (!validateTensorR128(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "instruction must set modifier r128=0");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    return false;
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  // Miscellaneous per-opcode constraints (movrel, offsets, MAI/MFMA,
  // cache policy).
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMAISrc2(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }

  // Register-class / alignment constraints.
  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
      ? "invalid register class: data and dst should be all VGPR or AGPR"
      : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(IDLoc,
          "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateDS(Inst, Operands)) {
    return false;
  }

  if (!validateBLGP(Inst, Operands)) {
    return false;
  }

  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands)) {
    return false;
  }
  if (!validateTFE(Inst, Operands)) {
    return false;
  }
  if (!validateWMMA(Inst, Operands)) {
    return false;
  }

  return true;
}
5712
5713static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5714 const FeatureBitset &FBS,
5715 unsigned VariantID = 0);
5716
5717static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5718 const FeatureBitset &AvailableFeatures,
5719 unsigned VariantID);
5720
// Convenience overload: check the mnemonic against every assembler variant,
// not just the variants matched for the current subtarget.
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}
5725
5726bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5727 const FeatureBitset &FBS,
5728 ArrayRef<unsigned> Variants) {
5729 for (auto Variant : Variants) {
5730 if (AMDGPUCheckMnemonic(Mnemonic: Mnemo, AvailableFeatures: FBS, VariantID: Variant))
5731 return true;
5732 }
5733
5734 return false;
5735}
5736
// Diagnose why Mnemo failed to match, from most to least specific cause:
// wrong variant, wrong wavesize, wrong GPU, or an unknown mnemonic (with a
// spelling suggestion). Returns false if the mnemonic is actually supported
// here (the match failure lies elsewhere); otherwise emits an error and
// returns true.
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  SMLoc IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Check if this instruction may be used with a different wavesize.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    // FIXME: Use getAvailableFeatures, and do not manually recompute
    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);

    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
5781
5782static bool isInvalidVOPDY(const OperandVector &Operands,
5783 uint64_t InvalidOprIdx) {
5784 assert(InvalidOprIdx < Operands.size());
5785 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5786 if (Op.isToken() && InvalidOprIdx > 1) {
5787 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5788 return PrevOp.isToken() && PrevOp.getToken() == "::";
5789 }
5790 return false;
5791}
5792
// Match the parsed operands against each acceptable assembler variant, keep
// the most specific failure status, then emit the instruction or the best
// diagnostic. Returns true on error (MC parser convention).
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  Inst.setLoc(IDLoc);
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // A syntactic match alone is not enough: run the target-specific
    // semantic checks before emitting.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  // No variant matched; first check whether the mnemonic itself is valid
  // for this target (emits its own, more precise, diagnostic if not).
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    // ErrorInfo is the index of the offending operand, or ~0ULL if unknown.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;

      if (isInvalidVOPDY(Operands, ErrorInfo))
        return Error(ErrorLoc, "invalid VOPDY instruction");
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
5861
5862bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5863 int64_t Tmp = -1;
5864 if (!isToken(Kind: AsmToken::Integer) && !isToken(Kind: AsmToken::Identifier)) {
5865 return true;
5866 }
5867 if (getParser().parseAbsoluteExpression(Res&: Tmp)) {
5868 return true;
5869 }
5870 Ret = static_cast<uint32_t>(Tmp);
5871 return false;
5872}
5873
5874bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5875 if (!getSTI().getTargetTriple().isAMDGCN())
5876 return TokError(Msg: "directive only supported for amdgcn architecture");
5877
5878 std::string TargetIDDirective;
5879 SMLoc TargetStart = getTok().getLoc();
5880 if (getParser().parseEscapedString(Data&: TargetIDDirective))
5881 return true;
5882
5883 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5884 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5885 return getParser().Error(L: TargetRange.Start,
5886 Msg: (Twine(".amdgcn_target directive's target id ") +
5887 Twine(TargetIDDirective) +
5888 Twine(" does not match the specified target id ") +
5889 Twine(getTargetStreamer().getTargetID()->toString())).str());
5890
5891 return false;
5892}
5893
// Emit a generic "value out of range" diagnostic covering Range.
// Always returns true (error), matching the MC parser convention.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
5897
// Compute the VGPR/SGPR "blocks" fields of the kernel descriptor as MCExprs
// from the next-free register counts supplied by .amdhsa_ directives.
// Returns true (after reporting an out-of-range error on SGPRRange) when a
// resolvable SGPR count exceeds the addressable limit; VGPRRange is
// currently unused here. On success, VGPRBlocks/SGPRBlocks receive the
// encoded block-count expressions.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());
  MCContext &Ctx = getContext();

  const MCExpr *NumSGPRs = NextFreeSGPR;
  int64_t EvaluatedSGPRs;

  // On gfx10+ the SGPR count is forced to 0 here (the field is presumably
  // unused on those targets — mirrors getSIProgramInfo).
  if (Version.Major >= 10)
    NumSGPRs = MCConstantExpr::create(0, Ctx);
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    // Range checks only fire when the expression is resolvable now.
    // gfx8/9 (without the init bug) check the raw count before the extra
    // VCC/flat-scratch/XNACK SGPRs are added in...
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
        !Features.test(FeatureSGPRInitBug) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    const MCExpr *ExtraSGPRs =
        AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
    NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);

    // ...while gfx7 and earlier (or targets with the init bug) check the
    // count including the extra SGPRs.
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
        (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // With the SGPR init bug, a fixed count is always encoded.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs =
          MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
  }

  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
    const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
    const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
    const MCExpr *AlignToGPR =
        AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
    const MCExpr *DivGPR =
        MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
    const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
    return SubGPR;
  };

  VGPRBlocks = GetNumGPRBlocks(
      NextFreeVGPR,
      IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
  SGPRBlocks =
      GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));

  return false;
}
5960
5961bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5962 if (!getSTI().getTargetTriple().isAMDGCN())
5963 return TokError(Msg: "directive only supported for amdgcn architecture");
5964
5965 if (!isHsaAbi(STI: getSTI()))
5966 return TokError(Msg: "directive only supported for amdhsa OS");
5967
5968 StringRef KernelName;
5969 if (getParser().parseIdentifier(Res&: KernelName))
5970 return true;
5971
5972 AMDGPU::MCKernelDescriptor KD =
5973 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5974 STI: &getSTI(), Ctx&: getContext());
5975
5976 StringSet<> Seen;
5977
5978 IsaVersion IVersion = getIsaVersion(GPU: getSTI().getCPU());
5979
5980 const MCExpr *ZeroExpr = MCConstantExpr::create(Value: 0, Ctx&: getContext());
5981 const MCExpr *OneExpr = MCConstantExpr::create(Value: 1, Ctx&: getContext());
5982
5983 SMRange VGPRRange;
5984 const MCExpr *NextFreeVGPR = ZeroExpr;
5985 const MCExpr *AccumOffset = MCConstantExpr::create(Value: 0, Ctx&: getContext());
5986 const MCExpr *NamedBarCnt = ZeroExpr;
5987 uint64_t SharedVGPRCount = 0;
5988 uint64_t PreloadLength = 0;
5989 uint64_t PreloadOffset = 0;
5990 SMRange SGPRRange;
5991 const MCExpr *NextFreeSGPR = ZeroExpr;
5992
5993 // Count the number of user SGPRs implied from the enabled feature bits.
5994 unsigned ImpliedUserSGPRCount = 0;
5995
5996 // Track if the asm explicitly contains the directive for the user SGPR
5997 // count.
5998 std::optional<unsigned> ExplicitUserSGPRCount;
5999 const MCExpr *ReserveVCC = OneExpr;
6000 const MCExpr *ReserveFlatScr = OneExpr;
6001 std::optional<bool> EnableWavefrontSize32;
6002
6003 while (true) {
6004 while (trySkipToken(Kind: AsmToken::EndOfStatement));
6005
6006 StringRef ID;
6007 SMRange IDRange = getTok().getLocRange();
6008 if (!parseId(Val&: ID, ErrMsg: "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6009 return true;
6010
6011 if (ID == ".end_amdhsa_kernel")
6012 break;
6013
6014 if (!Seen.insert(key: ID).second)
6015 return TokError(Msg: ".amdhsa_ directives cannot be repeated");
6016
6017 SMLoc ValStart = getLoc();
6018 const MCExpr *ExprVal;
6019 if (getParser().parseExpression(Res&: ExprVal))
6020 return true;
6021 SMLoc ValEnd = getLoc();
6022 SMRange ValRange = SMRange(ValStart, ValEnd);
6023
6024 int64_t IVal = 0;
6025 uint64_t Val = IVal;
6026 bool EvaluatableExpr;
6027 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(Res&: IVal))) {
6028 if (IVal < 0)
6029 return OutOfRangeError(Range: ValRange);
6030 Val = IVal;
6031 }
6032
6033#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6034 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6035 return OutOfRangeError(RANGE); \
6036 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6037 getContext());
6038
6039// Some fields use the parsed value immediately which requires the expression to
6040// be solvable.
6041#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6042 if (!(RESOLVED)) \
6043 return Error(IDRange.Start, "directive should have resolvable expression", \
6044 IDRange);
6045
6046 if (ID == ".amdhsa_group_segment_fixed_size") {
6047 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6048 CHAR_BIT>(x: Val))
6049 return OutOfRangeError(Range: ValRange);
6050 KD.group_segment_fixed_size = ExprVal;
6051 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6052 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6053 CHAR_BIT>(x: Val))
6054 return OutOfRangeError(Range: ValRange);
6055 KD.private_segment_fixed_size = ExprVal;
6056 } else if (ID == ".amdhsa_kernarg_size") {
6057 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(x: Val))
6058 return OutOfRangeError(Range: ValRange);
6059 KD.kernarg_size = ExprVal;
6060 } else if (ID == ".amdhsa_user_sgpr_count") {
6061 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6062 ExplicitUserSGPRCount = Val;
6063 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6064 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6065 if (hasArchitectedFlatScratch())
6066 return Error(L: IDRange.Start,
6067 Msg: "directive is not supported with architected flat scratch",
6068 Range: IDRange);
6069 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6070 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6071 ExprVal, ValRange);
6072 if (Val)
6073 ImpliedUserSGPRCount += 4;
6074 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6075 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6076 if (!hasKernargPreload())
6077 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6078
6079 if (Val > getMaxNumUserSGPRs())
6080 return OutOfRangeError(Range: ValRange);
6081 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6082 ValRange);
6083 if (Val) {
6084 ImpliedUserSGPRCount += Val;
6085 PreloadLength = Val;
6086 }
6087 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6088 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6089 if (!hasKernargPreload())
6090 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6091
6092 if (Val >= 1024)
6093 return OutOfRangeError(Range: ValRange);
6094 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6095 ValRange);
6096 if (Val)
6097 PreloadOffset = Val;
6098 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6099 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6100 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6101 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6102 ValRange);
6103 if (Val)
6104 ImpliedUserSGPRCount += 2;
6105 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6106 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6107 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6108 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6109 ValRange);
6110 if (Val)
6111 ImpliedUserSGPRCount += 2;
6112 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6113 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6114 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6115 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6116 ExprVal, ValRange);
6117 if (Val)
6118 ImpliedUserSGPRCount += 2;
6119 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6120 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6121 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6122 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6123 ValRange);
6124 if (Val)
6125 ImpliedUserSGPRCount += 2;
6126 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6127 if (hasArchitectedFlatScratch())
6128 return Error(L: IDRange.Start,
6129 Msg: "directive is not supported with architected flat scratch",
6130 Range: IDRange);
6131 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6132 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6133 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6134 ExprVal, ValRange);
6135 if (Val)
6136 ImpliedUserSGPRCount += 2;
6137 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6138 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6139 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6140 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6141 ExprVal, ValRange);
6142 if (Val)
6143 ImpliedUserSGPRCount += 1;
6144 } else if (ID == ".amdhsa_wavefront_size32") {
6145 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6146 if (IVersion.Major < 10)
6147 return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
6148 EnableWavefrontSize32 = Val;
6149 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6150 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6151 ValRange);
6152 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6153 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6154 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6155 ValRange);
6156 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6157 if (hasArchitectedFlatScratch())
6158 return Error(L: IDRange.Start,
6159 Msg: "directive is not supported with architected flat scratch",
6160 Range: IDRange);
6161 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6162 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6163 ValRange);
6164 } else if (ID == ".amdhsa_enable_private_segment") {
6165 if (!hasArchitectedFlatScratch())
6166 return Error(
6167 L: IDRange.Start,
6168 Msg: "directive is not supported without architected flat scratch",
6169 Range: IDRange);
6170 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6171 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6172 ValRange);
6173 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6174 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6175 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6176 ValRange);
6177 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6178 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6179 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6180 ValRange);
6181 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6182 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6183 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6184 ValRange);
6185 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6186 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6187 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6188 ValRange);
6189 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6190 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6191 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6192 ValRange);
6193 } else if (ID == ".amdhsa_next_free_vgpr") {
6194 VGPRRange = ValRange;
6195 NextFreeVGPR = ExprVal;
6196 } else if (ID == ".amdhsa_next_free_sgpr") {
6197 SGPRRange = ValRange;
6198 NextFreeSGPR = ExprVal;
6199 } else if (ID == ".amdhsa_accum_offset") {
6200 if (!isGFX90A())
6201 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6202 AccumOffset = ExprVal;
6203 } else if (ID == ".amdhsa_named_barrier_count") {
6204 if (!isGFX1250Plus())
6205 return Error(L: IDRange.Start, Msg: "directive requires gfx1250+", Range: IDRange);
6206 NamedBarCnt = ExprVal;
6207 } else if (ID == ".amdhsa_reserve_vcc") {
6208 if (EvaluatableExpr && !isUInt<1>(x: Val))
6209 return OutOfRangeError(Range: ValRange);
6210 ReserveVCC = ExprVal;
6211 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6212 if (IVersion.Major < 7)
6213 return Error(L: IDRange.Start, Msg: "directive requires gfx7+", Range: IDRange);
6214 if (hasArchitectedFlatScratch())
6215 return Error(L: IDRange.Start,
6216 Msg: "directive is not supported with architected flat scratch",
6217 Range: IDRange);
6218 if (EvaluatableExpr && !isUInt<1>(x: Val))
6219 return OutOfRangeError(Range: ValRange);
6220 ReserveFlatScr = ExprVal;
6221 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6222 if (IVersion.Major < 8)
6223 return Error(L: IDRange.Start, Msg: "directive requires gfx8+", Range: IDRange);
6224 if (!isUInt<1>(x: Val))
6225 return OutOfRangeError(Range: ValRange);
6226 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6227 return getParser().Error(L: IDRange.Start, Msg: ".amdhsa_reserve_xnack_mask does not match target id",
6228 Range: IDRange);
6229 } else if (ID == ".amdhsa_float_round_mode_32") {
6230 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6231 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6232 ValRange);
6233 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6234 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6235 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6236 ValRange);
6237 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6238 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6239 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6240 ValRange);
6241 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6242 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6243 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6244 ValRange);
6245 } else if (ID == ".amdhsa_dx10_clamp") {
6246 if (IVersion.Major >= 12)
6247 return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+", Range: IDRange);
6248 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6249 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6250 ValRange);
6251 } else if (ID == ".amdhsa_ieee_mode") {
6252 if (IVersion.Major >= 12)
6253 return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+", Range: IDRange);
6254 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6255 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6256 ValRange);
6257 } else if (ID == ".amdhsa_fp16_overflow") {
6258 if (IVersion.Major < 9)
6259 return Error(L: IDRange.Start, Msg: "directive requires gfx9+", Range: IDRange);
6260 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6261 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6262 ValRange);
6263 } else if (ID == ".amdhsa_tg_split") {
6264 if (!isGFX90A())
6265 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6266 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6267 ExprVal, ValRange);
6268 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6269 if (!supportsWGP(STI: getSTI()))
6270 return Error(L: IDRange.Start,
6271 Msg: "directive unsupported on " + getSTI().getCPU(), Range: IDRange);
6272 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6273 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6274 ValRange);
6275 } else if (ID == ".amdhsa_memory_ordered") {
6276 if (IVersion.Major < 10)
6277 return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
6278 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6279 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6280 ValRange);
6281 } else if (ID == ".amdhsa_forward_progress") {
6282 if (IVersion.Major < 10)
6283 return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
6284 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6285 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6286 ValRange);
6287 } else if (ID == ".amdhsa_shared_vgpr_count") {
6288 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6289 if (IVersion.Major < 10 || IVersion.Major >= 12)
6290 return Error(L: IDRange.Start, Msg: "directive requires gfx10 or gfx11",
6291 Range: IDRange);
6292 SharedVGPRCount = Val;
6293 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6294 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6295 ValRange);
6296 } else if (ID == ".amdhsa_inst_pref_size") {
6297 if (IVersion.Major < 11)
6298 return Error(L: IDRange.Start, Msg: "directive requires gfx11+", Range: IDRange);
6299 if (IVersion.Major == 11) {
6300 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6301 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6302 ValRange);
6303 } else {
6304 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6305 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6306 ValRange);
6307 }
6308 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6309 PARSE_BITS_ENTRY(
6310 KD.compute_pgm_rsrc2,
6311 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6312 ExprVal, ValRange);
6313 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6314 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6315 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6316 ExprVal, ValRange);
6317 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6318 PARSE_BITS_ENTRY(
6319 KD.compute_pgm_rsrc2,
6320 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6321 ExprVal, ValRange);
6322 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6323 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6324 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6325 ExprVal, ValRange);
6326 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6327 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6328 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6329 ExprVal, ValRange);
6330 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6331 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6332 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6333 ExprVal, ValRange);
6334 } else if (ID == ".amdhsa_exception_int_div_zero") {
6335 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6336 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6337 ExprVal, ValRange);
6338 } else if (ID == ".amdhsa_round_robin_scheduling") {
6339 if (IVersion.Major < 12)
6340 return Error(L: IDRange.Start, Msg: "directive requires gfx12+", Range: IDRange);
6341 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6342 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6343 ValRange);
6344 } else {
6345 return Error(L: IDRange.Start, Msg: "unknown .amdhsa_kernel directive", Range: IDRange);
6346 }
6347
6348#undef PARSE_BITS_ENTRY
6349 }
6350
6351 if (!Seen.contains(key: ".amdhsa_next_free_vgpr"))
6352 return TokError(Msg: ".amdhsa_next_free_vgpr directive is required");
6353
6354 if (!Seen.contains(key: ".amdhsa_next_free_sgpr"))
6355 return TokError(Msg: ".amdhsa_next_free_sgpr directive is required");
6356
6357 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(u&: ImpliedUserSGPRCount);
6358
6359 // Consider the case where the total number of UserSGPRs with trailing
6360 // allocated preload SGPRs, is greater than the number of explicitly
6361 // referenced SGPRs.
6362 if (PreloadLength) {
6363 MCContext &Ctx = getContext();
6364 NextFreeSGPR = AMDGPUMCExpr::createMax(
6365 Args: {NextFreeSGPR, MCConstantExpr::create(Value: UserSGPRCount, Ctx)}, Ctx);
6366 }
6367
6368 const MCExpr *VGPRBlocks;
6369 const MCExpr *SGPRBlocks;
6370 if (calculateGPRBlocks(Features: getFeatureBits(), VCCUsed: ReserveVCC, FlatScrUsed: ReserveFlatScr,
6371 XNACKUsed: getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6372 EnableWavefrontSize32, NextFreeVGPR,
6373 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6374 SGPRBlocks))
6375 return true;
6376
6377 int64_t EvaluatedVGPRBlocks;
6378 bool VGPRBlocksEvaluatable =
6379 VGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedVGPRBlocks);
6380 if (VGPRBlocksEvaluatable &&
6381 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6382 x: static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6383 return OutOfRangeError(Range: VGPRRange);
6384 }
6385 AMDGPU::MCKernelDescriptor::bits_set(
6386 Dst&: KD.compute_pgm_rsrc1, Value: VGPRBlocks,
6387 Shift: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6388 Mask: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, Ctx&: getContext());
6389
6390 int64_t EvaluatedSGPRBlocks;
6391 if (SGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedSGPRBlocks) &&
6392 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6393 x: static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6394 return OutOfRangeError(Range: SGPRRange);
6395 AMDGPU::MCKernelDescriptor::bits_set(
6396 Dst&: KD.compute_pgm_rsrc1, Value: SGPRBlocks,
6397 Shift: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6398 Mask: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, Ctx&: getContext());
6399
6400 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6401 return TokError(Msg: "amdgpu_user_sgpr_count smaller than than implied by "
6402 "enabled user SGPRs");
6403
6404 if (isGFX1250Plus()) {
6405 if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(x: UserSGPRCount))
6406 return TokError(Msg: "too many user SGPRs enabled");
6407 AMDGPU::MCKernelDescriptor::bits_set(
6408 Dst&: KD.compute_pgm_rsrc2,
6409 Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()),
6410 Shift: COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6411 Mask: COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, Ctx&: getContext());
6412 } else {
6413 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6414 x: UserSGPRCount))
6415 return TokError(Msg: "too many user SGPRs enabled");
6416 AMDGPU::MCKernelDescriptor::bits_set(
6417 Dst&: KD.compute_pgm_rsrc2,
6418 Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()),
6419 Shift: COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6420 Mask: COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, Ctx&: getContext());
6421 }
6422
6423 int64_t IVal = 0;
6424 if (!KD.kernarg_size->evaluateAsAbsolute(Res&: IVal))
6425 return TokError(Msg: "Kernarg size should be resolvable");
6426 uint64_t kernarg_size = IVal;
6427 if (PreloadLength && kernarg_size &&
6428 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6429 return TokError(Msg: "Kernarg preload length + offset is larger than the "
6430 "kernarg segment size");
6431
6432 if (isGFX90A()) {
6433 if (!Seen.contains(key: ".amdhsa_accum_offset"))
6434 return TokError(Msg: ".amdhsa_accum_offset directive is required");
6435 int64_t EvaluatedAccum;
6436 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(Res&: EvaluatedAccum);
6437 uint64_t UEvaluatedAccum = EvaluatedAccum;
6438 if (AccumEvaluatable &&
6439 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6440 return TokError(Msg: "accum_offset should be in range [4..256] in "
6441 "increments of 4");
6442
6443 int64_t EvaluatedNumVGPR;
6444 if (NextFreeVGPR->evaluateAsAbsolute(Res&: EvaluatedNumVGPR) &&
6445 AccumEvaluatable &&
6446 UEvaluatedAccum >
6447 alignTo(Value: std::max(a: (uint64_t)1, b: (uint64_t)EvaluatedNumVGPR), Align: 4))
6448 return TokError(Msg: "accum_offset exceeds total VGPR allocation");
6449 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6450 LHS: MCBinaryExpr::createDiv(
6451 LHS: AccumOffset, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext()),
6452 RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext());
6453 MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: AdjustedAccum,
6454 Shift: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6455 Mask: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6456 Ctx&: getContext());
6457 }
6458
6459 if (isGFX1250Plus())
6460 MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: NamedBarCnt,
6461 Shift: COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6462 Mask: COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6463 Ctx&: getContext());
6464
6465 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6466 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6467 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6468 return TokError(Msg: "shared_vgpr_count directive not valid on "
6469 "wavefront size 32");
6470 }
6471
6472 if (VGPRBlocksEvaluatable &&
6473 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6474 63)) {
6475 return TokError(Msg: "shared_vgpr_count*2 + "
6476 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6477 "exceed 63\n");
6478 }
6479 }
6480
6481 getTargetStreamer().EmitAmdhsaKernelDescriptor(STI: getSTI(), KernelName, KernelDescriptor: KD,
6482 NextVGPR: NextFreeVGPR, NextSGPR: NextFreeSGPR,
6483 ReserveVCC, ReserveFlatScr);
6484 return false;
6485}
6486
6487bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6488 uint32_t Version;
6489 if (ParseAsAbsoluteExpression(Ret&: Version))
6490 return true;
6491
6492 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(COV: Version);
6493 return false;
6494}
6495
6496bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6497 AMDGPUMCKernelCodeT &C) {
6498 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6499 // assembly for backwards compatibility.
6500 if (ID == "max_scratch_backing_memory_byte_size") {
6501 Parser.eatToEndOfStatement();
6502 return false;
6503 }
6504
6505 SmallString<40> ErrStr;
6506 raw_svector_ostream Err(ErrStr);
6507 if (!C.ParseKernelCodeT(ID, MCParser&: getParser(), Err)) {
6508 return TokError(Msg: Err.str());
6509 }
6510 Lex();
6511
6512 if (ID == "enable_wavefront_size32") {
6513 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6514 if (!isGFX10Plus())
6515 return TokError(Msg: "enable_wavefront_size32=1 is only allowed on GFX10+");
6516 if (!isWave32())
6517 return TokError(Msg: "enable_wavefront_size32=1 requires +WavefrontSize32");
6518 } else {
6519 if (!isWave64())
6520 return TokError(Msg: "enable_wavefront_size32=0 requires +WavefrontSize64");
6521 }
6522 }
6523
6524 if (ID == "wavefront_size") {
6525 if (C.wavefront_size == 5) {
6526 if (!isGFX10Plus())
6527 return TokError(Msg: "wavefront_size=5 is only allowed on GFX10+");
6528 if (!isWave32())
6529 return TokError(Msg: "wavefront_size=5 requires +WavefrontSize32");
6530 } else if (C.wavefront_size == 6) {
6531 if (!isWave64())
6532 return TokError(Msg: "wavefront_size=6 requires +WavefrontSize64");
6533 }
6534 }
6535
6536 return false;
6537}
6538
6539bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6540 AMDGPUMCKernelCodeT KernelCode;
6541 KernelCode.initDefault(STI: &getSTI(), Ctx&: getContext());
6542
6543 while (true) {
6544 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6545 // will set the current token to EndOfStatement.
6546 while(trySkipToken(Kind: AsmToken::EndOfStatement));
6547
6548 StringRef ID;
6549 if (!parseId(Val&: ID, ErrMsg: "expected value identifier or .end_amd_kernel_code_t"))
6550 return true;
6551
6552 if (ID == ".end_amd_kernel_code_t")
6553 break;
6554
6555 if (ParseAMDKernelCodeTValue(ID, C&: KernelCode))
6556 return true;
6557 }
6558
6559 KernelCode.validate(STI: &getSTI(), Ctx&: getContext());
6560 getTargetStreamer().EmitAMDKernelCodeT(Header&: KernelCode);
6561
6562 return false;
6563}
6564
6565bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6566 StringRef KernelName;
6567 if (!parseId(Val&: KernelName, ErrMsg: "expected symbol name"))
6568 return true;
6569
6570 getTargetStreamer().EmitAMDGPUSymbolType(SymbolName: KernelName,
6571 Type: ELF::STT_AMDGPU_HSA_KERNEL);
6572
6573 KernelScope.initialize(Context&: getContext());
6574 return false;
6575}
6576
6577bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6578 if (!getSTI().getTargetTriple().isAMDGCN()) {
6579 return Error(L: getLoc(),
6580 Msg: ".amd_amdgpu_isa directive is not available on non-amdgcn "
6581 "architectures");
6582 }
6583
6584 auto TargetIDDirective = getLexer().getTok().getStringContents();
6585 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6586 return Error(L: getParser().getTok().getLoc(), Msg: "target id must match options");
6587
6588 getTargetStreamer().EmitISAVersion();
6589 Lex();
6590
6591 return false;
6592}
6593
6594bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6595 assert(isHsaAbi(getSTI()));
6596
6597 std::string HSAMetadataString;
6598 if (ParseToEndDirective(AssemblerDirectiveBegin: HSAMD::V3::AssemblerDirectiveBegin,
6599 AssemblerDirectiveEnd: HSAMD::V3::AssemblerDirectiveEnd, CollectString&: HSAMetadataString))
6600 return true;
6601
6602 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6603 return Error(L: getLoc(), Msg: "invalid HSA metadata");
6604
6605 return false;
6606}
6607
6608/// Common code to parse out a block of text (typically YAML) between start and
6609/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Preserve whitespace verbatim: the collected text (typically YAML) is
  // whitespace-sensitive, so stop the lexer from eating spaces.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(Kind: AsmToken::Eof)) {
    // Copy leading whitespace tokens through to the output.
    while (isToken(Kind: AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    // The end directive terminates collection; it is consumed, not copied.
    if (trySkipId(Id: AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    // Append the rest of the statement plus the target's statement
    // separator, so the collected string round-trips line structure.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexer behavior before returning on any path below.
  getLexer().setSkipSpace(true);

  // Reaching EOF without seeing the end directive is an error.
  if (isToken(Kind: AsmToken::Eof) && !FoundEnd) {
    return TokError(Msg: Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  return false;
}
6645
6646/// Parse the assembler directive for new MsgPack-format PAL metadata.
6647bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6648 std::string String;
6649 if (ParseToEndDirective(AssemblerDirectiveBegin: AMDGPU::PALMD::AssemblerDirectiveBegin,
6650 AssemblerDirectiveEnd: AMDGPU::PALMD::AssemblerDirectiveEnd, CollectString&: String))
6651 return true;
6652
6653 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6654 if (!PALMetadata->setFromString(String))
6655 return Error(L: getLoc(), Msg: "invalid PAL metadata");
6656 return false;
6657}
6658
6659/// Parse the assembler directive for old linear-format PAL metadata.
6660bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6661 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6662 return Error(L: getLoc(),
6663 Msg: (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6664 "not available on non-amdpal OSes")).str());
6665 }
6666
6667 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6668 PALMetadata->setLegacy();
6669 for (;;) {
6670 uint32_t Key, Value;
6671 if (ParseAsAbsoluteExpression(Ret&: Key)) {
6672 return TokError(Msg: Twine("invalid value in ") +
6673 Twine(PALMD::AssemblerDirective));
6674 }
6675 if (!trySkipToken(Kind: AsmToken::Comma)) {
6676 return TokError(Msg: Twine("expected an even number of values in ") +
6677 Twine(PALMD::AssemblerDirective));
6678 }
6679 if (ParseAsAbsoluteExpression(Ret&: Value)) {
6680 return TokError(Msg: Twine("invalid value in ") +
6681 Twine(PALMD::AssemblerDirective));
6682 }
6683 PALMetadata->setRegister(Reg: Key, Val: Value);
6684 if (!trySkipToken(Kind: AsmToken::Comma))
6685 break;
6686 }
6687 return false;
6688}
6689
6690/// ParseDirectiveAMDGPULDS
6691/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6692bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6693 if (getParser().checkForValidSection())
6694 return true;
6695
6696 StringRef Name;
6697 SMLoc NameLoc = getLoc();
6698 if (getParser().parseIdentifier(Res&: Name))
6699 return TokError(Msg: "expected identifier in directive");
6700
6701 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6702 if (getParser().parseComma())
6703 return true;
6704
6705 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(STI: &getSTI());
6706
6707 int64_t Size;
6708 SMLoc SizeLoc = getLoc();
6709 if (getParser().parseAbsoluteExpression(Res&: Size))
6710 return true;
6711 if (Size < 0)
6712 return Error(L: SizeLoc, Msg: "size must be non-negative");
6713 if (Size > LocalMemorySize)
6714 return Error(L: SizeLoc, Msg: "size is too large");
6715
6716 int64_t Alignment = 4;
6717 if (trySkipToken(Kind: AsmToken::Comma)) {
6718 SMLoc AlignLoc = getLoc();
6719 if (getParser().parseAbsoluteExpression(Res&: Alignment))
6720 return true;
6721 if (Alignment < 0 || !isPowerOf2_64(Value: Alignment))
6722 return Error(L: AlignLoc, Msg: "alignment must be a power of two");
6723
6724 // Alignment larger than the size of LDS is possible in theory, as long
6725 // as the linker manages to place to symbol at address 0, but we do want
6726 // to make sure the alignment fits nicely into a 32-bit integer.
6727 if (Alignment >= 1u << 31)
6728 return Error(L: AlignLoc, Msg: "alignment is too large");
6729 }
6730
6731 if (parseEOL())
6732 return true;
6733
6734 Symbol->redefineIfPossible();
6735 if (!Symbol->isUndefined())
6736 return Error(L: NameLoc, Msg: "invalid symbol redefinition");
6737
6738 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Alignment: Align(Alignment));
6739 return false;
6740}
6741
6742bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6743 StringRef IDVal = DirectiveID.getString();
6744
6745 if (isHsaAbi(STI: getSTI())) {
6746 if (IDVal == ".amdhsa_kernel")
6747 return ParseDirectiveAMDHSAKernel();
6748
6749 if (IDVal == ".amdhsa_code_object_version")
6750 return ParseDirectiveAMDHSACodeObjectVersion();
6751
6752 // TODO: Restructure/combine with PAL metadata directive.
6753 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
6754 return ParseDirectiveHSAMetadata();
6755 } else {
6756 if (IDVal == ".amd_kernel_code_t")
6757 return ParseDirectiveAMDKernelCodeT();
6758
6759 if (IDVal == ".amdgpu_hsa_kernel")
6760 return ParseDirectiveAMDGPUHsaKernel();
6761
6762 if (IDVal == ".amd_amdgpu_isa")
6763 return ParseDirectiveISAVersion();
6764
6765 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
6766 return Error(L: getLoc(), Msg: (Twine(HSAMD::AssemblerDirectiveBegin) +
6767 Twine(" directive is "
6768 "not available on non-amdhsa OSes"))
6769 .str());
6770 }
6771 }
6772
6773 if (IDVal == ".amdgcn_target")
6774 return ParseDirectiveAMDGCNTarget();
6775
6776 if (IDVal == ".amdgpu_lds")
6777 return ParseDirectiveAMDGPULDS();
6778
6779 if (IDVal == PALMD::AssemblerDirectiveBegin)
6780 return ParseDirectivePALMetadataBegin();
6781
6782 if (IDVal == PALMD::AssemblerDirective)
6783 return ParseDirectivePALMetadata();
6784
6785 return true;
6786}
6787
// Returns true if the physical register \p Reg is available as an operand on
// the current subtarget generation.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           MCRegister Reg) {
  // TTMP12..TTMP15 only exist from GFX9 onwards.
  if (MRI.regsOverlap(RegA: TTMP12_TTMP13_TTMP14_TTMP15, RegB: Reg))
    return isGFX9Plus();

  // GFX10+ has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(RegA: SGPR104_SGPR105, RegB: Reg))
    return hasSGPR104_SGPR105();

  switch (Reg.id()) {
  // Aperture registers were introduced on GFX9.
  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:
    return isGFX9Plus();
  case SRC_FLAT_SCRATCH_BASE_LO:
  case SRC_FLAT_SCRATCH_BASE_HI:
    return hasGloballyAddressableScratch();
  // Present on GFX9/GFX10 only; removed again on GFX11.
  case SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus() && !isGFX11Plus();
  // Trap handler base/address registers were dropped on GFX9 (replaced by
  // the TTMP-based trap handler).
  case TBA:
  case TBA_LO:
  case TBA_HI:
  case TMA:
  case TMA_LO:
  case TMA_HI:
    return !isGFX9Plus();
  // XNACK_MASK is only addressable on VI/GFX9 parts that support XNACK.
  case XNACK_MASK:
  case XNACK_MASK_LO:
  case XNACK_MASK_HI:
    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
  case SGPR_NULL:
    return isGFX10Plus();
  // The execz/vccz pseudo-sources were removed on GFX11.
  case SRC_EXECZ:
  case SRC_VCCZ:
    return !isGFX11Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10Plus flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (Reg.id()) {
    case FLAT_SCR:
    case FLAT_SCR_LO:
    case FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(RegA: SGPR102_SGPR103, RegB: Reg))
    return hasSGPR102_SGPR103();

  return true;
}
6856
// Parse a single instruction operand. Tries, in order: VOPD syntax, the
// TableGen-generated custom operand parsers, NSA register lists (MIMG on
// GFX10+), and finally a generic register-or-immediate.
ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
                                          StringRef Mnemonic,
                                          OperandMode Mode) {
  ParseStatus Res = parseVOPD(Operands);
  if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement))
    return Res;

  // Try to parse with a custom parser
  Res = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there as an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement))
    return Res;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(Kind: AsmToken::LBrac)) {
    // Remember where the bracketed list starts so the surrounding "[" / "]"
    // tokens can be inserted around the registers afterwards.
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      Res = parseReg(Operands);
      // Report the missing register, then fall through to the failure
      // return below (NoMatch is not a success).
      if (Res.isNoMatch())
        Error(L: Loc, Msg: "expected a register");
      if (!Res.isSuccess())
        return ParseStatus::Failure;

      RBraceLoc = getLoc();
      if (trySkipToken(Kind: AsmToken::RBrac))
        break;

      if (!skipToken(Kind: AsmToken::Comma,
                     ErrMsg: "expected a comma or a closing square bracket"))
        return ParseStatus::Failure;
    }

    // Only wrap in brackets when more than one register was parsed; a
    // single register behaves like a plain (non-NSA) operand.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(I: Operands.begin() + Prefix,
                      Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "[", Loc: LBraceLoc));
      Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "]", Loc: RBraceLoc));
    }

    return ParseStatus::Success;
  }

  return parseRegOrImm(Operands);
}
6909
6910StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6911 // Clear any forced encodings from the previous instruction.
6912 setForcedEncodingSize(0);
6913 setForcedDPP(false);
6914 setForcedSDWA(false);
6915
6916 if (Name.consume_back(Suffix: "_e64_dpp")) {
6917 setForcedDPP(true);
6918 setForcedEncodingSize(64);
6919 return Name;
6920 }
6921 if (Name.consume_back(Suffix: "_e64")) {
6922 setForcedEncodingSize(64);
6923 return Name;
6924 }
6925 if (Name.consume_back(Suffix: "_e32")) {
6926 setForcedEncodingSize(32);
6927 return Name;
6928 }
6929 if (Name.consume_back(Suffix: "_dpp")) {
6930 setForcedDPP(true);
6931 return Name;
6932 }
6933 if (Name.consume_back(Suffix: "_sdwa")) {
6934 setForcedSDWA(true);
6935 return Name;
6936 }
6937 return Name;
6938}
6939
6940static void applyMnemonicAliases(StringRef &Mnemonic,
6941 const FeatureBitset &Features,
6942 unsigned VariantID);
6943
// Parse a full instruction: mnemonic (with any forced-encoding suffix
// stripped), then operands until end of statement. On an operand parse
// failure, emits a diagnostic and skips to the end of the statement.
bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name, SMLoc NameLoc,
                                       OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);

  // If the target architecture uses MnemonicAlias, call it here to parse
  // operands correctly.
  applyMnemonicAliases(Mnemonic&: Name, Features: getAvailableFeatures(), VariantID: 0);

  Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: NameLoc));

  bool IsMIMG = Name.starts_with(Prefix: "image_");

  while (!trySkipToken(Kind: AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // The first operand of a GFX10+ image instruction may be an NSA
    // register list in square brackets (Operands holds only the mnemonic
    // token at that point).
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    ParseStatus Res = parseOperand(Operands, Mnemonic: Name, Mode);

    if (!Res.isSuccess()) {
      // Prefer a "not supported on this GPU" style diagnostic when the
      // mnemonic exists on another subtarget.
      checkUnsupportedInstruction(Mnemo: Name, IDLoc: NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg = Res.isFailure() ? "failed parsing operand."
                                        : "not a valid operand.";
        Error(L: getLoc(), Msg);
      }
      // Error recovery: discard the rest of the statement.
      while (!trySkipToken(Kind: AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(Kind: AsmToken::Comma);
  }

  return false;
}
6984
6985//===----------------------------------------------------------------------===//
6986// Utility functions
6987//===----------------------------------------------------------------------===//
6988
6989ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6990 OperandVector &Operands) {
6991 SMLoc S = getLoc();
6992 if (!trySkipId(Id: Name))
6993 return ParseStatus::NoMatch;
6994
6995 Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: S));
6996 return ParseStatus::Success;
6997}
6998
6999ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7000 int64_t &IntVal) {
7001
7002 if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
7003 return ParseStatus::NoMatch;
7004
7005 return parseExpr(Imm&: IntVal) ? ParseStatus::Success : ParseStatus::Failure;
7006}
7007
7008ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7009 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7010 std::function<bool(int64_t &)> ConvertResult) {
7011 SMLoc S = getLoc();
7012 int64_t Value = 0;
7013
7014 ParseStatus Res = parseIntWithPrefix(Prefix, IntVal&: Value);
7015 if (!Res.isSuccess())
7016 return Res;
7017
7018 if (ConvertResult && !ConvertResult(Value)) {
7019 Error(L: S, Msg: "invalid " + StringRef(Prefix) + " value.");
7020 }
7021
7022 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Value, Loc: S, Type: ImmTy));
7023 return ParseStatus::Success;
7024}
7025
7026ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7027 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7028 bool (*ConvertResult)(int64_t &)) {
7029 SMLoc S = getLoc();
7030 if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
7031 return ParseStatus::NoMatch;
7032
7033 if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected a left square bracket"))
7034 return ParseStatus::Failure;
7035
7036 unsigned Val = 0;
7037 const unsigned MaxSize = 4;
7038
7039 // FIXME: How to verify the number of elements matches the number of src
7040 // operands?
7041 for (int I = 0; ; ++I) {
7042 int64_t Op;
7043 SMLoc Loc = getLoc();
7044 if (!parseExpr(Imm&: Op))
7045 return ParseStatus::Failure;
7046
7047 if (Op != 0 && Op != 1)
7048 return Error(L: Loc, Msg: "invalid " + StringRef(Prefix) + " value.");
7049
7050 Val |= (Op << I);
7051
7052 if (trySkipToken(Kind: AsmToken::RBrac))
7053 break;
7054
7055 if (I + 1 == MaxSize)
7056 return Error(L: getLoc(), Msg: "expected a closing square bracket");
7057
7058 if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
7059 return ParseStatus::Failure;
7060 }
7061
7062 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: ImmTy));
7063 return ParseStatus::Success;
7064}
7065
7066ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7067 OperandVector &Operands,
7068 AMDGPUOperand::ImmTy ImmTy,
7069 bool IgnoreNegative) {
7070 int64_t Bit;
7071 SMLoc S = getLoc();
7072
7073 if (trySkipId(Id: Name)) {
7074 Bit = 1;
7075 } else if (trySkipId(Pref: "no", Id: Name)) {
7076 if (IgnoreNegative)
7077 return ParseStatus::Success;
7078 Bit = 0;
7079 } else {
7080 return ParseStatus::NoMatch;
7081 }
7082
7083 if (Name == "r128" && !hasMIMG_R128())
7084 return Error(L: S, Msg: "r128 modifier is not supported on this GPU");
7085 if (Name == "a16" && !hasA16())
7086 return Error(L: S, Msg: "a16 modifier is not supported on this GPU");
7087
7088 if (Bit == 0 && Name == "gds") {
7089 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7090 if (Mnemo.starts_with(Prefix: "ds_gws"))
7091 return Error(L: S, Msg: "nogds is not allowed");
7092 }
7093
7094 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7095 ImmTy = AMDGPUOperand::ImmTyR128A16;
7096
7097 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Bit, Loc: S, Type: ImmTy));
7098 return ParseStatus::Success;
7099}
7100
7101unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7102 bool &Disabling) const {
7103 Disabling = Id.consume_front(Prefix: "no");
7104
7105 if (isGFX940() && !Mnemo.starts_with(Prefix: "s_")) {
7106 return StringSwitch<unsigned>(Id)
7107 .Case(S: "nt", Value: AMDGPU::CPol::NT)
7108 .Case(S: "sc0", Value: AMDGPU::CPol::SC0)
7109 .Case(S: "sc1", Value: AMDGPU::CPol::SC1)
7110 .Default(Value: 0);
7111 }
7112
7113 return StringSwitch<unsigned>(Id)
7114 .Case(S: "dlc", Value: AMDGPU::CPol::DLC)
7115 .Case(S: "glc", Value: AMDGPU::CPol::GLC)
7116 .Case(S: "scc", Value: AMDGPU::CPol::SCC)
7117 .Case(S: "slc", Value: AMDGPU::CPol::SLC)
7118 .Default(Value: 0);
7119}
7120
// Parse the cache-policy operand. GFX12+ uses named sub-fields (th:, scope:,
// nv, scale_offset) that may appear in any order, each at most once; older
// targets use flag-style modifiers (glc/slc/dlc/scc, optionally "no"-prefixed).
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  if (isGFX12Plus()) {
    SMLoc StringLoc = getLoc();

    int64_t CPolVal = 0;
    // Each Res* starts as NoMatch and flips to Success once that sub-field
    // has been consumed, preventing duplicates.
    ParseStatus ResTH = ParseStatus::NoMatch;
    ParseStatus ResScope = ParseStatus::NoMatch;
    ParseStatus ResNV = ParseStatus::NoMatch;
    ParseStatus ResScal = ParseStatus::NoMatch;

    // Loop until one full pass matches nothing (the break at the bottom).
    for (;;) {
      if (ResTH.isNoMatch()) {
        int64_t TH;
        ResTH = parseTH(Operands, TH);
        if (ResTH.isFailure())
          return ResTH;
        if (ResTH.isSuccess()) {
          CPolVal |= TH;
          continue;
        }
      }

      if (ResScope.isNoMatch()) {
        int64_t Scope;
        ResScope = parseScope(Operands, Scope);
        if (ResScope.isFailure())
          return ResScope;
        if (ResScope.isSuccess()) {
          CPolVal |= Scope;
          continue;
        }
      }

      // NV bit exists on GFX12+, but does something starting from GFX1250.
      // Allow parsing on all GFX12 and fail on validation for better
      // diagnostics.
      if (ResNV.isNoMatch()) {
        if (trySkipId(Id: "nv")) {
          ResNV = ParseStatus::Success;
          CPolVal |= CPol::NV;
          continue;
        } else if (trySkipId(Pref: "no", Id: "nv")) {
          ResNV = ParseStatus::Success;
          continue;
        }
      }

      if (ResScal.isNoMatch()) {
        if (trySkipId(Id: "scale_offset")) {
          ResScal = ParseStatus::Success;
          CPolVal |= CPol::SCAL;
          continue;
        } else if (trySkipId(Pref: "no", Id: "scale_offset")) {
          ResScal = ParseStatus::Success;
          continue;
        }
      }

      break;
    }

    // Nothing matched at all: not a cache-policy operand.
    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
        ResScal.isNoMatch())
      return ParseStatus::NoMatch;

    Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: CPolVal, Loc: StringLoc,
                                              Type: AMDGPUOperand::ImmTyCPol));
    return ParseStatus::Success;
  }

  // Pre-GFX12 path: accumulate flag modifiers until a non-modifier token.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(Id: getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(L: S, Msg: "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(L: S, Msg: "scc modifier is not supported on this GPU");

    // Seen tracks both positive and "no"-prefixed forms, so "glc noglc"
    // is also rejected as a duplicate.
    if (Seen & CPol)
      return Error(L: S, Msg: "duplicate cache policy modifier");

    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Enabled, Loc: OpLoc, Type: AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
7225
7226ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7227 int64_t &Scope) {
7228 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7229 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7230
7231 ParseStatus Res = parseStringOrIntWithPrefix(
7232 Operands, Name: "scope", Ids: {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7233 IntVal&: Scope);
7234
7235 if (Res.isSuccess())
7236 Scope = Scopes[Scope];
7237
7238 return Res;
7239}
7240
// Parse an optional "th:<value>" cache-policy modifier ("th" presumably
// stands for temporal hint, matching the CPol::TH_* encodings — confirm
// against SIDefines.h) and return its encoding in TH.
//
// Accepted values fall into three families (ATOMIC/LOAD/STORE). The family
// prefix is stripped with consume_front() so that the remaining suffix can
// be decoded by the StringSwitch below; 0xffffffff serves as the "invalid"
// sentinel produced by the StringSwitch defaults.
ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
  TH = AMDGPU::CPol::TH_RT; // default

  StringRef Value;
  SMLoc StringLoc;
  ParseStatus Res = parseStringWithPrefix(Prefix: "th", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  if (Value == "TH_DEFAULT")
    TH = AMDGPU::CPol::TH_RT;
  // These full spellings are rejected outright, before prefix stripping.
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
           Value == "TH_LOAD_NT_WB") {
    return Error(L: StringLoc, Msg: "invalid th value");
  } else if (Value.consume_front(Prefix: "TH_ATOMIC_")) {
    TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
  } else if (Value.consume_front(Prefix: "TH_LOAD_")) {
    TH = AMDGPU::CPol::TH_TYPE_LOAD;
  } else if (Value.consume_front(Prefix: "TH_STORE_")) {
    TH = AMDGPU::CPol::TH_TYPE_STORE;
  } else {
    return Error(L: StringLoc, Msg: "invalid th value");
  }

  // At this point Value holds only the suffix (family prefix stripped).
  if (Value == "BYPASS")
    TH |= AMDGPU::CPol::TH_REAL_BYPASS;

  // TH == 0 can only happen for the plain default; no suffix to decode then.
  if (TH != 0) {
    if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
      TH |= StringSwitch<int64_t>(Value)
                .Case(S: "RETURN", Value: AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case(S: "RT", Value: AMDGPU::CPol::TH_RT)
                .Case(S: "RT_RETURN", Value: AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case(S: "NT", Value: AMDGPU::CPol::TH_ATOMIC_NT)
                .Case(S: "NT_RETURN", Value: AMDGPU::CPol::TH_ATOMIC_NT |
                                    AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case(S: "CASCADE_RT", Value: AMDGPU::CPol::TH_ATOMIC_CASCADE)
                .Case(S: "CASCADE_NT", Value: AMDGPU::CPol::TH_ATOMIC_CASCADE |
                                      AMDGPU::CPol::TH_ATOMIC_NT)
                .Default(Value: 0xffffffff);
    else
      TH |= StringSwitch<int64_t>(Value)
                .Case(S: "RT", Value: AMDGPU::CPol::TH_RT)
                .Case(S: "NT", Value: AMDGPU::CPol::TH_NT)
                .Case(S: "HT", Value: AMDGPU::CPol::TH_HT)
                .Case(S: "LU", Value: AMDGPU::CPol::TH_LU)
                .Case(S: "WB", Value: AMDGPU::CPol::TH_WB)
                .Case(S: "NT_RT", Value: AMDGPU::CPol::TH_NT_RT)
                .Case(S: "RT_NT", Value: AMDGPU::CPol::TH_RT_NT)
                .Case(S: "NT_HT", Value: AMDGPU::CPol::TH_NT_HT)
                .Case(S: "NT_WB", Value: AMDGPU::CPol::TH_NT_WB)
                .Case(S: "BYPASS", Value: AMDGPU::CPol::TH_BYPASS)
                .Default(Value: 0xffffffff);
  }

  // An unrecognized suffix OR-ed the sentinel in; report it here.
  if (TH == 0xffffffff)
    return Error(L: StringLoc, Msg: "invalid th value");

  return ParseStatus::Success;
}
7301
7302static void
7303addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7304 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7305 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7306 std::optional<unsigned> InsertAt = std::nullopt) {
7307 auto i = OptionalIdx.find(x: ImmT);
7308 if (i != OptionalIdx.end()) {
7309 unsigned Idx = i->second;
7310 const AMDGPUOperand &Op =
7311 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7312 if (InsertAt)
7313 Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Op.getImm()));
7314 else
7315 Op.addImmOperands(Inst, N: 1);
7316 } else {
7317 if (InsertAt.has_value())
7318 Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Default));
7319 else
7320 Inst.addOperand(Op: MCOperand::createImm(Val: Default));
7321 }
7322}
7323
7324ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7325 StringRef &Value,
7326 SMLoc &StringLoc) {
7327 if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
7328 return ParseStatus::NoMatch;
7329
7330 StringLoc = getLoc();
7331 return parseId(Val&: Value, ErrMsg: "expected an identifier") ? ParseStatus::Success
7332 : ParseStatus::Failure;
7333}
7334
7335ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7336 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7337 int64_t &IntVal) {
7338 if (!trySkipId(Id: Name, Kind: AsmToken::Colon))
7339 return ParseStatus::NoMatch;
7340
7341 SMLoc StringLoc = getLoc();
7342
7343 StringRef Value;
7344 if (isToken(Kind: AsmToken::Identifier)) {
7345 Value = getTokenStr();
7346 lex();
7347
7348 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7349 if (Value == Ids[IntVal])
7350 break;
7351 } else if (!parseExpr(Imm&: IntVal))
7352 return ParseStatus::Failure;
7353
7354 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7355 return Error(L: StringLoc, Msg: "invalid " + Twine(Name) + " value");
7356
7357 return ParseStatus::Success;
7358}
7359
7360ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7361 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7362 AMDGPUOperand::ImmTy Type) {
7363 SMLoc S = getLoc();
7364 int64_t IntVal;
7365
7366 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7367 if (Res.isSuccess())
7368 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S, Type));
7369
7370 return Res;
7371}
7372
7373//===----------------------------------------------------------------------===//
7374// MTBUF format
7375//===----------------------------------------------------------------------===//
7376
7377bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7378 int64_t MaxVal,
7379 int64_t &Fmt) {
7380 int64_t Val;
7381 SMLoc Loc = getLoc();
7382
7383 auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: Val);
7384 if (Res.isFailure())
7385 return false;
7386 if (Res.isNoMatch())
7387 return true;
7388
7389 if (Val < 0 || Val > MaxVal) {
7390 Error(L: Loc, Msg: Twine("out of range ", StringRef(Pref)));
7391 return false;
7392 }
7393
7394 Fmt = Val;
7395 return true;
7396}
7397
7398ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7399 AMDGPUOperand::ImmTy ImmTy) {
7400 const char *Pref = "index_key";
7401 int64_t ImmVal = 0;
7402 SMLoc Loc = getLoc();
7403 auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: ImmVal);
7404 if (!Res.isSuccess())
7405 return Res;
7406
7407 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7408 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7409 (ImmVal < 0 || ImmVal > 1))
7410 return Error(L: Loc, Msg: Twine("out of range ", StringRef(Pref)));
7411
7412 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7413 return Error(L: Loc, Msg: Twine("out of range ", StringRef(Pref)));
7414
7415 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: ImmTy));
7416 return ParseStatus::Success;
7417}
7418
// Parse an optional "index_key:N" modifier with the 8-bit range (0..3).
ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey8bit);
}
7422
// Parse an optional "index_key:N" modifier with the 16-bit range (0..1).
ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey16bit);
}
7426
// Parse an optional "index_key:N" modifier with the 32-bit range (0..1).
ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey32bit);
}
7430
// Parse a WMMA "matrix_{a,b}_fmt" modifier using the shared matrix-format
// name table.
ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
                                               StringRef Name,
                                               AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, Ids: WMMAMods::ModMatrixFmt,
                                    Type);
}
7437
// Parse an optional "matrix_a_fmt:<value>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, Name: "matrix_a_fmt",
                           Type: AMDGPUOperand::ImmTyMatrixAFMT);
}
7442
// Parse an optional "matrix_b_fmt:<value>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, Name: "matrix_b_fmt",
                           Type: AMDGPUOperand::ImmTyMatrixBFMT);
}
7447
// Parse a WMMA "matrix_{a,b}_scale" modifier using the shared matrix-scale
// name table.
ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
                                                 StringRef Name,
                                                 AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, Ids: WMMAMods::ModMatrixScale,
                                    Type);
}
7454
// Parse an optional "matrix_a_scale:<value>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, Name: "matrix_a_scale",
                             Type: AMDGPUOperand::ImmTyMatrixAScale);
}
7459
// Parse an optional "matrix_b_scale:<value>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, Name: "matrix_b_scale",
                             Type: AMDGPUOperand::ImmTyMatrixBScale);
}
7464
// Parse a WMMA "matrix_{a,b}_scale_fmt" modifier using the shared
// scale-format name table.
ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
                                                    StringRef Name,
                                                    AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, Ids: WMMAMods::ModMatrixScaleFmt,
                                    Type);
}
7471
// Parse an optional "matrix_a_scale_fmt:<value>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, Name: "matrix_a_scale_fmt",
                                Type: AMDGPUOperand::ImmTyMatrixAScaleFmt);
}
7476
// Parse an optional "matrix_b_scale_fmt:<value>" modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, Name: "matrix_b_scale_fmt",
                                Type: AMDGPUOperand::ImmTyMatrixBScaleFmt);
}
7481
7482// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7483// values to live in a joint format operand in the MCInst encoding.
// Parse the pre-GFX10 "dfmt:<int>" / "nfmt:<int>" modifiers and combine
// them into a single format encoding. Each modifier is optional and they
// may appear in either order, hence the two-iteration loop below.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    // tryParseFmt returns false only on a hard error; it leaves the value
    // at *_UNDEF when the modifier is simply absent.
    if (Dfmt == DFMT_UNDEF && !tryParseFmt(Pref: "dfmt", MaxVal: DFMT_MAX, Fmt&: Dfmt))
      return ParseStatus::Failure;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt(Pref: "nfmt", MaxVal: NFMT_MAX, Fmt&: Nfmt))
      return ParseStatus::Failure;

    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(K: AsmToken::Comma)) {
      trySkipToken(Kind: AsmToken::Comma);
    }
  }

  // Neither modifier present: let the caller decide how to proceed.
  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  // A missing half defaults; the pair is then packed into one encoding.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return ParseStatus::Success;
}
7515
7516ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7517 using namespace llvm::AMDGPU::MTBUFFormat;
7518
7519 int64_t Fmt = UFMT_UNDEF;
7520
7521 if (!tryParseFmt(Pref: "format", MaxVal: UFMT_MAX, Fmt))
7522 return ParseStatus::Failure;
7523
7524 if (Fmt == UFMT_UNDEF)
7525 return ParseStatus::NoMatch;
7526
7527 Format = Fmt;
7528 return ParseStatus::Success;
7529}
7530
7531bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7532 int64_t &Nfmt,
7533 StringRef FormatStr,
7534 SMLoc Loc) {
7535 using namespace llvm::AMDGPU::MTBUFFormat;
7536 int64_t Format;
7537
7538 Format = getDfmt(Name: FormatStr);
7539 if (Format != DFMT_UNDEF) {
7540 Dfmt = Format;
7541 return true;
7542 }
7543
7544 Format = getNfmt(Name: FormatStr, STI: getSTI());
7545 if (Format != NFMT_UNDEF) {
7546 Nfmt = Format;
7547 return true;
7548 }
7549
7550 Error(L: Loc, Msg: "unsupported format");
7551 return false;
7552}
7553
// Parse the split symbolic form "format:[<dfmt-or-nfmt>[, <the-other>]]".
// matchDfmtNfmt sets exactly one of Dfmt/Nfmt per name, which is how the
// duplicate checks below work: after two names, a still-UNDEF Dfmt means
// both were numeric formats, and a still-UNDEF Nfmt means both were data
// formats.
ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                                      SMLoc FormatLoc,
                                                      int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, Loc: FormatLoc))
    return ParseStatus::Failure;

  // Optional second name after a comma.
  if (trySkipToken(Kind: AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Val&: Str, ErrMsg: "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, FormatStr: Str, Loc))
      return ParseStatus::Failure;
    if (Dfmt == DFMT_UNDEF)
      return Error(L: Loc, Msg: "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(L: Loc, Msg: "duplicate data format");
  }

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  // GFX10+ only has the unified encoding, so translate; older targets keep
  // the packed dfmt/nfmt encoding.
  if (isGFX10Plus()) {
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, STI: getSTI());
    if (Ufmt == UFMT_UNDEF)
      return Error(L: FormatLoc, Msg: "unsupported format");
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return ParseStatus::Success;
}
7590
7591ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7592 SMLoc Loc,
7593 int64_t &Format) {
7594 using namespace llvm::AMDGPU::MTBUFFormat;
7595
7596 auto Id = getUnifiedFormat(Name: FormatStr, STI: getSTI());
7597 if (Id == UFMT_UNDEF)
7598 return ParseStatus::NoMatch;
7599
7600 if (!isGFX10Plus())
7601 return Error(L: Loc, Msg: "unified format is not supported on this GPU");
7602
7603 Format = Id;
7604 return ParseStatus::Success;
7605}
7606
7607ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7608 using namespace llvm::AMDGPU::MTBUFFormat;
7609 SMLoc Loc = getLoc();
7610
7611 if (!parseExpr(Imm&: Format))
7612 return ParseStatus::Failure;
7613 if (!isValidFormatEncoding(Val: Format, STI: getSTI()))
7614 return Error(L: Loc, Msg: "out of range format");
7615
7616 return ParseStatus::Success;
7617}
7618
7619ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7620 using namespace llvm::AMDGPU::MTBUFFormat;
7621
7622 if (!trySkipId(Id: "format", Kind: AsmToken::Colon))
7623 return ParseStatus::NoMatch;
7624
7625 if (trySkipToken(Kind: AsmToken::LBrac)) {
7626 StringRef FormatStr;
7627 SMLoc Loc = getLoc();
7628 if (!parseId(Val&: FormatStr, ErrMsg: "expected a format string"))
7629 return ParseStatus::Failure;
7630
7631 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7632 if (Res.isNoMatch())
7633 Res = parseSymbolicSplitFormat(FormatStr, FormatLoc: Loc, Format);
7634 if (!Res.isSuccess())
7635 return Res;
7636
7637 if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
7638 return ParseStatus::Failure;
7639
7640 return ParseStatus::Success;
7641 }
7642
7643 return parseNumericFormat(Format);
7644}
7645
// Parse the MTBUF format operand. The format modifier may appear either
// before or after the soffset operand, so a format immediate is always
// pushed first (with the default encoding) and patched in place later if
// the modifier shows up after soffset.
ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(STI: getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  // Placeholder (or parsed value) — may be patched below via setImm.
  Operands.push_back(
      Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Format, Loc, Type: AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(Kind: AsmToken::Comma);

  if (isToken(Kind: AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return ParseStatus::Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(Kind: AsmToken::Comma);

  if (!FormatFound) {
    // Format may follow soffset; if found, patch the placeholder pushed
    // above (now second-from-last, soffset being last).
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  // A second format modifier after soffset is rejected.
  if (isId(Id: "format") && peekToken().is(K: AsmToken::Colon))
    return Error(L: getLoc(), Msg: "duplicate format");
  return ParseStatus::Success;
}
7696
7697ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7698 ParseStatus Res =
7699 parseIntWithPrefix(Prefix: "offset", Operands, ImmTy: AMDGPUOperand::ImmTyOffset);
7700 if (Res.isNoMatch()) {
7701 Res = parseIntWithPrefix(Prefix: "inst_offset", Operands,
7702 ImmTy: AMDGPUOperand::ImmTyInstOffset);
7703 }
7704 return Res;
7705}
7706
7707ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7708 ParseStatus Res =
7709 parseNamedBit(Name: "r128", Operands, ImmTy: AMDGPUOperand::ImmTyR128A16);
7710 if (Res.isNoMatch())
7711 Res = parseNamedBit(Name: "a16", Operands, ImmTy: AMDGPUOperand::ImmTyA16);
7712 return Res;
7713}
7714
7715ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7716 ParseStatus Res =
7717 parseIntWithPrefix(Prefix: "blgp", Operands, ImmTy: AMDGPUOperand::ImmTyBLGP);
7718 if (Res.isNoMatch()) {
7719 Res =
7720 parseOperandArrayWithPrefix(Prefix: "neg", Operands, ImmTy: AMDGPUOperand::ImmTyBLGP);
7721 }
7722 return Res;
7723}
7724
7725//===----------------------------------------------------------------------===//
7726// Exp
7727//===----------------------------------------------------------------------===//
7728
// Convert parsed export-instruction operands into an MCInst. Records the
// MCInst positions of the four source registers so they can be repacked
// for compressed exports, and derives the enable mask (EnMask) from which
// sources are live.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // MCInst operand index of each of the four sources.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, N: 1);
      ++SrcIdx;
      continue;
    }

    // "off" placeholder: emit a null register so the slot is still counted.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(Op: MCOperand::createReg(Reg: MCRegister()));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, N: 1);
      continue;
    }

    // "done"/"row_en" tokens carry no operand of their own.
    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  // For compressed exports, move the second register pair down next to the
  // first and null out the now-unused upper slots.
  bool Compr = false;
  if (OptionalIdx.find(x: AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(i: OperandIdx[1]) = Inst.getOperand(i: OperandIdx[2]);
    Inst.getOperand(i: OperandIdx[2]).setReg(MCRegister());
    Inst.getOperand(i: OperandIdx[3]).setReg(MCRegister());
  }

  // Enable mask: one bit per live source, or a 2-bit pair per live source
  // when compressed.
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(i: OperandIdx[i]).getReg()) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(Op: MCOperand::createImm(Val: EnMask));
}
7789
7790//===----------------------------------------------------------------------===//
7791// s_waitcnt
7792//===----------------------------------------------------------------------===//
7793
7794static bool
7795encodeCnt(
7796 const AMDGPU::IsaVersion ISA,
7797 int64_t &IntVal,
7798 int64_t CntVal,
7799 bool Saturate,
7800 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7801 unsigned (*decode)(const IsaVersion &Version, unsigned))
7802{
7803 bool Failed = false;
7804
7805 IntVal = encode(ISA, IntVal, CntVal);
7806 if (CntVal != decode(ISA, IntVal)) {
7807 if (Saturate) {
7808 IntVal = encode(ISA, IntVal, -1);
7809 } else {
7810 Failed = true;
7811 }
7812 }
7813 return Failed;
7814}
7815
// Parse one "<name>(<value>)" component of an s_waitcnt operand and merge
// it into IntVal. A "_sat" suffix on the counter name requests saturation
// instead of an error when the value does not fit its bitfield.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name") ||
      !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(Imm&: CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.ends_with(Suffix: "_sat");

  // Dispatch on the counter name; each case merges the value into the
  // matching bitfield of the packed waitcnt.
  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeVmcnt, decode: decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeExpcnt, decode: decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeLgkmcnt, decode: decodeLgkmcnt);
  } else {
    Error(L: CntLoc, Msg: "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(L: ValLoc, Msg: "too large value for " + CntName);
    return false;
  }

  if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
    return false;

  // Components may be separated by '&' or ','; a separator must be
  // followed by another component, not end-of-statement.
  if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) {
    if (isToken(Kind: AsmToken::EndOfStatement)) {
      Error(L: getLoc(), Msg: "expected a counter name");
      return false;
    }
  }

  return true;
}
7863
7864ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7865 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
7866 int64_t Waitcnt = getWaitcntBitMask(Version: ISA);
7867 SMLoc S = getLoc();
7868
7869 if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
7870 while (!isToken(Kind: AsmToken::EndOfStatement)) {
7871 if (!parseCnt(IntVal&: Waitcnt))
7872 return ParseStatus::Failure;
7873 }
7874 } else {
7875 if (!parseExpr(Imm&: Waitcnt))
7876 return ParseStatus::Failure;
7877 }
7878
7879 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Waitcnt, Loc: S));
7880 return ParseStatus::Success;
7881}
7882
7883bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7884 SMLoc FieldLoc = getLoc();
7885 StringRef FieldName = getTokenStr();
7886 if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a field name") ||
7887 !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
7888 return false;
7889
7890 SMLoc ValueLoc = getLoc();
7891 StringRef ValueName = getTokenStr();
7892 if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a value name") ||
7893 !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a right parenthesis"))
7894 return false;
7895
7896 unsigned Shift;
7897 if (FieldName == "instid0") {
7898 Shift = 0;
7899 } else if (FieldName == "instskip") {
7900 Shift = 4;
7901 } else if (FieldName == "instid1") {
7902 Shift = 7;
7903 } else {
7904 Error(L: FieldLoc, Msg: "invalid field name " + FieldName);
7905 return false;
7906 }
7907
7908 int Value;
7909 if (Shift == 4) {
7910 // Parse values for instskip.
7911 Value = StringSwitch<int>(ValueName)
7912 .Case(S: "SAME", Value: 0)
7913 .Case(S: "NEXT", Value: 1)
7914 .Case(S: "SKIP_1", Value: 2)
7915 .Case(S: "SKIP_2", Value: 3)
7916 .Case(S: "SKIP_3", Value: 4)
7917 .Case(S: "SKIP_4", Value: 5)
7918 .Default(Value: -1);
7919 } else {
7920 // Parse values for instid0 and instid1.
7921 Value = StringSwitch<int>(ValueName)
7922 .Case(S: "NO_DEP", Value: 0)
7923 .Case(S: "VALU_DEP_1", Value: 1)
7924 .Case(S: "VALU_DEP_2", Value: 2)
7925 .Case(S: "VALU_DEP_3", Value: 3)
7926 .Case(S: "VALU_DEP_4", Value: 4)
7927 .Case(S: "TRANS32_DEP_1", Value: 5)
7928 .Case(S: "TRANS32_DEP_2", Value: 6)
7929 .Case(S: "TRANS32_DEP_3", Value: 7)
7930 .Case(S: "FMA_ACCUM_CYCLE_1", Value: 8)
7931 .Case(S: "SALU_CYCLE_1", Value: 9)
7932 .Case(S: "SALU_CYCLE_2", Value: 10)
7933 .Case(S: "SALU_CYCLE_3", Value: 11)
7934 .Default(Value: -1);
7935 }
7936 if (Value < 0) {
7937 Error(L: ValueLoc, Msg: "invalid value name " + ValueName);
7938 return false;
7939 }
7940
7941 Delay |= Value << Shift;
7942 return true;
7943}
7944
7945ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7946 int64_t Delay = 0;
7947 SMLoc S = getLoc();
7948
7949 if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
7950 do {
7951 if (!parseDelay(Delay))
7952 return ParseStatus::Failure;
7953 } while (trySkipToken(Kind: AsmToken::Pipe));
7954 } else {
7955 if (!parseExpr(Imm&: Delay))
7956 return ParseStatus::Failure;
7957 }
7958
7959 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Delay, Loc: S));
7960 return ParseStatus::Success;
7961}
7962
// Any immediate is acceptable as an s_waitcnt operand; range checking
// happens during parsing/encoding.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
7967
// Any immediate is acceptable as an s_delay_alu operand.
bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7969
7970//===----------------------------------------------------------------------===//
7971// DepCtr
7972//===----------------------------------------------------------------------===//
7973
// Emit a diagnostic for a failed depctr component, mapping the negative
// error code returned by encodeDepCtr to a specific message.
void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
                                  StringRef DepCtrName) {
  switch (ErrorId) {
  case OPR_ID_UNKNOWN:
    Error(L: Loc, Msg: Twine("invalid counter name ", DepCtrName));
    return;
  case OPR_ID_UNSUPPORTED:
    Error(L: Loc, Msg: Twine(DepCtrName, " is not supported on this GPU"));
    return;
  case OPR_ID_DUPLICATE:
    Error(L: Loc, Msg: Twine("duplicate counter name ", DepCtrName));
    return;
  case OPR_VAL_INVALID:
    Error(L: Loc, Msg: Twine("invalid value for ", DepCtrName));
    return;
  default:
    // Callers only pass the error codes handled above.
    assert(false);
  }
}
7993
// Parse one "<name>(<value>)" component of an s_depctr operand and merge
// its encoding into DepCtr. UsedOprMask accumulates which counter bits
// have been claimed so far, allowing duplicate detection by encodeDepCtr.
bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {

  using namespace llvm::AMDGPU::DepCtr;

  SMLoc DepCtrLoc = getLoc();
  StringRef DepCtrName = getTokenStr();

  if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name") ||
      !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
    return false;

  int64_t ExprVal;
  if (!parseExpr(Imm&: ExprVal))
    return false;

  // encodeDepCtr updates UsedOprMask with the bits this counter occupies;
  // a negative return value is an error code for depCtrError.
  unsigned PrevOprMask = UsedOprMask;
  int CntVal = encodeDepCtr(Name: DepCtrName, Val: ExprVal, UsedOprMask, STI: getSTI());

  if (CntVal < 0) {
    depCtrError(Loc: DepCtrLoc, ErrorId: CntVal, DepCtrName);
    return false;
  }

  if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
    return false;

  // Components may be separated by '&' or ','; a separator must be
  // followed by another component, not end-of-statement.
  if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) {
    if (isToken(Kind: AsmToken::EndOfStatement)) {
      Error(L: getLoc(), Msg: "expected a counter name");
      return false;
    }
  }

  // The XOR isolates exactly the bits claimed by this counter; replace
  // those bits of DepCtr with the newly encoded value.
  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;
  return true;
}
8031
8032ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8033 using namespace llvm::AMDGPU::DepCtr;
8034
8035 int64_t DepCtr = getDefaultDepCtrEncoding(STI: getSTI());
8036 SMLoc Loc = getLoc();
8037
8038 if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
8039 unsigned UsedOprMask = 0;
8040 while (!isToken(Kind: AsmToken::EndOfStatement)) {
8041 if (!parseDepCtr(DepCtr, UsedOprMask))
8042 return ParseStatus::Failure;
8043 }
8044 } else {
8045 if (!parseExpr(Imm&: DepCtr))
8046 return ParseStatus::Failure;
8047 }
8048
8049 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DepCtr, Loc));
8050 return ParseStatus::Success;
8051}
8052
// A depctr operand must fit in a signed 16-bit immediate.
bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8054
8055//===----------------------------------------------------------------------===//
8056// hwreg
8057//===----------------------------------------------------------------------===//
8058
// Parse the "hwreg(<reg>[, <offset>, <width>])" macro form of a hwreg
// operand. The register may be given by symbolic name or by a numeric
// expression; offset and width are optional but must appear together.
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Offset,
                                            OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (!trySkipId(Id: "hwreg", Kind: AsmToken::LParen))
    return ParseStatus::NoMatch;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(Kind: AsmToken::Identifier) &&
      (HwReg.Val = getHwregId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(Imm&: HwReg.Val, Expected: "a register name")) {
    return ParseStatus::Failure;
  }

  // Short form: hwreg(<reg>) with no offset/width.
  if (trySkipToken(Kind: AsmToken::RParen))
    return ParseStatus::Success;

  // parse optional params
  if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma or a closing parenthesis"))
    return ParseStatus::Failure;

  Offset.Loc = getLoc();
  if (!parseExpr(Imm&: Offset.Val))
    return ParseStatus::Failure;

  if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
    return ParseStatus::Failure;

  Width.Loc = getLoc();
  if (!parseExpr(Imm&: Width.Val) ||
      !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
    return ParseStatus::Failure;

  return ParseStatus::Success;
}
8098
// Parse a hwreg operand in any of its three accepted forms: structured
// fields ("id:... offset:... size:..."), the hwreg(...) macro, or a raw
// integer expression. The result is packed into a 16-bit immediate.
ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  StructuredOpField HwReg("id", "hardware register", HwregId::Width,
                          HwregId::Default);
  StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
                           HwregOffset::Default);
  // Anonymous subclass: the width field additionally requires its value,
  // minus one, to fit in the field (i.e. values 1..32).
  struct : StructuredOpField {
    using StructuredOpField::StructuredOpField;
    bool validate(AMDGPUAsmParser &Parser) const override {
      if (!isUIntN(N: Width, x: Val - 1))
        return Error(Parser, Err: "only values from 1 to 32 are legal");
      return true;
    }
  } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
  ParseStatus Res = parseStructuredOpFields(Fields: {&HwReg, &Offset, &Width});

  // Structured form absent: try the hwreg(...) macro.
  if (Res.isNoMatch())
    Res = parseHwregFunc(HwReg, Offset, Width);

  if (Res.isSuccess()) {
    if (!validateStructuredOpFields(Fields: {&HwReg, &Offset, &Width}))
      return ParseStatus::Failure;
    ImmVal = HwregEncoding::encode(Values: HwReg.Val, Values: Offset.Val, Values: Width.Val);
  }

  // Neither form matched: fall back to a raw integer expression.
  if (Res.isNoMatch() &&
      parseExpr(Imm&: ImmVal, Expected: "a hwreg macro, structured immediate"))
    Res = ParseStatus::Success;

  if (!Res.isSuccess())
    return ParseStatus::Failure;

  if (!isUInt<16>(x: ImmVal))
    return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal");
  Operands.push_back(
      Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTyHwreg));
  return ParseStatus::Success;
}
8141
// A hwreg operand is an immediate tagged with ImmTyHwreg during parsing.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmT: ImmTyHwreg);
}
8145
8146//===----------------------------------------------------------------------===//
8147// sendmsg
8148//===----------------------------------------------------------------------===//
8149
// Parse the interior of "sendmsg(<msg>[, <op>[, <stream>]])" — the opening
// "sendmsg(" has already been consumed by the caller. Message and operation
// may be given symbolically or as expressions; IsSymbolic/IsDefined flags
// record which forms were seen for later validation.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(Kind: AsmToken::Identifier) &&
      (Msg.Val = getMsgId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Imm&: Msg.Val, Expected: "a message name")) {
    return false;
  }

  if (trySkipToken(Kind: AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(Kind: AsmToken::Identifier) &&
        (Op.Val = getMsgOpId(MsgId: Msg.Val, Name: getTokenStr(), STI: getSTI())) !=
            OPR_ID_UNKNOWN) {
      lex(); // skip operation name
    } else if (!parseExpr(Imm&: Op.Val, Expected: "an operation name")) {
      return false;
    }

    // Optional stream id, numeric only.
    if (trySkipToken(Kind: AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Imm&: Stream.Val))
        return false;
    }
  }

  return skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis");
}
8186
/// Validate the parsed sendmsg triple. Symbolic messages are checked strictly
/// (supported-on-GPU, required/forbidden operation, stream support); numeric
/// messages are only checked for encodability. Emits a diagnostic and returns
/// false on the first violation.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (Strict) {
    // Name was recognized but the id is not available on this subtarget.
    if (Msg.Val == OPR_ID_UNSUPPORTED) {
      Error(Msg.Loc, "specified message id is not supported on this GPU");
      return false;
    }
  } else {
    if (!isValidMsgId(Msg.Val, getSTI())) {
      Error(Msg.Loc, "invalid message id");
      return false;
    }
  }
  // In strict mode the presence of an operation must match what the
  // message requires: neither missing nor spurious.
  if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
    if (Op.Val == OPR_ID_UNSUPPORTED)
      Error(Op.Loc, "specified operation id is not supported on this GPU");
    else
      Error(Op.Loc, "invalid operation id");
    return false;
  }
  // A stream id is rejected when the message/operation pair cannot carry one.
  if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
      Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}
8235
/// Parse a sendmsg operand: either a sendmsg(...) macro (parsed and validated
/// field by field, then encoded) or a bare 16-bit absolute expression.
ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(OPR_ID_UNKNOWN);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
    } else {
      return ParseStatus::Failure;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    // Raw immediates must fit the 16-bit SIMM16 field.
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      return Error(Loc, "invalid immediate: only 16-bit values are legal");
  } else {
    return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return ParseStatus::Success;
}
8262
8263bool AMDGPUOperand::isSendMsg() const {
8264 return isImmTy(ImmT: ImmTySendMsg);
8265}
8266
/// Parse the operand of s_wait_event: either a structured immediate with the
/// single target-dependent bitfield, or a bare 16-bit absolute expression.
/// The bitfield name differs by generation (dont_wait_export_ready on GFX11,
/// export_ready otherwise).
ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
  using namespace llvm::AMDGPU::WaitEvent;

  SMLoc Loc = getLoc();
  int64_t ImmVal = 0;

  StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
                                        1, 0);
  StructuredOpField ExportReady("export_ready", "bit value", 1, 0);

  // Only one of the two fields is accepted, chosen by subtarget.
  StructuredOpField *TargetBitfield =
      isGFX11() ? &DontWaitExportReady : &ExportReady;

  ParseStatus Res = parseStructuredOpFields({TargetBitfield});
  // No '{' seen: fall back to a plain expression.
  if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
    Res = ParseStatus::Success;
  else if (Res.isSuccess()) {
    if (!validateStructuredOpFields({TargetBitfield}))
      return ParseStatus::Failure;
    ImmVal = TargetBitfield->Val;
  }

  if (!Res.isSuccess())
    return ParseStatus::Failure;

  if (!isUInt<16>(ImmVal))
    return Error(Loc, "invalid immediate: only 16-bit values are legal");

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
                                              AMDGPUOperand::ImmTyWaitEvent));
  return ParseStatus::Success;
}
8299
8300bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmT: ImmTyWaitEvent); }
8301
8302//===----------------------------------------------------------------------===//
8303// v_interp
8304//===----------------------------------------------------------------------===//
8305
8306ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8307 StringRef Str;
8308 SMLoc S = getLoc();
8309
8310 if (!parseId(Val&: Str))
8311 return ParseStatus::NoMatch;
8312
8313 int Slot = StringSwitch<int>(Str)
8314 .Case(S: "p10", Value: 0)
8315 .Case(S: "p20", Value: 1)
8316 .Case(S: "p0", Value: 2)
8317 .Default(Value: -1);
8318
8319 if (Slot == -1)
8320 return Error(L: S, Msg: "invalid interpolation slot");
8321
8322 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Slot, Loc: S,
8323 Type: AMDGPUOperand::ImmTyInterpSlot));
8324 return ParseStatus::Success;
8325}
8326
/// Parse a v_interp attribute operand of the form attr<N>.<chan>, e.g.
/// "attr3.x". Pushes two immediate operands: the attribute number and the
/// channel (x/y/z/w -> 0..3).
ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");

  // The channel is the trailing ".x"/".y"/".z"/".w" suffix.
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
                     .Case(".x", 0)
                     .Case(".y", 1)
                     .Case(".z", 2)
                     .Case(".w", 3)
                     .Default(-1);
  if (AttrChan == -1)
    return Error(S, "invalid or missing interpolation attribute channel");

  // Strip the 2-char channel suffix and the 4-char "attr" prefix, leaving
  // only the decimal attribute number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");

  if (Attr > 32)
    return Error(S, "out of bounds interpolation attribute number");

  // Point the channel operand's location at the suffix within the token.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
  return ParseStatus::Success;
}
8364
8365//===----------------------------------------------------------------------===//
8366// exp
8367//===----------------------------------------------------------------------===//
8368
/// Parse an export target operand (e.g. mrt0, pos0, param0) for EXP
/// instructions, rejecting names invalid per se or unsupported on the
/// current subtarget.
ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  unsigned Id = getTgtId(Str);
  // Distinguish "no such target" from "target exists but not on this GPU".
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return ParseStatus::Success;
}
8388
8389//===----------------------------------------------------------------------===//
8390// parser helpers
8391//===----------------------------------------------------------------------===//
8392
8393bool
8394AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8395 return Token.is(K: AsmToken::Identifier) && Token.getString() == Id;
8396}
8397
8398bool
8399AMDGPUAsmParser::isId(const StringRef Id) const {
8400 return isId(Token: getToken(), Id);
8401}
8402
8403bool
8404AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8405 return getTokenKind() == Kind;
8406}
8407
8408StringRef AMDGPUAsmParser::getId() const {
8409 return isToken(Kind: AsmToken::Identifier) ? getTokenStr() : StringRef();
8410}
8411
8412bool
8413AMDGPUAsmParser::trySkipId(const StringRef Id) {
8414 if (isId(Id)) {
8415 lex();
8416 return true;
8417 }
8418 return false;
8419}
8420
8421bool
8422AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8423 if (isToken(Kind: AsmToken::Identifier)) {
8424 StringRef Tok = getTokenStr();
8425 if (Tok.starts_with(Prefix: Pref) && Tok.drop_front(N: Pref.size()) == Id) {
8426 lex();
8427 return true;
8428 }
8429 }
8430 return false;
8431}
8432
8433bool
8434AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8435 if (isId(Id) && peekToken().is(K: Kind)) {
8436 lex();
8437 lex();
8438 return true;
8439 }
8440 return false;
8441}
8442
8443bool
8444AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8445 if (isToken(Kind)) {
8446 lex();
8447 return true;
8448 }
8449 return false;
8450}
8451
8452bool
8453AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8454 const StringRef ErrMsg) {
8455 if (!trySkipToken(Kind)) {
8456 Error(L: getLoc(), Msg: ErrMsg);
8457 return false;
8458 }
8459 return true;
8460}
8461
/// Parse an absolute expression into Imm. On failure emits an error at the
/// expression start; when Expected is non-empty it is mentioned as the
/// alternative accepted form (e.g. "expected a hwreg macro or ...").
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  if (Expr->evaluateAsAbsolute(Imm))
    return true;

  // The expression parsed but is not absolute (e.g. refers to an
  // undefined symbol).
  if (Expected.empty()) {
    Error(S, "expected absolute expression");
  } else {
    Error(S, Twine("expected ", Expected) +
             Twine(" or an absolute expression"));
  }
  return false;
}
8481
8482bool
8483AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8484 SMLoc S = getLoc();
8485
8486 const MCExpr *Expr;
8487 if (Parser.parseExpression(Res&: Expr))
8488 return false;
8489
8490 int64_t IntVal;
8491 if (Expr->evaluateAsAbsolute(Res&: IntVal)) {
8492 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S));
8493 } else {
8494 Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S));
8495 }
8496 return true;
8497}
8498
8499bool
8500AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8501 if (isToken(Kind: AsmToken::String)) {
8502 Val = getToken().getStringContents();
8503 lex();
8504 return true;
8505 }
8506 Error(L: getLoc(), Msg: ErrMsg);
8507 return false;
8508}
8509
8510bool
8511AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8512 if (isToken(Kind: AsmToken::Identifier)) {
8513 Val = getTokenStr();
8514 lex();
8515 return true;
8516 }
8517 if (!ErrMsg.empty())
8518 Error(L: getLoc(), Msg: ErrMsg);
8519 return false;
8520}
8521
8522AsmToken
8523AMDGPUAsmParser::getToken() const {
8524 return Parser.getTok();
8525}
8526
8527AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8528 return isToken(Kind: AsmToken::EndOfStatement)
8529 ? getToken()
8530 : getLexer().peekTok(ShouldSkipSpace);
8531}
8532
8533void
8534AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8535 auto TokCount = getLexer().peekTokens(Buf: Tokens);
8536
8537 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8538 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8539}
8540
8541AsmToken::TokenKind
8542AMDGPUAsmParser::getTokenKind() const {
8543 return getLexer().getKind();
8544}
8545
8546SMLoc
8547AMDGPUAsmParser::getLoc() const {
8548 return getToken().getLoc();
8549}
8550
8551StringRef
8552AMDGPUAsmParser::getTokenStr() const {
8553 return getToken().getString();
8554}
8555
// Advance the underlying parser to the next token.
void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
8560
8561SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8562 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8563}
8564
8565// Returns one of the given locations that comes later in the source.
8566SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8567 return a.getPointer() < b.getPointer() ? b : a;
8568}
8569
8570SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8571 int MCOpIdx) const {
8572 for (const auto &Op : Operands) {
8573 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8574 if (TargetOp.getMCOpIdx() == MCOpIdx)
8575 return TargetOp.getStartLoc();
8576 }
8577 llvm_unreachable("No such MC operand!");
8578}
8579
/// Return the start location of the last parsed operand (scanning backwards,
/// skipping the mnemonic at index 0) satisfying Test; falls back to the
/// instruction location when nothing matches.
SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  // Scan from the back so the latest matching operand wins.
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return getInstLoc(Operands);
}
8590
8591SMLoc
8592AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8593 const OperandVector &Operands) const {
8594 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(ImmT: Type); };
8595 return getOperandLoc(Test, Operands);
8596}
8597
/// Parse a structured immediate of the form "{name: value, ...}" into the
/// given field descriptors. Returns NoMatch when no '{' is present, Failure
/// on malformed input, unknown or duplicated field names.
ParseStatus
AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
  if (!trySkipToken(AsmToken::LCurly))
    return ParseStatus::NoMatch;

  bool First = true;
  while (!trySkipToken(AsmToken::RCurly)) {
    // Fields after the first must be comma-separated.
    if (!First &&
        !skipToken(AsmToken::Comma, "comma or closing brace expected"))
      return ParseStatus::Failure;

    StringRef Id = getTokenStr();
    SMLoc IdLoc = getLoc();
    if (!skipToken(AsmToken::Identifier, "field name expected") ||
        !skipToken(AsmToken::Colon, "colon expected"))
      return ParseStatus::Failure;

    // Match the name against the caller-supplied field set.
    const auto *I =
        find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
    if (I == Fields.end())
      return Error(IdLoc, "unknown field");
    if ((*I)->IsDefined)
      return Error(IdLoc, "duplicate field");

    // TODO: Support symbolic values.
    (*I)->Loc = getLoc();
    if (!parseExpr((*I)->Val))
      return ParseStatus::Failure;
    (*I)->IsDefined = true;

    First = false;
  }
  return ParseStatus::Success;
}
8632
8633bool AMDGPUAsmParser::validateStructuredOpFields(
8634 ArrayRef<const StructuredOpField *> Fields) {
8635 return all_of(Range&: Fields, P: [this](const StructuredOpField *F) {
8636 return F->validate(Parser&: *this);
8637 });
8638}
8639
8640//===----------------------------------------------------------------------===//
8641// swizzle
8642//===----------------------------------------------------------------------===//
8643
8644LLVM_READNONE
8645static unsigned
8646encodeBitmaskPerm(const unsigned AndMask,
8647 const unsigned OrMask,
8648 const unsigned XorMask) {
8649 using namespace llvm::AMDGPU::Swizzle;
8650
8651 return BITMASK_PERM_ENC |
8652 (AndMask << BITMASK_AND_SHIFT) |
8653 (OrMask << BITMASK_OR_SHIFT) |
8654 (XorMask << BITMASK_XOR_SHIFT);
8655}
8656
8657bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8658 const unsigned MaxVal,
8659 const Twine &ErrMsg, SMLoc &Loc) {
8660 if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma")) {
8661 return false;
8662 }
8663 Loc = getLoc();
8664 if (!parseExpr(Imm&: Op)) {
8665 return false;
8666 }
8667 if (Op < MinVal || Op > MaxVal) {
8668 Error(L: Loc, Msg: ErrMsg);
8669 return false;
8670 }
8671
8672 return true;
8673}
8674
8675bool
8676AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8677 const unsigned MinVal,
8678 const unsigned MaxVal,
8679 const StringRef ErrMsg) {
8680 SMLoc Loc;
8681 for (unsigned i = 0; i < OpNum; ++i) {
8682 if (!parseSwizzleOperand(Op&: Op[i], MinVal, MaxVal, ErrMsg, Loc))
8683 return false;
8684 }
8685
8686 return true;
8687}
8688
/// Parse the operands of swizzle(QUAD_PERM, l0, l1, l2, l3) — four 2-bit
/// lane ids — and build the QUAD_PERM encoding in Imm.
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    // Each lane id occupies its own LANE_SHIFT-wide slot.
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}
8704
/// Parse swizzle(BROADCAST, group_size, lane_id): a power-of-two group size
/// in [2,32] and a lane within the group. Encoded as a BITMASK_PERM whose
/// AND mask keeps the bits above the group and whose OR mask selects the
/// broadcast lane.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    // BITMASK_MAX - GroupSize + 1 clears the low log2(GroupSize) bits.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
8732
/// Parse swizzle(REVERSE, group_size): reverse lanes within each
/// power-of-two group of [2,32] lanes, encoded as a BITMASK_PERM with an
/// XOR mask of group_size - 1.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
8754
/// Parse swizzle(SWAP, group_size): swap neighboring groups of a
/// power-of-two size in [1,16], encoded as a BITMASK_PERM with an XOR mask
/// equal to the group size.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
8776
/// Parse swizzle(BITMASK_PERM, "mask") where the mask is a 5-character
/// string over {'0','1','p','i'} describing, MSB first, how each lane-id
/// bit is transformed: '0' force-clear, '1' force-set, 'p' pass through,
/// 'i' invert. Builds the AND/OR/XOR masks and encodes them into Imm.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    // The first character of the string is the most significant mask bit.
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      // Bit is cleared: excluded from AndMask.
      break;
    case '1':
      // Bit is forced set.
      OrMask |= Mask;
      break;
    case 'p':
      // Bit passes through unchanged.
      AndMask |= Mask;
      break;
    case 'i':
      // Bit passes through inverted.
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
8823
/// Parse swizzle(FFT, n): FFT-mode swizzle, GFX9+ only; n must lie in
/// [0, FFT_SWIZZLE_MAX] and is OR-ed into the FFT mode encoding.
bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "FFT mode swizzle not supported on this GPU");
    return false;
  }

  int64_t Swizzle;
  SMLoc Loc;
  if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
                           "FFT swizzle must be in the interval [0," +
                               Twine(FFT_SWIZZLE_MAX) + Twine(']'),
                           Loc))
    return false;

  Imm = FFT_MODE_ENC | Swizzle;
  return true;
}
8843
/// Parse swizzle(ROTATE, direction, size): rotate-mode swizzle, GFX9+ only.
/// direction is 0 (left) or 1 (right); size is the number of threads to
/// rotate, in [0, ROTATE_MAX_SIZE]. Both are packed into the rotate
/// encoding at their respective shifts.
bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
    return false;
  }

  SMLoc Loc;
  int64_t Direction;

  if (!parseSwizzleOperand(Direction, 0, 1,
                           "direction must be 0 (left) or 1 (right)", Loc))
    return false;

  int64_t RotateSize;
  if (!parseSwizzleOperand(
          RotateSize, 0, ROTATE_MAX_SIZE,
          "number of threads to rotate must be in the interval [0," +
              Twine(ROTATE_MAX_SIZE) + Twine(']'),
          Loc))
    return false;

  Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
        (RotateSize << ROTATE_SIZE_SHIFT);
  return true;
}
8871
8872bool
8873AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8874
8875 SMLoc OffsetLoc = getLoc();
8876
8877 if (!parseExpr(Imm, Expected: "a swizzle macro")) {
8878 return false;
8879 }
8880 if (!isUInt<16>(x: Imm)) {
8881 Error(L: OffsetLoc, Msg: "expected a 16-bit offset");
8882 return false;
8883 }
8884 return true;
8885}
8886
/// Parse the "(MODE, ...)" part of a swizzle(...) macro, dispatching on the
/// symbolic mode name to the mode-specific operand parser, then consuming
/// the closing parenthesis.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else if (trySkipId(IdSymbolic[ID_FFT])) {
      Ok = parseSwizzleFFT(Imm);
    } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
      Ok = parseSwizzleRotate(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    // The closing paren is required even after a recognized mode.
    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
8919
/// Parse a ds_swizzle offset operand: "offset:" followed by either a
/// swizzle(...) macro or a plain 16-bit offset. An operand is pushed even
/// on failure so subsequent diagnostics can refer to it.
ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? ParseStatus::Success : ParseStatus::Failure;
  }
  return ParseStatus::NoMatch;
}
8941
8942bool
8943AMDGPUOperand::isSwizzle() const {
8944 return isImmTy(ImmT: ImmTySwizzle);
8945}
8946
8947//===----------------------------------------------------------------------===//
8948// VGPR Index Mode
8949//===----------------------------------------------------------------------===//
8950
/// Parse the interior of a gpr_idx(...) macro: a comma-separated list of
/// VGPR index mode names combined into a bit mask. An empty list "()" yields
/// OFF; any error yields UNDEF after emitting a diagnostic.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  // Empty parentheses mean "no index modes enabled".
  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each known mode name; the matched mode's bit is its id position.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      // Mention the closing paren only when no mode has been seen yet.
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
8994
/// Parse a VGPR index mode operand: either a gpr_idx(...) macro or a raw
/// 4-bit absolute expression.
ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    // The macro parser has already emitted a diagnostic.
    if (Imm == UNDEF)
      return ParseStatus::Failure;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return ParseStatus::Failure;
    if (Imm < 0 || !isUInt<4>(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return ParseStatus::Success;
}
9017
9018bool AMDGPUOperand::isGPRIdxMode() const {
9019 return isImmTy(ImmT: ImmTyGprIdxMode);
9020}
9021
9022//===----------------------------------------------------------------------===//
9023// sopp branch targets
9024//===----------------------------------------------------------------------===//
9025
/// Parse a SOPP branch target: a label (symbol reference) or a 16-bit
/// signed absolute offset. Other expression forms are rejected with a
/// diagnostic but still yield Success so the operand list stays consistent.
ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  if (!parseExpr(Operands))
    return ParseStatus::Failure;

  // parseExpr appended either an immediate or an expression operand.
  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return ParseStatus::Success;
}
9051
9052//===----------------------------------------------------------------------===//
9053// Boolean holding registers
9054//===----------------------------------------------------------------------===//
9055
// Boolean-holding register operands are parsed like any other register;
// suitability is checked later by the operand predicates.
ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}
9059
9060//===----------------------------------------------------------------------===//
9061// mubuf
9062//===----------------------------------------------------------------------===//
9063
/// Convert parsed MUBUF operands into MCInst operands. Registers and plain
/// immediates are appended in order; named optional immediates are
/// collected and appended afterwards in the fixed offset/cpol order, then a
/// zero placeholder is added for SWZ. For returning atomics the destination
/// register is duplicated as the tied data source.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic) {
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
  // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
  // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
  Inst.addOperand(MCOperand::createImm(0));
}
9113
9114//===----------------------------------------------------------------------===//
9115// smrd
9116//===----------------------------------------------------------------------===//
9117
9118bool AMDGPUOperand::isSMRDOffset8() const {
9119 return isImmLiteral() && isUInt<8>(x: getImm());
9120}
9121
9122bool AMDGPUOperand::isSMEMOffset() const {
9123 // Offset range is checked later by validator.
9124 return isImmLiteral();
9125}
9126
9127bool AMDGPUOperand::isSMRDLiteralOffset() const {
9128 // 32-bit literals are only supported on CI and we only want to use them
9129 // when the offset is > 8-bits.
9130 return isImmLiteral() && !isUInt<8>(x: getImm()) && isUInt<32>(x: getImm());
9131}
9132
9133//===----------------------------------------------------------------------===//
9134// vop3
9135//===----------------------------------------------------------------------===//
9136
// Convert an omod "mul:N" multiplier (N in {1,2,4}) into its 2-bit hardware
// encoding (0, 1, 2 respectively). Returns false for any other value,
// leaving Mul untouched.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
    Mul = 0;
    return true;
  case 2:
    Mul = 1;
    return true;
  case 4:
    Mul = 2;
    return true;
  default:
    return false;
  }
}
9144
// Convert an omod "div:N" divisor (N in {1,2}) into its 2-bit hardware
// encoding (0 and 3 respectively). Returns false for any other value,
// leaving Div untouched.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
9158
9159// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9160// This is intentional and ensures compatibility with sp3.
9161// See bug 35397 for details.
// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    // GFX11+ keeps the value as written; older targets canonicalize to 1.
    if (!isGFX11Plus())
      BoundCtrl = 1;
    return true;
  }
  // Any other value is rejected by the caller.
  return false;
}
9170
/// Hook run before parsing the file: initializes the target ID from the
/// subtarget features (unless already set) and emits the .amdgcn_target
/// directive for HSA. Skipped for r600 or when no target streamer exists.
void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());

  if (isHsaAbi(getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
9183
9184/// Parse AMDGPU specific expressions.
9185///
9186/// expr ::= or(expr, ...) |
9187/// max(expr, ...)
9188///
/// Parse AMDGPU specific expressions.
///
///  expr ::= or(expr, ...) |
///           max(expr, ...)
///
/// Recognizes AMDGPU variadic expression functions (max, or, extrasgprs,
/// totalnumvgprs, alignto, occupancy) applied to a parenthesized,
/// comma-separated argument list; anything else falls through to the
/// generic primary-expression parser. Returns true on error.
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  using AGVK = AMDGPUMCExpr::VariantKind;

  if (isToken(AsmToken::Identifier)) {
    StringRef TokenId = getTokenStr();
    AGVK VK = StringSwitch<AGVK>(TokenId)
                  .Case("max", AGVK::AGVK_Max)
                  .Case("or", AGVK::AGVK_Or)
                  .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
                  .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
                  .Case("alignto", AGVK::AGVK_AlignTo)
                  .Case("occupancy", AGVK::AGVK_Occupancy)
                  .Default(AGVK::AGVK_None);

    // Only treat the identifier as a function when '(' follows; otherwise it
    // may be an ordinary symbol named e.g. "max".
    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
      SmallVector<const MCExpr *, 4> Exprs;
      uint64_t CommaCount = 0;
      lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
      lex(); // Eat '('
      while (true) {
        if (trySkipToken(AsmToken::RParen)) {
          if (Exprs.empty()) {
            Error(getToken().getLoc(),
                  "empty " + Twine(TokenId) + " expression");
            return true;
          }
          // Exactly one comma between each pair of arguments.
          if (CommaCount + 1 != Exprs.size()) {
            Error(getToken().getLoc(),
                  "mismatch of commas in " + Twine(TokenId) + " expression");
            return true;
          }
          Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
          return false;
        }
        const MCExpr *Expr;
        if (getParser().parseExpression(Expr, EndLoc))
          return true;
        Exprs.push_back(Expr);
        bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
        if (LastTokenWasComma)
          CommaCount++;
        if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
          Error(getToken().getLoc(),
                "unexpected token in " + Twine(TokenId) + " expression");
          return true;
        }
      }
    }
  }
  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
}
9240
9241ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9242 StringRef Name = getTokenStr();
9243 if (Name == "mul") {
9244 return parseIntWithPrefix(Prefix: "mul", Operands,
9245 ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodMul);
9246 }
9247
9248 if (Name == "div") {
9249 return parseIntWithPrefix(Prefix: "div", Operands,
9250 ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodDiv);
9251 }
9252
9253 return ParseStatus::NoMatch;
9254}
9255
// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands present, then copies that bit into src0_modifiers.
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
  int Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return; // No op_sel operand: nothing to propagate.

  // Count how many of src0..src2 this opcode actually has; the destination
  // op_sel bit immediately follows the last source bit.
  int SrcNum;
  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: Ops[SrcNum]);
       ++SrcNum)
    ;
  assert(SrcNum > 0);

  unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();

  int DstIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst);
  if (DstIdx == -1)
    return;

  const MCOperand &DstOp = Inst.getOperand(i: DstIdx);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0_modifiers);
  uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm();
  // For a true 16-bit destination register the hi/lo half is implied by the
  // register itself; otherwise take it from the op_sel bit after the sources.
  if (DstOp.isReg() &&
      MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: DstOp.getReg())) {
    if (AMDGPU::isHi16Reg(Reg: DstOp.getReg(), MRI))
      ModVal |= SISrcMods::DST_OP_SEL;
  } else {
    if ((OpSel & (1 << SrcNum)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;
  }
  Inst.getOperand(i: ModIdx).setImm(ModVal);
}
9291
// VOP3 conversion for instructions whose op_sel only selects the destination
// half: run the common VOP3P conversion, then fold the dst op_sel bit into
// src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);
  cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI());
}
9297
// Overload taking a caller-provided optional-operand index map; otherwise
// identical to the two-argument form above.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI());
}
9303
9304static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9305 return
9306 // 1. This operand is input modifiers
9307 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9308 // 2. This is not last operand
9309 && Desc.NumOperands > (OpNum + 1)
9310 // 3. Next operand is register class
9311 && Desc.operands()[OpNum + 1].RegClass != -1
9312 // 4. Next register is not tied to any other operand
9313 && Desc.getOperandConstraint(OpNum: OpNum + 1,
9314 Constraint: MCOI::OperandConstraint::TIED_TO) == -1;
9315}
9316
// Copy the per-source op_sel bits (and the destination bit, op_sel[3]) from
// the packed OpSel immediate into the corresponding srcN_modifiers operands.
void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
  unsigned Opc = Inst.getOpcode();
  constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                    AMDGPU::OpName::src2};
  constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                       AMDGPU::OpName::src1_modifiers,
                                       AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: Ops[J]);
    if (OpIdx == -1)
      // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
      // no src1. So continue instead of break.
      continue;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
    uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm();

    // op_sel[J] selects the high half of source J.
    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;
    // op_sel[3] is encoded in src0_modifiers.
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;

    Inst.getOperand(i: ModIdx).setImm(ModVal);
  }
}
9343
// Build a v_interp VOP3 MCInst from parsed operands: destination defs first,
// then sources (with FP input modifiers where the descriptor allows them) and
// interp-specific immediates in place, then the optional high/clamp/omod/
// op_sel operands in encoding order.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; the defs follow it.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      // Emits a (modifiers, reg/imm) operand pair.
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
      Inst.addOperand(Op: MCOperand::createImm(Val: Op.getImm()));
    } else if (Op.isImmModifier()) {
      // Defer optional immediates; they are appended in encoding order below.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::high))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyHigh);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyOModSI);

  // Some v_interp instructions use op_sel[3] for dst.
  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyOpSel);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();

    cvtOpSelHelper(Inst, OpSel);
  }
}
9391
// Build a VINTERP MCInst from parsed operands: defs, then modified sources,
// then the optional clamp/op_sel/wait_exp operands in encoding order, and
// finally fold op_sel bits into the source-modifier operands.
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; the defs follow it.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      // Emits a (modifiers, reg/imm) operand pair.
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
    } else if (Op.isImmModifier()) {
      // Defer optional immediates; appended in encoding order below.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyClamp);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel);

  addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyWaitEXP);

  // No op_sel operand: nothing further to fold into the modifiers.
  if (OpSelIdx == -1)
    return;

  unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
  cvtOpSelHelper(Inst, OpSel);
}
9428
// Build a scaled-MFMA MCInst. The MCInst operand order differs from the
// parsed-operand order: cbsz/blgp live in the middle of the MCInst, while the
// corresponding assembly modifiers are parsed at the end, so dummy operands
// are inserted first and patched afterwards. op_sel/op_sel_hi are folded into
// the scale-source modifier operands at the end.
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  unsigned I = 1;
  int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz);

  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  // Operands[0] is the mnemonic token; copy the defs that follow it.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, N: 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
    int NumOperands = Inst.getNumOperands();
    // The order of operands in MCInst and parsed operands are different.
    // Adding dummy cbsz and blgp operands at corresponding MCInst operand
    // indices for parsing scale values correctly.
    if (NumOperands == CbszOpIdx) {
      Inst.addOperand(Op: MCOperand::createImm(Val: 0));
      Inst.addOperand(Op: MCOperand::createImm(Val: 0));
    }
    if (isRegOrImmWithInputMods(Desc, OpNum: NumOperands)) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, N: 1);
    }
  }

  // Insert CBSZ and BLGP operands for F8F6F4 variants
  auto CbszIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyCBSZ);
  if (CbszIdx != OptionalIdx.end()) {
    int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
    Inst.getOperand(i: CbszOpIdx).setImm(CbszVal);
  }

  int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
  auto BlgpIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyBLGP);
  if (BlgpIdx != OptionalIdx.end()) {
    int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
    Inst.getOperand(i: BlgpOpIdx).setImm(BlgpVal);
  }

  // Add dummy src_modifiers
  Inst.addOperand(Op: MCOperand::createImm(Val: 0));
  Inst.addOperand(Op: MCOperand::createImm(Val: 0));

  // Handle op_sel fields

  unsigned OpSel = 0;
  auto OpselIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyOpSel);
  if (OpselIdx != OptionalIdx.end()) {
    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
                .getImm();
  }

  unsigned OpSelHi = 0;
  auto OpselHiIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyOpSelHi);
  if (OpselHiIdx != OptionalIdx.end()) {
    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
                  .getImm();
  }
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers};

  // Fold op_sel / op_sel_hi bit J into the J-th scale-source modifiers.
  for (unsigned J = 0; J < 2; ++J) {
    unsigned ModVal = 0;
    if (OpSel & (1 << J))
      ModVal |= SISrcMods::OP_SEL_0;
    if (OpSelHi & (1 << J))
      ModVal |= SISrcMods::OP_SEL_1;

    const int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
    Inst.getOperand(i: ModIdx).setImm(ModVal);
  }
}
9507
// Generic VOP3 conversion: copy defs and sources (with FP input modifiers
// where the descriptor allows them), record optional immediates in
// OptionalIdx, then append the optional operands in encoding order. Also
// materializes the tied src2 for MAC/FMAC forms.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; the defs follow it.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      // Emits a (modifiers, reg/imm) operand pair.
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
    } else if (Op.isImmModifier()) {
      // Defer optional immediates; appended in encoding order below.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, N: 1);
    }
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::scale_sel))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyScaleSel);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::byte_sel)) {
    // byte_sel opcodes with a vdst_in operand tie it to the destination.
    if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in))
      Inst.addOperand(Op: Inst.getOperand(i: 0));
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyByteSel);
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyOModSI);

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (isMAC(Opc)) {
    auto *it = Inst.begin();
    std::advance(i&: it, n: AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(I: it, Op: MCOperand::createImm(Val: 0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(I: it, Op: MCOperand(Inst.getOperand(i: 0))); // src2 = dst
  }
}
9561
9562void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9563 OptionalImmIndexMap OptionalIdx;
9564 cvtVOP3(Inst, Operands, OptionalIdx);
9565}
9566
// VOP3P conversion: after the generic operand copy, insert tied/placeholder
// operands required by specific opcodes, append the optional packed-math
// operands (bitop3, op_sel/op_sel_hi, matrix fmt/scale/reuse, neg_lo/neg_hi)
// in encoding order, and finally fold the packed op_sel/neg bits into the
// per-source srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  // These opcodes need src2 modifiers and a tied vdst_in that are not parsed
  // from assembly; insert them here.
  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
    Inst.addOperand(Op: MCOperand::createImm(Val: 0)); // Placeholder for src2_mods
    Inst.addOperand(Op: Inst.getOperand(i: 0));
  }

  // Adding vdst_in operand is already covered for these DPP instructions in
  // cvtVOP3DPP.
  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in) &&
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
    // Tie vdst_in to the destination operand.
    Inst.addOperand(Op: Inst.getOperand(i: 0));
  }

  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyBitOp3);
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // For packed math op_sel_hi defaults to all-ones (-1); otherwise to 0.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSelHi,
                          Default: DefaultVal);
  }

  int MatrixAFMTIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_fmt);
  if (MatrixAFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixAFMT, Default: 0);
  }

  int MatrixBFMTIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_fmt);
  if (MatrixBFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixBFMT, Default: 0);
  }

  int MatrixAScaleIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_scale);
  if (MatrixAScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixAScale, Default: 0);
  }

  int MatrixBScaleIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_scale);
  if (MatrixBScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixBScale, Default: 0);
  }

  int MatrixAScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_scale_fmt);
  if (MatrixAScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixAScaleFmt, Default: 0);
  }

  int MatrixBScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_scale_fmt);
  if (MatrixBScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixBScaleFmt, Default: 0);
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::matrix_a_reuse))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixAReuse, Default: 0);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::matrix_b_reuse))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixBReuse, Default: 0);

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegLo);

  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::neg_hi);
  if (NegHiIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegHi);

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  // Read back the packed operands appended above (if present).
  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(i: OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(i: OpSelHiIdx).getImm();

  if (NegLoIdx != -1)
    NegLo = Inst.getOperand(i: NegLoIdx).getImm();

  if (NegHiIdx != -1)
    NegHi = Inst.getOperand(i: NegHiIdx).getImm();

  // Distribute bit J of each packed value into srcJ_modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);

    if (ModIdx == -1)
      continue;

    uint32_t ModVal = 0;

    const MCOperand &SrcOp = Inst.getOperand(i: OpIdx);
    // True 16-bit source registers imply their hi/lo half by the register
    // itself instead of the op_sel bit.
    if (SrcOp.isReg() && getMRI()
                             ->getRegClass(i: AMDGPU::VGPR_16RegClassID)
                             .contains(Reg: SrcOp.getReg())) {
      bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Reg: SrcOp.getReg(), MRI: *getMRI());
      if (VGPRSuffixIsHi)
        ModVal |= SISrcMods::OP_SEL_0;
    } else {
      if ((OpSel & (1 << J)) != 0)
        ModVal |= SISrcMods::OP_SEL_0;
    }

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    Inst.getOperand(i: ModIdx).setImm(Inst.getOperand(i: ModIdx).getImm() | ModVal);
  }
}
9767
9768void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9769 OptionalImmIndexMap OptIdx;
9770 cvtVOP3(Inst, Operands, OptionalIdx&: OptIdx);
9771 cvtVOP3P(Inst, Operands, OptIdx);
9772}
9773
9774static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9775 unsigned i, unsigned Opc,
9776 AMDGPU::OpName OpName) {
9777 if (AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName) != -1)
9778 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, N: 2);
9779 else
9780 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, N: 1);
9781}
9782
// Build a SWMMAC MCInst. Operand layout in the parsed vector is fixed:
// [0]=mnemonic, [1]=dst, [2]=src0, [3]=src1, [4]=src2(index), then optional
// immediates. The destination is re-emitted as the tied src accumulator.
void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, N: 1);
  addSrcModifiersAndSrc(Inst, Operands, i: 2, Opc, OpName: AMDGPU::OpName::src0_modifiers);
  addSrcModifiersAndSrc(Inst, Operands, i: 3, Opc, OpName: AMDGPU::OpName::src1_modifiers);
  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, N: 1); // srcTiedDef
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, N: 1); // src2

  // Record all trailing optional immediates by type.
  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;
  }

  // Append the index-key operand matching whichever width this opcode takes.
  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_8bit))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyIndexKey8bit);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_16bit))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyIndexKey16bit);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_32bit))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyIndexKey32bit);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyClamp);

  // Shared tail handling (op_sel, neg, etc.).
  cvtVOP3P(Inst, Operands, OptIdx);
}
9815
9816//===----------------------------------------------------------------------===//
9817// VOPD
9818//===----------------------------------------------------------------------===//
9819
// Parse the "::" separator of a VOPD dual-issue instruction followed by the
// VOPDY mnemonic; both are pushed as token operands. Returns NoMatch when
// VOPD is unsupported or no "::" is present.
ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
  if (!hasVOPD(STI: getSTI()))
    return ParseStatus::NoMatch;

  // "::" arrives as two adjacent Colon tokens.
  if (isToken(Kind: AsmToken::Colon) && peekToken(ShouldSkipSpace: false).is(K: AsmToken::Colon)) {
    SMLoc S = getLoc();
    lex();
    lex();
    Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "::", Loc: S));
    SMLoc OpYLoc = getLoc();
    StringRef OpYName;
    if (isToken(Kind: AsmToken::Identifier) && !Parser.parseIdentifier(Res&: OpYName)) {
      Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: OpYName, Loc: OpYLoc));
      return ParseStatus::Success;
    }
    return Error(L: OpYLoc, Msg: "expected a VOPDY instruction after ::");
  }
  return ParseStatus::NoMatch;
}
9839
9840// Create VOPD MCInst operands using parsed assembler operands.
9841void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9842 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
9843
9844 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9845 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9846 if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
9847 Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
9848 return;
9849 }
9850 if (Op.isReg()) {
9851 Op.addRegOperands(Inst, N: 1);
9852 return;
9853 }
9854 if (Op.isImm()) {
9855 Op.addImmOperands(Inst, N: 1);
9856 return;
9857 }
9858 llvm_unreachable("Unhandled operand type in cvtVOPD");
9859 };
9860
9861 const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Inst.getOpcode(), InstrInfo: &MII);
9862
9863 // MCInst operands are ordered as follows:
9864 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9865
9866 for (auto CompIdx : VOPD::COMPONENTS) {
9867 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9868 }
9869
9870 for (auto CompIdx : VOPD::COMPONENTS) {
9871 const auto &CInfo = InstInfo[CompIdx];
9872 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9873 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9874 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9875 if (CInfo.hasSrc2Acc())
9876 addOp(CInfo.getIndexOfDstInParsedOperands());
9877 }
9878
9879 int BitOp3Idx =
9880 AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::bitop3);
9881 if (BitOp3Idx != -1) {
9882 OptionalImmIndexMap OptIdx;
9883 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9884 if (Op.isImm())
9885 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9886
9887 addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyBitOp3);
9888 }
9889}
9890
9891//===----------------------------------------------------------------------===//
9892// dpp
9893//===----------------------------------------------------------------------===//
9894
// True if this operand is a dpp8 lane-select immediate.
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmT: ImmTyDPP8);
}
9898
9899bool AMDGPUOperand::isDPPCtrl() const {
9900 using namespace AMDGPU::DPP;
9901
9902 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(x: getImm());
9903 if (result) {
9904 int64_t Imm = getImm();
9905 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9906 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9907 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9908 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9909 (Imm == DppCtrl::WAVE_SHL1) ||
9910 (Imm == DppCtrl::WAVE_ROL1) ||
9911 (Imm == DppCtrl::WAVE_SHR1) ||
9912 (Imm == DppCtrl::WAVE_ROR1) ||
9913 (Imm == DppCtrl::ROW_MIRROR) ||
9914 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9915 (Imm == DppCtrl::BCAST15) ||
9916 (Imm == DppCtrl::BCAST31) ||
9917 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9918 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9919 }
9920 return false;
9921}
9922
9923//===----------------------------------------------------------------------===//
9924// mAI
9925//===----------------------------------------------------------------------===//
9926
// True if this operand is a BLGP immediate fitting the 3-bit encoding field.
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(x: getImm());
}
9930
// True for an immediate literal representable in 16 bits, signed or unsigned.
bool AMDGPUOperand::isS16Imm() const {
  return isImmLiteral() && (isInt<16>(x: getImm()) || isUInt<16>(x: getImm()));
}
9934
// True for an immediate literal representable as an unsigned 16-bit value.
bool AMDGPUOperand::isU16Imm() const {
  return isImmLiteral() && isUInt<16>(x: getImm());
}
9938
9939//===----------------------------------------------------------------------===//
9940// dim
9941//===----------------------------------------------------------------------===//
9942
// Parse an image-dimension identifier (e.g. "1D", "2D_ARRAY", optionally
// prefixed with "SQ_RSRC_IMG_") and return its hardware encoding.
// Returns false on any parse or lookup failure.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(Kind: AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // Reject whitespace between the integer and the identifier suffix:
    // the next token must begin exactly where the integer ended.
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Val&: Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  // Accept the long SP3 spelling by dropping its prefix.
  DimId.consume_front(Prefix: "SQ_RSRC_IMG_");

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(AsmSuffix: DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}
9970
// Parse a "dim:<id>" modifier (GFX10+ MIMG) and push its encoding as an
// ImmTyDim operand. NoMatch when the "dim:" prefix is absent.
ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId(Id: "dim", Kind: AsmToken::Colon))
    return ParseStatus::NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(L: Loc, Msg: "invalid dim value");

  Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Encoding, Loc: S,
                                               Type: AMDGPUOperand::ImmTyDim));
  return ParseStatus::Success;
}
9989
9990//===----------------------------------------------------------------------===//
9991// dpp
9992//===----------------------------------------------------------------------===//
9993
9994ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9995 SMLoc S = getLoc();
9996
9997 if (!isGFX10Plus() || !trySkipId(Id: "dpp8", Kind: AsmToken::Colon))
9998 return ParseStatus::NoMatch;
9999
10000 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10001
10002 int64_t Sels[8];
10003
10004 if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected an opening square bracket"))
10005 return ParseStatus::Failure;
10006
10007 for (size_t i = 0; i < 8; ++i) {
10008 if (i > 0 && !skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
10009 return ParseStatus::Failure;
10010
10011 SMLoc Loc = getLoc();
10012 if (getParser().parseAbsoluteExpression(Res&: Sels[i]))
10013 return ParseStatus::Failure;
10014 if (0 > Sels[i] || 7 < Sels[i])
10015 return Error(L: Loc, Msg: "expected a 3-bit value");
10016 }
10017
10018 if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
10019 return ParseStatus::Failure;
10020
10021 unsigned DPP8 = 0;
10022 for (size_t i = 0; i < 8; ++i)
10023 DPP8 |= (Sels[i] << (i * 3));
10024
10025 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DPP8, Loc: S, Type: AMDGPUOperand::ImmTyDPP8));
10026 return ParseStatus::Success;
10027}
10028
10029bool
10030AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10031 const OperandVector &Operands) {
10032 if (Ctrl == "row_newbcast")
10033 return isGFX90A();
10034
10035 if (Ctrl == "row_share" ||
10036 Ctrl == "row_xmask")
10037 return isGFX10Plus();
10038
10039 if (Ctrl == "wave_shl" ||
10040 Ctrl == "wave_shr" ||
10041 Ctrl == "wave_rol" ||
10042 Ctrl == "wave_ror" ||
10043 Ctrl == "row_bcast")
10044 return isVI() || isGFX9();
10045
10046 return Ctrl == "row_mirror" ||
10047 Ctrl == "row_half_mirror" ||
10048 Ctrl == "quad_perm" ||
10049 Ctrl == "row_shl" ||
10050 Ctrl == "row_shr" ||
10051 Ctrl == "row_ror";
10052}
10053
10054int64_t
10055AMDGPUAsmParser::parseDPPCtrlPerm() {
10056 // quad_perm:[%d,%d,%d,%d]
10057
10058 if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected an opening square bracket"))
10059 return -1;
10060
10061 int64_t Val = 0;
10062 for (int i = 0; i < 4; ++i) {
10063 if (i > 0 && !skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
10064 return -1;
10065
10066 int64_t Temp;
10067 SMLoc Loc = getLoc();
10068 if (getParser().parseAbsoluteExpression(Res&: Temp))
10069 return -1;
10070 if (Temp < 0 || Temp > 3) {
10071 Error(L: Loc, Msg: "expected a 2-bit value");
10072 return -1;
10073 }
10074
10075 Val += (Temp << i * 2);
10076 }
10077
10078 if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
10079 return -1;
10080
10081 return Val;
10082}
10083
// Parse the integer argument of a <Ctrl>:%d dpp_ctrl form and translate it
// into the hardware DppCtrl encoding. Returns -1 after emitting a
// diagnostic when the value is out of range for the given control.
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Res&: Val))
    return -1;

  // Per-control base encoding plus the inclusive [Lo, Hi] range of legal
  // argument values. Controls with Lo == Hi take a fixed argument and map
  // directly to Ctrl; the rest OR the argument into the base encoding.
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case(S: "wave_shl", Value: {.Ctrl: DppCtrl::WAVE_SHL1, .Lo: 1, .Hi: 1})
    .Case(S: "wave_rol", Value: {.Ctrl: DppCtrl::WAVE_ROL1, .Lo: 1, .Hi: 1})
    .Case(S: "wave_shr", Value: {.Ctrl: DppCtrl::WAVE_SHR1, .Lo: 1, .Hi: 1})
    .Case(S: "wave_ror", Value: {.Ctrl: DppCtrl::WAVE_ROR1, .Lo: 1, .Hi: 1})
    .Case(S: "row_shl", Value: {.Ctrl: DppCtrl::ROW_SHL0, .Lo: 1, .Hi: 15})
    .Case(S: "row_shr", Value: {.Ctrl: DppCtrl::ROW_SHR0, .Lo: 1, .Hi: 15})
    .Case(S: "row_ror", Value: {.Ctrl: DppCtrl::ROW_ROR0, .Lo: 1, .Hi: 15})
    .Case(S: "row_share", Value: {.Ctrl: DppCtrl::ROW_SHARE_FIRST, .Lo: 0, .Hi: 15})
    .Case(S: "row_xmask", Value: {.Ctrl: DppCtrl::ROW_XMASK_FIRST, .Lo: 0, .Hi: 15})
    .Case(S: "row_newbcast", Value: {.Ctrl: DppCtrl::ROW_NEWBCAST_FIRST, .Lo: 0, .Hi: 15})
    .Default(Value: {.Ctrl: -1, .Lo: 0, .Hi: 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // Not in the table: only row_bcast remains, and it accepts exactly 15
    // or 31. Val is remapped unconditionally, but Valid gates the result.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(L: Loc, Msg: Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
10131
10132ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10133 using namespace AMDGPU::DPP;
10134
10135 if (!isToken(Kind: AsmToken::Identifier) ||
10136 !isSupportedDPPCtrl(Ctrl: getTokenStr(), Operands))
10137 return ParseStatus::NoMatch;
10138
10139 SMLoc S = getLoc();
10140 int64_t Val = -1;
10141 StringRef Ctrl;
10142
10143 parseId(Val&: Ctrl);
10144
10145 if (Ctrl == "row_mirror") {
10146 Val = DppCtrl::ROW_MIRROR;
10147 } else if (Ctrl == "row_half_mirror") {
10148 Val = DppCtrl::ROW_HALF_MIRROR;
10149 } else {
10150 if (skipToken(Kind: AsmToken::Colon, ErrMsg: "expected a colon")) {
10151 if (Ctrl == "quad_perm") {
10152 Val = parseDPPCtrlPerm();
10153 } else {
10154 Val = parseDPPCtrlSel(Ctrl);
10155 }
10156 }
10157 }
10158
10159 if (Val == -1)
10160 return ParseStatus::Failure;
10161
10162 Operands.push_back(
10163 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: AMDGPUOperand::ImmTyDppCtrl));
10164 return ParseStatus::Success;
10165}
10166
// Build an MCInst for a VOP3 instruction carrying a dpp or dpp8 modifier.
// Walks the parsed operands in order, inserting the implicit/dummy operands
// that VOP3-DPP encodings require (untied MAC 'old', dummy src2_modifiers,
// duplicated vdst_in) at the exact operand positions the descriptor
// expects, then appends defaults for any omitted optional modifiers.
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());

  // MAC instructions are special because they have 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have dummy unused src2_modifiers.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OpNum: OldIdx, Constraint: MCOI::TIED_TO) == -1;

  // Copy the destination register operands. Operands[0] is the mnemonic.
  unsigned I = 1;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  int Fi = 0; // Value of the dpp8 FI modifier, appended after the loop.
  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst_in);
  // v_cvt_sr_{bf8,fp8}_f32 dpp/dpp8 forms need dummy src2 operands inserted
  // (see below) and must not get a clamp operand.
  bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;

  for (unsigned E = Operands.size(); I != E; ++I) {

    if (IsMAC) {
      int NumOperands = Inst.getNumOperands();
      if (OldIdx == NumOperands) {
        // Handle old operand: duplicate dst into the 'old' slot.
        constexpr int DST_IDX = 0;
        Inst.addOperand(Op: Inst.getOperand(i: DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add unused dummy src2_modifiers
        Inst.addOperand(Op: MCOperand::createImm(Val: 0));
      }
    }

    // vdst_in mirrors the destination register.
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
      Inst.addOperand(Op: Inst.getOperand(i: 0));
    }

    if (IsVOP3CvtSrDpp) {
      // No explicit src2 was parsed; fill its modifier and register slots
      // with a zero immediate and a null register.
      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
        Inst.addOperand(Op: MCOperand::createImm(Val: 0));
        Inst.addOperand(Op: MCOperand::createReg(Reg: MCRegister()));
      }
    }

    auto TiedTo = Desc.getOperandConstraint(OpNum: Inst.getNumOperands(),
                                            Constraint: MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Op: Inst.getOperand(i: TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (IsDPP8 && Op.isDppFI()) {
      // dpp8 FI is emitted as the very last operand; remember it for now.
      Fi = Op.getImm();
    } else if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, N: 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      // Immediate appearing where a register class is expected.
      Op.addImmOperands(Inst, N: 1);
    } else if (Op.isImm()) {
      // Optional modifier; record its position for the default-filling pass.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  // Append optional/trailing operands in descriptor order.
  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::byte_sel)) {
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
      Inst.addOperand(Op: Inst.getOperand(i: 0));
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyByteSel);
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOModSI);

  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptIdx&: OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);
  else if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel);
  }

  if (IsDPP8) {
    // dpp8 encodings end with the lane selectors followed by FI.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(Op: MCOperand::createImm(Val: Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Regular dpp: dpp_ctrl defaults to 0xe4, masks default to all lanes.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppCtrl, Default: 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppRowMask, Default: 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBankMask, Default: 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            ImmT: AMDGPUOperand::ImmTyDppFI);
  }
}
10281
// Build an MCInst for a VOP1/VOP2-style dpp or dpp8 instruction from the
// parsed operand list, filling in defaults for any omitted optional dpp
// modifiers.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  // Copy the destination register operands. Operands[0] is the mnemonic.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  int Fi = 0; // Value of the dpp8 FI modifier, appended after the loop.
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(OpNum: Inst.getNumOperands(),
                                            Constraint: MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Op: Inst.getOperand(i: TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Reg: Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        // Packed dpp8 lane-select immediate.
        Op.addImmOperands(Inst, N: 1);
      } else if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, N: 2);
      } else if (Op.isDppFI()) {
        // FI goes last in the encoding; remember it for now.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, N: 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, N: 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, N: 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, N: 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(Op: MCOperand::createImm(Val: Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Regular dpp: row/bank masks default to all lanes enabled (0xf).
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppRowMask, Default: 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBankMask, Default: 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            ImmT: AMDGPUOperand::ImmTyDppFI);
    }
  }
}
10349
10350//===----------------------------------------------------------------------===//
10351// sdwa
10352//===----------------------------------------------------------------------===//
10353
// Parse an SDWA byte/word select operand of the form
// <Prefix>:(BYTE_0..BYTE_3|WORD_0|WORD_1|DWORD). Per the helper's name, a
// bare integer is also accepted in place of the symbolic id.
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Name: Prefix,
      Ids: {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
      Type);
}
10362
// Parse the SDWA dst_unused:(UNUSED_PAD|UNUSED_SEXT|UNUSED_PRESERVE)
// modifier. Per the helper's name, a bare integer is also accepted.
ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  return parseStringOrIntWithPrefix(
      Operands, Name: "dst_unused", Ids: {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      Type: AMDGPUOperand::ImmTySDWADstUnused);
}
10368
// SDWA conversion for VOP1 instructions.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP1);
}
10372
// SDWA conversion for plain VOP2 instructions (no vcc operands skipped).
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP2);
}
10376
// SDWA conversion for VOP2b forms: skip the "vcc" tokens appearing in both
// destination and source positions of the assembly text.
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP2, SkipDstVcc: true, SkipSrcVcc: true);
}
10380
// SDWA conversion for VOP2e forms: skip only the source "vcc" token.
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP2, SkipDstVcc: false, SkipSrcVcc: true);
}
10384
// SDWA conversion for VOPC instructions; the destination "vcc" token is
// skipped only on VI.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOPC, SkipDstVcc: isVI());
}
10388
// Shared worker that builds an MCInst for an SDWA instruction.
// BasicInstType selects VOP1/VOP2/VOPC-specific handling of the optional
// modifiers; SkipDstVcc/SkipSrcVcc drop the textual "vcc" operands that
// VOP2b/VOP2e/VOPC forms carry in the assembly source.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  // Guards against skipping two consecutive "vcc" operands.
  bool SkippedVcc = false;

  // Copy the destination register operands. Operands[0] is the mnemonic.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      }
      if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, N: 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_gfx10/gfx9/vi has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      // Optional modifiers in encoding order: clamp, omod, dst_sel,
      // dst_unused, src0_sel.
      if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              ImmT: AMDGPUOperand::ImmTyClamp, Default: 0);

      if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              ImmT: AMDGPUOperand::ImmTyOModSI, Default: 0);

      if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              ImmT: AMDGPUOperand::ImmTySDWADstSel, Default: SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              ImmT: AMDGPUOperand::ImmTySDWADstUnused,
                              Default: DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      // Same as VOP1 plus src1_sel at the end.
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            ImmT: AMDGPUOperand::ImmTyClamp, Default: 0);

      if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOModSI, Default: 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWADstSel, Default: SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWADstUnused, Default: DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc1Sel, Default: SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused; only clamp and the source selects.
      if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              ImmT: AMDGPUOperand::ImmTyClamp, Default: 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc1Sel, Default: SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
    std::advance(
        i&: it, n: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::src2));
    Inst.insert(I: it, Op: Inst.getOperand(i: 0)); // src2 = dst
  }
}
10498
10499/// Force static initialization.
/// Force static initialization.
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeAMDGPUAsmParser() {
  // One parser class serves both targets registered by this backend.
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
10505
10506#define GET_MATCHER_IMPLEMENTATION
10507#define GET_MNEMONIC_SPELL_CHECKER
10508#define GET_MNEMONIC_CHECKER
10509#include "AMDGPUGenAsmMatcher.inc"
10510
10511ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10512 unsigned MCK) {
10513 switch (MCK) {
10514 case MCK_addr64:
10515 return parseTokenOp(Name: "addr64", Operands);
10516 case MCK_done:
10517 return parseNamedBit(Name: "done", Operands, ImmTy: AMDGPUOperand::ImmTyDone, IgnoreNegative: true);
10518 case MCK_idxen:
10519 return parseTokenOp(Name: "idxen", Operands);
10520 case MCK_lds:
10521 return parseTokenOp(Name: "lds", Operands);
10522 case MCK_offen:
10523 return parseTokenOp(Name: "offen", Operands);
10524 case MCK_off:
10525 return parseTokenOp(Name: "off", Operands);
10526 case MCK_row_95_en:
10527 return parseNamedBit(Name: "row_en", Operands, ImmTy: AMDGPUOperand::ImmTyRowEn, IgnoreNegative: true);
10528 case MCK_gds:
10529 return parseNamedBit(Name: "gds", Operands, ImmTy: AMDGPUOperand::ImmTyGDS);
10530 case MCK_tfe:
10531 return parseNamedBit(Name: "tfe", Operands, ImmTy: AMDGPUOperand::ImmTyTFE);
10532 }
10533 return tryCustomParseOperand(Operands, MCK);
10534}
10535
10536// This function should be defined after auto-generated include so that we have
10537// MatchClassKind enum defined
10538unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10539 unsigned Kind) {
10540 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10541 // But MatchInstructionImpl() expects to meet token and fails to validate
10542 // operand. This method checks if we are given immediate operand but expect to
10543 // get corresponding token.
10544 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10545 switch (Kind) {
10546 case MCK_addr64:
10547 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10548 case MCK_gds:
10549 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10550 case MCK_lds:
10551 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10552 case MCK_idxen:
10553 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10554 case MCK_offen:
10555 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10556 case MCK_tfe:
10557 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10558 case MCK_done:
10559 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10560 case MCK_row_95_en:
10561 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10562 case MCK_SSrc_b32:
10563 // When operands have expression values, they will return true for isToken,
10564 // because it is not possible to distinguish between a token and an
10565 // expression at parse time. MatchInstructionImpl() will always try to
10566 // match an operand as a token, when isToken returns true, and when the
10567 // name of the expression is not a valid token, the match will fail,
10568 // so we need to handle it here.
10569 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10570 case MCK_SSrc_f32:
10571 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10572 case MCK_SOPPBrTarget:
10573 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10574 case MCK_VReg32OrOff:
10575 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10576 case MCK_InterpSlot:
10577 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10578 case MCK_InterpAttr:
10579 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10580 case MCK_InterpAttrChan:
10581 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10582 case MCK_SReg_64:
10583 case MCK_SReg_64_XEXEC:
10584 // Null is defined as a 32-bit register but
10585 // it should also be enabled with 64-bit operands or larger.
10586 // The following code enables it for SReg_64 and larger operands
10587 // used as source and destination. Remaining source
10588 // operands are handled in isInlinableImm.
10589 case MCK_SReg_96:
10590 case MCK_SReg_128:
10591 case MCK_SReg_256:
10592 case MCK_SReg_512:
10593 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10594 default:
10595 return Match_InvalidOperand;
10596 }
10597}
10598
10599//===----------------------------------------------------------------------===//
10600// endpgm
10601//===----------------------------------------------------------------------===//
10602
10603ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10604 SMLoc S = getLoc();
10605 int64_t Imm = 0;
10606
10607 if (!parseExpr(Imm)) {
10608 // The operand is optional, if not present default to 0
10609 Imm = 0;
10610 }
10611
10612 if (!isUInt<16>(x: Imm))
10613 return Error(L: S, Msg: "expected a 16-bit value");
10614
10615 Operands.push_back(
10616 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTyEndpgm));
10617 return ParseStatus::Success;
10618}
10619
// True iff this operand was parsed as an s_endpgm immediate.
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmT: ImmTyEndpgm); }
10621
10622//===----------------------------------------------------------------------===//
10623// Split Barrier
10624//===----------------------------------------------------------------------===//
10625
// Split-barrier operands accept any immediate inlinable as a 32-bit value.
bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(type: MVT::i32); }
10627