//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

9#include "AMDKernelCodeT.h"
10#include "MCTargetDesc/AMDGPUInstPrinter.h"
11#include "MCTargetDesc/AMDGPUMCAsmInfo.h"
12#include "MCTargetDesc/AMDGPUMCExpr.h"
13#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
14#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15#include "MCTargetDesc/AMDGPUTargetStreamer.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
18#include "TargetInfo/AMDGPUTargetInfo.h"
19#include "Utils/AMDGPUAsmUtils.h"
20#include "Utils/AMDGPUBaseInfo.h"
21#include "Utils/AMDKernelCodeTUtils.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
26#include "llvm/BinaryFormat/ELF.h"
27#include "llvm/CodeGenTypes/MachineValueType.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/MC/MCParser/AsmLexer.h"
34#include "llvm/MC/MCParser/MCAsmParser.h"
35#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
36#include "llvm/MC/MCParser/MCTargetAsmParser.h"
37#include "llvm/MC/MCRegisterInfo.h"
38#include "llvm/MC/MCSymbol.h"
39#include "llvm/MC/TargetRegistry.h"
40#include "llvm/Support/AMDGPUMetadata.h"
41#include "llvm/Support/AMDHSAKernelDescriptor.h"
42#include "llvm/Support/Casting.h"
43#include "llvm/Support/Compiler.h"
44#include "llvm/Support/MathExtras.h"
45#include "llvm/TargetParser/TargetParser.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
56enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
63 enum KindTy {
64 Token,
65 Immediate,
66 Register,
67 Expression
68 } Kind;
69
70 SMLoc StartLoc, EndLoc;
71 const AMDGPUAsmParser *AsmParser;
72
73public:
74 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
75 : Kind(Kind_), AsmParser(AsmParser_) {}
76
77 using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
114
115 enum ImmTy {
116 ImmTyNone,
117 ImmTyGDS,
118 ImmTyLDS,
119 ImmTyOffen,
120 ImmTyIdxen,
121 ImmTyAddr64,
122 ImmTyOffset,
123 ImmTyInstOffset,
124 ImmTyOffset0,
125 ImmTyOffset1,
126 ImmTySMEMOffsetMod,
127 ImmTyCPol,
128 ImmTyTFE,
129 ImmTyIsAsync,
130 ImmTyD16,
131 ImmTyClamp,
132 ImmTyOModSI,
133 ImmTySDWADstSel,
134 ImmTySDWASrc0Sel,
135 ImmTySDWASrc1Sel,
136 ImmTySDWADstUnused,
137 ImmTyDMask,
138 ImmTyDim,
139 ImmTyUNorm,
140 ImmTyDA,
141 ImmTyR128A16,
142 ImmTyA16,
143 ImmTyLWE,
144 ImmTyExpTgt,
145 ImmTyExpCompr,
146 ImmTyExpVM,
147 ImmTyDone,
148 ImmTyRowEn,
149 ImmTyFORMAT,
150 ImmTyHwreg,
151 ImmTyOff,
152 ImmTySendMsg,
153 ImmTyWaitEvent,
154 ImmTyInterpSlot,
155 ImmTyInterpAttr,
156 ImmTyInterpAttrChan,
157 ImmTyOpSel,
158 ImmTyOpSelHi,
159 ImmTyNegLo,
160 ImmTyNegHi,
161 ImmTyIndexKey8bit,
162 ImmTyIndexKey16bit,
163 ImmTyIndexKey32bit,
164 ImmTyDPP8,
165 ImmTyDppCtrl,
166 ImmTyDppRowMask,
167 ImmTyDppBankMask,
168 ImmTyDppBoundCtrl,
169 ImmTyDppFI,
170 ImmTySwizzle,
171 ImmTyGprIdxMode,
172 ImmTyHigh,
173 ImmTyBLGP,
174 ImmTyCBSZ,
175 ImmTyABID,
176 ImmTyEndpgm,
177 ImmTyWaitVDST,
178 ImmTyWaitEXP,
179 ImmTyWaitVAVDst,
180 ImmTyWaitVMVSrc,
181 ImmTyBitOp3,
182 ImmTyMatrixAFMT,
183 ImmTyMatrixBFMT,
184 ImmTyMatrixAScale,
185 ImmTyMatrixBScale,
186 ImmTyMatrixAScaleFmt,
187 ImmTyMatrixBScaleFmt,
188 ImmTyMatrixAReuse,
189 ImmTyMatrixBReuse,
190 ImmTyScaleSel,
191 ImmTyByteSel,
192 };
193
194private:
195 struct TokOp {
196 const char *Data;
197 unsigned Length;
198 };
199
200 struct ImmOp {
201 int64_t Val;
202 ImmTy Type;
203 bool IsFPImm;
204 Modifiers Mods;
205 };
206
207 struct RegOp {
208 MCRegister RegNo;
209 Modifiers Mods;
210 };
211
212 union {
213 TokOp Tok;
214 ImmOp Imm;
215 RegOp Reg;
216 const MCExpr *Expr;
217 };
218
219 // The index of the associated MCInst operand.
220 mutable int MCOpIdx = -1;
221
222public:
223 bool isToken() const override { return Kind == Token; }
224
225 bool isSymbolRefExpr() const {
226 return isExpr() && Expr && isa<MCSymbolRefExpr>(Val: Expr);
227 }
228
229 bool isImm() const override {
230 return Kind == Immediate;
231 }
232
233 bool isInlinableImm(MVT type) const;
234 bool isLiteralImm(MVT type) const;
235
236 bool isRegKind() const {
237 return Kind == Register;
238 }
239
240 bool isReg() const override {
241 return isRegKind() && !hasModifiers();
242 }
243
244 bool isRegOrInline(unsigned RCID, MVT type) const {
245 return isRegClass(RCID) || isInlinableImm(type);
246 }
247
248 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
249 return isRegOrInline(RCID, type) || isLiteralImm(type);
250 }
251
252 bool isRegOrImmWithInt16InputMods() const {
253 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
254 }
255
256 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
257 return isRegOrImmWithInputMods(
258 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::i16);
259 }
260
261 bool isRegOrImmWithInt32InputMods() const {
262 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
263 }
264
265 bool isRegOrInlineImmWithInt16InputMods() const {
266 return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
267 }
268
269 template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
270 return isRegOrInline(
271 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::i16);
272 }
273
274 bool isRegOrInlineImmWithInt32InputMods() const {
275 return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
276 }
277
278 bool isRegOrImmWithInt64InputMods() const {
279 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::i64);
280 }
281
282 bool isRegOrImmWithFP16InputMods() const {
283 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f16);
284 }
285
286 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
287 return isRegOrImmWithInputMods(
288 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::f16);
289 }
290
291 bool isRegOrImmWithFP32InputMods() const {
292 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
293 }
294
295 bool isRegOrImmWithFP64InputMods() const {
296 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
297 }
298
299 template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
300 return isRegOrInline(
301 RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::f16);
302 }
303
304 bool isRegOrInlineImmWithFP32InputMods() const {
305 return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
306 }
307
308 bool isRegOrInlineImmWithFP64InputMods() const {
309 return isRegOrInline(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
310 }
311
312 bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
313
314 bool isVRegWithFP32InputMods() const {
315 return isVRegWithInputMods(RCID: AMDGPU::VGPR_32RegClassID);
316 }
317
318 bool isVRegWithFP64InputMods() const {
319 return isVRegWithInputMods(RCID: AMDGPU::VReg_64RegClassID);
320 }
321
322 bool isPackedFP16InputMods() const {
323 return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::v2f16);
324 }
325
326 bool isPackedVGPRFP32InputMods() const {
327 return isRegOrImmWithInputMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::v2f32);
328 }
329
330 bool isVReg() const {
331 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) ||
332 isRegClass(RCID: AMDGPU::VReg_64RegClassID) ||
333 isRegClass(RCID: AMDGPU::VReg_96RegClassID) ||
334 isRegClass(RCID: AMDGPU::VReg_128RegClassID) ||
335 isRegClass(RCID: AMDGPU::VReg_160RegClassID) ||
336 isRegClass(RCID: AMDGPU::VReg_192RegClassID) ||
337 isRegClass(RCID: AMDGPU::VReg_256RegClassID) ||
338 isRegClass(RCID: AMDGPU::VReg_512RegClassID) ||
339 isRegClass(RCID: AMDGPU::VReg_1024RegClassID);
340 }
341
342 bool isVReg32() const {
343 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID);
344 }
345
346 bool isVReg32OrOff() const {
347 return isOff() || isVReg32();
348 }
349
350 bool isNull() const {
351 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
352 }
353
354 bool isAV_LdSt_32_Align2_RegOp() const {
355 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) ||
356 isRegClass(RCID: AMDGPU::AGPR_32RegClassID);
357 }
358
359 bool isVRegWithInputMods() const;
360 template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
361 template <bool IsFake16> bool isT16VRegWithInputMods() const;
362
363 bool isSDWAOperand(MVT type) const;
364 bool isSDWAFP16Operand() const;
365 bool isSDWAFP32Operand() const;
366 bool isSDWAInt16Operand() const;
367 bool isSDWAInt32Operand() const;
368
369 bool isImmTy(ImmTy ImmT) const {
370 return isImm() && Imm.Type == ImmT;
371 }
372
373 template <ImmTy Ty> bool isImmTy() const { return isImmTy(ImmT: Ty); }
374
375 bool isImmLiteral() const { return isImmTy(ImmT: ImmTyNone); }
376
377 bool isImmModifier() const {
378 return isImm() && Imm.Type != ImmTyNone;
379 }
380
381 bool isOModSI() const { return isImmTy(ImmT: ImmTyOModSI); }
382 bool isDim() const { return isImmTy(ImmT: ImmTyDim); }
383 bool isR128A16() const { return isImmTy(ImmT: ImmTyR128A16); }
384 bool isOff() const { return isImmTy(ImmT: ImmTyOff); }
385 bool isExpTgt() const { return isImmTy(ImmT: ImmTyExpTgt); }
386 bool isOffen() const { return isImmTy(ImmT: ImmTyOffen); }
387 bool isIdxen() const { return isImmTy(ImmT: ImmTyIdxen); }
388 bool isAddr64() const { return isImmTy(ImmT: ImmTyAddr64); }
389 bool isSMEMOffsetMod() const { return isImmTy(ImmT: ImmTySMEMOffsetMod); }
390 bool isFlatOffset() const { return isImmTy(ImmT: ImmTyOffset) || isImmTy(ImmT: ImmTyInstOffset); }
391 bool isGDS() const { return isImmTy(ImmT: ImmTyGDS); }
392 bool isLDS() const { return isImmTy(ImmT: ImmTyLDS); }
393 bool isCPol() const { return isImmTy(ImmT: ImmTyCPol); }
394 bool isIndexKey8bit() const { return isImmTy(ImmT: ImmTyIndexKey8bit); }
395 bool isIndexKey16bit() const { return isImmTy(ImmT: ImmTyIndexKey16bit); }
396 bool isIndexKey32bit() const { return isImmTy(ImmT: ImmTyIndexKey32bit); }
397 bool isMatrixAFMT() const { return isImmTy(ImmT: ImmTyMatrixAFMT); }
398 bool isMatrixBFMT() const { return isImmTy(ImmT: ImmTyMatrixBFMT); }
399 bool isMatrixAScale() const { return isImmTy(ImmT: ImmTyMatrixAScale); }
400 bool isMatrixBScale() const { return isImmTy(ImmT: ImmTyMatrixBScale); }
401 bool isMatrixAScaleFmt() const { return isImmTy(ImmT: ImmTyMatrixAScaleFmt); }
402 bool isMatrixBScaleFmt() const { return isImmTy(ImmT: ImmTyMatrixBScaleFmt); }
403 bool isMatrixAReuse() const { return isImmTy(ImmT: ImmTyMatrixAReuse); }
404 bool isMatrixBReuse() const { return isImmTy(ImmT: ImmTyMatrixBReuse); }
405 bool isTFE() const { return isImmTy(ImmT: ImmTyTFE); }
406 bool isFORMAT() const { return isImmTy(ImmT: ImmTyFORMAT) && isUInt<7>(x: getImm()); }
407 bool isDppFI() const { return isImmTy(ImmT: ImmTyDppFI); }
408 bool isSDWADstSel() const { return isImmTy(ImmT: ImmTySDWADstSel); }
409 bool isSDWASrc0Sel() const { return isImmTy(ImmT: ImmTySDWASrc0Sel); }
410 bool isSDWASrc1Sel() const { return isImmTy(ImmT: ImmTySDWASrc1Sel); }
411 bool isSDWADstUnused() const { return isImmTy(ImmT: ImmTySDWADstUnused); }
412 bool isInterpSlot() const { return isImmTy(ImmT: ImmTyInterpSlot); }
413 bool isInterpAttr() const { return isImmTy(ImmT: ImmTyInterpAttr); }
414 bool isInterpAttrChan() const { return isImmTy(ImmT: ImmTyInterpAttrChan); }
415 bool isOpSel() const { return isImmTy(ImmT: ImmTyOpSel); }
416 bool isOpSelHi() const { return isImmTy(ImmT: ImmTyOpSelHi); }
417 bool isNegLo() const { return isImmTy(ImmT: ImmTyNegLo); }
418 bool isNegHi() const { return isImmTy(ImmT: ImmTyNegHi); }
419 bool isBitOp3() const { return isImmTy(ImmT: ImmTyBitOp3) && isUInt<8>(x: getImm()); }
420 bool isDone() const { return isImmTy(ImmT: ImmTyDone); }
421 bool isRowEn() const { return isImmTy(ImmT: ImmTyRowEn); }
422
423 bool isRegOrImm() const {
424 return isReg() || isImm();
425 }
426
427 bool isRegClass(unsigned RCID) const;
428
429 bool isInlineValue() const;
430
431 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
432 return isRegOrInline(RCID, type) && !hasModifiers();
433 }
434
435 bool isSCSrcB16() const {
436 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::i16);
437 }
438
439 bool isSCSrcV2B16() const {
440 return isSCSrcB16();
441 }
442
443 bool isSCSrc_b32() const {
444 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::i32);
445 }
446
447 bool isSCSrc_b64() const {
448 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_64RegClassID, type: MVT::i64);
449 }
450
451 bool isBoolReg() const;
452
453 bool isSCSrcF16() const {
454 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::f16);
455 }
456
457 bool isSCSrcV2F16() const {
458 return isSCSrcF16();
459 }
460
461 bool isSCSrcF32() const {
462 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::f32);
463 }
464
465 bool isSCSrcF64() const {
466 return isRegOrInlineNoMods(RCID: AMDGPU::SReg_64RegClassID, type: MVT::f64);
467 }
468
469 bool isSSrc_b32() const {
470 return isSCSrc_b32() || isLiteralImm(type: MVT::i32) || isExpr();
471 }
472
473 bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(type: MVT::i16); }
474
475 bool isSSrcV2B16() const {
476 llvm_unreachable("cannot happen");
477 return isSSrc_b16();
478 }
479
480 bool isSSrc_b64() const {
481 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
482 // See isVSrc64().
483 return isSCSrc_b64() || isLiteralImm(type: MVT::i64) ||
484 (((const MCTargetAsmParser *)AsmParser)
485 ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
486 isExpr());
487 }
488
489 bool isSSrc_f32() const {
490 return isSCSrc_b32() || isLiteralImm(type: MVT::f32) || isExpr();
491 }
492
493 bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(type: MVT::f64); }
494
495 bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(type: MVT::bf16); }
496
497 bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(type: MVT::f16); }
498
499 bool isSSrcV2F16() const {
500 llvm_unreachable("cannot happen");
501 return isSSrc_f16();
502 }
503
504 bool isSSrcV2FP32() const {
505 llvm_unreachable("cannot happen");
506 return isSSrc_f32();
507 }
508
509 bool isSCSrcV2FP32() const {
510 llvm_unreachable("cannot happen");
511 return isSCSrcF32();
512 }
513
514 bool isSSrcV2INT32() const {
515 llvm_unreachable("cannot happen");
516 return isSSrc_b32();
517 }
518
519 bool isSCSrcV2INT32() const {
520 llvm_unreachable("cannot happen");
521 return isSCSrc_b32();
522 }
523
524 bool isSSrcOrLds_b32() const {
525 return isRegOrInlineNoMods(RCID: AMDGPU::SRegOrLds_32RegClassID, type: MVT::i32) ||
526 isLiteralImm(type: MVT::i32) || isExpr();
527 }
528
529 bool isVCSrc_b32() const {
530 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
531 }
532
533 bool isVCSrc_b32_Lo256() const {
534 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo256RegClassID, type: MVT::i32);
535 }
536
537 bool isVCSrc_b64_Lo256() const {
538 return isRegOrInlineNoMods(RCID: AMDGPU::VS_64_Lo256RegClassID, type: MVT::i64);
539 }
540
541 bool isVCSrc_b64() const {
542 return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::i64);
543 }
544
545 bool isVCSrcT_b16() const {
546 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::i16);
547 }
548
549 bool isVCSrcTB16_Lo128() const {
550 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::i16);
551 }
552
553 bool isVCSrcFake16B16_Lo128() const {
554 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::i16);
555 }
556
557 bool isVCSrc_b16() const {
558 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
559 }
560
561 bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
562
563 bool isVCSrc_f32() const {
564 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
565 }
566
567 bool isVCSrc_f64() const {
568 return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
569 }
570
571 bool isVCSrcTBF16() const {
572 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::bf16);
573 }
574
575 bool isVCSrcT_f16() const {
576 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::f16);
577 }
578
579 bool isVCSrcT_bf16() const {
580 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::f16);
581 }
582
583 bool isVCSrcTBF16_Lo128() const {
584 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::bf16);
585 }
586
587 bool isVCSrcTF16_Lo128() const {
588 return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::f16);
589 }
590
591 bool isVCSrcFake16BF16_Lo128() const {
592 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::bf16);
593 }
594
595 bool isVCSrcFake16F16_Lo128() const {
596 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::f16);
597 }
598
599 bool isVCSrc_bf16() const {
600 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::bf16);
601 }
602
603 bool isVCSrc_f16() const {
604 return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f16);
605 }
606
607 bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
608
609 bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
610
611 bool isVSrc_b32() const {
612 return isVCSrc_f32() || isLiteralImm(type: MVT::i32) || isExpr();
613 }
614
615 bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(type: MVT::i64); }
616
617 bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(type: MVT::i16); }
618
619 bool isVSrcT_b16_Lo128() const {
620 return isVCSrcTB16_Lo128() || isLiteralImm(type: MVT::i16);
621 }
622
623 bool isVSrcFake16_b16_Lo128() const {
624 return isVCSrcFake16B16_Lo128() || isLiteralImm(type: MVT::i16);
625 }
626
627 bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(type: MVT::i16); }
628
629 bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(type: MVT::v2i16); }
630
631 bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
632
633 bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(type: MVT::v2f32); }
634
635 bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
636
637 bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(type: MVT::v2i32); }
638
639 bool isVSrc_f32() const {
640 return isVCSrc_f32() || isLiteralImm(type: MVT::f32) || isExpr();
641 }
642
643 bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(type: MVT::f64); }
644
645 bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(type: MVT::bf16); }
646
647 bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(type: MVT::f16); }
648
649 bool isVSrcT_bf16_Lo128() const {
650 return isVCSrcTBF16_Lo128() || isLiteralImm(type: MVT::bf16);
651 }
652
653 bool isVSrcT_f16_Lo128() const {
654 return isVCSrcTF16_Lo128() || isLiteralImm(type: MVT::f16);
655 }
656
657 bool isVSrcFake16_bf16_Lo128() const {
658 return isVCSrcFake16BF16_Lo128() || isLiteralImm(type: MVT::bf16);
659 }
660
661 bool isVSrcFake16_f16_Lo128() const {
662 return isVCSrcFake16F16_Lo128() || isLiteralImm(type: MVT::f16);
663 }
664
665 bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(type: MVT::bf16); }
666
667 bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(type: MVT::f16); }
668
669 bool isVSrc_v2bf16() const {
670 return isVSrc_bf16() || isLiteralImm(type: MVT::v2bf16);
671 }
672
673 bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(type: MVT::v2f16); }
674
675 bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
676
677 bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
678
679 bool isVISrcB32() const {
680 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i32);
681 }
682
683 bool isVISrcB16() const {
684 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i16);
685 }
686
687 bool isVISrcV2B16() const {
688 return isVISrcB16();
689 }
690
691 bool isVISrcF32() const {
692 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f32);
693 }
694
695 bool isVISrcF16() const {
696 return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f16);
697 }
698
699 bool isVISrcV2F16() const {
700 return isVISrcF16() || isVISrcB32();
701 }
702
703 bool isVISrc_64_bf16() const {
704 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::bf16);
705 }
706
707 bool isVISrc_64_f16() const {
708 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f16);
709 }
710
711 bool isVISrc_64_b32() const {
712 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32);
713 }
714
715 bool isVISrc_64B64() const {
716 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i64);
717 }
718
719 bool isVISrc_64_f64() const {
720 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f64);
721 }
722
723 bool isVISrc_64V2FP32() const {
724 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f32);
725 }
726
727 bool isVISrc_64V2INT32() const {
728 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32);
729 }
730
731 bool isVISrc_256_b32() const {
732 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32);
733 }
734
735 bool isVISrc_256_f32() const {
736 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32);
737 }
738
739 bool isVISrc_256B64() const {
740 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i64);
741 }
742
743 bool isVISrc_256_f64() const {
744 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f64);
745 }
746
747 bool isVISrc_512_f64() const {
748 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f64);
749 }
750
751 bool isVISrc_128B16() const {
752 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i16);
753 }
754
755 bool isVISrc_128V2B16() const {
756 return isVISrc_128B16();
757 }
758
759 bool isVISrc_128_b32() const {
760 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i32);
761 }
762
763 bool isVISrc_128_f32() const {
764 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f32);
765 }
766
767 bool isVISrc_256V2FP32() const {
768 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32);
769 }
770
771 bool isVISrc_256V2INT32() const {
772 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32);
773 }
774
775 bool isVISrc_512_b32() const {
776 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i32);
777 }
778
779 bool isVISrc_512B16() const {
780 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i16);
781 }
782
783 bool isVISrc_512V2B16() const {
784 return isVISrc_512B16();
785 }
786
787 bool isVISrc_512_f32() const {
788 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f32);
789 }
790
791 bool isVISrc_512F16() const {
792 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f16);
793 }
794
795 bool isVISrc_512V2F16() const {
796 return isVISrc_512F16() || isVISrc_512_b32();
797 }
798
799 bool isVISrc_1024_b32() const {
800 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i32);
801 }
802
803 bool isVISrc_1024B16() const {
804 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i16);
805 }
806
807 bool isVISrc_1024V2B16() const {
808 return isVISrc_1024B16();
809 }
810
811 bool isVISrc_1024_f32() const {
812 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f32);
813 }
814
815 bool isVISrc_1024F16() const {
816 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f16);
817 }
818
819 bool isVISrc_1024V2F16() const {
820 return isVISrc_1024F16() || isVISrc_1024_b32();
821 }
822
823 bool isAISrcB32() const {
824 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i32);
825 }
826
827 bool isAISrcB16() const {
828 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i16);
829 }
830
831 bool isAISrcV2B16() const {
832 return isAISrcB16();
833 }
834
835 bool isAISrcF32() const {
836 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f32);
837 }
838
839 bool isAISrcF16() const {
840 return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f16);
841 }
842
843 bool isAISrcV2F16() const {
844 return isAISrcF16() || isAISrcB32();
845 }
846
847 bool isAISrc_64B64() const {
848 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::i64);
849 }
850
851 bool isAISrc_64_f64() const {
852 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::f64);
853 }
854
855 bool isAISrc_128_b32() const {
856 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i32);
857 }
858
859 bool isAISrc_128B16() const {
860 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i16);
861 }
862
863 bool isAISrc_128V2B16() const {
864 return isAISrc_128B16();
865 }
866
867 bool isAISrc_128_f32() const {
868 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f32);
869 }
870
871 bool isAISrc_128F16() const {
872 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f16);
873 }
874
875 bool isAISrc_128V2F16() const {
876 return isAISrc_128F16() || isAISrc_128_b32();
877 }
878
879 bool isVISrc_128_bf16() const {
880 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::bf16);
881 }
882
883 bool isVISrc_128_f16() const {
884 return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f16);
885 }
886
887 bool isVISrc_128V2F16() const {
888 return isVISrc_128_f16() || isVISrc_128_b32();
889 }
890
891 bool isAISrc_256B64() const {
892 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::i64);
893 }
894
895 bool isAISrc_256_f64() const {
896 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::f64);
897 }
898
899 bool isAISrc_512_b32() const {
900 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i32);
901 }
902
903 bool isAISrc_512B16() const {
904 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i16);
905 }
906
907 bool isAISrc_512V2B16() const {
908 return isAISrc_512B16();
909 }
910
911 bool isAISrc_512_f32() const {
912 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f32);
913 }
914
915 bool isAISrc_512F16() const {
916 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f16);
917 }
918
919 bool isAISrc_512V2F16() const {
920 return isAISrc_512F16() || isAISrc_512_b32();
921 }
922
923 bool isAISrc_1024_b32() const {
924 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i32);
925 }
926
927 bool isAISrc_1024B16() const {
928 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i16);
929 }
930
931 bool isAISrc_1024V2B16() const {
932 return isAISrc_1024B16();
933 }
934
935 bool isAISrc_1024_f32() const {
936 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f32);
937 }
938
939 bool isAISrc_1024F16() const {
940 return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f16);
941 }
942
943 bool isAISrc_1024V2F16() const {
944 return isAISrc_1024F16() || isAISrc_1024_b32();
945 }
946
947 bool isKImmFP32() const {
948 return isLiteralImm(type: MVT::f32);
949 }
950
951 bool isKImmFP16() const {
952 return isLiteralImm(type: MVT::f16);
953 }
954
955 bool isKImmFP64() const { return isLiteralImm(type: MVT::f64); }
956
957 bool isMem() const override {
958 return false;
959 }
960
961 bool isExpr() const {
962 return Kind == Expression;
963 }
964
965 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
966
967 bool isSWaitCnt() const;
968 bool isDepCtr() const;
969 bool isSDelayALU() const;
970 bool isHwreg() const;
971 bool isSendMsg() const;
972 bool isWaitEvent() const;
973 bool isSplitBarrier() const;
974 bool isSwizzle() const;
975 bool isSMRDOffset8() const;
976 bool isSMEMOffset() const;
977 bool isSMRDLiteralOffset() const;
978 bool isDPP8() const;
979 bool isDPPCtrl() const;
980 bool isBLGP() const;
981 bool isGPRIdxMode() const;
982 bool isS16Imm() const;
983 bool isU16Imm() const;
984 bool isEndpgm() const;
985
986 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
987 return [this, P]() { return P(*this); };
988 }
989
990 StringRef getToken() const {
991 assert(isToken());
992 return StringRef(Tok.Data, Tok.Length);
993 }
994
995 int64_t getImm() const {
996 assert(isImm());
997 return Imm.Val;
998 }
999
1000 void setImm(int64_t Val) {
1001 assert(isImm());
1002 Imm.Val = Val;
1003 }
1004
1005 ImmTy getImmTy() const {
1006 assert(isImm());
1007 return Imm.Type;
1008 }
1009
1010 MCRegister getReg() const override {
1011 assert(isRegKind());
1012 return Reg.RegNo;
1013 }
1014
1015 SMLoc getStartLoc() const override {
1016 return StartLoc;
1017 }
1018
1019 SMLoc getEndLoc() const override {
1020 return EndLoc;
1021 }
1022
1023 SMRange getLocRange() const {
1024 return SMRange(StartLoc, EndLoc);
1025 }
1026
1027 int getMCOpIdx() const { return MCOpIdx; }
1028
1029 Modifiers getModifiers() const {
1030 assert(isRegKind() || isImmTy(ImmTyNone));
1031 return isRegKind() ? Reg.Mods : Imm.Mods;
1032 }
1033
1034 void setModifiers(Modifiers Mods) {
1035 assert(isRegKind() || isImmTy(ImmTyNone));
1036 if (isRegKind())
1037 Reg.Mods = Mods;
1038 else
1039 Imm.Mods = Mods;
1040 }
1041
1042 bool hasModifiers() const {
1043 return getModifiers().hasModifiers();
1044 }
1045
1046 bool hasFPModifiers() const {
1047 return getModifiers().hasFPModifiers();
1048 }
1049
1050 bool hasIntModifiers() const {
1051 return getModifiers().hasIntModifiers();
1052 }
1053
1054 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1055
1056 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1057
1058 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1059
1060 void addRegOperands(MCInst &Inst, unsigned N) const;
1061
1062 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1063 if (isRegKind())
1064 addRegOperands(Inst, N);
1065 else
1066 addImmOperands(Inst, N);
1067 }
1068
1069 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1070 Modifiers Mods = getModifiers();
1071 Inst.addOperand(Op: MCOperand::createImm(Val: Mods.getModifiersOperand()));
1072 if (isRegKind()) {
1073 addRegOperands(Inst, N);
1074 } else {
1075 addImmOperands(Inst, N, ApplyModifiers: false);
1076 }
1077 }
1078
  // FP-modifier variant: caller guarantees no integer (sext) modifiers.
  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  // Integer-modifier variant: caller guarantees no FP (abs/neg) modifiers.
  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }
1088
1089 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1090 Modifiers Mods = getModifiers();
1091 Inst.addOperand(Op: MCOperand::createImm(Val: Mods.getModifiersOperand()));
1092 assert(isRegKind());
1093 addRegOperands(Inst, N);
1094 }
1095
  // FP-modifier variant of the register-only encoder.
  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  // Integer-modifier variant of the register-only encoder.
  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
1105
  // Print a human-readable name for \p Type (debug output only; see print()).
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    // NOTE(review): all three index_key widths deliberately share one
    // spelling; the width is implied by the enum value, not the text.
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyIsAsync: OS << "IsAsync"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyDone: OS << "Done"; break;
    case ImmTyRowEn: OS << "RowEn"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyWaitEvent: OS << "WaitEvent"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    // NOTE(review): the Matrix* cases print with the "ImmTy" prefix,
    // unlike every other case — presumably unintentional, but harmless
    // for debug output; confirm before normalizing.
    case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
    case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
    case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
    case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
    case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
    case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
    case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
    case ImmTyScaleSel: OS << "ScaleSel" ; break;
    case ImmTyByteSel: OS << "ByteSel" ; break;
    }
    // clang-format on
  }
1188
1189 void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1190 switch (Kind) {
1191 case Register:
1192 OS << "<register " << AMDGPUInstPrinter::getRegisterName(Reg: getReg())
1193 << " mods: " << Reg.Mods << '>';
1194 break;
1195 case Immediate:
1196 OS << '<' << getImm();
1197 if (getImmTy() != ImmTyNone) {
1198 OS << " type: "; printImmTy(OS, Type: getImmTy());
1199 }
1200 OS << " mods: " << Imm.Mods << '>';
1201 break;
1202 case Token:
1203 OS << '\'' << getToken() << '\'';
1204 break;
1205 case Expression:
1206 OS << "<expr ";
1207 MAI.printExpr(OS, *Expr);
1208 OS << '>';
1209 break;
1210 }
1211 }
1212
1213 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1214 int64_t Val, SMLoc Loc,
1215 ImmTy Type = ImmTyNone,
1216 bool IsFPImm = false) {
1217 auto Op = std::make_unique<AMDGPUOperand>(args: Immediate, args&: AsmParser);
1218 Op->Imm.Val = Val;
1219 Op->Imm.IsFPImm = IsFPImm;
1220 Op->Imm.Type = Type;
1221 Op->Imm.Mods = Modifiers();
1222 Op->StartLoc = Loc;
1223 Op->EndLoc = Loc;
1224 return Op;
1225 }
1226
1227 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1228 StringRef Str, SMLoc Loc,
1229 bool HasExplicitEncodingSize = true) {
1230 auto Res = std::make_unique<AMDGPUOperand>(args: Token, args&: AsmParser);
1231 Res->Tok.Data = Str.data();
1232 Res->Tok.Length = Str.size();
1233 Res->StartLoc = Loc;
1234 Res->EndLoc = Loc;
1235 return Res;
1236 }
1237
1238 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1239 MCRegister Reg, SMLoc S, SMLoc E) {
1240 auto Op = std::make_unique<AMDGPUOperand>(args: Register, args&: AsmParser);
1241 Op->Reg.RegNo = Reg;
1242 Op->Reg.Mods = Modifiers();
1243 Op->StartLoc = S;
1244 Op->EndLoc = E;
1245 return Op;
1246 }
1247
1248 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1249 const class MCExpr *Expr, SMLoc S) {
1250 auto Op = std::make_unique<AMDGPUOperand>(args: Expression, args&: AsmParser);
1251 Op->Expr = Expr;
1252 Op->StartLoc = S;
1253 Op->EndLoc = S;
1254 return Op;
1255 }
1256};
1257
1258raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1259 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1260 return OS;
1261}
1262
1263//===----------------------------------------------------------------------===//
1264// AsmParser
1265//===----------------------------------------------------------------------===//
1266
1267// TODO: define GET_SUBTARGET_FEATURE_NAME
1268#define GET_REGISTER_MATCHER
1269#include "AMDGPUGenAsmMatcher.inc"
1270#undef GET_REGISTER_MATCHER
1271#undef GET_SUBTARGET_FEATURE_NAME
1272
1273// Holds info related to the current kernel, e.g. count of SGPRs used.
1274// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1275// .amdgpu_hsa_kernel or at EOF.
1276class KernelScopeInfo {
1277 int SgprIndexUnusedMin = -1;
1278 int VgprIndexUnusedMin = -1;
1279 int AgprIndexUnusedMin = -1;
1280 MCContext *Ctx = nullptr;
1281 MCSubtargetInfo const *MSTI = nullptr;
1282
1283 void usesSgprAt(int i) {
1284 if (i >= SgprIndexUnusedMin) {
1285 SgprIndexUnusedMin = ++i;
1286 if (Ctx) {
1287 MCSymbol* const Sym =
1288 Ctx->getOrCreateSymbol(Name: Twine(".kernel.sgpr_count"));
1289 Sym->setVariableValue(MCConstantExpr::create(Value: SgprIndexUnusedMin, Ctx&: *Ctx));
1290 }
1291 }
1292 }
1293
1294 void usesVgprAt(int i) {
1295 if (i >= VgprIndexUnusedMin) {
1296 VgprIndexUnusedMin = ++i;
1297 if (Ctx) {
1298 MCSymbol* const Sym =
1299 Ctx->getOrCreateSymbol(Name: Twine(".kernel.vgpr_count"));
1300 int totalVGPR = getTotalNumVGPRs(has90AInsts: isGFX90A(STI: *MSTI), ArgNumAGPR: AgprIndexUnusedMin,
1301 ArgNumVGPR: VgprIndexUnusedMin);
1302 Sym->setVariableValue(MCConstantExpr::create(Value: totalVGPR, Ctx&: *Ctx));
1303 }
1304 }
1305 }
1306
1307 void usesAgprAt(int i) {
1308 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1309 if (!hasMAIInsts(STI: *MSTI))
1310 return;
1311
1312 if (i >= AgprIndexUnusedMin) {
1313 AgprIndexUnusedMin = ++i;
1314 if (Ctx) {
1315 MCSymbol* const Sym =
1316 Ctx->getOrCreateSymbol(Name: Twine(".kernel.agpr_count"));
1317 Sym->setVariableValue(MCConstantExpr::create(Value: AgprIndexUnusedMin, Ctx&: *Ctx));
1318
1319 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1320 MCSymbol* const vSym =
1321 Ctx->getOrCreateSymbol(Name: Twine(".kernel.vgpr_count"));
1322 int totalVGPR = getTotalNumVGPRs(has90AInsts: isGFX90A(STI: *MSTI), ArgNumAGPR: AgprIndexUnusedMin,
1323 ArgNumVGPR: VgprIndexUnusedMin);
1324 vSym->setVariableValue(MCConstantExpr::create(Value: totalVGPR, Ctx&: *Ctx));
1325 }
1326 }
1327 }
1328
1329public:
1330 KernelScopeInfo() = default;
1331
1332 void initialize(MCContext &Context) {
1333 Ctx = &Context;
1334 MSTI = Ctx->getSubtargetInfo();
1335
1336 usesSgprAt(i: SgprIndexUnusedMin = -1);
1337 usesVgprAt(i: VgprIndexUnusedMin = -1);
1338 if (hasMAIInsts(STI: *MSTI)) {
1339 usesAgprAt(i: AgprIndexUnusedMin = -1);
1340 }
1341 }
1342
1343 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1344 unsigned RegWidth) {
1345 switch (RegKind) {
1346 case IS_SGPR:
1347 usesSgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1);
1348 break;
1349 case IS_AGPR:
1350 usesAgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1);
1351 break;
1352 case IS_VGPR:
1353 usesVgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1);
1354 break;
1355 default:
1356 break;
1357 }
1358 }
1359};
1360
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Encoding size (in bits) forced by the user; 64 means VOP3 was
  // requested (see isForcedVOP3()), 0 means no preference.
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;   // DPP encoding explicitly requested.
  bool ForcedSDWA = false;  // SDWA encoding explicitly requested.
  KernelScopeInfo KernelScope;
  // Subtarget HW mode id for register info, fixed at construction.
  const unsigned HwMode;
1369
1370 /// @name Auto-generated Match Functions
1371 /// {
1372
1373#define GET_ASSEMBLER_HEADER
1374#include "AMDGPUGenAsmMatcher.inc"
1375
1376 /// }
1377
1378 /// Get size of register operand
1379 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1380 assert(OpNo < Desc.NumOperands);
1381 int16_t RCID = MII.getOpRegClassID(OpInfo: Desc.operands()[OpNo], HwModeId: HwMode);
1382 return getRegBitWidth(RCID) / 8;
1383 }
1384
1385private:
1386 void createConstantSymbol(StringRef Id, int64_t Val);
1387
1388 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1389 bool OutOfRangeError(SMRange Range);
1390 /// Calculate VGPR/SGPR blocks required for given target, reserved
1391 /// registers, and user-specified NextFreeXGPR values.
1392 ///
1393 /// \param Features [in] Target features, used for bug corrections.
1394 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1395 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1396 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1397 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1398 /// descriptor field, if valid.
1399 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1400 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1401 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1402 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1403 /// \param VGPRBlocks [out] Result VGPR block count.
1404 /// \param SGPRBlocks [out] Result SGPR block count.
1405 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1406 const MCExpr *FlatScrUsed, bool XNACKUsed,
1407 std::optional<bool> EnableWavefrontSize32,
1408 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1409 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1410 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1411 bool ParseDirectiveAMDGCNTarget();
1412 bool ParseDirectiveAMDHSACodeObjectVersion();
1413 bool ParseDirectiveAMDHSAKernel();
1414 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1415 bool ParseDirectiveAMDKernelCodeT();
1416 // TODO: Possibly make subtargetHasRegister const.
1417 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1418 bool ParseDirectiveAMDGPUHsaKernel();
1419
1420 bool ParseDirectiveISAVersion();
1421 bool ParseDirectiveHSAMetadata();
1422 bool ParseDirectivePALMetadataBegin();
1423 bool ParseDirectivePALMetadata();
1424 bool ParseDirectiveAMDGPULDS();
1425
1426 /// Common code to parse out a block of text (typically YAML) between start and
1427 /// end directives.
1428 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1429 const char *AssemblerDirectiveEnd,
1430 std::string &CollectString);
1431
1432 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1433 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1434 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1435 unsigned &RegNum, unsigned &RegWidth,
1436 bool RestoreOnFailure = false);
1437 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1438 unsigned &RegNum, unsigned &RegWidth,
1439 SmallVectorImpl<AsmToken> &Tokens);
1440 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1441 unsigned &RegWidth,
1442 SmallVectorImpl<AsmToken> &Tokens);
1443 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1444 unsigned &RegWidth,
1445 SmallVectorImpl<AsmToken> &Tokens);
1446 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1447 unsigned &RegWidth,
1448 SmallVectorImpl<AsmToken> &Tokens);
1449 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1450 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1451 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1452
1453 bool isRegister();
1454 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1455 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1456 void initializeGprCountSymbol(RegisterKind RegKind);
1457 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1458 unsigned RegWidth);
1459 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1460 bool IsAtomic);
1461
1462public:
1463 enum OperandMode {
1464 OperandMode_Default,
1465 OperandMode_NSA,
1466 };
1467
1468 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1469
1470 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1471 const MCInstrInfo &MII, const MCTargetOptions &Options)
1472 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1473 HwMode(STI.getHwMode(type: MCSubtargetInfo::HwMode_RegInfo)) {
1474 MCAsmParserExtension::Initialize(Parser);
1475
1476 setAvailableFeatures(ComputeAvailableFeatures(FB: getFeatureBits()));
1477
1478 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
1479 if (ISA.Major >= 6 && isHsaAbi(STI: getSTI())) {
1480 createConstantSymbol(Id: ".amdgcn.gfx_generation_number", Val: ISA.Major);
1481 createConstantSymbol(Id: ".amdgcn.gfx_generation_minor", Val: ISA.Minor);
1482 createConstantSymbol(Id: ".amdgcn.gfx_generation_stepping", Val: ISA.Stepping);
1483 } else {
1484 createConstantSymbol(Id: ".option.machine_version_major", Val: ISA.Major);
1485 createConstantSymbol(Id: ".option.machine_version_minor", Val: ISA.Minor);
1486 createConstantSymbol(Id: ".option.machine_version_stepping", Val: ISA.Stepping);
1487 }
1488 if (ISA.Major >= 6 && isHsaAbi(STI: getSTI())) {
1489 initializeGprCountSymbol(RegKind: IS_VGPR);
1490 initializeGprCountSymbol(RegKind: IS_SGPR);
1491 } else
1492 KernelScope.initialize(Context&: getContext());
1493
1494 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1495 createConstantSymbol(Id: Symbol, Val: Code);
1496
1497 createConstantSymbol(Id: "UC_VERSION_W64_BIT", Val: 0x2000);
1498 createConstantSymbol(Id: "UC_VERSION_W32_BIT", Val: 0x4000);
1499 createConstantSymbol(Id: "UC_VERSION_MDP_BIT", Val: 0x8000);
1500 }
1501
1502 bool hasMIMG_R128() const {
1503 return AMDGPU::hasMIMG_R128(STI: getSTI());
1504 }
1505
1506 bool hasPackedD16() const {
1507 return AMDGPU::hasPackedD16(STI: getSTI());
1508 }
1509
1510 bool hasA16() const { return AMDGPU::hasA16(STI: getSTI()); }
1511
1512 bool hasG16() const { return AMDGPU::hasG16(STI: getSTI()); }
1513
1514 bool hasGDS() const { return AMDGPU::hasGDS(STI: getSTI()); }
1515
1516 bool isSI() const {
1517 return AMDGPU::isSI(STI: getSTI());
1518 }
1519
1520 bool isCI() const {
1521 return AMDGPU::isCI(STI: getSTI());
1522 }
1523
1524 bool isVI() const {
1525 return AMDGPU::isVI(STI: getSTI());
1526 }
1527
1528 bool isGFX9() const {
1529 return AMDGPU::isGFX9(STI: getSTI());
1530 }
1531
1532 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1533 bool isGFX90A() const {
1534 return AMDGPU::isGFX90A(STI: getSTI());
1535 }
1536
1537 bool isGFX940() const {
1538 return AMDGPU::isGFX940(STI: getSTI());
1539 }
1540
1541 bool isGFX9Plus() const {
1542 return AMDGPU::isGFX9Plus(STI: getSTI());
1543 }
1544
1545 bool isGFX10() const {
1546 return AMDGPU::isGFX10(STI: getSTI());
1547 }
1548
1549 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(STI: getSTI()); }
1550
1551 bool isGFX11() const {
1552 return AMDGPU::isGFX11(STI: getSTI());
1553 }
1554
1555 bool isGFX11Plus() const {
1556 return AMDGPU::isGFX11Plus(STI: getSTI());
1557 }
1558
1559 bool isGFX12() const { return AMDGPU::isGFX12(STI: getSTI()); }
1560
1561 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(STI: getSTI()); }
1562
1563 bool isGFX1250() const { return AMDGPU::isGFX1250(STI: getSTI()); }
1564
1565 bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(STI: getSTI()); }
1566
1567 bool isGFX13() const { return AMDGPU::isGFX13(STI: getSTI()); }
1568
1569 bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(STI: getSTI()); }
1570
1571 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(STI: getSTI()); }
1572
1573 bool isGFX10_BEncoding() const {
1574 return AMDGPU::isGFX10_BEncoding(STI: getSTI());
1575 }
1576
1577 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1578
1579 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1580
1581 bool hasInv2PiInlineImm() const {
1582 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1583 }
1584
1585 bool has64BitLiterals() const {
1586 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1587 }
1588
1589 bool hasFlatOffsets() const {
1590 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1591 }
1592
1593 bool hasTrue16Insts() const {
1594 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1595 }
1596
1597 bool hasArchitectedFlatScratch() const {
1598 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1599 }
1600
1601 bool hasSGPR102_SGPR103() const {
1602 return !isVI() && !isGFX9();
1603 }
1604
1605 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1606
1607 bool hasIntClamp() const {
1608 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1609 }
1610
1611 bool hasPartialNSAEncoding() const {
1612 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1613 }
1614
1615 bool hasGloballyAddressableScratch() const {
1616 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1617 }
1618
1619 unsigned getNSAMaxSize(bool HasSampler = false) const {
1620 return AMDGPU::getNSAMaxSize(STI: getSTI(), HasSampler);
1621 }
1622
1623 unsigned getMaxNumUserSGPRs() const {
1624 return AMDGPU::getMaxNumUserSGPRs(STI: getSTI());
1625 }
1626
1627 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(STI: getSTI()); }
1628
  // The AMDGPU-specific target streamer attached to the output streamer.
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  MCContext &getContext() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
  }

  const MCRegisterInfo *getMRI() const {
    return getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  // FIXME: This should not be used. Instead, should use queries derived from
  // getAvailableFeatures().
  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Forced-encoding state: records a user-requested encoding (e.g. VOP3,
  // DPP, SDWA) so matching can prefer/require it.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;
1664
1665 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1666 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1667 bool RestoreOnFailure);
1668 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1669 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1670 SMLoc &EndLoc) override;
1671 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1672 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1673 unsigned Kind) override;
1674 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1675 OperandVector &Operands, MCStreamer &Out,
1676 uint64_t &ErrorInfo,
1677 bool MatchingInlineAsm) override;
1678 bool ParseDirective(AsmToken DirectiveID) override;
1679 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1680 OperandMode Mode = OperandMode_Default);
1681 StringRef parseMnemonicSuffix(StringRef Name);
1682 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1683 SMLoc NameLoc, OperandVector &Operands) override;
1684 //bool ProcessInstruction(MCInst &Inst);
1685
1686 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1687
1688 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1689
1690 ParseStatus
1691 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1692 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1693 std::function<bool(int64_t &)> ConvertResult = nullptr);
1694
1695 ParseStatus parseOperandArrayWithPrefix(
1696 const char *Prefix, OperandVector &Operands,
1697 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1698 bool (*ConvertResult)(int64_t &) = nullptr);
1699
1700 ParseStatus
1701 parseNamedBit(StringRef Name, OperandVector &Operands,
1702 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1703 bool IgnoreNegative = false);
1704 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1705 ParseStatus parseCPol(OperandVector &Operands);
1706 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1707 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1708 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1709 SMLoc &StringLoc);
1710 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1711 StringRef Name,
1712 ArrayRef<const char *> Ids,
1713 int64_t &IntVal);
1714 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1715 StringRef Name,
1716 ArrayRef<const char *> Ids,
1717 AMDGPUOperand::ImmTy Type);
1718
1719 bool isModifier();
1720 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1721 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1722 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1723 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1724 bool parseSP3NegModifier();
1725 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1726 LitModifier Lit = LitModifier::None);
1727 ParseStatus parseReg(OperandVector &Operands);
1728 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1729 LitModifier Lit = LitModifier::None);
1730 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1731 bool AllowImm = true);
1732 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1733 bool AllowImm = true);
1734 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1735 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1736 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1737 ParseStatus tryParseIndexKey(OperandVector &Operands,
1738 AMDGPUOperand::ImmTy ImmTy);
1739 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1740 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1741 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1742 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1743 AMDGPUOperand::ImmTy Type);
1744 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1745 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1746 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1747 AMDGPUOperand::ImmTy Type);
1748 ParseStatus parseMatrixAScale(OperandVector &Operands);
1749 ParseStatus parseMatrixBScale(OperandVector &Operands);
1750 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1751 AMDGPUOperand::ImmTy Type);
1752 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1753 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1754
1755 ParseStatus parseDfmtNfmt(int64_t &Format);
1756 ParseStatus parseUfmt(int64_t &Format);
1757 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1758 int64_t &Format);
1759 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1760 int64_t &Format);
1761 ParseStatus parseFORMAT(OperandVector &Operands);
1762 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1763 ParseStatus parseNumericFormat(int64_t &Format);
1764 ParseStatus parseFlatOffset(OperandVector &Operands);
1765 ParseStatus parseR128A16(OperandVector &Operands);
1766 ParseStatus parseBLGP(OperandVector &Operands);
1767 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1768 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1769
1770 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1771
1772 bool parseCnt(int64_t &IntVal);
1773 ParseStatus parseSWaitCnt(OperandVector &Operands);
1774
1775 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1776 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1777 ParseStatus parseDepCtr(OperandVector &Operands);
1778
1779 bool parseDelay(int64_t &Delay);
1780 ParseStatus parseSDelayALU(OperandVector &Operands);
1781
1782 ParseStatus parseHwreg(OperandVector &Operands);
1783
1784private:
  // Parsed value of one sub-operand (e.g. a sendmsg/hwreg field) together
  // with its source location and how it was written.
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    bool IsSymbolic = false;  // Written as a symbolic name, not a number.
    bool IsDefined = false;   // Explicitly present in the source.

    constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
  };
1793
1794 struct StructuredOpField : OperandInfoTy {
1795 StringLiteral Id;
1796 StringLiteral Desc;
1797 unsigned Width;
1798 bool IsDefined = false;
1799
1800 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1801 unsigned Width, int64_t Default)
1802 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1803 virtual ~StructuredOpField() = default;
1804
1805 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1806 Parser.Error(L: Loc, Msg: "invalid " + Desc + ": " + Err);
1807 return false;
1808 }
1809
1810 virtual bool validate(AMDGPUAsmParser &Parser) const {
1811 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1812 return Error(Parser, Err: "not supported on this GPU");
1813 if (!isUIntN(N: Width, x: Val))
1814 return Error(Parser, Err: "only " + Twine(Width) + "-bit values are legal");
1815 return true;
1816 }
1817 };
1818
1819 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1820 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1821
1822 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1823 bool validateSendMsg(const OperandInfoTy &Msg,
1824 const OperandInfoTy &Op,
1825 const OperandInfoTy &Stream);
1826
1827 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1828 OperandInfoTy &Width);
1829
1830 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1831
1832 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1833 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1834 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1835
  // --- Source-location helpers ---------------------------------------------
  // Recover the SMLoc of a parsed operand for diagnostics, either by its
  // MCInst operand index or by a predicate over the parsed operands.
  SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
                  const OperandVector &Operands) const;
  SMLoc getInstLoc(const OperandVector &Operands) const;

  // --- Post-match semantic validation --------------------------------------
  // validateInstruction() is the entry point; the individual validate*
  // helpers check one encoding constraint each and report errors.
  bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
                           const OperandVector &Operands);
  bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  // Returns the index of an operand violating VOPD register-bank rules, or
  // std::nullopt if the instruction satisfies the constraints.
  std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
                                                      bool AsVOPD3);
  bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
  bool tryVOPD(const MCInst &Inst);
  bool tryVOPD3(const MCInst &Inst);
  bool tryAnotherVOPDEncoding(const MCInst &Inst);

  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
  bool validateTensorR128(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateTrue16OpSel(const MCInst &Inst);
  bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(MCRegister Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateDS(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             SMLoc IDLoc);
  bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
                              const unsigned CPol);
  bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
  bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
  bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
  // Constant-bus accounting used by validateConstantBusLimitations().
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Mnemonic availability checks for "instruction not supported on this GPU"
  // diagnostics.
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);

  // --- Lexer/token utilities -----------------------------------------------
  // Thin wrappers over the assembler lexer: peek, test, and consume tokens.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  StringRef getId() const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken(bool ShouldSkipSpace = true);
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;
  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  // Entry point for target-specific operands selected by the matcher kind.
  ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);

  // --- Operand parsers (one per special operand syntax) --------------------
  ParseStatus parseExpTgt(OperandVector &Operands);
  ParseStatus parseSendMsg(OperandVector &Operands);
  ParseStatus parseWaitEvent(OperandVector &Operands);
  ParseStatus parseInterpSlot(OperandVector &Operands);
  ParseStatus parseInterpAttr(OperandVector &Operands);
  ParseStatus parseSOPPBrTarget(OperandVector &Operands);
  ParseStatus parseBoolReg(OperandVector &Operands);

  // swizzle() macro parsing: parseSwizzle() dispatches to the per-mode
  // helpers below.
  bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
                           const unsigned MaxVal, const Twine &ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  ParseStatus parseSwizzle(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);
  bool parseSwizzleFFT(int64_t &Imm);
  bool parseSwizzleRotate(int64_t &Imm);

  ParseStatus parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF conversion: both forward to cvtMubufImpl, differing only in the
  // atomic flag.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: true); }

  ParseStatus parseOModSI(OperandVector &Operands);

  // --- MCInst converters (parsed operands -> encoded operand order) -------
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);

  void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                    OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
  void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
  void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);

  // --- DPP ------------------------------------------------------------------
  bool parseDimId(unsigned &Encoding);
  ParseStatus parseDim(OperandVector &Operands);
  bool convertDppBoundCtrl(int64_t &BoundCtrl);
  ParseStatus parseDPP8(OperandVector &Operands);
  ParseStatus parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  // DPP8 variants reuse the regular DPP converters with the flag set.
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtDPP(Inst, Operands, IsDPP8: true);
  }
  void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                  bool IsDPP8 = false);
  void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtVOP3DPP(Inst, Operands, IsDPP8: true);
  }

  // --- SDWA -----------------------------------------------------------------
  ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
                           AMDGPUOperand::ImmTy Type);
  ParseStatus parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  ParseStatus parseEndpgm(OperandVector &Operands);

  ParseStatus parseVOPD(OperandVector &Operands);
};
2017
2018} // end anonymous namespace
2019
2020// May be called with integer type with equivalent bitwidth.
2021static const fltSemantics *getFltSemantics(unsigned Size) {
2022 switch (Size) {
2023 case 4:
2024 return &APFloat::IEEEsingle();
2025 case 8:
2026 return &APFloat::IEEEdouble();
2027 case 2:
2028 return &APFloat::IEEEhalf();
2029 default:
2030 llvm_unreachable("unsupported fp type");
2031 }
2032}
2033
2034static const fltSemantics *getFltSemantics(MVT VT) {
2035 return getFltSemantics(Size: VT.getSizeInBits() / 8);
2036}
2037
// Return the float semantics used when encoding a floating-point literal
// for an operand of the given MC operand type. Note that 16-bit *integer*
// operand types deliberately map to single precision (see comment below).
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // When floating-point immediate is used as operand of type i16, the 32-bit
  // representation of the constant truncated to the 16 LSBs should be used.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    return &APFloat::IEEEsingle();
  // 64-bit operand types use double precision.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
  case AMDGPU::OPERAND_KIMM64:
    return &APFloat::IEEEdouble();
  // f16 and packed-f16 operand types use half precision.
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  // bf16 operand types use bfloat semantics.
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
    return &APFloat::BFloat();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
2081
2082//===----------------------------------------------------------------------===//
2083// Operand
2084//===----------------------------------------------------------------------===//
2085
2086static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2087 bool Lost;
2088
2089 // Convert literal to single precision
2090 APFloat::opStatus Status = FPLiteral.convert(ToSemantics: *getFltSemantics(VT),
2091 RM: APFloat::rmNearestTiesToEven,
2092 losesInfo: &Lost);
2093 // We allow precision lost but not overflow or underflow
2094 if (Status != APFloat::opOK &&
2095 Lost &&
2096 ((Status & APFloat::opOverflow) != 0 ||
2097 (Status & APFloat::opUnderflow) != 0)) {
2098 return false;
2099 }
2100
2101 return true;
2102}
2103
2104static bool isSafeTruncation(int64_t Val, unsigned Size) {
2105 return isUIntN(N: Size, x: Val) || isIntN(N: Size, x: Val);
2106}
2107
2108static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2109 if (VT.getScalarType() == MVT::i16)
2110 return isInlinableLiteral32(Literal: Val, HasInv2Pi);
2111
2112 if (VT.getScalarType() == MVT::f16)
2113 return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi);
2114
2115 assert(VT.getScalarType() == MVT::bf16);
2116
2117 return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi);
2118}
2119
// Return true if this immediate can be encoded as an inline constant (no
// literal dword required) when used as an operand of the given type.
// FP-literal and int-literal tokens take separate paths below.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmT: ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }

  // An explicit lit()/lit64() modifier forces a literal encoding.
  if (getModifiers().Lit != LitModifier::None)
    return false;

  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Literal: Imm.Val,
                                          HasInv2Pi: AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: the double literal must first convert to the target
    // semantics without overflow/underflow.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, VT: type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      bool Lost = false;
      switch (type.getScalarType().SimpleTy) {
      default:
        llvm_unreachable("unknown 16-bit type");
      case MVT::bf16:
        FPLiteral.convert(ToSemantics: APFloatBase::BFloat(), RM: APFloat::rmNearestTiesToEven,
                          losesInfo: &Lost);
        break;
      case MVT::f16:
        FPLiteral.convert(ToSemantics: APFloatBase::IEEEhalf(), RM: APFloat::rmNearestTiesToEven,
                          losesInfo: &Lost);
        break;
      case MVT::i16:
        FPLiteral.convert(ToSemantics: APFloatBase::IEEEsingle(),
                          RM: APFloat::rmNearestTiesToEven, losesInfo: &Lost);
        break;
      }
      // We need to use 32-bit representation here because when a floating-point
      // inline constant is used as an i16 operand, its 32-bit representation
      // representation will be used. We will need the 32-bit value to check if
      // it is FP inline constant.
      uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      return isInlineableLiteralOp16(Val: ImmVal, VT: type,
                                     HasInv2Pi: AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
        Literal: static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        HasInv2Pi: AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Literal: Imm.Val,
                                        HasInv2Pi: AsmParser->hasInv2PiInlineImm());
  }

  // Values that do not fit the operand width cannot be inlined.
  if (!isSafeTruncation(Val: Imm.Val, Size: type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
        Val: static_cast<int16_t>(Literal.getLoBits(numBits: 16).getSExtValue()),
        VT: type, HasInv2Pi: AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
      Literal: static_cast<int32_t>(Literal.getLoBits(numBits: 32).getZExtValue()),
      HasInv2Pi: AsmParser->hasInv2PiInlineImm());
}
2207
// Return true if this immediate may be encoded as a literal (as opposed to
// an inline constant) for an operand of the given type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmT: ImmTyNone)) {
    return false;
  }

  // 64-bit literals are only possible on targets with the 64-bit-literal
  // feature and a 64-bit operand position.
  bool Allow64Bit =
      (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64) {
      // A value that does not fit 32 bits needs (and is permitted) a full
      // 64-bit literal; otherwise fall through to the 32-bit check.
      if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Val: Imm.Val, IsFP64: false))
        return true;
      Size = 32;
    }

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Val: Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  //
  // For i16x2 operands, we assume that the specified literal is encoded as a
  // single-precision float. This is pretty odd, but it matches SP3 and what
  // happens in hardware.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
                     : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, VT: ExpectedType);
}
2266
2267bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2268 return isRegKind() && AsmParser->getMRI()->getRegClass(i: RCID).contains(Reg: getReg());
2269}
2270
2271bool AMDGPUOperand::isVRegWithInputMods() const {
2272 return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) ||
2273 // GFX90A allows DPP on 64-bit operands.
2274 (isRegClass(RCID: AMDGPU::VReg_64RegClassID) &&
2275 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2276}
2277
2278template <bool IsFake16>
2279bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2280 return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2281 : AMDGPU::VGPR_16_Lo128RegClassID);
2282}
2283
2284template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2285 return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32RegClassID
2286 : AMDGPU::VGPR_16RegClassID);
2287}
2288
2289bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2290 if (AsmParser->isVI())
2291 return isVReg32();
2292 if (AsmParser->isGFX9Plus())
2293 return isRegClass(RCID: AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2294 return false;
2295}
2296
// SDWA source operand with FP16 element type.
bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(type: MVT::f16);
}
2300
// SDWA source operand with FP32 element type.
bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(type: MVT::f32);
}
2304
// SDWA source operand with INT16 element type.
bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(type: MVT::i16);
}
2308
// SDWA source operand with INT32 element type.
bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(type: MVT::i32);
}
2312
2313bool AMDGPUOperand::isBoolReg() const {
2314 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2315 (AsmParser->isWave32() && isSCSrc_b32()));
2316}
2317
2318uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2319{
2320 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2321 assert(Size == 2 || Size == 4 || Size == 8);
2322
2323 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2324
2325 if (Imm.Mods.Abs) {
2326 Val &= ~FpSignMask;
2327 }
2328 if (Imm.Mods.Neg) {
2329 Val ^= FpSignMask;
2330 }
2331
2332 return Val;
2333}
2334
2335void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2336 MCOpIdx = Inst.getNumOperands();
2337
2338 if (isExpr()) {
2339 Inst.addOperand(Op: MCOperand::createExpr(Val: Expr));
2340 return;
2341 }
2342
2343 if (AMDGPU::isSISrcOperand(Desc: AsmParser->getMII()->get(Opcode: Inst.getOpcode()),
2344 OpNo: Inst.getNumOperands())) {
2345 addLiteralImmOperand(Inst, Val: Imm.Val,
2346 ApplyModifiers: ApplyModifiers &
2347 isImmTy(ImmT: ImmTyNone) && Imm.Mods.hasFPModifiers());
2348 } else {
2349 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2350 Inst.addOperand(Op: MCOperand::createImm(Val: Imm.Val));
2351 }
2352}
2353
// Encode this literal immediate into Inst, handling inline-constant
// promotion, 64-bit literal support, lit()/lit64() modifiers, and the
// fp-vs-int literal token distinction.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Opcode: Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(Desc: InstDesc, OpNo: OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  // 64-bit literal encodings exist but are unavailable for VOP3/VOP3P.
  bool CanUse64BitLiterals =
      AsmParser->has64BitLiterals() &&
      !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
  LitModifier Lit = getModifiers().Lit;
  MCContext &Ctx = AsmParser->getContext();

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      // Inline constants are emitted directly unless an explicit lit modifier
      // forces a literal encoding.
      if (Lit == LitModifier::None &&
          AMDGPU::isInlinableLiteral64(Literal: Literal.getZExtValue(),
                                       HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(Desc: InstDesc,
                                   OpNo: OpNum)) { // Expected 64-bit fp operand
        bool HasMandatoryLiteral =
            AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::imm);
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(numBits: 32) != 0 &&
            (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
            !HasMandatoryLiteral) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
              L: Inst.getLoc(),
              Msg: "Can't encode literal as exact 64-bit floating-point operand. "
              "Low 32-bits will be set to zero");
          Val &= 0xffffffff00000000u;
        }

        if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
             OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 ||
             OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) {
          if (CanUse64BitLiterals && Lit == LitModifier::None &&
              (isInt<32>(x: Val) || isUInt<32>(x: Val))) {
            // The floating-point operand will be verbalized as an
            // integer one. If that integer happens to fit 32 bits, on
            // re-assembling it will be intepreted as the high half of
            // the actual value, so we have to wrap it into lit64().
            Lit = LitModifier::Lit64;
          } else if (Lit == LitModifier::Lit) {
            // For FP64 operands lit() specifies the high half of the value.
            Val = Hi_32(Value: Val);
          }
        }
        break;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_KIMM64:
      // Same high-half re-assembly hazard as FP64 above.
      if (CanUse64BitLiterals && Lit == LitModifier::None &&
          (isInt<32>(x: Val) || isUInt<32>(x: Val)))
        Lit = LitModifier::Lit64;
      break;

    case AMDGPU::OPERAND_REG_IMM_BF16:
    case AMDGPU::OPERAND_REG_INLINE_C_BF16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    case AMDGPU::OPERAND_REG_IMM_V2BF16:
      if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
          Literal == 0x3fc45f306725feed) {
        // This is the 1/(2*pi) which is going to be truncated to bf16 with the
        // loss of precision. The constant represents ideomatic fp32 value of
        // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
        // bits. Prevent rounding below.
        Inst.addOperand(Op: MCOperand::createImm(Val: 0x3e22));
        return;
      }
      [[fallthrough]];

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
    case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
    case AMDGPU::OPERAND_KIMM32:
    case AMDGPU::OPERAND_KIMM16:
    case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(ToSemantics: *getOpFltSemantics(OperandType: OpTy),
                        RM: APFloat::rmNearestTiesToEven, losesInfo: &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      Val = FPLiteral.bitcastToAPInt().getZExtValue();
      break;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    // Explicit lit modifiers survive as a lit()/lit64() expression so the
    // streamer can reproduce them.
    if (Lit != LitModifier::None) {
      Inst.addOperand(
          Op: MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(Lit, Value: Val, Ctx)));
    } else {
      Inst.addOperand(Op: MCOperand::createImm(Val));
    }
    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    // 32-bit (and packed) operands: use the value as-is.
    break;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    if (Lit == LitModifier::None &&
        AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(Op: MCOperand::createImm(Val));
      return;
    }

    // When the 32 MSBs are not zero (effectively means it can't be safely
    // truncated to uint32_t), if the target doesn't support 64-bit literals, or
    // the lit modifier is explicitly used, we need to truncate it to the 32
    // LSBs.
    if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
      Val = Lo_32(Value: Val);
    break;

  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (Lit == LitModifier::None &&
        AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(Op: MCOperand::createImm(Val));
      return;
    }

    // If the target doesn't support 64-bit literals, we need to use the
    // constant as the high 32 MSBs of a double-precision floating point value.
    if (!AsmParser->has64BitLiterals()) {
      Val = static_cast<uint64_t>(Val) << 32;
    } else {
      // Now the target does support 64-bit literals, there are two cases
      // where we still want to use src_literal encoding:
      // 1) explicitly forced by using lit modifier;
      // 2) the value is a valid 32-bit representation (signed or unsigned),
      // meanwhile not forced by lit64 modifier.
      if (Lit == LitModifier::Lit ||
          (Lit != LitModifier::Lit64 && (isInt<32>(x: Val) || isUInt<32>(x: Val))))
        Val = static_cast<uint64_t>(Val) << 32;
    }

    // For FP64 operands lit() specifies the high half of the value.
    if (Lit == LitModifier::Lit)
      Val = Hi_32(Value: Val);
    break;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16:
    // 16-bit operands: the value is already in range (see isLiteralImm).
    break;

  case AMDGPU::OPERAND_KIMM64:
    // A 32-bit-representable value goes in the high half unless lit64()
    // explicitly requests full 64-bit encoding.
    if ((isInt<32>(x: Val) || isUInt<32>(x: Val)) && Lit != LitModifier::Lit64)
      Val <<= 32;
    break;

  default:
    llvm_unreachable("invalid operand type");
  }

  if (Lit != LitModifier::None) {
    Inst.addOperand(
        Op: MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(Lit, Value: Val, Ctx)));
  } else {
    Inst.addOperand(Op: MCOperand::createImm(Val));
  }
}
2587
2588void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2589 MCOpIdx = Inst.getNumOperands();
2590 Inst.addOperand(Op: MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: getReg(), STI: AsmParser->getSTI())));
2591}
2592
2593bool AMDGPUOperand::isInlineValue() const {
2594 return isRegKind() && ::isInlineValue(Reg: getReg());
2595}
2596
2597//===----------------------------------------------------------------------===//
2598// AsmParser
2599//===----------------------------------------------------------------------===//
2600
2601void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2602 // TODO: make those pre-defined variables read-only.
2603 // Currently there is none suitable machinery in the core llvm-mc for this.
2604 // MCSymbol::isRedefinable is intended for another purpose, and
2605 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2606 MCContext &Ctx = getContext();
2607 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id);
2608 Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx));
2609}
2610
2611static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2612 if (Is == IS_VGPR) {
2613 switch (RegWidth) {
2614 default: return -1;
2615 case 32:
2616 return AMDGPU::VGPR_32RegClassID;
2617 case 64:
2618 return AMDGPU::VReg_64RegClassID;
2619 case 96:
2620 return AMDGPU::VReg_96RegClassID;
2621 case 128:
2622 return AMDGPU::VReg_128RegClassID;
2623 case 160:
2624 return AMDGPU::VReg_160RegClassID;
2625 case 192:
2626 return AMDGPU::VReg_192RegClassID;
2627 case 224:
2628 return AMDGPU::VReg_224RegClassID;
2629 case 256:
2630 return AMDGPU::VReg_256RegClassID;
2631 case 288:
2632 return AMDGPU::VReg_288RegClassID;
2633 case 320:
2634 return AMDGPU::VReg_320RegClassID;
2635 case 352:
2636 return AMDGPU::VReg_352RegClassID;
2637 case 384:
2638 return AMDGPU::VReg_384RegClassID;
2639 case 512:
2640 return AMDGPU::VReg_512RegClassID;
2641 case 1024:
2642 return AMDGPU::VReg_1024RegClassID;
2643 }
2644 } else if (Is == IS_TTMP) {
2645 switch (RegWidth) {
2646 default: return -1;
2647 case 32:
2648 return AMDGPU::TTMP_32RegClassID;
2649 case 64:
2650 return AMDGPU::TTMP_64RegClassID;
2651 case 128:
2652 return AMDGPU::TTMP_128RegClassID;
2653 case 256:
2654 return AMDGPU::TTMP_256RegClassID;
2655 case 512:
2656 return AMDGPU::TTMP_512RegClassID;
2657 }
2658 } else if (Is == IS_SGPR) {
2659 switch (RegWidth) {
2660 default: return -1;
2661 case 32:
2662 return AMDGPU::SGPR_32RegClassID;
2663 case 64:
2664 return AMDGPU::SGPR_64RegClassID;
2665 case 96:
2666 return AMDGPU::SGPR_96RegClassID;
2667 case 128:
2668 return AMDGPU::SGPR_128RegClassID;
2669 case 160:
2670 return AMDGPU::SGPR_160RegClassID;
2671 case 192:
2672 return AMDGPU::SGPR_192RegClassID;
2673 case 224:
2674 return AMDGPU::SGPR_224RegClassID;
2675 case 256:
2676 return AMDGPU::SGPR_256RegClassID;
2677 case 288:
2678 return AMDGPU::SGPR_288RegClassID;
2679 case 320:
2680 return AMDGPU::SGPR_320RegClassID;
2681 case 352:
2682 return AMDGPU::SGPR_352RegClassID;
2683 case 384:
2684 return AMDGPU::SGPR_384RegClassID;
2685 case 512:
2686 return AMDGPU::SGPR_512RegClassID;
2687 }
2688 } else if (Is == IS_AGPR) {
2689 switch (RegWidth) {
2690 default: return -1;
2691 case 32:
2692 return AMDGPU::AGPR_32RegClassID;
2693 case 64:
2694 return AMDGPU::AReg_64RegClassID;
2695 case 96:
2696 return AMDGPU::AReg_96RegClassID;
2697 case 128:
2698 return AMDGPU::AReg_128RegClassID;
2699 case 160:
2700 return AMDGPU::AReg_160RegClassID;
2701 case 192:
2702 return AMDGPU::AReg_192RegClassID;
2703 case 224:
2704 return AMDGPU::AReg_224RegClassID;
2705 case 256:
2706 return AMDGPU::AReg_256RegClassID;
2707 case 288:
2708 return AMDGPU::AReg_288RegClassID;
2709 case 320:
2710 return AMDGPU::AReg_320RegClassID;
2711 case 352:
2712 return AMDGPU::AReg_352RegClassID;
2713 case 384:
2714 return AMDGPU::AReg_384RegClassID;
2715 case 512:
2716 return AMDGPU::AReg_512RegClassID;
2717 case 1024:
2718 return AMDGPU::AReg_1024RegClassID;
2719 }
2720 }
2721 return -1;
2722}
2723
/// Map a special (non-numbered) register name to its MCRegister.
/// Returns AMDGPU::NoRegister when \p RegName is not recognized. Many
/// entries accept both the plain and the "src_"-prefixed spelling
/// (e.g. "scc" and "src_scc").
static MCRegister getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
      .Case(S: "exec", Value: AMDGPU::EXEC)
      .Case(S: "vcc", Value: AMDGPU::VCC)
      .Case(S: "flat_scratch", Value: AMDGPU::FLAT_SCR)
      .Case(S: "xnack_mask", Value: AMDGPU::XNACK_MASK)
      .Case(S: "shared_base", Value: AMDGPU::SRC_SHARED_BASE)
      .Case(S: "src_shared_base", Value: AMDGPU::SRC_SHARED_BASE)
      .Case(S: "shared_limit", Value: AMDGPU::SRC_SHARED_LIMIT)
      .Case(S: "src_shared_limit", Value: AMDGPU::SRC_SHARED_LIMIT)
      .Case(S: "private_base", Value: AMDGPU::SRC_PRIVATE_BASE)
      .Case(S: "src_private_base", Value: AMDGPU::SRC_PRIVATE_BASE)
      .Case(S: "private_limit", Value: AMDGPU::SRC_PRIVATE_LIMIT)
      .Case(S: "src_private_limit", Value: AMDGPU::SRC_PRIVATE_LIMIT)
      .Case(S: "src_flat_scratch_base_lo", Value: AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
      .Case(S: "src_flat_scratch_base_hi", Value: AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
      .Case(S: "pops_exiting_wave_id", Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case(S: "src_pops_exiting_wave_id", Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID)
      .Case(S: "lds_direct", Value: AMDGPU::LDS_DIRECT)
      .Case(S: "src_lds_direct", Value: AMDGPU::LDS_DIRECT)
      .Case(S: "m0", Value: AMDGPU::M0)
      .Case(S: "vccz", Value: AMDGPU::SRC_VCCZ)
      .Case(S: "src_vccz", Value: AMDGPU::SRC_VCCZ)
      .Case(S: "execz", Value: AMDGPU::SRC_EXECZ)
      .Case(S: "src_execz", Value: AMDGPU::SRC_EXECZ)
      .Case(S: "scc", Value: AMDGPU::SRC_SCC)
      .Case(S: "src_scc", Value: AMDGPU::SRC_SCC)
      .Case(S: "tba", Value: AMDGPU::TBA)
      .Case(S: "tma", Value: AMDGPU::TMA)
      .Case(S: "flat_scratch_lo", Value: AMDGPU::FLAT_SCR_LO)
      .Case(S: "flat_scratch_hi", Value: AMDGPU::FLAT_SCR_HI)
      .Case(S: "xnack_mask_lo", Value: AMDGPU::XNACK_MASK_LO)
      .Case(S: "xnack_mask_hi", Value: AMDGPU::XNACK_MASK_HI)
      .Case(S: "vcc_lo", Value: AMDGPU::VCC_LO)
      .Case(S: "vcc_hi", Value: AMDGPU::VCC_HI)
      .Case(S: "exec_lo", Value: AMDGPU::EXEC_LO)
      .Case(S: "exec_hi", Value: AMDGPU::EXEC_HI)
      .Case(S: "tma_lo", Value: AMDGPU::TMA_LO)
      .Case(S: "tma_hi", Value: AMDGPU::TMA_HI)
      .Case(S: "tba_lo", Value: AMDGPU::TBA_LO)
      .Case(S: "tba_hi", Value: AMDGPU::TBA_HI)
      .Case(S: "pc", Value: AMDGPU::PC_REG)
      .Case(S: "null", Value: AMDGPU::SGPR_NULL)
      .Default(Value: AMDGPU::NoRegister)
}
2769
/// Parse a register operand and report its register, start and end
/// locations. Returns true on failure (MCTargetAsmParser convention).
/// NOTE(review): \p RestoreOnFailure is not forwarded to parseRegister()
/// here — confirm whether a failed parse is meant to leave the token
/// stream untouched on this path.
bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}
2780
/// MCTargetAsmParser entry point; returns true on failure. Never restores
/// consumed tokens.
bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
2785
2786ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2787 SMLoc &EndLoc) {
2788 bool Result = ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2789 bool PendingErrors = getParser().hasPendingError();
2790 getParser().clearPendingErrors();
2791 if (PendingErrors)
2792 return ParseStatus::Failure;
2793 if (Result)
2794 return ParseStatus::NoMatch;
2795 return ParseStatus::Success;
2796}
2797
2798bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2799 RegisterKind RegKind,
2800 MCRegister Reg1, SMLoc Loc) {
2801 switch (RegKind) {
2802 case IS_SPECIAL:
2803 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2804 Reg = AMDGPU::EXEC;
2805 RegWidth = 64;
2806 return true;
2807 }
2808 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2809 Reg = AMDGPU::FLAT_SCR;
2810 RegWidth = 64;
2811 return true;
2812 }
2813 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2814 Reg = AMDGPU::XNACK_MASK;
2815 RegWidth = 64;
2816 return true;
2817 }
2818 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2819 Reg = AMDGPU::VCC;
2820 RegWidth = 64;
2821 return true;
2822 }
2823 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2824 Reg = AMDGPU::TBA;
2825 RegWidth = 64;
2826 return true;
2827 }
2828 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2829 Reg = AMDGPU::TMA;
2830 RegWidth = 64;
2831 return true;
2832 }
2833 Error(L: Loc, Msg: "register does not fit in the list");
2834 return false;
2835 case IS_VGPR:
2836 case IS_SGPR:
2837 case IS_AGPR:
2838 case IS_TTMP:
2839 if (Reg1 != Reg + RegWidth / 32) {
2840 Error(L: Loc, Msg: "registers in a list must have consecutive indices");
2841 return false;
2842 }
2843 RegWidth += 32;
2844 return true;
2845 default:
2846 llvm_unreachable("unexpected register kind");
2847 }
2848}
2849
// Associates a regular-register name prefix (e.g. "v", "ttmp") with the
// kind of register it denotes.
struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};
2854
// Prefix table for regular (numbered) registers. getRegularRegInfo() scans
// this table in order and matches by prefix, so "acc" must precede "a";
// otherwise "acc0" would match prefix "a" with the non-numeric suffix
// "cc0" and be rejected.
static constexpr RegInfo RegularRegisters[] = {
  {.Name: {"v"}, .Kind: IS_VGPR},
  {.Name: {"s"}, .Kind: IS_SGPR},
  {.Name: {"ttmp"}, .Kind: IS_TTMP},
  {.Name: {"acc"}, .Kind: IS_AGPR},
  {.Name: {"a"}, .Kind: IS_AGPR},
};
2862
2863static bool isRegularReg(RegisterKind Kind) {
2864 return Kind == IS_VGPR ||
2865 Kind == IS_SGPR ||
2866 Kind == IS_TTMP ||
2867 Kind == IS_AGPR;
2868}
2869
2870static const RegInfo* getRegularRegInfo(StringRef Str) {
2871 for (const RegInfo &Reg : RegularRegisters)
2872 if (Str.starts_with(Prefix: Reg.Name))
2873 return &Reg;
2874 return nullptr;
2875}
2876
2877static bool getRegNum(StringRef Str, unsigned& Num) {
2878 return !Str.getAsInteger(Radix: 10, Result&: Num);
2879}
2880
2881bool
2882AMDGPUAsmParser::isRegister(const AsmToken &Token,
2883 const AsmToken &NextToken) const {
2884
2885 // A list of consecutive registers: [s0,s1,s2,s3]
2886 if (Token.is(K: AsmToken::LBrac))
2887 return true;
2888
2889 if (!Token.is(K: AsmToken::Identifier))
2890 return false;
2891
2892 // A single register like s0 or a range of registers like s[0:1]
2893
2894 StringRef Str = Token.getString();
2895 const RegInfo *Reg = getRegularRegInfo(Str);
2896 if (Reg) {
2897 StringRef RegName = Reg->Name;
2898 StringRef RegSuffix = Str.substr(Start: RegName.size());
2899 if (!RegSuffix.empty()) {
2900 RegSuffix.consume_back(Suffix: ".l");
2901 RegSuffix.consume_back(Suffix: ".h");
2902 unsigned Num;
2903 // A single register with an index: rXX
2904 if (getRegNum(Str: RegSuffix, Num))
2905 return true;
2906 } else {
2907 // A range of registers: r[XX:YY].
2908 if (NextToken.is(K: AsmToken::LBrac))
2909 return true;
2910 }
2911 }
2912
2913 return getSpecialRegForName(RegName: Str).isValid();
2914}
2915
2916bool
2917AMDGPUAsmParser::isRegister()
2918{
2919 return isRegister(Token: getToken(), NextToken: peekToken());
2920}
2921
2922MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2923 unsigned SubReg, unsigned RegWidth,
2924 SMLoc Loc) {
2925 assert(isRegularReg(RegKind));
2926
2927 unsigned AlignSize = 1;
2928 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2929 // SGPR and TTMP registers must be aligned.
2930 // Max required alignment is 4 dwords.
2931 AlignSize = std::min(a: llvm::bit_ceil(Value: RegWidth / 32), b: 4u);
2932 }
2933
2934 if (RegNum % AlignSize != 0) {
2935 Error(L: Loc, Msg: "invalid register alignment");
2936 return MCRegister();
2937 }
2938
2939 unsigned RegIdx = RegNum / AlignSize;
2940 int RCID = getRegClass(Is: RegKind, RegWidth);
2941 if (RCID == -1) {
2942 Error(L: Loc, Msg: "invalid or unsupported register size");
2943 return MCRegister();
2944 }
2945
2946 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2947 const MCRegisterClass RC = TRI->getRegClass(i: RCID);
2948 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2949 Error(L: Loc, Msg: "register index is out of range");
2950 return AMDGPU::NoRegister;
2951 }
2952
2953 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2954 Error(L: Loc, Msg: "register index is out of range");
2955 return MCRegister();
2956 }
2957
2958 MCRegister Reg = RC.getRegister(i: RegIdx);
2959
2960 if (SubReg) {
2961 Reg = TRI->getSubReg(Reg, Idx: SubReg);
2962
2963 // Currently all regular registers have their .l and .h subregisters, so
2964 // we should never need to generate an error here.
2965 assert(Reg && "Invalid subregister!");
2966 }
2967
2968 return Reg;
2969}
2970
/// Parse a bracketed register index or range: "[XX]" or "[XX:YY]".
///
/// On success \p Num holds the first index, \p RegWidth the total width in
/// bits (32 per register in the range), and \p SubReg an optional lo16/hi16
/// half selected by a trailing ".l"/".h" (accepted only for the
/// single-register form). Returns false, with a diagnostic, on any syntax
/// or range error.
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
                                    unsigned &SubReg) {
  int64_t RegLo, RegHi;
  if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(Imm&: RegLo))
    return false;

  // The ":YY" upper bound is optional; a single index is a 1-wide range.
  if (trySkipToken(Kind: AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(Imm&: RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
    return false;

  // Both indices must fit in 32 bits and be properly ordered.
  if (!isUInt<32>(x: RegLo)) {
    Error(L: FirstIdxLoc, Msg: "invalid register index");
    return false;
  }

  if (!isUInt<32>(x: RegHi)) {
    Error(L: SecondIdxLoc, Msg: "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(L: FirstIdxLoc, Msg: "first register index should not exceed second index");
    return false;
  }

  // A 16-bit half suffix is only meaningful on a single 32-bit register.
  if (RegHi == RegLo) {
    StringRef RegSuffix = getTokenStr();
    if (RegSuffix == ".l") {
      SubReg = AMDGPU::lo16;
      lex();
    } else if (RegSuffix == ".h") {
      SubReg = AMDGPU::hi16;
      lex();
    }
  }

  Num = static_cast<unsigned>(RegLo);
  RegWidth = 32 * ((RegHi - RegLo) + 1);

  return true;
}
3025
3026MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3027 unsigned &RegNum,
3028 unsigned &RegWidth,
3029 SmallVectorImpl<AsmToken> &Tokens) {
3030 assert(isToken(AsmToken::Identifier));
3031 MCRegister Reg = getSpecialRegForName(RegName: getTokenStr());
3032 if (Reg) {
3033 RegNum = 0;
3034 RegWidth = 32;
3035 RegKind = IS_SPECIAL;
3036 Tokens.push_back(Elt: getToken());
3037 lex(); // skip register name
3038 }
3039 return Reg;
3040}
3041
/// Parse a regular register reference: a prefix (v/s/a/acc/ttmp) followed
/// either by a bare index with an optional ".l"/".h" half suffix, or by a
/// bracketed range "[XX:YY]". Consumed tokens are recorded in \p Tokens.
/// Returns an invalid MCRegister on error (diagnostic already emitted).
MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                            unsigned &RegNum,
                                            unsigned &RegWidth,
                                            SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(Str: RegName);
  if (!RI) {
    Error(L: Loc, Msg: "invalid register name");
    return MCRegister();
  }

  Tokens.push_back(Elt: getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(Start: RI->Name.size());
  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
    // An optional 16-bit half suffix selects the lo16/hi16 subregister.
    if (RegSuffix.consume_back(Suffix: ".l"))
      SubReg = AMDGPU::lo16;
    else if (RegSuffix.consume_back(Suffix: ".h"))
      SubReg = AMDGPU::hi16;

    // Single 32-bit register: vXX.
    if (!getRegNum(Str: RegSuffix, Num&: RegNum)) {
      Error(L: Loc, Msg: "invalid register index");
      return MCRegister();
    }
    RegWidth = 32;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(Num&: RegNum, RegWidth, SubReg))
      return MCRegister();
  }

  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
}
3082
/// Parse a bracketed list of registers, e.g. "[s0,s1,s2,s3]". Every
/// element must be a single 32-bit register of the same kind with
/// consecutive indices; the result is the covering multi-dword register.
/// Returns an invalid MCRegister on error (diagnostic already emitted).
MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
                                         unsigned &RegNum, unsigned &RegWidth,
                                         SmallVectorImpl<AsmToken> &Tokens) {
  MCRegister Reg;
  auto ListLoc = getLoc();

  if (!skipToken(Kind: AsmToken::LBrac,
                 ErrMsg: "expected a register or a list of registers")) {
    return MCRegister();
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return MCRegister();
  if (RegWidth != 32) {
    Error(L: Loc, Msg: "expected a single 32-bit register");
    return MCRegister();
  }

  for (; trySkipToken(Kind: AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    MCRegister NextReg;
    unsigned NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(RegKind&: NextRegKind, Reg&: NextReg,
                             RegNum&: NextRegNum, RegWidth&: NextRegWidth,
                             Tokens)) {
      return MCRegister();
    }
    if (NextRegWidth != 32) {
      Error(L: Loc, Msg: "expected a single 32-bit register");
      return MCRegister();
    }
    if (NextRegKind != RegKind) {
      Error(L: Loc, Msg: "registers in a list must be of the same kind");
      return MCRegister();
    }
    // Grows RegWidth by 32 after validating the indices are consecutive.
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, Reg1: NextReg, Loc))
      return MCRegister();
  }

  if (!skipToken(Kind: AsmToken::RBrac,
                 ErrMsg: "expected a comma or a closing square bracket")) {
    return MCRegister();
  }

  // Re-resolve the accumulated range into one covering register.
  if (isRegularReg(Kind: RegKind))
    Reg = getRegularReg(RegKind, RegNum, SubReg: NoSubRegister, RegWidth, Loc: ListLoc);

  return Reg;
}
3137
/// Parse any register reference: a special name, a regular (numbered)
/// register, or a "[reg,reg,...]" list. Consumed tokens are appended to
/// \p Tokens so callers can restore the stream. Returns true on success;
/// on failure a diagnostic has already been emitted.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          MCRegister &Reg, unsigned &RegNum,
                                          unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = MCRegister();

  if (isToken(Kind: AsmToken::Identifier)) {
    // Try a special name first; fall back to v/s/a/acc/ttmp syntax.
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (!Reg)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (!Reg) {
    // The failing callee is expected to have reported the problem.
    assert(Parser.hasPendingError());
    return false;
  }

  // Syntactically valid registers may still be absent on this subtarget.
  if (!subtargetHasRegister(MRI: *TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(L: Loc, Msg: "'null' operand is not supported on this GPU");
    } else {
      Error(L: Loc, Msg: Twine(AMDGPUInstPrinter::getRegisterName(Reg)) +
                    " register not available on this GPU");
    }
    return false;
  }

  return true;
}
3171
3172bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3173 MCRegister &Reg, unsigned &RegNum,
3174 unsigned &RegWidth,
3175 bool RestoreOnFailure /*=false*/) {
3176 Reg = MCRegister();
3177
3178 SmallVector<AsmToken, 1> Tokens;
3179 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3180 if (RestoreOnFailure) {
3181 while (!Tokens.empty()) {
3182 getLexer().UnLex(Token: Tokens.pop_back_val());
3183 }
3184 }
3185 return true;
3186 }
3187 return false;
3188}
3189
3190std::optional<StringRef>
3191AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3192 switch (RegKind) {
3193 case IS_VGPR:
3194 return StringRef(".amdgcn.next_free_vgpr");
3195 case IS_SGPR:
3196 return StringRef(".amdgcn.next_free_sgpr");
3197 default:
3198 return std::nullopt;
3199 }
3200}
3201
/// Create (or reset) the GPR-count tracking symbol for \p RegKind and
/// initialize it to zero. Must only be called for kinds that have a
/// tracking symbol (VGPR/SGPR).
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(Value: 0, Ctx&: getContext()));
  // Redefinable so updateGprCountSymbols() can raise the value later.
  Sym->setRedefinable(true);
}
3209
/// Record a register use starting at dword index \p DwordRegIndex with
/// width \p RegWidth bits by raising the matching
/// .amdgcn.next_free_{v,s}gpr symbol when the new maximum exceeds it.
/// Returns true on success; false (after a diagnostic) when the symbol was
/// redefined to something unusable — note Error() returns true, so
/// "return !Error(...)" yields false.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(GPU: getSTI().getCPU()).Major < 6)
    return true;

  // Kinds without a tracking symbol (anything but VGPR/SGPR) are ignored.
  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName);

  // Highest dword index touched by this use.
  int64_t NewMax = DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(L: getLoc(),
                  Msg: ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue()->evaluateAsAbsolute(Res&: OldCount))
    return !Error(
        L: getLoc(),
        Msg: ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // Only ever grow: the symbol holds the next free register index.
  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(Value: NewMax + 1, Ctx&: getContext()));

  return true;
}
3238
/// Parse a register and wrap it in an AMDGPUOperand, updating the
/// GPR-count bookkeeping (HSA ABI symbols, or the legacy KernelScope
/// tracker otherwise). Returns nullptr on failure.
/// NOTE(review): \p RestoreOnFailure is not forwarded to
/// ParseAMDGPURegister here — confirm whether token restoration is
/// intended on this path.
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  MCRegister Reg;
  unsigned RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  if (isHsaAbi(STI: getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex: RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex: RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(AsmParser: this, Reg, S: StartLoc, E: EndLoc);
}
3258
3259ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3260 bool HasSP3AbsModifier, LitModifier Lit) {
3261 // TODO: add syntactic sugar for 1/(2*PI)
3262
3263 if (isRegister() || isModifier())
3264 return ParseStatus::NoMatch;
3265
3266 if (Lit == LitModifier::None) {
3267 if (trySkipId(Id: "lit"))
3268 Lit = LitModifier::Lit;
3269 else if (trySkipId(Id: "lit64"))
3270 Lit = LitModifier::Lit64;
3271
3272 if (Lit != LitModifier::None) {
3273 if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit"))
3274 return ParseStatus::Failure;
3275 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3276 if (S.isSuccess() &&
3277 !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3278 return ParseStatus::Failure;
3279 return S;
3280 }
3281 }
3282
3283 const auto& Tok = getToken();
3284 const auto& NextTok = peekToken();
3285 bool IsReal = Tok.is(K: AsmToken::Real);
3286 SMLoc S = getLoc();
3287 bool Negate = false;
3288
3289 if (!IsReal && Tok.is(K: AsmToken::Minus) && NextTok.is(K: AsmToken::Real)) {
3290 lex();
3291 IsReal = true;
3292 Negate = true;
3293 }
3294
3295 AMDGPUOperand::Modifiers Mods;
3296 Mods.Lit = Lit;
3297
3298 if (IsReal) {
3299 // Floating-point expressions are not supported.
3300 // Can only allow floating-point literals with an
3301 // optional sign.
3302
3303 StringRef Num = getTokenStr();
3304 lex();
3305
3306 APFloat RealVal(APFloat::IEEEdouble());
3307 auto roundMode = APFloat::rmNearestTiesToEven;
3308 if (errorToBool(Err: RealVal.convertFromString(Num, roundMode).takeError()))
3309 return ParseStatus::Failure;
3310 if (Negate)
3311 RealVal.changeSign();
3312
3313 Operands.push_back(
3314 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: RealVal.bitcastToAPInt().getZExtValue(), Loc: S,
3315 Type: AMDGPUOperand::ImmTyNone, IsFPImm: true));
3316 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3317 Op.setModifiers(Mods);
3318
3319 return ParseStatus::Success;
3320
3321 } else {
3322 int64_t IntVal;
3323 const MCExpr *Expr;
3324 SMLoc S = getLoc();
3325
3326 if (HasSP3AbsModifier) {
3327 // This is a workaround for handling expressions
3328 // as arguments of SP3 'abs' modifier, for example:
3329 // |1.0|
3330 // |-1|
3331 // |1+x|
3332 // This syntax is not compatible with syntax of standard
3333 // MC expressions (due to the trailing '|').
3334 SMLoc EndLoc;
3335 if (getParser().parsePrimaryExpr(Res&: Expr, EndLoc, TypeInfo: nullptr))
3336 return ParseStatus::Failure;
3337 } else {
3338 if (Parser.parseExpression(Res&: Expr))
3339 return ParseStatus::Failure;
3340 }
3341
3342 if (Expr->evaluateAsAbsolute(Res&: IntVal)) {
3343 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S));
3344 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3345 Op.setModifiers(Mods);
3346 } else {
3347 if (Lit != LitModifier::None)
3348 return ParseStatus::NoMatch;
3349 Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S));
3350 }
3351
3352 return ParseStatus::Success;
3353 }
3354
3355 return ParseStatus::NoMatch;
3356}
3357
3358ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3359 if (!isRegister())
3360 return ParseStatus::NoMatch;
3361
3362 if (auto R = parseRegister()) {
3363 assert(R->isReg());
3364 Operands.push_back(Elt: std::move(R));
3365 return ParseStatus::Success;
3366 }
3367 return ParseStatus::Failure;
3368}
3369
3370ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3371 bool HasSP3AbsMod, LitModifier Lit) {
3372 ParseStatus Res = parseReg(Operands);
3373 if (!Res.isNoMatch())
3374 return Res;
3375 if (isModifier())
3376 return ParseStatus::NoMatch;
3377 return parseImm(Operands, HasSP3AbsModifier: HasSP3AbsMod, Lit);
3378}
3379
3380bool
3381AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3382 if (Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::LParen)) {
3383 const auto &str = Token.getString();
3384 return str == "abs" || str == "neg" || str == "sext";
3385 }
3386 return false;
3387}
3388
3389bool
3390AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3391 return Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::Colon);
3392}
3393
// An operand modifier is either a named one (abs/neg/sext followed by a
// paren) or the SP3 "|...|" abs syntax introduced by a pipe token.
bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(K: AsmToken::Pipe);
}
3398
// True when the tokens begin either a register reference or an operand
// modifier; used to decide how a leading '-' should be interpreted.
bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}
3403
3404// Check if this is an operand modifier or an opcode modifier
3405// which may look like an expression but it is not. We should
3406// avoid parsing these modifiers as expressions. Currently
3407// recognized sequences are:
3408// |...|
3409// abs(...)
3410// neg(...)
3411// sext(...)
3412// -reg
3413// -|...|
3414// -abs(...)
3415// name:...
3416//
3417bool
3418AMDGPUAsmParser::isModifier() {
3419
3420 AsmToken Tok = getToken();
3421 AsmToken NextToken[2];
3422 peekTokens(Tokens: NextToken);
3423
3424 return isOperandModifier(Token: Tok, NextToken: NextToken[0]) ||
3425 (Tok.is(K: AsmToken::Minus) && isRegOrOperandModifier(Token: NextToken[0], NextToken: NextToken[1])) ||
3426 isOpcodeModifierWithVal(Token: Tok, NextToken: NextToken[0]);
3427}
3428
3429// Check if the current token is an SP3 'neg' modifier.
3430// Currently this modifier is allowed in the following context:
3431//
3432// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3433// 2. Before an 'abs' modifier: -abs(...)
3434// 3. Before an SP3 'abs' modifier: -|...|
3435//
3436// In all other cases "-" is handled as a part
3437// of an expression that follows the sign.
3438//
3439// Note: When "-" is followed by an integer literal,
3440// this is interpreted as integer negation rather
3441// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
3443// NEG modifier would have resulted in different meaning
3444// of integer literals used with VOP1/2/C and VOP3,
3445// for example:
3446// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3447// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3448// Negative fp literals with preceding "-" are
3449// handled likewise for uniformity
3450//
3451bool
3452AMDGPUAsmParser::parseSP3NegModifier() {
3453
3454 AsmToken NextToken[2];
3455 peekTokens(Tokens: NextToken);
3456
3457 if (isToken(Kind: AsmToken::Minus) &&
3458 (isRegister(Token: NextToken[0], NextToken: NextToken[1]) ||
3459 NextToken[0].is(K: AsmToken::Pipe) ||
3460 isId(Token: NextToken[0], Id: "abs"))) {
3461 lex();
3462 return true;
3463 }
3464
3465 return false;
3466}
3467
/// Parse an operand that may carry floating-point input modifiers: SP3
/// negation ("-x"), named neg(...)/abs(...), SP3 abs ("|x|"), and an
/// optional lit()/lit64() literal wrapper. With \p AllowImm false only
/// registers are accepted. Modifier state is attached to the parsed
/// operand at the end.
ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(Kind: AsmToken::Minus) && peekToken().is(K: AsmToken::Minus))
    return Error(L: getLoc(), Msg: "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId(Id: "neg");
  // The two negation spellings are mutually exclusive.
  if (Neg && SP3Neg)
    return Error(L: Loc, Msg: "expected register or immediate");
  if (Neg && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after neg"))
    return ParseStatus::Failure;

  Abs = trySkipId(Id: "abs");
  if (Abs && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after abs"))
    return ParseStatus::Failure;

  LitModifier Lit = LitModifier::None;
  if (trySkipId(Id: "lit")) {
    Lit = LitModifier::Lit;
    if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit"))
      return ParseStatus::Failure;
  } else if (trySkipId(Id: "lit64")) {
    Lit = LitModifier::Lit64;
    if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit64"))
      return ParseStatus::Failure;
    // NOTE(review): this diagnostic points at Loc (captured before 'neg'
    // was parsed), not at the lit64 token itself — confirm intended.
    if (!has64BitLiterals())
      return Error(L: Loc, Msg: "lit64 is not supported on this GPU");
  }

  Loc = getLoc();
  SP3Abs = trySkipToken(Kind: AsmToken::Pipe);
  if (Abs && SP3Abs)
    return Error(L: Loc, Msg: "expected register or immediate");

  ParseStatus Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, HasSP3AbsMod: SP3Abs, Lit);
  } else {
    Res = parseReg(Operands);
  }
  // Once any modifier has been consumed, a failed operand parse is a hard
  // failure rather than a NoMatch.
  if (!Res.isSuccess())
    return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
               ? ParseStatus::Failure
               : Res;

  // NOTE(review): this error is recorded without returning Failure, so
  // parsing continues and the closing delimiters below are still
  // consumed — verify that is intentional.
  if (Lit != LitModifier::None && !Operands.back()->isImm())
    Error(L: Loc, Msg: "expected immediate with lit modifier");

  // Consume closing delimiters in the reverse order of the openers.
  if (SP3Abs && !skipToken(Kind: AsmToken::Pipe, ErrMsg: "expected vertical bar"))
    return ParseStatus::Failure;
  if (Abs && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Neg && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Lit != LitModifier::None &&
      !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
    return ParseStatus::Failure;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  Mods.Lit = Lit;

  if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr())
      return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  return ParseStatus::Success;
}
3547
3548ParseStatus
3549AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3550 bool AllowImm) {
3551 bool Sext = trySkipId(Id: "sext");
3552 if (Sext && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after sext"))
3553 return ParseStatus::Failure;
3554
3555 ParseStatus Res;
3556 if (AllowImm) {
3557 Res = parseRegOrImm(Operands);
3558 } else {
3559 Res = parseReg(Operands);
3560 }
3561 if (!Res.isSuccess())
3562 return Sext ? ParseStatus::Failure : Res;
3563
3564 if (Sext && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3565 return ParseStatus::Failure;
3566
3567 AMDGPUOperand::Modifiers Mods;
3568 Mods.Sext = Sext;
3569
3570 if (Mods.hasIntModifiers()) {
3571 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3572 if (Op.isExpr())
3573 return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression");
3574 Op.setModifiers(Mods);
3575 }
3576
3577 return ParseStatus::Success;
3578}
3579
// Register-only variant (immediates rejected) of
// parseRegOrImmWithFPInputMods.
ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, AllowImm: false);
}
3583
// Register-only variant (immediates rejected) of
// parseRegOrImmWithIntInputMods.
ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, AllowImm: false);
}
3587
3588ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3589 auto Loc = getLoc();
3590 if (trySkipId(Id: "off")) {
3591 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: 0, Loc,
3592 Type: AMDGPUOperand::ImmTyOff, IsFPImm: false));
3593 return ParseStatus::Success;
3594 }
3595
3596 if (!isRegister())
3597 return ParseStatus::NoMatch;
3598
3599 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3600 if (Reg) {
3601 Operands.push_back(Elt: std::move(Reg));
3602 return ParseStatus::Success;
3603 }
3604
3605 return ParseStatus::Failure;
3606}
3607
/// Target hook: reject matched instructions that conflict with a
/// user-forced encoding (_e32/_e64/dpp/sdwa) or with opcode-specific
/// constraints. Returns Match_Success or Match_InvalidOperand.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;

  // The forced encoding must agree with the instruction's TSFlags.
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(i: OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  // Asm can first try to match VOPD or VOPD3. By failing early here with
  // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
  // Checking later during validateInstruction does not give a chance to retry
  // parsing as a different encoding.
  if (tryAnotherVOPDEncoding(Inst))
    return Match_InvalidOperand;

  return Match_Success;
}
3637
3638static ArrayRef<unsigned> getAllVariants() {
3639 static const unsigned Variants[] = {
3640 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3641 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3642 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3643 };
3644
3645 return ArrayRef(Variants);
3646}
3647
3648// What asm variants we should check
/// Returns the set of asm variants to match against, narrowed by any
/// encoding the user forced via a mnemonic suffix. Check order matters:
/// the combined DPP+VOP3 case must be tested before the individual VOP3
/// and DPP cases below.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
    return ArrayRef(Variants);
  }
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return ArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return ArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return ArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return ArrayRef(Variants);
  }

  return getAllVariants();
}
3677
3678StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3679 if (isForcedDPP() && isForcedVOP3())
3680 return "e64_dpp";
3681
3682 if (getForcedEncodingSize() == 32)
3683 return "e32";
3684
3685 if (isForcedVOP3())
3686 return "e64";
3687
3688 if (isForcedSDWA())
3689 return "sdwa";
3690
3691 if (isForcedDPP())
3692 return "dpp";
3693
3694 return "";
3695}
3696
3697MCRegister
3698AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3699 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
3700 for (MCPhysReg Reg : Desc.implicit_uses()) {
3701 switch (Reg) {
3702 case AMDGPU::FLAT_SCR:
3703 case AMDGPU::VCC:
3704 case AMDGPU::VCC_LO:
3705 case AMDGPU::VCC_HI:
3706 case AMDGPU::M0:
3707 return Reg;
3708 default:
3709 break;
3710 }
3711 }
3712 return MCRegister();
3713}
3714
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
//
// Returns true if operand \p OpIdx of \p Inst holds a value that can be
// encoded as an inline constant for that operand's declared type/size.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());

  // Only SI source operands can carry inline constants; KImm operands
  // are always encoded as literals.
  if (!AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx) ||
      AMDGPU::isKImmOperand(Desc, OpNo: OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(i: OpIdx);

  // Non-immediate operands are expressions; take their literal value.
  int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(Expr: MO.getExpr());
  auto OpSize = AMDGPU::getOperandSize(Desc, OpNo: OpIdx);

  switch (OpSize) { // expected operand size in bytes
  case 8:
    return AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
  case 2: {
    // 16-bit operands: the inlinable set depends on the exact operand
    // type (int16 / packed int16 / packed fp16 / packed bf16 / fp16 / bf16).
    const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16)
      return AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableLiteralV2I16(Literal: Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV2F16(Literal: Val);

    if (OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT)
      return AMDGPU::isPKFMACF16InlineConstant(Literal: Val, IsGFX11Plus: isGFX11Plus());

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
      return AMDGPU::isInlinableLiteralV2BF16(Literal: Val);

    if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16)
      return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());

    if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
      return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());

    // This operand type never inlines by definition.
    if (OperandType == AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16)
      return false;

    llvm_unreachable("invalid operand type");
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
3776
3777unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3778 if (!isGFX10Plus())
3779 return 1;
3780
3781 switch (Opcode) {
3782 // 64-bit shift instructions can use only one scalar value input
3783 case AMDGPU::V_LSHLREV_B64_e64:
3784 case AMDGPU::V_LSHLREV_B64_gfx10:
3785 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3786 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3787 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3788 case AMDGPU::V_LSHRREV_B64_e64:
3789 case AMDGPU::V_LSHRREV_B64_gfx10:
3790 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3791 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3792 case AMDGPU::V_ASHRREV_I64_e64:
3793 case AMDGPU::V_ASHRREV_I64_gfx10:
3794 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3795 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3796 case AMDGPU::V_LSHL_B64_e64:
3797 case AMDGPU::V_LSHR_B64_e64:
3798 case AMDGPU::V_ASHR_I64_e64:
3799 return 1;
3800 default:
3801 return 2;
3802 }
3803}
3804
// Upper bound on the number of regular source operands an instruction can
// have (VOPD: src0X/vsrc1X/vsrc2X + src0Y/vsrc1Y/vsrc2Y); used to size the
// inline storage of OperandIndices.
constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3807
3808// Get regular operand indices in the same order as specified
3809// in the instruction (but append mandatory literals to the end).
3810static OperandIndices getSrcOperandIndices(unsigned Opcode,
3811 bool AddMandatoryLiterals = false) {
3812
3813 int16_t ImmIdx =
3814 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::imm) : -1;
3815
3816 if (isVOPD(Opc: Opcode)) {
3817 int16_t ImmXIdx =
3818 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::immX) : -1;
3819
3820 return {getNamedOperandIdx(Opcode, Name: OpName::src0X),
3821 getNamedOperandIdx(Opcode, Name: OpName::vsrc1X),
3822 getNamedOperandIdx(Opcode, Name: OpName::vsrc2X),
3823 getNamedOperandIdx(Opcode, Name: OpName::src0Y),
3824 getNamedOperandIdx(Opcode, Name: OpName::vsrc1Y),
3825 getNamedOperandIdx(Opcode, Name: OpName::vsrc2Y),
3826 ImmXIdx,
3827 ImmIdx};
3828 }
3829
3830 return {getNamedOperandIdx(Opcode, Name: OpName::src0),
3831 getNamedOperandIdx(Opcode, Name: OpName::src1),
3832 getNamedOperandIdx(Opcode, Name: OpName::src2), ImmIdx};
3833}
3834
3835bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3836 const MCOperand &MO = Inst.getOperand(i: OpIdx);
3837 if (MO.isImm())
3838 return !isInlineConstant(Inst, OpIdx);
3839 if (MO.isReg()) {
3840 auto Reg = MO.getReg();
3841 if (!Reg)
3842 return false;
3843 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3844 auto PReg = mc2PseudoReg(Reg);
3845 return isSGPR(Reg: PReg, TRI) && PReg != SGPR_NULL;
3846 }
3847 return true;
3848}
3849
3850// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3851// Writelane is special in that it can use SGPR and M0 (which would normally
3852// count as using the constant bus twice - but in this case it is allowed since
3853// the lane selector doesn't count as a use of the constant bus). However, it is
3854// still required to abide by the 1 SGPR rule.
3855static bool checkWriteLane(const MCInst &Inst) {
3856 const unsigned Opcode = Inst.getOpcode();
3857 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3858 return false;
3859 const MCOperand &LaneSelOp = Inst.getOperand(i: 2);
3860 if (!LaneSelOp.isReg())
3861 return false;
3862 auto LaneSelReg = mc2PseudoReg(Reg: LaneSelOp.getReg());
3863 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3864}
3865
// Verify that \p Inst does not read more scalar values (SGPRs plus
// literals) than the constant bus of this subtarget allows. Emits a
// diagnostic at the offending operand and returns false on violation.
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize; // Only meaningful while NumLiterals != 0.

  // Only VALU encodings and VOPD are subject to constant bus limits.
  if (!(Desc.TSFlags &
        (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
         SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
      !isVOPD(Opc: Opcode))
    return true;

  // v_writelane with an M0 lane select is exempt (see checkWriteLane).
  if (checkWriteLane(Inst))
    return true;

  // Check special imm operands (used by madmk, etc)
  if (AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::imm)) {
    ++NumLiterals;
    LiteralSize = 4;
  }

  SmallDenseSet<MCRegister> SGPRsUsed;
  MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed) {
    SGPRsUsed.insert(V: SGPRUsed);
    ++ConstantBusUseCount;
  }

  OperandIndices OpIndices = getSrcOperandIndices(Opcode);

  unsigned ConstantBusLimit = getConstantBusLimit(Opcode);

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue; // Operand not present for this opcode.

    const MCOperand &MO = Inst.getOperand(i: OpIdx);
    if (usesConstantBus(Inst, OpIdx)) {
      if (MO.isReg()) {
        LastSGPR = mc2PseudoReg(Reg: MO.getReg());
        // Pairs of registers with a partial intersections like these
        // s0, s[0:1]
        // flat_scratch_lo, flat_scratch
        // flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction
        if (SGPRsUsed.insert(V: LastSGPR).second) {
          ++ConstantBusUseCount;
        }
      } else { // Expression or a literal

        if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
          continue; // special operand like VINTERP attr_chan

        // An instruction may use only one literal.
        // This has been validated on the previous step.
        // See validateVOPLiteral.
        // This literal may be used as more than one operand.
        // If all these operands are of the same size,
        // this literal counts as one scalar value.
        // Otherwise it counts as 2 scalar values.
        // See "GFX10 Shader Programming", section 3.6.2.3.

        unsigned Size = AMDGPU::getOperandSize(Desc, OpNo: OpIdx);
        if (Size < 4)
          Size = 4; // Sub-dword literals occupy a full dword slot.

        if (NumLiterals == 0) {
          NumLiterals = 1;
          LiteralSize = Size;
        } else if (LiteralSize != Size) {
          NumLiterals = 2;
        }
      }
    }

    // Report the first operand that pushes usage past the limit.
    if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
      Error(L: getOperandLoc(Operands, MCOpIdx: OpIdx),
            Msg: "invalid operand (violates constant bus restrictions)");
      return false;
    }
  }
  return true;
}
3953
// Check VOPD/VOPD3 register bank (and operand legality) constraints for
// \p Inst. Returns the index of the first offending component operand,
// or an empty optional when all constraints are satisfied.
std::optional<unsigned>
AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {

  const unsigned Opcode = Inst.getOpcode();
  if (!isVOPD(Opc: Opcode))
    return {};

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  // Map an operand index to its VGPR, or an invalid register for
  // non-VGPR operands (SGPRs and immediates are ignored by bank checks).
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    const MCOperand &Opr = Inst.getOperand(i: OperandIdx);
    return (Opr.isReg() && !isSGPR(Reg: mc2PseudoReg(Reg: Opr.getReg()), TRI))
               ? Opr.getReg()
               : MCRegister();
  };

  // On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
  // source-cache.
  bool SkipSrc =
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
      Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
  bool AllowSameVGPR = isGFX1250Plus();

  if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
    // src0X/src0Y may still be inline constants.
    for (auto OpName : {OpName::src0X, OpName::src0Y}) {
      int I = getNamedOperandIdx(Opcode, Name: OpName);
      const MCOperand &Op = Inst.getOperand(i: I);
      if (!Op.isImm())
        continue;
      int64_t Imm = Op.getImm();
      if (!AMDGPU::isInlinableLiteral32(Literal: Imm, HasInv2Pi: hasInv2PiInlineImm()) &&
          !AMDGPU::isInlinableLiteral64(Literal: Imm, HasInv2Pi: hasInv2PiInlineImm()))
        return (unsigned)I;
    }

    // The remaining source operands must not be immediates at all.
    for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
                        OpName::vsrc2Y, OpName::imm}) {
      int I = getNamedOperandIdx(Opcode, Name: OpName);
      if (I == -1)
        continue;
      const MCOperand &Op = Inst.getOperand(i: I);
      if (Op.isImm())
        return (unsigned)I;
    }
  }

  const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII);
  auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
      GetRegIdx: getVRegIdx, MRI: *TRI, SkipSrc, AllowSameVGPR, VOPD3: AsVOPD3);

  return InvalidCompOprIdx;
}
4010
// Validate a VOPD/VOPD3 instruction: reject ABS modifiers in VOPD3 and
// report register bank / operand legality violations with a diagnostic
// pointing at the offending parsed operand.
bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
                                   const OperandVector &Operands) {

  unsigned Opcode = Inst.getOpcode();
  bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;

  if (AsVOPD3) {
    // VOPD3 operands must not carry the ABS (|x|) source modifier.
    for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
      AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
      if ((Op.isRegKind() || Op.isImmTy(ImmT: AMDGPUOperand::ImmTyNone)) &&
          (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
        Error(L: Op.getStartLoc(), Msg: "ABS not allowed in VOPD3 instructions");
    }
  }

  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
  if (!InvalidCompOprIdx.has_value())
    return true;

  // Translate the failing component operand index back to the position
  // in the parsed operand list so the error points at the right token.
  auto CompOprIdx = *InvalidCompOprIdx;
  const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII);
  auto ParsedIdx =
      std::max(a: InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               b: InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  assert(ParsedIdx > 0 && ParsedIdx < Operands.size());

  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    if (AsVOPD3)
      Error(L: Loc, Msg: "dst registers must be distinct");
    else
      Error(L: Loc, Msg: "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    Error(L: Loc, Msg: Twine("src") + Twine(CompSrcIdx) +
                     " operands must use different VGPR banks");
  }

  return false;
}
4051
// \returns true if \p Inst does not satisfy VOPD constraints, but can be
// potentially used as VOPD3 with the same operands.
bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
  // First check if it fits VOPD
  auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3: false);
  if (!InvalidCompOprIdx.has_value())
    return false;

  // Then if it fits VOPD3
  InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3: true);
  if (InvalidCompOprIdx.has_value()) {
    // If failed operand is dst it is better to show error about VOPD3
    // instruction as it has more capabilities and error message will be
    // more informative. If the dst is not legal for VOPD3, then it is not
    // legal for VOPD either.
    if (*InvalidCompOprIdx == VOPD::Component::DST)
      return true;

    // Otherwise prefer VOPD as we may find ourselves in an awkward situation
    // with a conflict in tied implicit src2 of fmac and no asm operand to
    // point to.
    return false;
  }
  return true;
}
4077
// \returns true if a VOPD3 instruction can be also represented as a shorter
// VOPD encoding.
bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const auto &II = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII);
  unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST: getSTI());
  // Both components must have a plain (non-VOPD3) VOPD form.
  if (!getCanBeVOPD(Opc: II[VOPD::X].getOpcode(), EncodingFamily, VOPD3: false).X ||
      !getCanBeVOPD(Opc: II[VOPD::Y].getOpcode(), EncodingFamily, VOPD3: false).Y)
    return false;

  // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
  // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
  // be parsed as VOPD which does not accept src2.
  if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
      II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
    return false;

  // If any modifiers are set this cannot be VOPD.
  for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
                      OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
                      OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
    int I = getNamedOperandIdx(Opcode, Name: OpName);
    if (I == -1)
      continue;
    if (Inst.getOperand(i: I).getImm())
      return false;
  }

  // Finally, VOPD register-bank constraints must hold.
  return !tryVOPD3(Inst);
}
4108
4109// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4110// form but switch to VOPD3 otherwise.
4111bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4112 const unsigned Opcode = Inst.getOpcode();
4113 if (!isGFX1250Plus() || !isVOPD(Opc: Opcode))
4114 return false;
4115
4116 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4117 return tryVOPD(Inst);
4118 return tryVOPD3(Inst);
4119}
4120
4121bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4122
4123 const unsigned Opc = Inst.getOpcode();
4124 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4125
4126 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4127 int ClampIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::clamp);
4128 assert(ClampIdx != -1);
4129 return Inst.getOperand(i: ClampIdx).getImm() == 0;
4130 }
4131
4132 return true;
4133}
4134
// TSFlags mask covering all image-like instruction encodings.
constexpr uint64_t MIMGFlags =
    SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
4137
// Check that the vdata register tuple of an image instruction matches the
// size implied by dmask (plus tfe and packed-d16 adjustments). Emits a
// diagnostic and returns false on mismatch.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::tfe);

  if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
    return true;

  if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
    return true;

  unsigned VDataSize = getRegOperandSize(Desc, OpNo: VDataIdx);
  // tfe adds one extra result register.
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(i: TFEIdx).getImm()) ? 1 : 0;
  unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    DMask = 1; // dmask of 0 behaves like a single-component access.

  bool IsPackedD16 = false;
  // Gather4 always returns 4 components regardless of dmask.
  unsigned DataSize =
      (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(Value: DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16);
    IsPackedD16 = D16Idx >= 0;
    // Packed d16 stores two components per 32-bit register.
    if (IsPackedD16 && Inst.getOperand(i: D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  if ((VDataSize / 4) == DataSize + TFESize)
    return true;

  StringRef Modifiers;
  if (isGFX90A())
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  else
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(L: IDLoc, Msg: Twine("image data size does not match ") + Modifiers);
  return false;
}
4184
// Check that the vaddr register count of a GFX10+ image instruction
// matches the count expected for its dim and a16 settings. Emits a
// diagnostic and returns false on mismatch.
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vaddr0);
  // The resource operand is named srsrc on MIMG and rsrc on VIMAGE/VSAMPLE.
  AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: RSrcOpName);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  bool IsA16 = (A16Idx != -1 && Inst.getOperand(i: A16Idx).getImm());
  // BVH (intersect_ray) opcodes only constrain the a16 setting.
  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
      return true;
    Error(L: IDLoc, Msg: "image address size does not match a16");
    return false;
  }

  unsigned Dim = Inst.getOperand(i: DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim);
  // NSA form: each address component is a separate operand between
  // vaddr0 and the resource operand.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, OpNo: VAddr0Idx) / 4;

  unsigned ExpectedAddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim: DimInfo, IsA16, IsG16Supported: hasG16());

  if (IsNSA) {
    if (hasPartialNSAEncoding() &&
        ExpectedAddrSize >
            getNSAMaxSize(HasSampler: Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
      // Partial NSA: the trailing operand is a register tuple holding the
      // remaining address components.
      int VAddrLastIdx = SrsrcIdx - 1;
      unsigned VAddrLastSize = getRegOperandSize(Desc, OpNo: VAddrLastIdx) / 4;

      ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
    }
  } else {
    // Non-NSA address tuples are rounded up to a power-of-two size.
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;

    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
    // This provides backward compatibility for assembly created
    // before 160b/192b/224b types were directly supported.
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      return true;
  }

  if (ActualAddrSize == ExpectedAddrSize)
    return true;

  Error(L: IDLoc, Msg: "image address size does not match dim and a16");
  return false;
}
4251
4252bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4253
4254 const unsigned Opc = Inst.getOpcode();
4255 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4256
4257 if ((Desc.TSFlags & MIMGFlags) == 0)
4258 return true;
4259 if (!Desc.mayLoad() || !Desc.mayStore())
4260 return true; // Not atomic
4261
4262 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
4263 unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf;
4264
4265 // This is an incomplete check because image_atomic_cmpswap
4266 // may only use 0x3 and 0xf while other atomic operations
4267 // may use 0x1 and 0x3. However these limitations are
4268 // verified when we check that dmask matches dst size.
4269 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4270}
4271
4272bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4273
4274 const unsigned Opc = Inst.getOpcode();
4275 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4276
4277 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4278 return true;
4279
4280 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
4281 unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf;
4282
4283 // GATHER4 instructions use dmask in a different fashion compared to
4284 // other MIMG instructions. The only useful DMASK values are
4285 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4286 // (red,red,red,red) etc.) The ISA document doesn't mention
4287 // this.
4288 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4289}
4290
4291bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4292 const OperandVector &Operands) {
4293 if (!isGFX10Plus())
4294 return true;
4295
4296 const unsigned Opc = Inst.getOpcode();
4297 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4298
4299 if ((Desc.TSFlags & MIMGFlags) == 0)
4300 return true;
4301
4302 // image_bvh_intersect_ray instructions do not have dim
4303 if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4304 return true;
4305
4306 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4307 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4308 if (Op.isDim())
4309 return true;
4310 }
4311 return false;
4312}
4313
4314bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4315 const unsigned Opc = Inst.getOpcode();
4316 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4317
4318 if ((Desc.TSFlags & MIMGFlags) == 0)
4319 return true;
4320
4321 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4322 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4323 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
4324
4325 if (!BaseOpcode->MSAA)
4326 return true;
4327
4328 int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim);
4329 assert(DimIdx != -1);
4330
4331 unsigned Dim = Inst.getOperand(i: DimIdx).getImm();
4332 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim);
4333
4334 return DimInfo->MSAA;
4335}
4336
4337static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4338{
4339 switch (Opcode) {
4340 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4341 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4342 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4343 return true;
4344 default:
4345 return false;
4346 }
4347}
4348
4349// movrels* opcodes should only allow VGPRS as src0.
4350// This is specified in .td description for vop1/vop3,
4351// but sdwa is handled differently. See isSDWAOperand.
4352bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4353 const OperandVector &Operands) {
4354
4355 const unsigned Opc = Inst.getOpcode();
4356 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4357
4358 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opcode: Opc))
4359 return true;
4360
4361 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
4362 assert(Src0Idx != -1);
4363
4364 const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
4365 if (Src0.isReg()) {
4366 auto Reg = mc2PseudoReg(Reg: Src0.getReg());
4367 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4368 if (!isSGPR(Reg, TRI))
4369 return true;
4370 }
4371
4372 Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx), Msg: "source operand must be a VGPR");
4373 return false;
4374}
4375
4376bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4377 const OperandVector &Operands) {
4378
4379 const unsigned Opc = Inst.getOpcode();
4380
4381 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4382 return true;
4383
4384 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
4385 assert(Src0Idx != -1);
4386
4387 const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
4388 if (!Src0.isReg())
4389 return true;
4390
4391 auto Reg = mc2PseudoReg(Reg: Src0.getReg());
4392 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4393 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4394 Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx),
4395 Msg: "source operand must be either a VGPR or an inline constant");
4396 return false;
4397 }
4398
4399 return true;
4400}
4401
4402bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4403 const OperandVector &Operands) {
4404 unsigned Opcode = Inst.getOpcode();
4405 const MCInstrDesc &Desc = MII.get(Opcode);
4406
4407 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4408 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4409 return true;
4410
4411 const int Src2Idx = getNamedOperandIdx(Opcode, Name: OpName::src2);
4412 if (Src2Idx == -1)
4413 return true;
4414
4415 if (Inst.getOperand(i: Src2Idx).isImm() && isInlineConstant(Inst, OpIdx: Src2Idx)) {
4416 Error(L: getOperandLoc(Operands, MCOpIdx: Src2Idx),
4417 Msg: "inline constants are not allowed for this operand");
4418 return false;
4419 }
4420
4421 return true;
4422}
4423
// Validate MAI (MFMA) instructions: for scaled F8F6F4 variants, check that
// src0/src1 register tuple sizes match the cbsz/blgp format encodings; for
// wide MFMAs, check that src2 does not partially overlap the dst tuple.
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
    return true;

  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
  if (BlgpIdx != -1) {
    if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opcode: Opc)) {
      int CbszIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz);

      unsigned CBSZ = Inst.getOperand(i: CbszIdx).getImm();
      unsigned BLGP = Inst.getOperand(i: BlgpIdx).getImm();

      // Validate the correct register size was used for the floating point
      // format operands

      bool Success = true;
      if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: CBSZ)) {
        int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
        Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx),
              Msg: "wrong register tuple size for cbsz value " + Twine(CBSZ));
        Success = false;
      }

      if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: BLGP)) {
        int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
        Error(L: getOperandLoc(Operands, MCOpIdx: Src1Idx),
              Msg: "wrong register tuple size for blgp value " + Twine(BLGP));
        Success = false;
      }

      return Success;
    }
  }

  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2);
  if (Src2Idx == -1)
    return true;

  const MCOperand &Src2 = Inst.getOperand(i: Src2Idx);
  if (!Src2.isReg())
    return true;

  MCRegister Src2Reg = Src2.getReg();
  MCRegister DstReg = Inst.getOperand(i: 0).getReg();
  // Exact equality (fully tied src2/dst) is always allowed.
  if (Src2Reg == DstReg)
    return true;

  // Only dst tuples wider than 128 bits can partially overlap src2.
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (TRI->getRegClass(i: MII.getOpRegClassID(OpInfo: Desc.operands()[0], HwModeId: HwMode))
          .getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(RegA: Src2Reg, RegB: DstReg)) {
    Error(L: getOperandLoc(Operands, MCOpIdx: Src2Idx),
          Msg: "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}
4488
4489bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4490 switch (Inst.getOpcode()) {
4491 default:
4492 return true;
4493 case V_DIV_SCALE_F32_gfx6_gfx7:
4494 case V_DIV_SCALE_F32_vi:
4495 case V_DIV_SCALE_F32_gfx10:
4496 case V_DIV_SCALE_F64_gfx6_gfx7:
4497 case V_DIV_SCALE_F64_vi:
4498 case V_DIV_SCALE_F64_gfx10:
4499 break;
4500 }
4501
4502 // TODO: Check that src0 = src1 or src2.
4503
4504 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4505 AMDGPU::OpName::src2_modifiers,
4506 AMDGPU::OpName::src2_modifiers}) {
4507 if (Inst.getOperand(i: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name))
4508 .getImm() &
4509 SISrcMods::ABS) {
4510 return false;
4511 }
4512 }
4513
4514 return true;
4515}
4516
4517bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4518
4519 const unsigned Opc = Inst.getOpcode();
4520 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4521
4522 if ((Desc.TSFlags & MIMGFlags) == 0)
4523 return true;
4524
4525 int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16);
4526 if (D16Idx >= 0 && Inst.getOperand(i: D16Idx).getImm()) {
4527 if (isCI() || isSI())
4528 return false;
4529 }
4530
4531 return true;
4532}
4533
4534bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4535 const unsigned Opc = Inst.getOpcode();
4536 const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4537
4538 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4539 return true;
4540
4541 int R128Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::r128);
4542
4543 return R128Idx < 0 || !Inst.getOperand(i: R128Idx).getImm();
4544}
4545
// Returns true for opcodes with a "...REV..." mnemonic (subrev, subbrev,
// lshlrev, lshrrev, ashrrev and their packed forms) across all encodings.
// Used by validateLdsDirect to reject lds_direct operands on these opcodes.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
4674
// Validate uses of the lds_direct special register: it is only legal as src0
// of VOP1/VOP2/VOP3/VOPC/VOP3P encodings (not SDWA, not "...REV..." opcodes),
// and only on targets that still support it. Emits a diagnostic and returns
// false on a violation.
bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
                                        const OperandVector &Operands) {
  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
  if ((Desc.TSFlags & Enc) == 0)
    return true;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, Name: SrcName);
    if (SrcIdx == -1)
      break; // src operands are contiguous; no further sources exist.
    const auto &Src = Inst.getOperand(i: SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {

      if (isGFX90A() || isGFX11Plus()) {
        Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
              Msg: "lds_direct is not supported on this GPU");
        return false;
      }

      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
        Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
              Msg: "lds_direct cannot be used with this instruction");
        return false;
      }

      if (SrcName != OpName::src0) {
        Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
              Msg: "lds_direct may be used as src0 only");
        return false;
      }
    }
  }

  return true;
}
4716
4717SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4718 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4719 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4720 if (Op.isFlatOffset())
4721 return Op.getStartLoc();
4722 }
4723 return getLoc();
4724}
4725
// Validate an instruction's 'offset' immediate. FLAT and SMEM encodings are
// dispatched to their dedicated validators; everything else is checked
// against a 23-bit unsigned range (GFX12+ buffer ops) or a 16-bit unsigned
// range. Emits a diagnostic and returns false when out of range.
bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
                                     const OperandVector &Operands) {
  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
  if (OpNum == -1)
    return true; // No offset operand, nothing to validate.

  uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT))
    return validateFlatOffset(Inst, Operands);

  if ((TSFlags & SIInstrFlags::SMRD))
    return validateSMEMOffset(Inst, Operands);

  const auto &Op = Inst.getOperand(i: OpNum);
  // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
  if (isGFX12Plus() &&
      (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
    const unsigned OffsetSize = 24;
    if (!isUIntN(N: OffsetSize - 1, x: Op.getImm())) {
      Error(L: getFlatOffsetLoc(Operands),
            Msg: Twine("expected a ") + Twine(OffsetSize - 1) +
                "-bit unsigned offset for buffer ops");
      return false;
    }
  } else {
    const unsigned OffsetSize = 16;
    if (!isUIntN(N: OffsetSize, x: Op.getImm())) {
      Error(L: getFlatOffsetLoc(Operands),
            Msg: Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
      return false;
    }
  }
  return true;
}
4761
// Validate the offset of a FLAT-encoded instruction: the target must support
// flat offsets at all, the value must fit the target's offset-field width,
// and negative offsets are only allowed for global/scratch variants or on
// GFX12+. Emits a diagnostic and returns false on failure.
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
  assert(OpNum != -1);

  const auto &Op = Inst.getOperand(i: OpNum);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(L: getFlatOffsetLoc(Operands),
          Msg: "flat offset modifier is not supported on this GPU");
    return false;
  }

  // For pre-GFX12 FLAT instructions the offset must be positive;
  // MSB is ignored and forced to zero.
  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(ST: getSTI());
  bool AllowNegative =
      (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
      isGFX12Plus();
  if (!isIntN(N: OffsetSize, x: Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    Error(L: getFlatOffsetLoc(Operands),
          Msg: Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    return false;
  }

  return true;
}
4795
4796SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4797 // Start with second operand because SMEM Offset cannot be dst or src0.
4798 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4799 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4800 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4801 return Op.getStartLoc();
4802 }
4803 return getLoc();
4804}
4805
// Validate the SMEM (scalar memory) offset immediate against the range the
// current target can encode, reporting the target-specific legal range in
// the diagnostic. SI/CI are exempt (they encode offsets differently).
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  if (isCI() || isSI())
    return true;

  uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::SMRD) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
  if (OpNum == -1)
    return true;

  const auto &Op = Inst.getOperand(i: OpNum);
  if (!Op.isImm())
    return true; // Symbolic offsets are resolved later; nothing to range-check.

  uint64_t Offset = Op.getImm();
  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opc: Opcode);
  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(ST: getSTI(), EncodedOffset: Offset) ||
      AMDGPU::isLegalSMRDEncodedSignedOffset(ST: getSTI(), EncodedOffset: Offset, IsBuffer))
    return true;

  // Report the encodable range for this target/instruction class.
  Error(L: getSMEMOffsetLoc(Operands),
        Msg: isGFX12Plus() && IsBuffer
            ? "expected a 23-bit unsigned offset for buffer ops"
            : isGFX12Plus() ? "expected a 24-bit signed offset"
            : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                                   : "expected a 21-bit signed offset");

  return false;
}
4839
// SOP2/SOPC instructions may use at most one unique 32-bit literal across
// src0 and src1 (both sources share the single literal slot). Unresolved
// expressions are conservatively counted as distinct literals.
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;      // Unresolved (non-lit) expression operands.
  unsigned NumLiterals = 0;   // Distinct non-inline literal values seen.
  int64_t LiteralValue;       // Valid only when NumLiterals > 0.

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(i: OpIdx);
    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
    if (AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx)) {
      bool IsLit = false;
      std::optional<int64_t> Imm;
      if (MO.isImm()) {
        Imm = MO.getImm();
      } else if (MO.isExpr()) {
        if (isLitExpr(Expr: MO.getExpr())) {
          IsLit = true;
          Imm = getLitValue(Expr: MO.getExpr());
        }
      } else {
        continue; // Register operand: never a literal.
      }

      if (!Imm.has_value()) {
        ++NumExprs;
      } else if (!isInlineConstant(Inst, OpIdx)) {
        // Compare literals in their encoded 32-bit form so that equal values
        // written differently still count as one literal.
        auto OpType = static_cast<AMDGPU::OperandType>(
            Desc.operands()[OpIdx].OperandType);
        int64_t Value = encode32BitLiteral(Imm: *Imm, Type: OpType, IsLit);
        if (NumLiterals == 0 || LiteralValue != Value) {
          LiteralValue = Value;
          ++NumLiterals;
        }
      }
    }
  }

  if (NumLiterals + NumExprs <= 1)
    return true;

  Error(L: getOperandLoc(Operands, MCOpIdx: Src1Idx),
        Msg: "only one unique literal operand is allowed");
  return false;
}
4896
// Validate op_sel/op_sel_hi restrictions that depend on the opcode class:
//  - permlane16: only op_sel[0:1] may be set;
//  - GFX940 DOT instructions: op_sel must be 0 and op_sel_hi all-ones;
//  - GFX11+ VOP3 (non-VOP3P) dot instructions: op_sel[0:1] must be 0;
//  - GFX12+ packed FP32: an SGPR source may not select its high half.
// Returns false (without emitting a diagnostic) on a violation.
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  if (isPermlane16(Opc)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();

    if (OpSel & ~3)
      return false;
  }

  uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags;

  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      if (Inst.getOperand(i: OpSelIdx).getImm() != 0)
        return false;
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      // -1 is the default all-ones op_sel_hi value.
      if (Inst.getOperand(i: OpSelHiIdx).getImm() != -1)
        return false;
    }
  }

  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
      (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
    if (OpSel & 3)
      return false;
  }

  // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
  // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
  // the first SGPR and use it for both the low and high operations.
  if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);

    const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
    const MCOperand &Src1 = Inst.getOperand(i: Src1Idx);
    unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
    unsigned OpSelHi = Inst.getOperand(i: OpSelHiIdx).getImm();

    const MCRegisterInfo *TRI = getContext().getRegisterInfo();

    // An SGPR source is legal only if neither op_sel nor op_sel_hi selects
    // its high half.
    auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
      unsigned Mask = 1U << Index;
      return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
    };

    if (Src0.isReg() && isSGPR(Reg: Src0.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/0))
      return false;
    if (Src1.isReg() && isSGPR(Reg: Src1.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/1))
      return false;

    int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2);
    if (Src2Idx != -1) {
      const MCOperand &Src2 = Inst.getOperand(i: Src2Idx);
      if (Src2.isReg() && isSGPR(Reg: Src2.getReg(), TRI) &&
          !VerifyOneSGPR(/*Index=*/2))
        return false;
    }
  }

  return true;
}
4970
// For true16 instructions, verify that each op_sel bit matches the hi/lo
// half implied by the corresponding 16-bit VGPR operand (src0..src2, vdst).
// Returns false (without emitting a diagnostic) on a mismatch.
bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
  if (!hasTrue16Insts())
    return true;
  const MCRegisterInfo *MRI = getMRI();
  const unsigned Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return true;
  unsigned OpSelOpValue = Inst.getOperand(i: OpSelIdx).getImm();
  // If the value is 0 we could have a default OpSel Operand, so conservatively
  // allow it.
  if (OpSelOpValue == 0)
    return true;
  unsigned OpCount = 0; // Bit position in op_sel for the current operand.
  for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: OpName);
    if (OpIdx == -1)
      continue;
    const MCOperand &Op = Inst.getOperand(i: OpIdx);
    // Only 16-bit VGPR operands carry an implicit hi/lo selection.
    if (Op.isReg() &&
        MRI->getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: Op.getReg())) {
      bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Reg: Op.getReg(), MRI: *MRI);
      bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
      if (OpSelOpIsHi != VGPRSuffixIsHi)
        return false;
    }
    ++OpCount;
  }

  return true;
}
5003
5004bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5005 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5006
5007 const unsigned Opc = Inst.getOpcode();
5008 uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags;
5009
5010 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5011 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5012 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5013 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5014 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5015 !(TSFlags & SIInstrFlags::IsSWMMAC))
5016 return true;
5017
5018 int NegIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName);
5019 if (NegIdx == -1)
5020 return true;
5021
5022 unsigned Neg = Inst.getOperand(i: NegIdx).getImm();
5023
5024 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
5025 // on some src operands but not allowed on other.
5026 // It is convenient that such instructions don't have src_modifiers operand
5027 // for src operands that don't allow neg because they also don't allow opsel.
5028
5029 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5030 AMDGPU::OpName::src1_modifiers,
5031 AMDGPU::OpName::src2_modifiers};
5032
5033 for (unsigned i = 0; i < 3; ++i) {
5034 if (!AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: SrcMods[i])) {
5035 if (Neg & (1 << i))
5036 return false;
5037 }
5038 }
5039
5040 return true;
5041}
5042
// Validate DPP-specific restrictions: DP-ALU opcodes only accept a limited
// set of dpp_ctrl values (row_newbcast on GFX9*, row_share on GFX12), and on
// targets without DPP-SGPR-src1 support, src1 of a DPP op must be a VGPR.
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp_ctrl);
  if (DppCtrlIdx >= 0) {
    unsigned DppCtrl = Inst.getOperand(i: DppCtrlIdx).getImm();

    if (!AMDGPU::isLegalDPALU_DPPControl(ST: getSTI(), DC: DppCtrl) &&
        AMDGPU::isDPALU_DPP(OpDesc: MII.get(Opcode: Opc), MII, ST: getSTI())) {
      // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
      // only on GFX12.
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyDppCtrl, Operands);
      Error(L: S, Msg: isGFX12() ? "DP ALU dpp only supports row_share"
                             : "DP ALU dpp only supports row_newbcast");
      return false;
    }
  }

  // The instruction is DPP if it has either a dpp_ctrl or a dpp8 operand.
  int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp8);
  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;

  if (IsDPP && !hasDPPSrc1SGPR(STI: getSTI())) {
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
    if (Src1Idx >= 0) {
      const MCOperand &Src1 = Inst.getOperand(i: Src1Idx);
      const MCRegisterInfo *TRI = getContext().getRegisterInfo();
      if (Src1.isReg() && isSGPR(Reg: mc2PseudoReg(Reg: Src1.getReg()), TRI)) {
        Error(L: getOperandLoc(Operands, MCOpIdx: Src1Idx),
              Msg: "invalid operand for instruction");
        return false;
      }
      if (Src1.isImm()) {
        Error(L: getInstLoc(Operands),
              Msg: "src1 immediate operand invalid for instruction");
        return false;
      }
    }
  }

  return true;
}
5084
5085// Check if VCC register matches wavefront size
5086bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5087 return (Reg == AMDGPU::VCC && isWave64()) ||
5088 (Reg == AMDGPU::VCC_LO && isWave32());
5089}
5090
5091// One unique literal can be used. VOP3 literal is only allowed in GFX10+
// Validate literal usage for VOP3/VOP3P/VOPD and mandatory-literal opcodes:
// at most one unique literal across all source operands, literals only on
// targets with FeatureVOP3Literal (unless the literal is mandatory), and each
// literal must fit the operand's encodable width. Emits diagnostics.
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, Name: OpName::imm) != -1;
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
      !HasMandatoryLiteral && !isVOPD(Opc: Opcode))
    return true;

  OperandIndices OpIndices = getSrcOperandIndices(Opcode, AddMandatoryLiterals: HasMandatoryLiteral);

  std::optional<unsigned> LiteralOpIdx; // MC index of the first literal seen.
  std::optional<uint64_t> LiteralValue; // Encoded value of that literal.

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(i: OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!isSISrcOperand(Desc, OpNo: OpIdx))
      continue;

    std::optional<int64_t> Imm;
    if (MO.isImm())
      Imm = MO.getImm();
    else if (MO.isExpr() && isLitExpr(Expr: MO.getExpr()))
      Imm = getLitValue(Expr: MO.getExpr());

    bool IsAnotherLiteral = false;
    if (!Imm.has_value()) {
      // Literal value not known, so we conservately assume it's different.
      IsAnotherLiteral = true;
    } else if (!isInlineConstant(Inst, OpIdx)) {
      uint64_t Value = *Imm;
      // KIMM64 operands, and FP64 operands with a mandatory literal, always
      // encode the full 64-bit value.
      bool IsForcedFP64 =
          Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
          (Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
           HasMandatoryLiteral);
      bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpNo: OpIdx)) &&
                    AMDGPU::getOperandSize(OpInfo: Desc.operands()[OpIdx]) == 8;
      bool IsValid32Op = AMDGPU::isValid32BitLiteral(Val: Value, IsFP64);

      if (!IsValid32Op && !isInt<32>(x: Value) && !isUInt<32>(x: Value) &&
          !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
        Error(L: getOperandLoc(Operands, MCOpIdx: OpIdx),
              Msg: "invalid operand for instruction");
        return false;
      }

      // A 64-bit FP literal that fits 32 bits is encoded as its high word.
      if (IsFP64 && IsValid32Op && !IsForcedFP64)
        Value = Hi_32(Value);

      IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
      LiteralValue = Value;
    }

    if (IsAnotherLiteral && !HasMandatoryLiteral &&
        !getFeatureBits()[FeatureVOP3Literal]) {
      Error(L: getOperandLoc(Operands, MCOpIdx: OpIdx),
            Msg: "literal operands are not supported");
      return false;
    }

    if (LiteralOpIdx && IsAnotherLiteral) {
      // Point the diagnostic at whichever of the two literals appears later.
      Error(L: getLaterLoc(a: getOperandLoc(Operands, MCOpIdx: OpIdx),
                         b: getOperandLoc(Operands, MCOpIdx: *LiteralOpIdx)),
            Msg: "only one unique literal operand is allowed");
      return false;
    }

    if (IsAnotherLiteral)
      LiteralOpIdx = OpIdx;
  }

  return true;
}
5170
5171// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5172static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5173 const MCRegisterInfo *MRI) {
5174 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name);
5175 if (OpIdx < 0)
5176 return -1;
5177
5178 const MCOperand &Op = Inst.getOperand(i: OpIdx);
5179 if (!Op.isReg())
5180 return -1;
5181
5182 MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
5183 auto Reg = Sub ? Sub : Op.getReg();
5184 const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID);
5185 return AGPR32.contains(Reg) ? 1 : 0;
5186}
5187
// For memory instructions that may take AGPR data operands (FLAT/MUBUF/
// MTBUF/MIMG/DS), verify the AGPR-vs-VGPR classes are consistent: DS data0
// and data1 must match; on gfx90a dst and data must match; pre-gfx90a
// targets accept no AGPRs at all here.
bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
  uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
                  SIInstrFlags::DS)) == 0)
    return true;

  // DS ops name their data operand 'data0'; the others use 'vdata'.
  AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
                                ? AMDGPU::OpName::data0
                                : AMDGPU::OpName::vdata;

  const MCRegisterInfo *MRI = getMRI();
  int DstAreg = IsAGPROperand(Inst, Name: AMDGPU::OpName::vdst, MRI);
  int DataAreg = IsAGPROperand(Inst, Name: DataName, MRI);

  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
    int Data2Areg = IsAGPROperand(Inst, Name: AMDGPU::OpName::data1, MRI);
    if (Data2Areg >= 0 && Data2Areg != DataAreg)
      return false;
  }

  auto FB = getFeatureBits();
  if (FB[AMDGPU::FeatureGFX90AInsts]) {
    // gfx90a allows AGPRs, but dst and data must agree on AGPR vs VGPR.
    if (DataAreg < 0 || DstAreg < 0)
      return true;
    return DstAreg == DataAreg;
  }

  // Pre-gfx90a: neither dst nor data may be an AGPR.
  return DstAreg < 1 && DataAreg < 1;
}
5218
// On targets requiring aligned VGPRs, check that every multi-register
// VGPR/AGPR operand starts at an even register, modulo a small set of
// load-transpose opcodes that explicitly permit odd alignment.
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
    return true;

  unsigned Opc = Inst.getOpcode();
  const MCRegisterInfo *MRI = getMRI();
  // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
  // unaligned VGPR. All others only allow even aligned VGPRs.
  if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
    return true;

  if (FB[AMDGPU::FeatureGFX1250Insts]) {
    switch (Opc) {
    default:
      break;
    case AMDGPU::DS_LOAD_TR6_B96:
    case AMDGPU::DS_LOAD_TR6_B96_gfx12:
      // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
      // allows unaligned VGPR. All others only allow even aligned VGPRs.
      return true;
    case AMDGPU::GLOBAL_LOAD_TR6_B96:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
      // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
      // allows unaligned VGPR for vdst, but other operands still only allow
      // even aligned VGPRs.
      int VAddrIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vaddr);
      if (VAddrIdx != -1) {
        const MCOperand &Op = Inst.getOperand(i: VAddrIdx);
        MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
        if ((Sub - AMDGPU::VGPR0) & 1)
          return false;
      }
      return true;
    }
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
      return true;
    }
  }

  // Generic case: every register-tuple operand must begin at an even
  // VGPR/AGPR index. Single registers have no sub0 and are skipped.
  const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID);
  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const MCOperand &Op = Inst.getOperand(i: I);
    if (!Op.isReg())
      continue;

    MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
    if (!Sub)
      continue;

    if (VGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  }

  return true;
}
5279
5280SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5281 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5282 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5283 if (Op.isBLGP())
5284 return Op.getStartLoc();
5285 }
5286 return SMLoc();
5287}
5288
// Check that the spelling of the blgp operand matches the opcode: the gfx940
// F64 MFMA variants take it as 'neg:', all other MFMAs take 'blgp:'. The
// check is textual -- it inspects the source at the operand's location.
bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
                                   const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();
  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
  if (BlgpIdx == -1)
    return true;
  SMLoc BLGPLoc = getBLGPLoc(Operands);
  if (!BLGPLoc.isValid())
    return true; // Modifier not written; default is fine either way.
  bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with(Prefix: "neg:");
  auto FB = getFeatureBits();
  bool UsesNeg = false;
  if (FB[AMDGPU::FeatureGFX940Insts]) {
    switch (Opc) {
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
      UsesNeg = true;
    }
  }

  if (IsNeg == UsesNeg)
    return true;

  Error(L: BLGPLoc,
        Msg: UsesNeg ? "invalid modifier: blgp is not supported"
                    : "invalid modifier: neg is not supported");

  return false;
}
5320
// GFX11 S_WAITCNT_{EXP,LGKM,VM,VS}CNT: the register operand must be null.
// (The operand is stored under the 'sdst' name; the diagnostic calls it
// src0, matching the assembly syntax.)
bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
                                      const OperandVector &Operands) {
  if (!isGFX11Plus())
    return true;

  unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
    return true;

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::sdst);
  assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
  auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg());
  if (Reg == AMDGPU::SGPR_NULL)
    return true;

  Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx), Msg: "src0 must be null");
  return false;
}
5342
// Validate DS instructions: GWS opcodes get their own register-alignment
// check; for the rest, reject the 'gds' modifier on targets without GDS.
bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
                                 const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::DS) == 0)
    return true;
  if (TSFlags & SIInstrFlags::GWS)
    return validateGWS(Inst, Operands);
  // Only validate GDS for non-GWS instructions.
  if (hasGDS())
    return true;
  int GDSIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::gds);
  if (GDSIdx < 0)
    return true;
  unsigned GDS = Inst.getOperand(i: GDSIdx).getImm();
  if (GDS) {
    SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyGDS, Operands);
    Error(L: S, Msg: "gds modifier is not supported on this GPU");
    return false;
  }
  return true;
}
5365
5366// gfx90a has an undocumented limitation:
5367// DS_GWS opcodes must use even aligned registers.
// On gfx90a, the data0 register of DS_GWS_INIT/BARRIER/SEMA_BR must be an
// even-numbered VGPR/AGPR. Emits a diagnostic and returns false otherwise.
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    return true;

  int Opc = Inst.getOpcode();
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID);
  int Data0Pos =
      AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::data0);
  assert(Data0Pos != -1);
  auto Reg = Inst.getOperand(i: Data0Pos).getReg();
  // Compute the register index relative to VGPR0 or AGPR0 as appropriate.
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  if (RegIdx & 1) {
    Error(L: getOperandLoc(Operands, MCOpIdx: Data0Pos), Msg: "vgpr must be even aligned");
    return false;
  }

  return true;
}
5392
// Validate the cache-policy (cpol) operand against the target and opcode:
// scale_offset/nv availability, SMRD cache-policy limits, the gfx90a 'scc'
// modifier, and glc/sc0 requirements on atomic return vs no-return forms.
// GFX12+ delegates to validateTHAndScopeBits.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            SMLoc IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(),
                                           Name: AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(i: CPolPos).getImm();

  if (!isGFX1250Plus()) {
    // scale_offset and nv are GFX1250+ only. Note: these emit an error but
    // still fall through to the remaining checks.
    if (CPol & CPol::SCAL) {
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      // Narrow the location to the offending token within the cpol text.
      S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scale_offset")]);
      Error(L: S, Msg: "scale_offset is not supported on this GPU");
    }
    if (CPol & CPol::NV) {
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "nv")]);
      Error(L: S, Msg: "nv is not supported on this GPU");
    }
  }

  if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Opcode: Inst.getOpcode())) {
    SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scale_offset")]);
    Error(L: S, Msg: "scale_offset is not supported for this instruction");
  }

  if (isGFX12Plus())
    return validateTHAndScopeBits(Inst, Operands, CPol);

  uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
  if (TSFlags & SIInstrFlags::SMRD) {
    if (CPol && (isSI() || isCI())) {
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
      Error(L: S, Msg: "cache policy is not supported for SMRD instructions");
      return false;
    }
    if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
      Error(L: IDLoc, Msg: "invalid cache policy for SMEM instruction");
      return false;
    }
  }

  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
    // 'scc' is only meaningful on memory-class encodings.
    const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
                                      SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
                                      SIInstrFlags::FLAT;
    if (!(TSFlags & AllowSCCModifier)) {
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scc")]);
      Error(L: S,
            Msg: "scc modifier is not supported for this instruction on this GPU");
      return false;
    }
  }

  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
    return true;

  // Atomics: the return form requires glc (sc0 on gfx940); the no-return
  // form forbids it.
  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(L: IDLoc, Msg: isGFX940() ? "instruction must use sc0"
                                   : "instruction must use glc");
      return false;
    }
  } else {
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(
          Ptr: &CStr.data()[CStr.find(Str: isGFX940() ? "sc0" : "glc")]);
      Error(L: S, Msg: isGFX940() ? "instruction must not use sc0"
                               : "instruction must not use glc");
      return false;
    }
  }

  return true;
}
5478
// Validate the TH (temporal hint) and scope fields of a gfx12+ cache-policy
// (CPol) operand. Returns false, after reporting a diagnostic at the CPol
// operand, when the combination is illegal for this instruction class.
bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const unsigned CPol) {
  const unsigned TH = CPol & AMDGPU::CPol::TH;
  const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;

  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &TID = MII.get(Opcode);

  // Shared error helper: all diagnostics point at the CPol operand and
  // report failure to the caller.
  auto PrintError = [&](StringRef Msg) {
    SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
    Error(L: S, Msg);
    return false;
  };

  // TH_ATOMIC_RETURN is meaningless on a no-return atomic.
  if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
      (TID.TSFlags & SIInstrFlags::IsAtomicNoRet))
    return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");

  // Conversely, FLAT/MUBUF returning atomics must explicitly request the
  // returned value.
  if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
      (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
      (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
    return PrintError("instruction must use th:TH_ATOMIC_RETURN");

  // No explicit temporal hint: nothing further to check.
  if (TH == 0)
    return true;

  // These three th encodings are not available on scalar memory (SMEM).
  if ((TID.TSFlags & SIInstrFlags::SMRD) &&
      ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
       (TH == AMDGPU::CPol::TH_NT_HT)))
    return PrintError("invalid th value for SMEM instruction");

  // TH_BYPASS is tied to system scope: the TH_REAL_BYPASS encoding bit must
  // be set exactly when Scope == SCOPE_SYS.
  if (TH == AMDGPU::CPol::TH_BYPASS) {
    if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
         CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
        (Scope == AMDGPU::CPol::SCOPE_SYS &&
         !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
      return PrintError("scope and th combination is not valid");
  }

  // Finally, the th value's class (atomic / store / load) must match the
  // instruction's temporal-hint type.
  unsigned THType = AMDGPU::getTemporalHintType(TID);
  if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
      return PrintError("invalid th value for atomic instructions");
  } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
      return PrintError("invalid th value for store instructions");
  } else {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
      return PrintError("invalid th value for load instructions");
  }

  return true;
}
5533
5534bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5535 const OperandVector &Operands) {
5536 const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
5537 if (Desc.mayStore() &&
5538 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
5539 SMLoc Loc = getImmLoc(Type: AMDGPUOperand::ImmTyTFE, Operands);
5540 if (Loc != getInstLoc(Operands)) {
5541 Error(L: Loc, Msg: "TFE modifier has no meaning for store instructions");
5542 return false;
5543 }
5544 }
5545
5546 return true;
5547}
5548
// For WMMA instructions carrying matrix-format modifiers, check that each
// matrix source operand's register tuple is the size implied by its format
// (matrix_a_fmt constrains src0, matrix_b_fmt constrains src1).
bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  // Returns true when FmtOp is absent from this opcode, or when SrcOp's
  // register class has exactly the bit width the format requires; otherwise
  // emits a diagnostic at the source operand and returns false.
  auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
    int FmtIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: FmtOp);
    if (FmtIdx == -1)
      return true;
    unsigned Fmt = Inst.getOperand(i: FmtIdx).getImm();
    int SrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: SrcOp);
    // Each format maps to a register count; 32 bits per VGPR.
    unsigned RegSize =
        TRI->getRegClass(i: MII.getOpRegClassID(OpInfo: Desc.operands()[SrcIdx], HwModeId: HwMode))
            .getSizeInBits();

    if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * 32)
      return true;

    Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
          Msg: "wrong register tuple size for " +
              Twine(WMMAMods::ModMatrixFmt[Fmt]));
    return false;
  };

  return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
         validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
}
5577
// Top-level semantic validation of a successfully matched instruction.
// Runs every instruction-family check in sequence and returns false on the
// first failure. Some validators emit their own diagnostics; for the rest,
// the error is reported here at the relevant modifier location (or IDLoc).
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
                                          const OperandVector &Operands) {
  if (!validateLdsDirect(Inst, Operands))
    return false;
  if (!validateTrue16OpSel(Inst)) {
    Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands),
          Msg: "op_sel operand conflicts with 16-bit operand suffix");
    return false;
  }
  // Literal-operand and constant-bus limits.
  if (!validateSOPLiteral(Inst, Operands))
    return false;
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateVOPD(Inst, Operands)) {
    return false;
  }
  // Modifier checks (clamp, op_sel, neg_lo/neg_hi, DPP).
  if (!validateIntClampSupported(Inst)) {
    Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyClamp, Operands),
          Msg: "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands),
          Msg: "invalid op_sel operand");
    return false;
  }
  if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_lo)) {
    Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegLo, Operands),
          Msg: "invalid neg_lo operand");
    return false;
  }
  if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_hi)) {
    Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegHi, Operands),
          Msg: "invalid neg_hi operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  // Image (MIMG) checks.
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands),
          Msg: "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst, Operands)) {
    Error(L: IDLoc, Msg: "missing dim operand");
    return false;
  }
  if (!validateTensorR128(Inst)) {
    Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands),
          Msg: "instruction must set modifier r128=0");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDim, Operands),
          Msg: "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    return false;
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands),
          Msg: "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands),
          Msg: "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  // Operand/addressing checks and MAI/MFMA constraints.
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMAISrc2(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }

  // Register-class constraints (AGPR/VGPR usage and alignment).
  if (!validateAGPRLdSt(Inst)) {
    Error(L: IDLoc, Msg: getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
      ? "invalid register class: data and dst should be all VGPR or AGPR"
      : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(L: IDLoc,
          Msg: "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateDS(Inst, Operands)) {
    return false;
  }

  if (!validateBLGP(Inst, Operands)) {
    return false;
  }

  if (!validateDivScale(Inst)) {
    Error(L: IDLoc, Msg: "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands)) {
    return false;
  }
  if (!validateTFE(Inst, Operands)) {
    return false;
  }
  if (!validateWMMA(Inst, Operands)) {
    return false;
  }

  return true;
}
5711
5712static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5713 const FeatureBitset &FBS,
5714 unsigned VariantID = 0);
5715
5716static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5717 const FeatureBitset &AvailableFeatures,
5718 unsigned VariantID);
5719
// Convenience overload: check the mnemonic against every assembler variant.
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, Variants: getAllVariants());
}
5724
5725bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5726 const FeatureBitset &FBS,
5727 ArrayRef<unsigned> Variants) {
5728 for (auto Variant : Variants) {
5729 if (AMDGPUCheckMnemonic(Mnemonic: Mnemo, AvailableFeatures: FBS, VariantID: Variant))
5730 return true;
5731 }
5732
5733 return false;
5734}
5735
// Produce the best possible diagnostic for a mnemonic that failed to match.
// Returns false if the mnemonic is actually supported (no error emitted);
// otherwise emits a progressively more specific error: wrong variant,
// wrong wavesize, wrong GPU, or — failing everything — a spell-check hint.
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  SMLoc IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(FB: getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, Variants: getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(L: IDLoc,
                 Msg: Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Check if this instruction may be used with a different wavesize.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    // FIXME: Use getAvailableFeatures, and do not manually recompute
    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(I: AMDGPU::FeatureWavefrontSize64)
        .flip(I: AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FB: FeaturesWS32);

    if (isSupportedMnemo(Mnemo, FBS: AvailableFeaturesWS32, Variants: getMatchedVariants()))
      return Error(L: IDLoc, Msg: "instruction requires wavesize=32");
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FBS: FeatureBitset().set())) {
    return Error(L: IDLoc, Msg: "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(S: Mnemo, FBS);
  return Error(L: IDLoc, Msg: "invalid instruction" + Suggestion);
}
5780
5781static bool isInvalidVOPDY(const OperandVector &Operands,
5782 uint64_t InvalidOprIdx) {
5783 assert(InvalidOprIdx < Operands.size());
5784 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5785 if (Op.isToken() && InvalidOprIdx > 1) {
5786 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5787 return PrevOp.isToken() && PrevOp.getToken() == "::";
5788 }
5789 return false;
5790}
5791
// Match a parsed instruction against every active assembler variant, keep
// the most specific failure status, and on success validate and emit the
// MCInst. Returns true on error (MCTargetAsmParser convention).
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  Inst.setLoc(IDLoc);
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, ErrorInfo&: EI, matchingInlineAsm: MatchingInlineAsm,
                                  VariantID: Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // Semantic checks run after a syntactic match; they emit their own
    // diagnostics on failure.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Out.emitInstruction(Inst, STI: getSTI());
    return false;
  }

  // No variant matched: prefer a precise "unsupported" diagnostic over the
  // generic status-based messages below.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(L: IDLoc, Msg: "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    // ErrorInfo is the index of the offending parsed operand, when known.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(L: IDLoc, Msg: "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;

      if (isInvalidVOPDY(Operands, InvalidOprIdx: ErrorInfo))
        return Error(L: ErrorLoc, Msg: "invalid VOPDY instruction");
    }
    return Error(L: ErrorLoc, Msg: "invalid operand for instruction");
  }

  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
5860
5861bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5862 int64_t Tmp = -1;
5863 if (!isToken(Kind: AsmToken::Integer) && !isToken(Kind: AsmToken::Identifier)) {
5864 return true;
5865 }
5866 if (getParser().parseAbsoluteExpression(Res&: Tmp)) {
5867 return true;
5868 }
5869 Ret = static_cast<uint32_t>(Tmp);
5870 return false;
5871}
5872
5873bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5874 if (!getSTI().getTargetTriple().isAMDGCN())
5875 return TokError(Msg: "directive only supported for amdgcn architecture");
5876
5877 std::string TargetIDDirective;
5878 SMLoc TargetStart = getTok().getLoc();
5879 if (getParser().parseEscapedString(Data&: TargetIDDirective))
5880 return true;
5881
5882 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5883 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5884 return getParser().Error(L: TargetRange.Start,
5885 Msg: (Twine(".amdgcn_target directive's target id ") +
5886 Twine(TargetIDDirective) +
5887 Twine(" does not match the specified target id ") +
5888 Twine(getTargetStreamer().getTargetID()->toString())).str());
5889
5890 return false;
5891}
5892
// Emit a generic out-of-range diagnostic covering Range.
// Always returns true (the MCAsmParser error convention).
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(L: Range.Start, Msg: "value out of range", Range);
}
5896
// Compute the VGPR/SGPR "block" counts (the encoded granule counts for the
// kernel descriptor) as MCExprs, range-checking resolvable SGPR totals
// against the GPU's addressable limit. Returns true on error.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(GPU: getSTI().getCPU());
  MCContext &Ctx = getContext();

  const MCExpr *NumSGPRs = NextFreeSGPR;
  int64_t EvaluatedSGPRs;

  // gfx10+ does not encode an SGPR count; treat it as zero.
  if (Version.Major >= 10)
    NumSGPRs = MCConstantExpr::create(Value: 0, Ctx);
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(STI: &getSTI());

    // gfx8/9 (without the SGPR-init bug): the user-visible count alone must
    // fit, before accounting for the extra (VCC/flat-scratch/XNACK) SGPRs.
    if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && Version.Major >= 8 &&
        !Features.test(I: FeatureSGPRInitBug) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(Range: SGPRRange);

    const MCExpr *ExtraSGPRs =
        AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
    NumSGPRs = MCBinaryExpr::createAdd(LHS: NumSGPRs, RHS: ExtraSGPRs, Ctx);

    // gfx7-and-older (or the init-bug case): the total including the extra
    // SGPRs must fit.
    if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) &&
        (Version.Major <= 7 || Features.test(I: FeatureSGPRInitBug)) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(Range: SGPRRange);

    // The SGPR-init hardware bug forces a fixed SGPR allocation.
    if (Features.test(I: FeatureSGPRInitBug))
      NumSGPRs =
          MCConstantExpr::create(Value: IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
  }

  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    const MCExpr *OneConst = MCConstantExpr::create(Value: 1ul, Ctx);
    const MCExpr *GranuleConst = MCConstantExpr::create(Value: Granule, Ctx);
    const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax(Args: {NumGPR, OneConst}, Ctx);
    const MCExpr *AlignToGPR =
        AMDGPUMCExpr::createAlignTo(Value: MaxNumGPR, Align: GranuleConst, Ctx);
    const MCExpr *DivGPR =
        MCBinaryExpr::createDiv(LHS: AlignToGPR, RHS: GranuleConst, Ctx);
    const MCExpr *SubGPR = MCBinaryExpr::createSub(LHS: DivGPR, RHS: OneConst, Ctx);
    return SubGPR;
  };

  VGPRBlocks = GetNumGPRBlocks(
      NextFreeVGPR,
      IsaInfo::getVGPREncodingGranule(STI: &getSTI(), EnableWavefrontSize32));
  SGPRBlocks =
      GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(STI: &getSTI()));

  return false;
}
5959
5960bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5961 if (!getSTI().getTargetTriple().isAMDGCN())
5962 return TokError(Msg: "directive only supported for amdgcn architecture");
5963
5964 if (!isHsaAbi(STI: getSTI()))
5965 return TokError(Msg: "directive only supported for amdhsa OS");
5966
5967 StringRef KernelName;
5968 if (getParser().parseIdentifier(Res&: KernelName))
5969 return true;
5970
5971 AMDGPU::MCKernelDescriptor KD =
5972 AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
5973 STI: &getSTI(), Ctx&: getContext());
5974
5975 StringSet<> Seen;
5976
5977 IsaVersion IVersion = getIsaVersion(GPU: getSTI().getCPU());
5978
5979 const MCExpr *ZeroExpr = MCConstantExpr::create(Value: 0, Ctx&: getContext());
5980 const MCExpr *OneExpr = MCConstantExpr::create(Value: 1, Ctx&: getContext());
5981
5982 SMRange VGPRRange;
5983 const MCExpr *NextFreeVGPR = ZeroExpr;
5984 const MCExpr *AccumOffset = MCConstantExpr::create(Value: 0, Ctx&: getContext());
5985 const MCExpr *NamedBarCnt = ZeroExpr;
5986 uint64_t SharedVGPRCount = 0;
5987 uint64_t PreloadLength = 0;
5988 uint64_t PreloadOffset = 0;
5989 SMRange SGPRRange;
5990 const MCExpr *NextFreeSGPR = ZeroExpr;
5991
5992 // Count the number of user SGPRs implied from the enabled feature bits.
5993 unsigned ImpliedUserSGPRCount = 0;
5994
5995 // Track if the asm explicitly contains the directive for the user SGPR
5996 // count.
5997 std::optional<unsigned> ExplicitUserSGPRCount;
5998 const MCExpr *ReserveVCC = OneExpr;
5999 const MCExpr *ReserveFlatScr = OneExpr;
6000 std::optional<bool> EnableWavefrontSize32;
6001
6002 while (true) {
6003 while (trySkipToken(Kind: AsmToken::EndOfStatement));
6004
6005 StringRef ID;
6006 SMRange IDRange = getTok().getLocRange();
6007 if (!parseId(Val&: ID, ErrMsg: "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6008 return true;
6009
6010 if (ID == ".end_amdhsa_kernel")
6011 break;
6012
6013 if (!Seen.insert(key: ID).second)
6014 return TokError(Msg: ".amdhsa_ directives cannot be repeated");
6015
6016 SMLoc ValStart = getLoc();
6017 const MCExpr *ExprVal;
6018 if (getParser().parseExpression(Res&: ExprVal))
6019 return true;
6020 SMLoc ValEnd = getLoc();
6021 SMRange ValRange = SMRange(ValStart, ValEnd);
6022
6023 int64_t IVal = 0;
6024 uint64_t Val = IVal;
6025 bool EvaluatableExpr;
6026 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(Res&: IVal))) {
6027 if (IVal < 0)
6028 return OutOfRangeError(Range: ValRange);
6029 Val = IVal;
6030 }
6031
6032#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6033 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6034 return OutOfRangeError(RANGE); \
6035 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6036 getContext());
6037
6038// Some fields use the parsed value immediately which requires the expression to
6039// be solvable.
6040#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6041 if (!(RESOLVED)) \
6042 return Error(IDRange.Start, "directive should have resolvable expression", \
6043 IDRange);
6044
6045 if (ID == ".amdhsa_group_segment_fixed_size") {
6046 if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6047 CHAR_BIT>(x: Val))
6048 return OutOfRangeError(Range: ValRange);
6049 KD.group_segment_fixed_size = ExprVal;
6050 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6051 if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6052 CHAR_BIT>(x: Val))
6053 return OutOfRangeError(Range: ValRange);
6054 KD.private_segment_fixed_size = ExprVal;
6055 } else if (ID == ".amdhsa_kernarg_size") {
6056 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(x: Val))
6057 return OutOfRangeError(Range: ValRange);
6058 KD.kernarg_size = ExprVal;
6059 } else if (ID == ".amdhsa_user_sgpr_count") {
6060 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6061 ExplicitUserSGPRCount = Val;
6062 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6063 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6064 if (hasArchitectedFlatScratch())
6065 return Error(L: IDRange.Start,
6066 Msg: "directive is not supported with architected flat scratch",
6067 Range: IDRange);
6068 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6069 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6070 ExprVal, ValRange);
6071 if (Val)
6072 ImpliedUserSGPRCount += 4;
6073 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6074 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6075 if (!hasKernargPreload())
6076 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6077
6078 if (Val > getMaxNumUserSGPRs())
6079 return OutOfRangeError(Range: ValRange);
6080 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6081 ValRange);
6082 if (Val) {
6083 ImpliedUserSGPRCount += Val;
6084 PreloadLength = Val;
6085 }
6086 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6087 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6088 if (!hasKernargPreload())
6089 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6090
6091 if (Val >= 1024)
6092 return OutOfRangeError(Range: ValRange);
6093 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6094 ValRange);
6095 if (Val)
6096 PreloadOffset = Val;
6097 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6098 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6099 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6100 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6101 ValRange);
6102 if (Val)
6103 ImpliedUserSGPRCount += 2;
6104 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6105 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6106 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6107 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6108 ValRange);
6109 if (Val)
6110 ImpliedUserSGPRCount += 2;
6111 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6112 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6113 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6114 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6115 ExprVal, ValRange);
6116 if (Val)
6117 ImpliedUserSGPRCount += 2;
6118 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6119 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6120 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6121 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6122 ValRange);
6123 if (Val)
6124 ImpliedUserSGPRCount += 2;
6125 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6126 if (hasArchitectedFlatScratch())
6127 return Error(L: IDRange.Start,
6128 Msg: "directive is not supported with architected flat scratch",
6129 Range: IDRange);
6130 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6131 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6132 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6133 ExprVal, ValRange);
6134 if (Val)
6135 ImpliedUserSGPRCount += 2;
6136 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6137 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6138 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6139 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6140 ExprVal, ValRange);
6141 if (Val)
6142 ImpliedUserSGPRCount += 1;
6143 } else if (ID == ".amdhsa_wavefront_size32") {
6144 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6145 if (IVersion.Major < 10)
6146 return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
6147 EnableWavefrontSize32 = Val;
6148 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6149 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6150 ValRange);
6151 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6152 PARSE_BITS_ENTRY(KD.kernel_code_properties,
6153 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6154 ValRange);
6155 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6156 if (hasArchitectedFlatScratch())
6157 return Error(L: IDRange.Start,
6158 Msg: "directive is not supported with architected flat scratch",
6159 Range: IDRange);
6160 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6161 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6162 ValRange);
6163 } else if (ID == ".amdhsa_enable_private_segment") {
6164 if (!hasArchitectedFlatScratch())
6165 return Error(
6166 L: IDRange.Start,
6167 Msg: "directive is not supported without architected flat scratch",
6168 Range: IDRange);
6169 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6170 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6171 ValRange);
6172 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6173 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6174 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6175 ValRange);
6176 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6177 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6178 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6179 ValRange);
6180 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6181 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6182 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6183 ValRange);
6184 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6185 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6186 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6187 ValRange);
6188 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6189 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6190 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6191 ValRange);
6192 } else if (ID == ".amdhsa_next_free_vgpr") {
6193 VGPRRange = ValRange;
6194 NextFreeVGPR = ExprVal;
6195 } else if (ID == ".amdhsa_next_free_sgpr") {
6196 SGPRRange = ValRange;
6197 NextFreeSGPR = ExprVal;
6198 } else if (ID == ".amdhsa_accum_offset") {
6199 if (!isGFX90A())
6200 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6201 AccumOffset = ExprVal;
6202 } else if (ID == ".amdhsa_named_barrier_count") {
6203 if (!isGFX1250Plus())
6204 return Error(L: IDRange.Start, Msg: "directive requires gfx1250+", Range: IDRange);
6205 NamedBarCnt = ExprVal;
6206 } else if (ID == ".amdhsa_reserve_vcc") {
6207 if (EvaluatableExpr && !isUInt<1>(x: Val))
6208 return OutOfRangeError(Range: ValRange);
6209 ReserveVCC = ExprVal;
6210 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6211 if (IVersion.Major < 7)
6212 return Error(L: IDRange.Start, Msg: "directive requires gfx7+", Range: IDRange);
6213 if (hasArchitectedFlatScratch())
6214 return Error(L: IDRange.Start,
6215 Msg: "directive is not supported with architected flat scratch",
6216 Range: IDRange);
6217 if (EvaluatableExpr && !isUInt<1>(x: Val))
6218 return OutOfRangeError(Range: ValRange);
6219 ReserveFlatScr = ExprVal;
6220 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6221 if (IVersion.Major < 8)
6222 return Error(L: IDRange.Start, Msg: "directive requires gfx8+", Range: IDRange);
6223 if (!isUInt<1>(x: Val))
6224 return OutOfRangeError(Range: ValRange);
6225 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6226 return getParser().Error(L: IDRange.Start, Msg: ".amdhsa_reserve_xnack_mask does not match target id",
6227 Range: IDRange);
6228 } else if (ID == ".amdhsa_float_round_mode_32") {
6229 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6230 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6231 ValRange);
6232 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6233 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6234 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6235 ValRange);
6236 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6237 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6238 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6239 ValRange);
6240 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6241 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6242 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6243 ValRange);
6244 } else if (ID == ".amdhsa_dx10_clamp") {
6245 if (!getSTI().hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode))
6246 return Error(L: IDRange.Start, Msg: "directive unsupported on gfx1170+",
6247 Range: IDRange);
6248 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6249 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6250 ValRange);
6251 } else if (ID == ".amdhsa_ieee_mode") {
6252 if (!getSTI().hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode))
6253 return Error(L: IDRange.Start, Msg: "directive unsupported on gfx1170+",
6254 Range: IDRange);
6255 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6256 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6257 ValRange);
6258 } else if (ID == ".amdhsa_fp16_overflow") {
6259 if (IVersion.Major < 9)
6260 return Error(L: IDRange.Start, Msg: "directive requires gfx9+", Range: IDRange);
6261 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6262 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6263 ValRange);
6264 } else if (ID == ".amdhsa_tg_split") {
6265 if (!isGFX90A())
6266 return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6267 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6268 ExprVal, ValRange);
6269 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6270 if (!supportsWGP(STI: getSTI()))
6271 return Error(L: IDRange.Start,
6272 Msg: "directive unsupported on " + getSTI().getCPU(), Range: IDRange);
6273 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6274 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6275 ValRange);
6276 } else if (ID == ".amdhsa_memory_ordered") {
6277 if (IVersion.Major < 10)
6278 return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
6279 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6280 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6281 ValRange);
6282 } else if (ID == ".amdhsa_forward_progress") {
6283 if (IVersion.Major < 10)
6284 return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
6285 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6286 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6287 ValRange);
6288 } else if (ID == ".amdhsa_shared_vgpr_count") {
6289 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6290 if (IVersion.Major < 10 || IVersion.Major >= 12)
6291 return Error(L: IDRange.Start, Msg: "directive requires gfx10 or gfx11",
6292 Range: IDRange);
6293 SharedVGPRCount = Val;
6294 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6295 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6296 ValRange);
6297 } else if (ID == ".amdhsa_inst_pref_size") {
6298 if (IVersion.Major < 11)
6299 return Error(L: IDRange.Start, Msg: "directive requires gfx11+", Range: IDRange);
6300 if (IVersion.Major == 11) {
6301 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6302 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6303 ValRange);
6304 } else {
6305 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6306 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6307 ValRange);
6308 }
6309 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6310 PARSE_BITS_ENTRY(
6311 KD.compute_pgm_rsrc2,
6312 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6313 ExprVal, ValRange);
6314 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6315 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6316 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6317 ExprVal, ValRange);
6318 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6319 PARSE_BITS_ENTRY(
6320 KD.compute_pgm_rsrc2,
6321 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6322 ExprVal, ValRange);
6323 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6324 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6325 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6326 ExprVal, ValRange);
6327 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6328 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6329 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6330 ExprVal, ValRange);
6331 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6332 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6333 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6334 ExprVal, ValRange);
6335 } else if (ID == ".amdhsa_exception_int_div_zero") {
6336 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6337 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6338 ExprVal, ValRange);
6339 } else if (ID == ".amdhsa_round_robin_scheduling") {
6340 if (IVersion.Major < 12)
6341 return Error(L: IDRange.Start, Msg: "directive requires gfx12+", Range: IDRange);
6342 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6343 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6344 ValRange);
6345 } else {
6346 return Error(L: IDRange.Start, Msg: "unknown .amdhsa_kernel directive", Range: IDRange);
6347 }
6348
6349#undef PARSE_BITS_ENTRY
6350 }
6351
6352 if (!Seen.contains(key: ".amdhsa_next_free_vgpr"))
6353 return TokError(Msg: ".amdhsa_next_free_vgpr directive is required");
6354
6355 if (!Seen.contains(key: ".amdhsa_next_free_sgpr"))
6356 return TokError(Msg: ".amdhsa_next_free_sgpr directive is required");
6357
6358 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(u&: ImpliedUserSGPRCount);
6359
6360 // Consider the case where the total number of UserSGPRs with trailing
6361 // allocated preload SGPRs, is greater than the number of explicitly
6362 // referenced SGPRs.
6363 if (PreloadLength) {
6364 MCContext &Ctx = getContext();
6365 NextFreeSGPR = AMDGPUMCExpr::createMax(
6366 Args: {NextFreeSGPR, MCConstantExpr::create(Value: UserSGPRCount, Ctx)}, Ctx);
6367 }
6368
6369 const MCExpr *VGPRBlocks;
6370 const MCExpr *SGPRBlocks;
6371 if (calculateGPRBlocks(Features: getFeatureBits(), VCCUsed: ReserveVCC, FlatScrUsed: ReserveFlatScr,
6372 XNACKUsed: getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6373 EnableWavefrontSize32, NextFreeVGPR,
6374 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6375 SGPRBlocks))
6376 return true;
6377
6378 int64_t EvaluatedVGPRBlocks;
6379 bool VGPRBlocksEvaluatable =
6380 VGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedVGPRBlocks);
6381 if (VGPRBlocksEvaluatable &&
6382 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6383 x: static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6384 return OutOfRangeError(Range: VGPRRange);
6385 }
6386 AMDGPU::MCKernelDescriptor::bits_set(
6387 Dst&: KD.compute_pgm_rsrc1, Value: VGPRBlocks,
6388 Shift: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6389 Mask: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, Ctx&: getContext());
6390
6391 int64_t EvaluatedSGPRBlocks;
6392 if (SGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedSGPRBlocks) &&
6393 !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6394 x: static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6395 return OutOfRangeError(Range: SGPRRange);
6396 AMDGPU::MCKernelDescriptor::bits_set(
6397 Dst&: KD.compute_pgm_rsrc1, Value: SGPRBlocks,
6398 Shift: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6399 Mask: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, Ctx&: getContext());
6400
6401 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6402 return TokError(Msg: "amdgpu_user_sgpr_count smaller than than implied by "
6403 "enabled user SGPRs");
6404
6405 if (isGFX1250Plus()) {
6406 if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(x: UserSGPRCount))
6407 return TokError(Msg: "too many user SGPRs enabled");
6408 AMDGPU::MCKernelDescriptor::bits_set(
6409 Dst&: KD.compute_pgm_rsrc2,
6410 Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()),
6411 Shift: COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6412 Mask: COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, Ctx&: getContext());
6413 } else {
6414 if (!isUInt<COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_WIDTH>(
6415 x: UserSGPRCount))
6416 return TokError(Msg: "too many user SGPRs enabled");
6417 AMDGPU::MCKernelDescriptor::bits_set(
6418 Dst&: KD.compute_pgm_rsrc2,
6419 Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()),
6420 Shift: COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6421 Mask: COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, Ctx&: getContext());
6422 }
6423
6424 int64_t IVal = 0;
6425 if (!KD.kernarg_size->evaluateAsAbsolute(Res&: IVal))
6426 return TokError(Msg: "Kernarg size should be resolvable");
6427 uint64_t kernarg_size = IVal;
6428 if (PreloadLength && kernarg_size &&
6429 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6430 return TokError(Msg: "Kernarg preload length + offset is larger than the "
6431 "kernarg segment size");
6432
6433 if (isGFX90A()) {
6434 if (!Seen.contains(key: ".amdhsa_accum_offset"))
6435 return TokError(Msg: ".amdhsa_accum_offset directive is required");
6436 int64_t EvaluatedAccum;
6437 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(Res&: EvaluatedAccum);
6438 uint64_t UEvaluatedAccum = EvaluatedAccum;
6439 if (AccumEvaluatable &&
6440 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6441 return TokError(Msg: "accum_offset should be in range [4..256] in "
6442 "increments of 4");
6443
6444 int64_t EvaluatedNumVGPR;
6445 if (NextFreeVGPR->evaluateAsAbsolute(Res&: EvaluatedNumVGPR) &&
6446 AccumEvaluatable &&
6447 UEvaluatedAccum >
6448 alignTo(Value: std::max(a: (uint64_t)1, b: (uint64_t)EvaluatedNumVGPR), Align: 4))
6449 return TokError(Msg: "accum_offset exceeds total VGPR allocation");
6450 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6451 LHS: MCBinaryExpr::createDiv(
6452 LHS: AccumOffset, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext()),
6453 RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext());
6454 MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: AdjustedAccum,
6455 Shift: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6456 Mask: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6457 Ctx&: getContext());
6458 }
6459
6460 if (isGFX1250Plus())
6461 MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: NamedBarCnt,
6462 Shift: COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6463 Mask: COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6464 Ctx&: getContext());
6465
6466 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6467 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6468 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6469 return TokError(Msg: "shared_vgpr_count directive not valid on "
6470 "wavefront size 32");
6471 }
6472
6473 if (VGPRBlocksEvaluatable &&
6474 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6475 63)) {
6476 return TokError(Msg: "shared_vgpr_count*2 + "
6477 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6478 "exceed 63\n");
6479 }
6480 }
6481
6482 getTargetStreamer().EmitAmdhsaKernelDescriptor(STI: getSTI(), KernelName, KernelDescriptor: KD,
6483 NextVGPR: NextFreeVGPR, NextSGPR: NextFreeSGPR,
6484 ReserveVCC, ReserveFlatScr);
6485 return false;
6486}
6487
6488bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6489 uint32_t Version;
6490 if (ParseAsAbsoluteExpression(Ret&: Version))
6491 return true;
6492
6493 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(COV: Version);
6494 return false;
6495}
6496
/// Parse one "key = value" entry inside an .amd_kernel_code_t block and
/// record it in \p C.
///
/// \param ID the already-lexed key identifier.
/// \param C  kernel code object updated in place.
/// \returns true on error (MCAsmParser convention), false on success.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               AMDGPUMCKernelCodeT &C) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  // Delegate field parsing to the kernel-code object; any failure message is
  // accumulated in ErrStr and reported at the current token.
  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!C.ParseKernelCodeT(ID, MCParser&: getParser(), Err)) {
    return TokError(Msg: Err.str());
  }
  Lex();

  // Reject wave-size settings that contradict the subtarget features.
  if (ID == "enable_wavefront_size32") {
    if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError(Msg: "enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!isWave32())
        return TokError(Msg: "enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!isWave64())
        return TokError(Msg: "enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  // wavefront_size is encoded as log2 of the wave size (5 = wave32,
  // 6 = wave64); cross-check it against the subtarget as well.
  if (ID == "wavefront_size") {
    if (C.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError(Msg: "wavefront_size=5 is only allowed on GFX10+");
      if (!isWave32())
        return TokError(Msg: "wavefront_size=5 requires +WavefrontSize32");
    } else if (C.wavefront_size == 6) {
      if (!isWave64())
        return TokError(Msg: "wavefront_size=6 requires +WavefrontSize64");
    }
  }

  return false;
}
6539
6540bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6541 AMDGPUMCKernelCodeT KernelCode;
6542 KernelCode.initDefault(STI: &getSTI(), Ctx&: getContext());
6543
6544 while (true) {
6545 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6546 // will set the current token to EndOfStatement.
6547 while(trySkipToken(Kind: AsmToken::EndOfStatement));
6548
6549 StringRef ID;
6550 if (!parseId(Val&: ID, ErrMsg: "expected value identifier or .end_amd_kernel_code_t"))
6551 return true;
6552
6553 if (ID == ".end_amd_kernel_code_t")
6554 break;
6555
6556 if (ParseAMDKernelCodeTValue(ID, C&: KernelCode))
6557 return true;
6558 }
6559
6560 KernelCode.validate(STI: &getSTI(), Ctx&: getContext());
6561 getTargetStreamer().EmitAMDKernelCodeT(Header&: KernelCode);
6562
6563 return false;
6564}
6565
6566bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6567 StringRef KernelName;
6568 if (!parseId(Val&: KernelName, ErrMsg: "expected symbol name"))
6569 return true;
6570
6571 getTargetStreamer().EmitAMDGPUSymbolType(SymbolName: KernelName,
6572 Type: ELF::STT_AMDGPU_HSA_KERNEL);
6573
6574 KernelScope.initialize(Context&: getContext());
6575 return false;
6576}
6577
6578bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6579 if (!getSTI().getTargetTriple().isAMDGCN()) {
6580 return Error(L: getLoc(),
6581 Msg: ".amd_amdgpu_isa directive is not available on non-amdgcn "
6582 "architectures");
6583 }
6584
6585 auto TargetIDDirective = getLexer().getTok().getStringContents();
6586 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6587 return Error(L: getParser().getTok().getLoc(), Msg: "target id must match options");
6588
6589 getTargetStreamer().EmitISAVersion();
6590 Lex();
6591
6592 return false;
6593}
6594
6595bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6596 assert(isHsaAbi(getSTI()));
6597
6598 std::string HSAMetadataString;
6599 if (ParseToEndDirective(AssemblerDirectiveBegin: HSAMD::V3::AssemblerDirectiveBegin,
6600 AssemblerDirectiveEnd: HSAMD::V3::AssemblerDirectiveEnd, CollectString&: HSAMetadataString))
6601 return true;
6602
6603 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6604 return Error(L: getLoc(), Msg: "invalid HSA metadata");
6605
6606 return false;
6607}
6608
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
///
/// \param AssemblerDirectiveBegin opening directive name; not referenced in
///        this implementation (the caller has already consumed it), kept for
///        a uniform signature.
/// \param AssemblerDirectiveEnd   closing directive that terminates the block.
/// \param CollectString           receives the raw text between directives.
/// \returns true if EOF is reached before the end directive is found.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Preserve whitespace verbatim while collecting the block.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(Kind: AsmToken::Eof)) {
    // Copy leading space tokens through to the collected text.
    while (isToken(Kind: AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    if (trySkipId(Id: AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    // Append the rest of the statement plus a statement separator.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexer behavior before returning.
  getLexer().setSkipSpace(true);

  if (isToken(Kind: AsmToken::Eof) && !FoundEnd) {
    return TokError(Msg: Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  return false;
}
6646
6647/// Parse the assembler directive for new MsgPack-format PAL metadata.
6648bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6649 std::string String;
6650 if (ParseToEndDirective(AssemblerDirectiveBegin: AMDGPU::PALMD::AssemblerDirectiveBegin,
6651 AssemblerDirectiveEnd: AMDGPU::PALMD::AssemblerDirectiveEnd, CollectString&: String))
6652 return true;
6653
6654 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6655 if (!PALMetadata->setFromString(String))
6656 return Error(L: getLoc(), Msg: "invalid PAL metadata");
6657 return false;
6658}
6659
/// Parse the assembler directive for old linear-format PAL metadata:
/// an even-length, comma-separated list of alternating register keys and
/// values. Only valid when targeting the AMDPAL OS.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(L: getLoc(),
                 Msg: (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto *PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  // Parse "key, value" pairs until the list is not continued with a comma.
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Ret&: Key)) {
      return TokError(Msg: Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (!trySkipToken(Kind: AsmToken::Comma)) {
      return TokError(Msg: Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Ret&: Value)) {
      return TokError(Msg: Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Reg: Key, Val: Value);
    if (!trySkipToken(Kind: AsmToken::Comma))
      break;
  }
  return false;
}
6690
/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
/// Declares an LDS (local data share) symbol of the given size; alignment
/// defaults to 4 bytes when omitted.
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Res&: Name))
    return TokError(Msg: "expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (getParser().parseComma())
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(STI: &getSTI());

  // The size must be non-negative and fit into the subtarget's LDS.
  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Res&: Size))
    return true;
  if (Size < 0)
    return Error(L: SizeLoc, Msg: "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(L: SizeLoc, Msg: "size is too large");

  int64_t Alignment = 4;
  if (trySkipToken(Kind: AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Res&: Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Value: Alignment))
      return Error(L: AlignLoc, Msg: "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(L: AlignLoc, Msg: "alignment is too large");
  }

  if (parseEOL())
    return true;

  // Redefinition is only allowed while the symbol is still undefined.
  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(L: NameLoc, Msg: "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Alignment: Align(Alignment));
  return false;
}
6742
/// Dispatch AMDGPU-specific assembler directives to their handlers.
/// Returns true when the directive is not recognized here so the generic
/// parser can process it (MCTargetAsmParser convention).
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  // HSA-ABI targets accept the amdhsa directive family; all other targets
  // accept the legacy amd_kernel_code_t family instead.
  if (isHsaAbi(STI: getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    if (IDVal == ".amdhsa_code_object_version")
      return ParseDirectiveAMDHSACodeObjectVersion();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
      return Error(L: getLoc(), Msg: (Twine(HSAMD::AssemblerDirectiveBegin) +
                               Twine(" directive is "
                                     "not available on non-amdhsa OSes"))
                                  .str());
    }
  }

  // Directives valid regardless of ABI.
  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}
6788
/// Return true if \p Reg is a valid register on the current subtarget.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           MCRegister Reg) {
  // ttmp12..ttmp15 are only accepted on GFX9+.
  if (MRI.regsOverlap(RegA: TTMP12_TTMP13_TTMP14_TTMP15, RegB: Reg))
    return isGFX9Plus();

  // GFX10+ has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(RegA: SGPR104_SGPR105, RegB: Reg))
    return hasSGPR104_SGPR105();

  // Special registers gated by generation and/or target features.
  switch (Reg.id()) {
  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:
    return isGFX9Plus();
  case SRC_FLAT_SCRATCH_BASE_LO:
  case SRC_FLAT_SCRATCH_BASE_HI:
    return hasGloballyAddressableScratch();
  case SRC_POPS_EXITING_WAVE_ID:
    // Present on GFX9/GFX10 only; removed again in GFX11.
    return isGFX9Plus() && !isGFX11Plus();
  case TBA:
  case TBA_LO:
  case TBA_HI:
  case TMA:
  case TMA_LO:
  case TMA_HI:
    // tba/tma were removed in GFX9.
    return !isGFX9Plus();
  case XNACK_MASK:
  case XNACK_MASK_LO:
  case XNACK_MASK_HI:
    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
  case SGPR_NULL:
    return isGFX10Plus();
  case SRC_EXECZ:
  case SRC_VCCZ:
    return !isGFX11Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10Plus flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (Reg.id()) {
    case FLAT_SCR:
    case FLAT_SCR_LO:
    case FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(RegA: SGPR102_SGPR103, RegB: Reg))
    return hasSGPR102_SGPR103();

  return true;
}
6857
/// Parse a single instruction operand.
///
/// Tries, in order: VOPD syntax, the tablegen'd custom operand parsers, an
/// NSA register list in square brackets (when \p Mode is OperandMode_NSA),
/// and finally a generic register-or-immediate.
ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
                                          StringRef Mnemonic,
                                          OperandMode Mode) {
  ParseStatus Res = parseVOPD(Operands);
  if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement))
    return Res;

  // Try to parse with a custom parser
  Res = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement))
    return Res;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(Kind: AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    // Parse a comma-separated register list terminated by ']'.
    for (;;) {
      auto Loc = getLoc();
      Res = parseReg(Operands);
      if (Res.isNoMatch())
        Error(L: Loc, Msg: "expected a register");
      if (!Res.isSuccess())
        return ParseStatus::Failure;

      RBraceLoc = getLoc();
      if (trySkipToken(Kind: AsmToken::RBrac))
        break;

      if (!skipToken(Kind: AsmToken::Comma,
                     ErrMsg: "expected a comma or a closing square bracket"))
        return ParseStatus::Failure;
    }

    // Keep the brackets as token operands only for multi-register lists; a
    // single register is emitted as if no brackets were present.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(I: Operands.begin() + Prefix,
                      Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "[", Loc: LBraceLoc));
      Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "]", Loc: RBraceLoc));
    }

    return ParseStatus::Success;
  }

  return parseRegOrImm(Operands);
}
6910
6911StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6912 // Clear any forced encodings from the previous instruction.
6913 setForcedEncodingSize(0);
6914 setForcedDPP(false);
6915 setForcedSDWA(false);
6916
6917 if (Name.consume_back(Suffix: "_e64_dpp")) {
6918 setForcedDPP(true);
6919 setForcedEncodingSize(64);
6920 return Name;
6921 }
6922 if (Name.consume_back(Suffix: "_e64")) {
6923 setForcedEncodingSize(64);
6924 return Name;
6925 }
6926 if (Name.consume_back(Suffix: "_e32")) {
6927 setForcedEncodingSize(32);
6928 return Name;
6929 }
6930 if (Name.consume_back(Suffix: "_dpp")) {
6931 setForcedDPP(true);
6932 return Name;
6933 }
6934 if (Name.consume_back(Suffix: "_sdwa")) {
6935 setForcedSDWA(true);
6936 return Name;
6937 }
6938 return Name;
6939}
6940
6941static void applyMnemonicAliases(StringRef &Mnemonic,
6942 const FeatureBitset &Features,
6943 unsigned VariantID);
6944
/// Parse a full instruction: the mnemonic (with any forced-encoding suffix)
/// followed by its comma/space separated operand list. Operands are appended
/// to \p Operands. Returns true on error.
bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name, SMLoc NameLoc,
                                       OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);

  // If the target architecture uses MnemonicAlias, call it here to parse
  // operands correctly.
  applyMnemonicAliases(Mnemonic&: Name, Features: getAvailableFeatures(), VariantID: 0);

  Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: NameLoc));

  bool IsMIMG = Name.starts_with(Prefix: "image_");

  while (!trySkipToken(Kind: AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // Image instructions on GFX10+ may use NSA (non-sequential address)
    // syntax for the operand parsed when exactly two operands (mnemonic +
    // one operand) have been collected so far.
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    ParseStatus Res = parseOperand(Operands, Mnemonic: Name, Mode);

    if (!Res.isSuccess()) {
      checkUnsupportedInstruction(Mnemo: Name, IDLoc: NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg = Res.isFailure() ? "failed parsing operand."
                                        : "not a valid operand.";
        Error(L: getLoc(), Msg);
      }
      // Skip to the end of the statement so the parser can resynchronize.
      while (!trySkipToken(Kind: AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(Kind: AsmToken::Comma);
  }

  return false;
}
6985
6986//===----------------------------------------------------------------------===//
6987// Utility functions
6988//===----------------------------------------------------------------------===//
6989
6990ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6991 OperandVector &Operands) {
6992 SMLoc S = getLoc();
6993 if (!trySkipId(Id: Name))
6994 return ParseStatus::NoMatch;
6995
6996 Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: S));
6997 return ParseStatus::Success;
6998}
6999
7000ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7001 int64_t &IntVal) {
7002
7003 if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
7004 return ParseStatus::NoMatch;
7005
7006 return parseExpr(Imm&: IntVal) ? ParseStatus::Success : ParseStatus::Failure;
7007}
7008
/// Parse "<Prefix>:<expr>" and append it to \p Operands as an immediate of
/// type \p ImmTy. \p ConvertResult, if provided, may validate/transform the
/// parsed value in place.
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    std::function<bool(int64_t &)> ConvertResult) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  ParseStatus Res = parseIntWithPrefix(Prefix, IntVal&: Value);
  if (!Res.isSuccess())
    return Res;

  // Note: a rejected value is diagnosed here but the operand is still pushed
  // and Success returned, so operand parsing can continue past the error.
  if (ConvertResult && !ConvertResult(Value)) {
    Error(L: S, Msg: "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Value, Loc: S, Type: ImmTy));
  return ParseStatus::Success;
}
7026
/// Parse "<Prefix>:[b0,b1,...]" where each element is 0 or 1 (at most 4
/// elements), pack the bits little-endian into a single immediate, and
/// append it to \p Operands as type \p ImmTy.
ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
    bool (*ConvertResult)(int64_t &)) {
  SMLoc S = getLoc();
  if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected a left square bracket"))
    return ParseStatus::Failure;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Imm&: Op))
      return ParseStatus::Failure;

    // Each array element is a single bit.
    if (Op != 0 && Op != 1)
      return Error(L: Loc, Msg: "invalid " + StringRef(Prefix) + " value.");

    Val |= (Op << I);

    if (trySkipToken(Kind: AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize)
      return Error(L: getLoc(), Msg: "expected a closing square bracket");

    if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
      return ParseStatus::Failure;
  }

  Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: ImmTy));
  return ParseStatus::Success;
}
7066
/// Parse a named flag operand: "<Name>" sets the bit to 1, "no<Name>" to 0.
/// Appends an immediate operand of type \p ImmTy. If \p IgnoreNegative is
/// set, "no<Name>" is consumed without producing an operand.
ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
                                           OperandVector &Operands,
                                           AMDGPUOperand::ImmTy ImmTy,
                                           bool IgnoreNegative) {
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Id: Name)) {
    Bit = 1;
  } else if (trySkipId(Pref: "no", Id: Name)) {
    if (IgnoreNegative)
      return ParseStatus::Success;
    Bit = 0;
  } else {
    return ParseStatus::NoMatch;
  }

  // Subtarget-specific restrictions on individual modifiers.
  if (Name == "r128" && !hasMIMG_R128())
    return Error(L: S, Msg: "r128 modifier is not supported on this GPU");
  if (Name == "a16" && !hasA16())
    return Error(L: S, Msg: "a16 modifier is not supported on this GPU");

  // ds_gws_* instructions may not disable gds.
  if (Bit == 0 && Name == "gds") {
    StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
    if (Mnemo.starts_with(Prefix: "ds_gws"))
      return Error(L: S, Msg: "nogds is not allowed");
  }

  // On GFX9 an a16 modifier is recorded as R128A16 — presumably the two
  // share an encoding bit on that generation; see instruction definitions.
  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Bit, Loc: S, Type: ImmTy));
  return ParseStatus::Success;
}
7101
/// Map a cache-policy mnemonic token to its CPol bit, or 0 if \p Id is not
/// a cache-policy modifier for this target. A leading "no" prefix is
/// stripped and reported through \p Disabling.
unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
                                      bool &Disabling) const {
  Disabling = Id.consume_front(Prefix: "no");

  // GFX940 non-scalar instructions use the nt/sc0/sc1 names; scalar ("s_")
  // instructions keep the generic dlc/glc/scc/slc names below.
  if (isGFX940() && !Mnemo.starts_with(Prefix: "s_")) {
    return StringSwitch<unsigned>(Id)
        .Case(S: "nt", Value: AMDGPU::CPol::NT)
        .Case(S: "sc0", Value: AMDGPU::CPol::SC0)
        .Case(S: "sc1", Value: AMDGPU::CPol::SC1)
        .Default(Value: 0);
  }

  return StringSwitch<unsigned>(Id)
      .Case(S: "dlc", Value: AMDGPU::CPol::DLC)
      .Case(S: "glc", Value: AMDGPU::CPol::GLC)
      .Case(S: "scc", Value: AMDGPU::CPol::SCC)
      .Case(S: "slc", Value: AMDGPU::CPol::SLC)
      .Default(Value: 0);
}
7121
/// Parse cache-policy modifiers into a single ImmTyCPol immediate operand.
/// GFX12+ uses th/scope/nv/scale_offset keywords; older targets use named
/// bits (glc/slc/dlc/scc), each optionally prefixed with "no".
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  if (isGFX12Plus()) {
    SMLoc StringLoc = getLoc();

    int64_t CPolVal = 0;
    ParseStatus ResTH = ParseStatus::NoMatch;
    ParseStatus ResScope = ParseStatus::NoMatch;
    ParseStatus ResNV = ParseStatus::NoMatch;
    ParseStatus ResScal = ParseStatus::NoMatch;

    // Accept the four modifier kinds in any order, each at most once; the
    // per-kind ParseStatus tracks which have already been consumed.
    for (;;) {
      if (ResTH.isNoMatch()) {
        int64_t TH;
        ResTH = parseTH(Operands, TH);
        if (ResTH.isFailure())
          return ResTH;
        if (ResTH.isSuccess()) {
          CPolVal |= TH;
          continue;
        }
      }

      if (ResScope.isNoMatch()) {
        int64_t Scope;
        ResScope = parseScope(Operands, Scope);
        if (ResScope.isFailure())
          return ResScope;
        if (ResScope.isSuccess()) {
          CPolVal |= Scope;
          continue;
        }
      }

      // NV bit exists on GFX12+, but does something starting from GFX1250.
      // Allow parsing on all GFX12 and fail on validation for better
      // diagnostics.
      if (ResNV.isNoMatch()) {
        if (trySkipId(Id: "nv")) {
          ResNV = ParseStatus::Success;
          CPolVal |= CPol::NV;
          continue;
        } else if (trySkipId(Pref: "no", Id: "nv")) {
          ResNV = ParseStatus::Success;
          continue;
        }
      }

      if (ResScal.isNoMatch()) {
        if (trySkipId(Id: "scale_offset")) {
          ResScal = ParseStatus::Success;
          CPolVal |= CPol::SCAL;
          continue;
        } else if (trySkipId(Pref: "no", Id: "scale_offset")) {
          ResScal = ParseStatus::Success;
          continue;
        }
      }

      break;
    }

    // No modifier of any kind was present.
    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
        ResScal.isNoMatch())
      return ParseStatus::NoMatch;

    Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: CPolVal, Loc: StringLoc,
                                              Type: AMDGPUOperand::ImmTyCPol));
    return ParseStatus::Success;
  }

  // Pre-GFX12 path: consume named bits until a non-cache-policy token.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(Id: getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(L: S, Msg: "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(L: S, Msg: "scc modifier is not supported on this GPU");

    // Each modifier (enabled or disabled form) may appear only once.
    if (Seen & CPol)
      return Error(L: S, Msg: "duplicate cache policy modifier");

    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Enabled, Loc: OpLoc, Type: AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
7226
7227ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7228 int64_t &Scope) {
7229 static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7230 CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7231
7232 ParseStatus Res = parseStringOrIntWithPrefix(
7233 Operands, Name: "scope", Ids: {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7234 IntVal&: Scope);
7235
7236 if (Res.isSuccess())
7237 Scope = Scopes[Scope];
7238
7239 return Res;
7240}
7241
// Parse a "th:TH_..." temporal-hint cache-policy modifier into TH.
// Returns NoMatch when no "th:" prefix is present, Failure (with a
// diagnostic) for unknown or malformed values.
ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
  TH = AMDGPU::CPol::TH_RT; // default

  StringRef Value;
  SMLoc StringLoc;
  ParseStatus Res = parseStringWithPrefix(Prefix: "th", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  if (Value == "TH_DEFAULT")
    TH = AMDGPU::CPol::TH_RT;
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
           Value == "TH_LOAD_NT_WB") {
    // These spellings are explicitly rejected even though the
    // TH_LOAD_/TH_STORE_ prefixes below would otherwise accept them.
    return Error(L: StringLoc, Msg: "invalid th value");
  } else if (Value.consume_front(Prefix: "TH_ATOMIC_")) {
    TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
  } else if (Value.consume_front(Prefix: "TH_LOAD_")) {
    TH = AMDGPU::CPol::TH_TYPE_LOAD;
  } else if (Value.consume_front(Prefix: "TH_STORE_")) {
    TH = AMDGPU::CPol::TH_TYPE_STORE;
  } else {
    return Error(L: StringLoc, Msg: "invalid th value");
  }

  // After consume_front() above, Value holds only the suffix
  // (e.g. "RT_NT" for "TH_LOAD_RT_NT").
  if (Value == "BYPASS")
    TH |= AMDGPU::CPol::TH_REAL_BYPASS;

  if (TH != 0) {
    // Atomic hints and load/store hints use distinct suffix vocabularies.
    // An unrecognized suffix yields the 0xffffffff sentinel rejected below.
    if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
      TH |= StringSwitch<int64_t>(Value)
                .Case(S: "RETURN", Value: AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case(S: "RT", Value: AMDGPU::CPol::TH_RT)
                .Case(S: "RT_RETURN", Value: AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case(S: "NT", Value: AMDGPU::CPol::TH_ATOMIC_NT)
                .Case(S: "NT_RETURN", Value: AMDGPU::CPol::TH_ATOMIC_NT |
                                          AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case(S: "CASCADE_RT", Value: AMDGPU::CPol::TH_ATOMIC_CASCADE)
                .Case(S: "CASCADE_NT", Value: AMDGPU::CPol::TH_ATOMIC_CASCADE |
                                            AMDGPU::CPol::TH_ATOMIC_NT)
                .Default(Value: 0xffffffff);
    else
      TH |= StringSwitch<int64_t>(Value)
                .Case(S: "RT", Value: AMDGPU::CPol::TH_RT)
                .Case(S: "NT", Value: AMDGPU::CPol::TH_NT)
                .Case(S: "HT", Value: AMDGPU::CPol::TH_HT)
                .Case(S: "LU", Value: AMDGPU::CPol::TH_LU)
                .Case(S: "WB", Value: AMDGPU::CPol::TH_WB)
                .Case(S: "NT_RT", Value: AMDGPU::CPol::TH_NT_RT)
                .Case(S: "RT_NT", Value: AMDGPU::CPol::TH_RT_NT)
                .Case(S: "NT_HT", Value: AMDGPU::CPol::TH_NT_HT)
                .Case(S: "NT_WB", Value: AMDGPU::CPol::TH_NT_WB)
                .Case(S: "BYPASS", Value: AMDGPU::CPol::TH_BYPASS)
                .Default(Value: 0xffffffff);
  }

  if (TH == 0xffffffff)
    return Error(L: StringLoc, Msg: "invalid th value");

  return ParseStatus::Success;
}
7302
7303static void
7304addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7305 AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7306 AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
7307 std::optional<unsigned> InsertAt = std::nullopt) {
7308 auto i = OptionalIdx.find(x: ImmT);
7309 if (i != OptionalIdx.end()) {
7310 unsigned Idx = i->second;
7311 const AMDGPUOperand &Op =
7312 static_cast<const AMDGPUOperand &>(*Operands[Idx]);
7313 if (InsertAt)
7314 Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Op.getImm()));
7315 else
7316 Op.addImmOperands(Inst, N: 1);
7317 } else {
7318 if (InsertAt.has_value())
7319 Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Default));
7320 else
7321 Inst.addOperand(Op: MCOperand::createImm(Val: Default));
7322 }
7323}
7324
// Parse "<Prefix>:<identifier>". Returns NoMatch if the next tokens are not
// "Prefix:", Failure if the identifier after the colon is missing. On
// success, Value holds the identifier and StringLoc its location.
ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
                                                   StringRef &Value,
                                                   SMLoc &StringLoc) {
  if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
    return ParseStatus::NoMatch;

  StringLoc = getLoc();
  return parseId(Val&: Value, ErrMsg: "expected an identifier") ? ParseStatus::Success
                                                    : ParseStatus::Failure;
}
7335
// Parse "<Name>:<value>" where <value> is either one of the symbolic names
// in Ids (IntVal becomes its index) or an integer expression. Values outside
// [0, Ids.size()) are rejected with a diagnostic.
ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
    OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
    int64_t &IntVal) {
  if (!trySkipId(Id: Name, Kind: AsmToken::Colon))
    return ParseStatus::NoMatch;

  SMLoc StringLoc = getLoc();

  StringRef Value;
  if (isToken(Kind: AsmToken::Identifier)) {
    Value = getTokenStr();
    lex();

    // Linear search; an identifier not in Ids leaves IntVal == Ids.size(),
    // which the range check below turns into an error.
    for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
      if (Value == Ids[IntVal])
        break;
  } else if (!parseExpr(Imm&: IntVal))
    return ParseStatus::Failure;

  if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
    return Error(L: StringLoc, Msg: "invalid " + Twine(Name) + " value");

  return ParseStatus::Success;
}
7360
7361ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7362 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7363 AMDGPUOperand::ImmTy Type) {
7364 SMLoc S = getLoc();
7365 int64_t IntVal;
7366
7367 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7368 if (Res.isSuccess())
7369 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S, Type));
7370
7371 return Res;
7372}
7373
7374//===----------------------------------------------------------------------===//
7375// MTBUF format
7376//===----------------------------------------------------------------------===//
7377
// Try to parse "<Pref>:<integer>" into Fmt. Returns false only on a hard
// failure (a diagnostic has been emitted); returns true both on success and
// when the prefix is absent, in which case Fmt is left unchanged.
bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
                                  int64_t MaxVal,
                                  int64_t &Fmt) {
  int64_t Val;
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: Val);
  if (Res.isFailure())
    return false;
  if (Res.isNoMatch())
    return true;

  // Reject values outside [0, MaxVal].
  if (Val < 0 || Val > MaxVal) {
    Error(L: Loc, Msg: Twine("out of range ", StringRef(Pref)));
    return false;
  }

  Fmt = Val;
  return true;
}
7398
7399ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7400 AMDGPUOperand::ImmTy ImmTy) {
7401 const char *Pref = "index_key";
7402 int64_t ImmVal = 0;
7403 SMLoc Loc = getLoc();
7404 auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: ImmVal);
7405 if (!Res.isSuccess())
7406 return Res;
7407
7408 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7409 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7410 (ImmVal < 0 || ImmVal > 1))
7411 return Error(L: Loc, Msg: Twine("out of range ", StringRef(Pref)));
7412
7413 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7414 return Error(L: Loc, Msg: Twine("out of range ", StringRef(Pref)));
7415
7416 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: ImmTy));
7417 return ParseStatus::Success;
7418}
7419
// Parse an 8-bit index_key modifier (accepts index_key:0..3).
ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey8bit);
}
7423
// Parse a 16-bit index_key modifier (accepts index_key:0..1).
ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey16bit);
}
7427
// Parse a 32-bit index_key modifier (accepts index_key:0..1).
ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey32bit);
}
7431
// Parse "<Name>:<fmt>" where <fmt> is a symbolic name from
// WMMAMods::ModMatrixFmt or its integer index.
ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
                                               StringRef Name,
                                               AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, Ids: WMMAMods::ModMatrixFmt,
                                    Type);
}
7438
// Parse the "matrix_a_fmt:<fmt>" WMMA modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, Name: "matrix_a_fmt",
                           Type: AMDGPUOperand::ImmTyMatrixAFMT);
}
7443
// Parse the "matrix_b_fmt:<fmt>" WMMA modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, Name: "matrix_b_fmt",
                           Type: AMDGPUOperand::ImmTyMatrixBFMT);
}
7448
// Parse "<Name>:<scale>" where <scale> is a symbolic name from
// WMMAMods::ModMatrixScale or its integer index.
ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
                                                 StringRef Name,
                                                 AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, Ids: WMMAMods::ModMatrixScale,
                                    Type);
}
7455
// Parse the "matrix_a_scale:<scale>" WMMA modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, Name: "matrix_a_scale",
                             Type: AMDGPUOperand::ImmTyMatrixAScale);
}
7460
// Parse the "matrix_b_scale:<scale>" WMMA modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, Name: "matrix_b_scale",
                             Type: AMDGPUOperand::ImmTyMatrixBScale);
}
7465
// Parse "<Name>:<fmt>" where <fmt> is a symbolic name from
// WMMAMods::ModMatrixScaleFmt or its integer index.
ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
                                                    StringRef Name,
                                                    AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, Ids: WMMAMods::ModMatrixScaleFmt,
                                    Type);
}
7472
// Parse the "matrix_a_scale_fmt:<fmt>" WMMA modifier.
ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, Name: "matrix_a_scale_fmt",
                                Type: AMDGPUOperand::ImmTyMatrixAScaleFmt);
}
7477
// Parse the "matrix_b_scale_fmt:<fmt>" WMMA modifier.
ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, Name: "matrix_b_scale_fmt",
                                Type: AMDGPUOperand::ImmTyMatrixBScaleFmt);
}
7482
7483// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7484// values to live in a joint format operand in the MCInst encoding.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt(Pref: "dfmt", MaxVal: DFMT_MAX, Fmt&: Dfmt))
      return ParseStatus::Failure;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt(Pref: "nfmt", MaxVal: NFMT_MAX, Fmt&: Nfmt))
      return ParseStatus::Failure;

    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(K: AsmToken::Comma)) {
      trySkipToken(Kind: AsmToken::Comma);
    }
  }

  // Neither modifier was present; let the caller keep its default.
  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  // When only one of the two was written, the other takes its default.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return ParseStatus::Success;
}
7516
// Parse the GFX10+ unified "format:<n>" modifier into Format.
// NoMatch when absent; Failure when out of range or malformed.
ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Fmt = UFMT_UNDEF;

  if (!tryParseFmt(Pref: "format", MaxVal: UFMT_MAX, Fmt))
    return ParseStatus::Failure;

  // tryParseFmt leaves Fmt untouched when the prefix was not present.
  if (Fmt == UFMT_UNDEF)
    return ParseStatus::NoMatch;

  Format = Fmt;
  return ParseStatus::Success;
}
7531
7532bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7533 int64_t &Nfmt,
7534 StringRef FormatStr,
7535 SMLoc Loc) {
7536 using namespace llvm::AMDGPU::MTBUFFormat;
7537 int64_t Format;
7538
7539 Format = getDfmt(Name: FormatStr);
7540 if (Format != DFMT_UNDEF) {
7541 Dfmt = Format;
7542 return true;
7543 }
7544
7545 Format = getNfmt(Name: FormatStr, STI: getSTI());
7546 if (Format != NFMT_UNDEF) {
7547 Nfmt = Format;
7548 return true;
7549 }
7550
7551 Error(L: Loc, Msg: "unsupported format");
7552 return false;
7553}
7554
// Parse the split "format:[<dfmt>,<nfmt>]" style (first string already in
// FormatStr, bracket handling done by the caller). On GFX10+ the pair is
// converted to a unified format code; earlier targets get the packed
// dfmt/nfmt encoding.
ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                                      SMLoc FormatLoc,
                                                      int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, Loc: FormatLoc))
    return ParseStatus::Failure;

  if (trySkipToken(Kind: AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Val&: Str, ErrMsg: "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, FormatStr: Str, Loc))
      return ParseStatus::Failure;
    // After two format strings, an UNDEF slot means both strings landed in
    // the other slot, i.e. both were the same kind of format.
    if (Dfmt == DFMT_UNDEF)
      return Error(L: Loc, Msg: "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(L: Loc, Msg: "duplicate data format");
  }

  // Fill in the defaults for whichever component was not specified.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, STI: getSTI());
    if (Ufmt == UFMT_UNDEF)
      return Error(L: FormatLoc, Msg: "unsupported format");
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return ParseStatus::Success;
}
7591
7592ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7593 SMLoc Loc,
7594 int64_t &Format) {
7595 using namespace llvm::AMDGPU::MTBUFFormat;
7596
7597 auto Id = getUnifiedFormat(Name: FormatStr, STI: getSTI());
7598 if (Id == UFMT_UNDEF)
7599 return ParseStatus::NoMatch;
7600
7601 if (!isGFX10Plus())
7602 return Error(L: Loc, Msg: "unified format is not supported on this GPU");
7603
7604 Format = Id;
7605 return ParseStatus::Success;
7606}
7607
// Parse a raw integer expression as the format value and validate it
// against the target's legal encoding range.
ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  SMLoc Loc = getLoc();

  if (!parseExpr(Imm&: Format))
    return ParseStatus::Failure;
  if (!isValidFormatEncoding(Val: Format, STI: getSTI()))
    return Error(L: Loc, Msg: "out of range format");

  return ParseStatus::Success;
}
7619
// Parse "format:" followed by either a bracketed symbolic form
// ("format:[FMT]" / "format:[DFMT,NFMT]") or a bare numeric expression.
ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId(Id: "format", Kind: AsmToken::Colon))
    return ParseStatus::NoMatch;

  if (trySkipToken(Kind: AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(Val&: FormatStr, ErrMsg: "expected a format string"))
      return ParseStatus::Failure;

    // Prefer the unified (GFX10+) name table; fall back to split dfmt/nfmt.
    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res.isNoMatch())
      Res = parseSymbolicSplitFormat(FormatStr, FormatLoc: Loc, Format);
    if (!Res.isSuccess())
      return Res;

    if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
      return ParseStatus::Failure;

    return ParseStatus::Success;
  }

  return parseNumericFormat(Format);
}
7646
// Parse the MTBUF format operand. The format may be written before the
// soffset operand (legacy "dfmt/nfmt" or GFX10+ "format:<n>" syntax) or
// after it ("format:[...]"); in the latter case the already-pushed default
// format operand is patched in place.
ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(STI: getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  // Push the format operand now (default value if not yet parsed) so it
  // occupies its slot ahead of soffset.
  Operands.push_back(
      Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Format, Loc, Type: AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(Kind: AsmToken::Comma);

  if (isToken(Kind: AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return ParseStatus::Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(Kind: AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      // Patch the placeholder format operand pushed above; it sits just
      // before the soffset operand parsed a moment ago.
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  // A second "format:" after a legacy-form format is an error.
  if (isId(Id: "format") && peekToken().is(K: AsmToken::Colon))
    return Error(L: getLoc(), Msg: "duplicate format");
  return ParseStatus::Success;
}
7697
7698ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7699 ParseStatus Res =
7700 parseIntWithPrefix(Prefix: "offset", Operands, ImmTy: AMDGPUOperand::ImmTyOffset);
7701 if (Res.isNoMatch()) {
7702 Res = parseIntWithPrefix(Prefix: "inst_offset", Operands,
7703 ImmTy: AMDGPUOperand::ImmTyInstOffset);
7704 }
7705 return Res;
7706}
7707
7708ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7709 ParseStatus Res =
7710 parseNamedBit(Name: "r128", Operands, ImmTy: AMDGPUOperand::ImmTyR128A16);
7711 if (Res.isNoMatch())
7712 Res = parseNamedBit(Name: "a16", Operands, ImmTy: AMDGPUOperand::ImmTyA16);
7713 return Res;
7714}
7715
7716ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7717 ParseStatus Res =
7718 parseIntWithPrefix(Prefix: "blgp", Operands, ImmTy: AMDGPUOperand::ImmTyBLGP);
7719 if (Res.isNoMatch()) {
7720 Res =
7721 parseOperandArrayWithPrefix(Prefix: "neg", Operands, ImmTy: AMDGPUOperand::ImmTyBLGP);
7722 }
7723 return Res;
7724}
7725
7726//===----------------------------------------------------------------------===//
7727// Exp
7728//===----------------------------------------------------------------------===//
7729
// Convert parsed export-instruction operands into Inst. The four source
// slots (registers or "off") are recorded, the enable mask is derived from
// which slots hold a real register, and the optional vm/compr immediates
// are appended at the end.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each of the 4 sources.
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, N: 1);
      ++SrcIdx;
      continue;
    }

    // "off" sources become a null register in the MCInst.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(Op: MCOperand::createReg(Reg: MCRegister()));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, N: 1);
      continue;
    }

    // "done" and "row_en" are encoded in the opcode, not as operands.
    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(x: AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed export: move source 2 into the slot of source 1 and mark
    // the upper two slots as off.
    Compr = true;
    Inst.getOperand(i: OperandIdx[1]) = Inst.getOperand(i: OperandIdx[2]);
    Inst.getOperand(i: OperandIdx[2]).setReg(MCRegister());
    Inst.getOperand(i: OperandIdx[3]).setReg(MCRegister());
  }

  // Each live register source enables one bit of the mask (a pair of
  // adjacent bits in compressed mode).
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(i: OperandIdx[i]).getReg()) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(Op: MCOperand::createImm(Val: EnMask));
}
7790
7791//===----------------------------------------------------------------------===//
7792// s_waitcnt
7793//===----------------------------------------------------------------------===//
7794
7795static bool
7796encodeCnt(
7797 const AMDGPU::IsaVersion ISA,
7798 int64_t &IntVal,
7799 int64_t CntVal,
7800 bool Saturate,
7801 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
7802 unsigned (*decode)(const IsaVersion &Version, unsigned))
7803{
7804 bool Failed = false;
7805
7806 IntVal = encode(ISA, IntVal, CntVal);
7807 if (CntVal != decode(ISA, IntVal)) {
7808 if (Saturate) {
7809 IntVal = encode(ISA, IntVal, -1);
7810 } else {
7811 Failed = true;
7812 }
7813 }
7814 return Failed;
7815}
7816
// Parse one "name(value)" clause of an s_waitcnt operand and merge the
// value into IntVal. A "_sat" suffix on the counter name requests
// saturation instead of an error when the value does not fit. Returns
// false on error (diagnostic already emitted).
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name") ||
      !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(Imm&: CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.ends_with(Suffix: "_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeVmcnt, decode: decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeExpcnt, decode: decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeLgkmcnt, decode: decodeLgkmcnt);
  } else {
    Error(L: CntLoc, Msg: "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(L: ValLoc, Msg: "too large value for " + CntName);
    return false;
  }

  if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
    return false;

  // Clauses may be separated by '&' or ','. A separator must be followed
  // by another clause on the same statement.
  if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) {
    if (isToken(Kind: AsmToken::EndOfStatement)) {
      Error(L: getLoc(), Msg: "expected a counter name");
      return false;
    }
  }

  return true;
}
7864
// Parse the s_waitcnt operand: either a sequence of named clauses such as
// "vmcnt(0) & lgkmcnt(1)" or a plain integer expression. Starts from the
// all-counters-max bitmask so unmentioned counters stay unconstrained.
ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(Version: ISA);
  SMLoc S = getLoc();

  // "ident(" introduces the named-clause syntax; anything else is an expr.
  if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
    while (!isToken(Kind: AsmToken::EndOfStatement)) {
      if (!parseCnt(IntVal&: Waitcnt))
        return ParseStatus::Failure;
    }
  } else {
    if (!parseExpr(Imm&: Waitcnt))
      return ParseStatus::Failure;
  }

  Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Waitcnt, Loc: S));
  return ParseStatus::Success;
}
7883
// Parse one "field(VALUE)" clause of an s_delay_alu operand and OR the
// encoded value into Delay. Fields: instid0 (bits starting at 0),
// instskip (bit 4), instid1 (bit 7). Returns false on error.
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  StringRef FieldName = getTokenStr();
  if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a field name") ||
      !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
    return false;

  SMLoc ValueLoc = getLoc();
  StringRef ValueName = getTokenStr();
  if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a value name") ||
      !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a right parenthesis"))
    return false;

  // Bit position of the field within the s_delay_alu immediate.
  unsigned Shift;
  if (FieldName == "instid0") {
    Shift = 0;
  } else if (FieldName == "instskip") {
    Shift = 4;
  } else if (FieldName == "instid1") {
    Shift = 7;
  } else {
    Error(L: FieldLoc, Msg: "invalid field name " + FieldName);
    return false;
  }

  int Value;
  if (Shift == 4) {
    // Parse values for instskip.
    Value = StringSwitch<int>(ValueName)
                .Case(S: "SAME", Value: 0)
                .Case(S: "NEXT", Value: 1)
                .Case(S: "SKIP_1", Value: 2)
                .Case(S: "SKIP_2", Value: 3)
                .Case(S: "SKIP_3", Value: 4)
                .Case(S: "SKIP_4", Value: 5)
                .Default(Value: -1);
  } else {
    // Parse values for instid0 and instid1.
    Value = StringSwitch<int>(ValueName)
                .Case(S: "NO_DEP", Value: 0)
                .Case(S: "VALU_DEP_1", Value: 1)
                .Case(S: "VALU_DEP_2", Value: 2)
                .Case(S: "VALU_DEP_3", Value: 3)
                .Case(S: "VALU_DEP_4", Value: 4)
                .Case(S: "TRANS32_DEP_1", Value: 5)
                .Case(S: "TRANS32_DEP_2", Value: 6)
                .Case(S: "TRANS32_DEP_3", Value: 7)
                .Case(S: "FMA_ACCUM_CYCLE_1", Value: 8)
                .Case(S: "SALU_CYCLE_1", Value: 9)
                .Case(S: "SALU_CYCLE_2", Value: 10)
                .Case(S: "SALU_CYCLE_3", Value: 11)
                .Default(Value: -1);
  }
  if (Value < 0) {
    Error(L: ValueLoc, Msg: "invalid value name " + ValueName);
    return false;
  }

  Delay |= Value << Shift;
  return true;
}
7945
// Parse the s_delay_alu operand: either '|'-separated "field(VALUE)"
// clauses or a plain integer expression.
ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
  int64_t Delay = 0;
  SMLoc S = getLoc();

  // "ident(" introduces the named-clause syntax; anything else is an expr.
  if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
    do {
      if (!parseDelay(Delay))
        return ParseStatus::Failure;
    } while (trySkipToken(Kind: AsmToken::Pipe));
  } else {
    if (!parseExpr(Imm&: Delay))
      return ParseStatus::Failure;
  }

  Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Delay, Loc: S));
  return ParseStatus::Success;
}
7963
// Any immediate operand is acceptable as an s_waitcnt operand.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
7968
// Any immediate operand is acceptable as an s_delay_alu operand.
bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7970
7971//===----------------------------------------------------------------------===//
7972// DepCtr
7973//===----------------------------------------------------------------------===//
7974
// Emit the diagnostic corresponding to a negative status code returned by
// DepCtr::encodeDepCtr (see parseDepCtr below).
void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
                                  StringRef DepCtrName) {
  switch (ErrorId) {
  case OPR_ID_UNKNOWN:
    Error(L: Loc, Msg: Twine("invalid counter name ", DepCtrName));
    return;
  case OPR_ID_UNSUPPORTED:
    Error(L: Loc, Msg: Twine(DepCtrName, " is not supported on this GPU"));
    return;
  case OPR_ID_DUPLICATE:
    Error(L: Loc, Msg: Twine("duplicate counter name ", DepCtrName));
    return;
  case OPR_VAL_INVALID:
    Error(L: Loc, Msg: Twine("invalid value for ", DepCtrName));
    return;
  default:
    // NOTE(review): assert(false) compiles away in release builds, so an
    // unexpected ErrorId would silently emit no diagnostic there;
    // llvm_unreachable may be preferable — confirm intent.
    assert(false);
  }
}
7994
// Parse one "name(value)" clause of an s_delay/depctr operand and merge the
// encoded counter field into DepCtr. UsedOprMask accumulates the bit fields
// consumed so far, enabling duplicate detection across clauses. Returns
// false on error (diagnostic already emitted).
bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {

  using namespace llvm::AMDGPU::DepCtr;

  SMLoc DepCtrLoc = getLoc();
  StringRef DepCtrName = getTokenStr();

  if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name") ||
      !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
    return false;

  int64_t ExprVal;
  if (!parseExpr(Imm&: ExprVal))
    return false;

  unsigned PrevOprMask = UsedOprMask;
  // encodeDepCtr returns a negative OPR_* status on failure and also ORs
  // this counter's field bits into UsedOprMask on success.
  int CntVal = encodeDepCtr(Name: DepCtrName, Val: ExprVal, UsedOprMask, STI: getSTI());

  if (CntVal < 0) {
    depCtrError(Loc: DepCtrLoc, ErrorId: CntVal, DepCtrName);
    return false;
  }

  if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
    return false;

  // Clauses may be separated by '&' or ','. A separator must be followed
  // by another clause on the same statement.
  if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) {
    if (isToken(Kind: AsmToken::EndOfStatement)) {
      Error(L: getLoc(), Msg: "expected a counter name");
      return false;
    }
  }

  // XOR of the masks before/after encodeDepCtr isolates exactly the bits of
  // the field just written, so it can be replaced within DepCtr.
  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;
  return true;
}
8032
// Parse the full depctr operand: either a sequence of named "name(value)"
// clauses or a plain integer expression, starting from the target's
// default encoding so unmentioned counters keep their defaults.
ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
  using namespace llvm::AMDGPU::DepCtr;

  int64_t DepCtr = getDefaultDepCtrEncoding(STI: getSTI());
  SMLoc Loc = getLoc();

  // "ident(" introduces the named-clause syntax; anything else is an expr.
  if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
    unsigned UsedOprMask = 0;
    while (!isToken(Kind: AsmToken::EndOfStatement)) {
      if (!parseDepCtr(DepCtr, UsedOprMask))
        return ParseStatus::Failure;
    }
  } else {
    if (!parseExpr(Imm&: DepCtr))
      return ParseStatus::Failure;
  }

  Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DepCtr, Loc));
  return ParseStatus::Success;
}
8053
// A depctr operand is any immediate that fits in a signed 16-bit field.
bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8055
8056//===----------------------------------------------------------------------===//
8057// hwreg
8058//===----------------------------------------------------------------------===//
8059
// Parse the "hwreg(reg[, offset, width])" functional syntax. The register
// may be given symbolically or as an expression; offset and width are
// either both present or both omitted. Field values are validated by the
// caller, not here.
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Offset,
                                            OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (!trySkipId(Id: "hwreg", Kind: AsmToken::LParen))
    return ParseStatus::NoMatch;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(Kind: AsmToken::Identifier) &&
      (HwReg.Val = getHwregId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(Imm&: HwReg.Val, Expected: "a register name")) {
    return ParseStatus::Failure;
  }

  // "hwreg(reg)" form: offset and width keep their defaults.
  if (trySkipToken(Kind: AsmToken::RParen))
    return ParseStatus::Success;

  // parse optional params
  if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma or a closing parenthesis"))
    return ParseStatus::Failure;

  Offset.Loc = getLoc();
  if (!parseExpr(Imm&: Offset.Val))
    return ParseStatus::Failure;

  if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
    return ParseStatus::Failure;

  Width.Loc = getLoc();
  if (!parseExpr(Imm&: Width.Val) ||
      !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
    return ParseStatus::Failure;

  return ParseStatus::Success;
}
8099
// Parse an s_getreg/s_setreg hwreg operand in any of three forms: the
// structured "{id:..,offset:..,size:..}" form, the "hwreg(...)" functional
// form, or a bare integer expression.
ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  StructuredOpField HwReg("id", "hardware register", HwregId::Width,
                          HwregId::Default);
  StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
                           HwregOffset::Default);
  // The size field carries an extra constraint: it is encoded as Val-1, so
  // legal source values are 1..32 (Val-1 must fit the field's bit width).
  struct : StructuredOpField {
    using StructuredOpField::StructuredOpField;
    bool validate(AMDGPUAsmParser &Parser) const override {
      if (!isUIntN(N: Width, x: Val - 1))
        return Error(Parser, Err: "only values from 1 to 32 are legal");
      return true;
    }
  } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
  ParseStatus Res = parseStructuredOpFields(Fields: {&HwReg, &Offset, &Width});

  if (Res.isNoMatch())
    Res = parseHwregFunc(HwReg, Offset, Width);

  if (Res.isSuccess()) {
    if (!validateStructuredOpFields(Fields: {&HwReg, &Offset, &Width}))
      return ParseStatus::Failure;
    ImmVal = HwregEncoding::encode(Values: HwReg.Val, Values: Offset.Val, Values: Width.Val);
  }

  // Last resort: a raw integer expression.
  if (Res.isNoMatch() &&
      parseExpr(Imm&: ImmVal, Expected: "a hwreg macro, structured immediate"))
    Res = ParseStatus::Success;

  if (!Res.isSuccess())
    return ParseStatus::Failure;

  if (!isUInt<16>(x: ImmVal))
    return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal");
  Operands.push_back(
      Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTyHwreg));
  return ParseStatus::Success;
}
8142
// A hwreg operand is an immediate tagged ImmTyHwreg by parseHwreg.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmT: ImmTyHwreg);
}
8146
8147//===----------------------------------------------------------------------===//
8148// sendmsg
8149//===----------------------------------------------------------------------===//
8150
// Parse the interior of a sendmsg operand: "msg[, op[, stream]])".
// NOTE(review): the closing RParen is consumed here but the opening
// "sendmsg(" is not — presumably consumed by the caller; confirm against
// parseSendMsg. Each part may be symbolic or a numeric expression.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(Kind: AsmToken::Identifier) &&
      (Msg.Val = getMsgId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Imm&: Msg.Val, Expected: "a message name")) {
    return false;
  }

  if (trySkipToken(Kind: AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(Kind: AsmToken::Identifier) &&
        (Op.Val = getMsgOpId(MsgId: Msg.Val, Name: getTokenStr(), STI: getSTI())) !=
            OPR_ID_UNKNOWN) {
      lex(); // skip operation name
    } else if (!parseExpr(Imm&: Op.Val, Expected: "an operation name")) {
      return false;
    }

    // A stream id may only follow an operation.
    if (trySkipToken(Kind: AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Imm&: Stream.Val))
        return false;
    }
  }

  return skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis");
}
8187
// Validate the (message, operation, stream) triple produced by
// parseSendMsgBody. Emits a diagnostic and returns false on the first
// violation found.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (Strict) {
    if (Msg.Val == OPR_ID_UNSUPPORTED) {
      Error(Msg.Loc, "specified message id is not supported on this GPU");
      return false;
    }
  } else {
    if (!isValidMsgId(Msg.Val, getSTI())) {
      Error(Msg.Loc, "invalid message id");
      return false;
    }
  }
  // In strict mode the operation must be present exactly when the message
  // requires one.
  if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
    if (Op.Val == OPR_ID_UNSUPPORTED)
      Error(Op.Loc, "specified operation id is not supported on this GPU");
    else
      Error(Op.Loc, "invalid operation id");
    return false;
  }
  // A stream id may only be given when the message/operation pair
  // supports streams (strict mode only).
  if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
      Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}
8236
// Parse an s_sendmsg operand: either a sendmsg(...) macro (parsed and
// validated, then encoded) or a plain absolute expression limited to
// 16 bits.
ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(OPR_ID_UNKNOWN);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
    } else {
      return ParseStatus::Failure;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    // Raw immediate form: must fit the 16-bit SIMM field.
    if (ImmVal < 0 || !isUInt<16>(ImmVal))
      return Error(Loc, "invalid immediate: only 16-bit values are legal");
  } else {
    return ParseStatus::Failure;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return ParseStatus::Success;
}
8263
8264bool AMDGPUOperand::isSendMsg() const {
8265 return isImmTy(ImmT: ImmTySendMsg);
8266}
8267
// Parse the s_wait_event operand: either a structured immediate of the form
// {field: bit} or a raw 16-bit expression. GFX11 names the single field
// "dont_wait_export_ready"; other targets use "export_ready".
ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
  using namespace llvm::AMDGPU::WaitEvent;

  SMLoc Loc = getLoc();
  int64_t ImmVal = 0;

  StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
                                        1, 0);
  StructuredOpField ExportReady("export_ready", "bit value", 1, 0);

  // Only one of the two fields is recognized, depending on the target.
  StructuredOpField *TargetBitfield =
      isGFX11() ? &DontWaitExportReady : &ExportReady;

  ParseStatus Res = parseStructuredOpFields({TargetBitfield});
  if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
    Res = ParseStatus::Success;
  else if (Res.isSuccess()) {
    if (!validateStructuredOpFields({TargetBitfield}))
      return ParseStatus::Failure;
    ImmVal = TargetBitfield->Val;
  }

  if (!Res.isSuccess())
    return ParseStatus::Failure;

  if (!isUInt<16>(ImmVal))
    return Error(Loc, "invalid immediate: only 16-bit values are legal");

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
                                              AMDGPUOperand::ImmTyWaitEvent));
  return ParseStatus::Success;
}
8300
8301bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmT: ImmTyWaitEvent); }
8302
8303//===----------------------------------------------------------------------===//
8304// v_interp
8305//===----------------------------------------------------------------------===//
8306
8307ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8308 StringRef Str;
8309 SMLoc S = getLoc();
8310
8311 if (!parseId(Val&: Str))
8312 return ParseStatus::NoMatch;
8313
8314 int Slot = StringSwitch<int>(Str)
8315 .Case(S: "p10", Value: 0)
8316 .Case(S: "p20", Value: 1)
8317 .Case(S: "p0", Value: 2)
8318 .Default(Value: -1);
8319
8320 if (Slot == -1)
8321 return Error(L: S, Msg: "invalid interpolation slot");
8322
8323 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Slot, Loc: S,
8324 Type: AMDGPUOperand::ImmTyInterpSlot));
8325 return ParseStatus::Success;
8326}
8327
// Parse an interpolation attribute of the form attr<N>.<chan>, where N is a
// decimal attribute number in [0, 32] and chan is one of x, y, z, w.
// On success pushes two immediates: the attribute index and the channel.
ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");

  // The channel is the trailing two characters (".x" etc.).
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
                     .Case(".x", 0)
                     .Case(".y", 1)
                     .Case(".z", 2)
                     .Case(".w", 3)
                     .Default(-1);
  if (AttrChan == -1)
    return Error(S, "invalid or missing interpolation attribute channel");

  // Strip the channel suffix and the "attr" prefix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");

  if (Attr > 32)
    return Error(S, "out of bounds interpolation attribute number");

  // Point the channel operand's location at the suffix itself.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
  return ParseStatus::Success;
}
8365
8366//===----------------------------------------------------------------------===//
8367// exp
8368//===----------------------------------------------------------------------===//
8369
// Parse an export target operand (e.g. mrt0, pos0, param0) and check that
// the target exists and is supported on the current subtarget.
ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  unsigned Id = getTgtId(Str);
  // Distinguish "no such target" from "target not available on this GPU".
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
    return Error(S, (Id == ET_INVALID)
                        ? "invalid exp target"
                        : "exp target is not supported on this GPU");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return ParseStatus::Success;
}
8389
8390//===----------------------------------------------------------------------===//
8391// parser helpers
8392//===----------------------------------------------------------------------===//
8393
8394bool
8395AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8396 return Token.is(K: AsmToken::Identifier) && Token.getString() == Id;
8397}
8398
8399bool
8400AMDGPUAsmParser::isId(const StringRef Id) const {
8401 return isId(Token: getToken(), Id);
8402}
8403
8404bool
8405AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8406 return getTokenKind() == Kind;
8407}
8408
8409StringRef AMDGPUAsmParser::getId() const {
8410 return isToken(Kind: AsmToken::Identifier) ? getTokenStr() : StringRef();
8411}
8412
8413bool
8414AMDGPUAsmParser::trySkipId(const StringRef Id) {
8415 if (isId(Id)) {
8416 lex();
8417 return true;
8418 }
8419 return false;
8420}
8421
8422bool
8423AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8424 if (isToken(Kind: AsmToken::Identifier)) {
8425 StringRef Tok = getTokenStr();
8426 if (Tok.starts_with(Prefix: Pref) && Tok.drop_front(N: Pref.size()) == Id) {
8427 lex();
8428 return true;
8429 }
8430 }
8431 return false;
8432}
8433
8434bool
8435AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8436 if (isId(Id) && peekToken().is(K: Kind)) {
8437 lex();
8438 lex();
8439 return true;
8440 }
8441 return false;
8442}
8443
8444bool
8445AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8446 if (isToken(Kind)) {
8447 lex();
8448 return true;
8449 }
8450 return false;
8451}
8452
8453bool
8454AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8455 const StringRef ErrMsg) {
8456 if (!trySkipToken(Kind)) {
8457 Error(L: getLoc(), Msg: ErrMsg);
8458 return false;
8459 }
8460 return true;
8461}
8462
8463bool
8464AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8465 SMLoc S = getLoc();
8466
8467 const MCExpr *Expr;
8468 if (Parser.parseExpression(Res&: Expr))
8469 return false;
8470
8471 if (Expr->evaluateAsAbsolute(Res&: Imm))
8472 return true;
8473
8474 if (Expected.empty()) {
8475 Error(L: S, Msg: "expected absolute expression");
8476 } else {
8477 Error(L: S, Msg: Twine("expected ", Expected) +
8478 Twine(" or an absolute expression"));
8479 }
8480 return false;
8481}
8482
8483bool
8484AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8485 SMLoc S = getLoc();
8486
8487 const MCExpr *Expr;
8488 if (Parser.parseExpression(Res&: Expr))
8489 return false;
8490
8491 int64_t IntVal;
8492 if (Expr->evaluateAsAbsolute(Res&: IntVal)) {
8493 Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S));
8494 } else {
8495 Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S));
8496 }
8497 return true;
8498}
8499
8500bool
8501AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8502 if (isToken(Kind: AsmToken::String)) {
8503 Val = getToken().getStringContents();
8504 lex();
8505 return true;
8506 }
8507 Error(L: getLoc(), Msg: ErrMsg);
8508 return false;
8509}
8510
8511bool
8512AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8513 if (isToken(Kind: AsmToken::Identifier)) {
8514 Val = getTokenStr();
8515 lex();
8516 return true;
8517 }
8518 if (!ErrMsg.empty())
8519 Error(L: getLoc(), Msg: ErrMsg);
8520 return false;
8521}
8522
// Return (a copy of) the current lexer token.
AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}
8527
8528AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8529 return isToken(Kind: AsmToken::EndOfStatement)
8530 ? getToken()
8531 : getLexer().peekTok(ShouldSkipSpace);
8532}
8533
8534void
8535AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8536 auto TokCount = getLexer().peekTokens(Buf: Tokens);
8537
8538 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8539 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8540}
8541
// Kind of the current lexer token.
AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}
8546
// Source location of the current token.
SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}
8551
// Spelling of the current token.
StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}
8556
// Advance the lexer by one token.
void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}
8561
8562SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8563 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8564}
8565
8566// Returns one of the given locations that comes later in the source.
8567SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8568 return a.getPointer() < b.getPointer() ? b : a;
8569}
8570
8571SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8572 int MCOpIdx) const {
8573 for (const auto &Op : Operands) {
8574 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8575 if (TargetOp.getMCOpIdx() == MCOpIdx)
8576 return TargetOp.getStartLoc();
8577 }
8578 llvm_unreachable("No such MC operand!");
8579}
8580
// Scan parsed operands from last to first and return the location of the
// first one satisfying Test; fall back to the instruction location when
// nothing matches.
SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  // Operand 0 is the mnemonic; the scan deliberately stops before it.
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return getInstLoc(Operands);
}
8591
8592SMLoc
8593AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8594 const OperandVector &Operands) const {
8595 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(ImmT: Type); };
8596 return getOperandLoc(Test, Operands);
8597}
8598
// Parse a brace-enclosed list of "name: value" pairs, e.g. {id: 1, off: 2}.
// Each name must match one of Fields and may appear at most once; values
// are absolute expressions. NoMatch if there is no opening brace.
ParseStatus
AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
  if (!trySkipToken(AsmToken::LCurly))
    return ParseStatus::NoMatch;

  bool First = true;
  while (!trySkipToken(AsmToken::RCurly)) {
    // Entries after the first must be comma-separated.
    if (!First &&
        !skipToken(AsmToken::Comma, "comma or closing brace expected"))
      return ParseStatus::Failure;

    StringRef Id = getTokenStr();
    SMLoc IdLoc = getLoc();
    if (!skipToken(AsmToken::Identifier, "field name expected") ||
        !skipToken(AsmToken::Colon, "colon expected"))
      return ParseStatus::Failure;

    const auto *I =
        find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
    if (I == Fields.end())
      return Error(IdLoc, "unknown field");
    if ((*I)->IsDefined)
      return Error(IdLoc, "duplicate field");

    // TODO: Support symbolic values.
    (*I)->Loc = getLoc();
    if (!parseExpr((*I)->Val))
      return ParseStatus::Failure;
    (*I)->IsDefined = true;

    First = false;
  }
  return ParseStatus::Success;
}
8633
8634bool AMDGPUAsmParser::validateStructuredOpFields(
8635 ArrayRef<const StructuredOpField *> Fields) {
8636 return all_of(Range&: Fields, P: [this](const StructuredOpField *F) {
8637 return F->validate(Parser&: *this);
8638 });
8639}
8640
8641//===----------------------------------------------------------------------===//
8642// swizzle
8643//===----------------------------------------------------------------------===//
8644
8645LLVM_READNONE
8646static unsigned
8647encodeBitmaskPerm(const unsigned AndMask,
8648 const unsigned OrMask,
8649 const unsigned XorMask) {
8650 using namespace llvm::AMDGPU::Swizzle;
8651
8652 return BITMASK_PERM_ENC |
8653 (AndMask << BITMASK_AND_SHIFT) |
8654 (OrMask << BITMASK_OR_SHIFT) |
8655 (XorMask << BITMASK_XOR_SHIFT);
8656}
8657
8658bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8659 const unsigned MaxVal,
8660 const Twine &ErrMsg, SMLoc &Loc) {
8661 if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma")) {
8662 return false;
8663 }
8664 Loc = getLoc();
8665 if (!parseExpr(Imm&: Op)) {
8666 return false;
8667 }
8668 if (Op < MinVal || Op > MaxVal) {
8669 Error(L: Loc, Msg: ErrMsg);
8670 return false;
8671 }
8672
8673 return true;
8674}
8675
8676bool
8677AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8678 const unsigned MinVal,
8679 const unsigned MaxVal,
8680 const StringRef ErrMsg) {
8681 SMLoc Loc;
8682 for (unsigned i = 0; i < OpNum; ++i) {
8683 if (!parseSwizzleOperand(Op&: Op[i], MinVal, MaxVal, ErrMsg, Loc))
8684 return false;
8685 }
8686
8687 return true;
8688}
8689
// Parse the QUAD_PERM swizzle macro body: LANE_NUM 2-bit lane selectors,
// packed LANE_SHIFT bits apart into the encoded immediate.
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}
8705
// Parse the BROADCAST swizzle macro body: a power-of-two group size in
// [2,32] and a lane index within the group, encoded as a bitmask-perm with
// the group's low bits forced to the lane index.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    // AND keeps the bits above the group, OR injects the lane index.
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
8733
// Parse the REVERSE swizzle macro body: a power-of-two group size in
// [2,32], encoded as a bitmask-perm that XOR-flips the lane bits inside
// each group.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
8755
// Parse the SWAP swizzle macro body: a power-of-two group size in [1,16],
// encoded as a bitmask-perm that XORs the lane id with the group size to
// exchange adjacent groups.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
8777
// Parse the BITMASK_PERM swizzle macro body: a BITMASK_WIDTH-character
// string where each character controls one lane-id bit (MSB first):
//   '0' force to 0, '1' force to 1, 'p' pass through, 'i' invert.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    // The first character of the string controls the highest bit.
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;              // and=0: bit cleared
    case '1':
      OrMask |= Mask;     // bit forced to 1
      break;
    case 'p':
      AndMask |= Mask;    // bit preserved
      break;
    case 'i':
      AndMask |= Mask;    // bit preserved then inverted
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
8824
// Parse the FFT swizzle macro body (GFX9+): a single swizzle value in
// [0, FFT_SWIZZLE_MAX], OR-ed into the FFT mode encoding.
bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "FFT mode swizzle not supported on this GPU");
    return false;
  }

  int64_t Swizzle;
  SMLoc Loc;
  if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
                           "FFT swizzle must be in the interval [0," +
                               Twine(FFT_SWIZZLE_MAX) + Twine(']'),
                           Loc))
    return false;

  Imm = FFT_MODE_ENC | Swizzle;
  return true;
}
8844
// Parse the ROTATE swizzle macro body (GFX9+): a direction bit (0 = left,
// 1 = right) and a rotation size in [0, ROTATE_MAX_SIZE], packed into the
// rotate mode encoding.
bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!AMDGPU::isGFX9Plus(getSTI())) {
    Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
    return false;
  }

  SMLoc Loc;
  int64_t Direction;

  if (!parseSwizzleOperand(Direction, 0, 1,
                           "direction must be 0 (left) or 1 (right)", Loc))
    return false;

  int64_t RotateSize;
  if (!parseSwizzleOperand(
          RotateSize, 0, ROTATE_MAX_SIZE,
          "number of threads to rotate must be in the interval [0," +
              Twine(ROTATE_MAX_SIZE) + Twine(']'),
          Loc))
    return false;

  Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
        (RotateSize << ROTATE_SIZE_SHIFT);
  return true;
}
8872
8873bool
8874AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8875
8876 SMLoc OffsetLoc = getLoc();
8877
8878 if (!parseExpr(Imm, Expected: "a swizzle macro")) {
8879 return false;
8880 }
8881 if (!isUInt<16>(x: Imm)) {
8882 Error(L: OffsetLoc, Msg: "expected a 16-bit offset");
8883 return false;
8884 }
8885 return true;
8886}
8887
// Parse a swizzle(MODE, ...) macro after the "swizzle" id has been
// consumed: dispatch on the symbolic mode name to the mode-specific parser
// and require a closing parenthesis.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else if (trySkipId(IdSymbolic[ID_FFT])) {
      Ok = parseSwizzleFFT(Imm);
    } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
      Ok = parseSwizzleRotate(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
8920
// Parse a ds_swizzle offset operand: "offset:" followed by either a
// swizzle(...) macro or a raw 16-bit offset. The immediate operand is
// pushed even on failure so subsequent operand parsing stays aligned.
ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? ParseStatus::Success : ParseStatus::Failure;
  }
  return ParseStatus::NoMatch;
}
8942
8943bool
8944AMDGPUOperand::isSwizzle() const {
8945 return isImmTy(ImmT: ImmTySwizzle);
8946}
8947
8948//===----------------------------------------------------------------------===//
8949// VGPR Index Mode
8950//===----------------------------------------------------------------------===//
8951
// Parse the interior of a gpr_idx(...) macro: either an immediately closing
// paren (meaning OFF) or a comma-separated list of distinct VGPR index
// modes. Returns the accumulated mode bitmask, or UNDEF after reporting an
// error.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each symbolic mode name; the matching one yields its bit.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
8995
// Parse an s_set_gpr_idx_on mode operand: either a gpr_idx(...) macro or a
// raw 4-bit absolute expression.
ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return ParseStatus::Failure;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return ParseStatus::Failure;
    if (Imm < 0 || !isUInt<4>(Imm))
      return Error(S, "invalid immediate: only 4-bit values are legal");
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return ParseStatus::Success;
}
9018
9019bool AMDGPUOperand::isGPRIdxMode() const {
9020 return isImmTy(ImmT: ImmTyGprIdxMode);
9021}
9022
9023//===----------------------------------------------------------------------===//
9024// sopp branch targets
9025//===----------------------------------------------------------------------===//
9026
// Parse a SOPP branch target: a label (symbol reference) or a 16-bit signed
// absolute offset. More general expressions are rejected with a diagnostic.
ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  if (!parseExpr(Operands))
    return ParseStatus::Failure;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  // Note: Success is returned even after emitting an error above; the
  // operand has already been pushed and matching proceeds.
  return ParseStatus::Success;
}
9052
9053//===----------------------------------------------------------------------===//
9054// Boolean holding registers
9055//===----------------------------------------------------------------------===//
9056
// A boolean-holding register operand is syntactically just a register;
// any further constraints are checked elsewhere during matching.
ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}
9060
9061//===----------------------------------------------------------------------===//
9062// mubuf
9063//===----------------------------------------------------------------------===//
9064
// Convert parsed MUBUF operands into MCInst operands. Registers and plain
// immediates are appended in parse order; named optional immediates are
// collected and appended afterwards in the fixed order offset, cpol, swz.
// For atomic-with-return instructions the destination register is
// duplicated to form the tied source.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic) {
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;  // operand 0 is the mnemonic
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
  // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
  // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
  Inst.addOperand(MCOperand::createImm(0));
}
9114
9115//===----------------------------------------------------------------------===//
9116// smrd
9117//===----------------------------------------------------------------------===//
9118
9119bool AMDGPUOperand::isSMRDOffset8() const {
9120 return isImmLiteral() && isUInt<8>(x: getImm());
9121}
9122
bool AMDGPUOperand::isSMEMOffset() const {
  // Offset range is checked later by validator.
  return isImmLiteral();
}
9127
9128bool AMDGPUOperand::isSMRDLiteralOffset() const {
9129 // 32-bit literals are only supported on CI and we only want to use them
9130 // when the offset is > 8-bits.
9131 return isImmLiteral() && !isUInt<8>(x: getImm()) && isUInt<32>(x: getImm());
9132}
9133
9134//===----------------------------------------------------------------------===//
9135// vop3
9136//===----------------------------------------------------------------------===//
9137
// Convert an omod "mul:N" value (N in {1, 2, 4}) to its encoding
// (0, 1 or 2). Any other multiplier is rejected and left unmodified.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
9145
// Convert an omod "div:N" value to its encoding: div:1 -> 0, div:2 -> 3.
// Any other divisor is rejected and left unmodified.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
9159
// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
// Returns false for any value other than 0 or 1.
bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    if (!isGFX11Plus())
      BoundCtrl = 1;
    return true;
  }
  return false;
}
9171
// Hook invoked before parsing begins. Skips R600 and streamer-less
// contexts; otherwise initializes the target ID from the subtarget's
// feature string and, for the HSA ABI, emits the .amdgcn_target directive.
void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());

  if (isHsaAbi(getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
9184
9185/// Parse AMDGPU specific expressions.
9186///
9187/// expr ::= or(expr, ...) |
9188/// max(expr, ...)
9189///
/// Parse AMDGPU specific expressions.
///
///  expr ::= or(expr, ...) |
///           max(expr, ...)
///
/// Recognizes the AMDGPUMCExpr function-call forms (max, or, extrasgprs,
/// totalnumvgprs, alignto, occupancy); anything else falls back to the
/// generic primary-expression parser. Returns true on error (MC parser
/// convention).
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  using AGVK = AMDGPUMCExpr::VariantKind;

  if (isToken(AsmToken::Identifier)) {
    StringRef TokenId = getTokenStr();
    AGVK VK = StringSwitch<AGVK>(TokenId)
                  .Case("max", AGVK::AGVK_Max)
                  .Case("or", AGVK::AGVK_Or)
                  .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
                  .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
                  .Case("alignto", AGVK::AGVK_AlignTo)
                  .Case("occupancy", AGVK::AGVK_Occupancy)
                  .Default(AGVK::AGVK_None);

    // Only treat the identifier as a function if it is followed by '('.
    if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
      SmallVector<const MCExpr *, 4> Exprs;
      uint64_t CommaCount = 0;
      lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
      lex(); // Eat '('
      while (true) {
        if (trySkipToken(AsmToken::RParen)) {
          if (Exprs.empty()) {
            Error(getToken().getLoc(),
                  "empty " + Twine(TokenId) + " expression");
            return true;
          }
          // Exactly one comma between each pair of arguments.
          if (CommaCount + 1 != Exprs.size()) {
            Error(getToken().getLoc(),
                  "mismatch of commas in " + Twine(TokenId) + " expression");
            return true;
          }
          Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
          return false;
        }
        const MCExpr *Expr;
        if (getParser().parseExpression(Expr, EndLoc))
          return true;
        Exprs.push_back(Expr);
        bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
        if (LastTokenWasComma)
          CommaCount++;
        if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
          Error(getToken().getLoc(),
                "unexpected token in " + Twine(TokenId) + " expression");
          return true;
        }
      }
    }
  }
  return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
}
9241
9242ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9243 StringRef Name = getTokenStr();
9244 if (Name == "mul") {
9245 return parseIntWithPrefix(Prefix: "mul", Operands,
9246 ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodMul);
9247 }
9248
9249 if (Name == "div") {
9250 return parseIntWithPrefix(Prefix: "div", Operands,
9251 ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodDiv);
9252 }
9253
9254 return ParseStatus::NoMatch;
9255}
9256
// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands present, then copies that bit into src0_modifiers
// (the dst op_sel bit is carried there in the encoding).
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
  int Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return;

  // Count how many of src0..src2 this opcode actually has; the dst bit in
  // op_sel sits immediately after the last source bit (bit index SrcNum).
  int SrcNum;
  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: Ops[SrcNum]);
       ++SrcNum)
    ;
  assert(SrcNum > 0);

  unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();

  int DstIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst);
  if (DstIdx == -1)
    return;

  const MCOperand &DstOp = Inst.getOperand(i: DstIdx);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0_modifiers);
  uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm();
  if (DstOp.isReg() &&
      MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: DstOp.getReg())) {
    // 16-bit VGPR destination: the hi/lo half is expressed by the register
    // itself (v<N>.h vs v<N>.l), not by the parsed op_sel immediate.
    if (AMDGPU::isHi16Reg(Reg: DstOp.getReg(), MRI))
      ModVal |= SISrcMods::DST_OP_SEL;
  } else {
    // Otherwise take the dst bit from the parsed op_sel value.
    if ((OpSel & (1 << SrcNum)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;
  }
  Inst.getOperand(i: ModIdx).setImm(ModVal);
}
9292
// Converts a VOP3 instruction whose op_sel also selects the destination half:
// performs the common VOP3P conversion, then folds the dst op_sel bit into
// src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);
  cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI());
}
9298
// Overload used when the caller has already collected optional-immediate
// operand indices; otherwise identical to the two-argument form above.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI());
}
9304
9305static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9306 return
9307 // 1. This operand is input modifiers
9308 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9309 // 2. This is not last operand
9310 && Desc.NumOperands > (OpNum + 1)
9311 // 3. Next operand is register class
9312 && Desc.operands()[OpNum + 1].RegClass != -1
9313 // 4. Next register is not tied to any other operand
9314 && Desc.getOperandConstraint(OpNum: OpNum + 1,
9315 Constraint: MCOI::OperandConstraint::TIED_TO) == -1;
9316}
9317
// Folds a parsed op_sel immediate into the per-source *_modifiers operands:
// op_sel bit J sets OP_SEL_0 in srcJ_modifiers, and op_sel[3] (the dst half
// select) is carried in src0_modifiers as DST_OP_SEL.
void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
  unsigned Opc = Inst.getOpcode();
  constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                    AMDGPU::OpName::src2};
  constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                       AMDGPU::OpName::src1_modifiers,
                                       AMDGPU::OpName::src2_modifiers};
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: Ops[J]);
    if (OpIdx == -1)
      // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
      // no src1. So continue instead of break.
      continue;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
    uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm();

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;
    // op_sel[3] is encoded in src0_modifiers.
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;

    Inst.getOperand(i: ModIdx).setImm(ModVal);
  }
}
9344
// Builds the MCInst for a VOP3 interpolation instruction (v_interp_*) from
// the parsed operand list: defs first, then sources (with FP input modifiers
// where the descriptor asks for them), then the optional immediates in the
// order the encoding expects (high, clamp, omod, op_sel).
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; copy the defs that follow it.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      // Source with input modifiers expands to two MCInst operands.
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
      Inst.addOperand(Op: MCOperand::createImm(Val: Op.getImm()));
    } else if (Op.isImmModifier()) {
      // Optional modifiers are recorded and appended afterwards in encoding
      // order.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::high))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyHigh);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyOModSI);

  // Some v_interp instructions use op_sel[3] for dst.
  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyOpSel);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();

    cvtOpSelHelper(Inst, OpSel);
  }
}
9392
// Builds the MCInst for a GFX11+ VINTERP instruction: defs, modified sources,
// then the optional clamp / op_sel / wait_exp immediates, finally folding
// op_sel bits into the source modifier operands.
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; copy the defs that follow it.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      // Source with input modifiers expands to two MCInst operands.
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyClamp);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel);

  // wait_exp is appended after op_sel regardless of whether op_sel exists.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyWaitEXP);

  if (OpSelIdx == -1)
    return;

  unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
  cvtOpSelHelper(Inst, OpSel);
}
9429
// Builds the MCInst for a scaled MFMA instruction. Placeholder cbsz/blgp
// operands are inserted at their MCInst positions while parsing, patched from
// the parsed values afterwards, and op_sel/op_sel_hi are folded into the two
// scale-source modifier operands.
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  unsigned I = 1;
  int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz);

  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
    static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, N: 1);

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
    int NumOperands = Inst.getNumOperands();
    // The order of operands in MCInst and parsed operands are different.
    // Adding dummy cbsz and blgp operands at corresponding MCInst operand
    // indices for parsing scale values correctly.
    if (NumOperands == CbszOpIdx) {
      Inst.addOperand(Op: MCOperand::createImm(Val: 0));
      Inst.addOperand(Op: MCOperand::createImm(Val: 0));
    }
    if (isRegOrImmWithInputMods(Desc, OpNum: NumOperands)) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, N: 1);
    }
  }

  // Insert CBSZ and BLGP operands for F8F6F4 variants
  auto CbszIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyCBSZ);
  if (CbszIdx != OptionalIdx.end()) {
    int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
    Inst.getOperand(i: CbszOpIdx).setImm(CbszVal);
  }

  int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
  auto BlgpIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyBLGP);
  if (BlgpIdx != OptionalIdx.end()) {
    int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
    Inst.getOperand(i: BlgpOpIdx).setImm(BlgpVal);
  }

  // Add dummy src_modifiers
  Inst.addOperand(Op: MCOperand::createImm(Val: 0));
  Inst.addOperand(Op: MCOperand::createImm(Val: 0));

  // Handle op_sel fields

  unsigned OpSel = 0;
  auto OpselIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyOpSel);
  if (OpselIdx != OptionalIdx.end()) {
    OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
                .getImm();
  }

  unsigned OpSelHi = 0;
  auto OpselHiIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyOpSelHi);
  if (OpselHiIdx != OptionalIdx.end()) {
    OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
                  .getImm();
  }
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers};

  // Translate op_sel/op_sel_hi bit J into the OP_SEL_0/OP_SEL_1 flags of the
  // corresponding source modifier operand (only two scale sources here).
  for (unsigned J = 0; J < 2; ++J) {
    unsigned ModVal = 0;
    if (OpSel & (1 << J))
      ModVal |= SISrcMods::OP_SEL_0;
    if (OpSelHi & (1 << J))
      ModVal |= SISrcMods::OP_SEL_1;

    const int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
    Inst.getOperand(i: ModIdx).setImm(ModVal);
  }
}
9508
// Common conversion for VOP3 instructions: copies defs and sources (with
// input modifiers where the descriptor requires them), records optional
// immediates into OptionalIdx, then appends them in encoding order and
// materializes the tied src2 for MAC/FMAC forms.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic token; copy the defs that follow it.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      // Source with input modifiers expands to two MCInst operands.
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      Op.addRegOrImmOperands(Inst, N: 1);
    }
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::scale_sel))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyScaleSel);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::byte_sel)) {
    // Instructions with vdst_in read the previous dst value; mirror vdst.
    if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in))
      Inst.addOperand(Op: Inst.getOperand(i: 0));
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyByteSel);
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyOModSI);

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (isMAC(Opc)) {
    auto *it = Inst.begin();
    std::advance(i&: it, n: AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(I: it, Op: MCOperand::createImm(Val: 0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(I: it, Op: MCOperand(Inst.getOperand(i: 0))); // src2 = dst
  }
}
9562
9563void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9564 OptionalImmIndexMap OptionalIdx;
9565 cvtVOP3(Inst, Operands, OptionalIdx);
9566}
9567
// Packed-math (VOP3P) conversion: appends instruction-specific implicit
// operands (vdst_in, dummy src2 modifiers), then the optional immediates
// (bitop3, op_sel/op_sel_hi, WMMA matrix format/scale/reuse fields, neg_lo/
// neg_hi), and finally folds op_sel/op_sel_hi/neg_lo/neg_hi bits into the
// per-source *_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode: Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  // These opcodes carry a tied src2 (old dst value) with no parsed
  // modifiers: append a zero src2_modifiers and mirror vdst.
  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
    Inst.addOperand(Op: MCOperand::createImm(Val: 0)); // Placeholder for src2_mods
    Inst.addOperand(Op: Inst.getOperand(i: 0));
  }

  // Adding vdst_in operand is already covered for these DPP instructions in
  // cvtVOP3DPP.
  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in) &&
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
    Inst.addOperand(Op: Inst.getOperand(i: 0));
  }

  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyBitOp3);
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // For packed math op_sel_hi defaults to all-ones (-1); otherwise zero.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSelHi,
                          Default: DefaultVal);
  }

  int MatrixAFMTIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_fmt);
  if (MatrixAFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixAFMT, Default: 0);
  }

  int MatrixBFMTIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_fmt);
  if (MatrixBFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixBFMT, Default: 0);
  }

  int MatrixAScaleIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_scale);
  if (MatrixAScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixAScale, Default: 0);
  }

  int MatrixBScaleIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_scale);
  if (MatrixBScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixBScale, Default: 0);
  }

  int MatrixAScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_scale_fmt);
  if (MatrixAScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixAScaleFmt, Default: 0);
  }

  int MatrixBScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_scale_fmt);
  if (MatrixBScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixBScaleFmt, Default: 0);
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::matrix_a_reuse))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixAReuse, Default: 0);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::matrix_b_reuse))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyMatrixBReuse, Default: 0);

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegLo);

  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::neg_hi);
  if (NegHiIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegHi);

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(i: OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(i: OpSelHiIdx).getImm();

  if (NegLoIdx != -1)
    NegLo = Inst.getOperand(i: NegLoIdx).getImm();

  if (NegHiIdx != -1)
    NegHi = Inst.getOperand(i: NegHiIdx).getImm();

  // Fold the collected packed-modifier bits into each source's modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);

    if (ModIdx == -1)
      continue;

    uint32_t ModVal = 0;

    const MCOperand &SrcOp = Inst.getOperand(i: OpIdx);
    if (SrcOp.isReg() && getMRI()
                             ->getRegClass(i: AMDGPU::VGPR_16RegClassID)
                             .contains(Reg: SrcOp.getReg())) {
      // 16-bit VGPR source: the hi/lo half comes from the register suffix
      // (v<N>.h / v<N>.l), overriding the parsed op_sel bit.
      bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Reg: SrcOp.getReg(), MRI: *getMRI());
      if (VGPRSuffixIsHi)
        ModVal |= SISrcMods::OP_SEL_0;
    } else {
      if ((OpSel & (1 << J)) != 0)
        ModVal |= SISrcMods::OP_SEL_0;
    }

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    Inst.getOperand(i: ModIdx).setImm(Inst.getOperand(i: ModIdx).getImm() | ModVal);
  }
}
9768
9769void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
9770 OptionalImmIndexMap OptIdx;
9771 cvtVOP3(Inst, Operands, OptionalIdx&: OptIdx);
9772 cvtVOP3P(Inst, Operands, OptIdx);
9773}
9774
9775static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
9776 unsigned i, unsigned Opc,
9777 AMDGPU::OpName OpName) {
9778 if (AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName) != -1)
9779 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, N: 2);
9780 else
9781 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, N: 1);
9782}
9783
// Builds the MCInst for a sparse WMMA (SWMMAC) instruction. The parsed
// operand positions are fixed: [1]=dst, [2]=src0, [3]=src1, [4]=src2; the dst
// register is re-added as the tied accumulator input, and everything from
// index 5 on is an optional immediate appended in encoding order.
void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, N: 1);
  addSrcModifiersAndSrc(Inst, Operands, i: 2, Opc, OpName: AMDGPU::OpName::src0_modifiers);
  addSrcModifiersAndSrc(Inst, Operands, i: 3, Opc, OpName: AMDGPU::OpName::src1_modifiers);
  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, N: 1); // srcTiedDef
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, N: 1); // src2

  // Remaining parsed operands are all optional immediates; remember where
  // each one was parsed.
  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_8bit))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyIndexKey8bit);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_16bit))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyIndexKey16bit);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_32bit))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
                          ImmT: AMDGPUOperand::ImmTyIndexKey32bit);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyClamp);

  // Shared VOP3P handling folds op_sel/neg bits into the modifier operands.
  cvtVOP3P(Inst, Operands, OptIdx);
}
9816
9817//===----------------------------------------------------------------------===//
9818// VOPD
9819//===----------------------------------------------------------------------===//
9820
// Parses the "::" separator and the VOPDY mnemonic of a dual-issue (VOPD)
// instruction. Returns NoMatch when the subtarget lacks VOPD or no "::"
// appears at the current position.
ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
  if (!hasVOPD(STI: getSTI()))
    return ParseStatus::NoMatch;

  // "::" is lexed as two adjacent Colon tokens (no space allowed between).
  if (isToken(Kind: AsmToken::Colon) && peekToken(ShouldSkipSpace: false).is(K: AsmToken::Colon)) {
    SMLoc S = getLoc();
    lex();
    lex();
    Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "::", Loc: S));
    SMLoc OpYLoc = getLoc();
    StringRef OpYName;
    if (isToken(Kind: AsmToken::Identifier) && !Parser.parseIdentifier(Res&: OpYName)) {
      Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: OpYName, Loc: OpYLoc));
      return ParseStatus::Success;
    }
    return Error(L: OpYLoc, Msg: "expected a VOPDY instruction after ::");
  }
  return ParseStatus::NoMatch;
}
9840
// Create VOPD MCInst operands using parsed assembler operands.
// The parsed-operand order differs from the MCInst layout, so VOPD component
// info is used to translate MCInst slots back to parsed-operand indices.
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());

  // Appends the parsed operand at ParsedOprIdx in whatever form the next
  // MCInst slot requires (modifiers pair, register, or immediate).
  auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
    if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
      return;
    }
    if (Op.isReg()) {
      Op.addRegOperands(Inst, N: 1);
      return;
    }
    if (Op.isImm()) {
      Op.addImmOperands(Inst, N: 1);
      return;
    }
    llvm_unreachable("Unhandled operand type in cvtVOPD");
  };

  const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Inst.getOpcode(), InstrInfo: &MII);

  // MCInst operands are ordered as follows:
  //  dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]

  for (auto CompIdx : VOPD::COMPONENTS) {
    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
  }

  for (auto CompIdx : VOPD::COMPONENTS) {
    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    // Components with an accumulator read their dst as an extra source.
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
  }

  int BitOp3Idx =
      AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    // bitop3, when present, is the last parsed operand.
    OptionalImmIndexMap OptIdx;
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
    if (Op.isImm())
      OptIdx[Op.getImmTy()] = Operands.size() - 1;

    addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyBitOp3);
  }
}
9891
9892//===----------------------------------------------------------------------===//
9893// dpp
9894//===----------------------------------------------------------------------===//
9895
// True if this operand is a dpp8 lane-select immediate (dpp8:[...]).
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmT: ImmTyDPP8);
}
9899
9900bool AMDGPUOperand::isDPPCtrl() const {
9901 using namespace AMDGPU::DPP;
9902
9903 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(x: getImm());
9904 if (result) {
9905 int64_t Imm = getImm();
9906 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9907 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9908 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9909 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9910 (Imm == DppCtrl::WAVE_SHL1) ||
9911 (Imm == DppCtrl::WAVE_ROL1) ||
9912 (Imm == DppCtrl::WAVE_SHR1) ||
9913 (Imm == DppCtrl::WAVE_ROR1) ||
9914 (Imm == DppCtrl::ROW_MIRROR) ||
9915 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9916 (Imm == DppCtrl::BCAST15) ||
9917 (Imm == DppCtrl::BCAST31) ||
9918 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9919 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9920 }
9921 return false;
9922}
9923
9924//===----------------------------------------------------------------------===//
9925// mAI
9926//===----------------------------------------------------------------------===//
9927
9928bool AMDGPUOperand::isBLGP() const {
9929 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(x: getImm());
9930}
9931
9932bool AMDGPUOperand::isS16Imm() const {
9933 return isImmLiteral() && (isInt<16>(x: getImm()) || isUInt<16>(x: getImm()));
9934}
9935
9936bool AMDGPUOperand::isU16Imm() const {
9937 return isImmLiteral() && isUInt<16>(x: getImm());
9938}
9939
9940//===----------------------------------------------------------------------===//
9941// dim
9942//===----------------------------------------------------------------------===//
9943
// Parses the value of a dim: modifier (e.g. "1D", "2D_ARRAY", or the verbose
// "SQ_RSRC_IMG_*" spelling) and translates it into its hardware encoding.
// Returns false if the tokens do not name a known image dimension.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(Kind: AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // The integer and the following identifier must be adjacent (no
    // whitespace), otherwise this is not a single dim value.
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Val&: Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  // Accept the fully qualified "SQ_RSRC_IMG_<dim>" form as well.
  DimId.consume_front(Prefix: "SQ_RSRC_IMG_");

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(AsmSuffix: DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}
9971
// Parses an MIMG "dim:<value>" operand (GFX10+) and pushes it as an
// immediate operand carrying the dimension's hardware encoding.
ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();

  // "dim" must be immediately followed by ':'.
  if (!trySkipId(Id: "dim", Kind: AsmToken::Colon))
    return ParseStatus::NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(L: Loc, Msg: "invalid dim value");

  Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Encoding, Loc: S,
                                              Type: AMDGPUOperand::ImmTyDim));
  return ParseStatus::Success;
}
9990
9991//===----------------------------------------------------------------------===//
9992// dpp
9993//===----------------------------------------------------------------------===//
9994
// Parses a "dpp8:[s0,...,s7]" operand (GFX10+): eight 3-bit lane selectors
// packed little-endian into a single 24-bit immediate.
ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId(Id: "dpp8", Kind: AsmToken::Colon))
    return ParseStatus::NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected an opening square bracket"))
    return ParseStatus::Failure;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
      return ParseStatus::Failure;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Res&: Sels[i]))
      return ParseStatus::Failure;
    // Each selector names a lane within the group of 8, hence 0..7.
    if (0 > Sels[i] || 7 < Sels[i])
      return Error(L: Loc, Msg: "expected a 3-bit value");
  }

  if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
    return ParseStatus::Failure;

  // Pack selector i into bits [3*i+2 : 3*i].
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DPP8, Loc: S, Type: AMDGPUOperand::ImmTyDPP8));
  return ParseStatus::Success;
}
10029
10030bool
10031AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10032 const OperandVector &Operands) {
10033 if (Ctrl == "row_newbcast")
10034 return isGFX90A();
10035
10036 if (Ctrl == "row_share" ||
10037 Ctrl == "row_xmask")
10038 return isGFX10Plus();
10039
10040 if (Ctrl == "wave_shl" ||
10041 Ctrl == "wave_shr" ||
10042 Ctrl == "wave_rol" ||
10043 Ctrl == "wave_ror" ||
10044 Ctrl == "row_bcast")
10045 return isVI() || isGFX9();
10046
10047 return Ctrl == "row_mirror" ||
10048 Ctrl == "row_half_mirror" ||
10049 Ctrl == "quad_perm" ||
10050 Ctrl == "row_shl" ||
10051 Ctrl == "row_shr" ||
10052 Ctrl == "row_ror";
10053}
10054
10055int64_t
10056AMDGPUAsmParser::parseDPPCtrlPerm() {
10057 // quad_perm:[%d,%d,%d,%d]
10058
10059 if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected an opening square bracket"))
10060 return -1;
10061
10062 int64_t Val = 0;
10063 for (int i = 0; i < 4; ++i) {
10064 if (i > 0 && !skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
10065 return -1;
10066
10067 int64_t Temp;
10068 SMLoc Loc = getLoc();
10069 if (getParser().parseAbsoluteExpression(Res&: Temp))
10070 return -1;
10071 if (Temp < 0 || Temp > 3) {
10072 Error(L: Loc, Msg: "expected a 2-bit value");
10073 return -1;
10074 }
10075
10076 Val += (Temp << i * 2);
10077 }
10078
10079 if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
10080 return -1;
10081
10082 return Val;
10083}
10084
// Parses the numeric argument of a "<ctrl>:%d" DPP control (wave_shl:1,
// row_shl:%d, row_bcast:15|31, ...), validates it against the per-control
// legal range, and returns the combined DPP control encoding, or -1 after
// reporting an error.
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Res&: Val))
    return -1;

  // Per-control base encoding plus inclusive [Lo, Hi] range of the accepted
  // immediate.
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case(S: "wave_shl", Value: {.Ctrl: DppCtrl::WAVE_SHL1, .Lo: 1, .Hi: 1})
    .Case(S: "wave_rol", Value: {.Ctrl: DppCtrl::WAVE_ROL1, .Lo: 1, .Hi: 1})
    .Case(S: "wave_shr", Value: {.Ctrl: DppCtrl::WAVE_SHR1, .Lo: 1, .Hi: 1})
    .Case(S: "wave_ror", Value: {.Ctrl: DppCtrl::WAVE_ROR1, .Lo: 1, .Hi: 1})
    .Case(S: "row_shl", Value: {.Ctrl: DppCtrl::ROW_SHL0, .Lo: 1, .Hi: 15})
    .Case(S: "row_shr", Value: {.Ctrl: DppCtrl::ROW_SHR0, .Lo: 1, .Hi: 15})
    .Case(S: "row_ror", Value: {.Ctrl: DppCtrl::ROW_ROR0, .Lo: 1, .Hi: 15})
    .Case(S: "row_share", Value: {.Ctrl: DppCtrl::ROW_SHARE_FIRST, .Lo: 0, .Hi: 15})
    .Case(S: "row_xmask", Value: {.Ctrl: DppCtrl::ROW_XMASK_FIRST, .Lo: 0, .Hi: 15})
    .Case(S: "row_newbcast", Value: {.Ctrl: DppCtrl::ROW_NEWBCAST_FIRST, .Lo: 0, .Hi: 15})
    .Default(Value: {.Ctrl: -1, .Lo: 0, .Hi: 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // Not in the table: only row_bcast remains, which accepts exactly 15 or
    // 31. (Val is set unconditionally here; it is only used when Valid.)
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    // Range-check, then merge the immediate into the base encoding. Controls
    // with a fixed argument (Lo == Hi) use the base encoding unchanged.
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(L: Loc, Msg: Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
10132
10133ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10134 using namespace AMDGPU::DPP;
10135
10136 if (!isToken(Kind: AsmToken::Identifier) ||
10137 !isSupportedDPPCtrl(Ctrl: getTokenStr(), Operands))
10138 return ParseStatus::NoMatch;
10139
10140 SMLoc S = getLoc();
10141 int64_t Val = -1;
10142 StringRef Ctrl;
10143
10144 parseId(Val&: Ctrl);
10145
10146 if (Ctrl == "row_mirror") {
10147 Val = DppCtrl::ROW_MIRROR;
10148 } else if (Ctrl == "row_half_mirror") {
10149 Val = DppCtrl::ROW_HALF_MIRROR;
10150 } else {
10151 if (skipToken(Kind: AsmToken::Colon, ErrMsg: "expected a colon")) {
10152 if (Ctrl == "quad_perm") {
10153 Val = parseDPPCtrlPerm();
10154 } else {
10155 Val = parseDPPCtrlSel(Ctrl);
10156 }
10157 }
10158 }
10159
10160 if (Val == -1)
10161 return ParseStatus::Failure;
10162
10163 Operands.push_back(
10164 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: AMDGPUOperand::ImmTyDppCtrl));
10165 return ParseStatus::Success;
10166}
10167
// Convert parsed VOP3 DPP/DPP8 operands into an MCInst. Inserts implicit,
// tied and dummy operands required by irregular encodings (MAC 'old'
// operands, gfx12 v_cvt_sr_* dummy src2 modifiers, vdst_in), then appends
// the optional immediates in encoding order.
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());

  // MAC instructions are special because they have 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have dummy unused src2_modifiers.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OpNum: OldIdx, Constraint: MCOI::TIED_TO) == -1;

  // Copy the explicit defs first. Operands[0] is the mnemonic token, so the
  // parsed operands start at index 1.
  unsigned I = 1;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  int Fi = 0;
  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst_in);
  bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;

  for (unsigned E = Operands.size(); I != E; ++I) {

    if (IsMAC) {
      int NumOperands = Inst.getNumOperands();
      if (OldIdx == NumOperands) {
        // Handle old operand
        constexpr int DST_IDX = 0;
        Inst.addOperand(Op: Inst.getOperand(i: DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add unused dummy src2_modifiers
        Inst.addOperand(Op: MCOperand::createImm(Val: 0));
      }
    }

    // vdst_in duplicates the destination register operand.
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
      Inst.addOperand(Op: Inst.getOperand(i: 0));
    }

    if (IsVOP3CvtSrDpp) {
      // These opcodes take no src2 in assembly; insert dummy modifiers and
      // an empty register at the src2 position.
      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
        Inst.addOperand(Op: MCOperand::createImm(Val: 0));
        Inst.addOperand(Op: MCOperand::createReg(Reg: MCRegister()));
      }
    }

    auto TiedTo = Desc.getOperandConstraint(OpNum: Inst.getNumOperands(),
                                            Constraint: MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Op: Inst.getOperand(i: TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (IsDPP8 && Op.isDppFI()) {
      // The FI bit is emitted after the dpp8 selects; remember it for later.
      Fi = Op.getImm();
    } else if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, N: 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      // Immediate occupying a register-class slot (e.g. a literal source).
      Op.addImmOperands(Inst, N: 1);
    } else if (Op.isImm()) {
      // Optional modifier immediates are appended after the loop, in
      // encoding order, via OptionalIdx.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::byte_sel)) {
    // vdst_in may land after the operand loop for byte_sel opcodes.
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
      Inst.addOperand(Op: Inst.getOperand(i: 0));
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          ImmT: AMDGPUOperand::ImmTyByteSel);
  }

  if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOModSI);

  // op_sel and the remaining VOP3P/VOP3 modifiers.
  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptIdx&: OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);
  else if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel);
  }

  if (IsDPP8) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(Op: MCOperand::createImm(Val: Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Defaults: 0xe4 is the identity quad_perm; 0xf enables all rows/banks.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppCtrl, Default: 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppRowMask, Default: 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBankMask, Default: 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            ImmT: AMDGPUOperand::ImmTyDppFI);
  }
}
10282
// Convert parsed (non-VOP3) DPP/DPP8 operands into an MCInst, handling tied
// operands, the skipped VOP2b "vcc" token, and the trailing optional
// immediates.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  // Copy the explicit defs first. Operands[0] is the mnemonic token, so the
  // parsed operands start at index 1.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(OpNum: Inst.getNumOperands(),
                                            Constraint: MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Op: Inst.getOperand(i: TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Reg: Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, N: 1);
      } else if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, N: 2);
      } else if (Op.isDppFI()) {
        // The FI bit is emitted last; remember it for after the loop.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, N: 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, N: 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, N: 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, N: 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(Op: MCOperand::createImm(Val: Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Defaults: 0xf enables all rows/banks.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppRowMask, Default: 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBankMask, Default: 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            ImmT: AMDGPUOperand::ImmTyDppFI);
    }
  }
}
10350
10351//===----------------------------------------------------------------------===//
10352// sdwa
10353//===----------------------------------------------------------------------===//
10354
// Parses an SDWA sub-dword select operand of the form "<Prefix>:BYTE_0",
// "<Prefix>:WORD_1", etc. The helper also accepts the select as an integer
// (per its name); the resulting index is stored as an immediate of 'Type'.
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Name: Prefix,
      Ids: {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
      Type);
}
10363
// Parses the SDWA "dst_unused:UNUSED_*" operand, which selects how the
// unwritten portion of the destination is treated.
ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  return parseStringOrIntWithPrefix(
      Operands, Name: "dst_unused", Ids: {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      Type: AMDGPUOperand::ImmTySDWADstUnused);
}
10369
// SDWA VOP1 conversion: plain layout, no "vcc" tokens to skip.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP1);
}
10373
// SDWA VOP2 conversion: plain layout, no "vcc" tokens to skip.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP2);
}
10377
// SDWA VOP2b conversion: skip both the dst and src "vcc" tokens, which are
// implicit in the SDWA encoding.
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP2, SkipDstVcc: true, SkipSrcVcc: true);
}
10381
// SDWA VOP2e conversion: skip only the src "vcc" token; the dst one is kept.
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOP2, SkipDstVcc: false, SkipSrcVcc: true);
}
10385
// SDWA VOPC conversion: the dst "vcc" token is skipped on VI only.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, BasicInstType: SIInstrFlags::VOPC, SkipDstVcc: isVI());
}
10389
// Convert parsed SDWA operands into an MCInst. BasicInstType selects the
// VOP1/VOP2/VOPC operand layout; SkipDstVcc/SkipSrcVcc drop textual "vcc"
// tokens that some forms carry but that are implicit in the SDWA encoding.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  // Copy the explicit defs first. Operands[0] is the mnemonic token, so the
  // parsed operands start at index 1.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      }
      if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, N: 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              ImmT: AMDGPUOperand::ImmTyClamp, Default: 0);

      if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              ImmT: AMDGPUOperand::ImmTyOModSI, Default: 0);

      if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              ImmT: AMDGPUOperand::ImmTySDWADstSel, Default: SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              ImmT: AMDGPUOperand::ImmTySDWADstUnused,
                              Default: DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            ImmT: AMDGPUOperand::ImmTyClamp, Default: 0);

      if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOModSI, Default: 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWADstSel, Default: SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWADstUnused, Default: DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc1Sel, Default: SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused: the destination is VCC.
      if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              ImmT: AMDGPUOperand::ImmTyClamp, Default: 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc1Sel, Default: SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
    std::advance(
        i&: it, n: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::src2));
    Inst.insert(I: it, Op: Inst.getOperand(i: 0)); // src2 = dst
  }
}
10499
/// Force static initialization.
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeAMDGPUAsmParser() {
  // Register this asm parser for both the R600 and GCN targets.
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
10506
10507#define GET_MATCHER_IMPLEMENTATION
10508#define GET_MNEMONIC_SPELL_CHECKER
10509#define GET_MNEMONIC_CHECKER
10510#include "AMDGPUGenAsmMatcher.inc"
10511
10512ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10513 unsigned MCK) {
10514 switch (MCK) {
10515 case MCK_addr64:
10516 return parseTokenOp(Name: "addr64", Operands);
10517 case MCK_done:
10518 return parseNamedBit(Name: "done", Operands, ImmTy: AMDGPUOperand::ImmTyDone, IgnoreNegative: true);
10519 case MCK_idxen:
10520 return parseTokenOp(Name: "idxen", Operands);
10521 case MCK_lds:
10522 return parseTokenOp(Name: "lds", Operands);
10523 case MCK_offen:
10524 return parseTokenOp(Name: "offen", Operands);
10525 case MCK_off:
10526 return parseTokenOp(Name: "off", Operands);
10527 case MCK_row_95_en:
10528 return parseNamedBit(Name: "row_en", Operands, ImmTy: AMDGPUOperand::ImmTyRowEn, IgnoreNegative: true);
10529 case MCK_gds:
10530 return parseNamedBit(Name: "gds", Operands, ImmTy: AMDGPUOperand::ImmTyGDS);
10531 case MCK_tfe:
10532 return parseNamedBit(Name: "tfe", Operands, ImmTy: AMDGPUOperand::ImmTyTFE);
10533 }
10534 return tryCustomParseOperand(Operands, MCK);
10535}
10536
10537// This function should be defined after auto-generated include so that we have
10538// MatchClassKind enum defined
10539unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10540 unsigned Kind) {
10541 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10542 // But MatchInstructionImpl() expects to meet token and fails to validate
10543 // operand. This method checks if we are given immediate operand but expect to
10544 // get corresponding token.
10545 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10546 switch (Kind) {
10547 case MCK_addr64:
10548 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10549 case MCK_gds:
10550 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10551 case MCK_lds:
10552 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10553 case MCK_idxen:
10554 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10555 case MCK_offen:
10556 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10557 case MCK_tfe:
10558 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10559 case MCK_done:
10560 return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10561 case MCK_row_95_en:
10562 return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10563 case MCK_SSrc_b32:
10564 // When operands have expression values, they will return true for isToken,
10565 // because it is not possible to distinguish between a token and an
10566 // expression at parse time. MatchInstructionImpl() will always try to
10567 // match an operand as a token, when isToken returns true, and when the
10568 // name of the expression is not a valid token, the match will fail,
10569 // so we need to handle it here.
10570 return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10571 case MCK_SSrc_f32:
10572 return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10573 case MCK_SOPPBrTarget:
10574 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10575 case MCK_VReg32OrOff:
10576 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10577 case MCK_InterpSlot:
10578 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10579 case MCK_InterpAttr:
10580 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10581 case MCK_InterpAttrChan:
10582 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10583 case MCK_SReg_64:
10584 case MCK_SReg_64_XEXEC:
10585 // Null is defined as a 32-bit register but
10586 // it should also be enabled with 64-bit operands or larger.
10587 // The following code enables it for SReg_64 and larger operands
10588 // used as source and destination. Remaining source
10589 // operands are handled in isInlinableImm.
10590 case MCK_SReg_96:
10591 case MCK_SReg_128:
10592 case MCK_SReg_256:
10593 case MCK_SReg_512:
10594 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10595 default:
10596 return Match_InvalidOperand;
10597 }
10598}
10599
10600//===----------------------------------------------------------------------===//
10601// endpgm
10602//===----------------------------------------------------------------------===//
10603
10604ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10605 SMLoc S = getLoc();
10606 int64_t Imm = 0;
10607
10608 if (!parseExpr(Imm)) {
10609 // The operand is optional, if not present default to 0
10610 Imm = 0;
10611 }
10612
10613 if (!isUInt<16>(x: Imm))
10614 return Error(L: S, Msg: "expected a 16-bit value");
10615
10616 Operands.push_back(
10617 Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTyEndpgm));
10618 return ParseStatus::Success;
10619}
10620
// True if this operand was produced by parseEndpgm().
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmT: ImmTyEndpgm); }
10622
10623//===----------------------------------------------------------------------===//
10624// Split Barrier
10625//===----------------------------------------------------------------------===//
10626
// Split-barrier operands accept any 32-bit inlinable immediate.
bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(type: MVT::i32); }
10628