//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 | |
9 | #include "AMDKernelCodeT.h" |
10 | #include "MCTargetDesc/AMDGPUMCExpr.h" |
11 | #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h" |
12 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
13 | #include "MCTargetDesc/AMDGPUTargetStreamer.h" |
14 | #include "SIDefines.h" |
15 | #include "SIInstrInfo.h" |
16 | #include "SIRegisterInfo.h" |
17 | #include "TargetInfo/AMDGPUTargetInfo.h" |
18 | #include "Utils/AMDGPUAsmUtils.h" |
19 | #include "Utils/AMDGPUBaseInfo.h" |
20 | #include "Utils/AMDKernelCodeTUtils.h" |
21 | #include "llvm/ADT/APFloat.h" |
22 | #include "llvm/ADT/SmallBitVector.h" |
23 | #include "llvm/ADT/StringSet.h" |
24 | #include "llvm/ADT/Twine.h" |
25 | #include "llvm/BinaryFormat/ELF.h" |
26 | #include "llvm/CodeGenTypes/MachineValueType.h" |
27 | #include "llvm/MC/MCAsmInfo.h" |
28 | #include "llvm/MC/MCContext.h" |
29 | #include "llvm/MC/MCExpr.h" |
30 | #include "llvm/MC/MCInst.h" |
31 | #include "llvm/MC/MCInstrDesc.h" |
32 | #include "llvm/MC/MCParser/MCAsmLexer.h" |
33 | #include "llvm/MC/MCParser/MCAsmParser.h" |
34 | #include "llvm/MC/MCParser/MCParsedAsmOperand.h" |
35 | #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
36 | #include "llvm/MC/MCSymbol.h" |
37 | #include "llvm/MC/TargetRegistry.h" |
38 | #include "llvm/Support/AMDGPUMetadata.h" |
39 | #include "llvm/Support/AMDHSAKernelDescriptor.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include "llvm/Support/MathExtras.h" |
42 | #include "llvm/TargetParser/TargetParser.h" |
43 | #include <optional> |
44 | |
45 | using namespace llvm; |
46 | using namespace llvm::AMDGPU; |
47 | using namespace llvm::amdhsa; |
48 | |
49 | namespace { |
50 | |
51 | class AMDGPUAsmParser; |
52 | |
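// Coarse register categories produced by the register parser: vector, scalar,
// and accumulator GPRs, trap temporaries (TTMP), and other special registers.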
53 | enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; |
54 | |
55 | //===----------------------------------------------------------------------===// |
56 | // Operand |
57 | //===----------------------------------------------------------------------===// |
58 | |
59 | class AMDGPUOperand : public MCParsedAsmOperand { |
60 | enum KindTy { |
61 | Token, |
62 | Immediate, |
63 | Register, |
64 | Expression |
65 | } Kind; |
66 | |
67 | SMLoc StartLoc, EndLoc; |
68 | const AMDGPUAsmParser *AsmParser; |
69 | |
70 | public: |
71 | AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) |
72 | : Kind(Kind_), AsmParser(AsmParser_) {} |
73 | |
74 | using Ptr = std::unique_ptr<AMDGPUOperand>; |
75 | |
76 | struct Modifiers { |
77 | bool Abs = false; |
78 | bool Neg = false; |
79 | bool Sext = false; |
80 | bool Lit = false; |
81 | |
82 | bool hasFPModifiers() const { return Abs || Neg; } |
83 | bool hasIntModifiers() const { return Sext; } |
84 | bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } |
85 | |
86 | int64_t getFPModifiersOperand() const { |
87 | int64_t Operand = 0; |
88 | Operand |= Abs ? SISrcMods::ABS : 0u; |
89 | Operand |= Neg ? SISrcMods::NEG : 0u; |
90 | return Operand; |
91 | } |
92 | |
93 | int64_t getIntModifiersOperand() const { |
94 | int64_t Operand = 0; |
95 | Operand |= Sext ? SISrcMods::SEXT : 0u; |
96 | return Operand; |
97 | } |
98 | |
99 | int64_t getModifiersOperand() const { |
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
102 | if (hasFPModifiers()) |
103 | return getFPModifiersOperand(); |
104 | if (hasIntModifiers()) |
105 | return getIntModifiersOperand(); |
106 | return 0; |
107 | } |
108 | |
109 | friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); |
110 | }; |
111 | |
112 | enum ImmTy { |
113 | ImmTyNone, |
114 | ImmTyGDS, |
115 | ImmTyLDS, |
116 | ImmTyOffen, |
117 | ImmTyIdxen, |
118 | ImmTyAddr64, |
119 | ImmTyOffset, |
120 | ImmTyInstOffset, |
121 | ImmTyOffset0, |
122 | ImmTyOffset1, |
123 | ImmTySMEMOffsetMod, |
124 | ImmTyCPol, |
125 | ImmTyTFE, |
126 | ImmTyD16, |
127 | ImmTyClamp, |
128 | ImmTyOModSI, |
129 | ImmTySDWADstSel, |
130 | ImmTySDWASrc0Sel, |
131 | ImmTySDWASrc1Sel, |
132 | ImmTySDWADstUnused, |
133 | ImmTyDMask, |
134 | ImmTyDim, |
135 | ImmTyUNorm, |
136 | ImmTyDA, |
137 | ImmTyR128A16, |
138 | ImmTyA16, |
139 | ImmTyLWE, |
140 | ImmTyExpTgt, |
141 | ImmTyExpCompr, |
142 | ImmTyExpVM, |
143 | ImmTyFORMAT, |
144 | ImmTyHwreg, |
145 | ImmTyOff, |
146 | ImmTySendMsg, |
147 | ImmTyInterpSlot, |
148 | ImmTyInterpAttr, |
149 | ImmTyInterpAttrChan, |
150 | ImmTyOpSel, |
151 | ImmTyOpSelHi, |
152 | ImmTyNegLo, |
153 | ImmTyNegHi, |
154 | ImmTyIndexKey8bit, |
155 | ImmTyIndexKey16bit, |
156 | ImmTyDPP8, |
157 | ImmTyDppCtrl, |
158 | ImmTyDppRowMask, |
159 | ImmTyDppBankMask, |
160 | ImmTyDppBoundCtrl, |
161 | ImmTyDppFI, |
162 | ImmTySwizzle, |
163 | ImmTyGprIdxMode, |
164 | ImmTyHigh, |
165 | ImmTyBLGP, |
166 | ImmTyCBSZ, |
167 | ImmTyABID, |
168 | ImmTyEndpgm, |
169 | ImmTyWaitVDST, |
170 | ImmTyWaitEXP, |
171 | ImmTyWaitVAVDst, |
172 | ImmTyWaitVMVSrc, |
173 | ImmTyByteSel, |
174 | }; |
175 | |
176 | // Immediate operand kind. |
177 | // It helps to identify the location of an offending operand after an error. |
178 | // Note that regular literals and mandatory literals (KImm) must be handled |
179 | // differently. When looking for an offending operand, we should usually |
180 | // ignore mandatory literals because they are part of the instruction and |
181 | // cannot be changed. Report location of mandatory operands only for VOPD, |
182 | // when both OpX and OpY have a KImm and there are no other literals. |
183 | enum ImmKindTy { |
184 | ImmKindTyNone, |
185 | ImmKindTyLiteral, |
186 | ImmKindTyMandatoryLiteral, |
187 | ImmKindTyConst, |
188 | }; |
189 | |
190 | private: |
191 | struct TokOp { |
192 | const char *Data; |
193 | unsigned Length; |
194 | }; |
195 | |
196 | struct ImmOp { |
197 | int64_t Val; |
198 | ImmTy Type; |
199 | bool IsFPImm; |
200 | mutable ImmKindTy Kind; |
201 | Modifiers Mods; |
202 | }; |
203 | |
204 | struct RegOp { |
205 | unsigned RegNo; |
206 | Modifiers Mods; |
207 | }; |
208 | |
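  // Exactly one of the following union members is in use at any time, as
  // selected by Kind above.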
209 | union { |
210 | TokOp Tok; |
211 | ImmOp Imm; |
212 | RegOp Reg; |
213 | const MCExpr *Expr; |
214 | }; |
215 | |
216 | public: |
217 | bool isToken() const override { return Kind == Token; } |
218 | |
219 | bool isSymbolRefExpr() const { |
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
221 | } |
222 | |
223 | bool isImm() const override { |
224 | return Kind == Immediate; |
225 | } |
226 | |
227 | void setImmKindNone() const { |
228 | assert(isImm()); |
229 | Imm.Kind = ImmKindTyNone; |
230 | } |
231 | |
232 | void setImmKindLiteral() const { |
233 | assert(isImm()); |
234 | Imm.Kind = ImmKindTyLiteral; |
235 | } |
236 | |
237 | void setImmKindMandatoryLiteral() const { |
238 | assert(isImm()); |
239 | Imm.Kind = ImmKindTyMandatoryLiteral; |
240 | } |
241 | |
242 | void setImmKindConst() const { |
243 | assert(isImm()); |
244 | Imm.Kind = ImmKindTyConst; |
245 | } |
246 | |
247 | bool IsImmKindLiteral() const { |
248 | return isImm() && Imm.Kind == ImmKindTyLiteral; |
249 | } |
250 | |
251 | bool IsImmKindMandatoryLiteral() const { |
252 | return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral; |
253 | } |
254 | |
255 | bool isImmKindConst() const { |
256 | return isImm() && Imm.Kind == ImmKindTyConst; |
257 | } |
258 | |
259 | bool isInlinableImm(MVT type) const; |
260 | bool isLiteralImm(MVT type) const; |
261 | |
262 | bool isRegKind() const { |
263 | return Kind == Register; |
264 | } |
265 | |
266 | bool isReg() const override { |
267 | return isRegKind() && !hasModifiers(); |
268 | } |
269 | |
270 | bool isRegOrInline(unsigned RCID, MVT type) const { |
271 | return isRegClass(RCID) || isInlinableImm(type); |
272 | } |
273 | |
274 | bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { |
275 | return isRegOrInline(RCID, type) || isLiteralImm(type); |
276 | } |
277 | |
  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }
346 | |
347 | bool isVReg32OrOff() const { |
348 | return isOff() || isVReg32(); |
349 | } |
350 | |
351 | bool isNull() const { |
352 | return isRegKind() && getReg() == AMDGPU::SGPR_NULL; |
353 | } |
354 | |
355 | bool isVRegWithInputMods() const; |
356 | template <bool IsFake16> bool isT16VRegWithInputMods() const; |
357 | |
358 | bool isSDWAOperand(MVT type) const; |
359 | bool isSDWAFP16Operand() const; |
360 | bool isSDWAFP32Operand() const; |
361 | bool isSDWAInt16Operand() const; |
362 | bool isSDWAInt32Operand() const; |
363 | |
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
405 | |
406 | bool isRegOrImm() const { |
407 | return isReg() || isImm(); |
408 | } |
409 | |
410 | bool isRegClass(unsigned RCID) const; |
411 | |
412 | bool isInlineValue() const; |
413 | |
414 | bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { |
415 | return isRegOrInline(RCID, type) && !hasModifiers(); |
416 | } |
417 | |
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
508 | |
509 | bool isVCSrc_b32() const { |
510 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32); |
511 | } |
512 | |
513 | bool isVCSrcB64() const { |
514 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::i64); |
515 | } |
516 | |
517 | bool isVCSrcTB16() const { |
518 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::i16); |
519 | } |
520 | |
521 | bool isVCSrcTB16_Lo128() const { |
522 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::i16); |
523 | } |
524 | |
525 | bool isVCSrcFake16B16_Lo128() const { |
526 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::i16); |
527 | } |
528 | |
529 | bool isVCSrc_b16() const { |
530 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16); |
531 | } |
532 | |
533 | bool isVCSrc_v2b16() const { return isVCSrc_b16(); } |
534 | |
535 | bool isVCSrc_f32() const { |
536 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32); |
537 | } |
538 | |
539 | bool isVCSrcF64() const { |
540 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64); |
541 | } |
542 | |
543 | bool isVCSrcTBF16() const { |
544 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::bf16); |
545 | } |
546 | |
547 | bool isVCSrcTF16() const { |
548 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::f16); |
549 | } |
550 | |
551 | bool isVCSrcTBF16_Lo128() const { |
552 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::bf16); |
553 | } |
554 | |
555 | bool isVCSrcTF16_Lo128() const { |
556 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::f16); |
557 | } |
558 | |
559 | bool isVCSrcFake16BF16_Lo128() const { |
560 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::bf16); |
561 | } |
562 | |
563 | bool isVCSrcFake16F16_Lo128() const { |
564 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::f16); |
565 | } |
566 | |
567 | bool isVCSrc_bf16() const { |
568 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::bf16); |
569 | } |
570 | |
571 | bool isVCSrc_f16() const { |
572 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f16); |
573 | } |
574 | |
575 | bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); } |
576 | |
577 | bool isVCSrc_v2f16() const { return isVCSrc_f16(); } |
578 | |
579 | bool isVSrc_b32() const { |
580 | return isVCSrc_f32() || isLiteralImm(type: MVT::i32) || isExpr(); |
581 | } |
582 | |
583 | bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(type: MVT::i64); } |
584 | |
585 | bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(type: MVT::i16); } |
586 | |
587 | bool isVSrcT_b16_Lo128() const { |
588 | return isVCSrcTB16_Lo128() || isLiteralImm(type: MVT::i16); |
589 | } |
590 | |
591 | bool isVSrcFake16_b16_Lo128() const { |
592 | return isVCSrcFake16B16_Lo128() || isLiteralImm(type: MVT::i16); |
593 | } |
594 | |
595 | bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(type: MVT::i16); } |
596 | |
597 | bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(type: MVT::v2i16); } |
598 | |
599 | bool isVCSrcV2FP32() const { |
600 | return isVCSrcF64(); |
601 | } |
602 | |
603 | bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(type: MVT::v2f32); } |
604 | |
605 | bool isVCSrcV2INT32() const { |
606 | return isVCSrcB64(); |
607 | } |
608 | |
609 | bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(type: MVT::v2i32); } |
610 | |
611 | bool isVSrc_f32() const { |
612 | return isVCSrc_f32() || isLiteralImm(type: MVT::f32) || isExpr(); |
613 | } |
614 | |
615 | bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(type: MVT::f64); } |
616 | |
617 | bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(type: MVT::bf16); } |
618 | |
619 | bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(type: MVT::f16); } |
620 | |
621 | bool isVSrcT_bf16_Lo128() const { |
622 | return isVCSrcTBF16_Lo128() || isLiteralImm(type: MVT::bf16); |
623 | } |
624 | |
625 | bool isVSrcT_f16_Lo128() const { |
626 | return isVCSrcTF16_Lo128() || isLiteralImm(type: MVT::f16); |
627 | } |
628 | |
629 | bool isVSrcFake16_bf16_Lo128() const { |
630 | return isVCSrcFake16BF16_Lo128() || isLiteralImm(type: MVT::bf16); |
631 | } |
632 | |
633 | bool isVSrcFake16_f16_Lo128() const { |
634 | return isVCSrcFake16F16_Lo128() || isLiteralImm(type: MVT::f16); |
635 | } |
636 | |
637 | bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(type: MVT::bf16); } |
638 | |
639 | bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(type: MVT::f16); } |
640 | |
641 | bool isVSrc_v2bf16() const { |
642 | return isVSrc_bf16() || isLiteralImm(type: MVT::v2bf16); |
643 | } |
644 | |
645 | bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(type: MVT::v2f16); } |
646 | |
647 | bool isVISrcB32() const { |
648 | return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i32); |
649 | } |
650 | |
651 | bool isVISrcB16() const { |
652 | return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i16); |
653 | } |
654 | |
655 | bool isVISrcV2B16() const { |
656 | return isVISrcB16(); |
657 | } |
658 | |
659 | bool isVISrcF32() const { |
660 | return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f32); |
661 | } |
662 | |
663 | bool isVISrcF16() const { |
664 | return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f16); |
665 | } |
666 | |
667 | bool isVISrcV2F16() const { |
668 | return isVISrcF16() || isVISrcB32(); |
669 | } |
670 | |
671 | bool isVISrc_64_bf16() const { |
672 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::bf16); |
673 | } |
674 | |
675 | bool isVISrc_64_f16() const { |
676 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f16); |
677 | } |
678 | |
679 | bool isVISrc_64_b32() const { |
680 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32); |
681 | } |
682 | |
683 | bool isVISrc_64B64() const { |
684 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i64); |
685 | } |
686 | |
687 | bool isVISrc_64_f64() const { |
688 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f64); |
689 | } |
690 | |
691 | bool isVISrc_64V2FP32() const { |
692 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f32); |
693 | } |
694 | |
695 | bool isVISrc_64V2INT32() const { |
696 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32); |
697 | } |
698 | |
699 | bool isVISrc_256_b32() const { |
700 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32); |
701 | } |
702 | |
703 | bool isVISrc_256_f32() const { |
704 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32); |
705 | } |
706 | |
707 | bool isVISrc_256B64() const { |
708 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i64); |
709 | } |
710 | |
711 | bool isVISrc_256_f64() const { |
712 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f64); |
713 | } |
714 | |
715 | bool isVISrc_128B16() const { |
716 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i16); |
717 | } |
718 | |
719 | bool isVISrc_128V2B16() const { |
720 | return isVISrc_128B16(); |
721 | } |
722 | |
723 | bool isVISrc_128_b32() const { |
724 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i32); |
725 | } |
726 | |
727 | bool isVISrc_128_f32() const { |
728 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f32); |
729 | } |
730 | |
731 | bool isVISrc_256V2FP32() const { |
732 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32); |
733 | } |
734 | |
735 | bool isVISrc_256V2INT32() const { |
736 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32); |
737 | } |
738 | |
739 | bool isVISrc_512_b32() const { |
740 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i32); |
741 | } |
742 | |
743 | bool isVISrc_512B16() const { |
744 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i16); |
745 | } |
746 | |
747 | bool isVISrc_512V2B16() const { |
748 | return isVISrc_512B16(); |
749 | } |
750 | |
751 | bool isVISrc_512_f32() const { |
752 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f32); |
753 | } |
754 | |
755 | bool isVISrc_512F16() const { |
756 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f16); |
757 | } |
758 | |
759 | bool isVISrc_512V2F16() const { |
760 | return isVISrc_512F16() || isVISrc_512_b32(); |
761 | } |
762 | |
763 | bool isVISrc_1024_b32() const { |
764 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i32); |
765 | } |
766 | |
767 | bool isVISrc_1024B16() const { |
768 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i16); |
769 | } |
770 | |
771 | bool isVISrc_1024V2B16() const { |
772 | return isVISrc_1024B16(); |
773 | } |
774 | |
775 | bool isVISrc_1024_f32() const { |
776 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f32); |
777 | } |
778 | |
779 | bool isVISrc_1024F16() const { |
780 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f16); |
781 | } |
782 | |
783 | bool isVISrc_1024V2F16() const { |
784 | return isVISrc_1024F16() || isVISrc_1024_b32(); |
785 | } |
786 | |
787 | bool isAISrcB32() const { |
788 | return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i32); |
789 | } |
790 | |
791 | bool isAISrcB16() const { |
792 | return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i16); |
793 | } |
794 | |
795 | bool isAISrcV2B16() const { |
796 | return isAISrcB16(); |
797 | } |
798 | |
799 | bool isAISrcF32() const { |
800 | return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f32); |
801 | } |
802 | |
803 | bool isAISrcF16() const { |
804 | return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f16); |
805 | } |
806 | |
807 | bool isAISrcV2F16() const { |
808 | return isAISrcF16() || isAISrcB32(); |
809 | } |
810 | |
811 | bool isAISrc_64B64() const { |
812 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::i64); |
813 | } |
814 | |
815 | bool isAISrc_64_f64() const { |
816 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::f64); |
817 | } |
818 | |
819 | bool isAISrc_128_b32() const { |
820 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i32); |
821 | } |
822 | |
823 | bool isAISrc_128B16() const { |
824 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i16); |
825 | } |
826 | |
827 | bool isAISrc_128V2B16() const { |
828 | return isAISrc_128B16(); |
829 | } |
830 | |
831 | bool isAISrc_128_f32() const { |
832 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f32); |
833 | } |
834 | |
835 | bool isAISrc_128F16() const { |
836 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f16); |
837 | } |
838 | |
839 | bool isAISrc_128V2F16() const { |
840 | return isAISrc_128F16() || isAISrc_128_b32(); |
841 | } |
842 | |
843 | bool isVISrc_128_bf16() const { |
844 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::bf16); |
845 | } |
846 | |
847 | bool isVISrc_128_f16() const { |
848 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f16); |
849 | } |
850 | |
851 | bool isVISrc_128V2F16() const { |
852 | return isVISrc_128_f16() || isVISrc_128_b32(); |
853 | } |
854 | |
855 | bool isAISrc_256B64() const { |
856 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::i64); |
857 | } |
858 | |
859 | bool isAISrc_256_f64() const { |
860 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::f64); |
861 | } |
862 | |
863 | bool isAISrc_512_b32() const { |
864 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i32); |
865 | } |
866 | |
867 | bool isAISrc_512B16() const { |
868 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i16); |
869 | } |
870 | |
871 | bool isAISrc_512V2B16() const { |
872 | return isAISrc_512B16(); |
873 | } |
874 | |
875 | bool isAISrc_512_f32() const { |
876 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f32); |
877 | } |
878 | |
879 | bool isAISrc_512F16() const { |
880 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f16); |
881 | } |
882 | |
883 | bool isAISrc_512V2F16() const { |
884 | return isAISrc_512F16() || isAISrc_512_b32(); |
885 | } |
886 | |
887 | bool isAISrc_1024_b32() const { |
888 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i32); |
889 | } |
890 | |
891 | bool isAISrc_1024B16() const { |
892 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i16); |
893 | } |
894 | |
895 | bool isAISrc_1024V2B16() const { |
896 | return isAISrc_1024B16(); |
897 | } |
898 | |
899 | bool isAISrc_1024_f32() const { |
900 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f32); |
901 | } |
902 | |
903 | bool isAISrc_1024F16() const { |
904 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f16); |
905 | } |
906 | |
907 | bool isAISrc_1024V2F16() const { |
908 | return isAISrc_1024F16() || isAISrc_1024_b32(); |
909 | } |
910 | |
911 | bool isKImmFP32() const { |
912 | return isLiteralImm(type: MVT::f32); |
913 | } |
914 | |
915 | bool isKImmFP16() const { |
916 | return isLiteralImm(type: MVT::f16); |
917 | } |
918 | |
919 | bool isMem() const override { |
920 | return false; |
921 | } |
922 | |
923 | bool isExpr() const { |
924 | return Kind == Expression; |
925 | } |
926 | |
927 | bool isSOPPBrTarget() const { return isExpr() || isImm(); } |
928 | |
929 | bool isSWaitCnt() const; |
930 | bool isDepCtr() const; |
931 | bool isSDelayALU() const; |
932 | bool isHwreg() const; |
933 | bool isSendMsg() const; |
934 | bool isSplitBarrier() const; |
935 | bool isSwizzle() const; |
936 | bool isSMRDOffset8() const; |
937 | bool isSMEMOffset() const; |
938 | bool isSMRDLiteralOffset() const; |
939 | bool isDPP8() const; |
940 | bool isDPPCtrl() const; |
941 | bool isBLGP() const; |
942 | bool isGPRIdxMode() const; |
943 | bool isS16Imm() const; |
944 | bool isU16Imm() const; |
945 | bool isEndpgm() const; |
946 | |
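  // Bind a predicate to this operand, yielding a nullary callable that
  // evaluates the predicate on *this when invoked.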
947 | auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const { |
948 | return [=](){ return P(*this); }; |
949 | } |
950 | |
951 | StringRef getToken() const { |
952 | assert(isToken()); |
953 | return StringRef(Tok.Data, Tok.Length); |
954 | } |
955 | |
956 | int64_t getImm() const { |
957 | assert(isImm()); |
958 | return Imm.Val; |
959 | } |
960 | |
961 | void setImm(int64_t Val) { |
962 | assert(isImm()); |
963 | Imm.Val = Val; |
964 | } |
965 | |
966 | ImmTy getImmTy() const { |
967 | assert(isImm()); |
968 | return Imm.Type; |
969 | } |
970 | |
971 | MCRegister getReg() const override { |
972 | assert(isRegKind()); |
973 | return Reg.RegNo; |
974 | } |
975 | |
976 | SMLoc getStartLoc() const override { |
977 | return StartLoc; |
978 | } |
979 | |
980 | SMLoc getEndLoc() const override { |
981 | return EndLoc; |
982 | } |
983 | |
984 | SMRange getLocRange() const { |
985 | return SMRange(StartLoc, EndLoc); |
986 | } |
987 | |
988 | Modifiers getModifiers() const { |
989 | assert(isRegKind() || isImmTy(ImmTyNone)); |
990 | return isRegKind() ? Reg.Mods : Imm.Mods; |
991 | } |
992 | |
993 | void setModifiers(Modifiers Mods) { |
994 | assert(isRegKind() || isImmTy(ImmTyNone)); |
995 | if (isRegKind()) |
996 | Reg.Mods = Mods; |
997 | else |
998 | Imm.Mods = Mods; |
999 | } |
1000 | |
1001 | bool hasModifiers() const { |
1002 | return getModifiers().hasModifiers(); |
1003 | } |
1004 | |
1005 | bool hasFPModifiers() const { |
1006 | return getModifiers().hasFPModifiers(); |
1007 | } |
1008 | |
1009 | bool hasIntModifiers() const { |
1010 | return getModifiers().hasIntModifiers(); |
1011 | } |
1012 | |
1013 | uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; |
1014 | |
1015 | void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; |
1016 | |
1017 | void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; |
1018 | |
1019 | void addRegOperands(MCInst &Inst, unsigned N) const; |
1020 | |
1021 | void addRegOrImmOperands(MCInst &Inst, unsigned N) const { |
1022 | if (isRegKind()) |
1023 | addRegOperands(Inst, N); |
1024 | else |
1025 | addImmOperands(Inst, N); |
1026 | } |
1027 | |
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }
1054 | |
1055 | void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { |
1056 | assert(!hasIntModifiers()); |
1057 | addRegWithInputModsOperands(Inst, N); |
1058 | } |
1059 | |
1060 | void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { |
1061 | assert(!hasFPModifiers()); |
1062 | addRegWithInputModsOperands(Inst, N); |
1063 | } |
1064 | |
1065 | static void printImmTy(raw_ostream& OS, ImmTy Type) { |
1066 | // clang-format off |
1067 | switch (Type) { |
1068 | case ImmTyNone: OS << "None" ; break; |
1069 | case ImmTyGDS: OS << "GDS" ; break; |
1070 | case ImmTyLDS: OS << "LDS" ; break; |
1071 | case ImmTyOffen: OS << "Offen" ; break; |
1072 | case ImmTyIdxen: OS << "Idxen" ; break; |
1073 | case ImmTyAddr64: OS << "Addr64" ; break; |
1074 | case ImmTyOffset: OS << "Offset" ; break; |
1075 | case ImmTyInstOffset: OS << "InstOffset" ; break; |
1076 | case ImmTyOffset0: OS << "Offset0" ; break; |
1077 | case ImmTyOffset1: OS << "Offset1" ; break; |
1078 | case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod" ; break; |
1079 | case ImmTyCPol: OS << "CPol" ; break; |
1080 | case ImmTyIndexKey8bit: OS << "index_key" ; break; |
1081 | case ImmTyIndexKey16bit: OS << "index_key" ; break; |
1082 | case ImmTyTFE: OS << "TFE" ; break; |
1083 | case ImmTyD16: OS << "D16" ; break; |
1084 | case ImmTyFORMAT: OS << "FORMAT" ; break; |
1085 | case ImmTyClamp: OS << "Clamp" ; break; |
1086 | case ImmTyOModSI: OS << "OModSI" ; break; |
1087 | case ImmTyDPP8: OS << "DPP8" ; break; |
1088 | case ImmTyDppCtrl: OS << "DppCtrl" ; break; |
1089 | case ImmTyDppRowMask: OS << "DppRowMask" ; break; |
1090 | case ImmTyDppBankMask: OS << "DppBankMask" ; break; |
1091 | case ImmTyDppBoundCtrl: OS << "DppBoundCtrl" ; break; |
1092 | case ImmTyDppFI: OS << "DppFI" ; break; |
1093 | case ImmTySDWADstSel: OS << "SDWADstSel" ; break; |
1094 | case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel" ; break; |
1095 | case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel" ; break; |
1096 | case ImmTySDWADstUnused: OS << "SDWADstUnused" ; break; |
1097 | case ImmTyDMask: OS << "DMask" ; break; |
1098 | case ImmTyDim: OS << "Dim" ; break; |
1099 | case ImmTyUNorm: OS << "UNorm" ; break; |
1100 | case ImmTyDA: OS << "DA" ; break; |
1101 | case ImmTyR128A16: OS << "R128A16" ; break; |
1102 | case ImmTyA16: OS << "A16" ; break; |
1103 | case ImmTyLWE: OS << "LWE" ; break; |
1104 | case ImmTyOff: OS << "Off" ; break; |
1105 | case ImmTyExpTgt: OS << "ExpTgt" ; break; |
1106 | case ImmTyExpCompr: OS << "ExpCompr" ; break; |
1107 | case ImmTyExpVM: OS << "ExpVM" ; break; |
1108 | case ImmTyHwreg: OS << "Hwreg" ; break; |
1109 | case ImmTySendMsg: OS << "SendMsg" ; break; |
1110 | case ImmTyInterpSlot: OS << "InterpSlot" ; break; |
1111 | case ImmTyInterpAttr: OS << "InterpAttr" ; break; |
1112 | case ImmTyInterpAttrChan: OS << "InterpAttrChan" ; break; |
1113 | case ImmTyOpSel: OS << "OpSel" ; break; |
1114 | case ImmTyOpSelHi: OS << "OpSelHi" ; break; |
1115 | case ImmTyNegLo: OS << "NegLo" ; break; |
1116 | case ImmTyNegHi: OS << "NegHi" ; break; |
1117 | case ImmTySwizzle: OS << "Swizzle" ; break; |
1118 | case ImmTyGprIdxMode: OS << "GprIdxMode" ; break; |
1119 | case ImmTyHigh: OS << "High" ; break; |
1120 | case ImmTyBLGP: OS << "BLGP" ; break; |
1121 | case ImmTyCBSZ: OS << "CBSZ" ; break; |
1122 | case ImmTyABID: OS << "ABID" ; break; |
1123 | case ImmTyEndpgm: OS << "Endpgm" ; break; |
1124 | case ImmTyWaitVDST: OS << "WaitVDST" ; break; |
1125 | case ImmTyWaitEXP: OS << "WaitEXP" ; break; |
1126 | case ImmTyWaitVAVDst: OS << "WaitVAVDst" ; break; |
1127 | case ImmTyWaitVMVSrc: OS << "WaitVMVSrc" ; break; |
1128 | case ImmTyByteSel: OS << "ByteSel" ; break; |
1129 | } |
1130 | // clang-format on |
1131 | } |
1132 | |
1133 | void print(raw_ostream &OS) const override { |
1134 | switch (Kind) { |
1135 | case Register: |
1136 | OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; |
1137 | break; |
1138 | case Immediate: |
1139 | OS << '<' << getImm(); |
1140 | if (getImmTy() != ImmTyNone) { |
        OS << " type: "; printImmTy(OS, getImmTy());
1142 | } |
1143 | OS << " mods: " << Imm.Mods << '>'; |
1144 | break; |
1145 | case Token: |
1146 | OS << '\'' << getToken() << '\''; |
1147 | break; |
1148 | case Expression: |
1149 | OS << "<expr " << *Expr << '>'; |
1150 | break; |
1151 | } |
1152 | } |
1153 | |
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
1199 | }; |
1200 | |
1201 | raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { |
1202 | OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; |
1203 | return OS; |
1204 | } |
1205 | |
1206 | //===----------------------------------------------------------------------===// |
1207 | // AsmParser |
1208 | //===----------------------------------------------------------------------===// |
1209 | |
1210 | // Holds info related to the current kernel, e.g. count of SGPRs used. |
1211 | // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next |
1212 | // .amdgpu_hsa_kernel or at EOF. |
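// For example (hypothetical kernel), after
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     v_mov_b32 v3, 0
// the current scope records that VGPRs up to v3 are in use and updates the
// .kernel.vgpr_count symbol accordingly.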
1213 | class KernelScopeInfo { |
1214 | int SgprIndexUnusedMin = -1; |
1215 | int VgprIndexUnusedMin = -1; |
1216 | int AgprIndexUnusedMin = -1; |
1217 | MCContext *Ctx = nullptr; |
1218 | MCSubtargetInfo const *MSTI = nullptr; |
1219 | |
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
1296 | }; |
1297 | |
1298 | class AMDGPUAsmParser : public MCTargetAsmParser { |
1299 | MCAsmParser &Parser; |
1300 | |
1301 | unsigned ForcedEncodingSize = 0; |
1302 | bool ForcedDPP = false; |
1303 | bool ForcedSDWA = false; |
1304 | KernelScopeInfo KernelScope; |
1305 | |
1306 | /// @name Auto-generated Match Functions |
1307 | /// { |
1308 | |
#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
1311 | |
1312 | /// } |
1313 | |
1314 | private: |
1315 | void createConstantSymbol(StringRef Id, int64_t Val); |
1316 | |
1317 | bool ParseAsAbsoluteExpression(uint32_t &Ret); |
1318 | bool OutOfRangeError(SMRange Range); |
1319 | /// Calculate VGPR/SGPR blocks required for given target, reserved |
1320 | /// registers, and user-specified NextFreeXGPR values. |
1321 | /// |
1322 | /// \param Features [in] Target features, used for bug corrections. |
1323 | /// \param VCCUsed [in] Whether VCC special SGPR is reserved. |
1324 | /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. |
1325 | /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. |
1326 | /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel |
1327 | /// descriptor field, if valid. |
1328 | /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. |
1329 | /// \param VGPRRange [in] Token range, used for VGPR diagnostics. |
1330 | /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. |
1331 | /// \param SGPRRange [in] Token range, used for SGPR diagnostics. |
1332 | /// \param VGPRBlocks [out] Result VGPR block count. |
1333 | /// \param SGPRBlocks [out] Result SGPR block count. |
1334 | bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed, |
1335 | const MCExpr *FlatScrUsed, bool XNACKUsed, |
1336 | std::optional<bool> EnableWavefrontSize32, |
1337 | const MCExpr *NextFreeVGPR, SMRange VGPRRange, |
1338 | const MCExpr *NextFreeSGPR, SMRange SGPRRange, |
1339 | const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks); |
1340 | bool ParseDirectiveAMDGCNTarget(); |
1341 | bool ParseDirectiveAMDHSACodeObjectVersion(); |
1342 | bool ParseDirectiveAMDHSAKernel(); |
1343 | bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &); |
1344 | bool ParseDirectiveAMDKernelCodeT(); |
1345 | // TODO: Possibly make subtargetHasRegister const. |
1346 | bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); |
1347 | bool ParseDirectiveAMDGPUHsaKernel(); |
1348 | |
1349 | bool ParseDirectiveISAVersion(); |
1350 | bool ParseDirectiveHSAMetadata(); |
1351 | bool ParseDirectivePALMetadataBegin(); |
1352 | bool ParseDirectivePALMetadata(); |
1353 | bool ParseDirectiveAMDGPULDS(); |
1354 | |
1355 | /// Common code to parse out a block of text (typically YAML) between start and |
1356 | /// end directives. |
1357 | bool ParseToEndDirective(const char *AssemblerDirectiveBegin, |
1358 | const char *AssemblerDirectiveEnd, |
1359 | std::string &CollectString); |
1360 | |
1361 | bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, |
1362 | RegisterKind RegKind, unsigned Reg1, SMLoc Loc); |
1363 | bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, |
1364 | unsigned &RegNum, unsigned &RegWidth, |
1365 | bool RestoreOnFailure = false); |
1366 | bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, |
1367 | unsigned &RegNum, unsigned &RegWidth, |
1368 | SmallVectorImpl<AsmToken> &Tokens); |
1369 | unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, |
1370 | unsigned &RegWidth, |
1371 | SmallVectorImpl<AsmToken> &Tokens); |
1372 | unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, |
1373 | unsigned &RegWidth, |
1374 | SmallVectorImpl<AsmToken> &Tokens); |
1375 | unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, |
1376 | unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); |
1377 | bool ParseRegRange(unsigned& Num, unsigned& Width); |
1378 | unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg, |
1379 | unsigned RegWidth, SMLoc Loc); |
1380 | |
1381 | bool isRegister(); |
1382 | bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; |
1383 | std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); |
1384 | void initializeGprCountSymbol(RegisterKind RegKind); |
1385 | bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, |
1386 | unsigned RegWidth); |
1387 | void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, |
1388 | bool IsAtomic); |
1389 | |
1390 | public: |
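  // Operand parsing mode; NSA refers to the non-sequential address encoding
  // used by some MIMG instructions.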
1391 | enum OperandMode { |
1392 | OperandMode_Default, |
1393 | OperandMode_NSA, |
1394 | }; |
1395 | |
1396 | using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; |
1397 | |
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // If there is no default wave size it must be a generation before gfx10,
      // these have FeatureWavefrontSize64 in their definition already. For
      // gfx10+ set wave32 as a default.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }
1443 | |
1444 | bool hasMIMG_R128() const { |
1445 | return AMDGPU::hasMIMG_R128(STI: getSTI()); |
1446 | } |
1447 | |
1448 | bool hasPackedD16() const { |
1449 | return AMDGPU::hasPackedD16(STI: getSTI()); |
1450 | } |
1451 | |
1452 | bool hasA16() const { return AMDGPU::hasA16(STI: getSTI()); } |
1453 | |
1454 | bool hasG16() const { return AMDGPU::hasG16(STI: getSTI()); } |
1455 | |
1456 | bool hasGDS() const { return AMDGPU::hasGDS(STI: getSTI()); } |
1457 | |
1458 | bool isSI() const { |
1459 | return AMDGPU::isSI(STI: getSTI()); |
1460 | } |
1461 | |
1462 | bool isCI() const { |
1463 | return AMDGPU::isCI(STI: getSTI()); |
1464 | } |
1465 | |
1466 | bool isVI() const { |
1467 | return AMDGPU::isVI(STI: getSTI()); |
1468 | } |
1469 | |
1470 | bool isGFX9() const { |
1471 | return AMDGPU::isGFX9(STI: getSTI()); |
1472 | } |
1473 | |
1474 | // TODO: isGFX90A is also true for GFX940. We need to clean it. |
1475 | bool isGFX90A() const { |
1476 | return AMDGPU::isGFX90A(STI: getSTI()); |
1477 | } |
1478 | |
1479 | bool isGFX940() const { |
1480 | return AMDGPU::isGFX940(STI: getSTI()); |
1481 | } |
1482 | |
1483 | bool isGFX9Plus() const { |
1484 | return AMDGPU::isGFX9Plus(STI: getSTI()); |
1485 | } |
1486 | |
1487 | bool isGFX10() const { |
1488 | return AMDGPU::isGFX10(STI: getSTI()); |
1489 | } |
1490 | |
1491 | bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(STI: getSTI()); } |
1492 | |
1493 | bool isGFX11() const { |
1494 | return AMDGPU::isGFX11(STI: getSTI()); |
1495 | } |
1496 | |
1497 | bool isGFX11Plus() const { |
1498 | return AMDGPU::isGFX11Plus(STI: getSTI()); |
1499 | } |
1500 | |
1501 | bool isGFX12() const { return AMDGPU::isGFX12(STI: getSTI()); } |
1502 | |
1503 | bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(STI: getSTI()); } |
1504 | |
1505 | bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(STI: getSTI()); } |
1506 | |
1507 | bool isGFX10_BEncoding() const { |
1508 | return AMDGPU::isGFX10_BEncoding(STI: getSTI()); |
1509 | } |
1510 | |
1511 | bool hasInv2PiInlineImm() const { |
1512 | return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; |
1513 | } |
1514 | |
1515 | bool hasFlatOffsets() const { |
1516 | return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; |
1517 | } |
1518 | |
1519 | bool hasArchitectedFlatScratch() const { |
1520 | return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; |
1521 | } |
1522 | |
1523 | bool hasSGPR102_SGPR103() const { |
1524 | return !isVI() && !isGFX9(); |
1525 | } |
1526 | |
1527 | bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } |
1528 | |
1529 | bool hasIntClamp() const { |
1530 | return getFeatureBits()[AMDGPU::FeatureIntClamp]; |
1531 | } |
1532 | |
1533 | bool hasPartialNSAEncoding() const { |
1534 | return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding]; |
1535 | } |
1536 | |
1537 | unsigned getNSAMaxSize(bool HasSampler = false) const { |
1538 | return AMDGPU::getNSAMaxSize(STI: getSTI(), HasSampler); |
1539 | } |
1540 | |
1541 | unsigned getMaxNumUserSGPRs() const { |
1542 | return AMDGPU::getMaxNumUserSGPRs(STI: getSTI()); |
1543 | } |
1544 | |
1545 | bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(STI: getSTI()); } |
1546 | |
1547 | AMDGPUTargetStreamer &getTargetStreamer() { |
1548 | MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); |
1549 | return static_cast<AMDGPUTargetStreamer &>(TS); |
1550 | } |
1551 | |
1552 | const MCRegisterInfo *getMRI() const { |
1553 | // We need this const_cast because for some reason getContext() is not const |
1554 | // in MCAsmParser. |
1555 | return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); |
1556 | } |
1557 | |
1558 | const MCInstrInfo *getMII() const { |
1559 | return &MII; |
1560 | } |
1561 | |
1562 | const FeatureBitset &getFeatureBits() const { |
1563 | return getSTI().getFeatureBits(); |
1564 | } |
1565 | |
1566 | void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } |
1567 | void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } |
1568 | void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } |
1569 | |
1570 | unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } |
1571 | bool isForcedVOP3() const { return ForcedEncodingSize == 64; } |
1572 | bool isForcedDPP() const { return ForcedDPP; } |
1573 | bool isForcedSDWA() const { return ForcedSDWA; } |
1574 | ArrayRef<unsigned> getMatchedVariants() const; |
1575 | StringRef getMatchedVariantName() const; |
1576 | |
1577 | std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); |
1578 | bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, |
1579 | bool RestoreOnFailure); |
1580 | bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; |
1581 | ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, |
1582 | SMLoc &EndLoc) override; |
1583 | unsigned checkTargetMatchPredicate(MCInst &Inst) override; |
1584 | unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, |
1585 | unsigned Kind) override; |
1586 | bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
1587 | OperandVector &Operands, MCStreamer &Out, |
1588 | uint64_t &ErrorInfo, |
1589 | bool MatchingInlineAsm) override; |
1590 | bool ParseDirective(AsmToken DirectiveID) override; |
1591 | ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic, |
1592 | OperandMode Mode = OperandMode_Default); |
1593 | StringRef parseMnemonicSuffix(StringRef Name); |
1594 | bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, |
1595 | SMLoc NameLoc, OperandVector &Operands) override; |
1596 | //bool ProcessInstruction(MCInst &Inst); |
1597 | |
1598 | ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands); |
1599 | |
1600 | ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int); |
1601 | |
1602 | ParseStatus |
1603 | parseIntWithPrefix(const char *Prefix, OperandVector &Operands, |
1604 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, |
1605 | std::function<bool(int64_t &)> ConvertResult = nullptr); |
1606 | |
1607 | ParseStatus parseOperandArrayWithPrefix( |
1608 | const char *Prefix, OperandVector &Operands, |
1609 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, |
1610 | bool (*ConvertResult)(int64_t &) = nullptr); |
1611 | |
1612 | ParseStatus |
1613 | parseNamedBit(StringRef Name, OperandVector &Operands, |
1614 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); |
1615 | unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const; |
1616 | ParseStatus parseCPol(OperandVector &Operands); |
1617 | ParseStatus parseScope(OperandVector &Operands, int64_t &Scope); |
1618 | ParseStatus parseTH(OperandVector &Operands, int64_t &TH); |
1619 | ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value, |
1620 | SMLoc &StringLoc); |
1621 | |
1622 | bool isModifier(); |
1623 | bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1624 | bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1625 | bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1626 | bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; |
1627 | bool parseSP3NegModifier(); |
1628 | ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false, |
1629 | bool HasLit = false); |
1630 | ParseStatus parseReg(OperandVector &Operands); |
1631 | ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false, |
1632 | bool HasLit = false); |
1633 | ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands, |
1634 | bool AllowImm = true); |
1635 | ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands, |
1636 | bool AllowImm = true); |
1637 | ParseStatus parseRegWithFPInputMods(OperandVector &Operands); |
1638 | ParseStatus parseRegWithIntInputMods(OperandVector &Operands); |
1639 | ParseStatus parseVReg32OrOff(OperandVector &Operands); |
1640 | ParseStatus tryParseIndexKey(OperandVector &Operands, |
1641 | AMDGPUOperand::ImmTy ImmTy); |
1642 | ParseStatus parseIndexKey8bit(OperandVector &Operands); |
1643 | ParseStatus parseIndexKey16bit(OperandVector &Operands); |
1644 | |
1645 | ParseStatus parseDfmtNfmt(int64_t &Format); |
1646 | ParseStatus parseUfmt(int64_t &Format); |
1647 | ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, |
1648 | int64_t &Format); |
1649 | ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, |
1650 | int64_t &Format); |
1651 | ParseStatus parseFORMAT(OperandVector &Operands); |
1652 | ParseStatus parseSymbolicOrNumericFormat(int64_t &Format); |
1653 | ParseStatus parseNumericFormat(int64_t &Format); |
1654 | ParseStatus parseFlatOffset(OperandVector &Operands); |
1655 | ParseStatus parseR128A16(OperandVector &Operands); |
1656 | ParseStatus parseBLGP(OperandVector &Operands); |
1657 | bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); |
1658 | bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); |
1659 | |
1660 | void cvtExp(MCInst &Inst, const OperandVector &Operands); |
1661 | |
1662 | bool parseCnt(int64_t &IntVal); |
1663 | ParseStatus parseSWaitCnt(OperandVector &Operands); |
1664 | |
1665 | bool parseDepCtr(int64_t &IntVal, unsigned &Mask); |
1666 | void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); |
1667 | ParseStatus parseDepCtr(OperandVector &Operands); |
1668 | |
1669 | bool parseDelay(int64_t &Delay); |
1670 | ParseStatus parseSDelayALU(OperandVector &Operands); |
1671 | |
1672 | ParseStatus parseHwreg(OperandVector &Operands); |
1673 | |
1674 | private: |
1675 | struct OperandInfoTy { |
1676 | SMLoc Loc; |
1677 | int64_t Val; |
1678 | bool IsSymbolic = false; |
1679 | bool IsDefined = false; |
1680 | |
1681 | OperandInfoTy(int64_t Val) : Val(Val) {} |
1682 | }; |
1683 | |
1684 | struct StructuredOpField : OperandInfoTy { |
1685 | StringLiteral Id; |
1686 | StringLiteral Desc; |
1687 | unsigned Width; |
1688 | bool IsDefined = false; |
1689 | |
1690 | StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width, |
1691 | int64_t Default) |
1692 | : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {} |
1693 | virtual ~StructuredOpField() = default; |
1694 | |
1695 | bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const { |
1696 | Parser.Error(L: Loc, Msg: "invalid " + Desc + ": " + Err); |
1697 | return false; |
1698 | } |
1699 | |
1700 | virtual bool validate(AMDGPUAsmParser &Parser) const { |
1701 | if (IsSymbolic && Val == OPR_ID_UNSUPPORTED) |
1702 | return Error(Parser, Err: "not supported on this GPU" ); |
1703 | if (!isUIntN(N: Width, x: Val)) |
1704 | return Error(Parser, Err: "only " + Twine(Width) + "-bit values are legal" ); |
1705 | return true; |
1706 | } |
1707 | }; |
1708 | |
1709 | ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields); |
1710 | bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields); |
1711 | |
1712 | bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); |
1713 | bool validateSendMsg(const OperandInfoTy &Msg, |
1714 | const OperandInfoTy &Op, |
1715 | const OperandInfoTy &Stream); |
1716 | |
1717 | ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset, |
1718 | OperandInfoTy &Width); |
1719 | |
1720 | SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; |
1721 | SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; |
1722 | SMLoc getBLGPLoc(const OperandVector &Operands) const; |
1723 | |
1724 | SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, |
1725 | const OperandVector &Operands) const; |
1726 | SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; |
1727 | SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; |
1728 | SMLoc getLitLoc(const OperandVector &Operands, |
1729 | bool SearchMandatoryLiterals = false) const; |
1730 | SMLoc getMandatoryLitLoc(const OperandVector &Operands) const; |
1731 | SMLoc getConstLoc(const OperandVector &Operands) const; |
1732 | SMLoc getInstLoc(const OperandVector &Operands) const; |
1733 | |
1734 | bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); |
1735 | bool validateOffset(const MCInst &Inst, const OperandVector &Operands); |
1736 | bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); |
1737 | bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); |
1738 | bool validateSOPLiteral(const MCInst &Inst) const; |
1739 | bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); |
1740 | bool validateVOPDRegBankConstraints(const MCInst &Inst, |
1741 | const OperandVector &Operands); |
1742 | bool validateIntClampSupported(const MCInst &Inst); |
1743 | bool validateMIMGAtomicDMask(const MCInst &Inst); |
1744 | bool validateMIMGGatherDMask(const MCInst &Inst); |
1745 | bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); |
1746 | bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); |
1747 | bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc); |
1748 | bool validateMIMGD16(const MCInst &Inst); |
1749 | bool validateMIMGMSAA(const MCInst &Inst); |
1750 | bool validateOpSel(const MCInst &Inst); |
1751 | bool validateNeg(const MCInst &Inst, int OpName); |
1752 | bool validateDPP(const MCInst &Inst, const OperandVector &Operands); |
1753 | bool validateVccOperand(unsigned Reg) const; |
1754 | bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); |
1755 | bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); |
1756 | bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands); |
1757 | bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); |
1758 | bool validateAGPRLdSt(const MCInst &Inst) const; |
1759 | bool validateVGPRAlign(const MCInst &Inst) const; |
1760 | bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); |
1761 | bool validateDS(const MCInst &Inst, const OperandVector &Operands); |
1762 | bool validateGWS(const MCInst &Inst, const OperandVector &Operands); |
1763 | bool validateDivScale(const MCInst &Inst); |
1764 | bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands); |
1765 | bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, |
1766 | const SMLoc &IDLoc); |
1767 | bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, |
1768 | const unsigned CPol); |
1769 | bool validateExeczVcczOperands(const OperandVector &Operands); |
1770 | bool validateTFE(const MCInst &Inst, const OperandVector &Operands); |
1771 | std::optional<StringRef> validateLdsDirect(const MCInst &Inst); |
1772 | unsigned getConstantBusLimit(unsigned Opcode) const; |
1773 | bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); |
1774 | bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; |
1775 | unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; |
1776 | |
1777 | bool isSupportedMnemo(StringRef Mnemo, |
1778 | const FeatureBitset &FBS); |
1779 | bool isSupportedMnemo(StringRef Mnemo, |
1780 | const FeatureBitset &FBS, |
1781 | ArrayRef<unsigned> Variants); |
1782 | bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); |
1783 | |
1784 | bool isId(const StringRef Id) const; |
1785 | bool isId(const AsmToken &Token, const StringRef Id) const; |
1786 | bool isToken(const AsmToken::TokenKind Kind) const; |
1787 | StringRef getId() const; |
1788 | bool trySkipId(const StringRef Id); |
1789 | bool trySkipId(const StringRef Pref, const StringRef Id); |
1790 | bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); |
1791 | bool trySkipToken(const AsmToken::TokenKind Kind); |
1792 | bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); |
1793 | bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string" ); |
1794 | bool parseId(StringRef &Val, const StringRef ErrMsg = "" ); |
1795 | |
1796 | void peekTokens(MutableArrayRef<AsmToken> Tokens); |
1797 | AsmToken::TokenKind getTokenKind() const; |
1798 | bool parseExpr(int64_t &Imm, StringRef Expected = "" ); |
1799 | bool parseExpr(OperandVector &Operands); |
1800 | StringRef getTokenStr() const; |
1801 | AsmToken peekToken(bool ShouldSkipSpace = true); |
1802 | AsmToken getToken() const; |
1803 | SMLoc getLoc() const; |
1804 | void lex(); |
1805 | |
1806 | public: |
1807 | void onBeginOfFile() override; |
1808 | bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; |
1809 | |
1810 | ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); |
1811 | |
1812 | ParseStatus parseExpTgt(OperandVector &Operands); |
1813 | ParseStatus parseSendMsg(OperandVector &Operands); |
1814 | ParseStatus parseInterpSlot(OperandVector &Operands); |
1815 | ParseStatus parseInterpAttr(OperandVector &Operands); |
1816 | ParseStatus parseSOPPBrTarget(OperandVector &Operands); |
1817 | ParseStatus parseBoolReg(OperandVector &Operands); |
1818 | |
1819 | bool parseSwizzleOperand(int64_t &Op, |
1820 | const unsigned MinVal, |
1821 | const unsigned MaxVal, |
1822 | const StringRef ErrMsg, |
1823 | SMLoc &Loc); |
1824 | bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, |
1825 | const unsigned MinVal, |
1826 | const unsigned MaxVal, |
1827 | const StringRef ErrMsg); |
1828 | ParseStatus parseSwizzle(OperandVector &Operands); |
1829 | bool parseSwizzleOffset(int64_t &Imm); |
1830 | bool parseSwizzleMacro(int64_t &Imm); |
1831 | bool parseSwizzleQuadPerm(int64_t &Imm); |
1832 | bool parseSwizzleBitmaskPerm(int64_t &Imm); |
1833 | bool parseSwizzleBroadcast(int64_t &Imm); |
1834 | bool parseSwizzleSwap(int64_t &Imm); |
1835 | bool parseSwizzleReverse(int64_t &Imm); |
1836 | |
1837 | ParseStatus parseGPRIdxMode(OperandVector &Operands); |
1838 | int64_t parseGPRIdxMacro(); |
1839 | |
1840 | void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: false); } |
1841 | void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: true); } |
1842 | |
1843 | ParseStatus parseOModSI(OperandVector &Operands); |
1844 | |
1845 | void cvtVOP3(MCInst &Inst, const OperandVector &Operands, |
1846 | OptionalImmIndexMap &OptionalIdx); |
1847 | void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); |
1848 | void cvtVOP3(MCInst &Inst, const OperandVector &Operands); |
1849 | void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); |
1850 | void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands); |
1851 | |
1852 | void cvtVOPD(MCInst &Inst, const OperandVector &Operands); |
1853 | void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, |
1854 | OptionalImmIndexMap &OptionalIdx); |
1855 | void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, |
1856 | OptionalImmIndexMap &OptionalIdx); |
1857 | |
1858 | void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); |
1859 | void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); |
1860 | |
1861 | bool parseDimId(unsigned &Encoding); |
1862 | ParseStatus parseDim(OperandVector &Operands); |
1863 | bool convertDppBoundCtrl(int64_t &BoundCtrl); |
1864 | ParseStatus parseDPP8(OperandVector &Operands); |
1865 | ParseStatus parseDPPCtrl(OperandVector &Operands); |
1866 | bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); |
1867 | int64_t parseDPPCtrlSel(StringRef Ctrl); |
1868 | int64_t parseDPPCtrlPerm(); |
1869 | void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); |
1870 | void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { |
1871 | cvtDPP(Inst, Operands, IsDPP8: true); |
1872 | } |
1873 | void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, |
1874 | bool IsDPP8 = false); |
1875 | void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { |
1876 | cvtVOP3DPP(Inst, Operands, IsDPP8: true); |
1877 | } |
1878 | |
1879 | ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, |
1880 | AMDGPUOperand::ImmTy Type); |
1881 | ParseStatus parseSDWADstUnused(OperandVector &Operands); |
1882 | void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); |
1883 | void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); |
1884 | void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); |
1885 | void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); |
1886 | void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); |
1887 | void cvtSDWA(MCInst &Inst, const OperandVector &Operands, |
1888 | uint64_t BasicInstType, |
1889 | bool SkipDstVcc = false, |
1890 | bool SkipSrcVcc = false); |
1891 | |
1892 | ParseStatus parseEndpgm(OperandVector &Operands); |
1893 | |
1894 | ParseStatus parseVOPD(OperandVector &Operands); |
1895 | }; |
1896 | |
1897 | } // end anonymous namespace |
1898 | |
1899 | // May be called with an integer type of equivalent bitwidth. |
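 | // For example, a 4-byte operand is given IEEEsingle() semantics whether it |
 | // was declared as i32 or f32; only the bit width matters here. |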
1900 | static const fltSemantics *getFltSemantics(unsigned Size) { |
1901 | switch (Size) { |
1902 | case 4: |
1903 | return &APFloat::IEEEsingle(); |
1904 | case 8: |
1905 | return &APFloat::IEEEdouble(); |
1906 | case 2: |
1907 | return &APFloat::IEEEhalf(); |
1908 | default: |
1909 | llvm_unreachable("unsupported fp type" ); |
1910 | } |
1911 | } |
1912 | |
1913 | static const fltSemantics *getFltSemantics(MVT VT) { |
1914 | return getFltSemantics(Size: VT.getSizeInBits() / 8); |
1915 | } |
1916 | |
1917 | static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { |
1918 | switch (OperandType) { |
1919 | // When a floating-point immediate is used as an operand of type i16, the |
1920 | // 32-bit representation of the constant, truncated to the 16 LSBs, is used. |
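 | // For example, a literal for an OPERAND_REG_IMM_INT16 operand is converted |
 | // with IEEEsingle (32-bit) semantics below, not IEEEhalf. |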
1921 | case AMDGPU::OPERAND_REG_IMM_INT16: |
1922 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
1923 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
1924 | case AMDGPU::OPERAND_REG_IMM_INT32: |
1925 | case AMDGPU::OPERAND_REG_IMM_FP32: |
1926 | case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: |
1927 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
1928 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
1929 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
1930 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
1931 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
1932 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
1933 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: |
1934 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
1935 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
1936 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
1937 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: |
1938 | case AMDGPU::OPERAND_KIMM32: |
1939 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: |
1940 | return &APFloat::IEEEsingle(); |
1941 | case AMDGPU::OPERAND_REG_IMM_INT64: |
1942 | case AMDGPU::OPERAND_REG_IMM_FP64: |
1943 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
1944 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
1945 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
1946 | return &APFloat::IEEEdouble(); |
1947 | case AMDGPU::OPERAND_REG_IMM_FP16: |
1948 | case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: |
1949 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
1950 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
1951 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
1952 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: |
1953 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
1954 | case AMDGPU::OPERAND_KIMM16: |
1955 | return &APFloat::IEEEhalf(); |
1956 | case AMDGPU::OPERAND_REG_IMM_BF16: |
1957 | case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: |
1958 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
1959 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
1960 | case AMDGPU::OPERAND_REG_INLINE_AC_BF16: |
1961 | case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: |
1962 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
1963 | return &APFloat::BFloat(); |
1964 | default: |
1965 | llvm_unreachable("unsupported fp type" ); |
1966 | } |
1967 | } |
1968 | |
1969 | //===----------------------------------------------------------------------===// |
1970 | // Operand |
1971 | //===----------------------------------------------------------------------===// |
1972 | |
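 | // Rejects conversions that overflow or underflow (e.g. 1.0e10 does not fit |
 | // into f16), while plain precision loss, as for 0.1, is tolerated. |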
1973 | static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { |
1974 | bool Lost; |
1975 | |
1976 | // Convert the literal to the FP semantics of the operand type. |
1977 | APFloat::opStatus Status = FPLiteral.convert(ToSemantics: *getFltSemantics(VT), |
1978 | RM: APFloat::rmNearestTiesToEven, |
1979 | losesInfo: &Lost); |
1980 | // We allow precision loss but not overflow or underflow. |
1981 | if (Status != APFloat::opOK && |
1982 | Lost && |
1983 | ((Status & APFloat::opOverflow) != 0 || |
1984 | (Status & APFloat::opUnderflow) != 0)) { |
1985 | return false; |
1986 | } |
1987 | |
1988 | return true; |
1989 | } |
1990 | |
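 | // e.g. isSafeTruncation(-1, 16) is true (fits as a signed 16-bit value), but |
 | // isSafeTruncation(0x18000, 16) is false: it fits neither form. |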
1991 | static bool isSafeTruncation(int64_t Val, unsigned Size) { |
1992 | return isUIntN(N: Size, x: Val) || isIntN(N: Size, x: Val); |
1993 | } |
1994 | |
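 | // e.g. for an f16 operand the bit pattern 0x3C00 (1.0) is an inline |
 | // constant, while an arbitrary value such as 0x3B00 (0.875) must be |
 | // encoded as a literal. |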
1995 | static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { |
1996 | if (VT.getScalarType() == MVT::i16) |
1997 | return isInlinableLiteral32(Literal: Val, HasInv2Pi); |
1998 | |
1999 | if (VT.getScalarType() == MVT::f16) |
2000 | return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi); |
2001 | |
2002 | assert(VT.getScalarType() == MVT::bf16); |
2003 | |
2004 | return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi); |
2005 | } |
2006 | |
2007 | bool AMDGPUOperand::isInlinableImm(MVT type) const { |
2008 | |
2009 | // This is a hack to enable named inline values like |
2010 | // shared_base with both 32-bit and 64-bit operands. |
2011 | // Note that these values are defined as |
2012 | // 32-bit operands only. |
2013 | if (isInlineValue()) { |
2014 | return true; |
2015 | } |
2016 | |
2017 | if (!isImmTy(ImmT: ImmTyNone)) { |
2018 | // Only plain immediates are inlinable (e.g. "clamp" attribute is not) |
2019 | return false; |
2020 | } |
2021 | // TODO: We should avoid using host floats here. It would be better to |
2022 | // check the float bit values, which is what a few other places do. |
2023 | // We've had bot failures before due to weird NaN support on MIPS hosts. |
2024 | |
2025 | APInt Literal(64, Imm.Val); |
2026 | |
2027 | if (Imm.IsFPImm) { // We got fp literal token |
2028 | if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand |
2029 | return AMDGPU::isInlinableLiteral64(Literal: Imm.Val, |
2030 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2031 | } |
2032 | |
2033 | APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); |
2034 | if (!canLosslesslyConvertToFPType(FPLiteral, VT: type)) |
2035 | return false; |
2036 | |
2037 | if (type.getScalarSizeInBits() == 16) { |
2038 | bool Lost = false; |
2039 | switch (type.getScalarType().SimpleTy) { |
2040 | default: |
2041 | llvm_unreachable("unknown 16-bit type" ); |
2042 | case MVT::bf16: |
2043 | FPLiteral.convert(ToSemantics: APFloatBase::BFloat(), RM: APFloat::rmNearestTiesToEven, |
2044 | losesInfo: &Lost); |
2045 | break; |
2046 | case MVT::f16: |
2047 | FPLiteral.convert(ToSemantics: APFloatBase::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, |
2048 | losesInfo: &Lost); |
2049 | break; |
2050 | case MVT::i16: |
2051 | FPLiteral.convert(ToSemantics: APFloatBase::IEEEsingle(), |
2052 | RM: APFloat::rmNearestTiesToEven, losesInfo: &Lost); |
2053 | break; |
2054 | } |
2055 | // We need to use the 32-bit representation here because when a |
2056 | // floating-point inline constant is used as an i16 operand, its 32-bit |
2057 | // representation is what gets used. We need the 32-bit value to check |
2058 | // whether it is an FP inline constant. |
2059 | uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); |
2060 | return isInlineableLiteralOp16(Val: ImmVal, VT: type, |
2061 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2062 | } |
2063 | |
2064 | // Check if single precision literal is inlinable |
2065 | return AMDGPU::isInlinableLiteral32( |
2066 | Literal: static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), |
2067 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2068 | } |
2069 | |
2070 | // We got int literal token. |
2071 | if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand |
2072 | return AMDGPU::isInlinableLiteral64(Literal: Imm.Val, |
2073 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2074 | } |
2075 | |
2076 | if (!isSafeTruncation(Val: Imm.Val, Size: type.getScalarSizeInBits())) { |
2077 | return false; |
2078 | } |
2079 | |
2080 | if (type.getScalarSizeInBits() == 16) { |
2081 | return isInlineableLiteralOp16( |
2082 | Val: static_cast<int16_t>(Literal.getLoBits(numBits: 16).getSExtValue()), |
2083 | VT: type, HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2084 | } |
2085 | |
2086 | return AMDGPU::isInlinableLiteral32( |
2087 | Literal: static_cast<int32_t>(Literal.getLoBits(numBits: 32).getZExtValue()), |
2088 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2089 | } |
2090 | |
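 | // Roughly: int tokens are accepted when they fit the (possibly truncated) |
 | // operand width; fp tokens are accepted for fp operands when the conversion |
 | // neither overflows nor underflows, and never for 64-bit integer operands. |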
2091 | bool AMDGPUOperand::isLiteralImm(MVT type) const { |
2092 | // Check that this immediate can be added as literal |
2093 | if (!isImmTy(ImmT: ImmTyNone)) { |
2094 | return false; |
2095 | } |
2096 | |
2097 | if (!Imm.IsFPImm) { |
2098 | // We got int literal token. |
2099 | |
2100 | if (type == MVT::f64 && hasFPModifiers()) { |
2101 | // FP modifiers cannot be applied to int literals while preserving the same |
2102 | // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid |
2103 | // ambiguity, disable these cases. |
2104 | return false; |
2105 | } |
2106 | |
2107 | unsigned Size = type.getSizeInBits(); |
2108 | if (Size == 64) |
2109 | Size = 32; |
2110 | |
2111 | // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP |
2112 | // types. |
2113 | return isSafeTruncation(Val: Imm.Val, Size); |
2114 | } |
2115 | |
2116 | // We got fp literal token |
2117 | if (type == MVT::f64) { // Expected 64-bit fp operand |
2118 | // The low 32 bits of the literal would be zeroed, but we accept such literals. |
2119 | return true; |
2120 | } |
2121 | |
2122 | if (type == MVT::i64) { // Expected 64-bit int operand |
2123 | // We don't allow fp literals in 64-bit integer instructions. It is |
2124 | // unclear how we should encode them. |
2125 | return false; |
2126 | } |
2127 | |
2128 | // We allow fp literals with f16x2 operands assuming that the specified |
2129 | // literal goes into the lower half and the upper half is zero. We also |
2130 | // require that the literal may be losslessly converted to f16. |
2131 | // |
2132 | // For i16x2 operands, we assume that the specified literal is encoded as a |
2133 | // single-precision float. This is pretty odd, but it matches SP3 and what |
2134 | // happens in hardware. |
2135 | MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 |
2136 | : (type == MVT::v2i16) ? MVT::f32 |
2137 | : (type == MVT::v2f32) ? MVT::f32 |
2138 | : type; |
2139 | |
2140 | APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); |
2141 | return canLosslesslyConvertToFPType(FPLiteral, VT: ExpectedType); |
2142 | } |
2143 | |
2144 | bool AMDGPUOperand::isRegClass(unsigned RCID) const { |
2145 | return isRegKind() && AsmParser->getMRI()->getRegClass(i: RCID).contains(Reg: getReg()); |
2146 | } |
2147 | |
2148 | bool AMDGPUOperand::isVRegWithInputMods() const { |
2149 | return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) || |
2150 | // GFX90A allows DPP on 64-bit operands. |
2151 | (isRegClass(RCID: AMDGPU::VReg_64RegClassID) && |
2152 | AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]); |
2153 | } |
2154 | |
2155 | template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const { |
2156 | return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID |
2157 | : AMDGPU::VGPR_16_Lo128RegClassID); |
2158 | } |
2159 | |
2160 | bool AMDGPUOperand::isSDWAOperand(MVT type) const { |
2161 | if (AsmParser->isVI()) |
2162 | return isVReg32(); |
2163 | if (AsmParser->isGFX9Plus()) |
2164 | return isRegClass(RCID: AMDGPU::VS_32RegClassID) || isInlinableImm(type); |
2165 | return false; |
2166 | } |
2167 | |
2168 | bool AMDGPUOperand::isSDWAFP16Operand() const { |
2169 | return isSDWAOperand(type: MVT::f16); |
2170 | } |
2171 | |
2172 | bool AMDGPUOperand::isSDWAFP32Operand() const { |
2173 | return isSDWAOperand(type: MVT::f32); |
2174 | } |
2175 | |
2176 | bool AMDGPUOperand::isSDWAInt16Operand() const { |
2177 | return isSDWAOperand(type: MVT::i16); |
2178 | } |
2179 | |
2180 | bool AMDGPUOperand::isSDWAInt32Operand() const { |
2181 | return isSDWAOperand(type: MVT::i32); |
2182 | } |
2183 | |
2184 | bool AMDGPUOperand::isBoolReg() const { |
2185 | auto FB = AsmParser->getFeatureBits(); |
2186 | return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) || |
2187 | (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32())); |
2188 | } |
2189 | |
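 | // Example (Size == 4): 'abs' clears bit 31 and 'neg' flips it, so applying |
 | // 'neg' to 1.0 (0x3f800000) yields -1.0 (0xbf800000). |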
2190 | uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const |
2191 | { |
2192 | assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); |
2193 | assert(Size == 2 || Size == 4 || Size == 8); |
2194 | |
2195 | const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); |
2196 | |
2197 | if (Imm.Mods.Abs) { |
2198 | Val &= ~FpSignMask; |
2199 | } |
2200 | if (Imm.Mods.Neg) { |
2201 | Val ^= FpSignMask; |
2202 | } |
2203 | |
2204 | return Val; |
2205 | } |
2206 | |
2207 | void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { |
2208 | if (isExpr()) { |
2209 | Inst.addOperand(Op: MCOperand::createExpr(Val: Expr)); |
2210 | return; |
2211 | } |
2212 | |
2213 | if (AMDGPU::isSISrcOperand(Desc: AsmParser->getMII()->get(Opcode: Inst.getOpcode()), |
2214 | OpNo: Inst.getNumOperands())) { |
2215 | addLiteralImmOperand(Inst, Val: Imm.Val, |
2216 | ApplyModifiers: ApplyModifiers & |
2217 | isImmTy(ImmT: ImmTyNone) && Imm.Mods.hasFPModifiers()); |
2218 | } else { |
2219 | assert(!isImmTy(ImmTyNone) || !hasModifiers()); |
2220 | Inst.addOperand(Op: MCOperand::createImm(Val: Imm.Val)); |
2221 | setImmKindNone(); |
2222 | } |
2223 | } |
2224 | |
2225 | void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { |
2226 | const auto& InstDesc = AsmParser->getMII()->get(Opcode: Inst.getOpcode()); |
2227 | auto OpNum = Inst.getNumOperands(); |
2228 | // Check that this operand accepts literals |
2229 | assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); |
2230 | |
2231 | if (ApplyModifiers) { |
2232 | assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); |
2233 | const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(Desc: InstDesc, OpNo: OpNum); |
2234 | Val = applyInputFPModifiers(Val, Size); |
2235 | } |
2236 | |
2237 | APInt Literal(64, Val); |
2238 | uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; |
2239 | |
2240 | if (Imm.IsFPImm) { // We got fp literal token |
2241 | switch (OpTy) { |
2242 | case AMDGPU::OPERAND_REG_IMM_INT64: |
2243 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2244 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
2245 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2246 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2247 | if (AMDGPU::isInlinableLiteral64(Literal: Literal.getZExtValue(), |
2248 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2249 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getZExtValue())); |
2250 | setImmKindConst(); |
2251 | return; |
2252 | } |
2253 | |
2254 | // Non-inlineable |
2255 | if (AMDGPU::isSISrcFPOperand(Desc: InstDesc, OpNo: OpNum)) { // Expected 64-bit fp operand |
2256 | // For fp operands we check if low 32 bits are zeros |
2257 | if (Literal.getLoBits(numBits: 32) != 0) { |
2258 | const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(L: Inst.getLoc(), |
2259 | Msg: "Can't encode literal as exact 64-bit floating-point operand. " |
2260 | "Low 32-bits will be set to zero" ); |
2261 | Val &= 0xffffffff00000000u; |
2262 | } |
2263 | |
2264 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2265 | setImmKindLiteral(); |
2266 | return; |
2267 | } |
2268 | |
2269 | // We don't allow fp literals in 64-bit integer instructions. It is |
2270 | // unclear how we should encode them. This case should be checked earlier |
2271 | // in predicate methods (isLiteralImm()) |
2272 | llvm_unreachable("fp literal in 64-bit integer instruction." ); |
2273 | |
2274 | case AMDGPU::OPERAND_REG_IMM_BF16: |
2275 | case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: |
2276 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
2277 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
2278 | case AMDGPU::OPERAND_REG_INLINE_AC_BF16: |
2279 | case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: |
2280 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2281 | if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) { |
2282 | // This is 1/(2*pi), which would be truncated to bf16 with a loss of |
2283 | // precision. The constant represents the idiomatic fp32 value of |
2284 | // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits |
2285 | // cleared. Prevent the rounding below. |
2286 | Inst.addOperand(Op: MCOperand::createImm(Val: 0x3e22)); |
2287 | setImmKindLiteral(); |
2288 | return; |
2289 | } |
2290 | [[fallthrough]]; |
2291 | |
2292 | case AMDGPU::OPERAND_REG_IMM_INT32: |
2293 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2294 | case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: |
2295 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
2296 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2297 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
2298 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2299 | case AMDGPU::OPERAND_REG_IMM_INT16: |
2300 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2301 | case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: |
2302 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
2303 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2304 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
2305 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2306 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
2307 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
2308 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: |
2309 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: |
2310 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2311 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2312 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
2313 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2314 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: |
2315 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
2316 | case AMDGPU::OPERAND_KIMM32: |
2317 | case AMDGPU::OPERAND_KIMM16: |
2318 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { |
2319 | bool lost; |
2320 | APFloat FPLiteral(APFloat::IEEEdouble(), Literal); |
2321 | // Convert the literal to the FP semantics of the operand type. |
2322 | FPLiteral.convert(ToSemantics: *getOpFltSemantics(OperandType: OpTy), |
2323 | RM: APFloat::rmNearestTiesToEven, losesInfo: &lost); |
2324 | // We allow precision loss but not overflow or underflow. This should have |
2325 | // been checked earlier in isLiteralImm(). |
2326 | |
2327 | uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); |
2328 | Inst.addOperand(Op: MCOperand::createImm(Val: ImmVal)); |
2329 | if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { |
2330 | setImmKindMandatoryLiteral(); |
2331 | } else { |
2332 | setImmKindLiteral(); |
2333 | } |
2334 | return; |
2335 | } |
2336 | default: |
2337 | llvm_unreachable("invalid operand size" ); |
2338 | } |
2339 | |
2340 | return; |
2341 | } |
2342 | |
2343 | // We got int literal token. |
2344 | // Only sign extend inline immediates. |
2345 | switch (OpTy) { |
2346 | case AMDGPU::OPERAND_REG_IMM_INT32: |
2347 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2348 | case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: |
2349 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
2350 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2351 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
2352 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2353 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2354 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2355 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2356 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2357 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
2358 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
2359 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: |
2360 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: |
2361 | if (isSafeTruncation(Val, Size: 32) && |
2362 | AMDGPU::isInlinableLiteral32(Literal: static_cast<int32_t>(Val), |
2363 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2364 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2365 | setImmKindConst(); |
2366 | return; |
2367 | } |
2368 | |
2369 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffffffff)); |
2370 | setImmKindLiteral(); |
2371 | return; |
2372 | |
2373 | case AMDGPU::OPERAND_REG_IMM_INT64: |
2374 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2375 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
2376 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2377 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2378 | if (AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2379 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2380 | setImmKindConst(); |
2381 | return; |
2382 | } |
2383 | |
2384 | Val = AMDGPU::isSISrcFPOperand(Desc: InstDesc, OpNo: OpNum) ? (uint64_t)Val << 32 |
2385 | : Lo_32(Value: Val); |
2386 | |
2387 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2388 | setImmKindLiteral(); |
2389 | return; |
2390 | |
2391 | case AMDGPU::OPERAND_REG_IMM_INT16: |
2392 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
2393 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
2394 | if (isSafeTruncation(Val, Size: 16) && |
2395 | AMDGPU::isInlinableIntLiteral(Literal: static_cast<int16_t>(Val))) { |
2396 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffffffff)); |
2397 | setImmKindConst(); |
2398 | return; |
2399 | } |
2400 | |
2401 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2402 | setImmKindLiteral(); |
2403 | return; |
2404 | |
2405 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2406 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2407 | case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: |
2408 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
2409 | if (isSafeTruncation(Val, Size: 16) && |
2410 | AMDGPU::isInlinableLiteralFP16(Literal: static_cast<int16_t>(Val), |
2411 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2412 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2413 | setImmKindConst(); |
2414 | return; |
2415 | } |
2416 | |
2417 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2418 | setImmKindLiteral(); |
2419 | return; |
2420 | |
2421 | case AMDGPU::OPERAND_REG_IMM_BF16: |
2422 | case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: |
2423 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
2424 | case AMDGPU::OPERAND_REG_INLINE_AC_BF16: |
2425 | if (isSafeTruncation(Val, Size: 16) && |
2426 | AMDGPU::isInlinableLiteralBF16(Literal: static_cast<int16_t>(Val), |
2427 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2428 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2429 | setImmKindConst(); |
2430 | return; |
2431 | } |
2432 | |
2433 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2434 | setImmKindLiteral(); |
2435 | return; |
2436 | |
2437 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
2438 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: { |
2439 | assert(isSafeTruncation(Val, 16)); |
2440 | assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))); |
2441 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2442 | return; |
2443 | } |
2444 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2445 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { |
2446 | assert(isSafeTruncation(Val, 16)); |
2447 | assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val), |
2448 | AsmParser->hasInv2PiInlineImm())); |
2449 | |
2450 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2451 | return; |
2452 | } |
2453 | |
2454 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
2455 | case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: { |
2456 | assert(isSafeTruncation(Val, 16)); |
2457 | assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val), |
2458 | AsmParser->hasInv2PiInlineImm())); |
2459 | |
2460 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2461 | return; |
2462 | } |
2463 | |
2464 | case AMDGPU::OPERAND_KIMM32: |
2465 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getLoBits(numBits: 32).getZExtValue())); |
2466 | setImmKindMandatoryLiteral(); |
2467 | return; |
2468 | case AMDGPU::OPERAND_KIMM16: |
2469 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getLoBits(numBits: 16).getZExtValue())); |
2470 | setImmKindMandatoryLiteral(); |
2471 | return; |
2472 | default: |
2473 | llvm_unreachable("invalid operand size" ); |
2474 | } |
2475 | } |
2476 | |
2477 | void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { |
2478 | Inst.addOperand(Op: MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: getReg(), STI: AsmParser->getSTI()))); |
2479 | } |
2480 | |
2481 | bool AMDGPUOperand::isInlineValue() const { |
2482 | return isRegKind() && ::isInlineValue(Reg: getReg()); |
2483 | } |
2484 | |
2485 | //===----------------------------------------------------------------------===// |
2486 | // AsmParser |
2487 | //===----------------------------------------------------------------------===// |
2488 | |
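 | // Equates a symbol to a constant value so that assembly source can use the |
 | // name in expressions like any other assembler constant. |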
2489 | void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) { |
2490 | // TODO: make these pre-defined variables read-only. |
2491 | // Currently there is no suitable machinery in core llvm-mc for this. |
2492 | // MCSymbol::isRedefinable is intended for another purpose, and |
2493 | // AsmParser::parseDirectiveSet() cannot be specialized for a specific target. |
2494 | MCContext &Ctx = getContext(); |
2495 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id); |
2496 | Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx)); |
2497 | } |
2498 | |
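 | // Maps a register kind and a width in bits to a register class ID, e.g. |
 | // (IS_VGPR, 64) -> VReg_64RegClassID; unsupported widths yield -1. |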
2499 | static int getRegClass(RegisterKind Is, unsigned RegWidth) { |
2500 | if (Is == IS_VGPR) { |
2501 | switch (RegWidth) { |
2502 | default: return -1; |
2503 | case 32: |
2504 | return AMDGPU::VGPR_32RegClassID; |
2505 | case 64: |
2506 | return AMDGPU::VReg_64RegClassID; |
2507 | case 96: |
2508 | return AMDGPU::VReg_96RegClassID; |
2509 | case 128: |
2510 | return AMDGPU::VReg_128RegClassID; |
2511 | case 160: |
2512 | return AMDGPU::VReg_160RegClassID; |
2513 | case 192: |
2514 | return AMDGPU::VReg_192RegClassID; |
2515 | case 224: |
2516 | return AMDGPU::VReg_224RegClassID; |
2517 | case 256: |
2518 | return AMDGPU::VReg_256RegClassID; |
2519 | case 288: |
2520 | return AMDGPU::VReg_288RegClassID; |
2521 | case 320: |
2522 | return AMDGPU::VReg_320RegClassID; |
2523 | case 352: |
2524 | return AMDGPU::VReg_352RegClassID; |
2525 | case 384: |
2526 | return AMDGPU::VReg_384RegClassID; |
2527 | case 512: |
2528 | return AMDGPU::VReg_512RegClassID; |
2529 | case 1024: |
2530 | return AMDGPU::VReg_1024RegClassID; |
2531 | } |
2532 | } else if (Is == IS_TTMP) { |
2533 | switch (RegWidth) { |
2534 | default: return -1; |
2535 | case 32: |
2536 | return AMDGPU::TTMP_32RegClassID; |
2537 | case 64: |
2538 | return AMDGPU::TTMP_64RegClassID; |
2539 | case 128: |
2540 | return AMDGPU::TTMP_128RegClassID; |
2541 | case 256: |
2542 | return AMDGPU::TTMP_256RegClassID; |
2543 | case 512: |
2544 | return AMDGPU::TTMP_512RegClassID; |
2545 | } |
2546 | } else if (Is == IS_SGPR) { |
2547 | switch (RegWidth) { |
2548 | default: return -1; |
2549 | case 32: |
2550 | return AMDGPU::SGPR_32RegClassID; |
2551 | case 64: |
2552 | return AMDGPU::SGPR_64RegClassID; |
2553 | case 96: |
2554 | return AMDGPU::SGPR_96RegClassID; |
2555 | case 128: |
2556 | return AMDGPU::SGPR_128RegClassID; |
2557 | case 160: |
2558 | return AMDGPU::SGPR_160RegClassID; |
2559 | case 192: |
2560 | return AMDGPU::SGPR_192RegClassID; |
2561 | case 224: |
2562 | return AMDGPU::SGPR_224RegClassID; |
2563 | case 256: |
2564 | return AMDGPU::SGPR_256RegClassID; |
2565 | case 288: |
2566 | return AMDGPU::SGPR_288RegClassID; |
2567 | case 320: |
2568 | return AMDGPU::SGPR_320RegClassID; |
2569 | case 352: |
2570 | return AMDGPU::SGPR_352RegClassID; |
2571 | case 384: |
2572 | return AMDGPU::SGPR_384RegClassID; |
2573 | case 512: |
2574 | return AMDGPU::SGPR_512RegClassID; |
2575 | } |
2576 | } else if (Is == IS_AGPR) { |
2577 | switch (RegWidth) { |
2578 | default: return -1; |
2579 | case 32: |
2580 | return AMDGPU::AGPR_32RegClassID; |
2581 | case 64: |
2582 | return AMDGPU::AReg_64RegClassID; |
2583 | case 96: |
2584 | return AMDGPU::AReg_96RegClassID; |
2585 | case 128: |
2586 | return AMDGPU::AReg_128RegClassID; |
2587 | case 160: |
2588 | return AMDGPU::AReg_160RegClassID; |
2589 | case 192: |
2590 | return AMDGPU::AReg_192RegClassID; |
2591 | case 224: |
2592 | return AMDGPU::AReg_224RegClassID; |
2593 | case 256: |
2594 | return AMDGPU::AReg_256RegClassID; |
2595 | case 288: |
2596 | return AMDGPU::AReg_288RegClassID; |
2597 | case 320: |
2598 | return AMDGPU::AReg_320RegClassID; |
2599 | case 352: |
2600 | return AMDGPU::AReg_352RegClassID; |
2601 | case 384: |
2602 | return AMDGPU::AReg_384RegClassID; |
2603 | case 512: |
2604 | return AMDGPU::AReg_512RegClassID; |
2605 | case 1024: |
2606 | return AMDGPU::AReg_1024RegClassID; |
2607 | } |
2608 | } |
2609 | return -1; |
2610 | } |
2611 | |
2612 | static unsigned getSpecialRegForName(StringRef RegName) { |
2613 | return StringSwitch<unsigned>(RegName) |
2614 | .Case(S: "exec" , Value: AMDGPU::EXEC) |
2615 | .Case(S: "vcc" , Value: AMDGPU::VCC) |
2616 | .Case(S: "flat_scratch" , Value: AMDGPU::FLAT_SCR) |
2617 | .Case(S: "xnack_mask" , Value: AMDGPU::XNACK_MASK) |
2618 | .Case(S: "shared_base" , Value: AMDGPU::SRC_SHARED_BASE) |
2619 | .Case(S: "src_shared_base" , Value: AMDGPU::SRC_SHARED_BASE) |
2620 | .Case(S: "shared_limit" , Value: AMDGPU::SRC_SHARED_LIMIT) |
2621 | .Case(S: "src_shared_limit" , Value: AMDGPU::SRC_SHARED_LIMIT) |
2622 | .Case(S: "private_base" , Value: AMDGPU::SRC_PRIVATE_BASE) |
2623 | .Case(S: "src_private_base" , Value: AMDGPU::SRC_PRIVATE_BASE) |
2624 | .Case(S: "private_limit" , Value: AMDGPU::SRC_PRIVATE_LIMIT) |
2625 | .Case(S: "src_private_limit" , Value: AMDGPU::SRC_PRIVATE_LIMIT) |
2626 | .Case(S: "pops_exiting_wave_id" , Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID) |
2627 | .Case(S: "src_pops_exiting_wave_id" , Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID) |
2628 | .Case(S: "lds_direct" , Value: AMDGPU::LDS_DIRECT) |
2629 | .Case(S: "src_lds_direct" , Value: AMDGPU::LDS_DIRECT) |
2630 | .Case(S: "m0" , Value: AMDGPU::M0) |
2631 | .Case(S: "vccz" , Value: AMDGPU::SRC_VCCZ) |
2632 | .Case(S: "src_vccz" , Value: AMDGPU::SRC_VCCZ) |
2633 | .Case(S: "execz" , Value: AMDGPU::SRC_EXECZ) |
2634 | .Case(S: "src_execz" , Value: AMDGPU::SRC_EXECZ) |
2635 | .Case(S: "scc" , Value: AMDGPU::SRC_SCC) |
2636 | .Case(S: "src_scc" , Value: AMDGPU::SRC_SCC) |
2637 | .Case(S: "tba" , Value: AMDGPU::TBA) |
2638 | .Case(S: "tma" , Value: AMDGPU::TMA) |
2639 | .Case(S: "flat_scratch_lo" , Value: AMDGPU::FLAT_SCR_LO) |
2640 | .Case(S: "flat_scratch_hi" , Value: AMDGPU::FLAT_SCR_HI) |
2641 | .Case(S: "xnack_mask_lo" , Value: AMDGPU::XNACK_MASK_LO) |
2642 | .Case(S: "xnack_mask_hi" , Value: AMDGPU::XNACK_MASK_HI) |
2643 | .Case(S: "vcc_lo" , Value: AMDGPU::VCC_LO) |
2644 | .Case(S: "vcc_hi" , Value: AMDGPU::VCC_HI) |
2645 | .Case(S: "exec_lo" , Value: AMDGPU::EXEC_LO) |
2646 | .Case(S: "exec_hi" , Value: AMDGPU::EXEC_HI) |
2647 | .Case(S: "tma_lo" , Value: AMDGPU::TMA_LO) |
2648 | .Case(S: "tma_hi" , Value: AMDGPU::TMA_HI) |
2649 | .Case(S: "tba_lo" , Value: AMDGPU::TBA_LO) |
2650 | .Case(S: "tba_hi" , Value: AMDGPU::TBA_HI) |
2651 | .Case(S: "pc" , Value: AMDGPU::PC_REG) |
2652 | .Case(S: "null" , Value: AMDGPU::SGPR_NULL) |
2653 | .Default(Value: AMDGPU::NoRegister); |
2654 | } |
2655 | |
2656 | bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, |
2657 | SMLoc &EndLoc, bool RestoreOnFailure) { |
2658 | auto R = parseRegister(); |
2659 | if (!R) return true; |
2660 | assert(R->isReg()); |
2661 | RegNo = R->getReg(); |
2662 | StartLoc = R->getStartLoc(); |
2663 | EndLoc = R->getEndLoc(); |
2664 | return false; |
2665 | } |
2666 | |
2667 | bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, |
2668 | SMLoc &EndLoc) { |
2669 | return ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); |
2670 | } |
2671 | |
2672 | ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, |
2673 | SMLoc &EndLoc) { |
2674 | bool Result = ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); |
2675 | bool PendingErrors = getParser().hasPendingError(); |
2676 | getParser().clearPendingErrors(); |
2677 | if (PendingErrors) |
2678 | return ParseStatus::Failure; |
2679 | if (Result) |
2680 | return ParseStatus::NoMatch; |
2681 | return ParseStatus::Success; |
2682 | } |
2683 | |
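 | // e.g. [s0,s1] grows the tuple to 64 bits, while [s0,s2] is rejected because |
 | // indices must be consecutive; special registers only combine into their |
 | // known pairs such as [exec_lo,exec_hi]. |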
2684 | bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, |
2685 | RegisterKind RegKind, unsigned Reg1, |
2686 | SMLoc Loc) { |
2687 | switch (RegKind) { |
2688 | case IS_SPECIAL: |
2689 | if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { |
2690 | Reg = AMDGPU::EXEC; |
2691 | RegWidth = 64; |
2692 | return true; |
2693 | } |
2694 | if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { |
2695 | Reg = AMDGPU::FLAT_SCR; |
2696 | RegWidth = 64; |
2697 | return true; |
2698 | } |
2699 | if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { |
2700 | Reg = AMDGPU::XNACK_MASK; |
2701 | RegWidth = 64; |
2702 | return true; |
2703 | } |
2704 | if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { |
2705 | Reg = AMDGPU::VCC; |
2706 | RegWidth = 64; |
2707 | return true; |
2708 | } |
2709 | if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { |
2710 | Reg = AMDGPU::TBA; |
2711 | RegWidth = 64; |
2712 | return true; |
2713 | } |
2714 | if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { |
2715 | Reg = AMDGPU::TMA; |
2716 | RegWidth = 64; |
2717 | return true; |
2718 | } |
2719 | Error(L: Loc, Msg: "register does not fit in the list" ); |
2720 | return false; |
2721 | case IS_VGPR: |
2722 | case IS_SGPR: |
2723 | case IS_AGPR: |
2724 | case IS_TTMP: |
2725 | if (Reg1 != Reg + RegWidth / 32) { |
2726 | Error(L: Loc, Msg: "registers in a list must have consecutive indices" ); |
2727 | return false; |
2728 | } |
2729 | RegWidth += 32; |
2730 | return true; |
2731 | default: |
2732 | llvm_unreachable("unexpected register kind" ); |
2733 | } |
2734 | } |
2735 | |
2736 | struct RegInfo { |
2737 | StringLiteral Name; |
2738 | RegisterKind Kind; |
2739 | }; |
2740 | |
2741 | static constexpr RegInfo RegularRegisters[] = { |
2742 | {.Name: {"v" }, .Kind: IS_VGPR}, |
2743 | {.Name: {"s" }, .Kind: IS_SGPR}, |
2744 | {.Name: {"ttmp" }, .Kind: IS_TTMP}, |
2745 | {.Name: {"acc" }, .Kind: IS_AGPR}, |
2746 | {.Name: {"a" }, .Kind: IS_AGPR}, |
2747 | }; |
2748 | |
2749 | static bool isRegularReg(RegisterKind Kind) { |
2750 | return Kind == IS_VGPR || |
2751 | Kind == IS_SGPR || |
2752 | Kind == IS_TTMP || |
2753 | Kind == IS_AGPR; |
2754 | } |
2755 | |
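 | // Matching is prefix-based: "v0", "v[0:1]" and even "vcc" all match the "v" |
 | // entry, so callers still validate the suffix; isRegister(), for example, |
 | // falls back to the special-register names when it is not a plain index. |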
2756 | static const RegInfo* getRegularRegInfo(StringRef Str) { |
2757 | for (const RegInfo &Reg : RegularRegisters) |
2758 | if (Str.starts_with(Prefix: Reg.Name)) |
2759 | return &Reg; |
2760 | return nullptr; |
2761 | } |
2762 | |
2763 | static bool getRegNum(StringRef Str, unsigned& Num) { |
2764 | return !Str.getAsInteger(Radix: 10, Result&: Num); |
2765 | } |
2766 | |
2767 | bool |
2768 | AMDGPUAsmParser::isRegister(const AsmToken &Token, |
2769 | const AsmToken &NextToken) const { |
2770 | |
2771 | // A list of consecutive registers: [s0,s1,s2,s3] |
2772 | if (Token.is(K: AsmToken::LBrac)) |
2773 | return true; |
2774 | |
2775 | if (!Token.is(K: AsmToken::Identifier)) |
2776 | return false; |
2777 | |
2778 | // A single register like s0 or a range of registers like s[0:1] |
2779 | |
2780 | StringRef Str = Token.getString(); |
2781 | const RegInfo *Reg = getRegularRegInfo(Str); |
2782 | if (Reg) { |
2783 | StringRef RegName = Reg->Name; |
2784 | StringRef RegSuffix = Str.substr(Start: RegName.size()); |
2785 | if (!RegSuffix.empty()) { |
2786 | RegSuffix.consume_back(Suffix: ".l" ); |
2787 | RegSuffix.consume_back(Suffix: ".h" ); |
2788 | unsigned Num; |
2789 | // A single register with an index: rXX |
2790 | if (getRegNum(Str: RegSuffix, Num)) |
2791 | return true; |
2792 | } else { |
2793 | // A range of registers: r[XX:YY]. |
2794 | if (NextToken.is(K: AsmToken::LBrac)) |
2795 | return true; |
2796 | } |
2797 | } |
2798 | |
2799 | return getSpecialRegForName(RegName: Str) != AMDGPU::NoRegister; |
2800 | } |
2801 | |
2802 | bool |
2803 | AMDGPUAsmParser::isRegister() |
2804 | { |
2805 | return isRegister(Token: getToken(), NextToken: peekToken()); |
2806 | } |
2807 | |
2808 | unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, |
2809 | unsigned SubReg, unsigned RegWidth, |
2810 | SMLoc Loc) { |
2811 | assert(isRegularReg(RegKind)); |
2812 | |
2813 | unsigned AlignSize = 1; |
2814 | if (RegKind == IS_SGPR || RegKind == IS_TTMP) { |
2815 | // SGPR and TTMP registers must be aligned. |
2816 | // Max required alignment is 4 dwords. |
2817 | AlignSize = std::min(a: llvm::bit_ceil(Value: RegWidth / 32), b: 4u); |
2818 | } |
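 | // For example, a 128-bit SGPR tuple (AlignSize == 4) must start at a |
 | // multiple of 4: s[4:7] passes the check below, s[2:5] does not. |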
2819 | |
2820 | if (RegNum % AlignSize != 0) { |
2821 | Error(L: Loc, Msg: "invalid register alignment" ); |
2822 | return AMDGPU::NoRegister; |
2823 | } |
2824 | |
2825 | unsigned RegIdx = RegNum / AlignSize; |
2826 | int RCID = getRegClass(Is: RegKind, RegWidth); |
2827 | if (RCID == -1) { |
2828 | Error(L: Loc, Msg: "invalid or unsupported register size" ); |
2829 | return AMDGPU::NoRegister; |
2830 | } |
2831 | |
2832 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
2833 | const MCRegisterClass RC = TRI->getRegClass(i: RCID); |
2834 | if (RegIdx >= RC.getNumRegs()) { |
2835 | Error(L: Loc, Msg: "register index is out of range" ); |
2836 | return AMDGPU::NoRegister; |
2837 | } |
2838 | |
2839 | unsigned Reg = RC.getRegister(i: RegIdx); |
2840 | |
2841 | if (SubReg) { |
2842 | Reg = TRI->getSubReg(Reg, Idx: SubReg); |
2843 | |
2844 | // Currently all regular registers have their .l and .h subregisters, so |
2845 | // we should never need to generate an error here. |
2846 | assert(Reg && "Invalid subregister!" ); |
2847 | } |
2848 | |
2849 | return Reg; |
2850 | } |
2851 | |
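 | // Parses the bracketed index part after the register prefix has been |
 | // consumed, e.g. "[2]" (RegWidth == 32) or "[0:3]" (RegWidth == 128). |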
2852 | bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { |
2853 | int64_t RegLo, RegHi; |
2854 | if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "missing register index" )) |
2855 | return false; |
2856 | |
2857 | SMLoc FirstIdxLoc = getLoc(); |
2858 | SMLoc SecondIdxLoc; |
2859 | |
2860 | if (!parseExpr(Imm&: RegLo)) |
2861 | return false; |
2862 | |
2863 | if (trySkipToken(Kind: AsmToken::Colon)) { |
2864 | SecondIdxLoc = getLoc(); |
2865 | if (!parseExpr(Imm&: RegHi)) |
2866 | return false; |
2867 | } else { |
2868 | RegHi = RegLo; |
2869 | } |
2870 | |
2871 | if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket" )) |
2872 | return false; |
2873 | |
2874 | if (!isUInt<32>(x: RegLo)) { |
2875 | Error(L: FirstIdxLoc, Msg: "invalid register index" ); |
2876 | return false; |
2877 | } |
2878 | |
2879 | if (!isUInt<32>(x: RegHi)) { |
2880 | Error(L: SecondIdxLoc, Msg: "invalid register index" ); |
2881 | return false; |
2882 | } |
2883 | |
2884 | if (RegLo > RegHi) { |
2885 | Error(L: FirstIdxLoc, Msg: "first register index should not exceed second index" ); |
2886 | return false; |
2887 | } |
2888 | |
2889 | Num = static_cast<unsigned>(RegLo); |
2890 | RegWidth = 32 * ((RegHi - RegLo) + 1); |
2891 | return true; |
2892 | } |
2893 | |
2894 | unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, |
2895 | unsigned &RegNum, unsigned &RegWidth, |
2896 | SmallVectorImpl<AsmToken> &Tokens) { |
2897 | assert(isToken(AsmToken::Identifier)); |
2898 | unsigned Reg = getSpecialRegForName(RegName: getTokenStr()); |
2899 | if (Reg) { |
2900 | RegNum = 0; |
2901 | RegWidth = 32; |
2902 | RegKind = IS_SPECIAL; |
2903 | Tokens.push_back(Elt: getToken()); |
2904 | lex(); // skip register name |
2905 | } |
2906 | return Reg; |
2907 | } |
2908 | |
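 | // Examples: "v0" -> VGPR_32, "s[0:1]" -> SGPR_64, and "v1.l" / "v1.h" |
 | // select the 16-bit lo16 / hi16 halves of v1. |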
2909 | unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, |
2910 | unsigned &RegNum, unsigned &RegWidth, |
2911 | SmallVectorImpl<AsmToken> &Tokens) { |
2912 | assert(isToken(AsmToken::Identifier)); |
2913 | StringRef RegName = getTokenStr(); |
2914 | auto Loc = getLoc(); |
2915 | |
2916 | const RegInfo *RI = getRegularRegInfo(Str: RegName); |
2917 | if (!RI) { |
2918 | Error(L: Loc, Msg: "invalid register name" ); |
2919 | return AMDGPU::NoRegister; |
2920 | } |
2921 | |
2922 | Tokens.push_back(Elt: getToken()); |
2923 | lex(); // skip register name |
2924 | |
2925 | RegKind = RI->Kind; |
2926 | StringRef RegSuffix = RegName.substr(Start: RI->Name.size()); |
2927 | unsigned SubReg = NoSubRegister; |
2928 | if (!RegSuffix.empty()) { |
2929 | // We don't know the opcode until we are done parsing, so we don't know |
2930 | // whether registers should be 16 or 32 bit. It is therefore mandatory to |
2931 | // use .l or .h to correctly specify 16-bit registers. We also can't tell |
2932 | // class VGPR_16_Lo128 from VGPR_16, so always parse them as VGPR_16. |
2933 | if (RegSuffix.consume_back(Suffix: ".l" )) |
2934 | SubReg = AMDGPU::lo16; |
2935 | else if (RegSuffix.consume_back(Suffix: ".h" )) |
2936 | SubReg = AMDGPU::hi16; |
2937 | |
2938 | // Single 32-bit register: vXX. |
2939 | if (!getRegNum(Str: RegSuffix, Num&: RegNum)) { |
2940 | Error(L: Loc, Msg: "invalid register index" ); |
2941 | return AMDGPU::NoRegister; |
2942 | } |
2943 | RegWidth = 32; |
2944 | } else { |
2945 | // Range of registers: v[XX:YY]. ":YY" is optional. |
2946 | if (!ParseRegRange(Num&: RegNum, RegWidth)) |
2947 | return AMDGPU::NoRegister; |
2948 | } |
2949 | |
2950 | return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); |
2951 | } |
2952 | |
2953 | unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, |
2954 | unsigned &RegWidth, |
2955 | SmallVectorImpl<AsmToken> &Tokens) { |
2956 | unsigned Reg = AMDGPU::NoRegister; |
2957 | auto ListLoc = getLoc(); |
2958 | |
2959 | if (!skipToken(Kind: AsmToken::LBrac, |
2960 | ErrMsg: "expected a register or a list of registers" )) { |
2961 | return AMDGPU::NoRegister; |
2962 | } |
2963 | |
2964 | // List of consecutive registers, e.g.: [s0,s1,s2,s3] |
2965 | |
2966 | auto Loc = getLoc(); |
2967 | if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) |
2968 | return AMDGPU::NoRegister; |
2969 | if (RegWidth != 32) { |
2970 | Error(L: Loc, Msg: "expected a single 32-bit register" ); |
2971 | return AMDGPU::NoRegister; |
2972 | } |
2973 | |
2974 | for (; trySkipToken(Kind: AsmToken::Comma); ) { |
2975 | RegisterKind NextRegKind; |
2976 | unsigned NextReg, NextRegNum, NextRegWidth; |
2977 | Loc = getLoc(); |
2978 | |
2979 | if (!ParseAMDGPURegister(RegKind&: NextRegKind, Reg&: NextReg, |
2980 | RegNum&: NextRegNum, RegWidth&: NextRegWidth, |
2981 | Tokens)) { |
2982 | return AMDGPU::NoRegister; |
2983 | } |
2984 | if (NextRegWidth != 32) { |
2985 | Error(L: Loc, Msg: "expected a single 32-bit register" ); |
2986 | return AMDGPU::NoRegister; |
2987 | } |
2988 | if (NextRegKind != RegKind) { |
2989 | Error(L: Loc, Msg: "registers in a list must be of the same kind" ); |
2990 | return AMDGPU::NoRegister; |
2991 | } |
2992 | if (!AddNextRegisterToList(Reg, RegWidth, RegKind, Reg1: NextReg, Loc)) |
2993 | return AMDGPU::NoRegister; |
2994 | } |
2995 | |
2996 | if (!skipToken(Kind: AsmToken::RBrac, |
2997 | ErrMsg: "expected a comma or a closing square bracket" )) { |
2998 | return AMDGPU::NoRegister; |
2999 | } |
3000 | |
3001 | if (isRegularReg(Kind: RegKind)) |
3002 | Reg = getRegularReg(RegKind, RegNum, SubReg: NoSubRegister, RegWidth, Loc: ListLoc); |
3003 | |
3004 | return Reg; |
3005 | } |
3006 | |
3007 | bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, |
3008 | unsigned &RegNum, unsigned &RegWidth, |
3009 | SmallVectorImpl<AsmToken> &Tokens) { |
3010 | auto Loc = getLoc(); |
3011 | Reg = AMDGPU::NoRegister; |
3012 | |
3013 | if (isToken(Kind: AsmToken::Identifier)) { |
3014 | Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); |
3015 | if (Reg == AMDGPU::NoRegister) |
3016 | Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); |
3017 | } else { |
3018 | Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); |
3019 | } |
3020 | |
3021 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3022 | if (Reg == AMDGPU::NoRegister) { |
3023 | assert(Parser.hasPendingError()); |
3024 | return false; |
3025 | } |
3026 | |
3027 | if (!subtargetHasRegister(MRI: *TRI, RegNo: Reg)) { |
3028 | if (Reg == AMDGPU::SGPR_NULL) { |
3029 | Error(L: Loc, Msg: "'null' operand is not supported on this GPU" ); |
3030 | } else { |
3031 | Error(L: Loc, Msg: "register not available on this GPU" ); |
3032 | } |
3033 | return false; |
3034 | } |
3035 | |
3036 | return true; |
3037 | } |
3038 | |
3039 | bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, |
3040 | unsigned &RegNum, unsigned &RegWidth, |
3041 | bool RestoreOnFailure /*=false*/) { |
3042 | Reg = AMDGPU::NoRegister; |
3043 | |
3044 | SmallVector<AsmToken, 1> Tokens; |
3045 | if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { |
3046 | if (RestoreOnFailure) { |
3047 | while (!Tokens.empty()) { |
3048 | getLexer().UnLex(Token: Tokens.pop_back_val()); |
3049 | } |
3050 | } |
3051 | return true; |
3052 | } |
3053 | return false; |
3054 | } |
3055 | |
3056 | std::optional<StringRef> |
3057 | AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { |
3058 | switch (RegKind) { |
3059 | case IS_VGPR: |
3060 | return StringRef(".amdgcn.next_free_vgpr" ); |
3061 | case IS_SGPR: |
3062 | return StringRef(".amdgcn.next_free_sgpr" ); |
3063 | default: |
3064 | return std::nullopt; |
3065 | } |
3066 | } |
3067 | |
3068 | void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { |
3069 | auto SymbolName = getGprCountSymbolName(RegKind); |
3070 | assert(SymbolName && "initializing invalid register kind" ); |
3071 | MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName); |
3072 | Sym->setVariableValue(MCConstantExpr::create(Value: 0, Ctx&: getContext())); |
3073 | } |
3074 | |
3075 | bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, |
3076 | unsigned DwordRegIndex, |
3077 | unsigned RegWidth) { |
3078 | // Symbols are only defined for GCN targets |
3079 | if (AMDGPU::getIsaVersion(GPU: getSTI().getCPU()).Major < 6) |
3080 | return true; |
3081 | |
3082 | auto SymbolName = getGprCountSymbolName(RegKind); |
3083 | if (!SymbolName) |
3084 | return true; |
3085 | MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName); |
3086 | |
3087 | int64_t NewMax = DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1; |
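// For example, a use of v[4:7] (DwordRegIndex = 4, RegWidth = 128) gives
// NewMax = 7, bumping .amdgcn.next_free_vgpr to 8 if it was below that.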
3088 | int64_t OldCount; |
3089 | |
3090 | if (!Sym->isVariable()) |
3091 | return !Error(L: getLoc(), |
3092 | Msg: ".amdgcn.next_free_{v,s}gpr symbols must be variable" ); |
3093 | if (!Sym->getVariableValue(SetUsed: false)->evaluateAsAbsolute(Res&: OldCount)) |
3094 | return !Error( |
3095 | L: getLoc(), |
3096 | Msg: ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions" ); |
3097 | |
3098 | if (OldCount <= NewMax) |
3099 | Sym->setVariableValue(MCConstantExpr::create(Value: NewMax + 1, Ctx&: getContext())); |
3100 | |
3101 | return true; |
3102 | } |
3103 | |
3104 | std::unique_ptr<AMDGPUOperand> |
3105 | AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { |
3106 | const auto &Tok = getToken(); |
3107 | SMLoc StartLoc = Tok.getLoc(); |
3108 | SMLoc EndLoc = Tok.getEndLoc(); |
3109 | RegisterKind RegKind; |
3110 | unsigned Reg, RegNum, RegWidth; |
3111 | |
3112 | if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { |
3113 | return nullptr; |
3114 | } |
3115 | if (isHsaAbi(STI: getSTI())) { |
3116 | if (!updateGprCountSymbols(RegKind, DwordRegIndex: RegNum, RegWidth)) |
3117 | return nullptr; |
3118 | } else |
3119 | KernelScope.usesRegister(RegKind, DwordRegIndex: RegNum, RegWidth); |
3120 | return AMDGPUOperand::CreateReg(AsmParser: this, RegNo: Reg, S: StartLoc, E: EndLoc); |
3121 | } |
3122 | |
3123 | ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, |
3124 | bool HasSP3AbsModifier, bool HasLit) { |
3125 | // TODO: add syntactic sugar for 1/(2*PI) |
3126 | |
3127 | if (isRegister()) |
3128 | return ParseStatus::NoMatch; |
3129 | assert(!isModifier()); |
3130 | |
3131 | if (!HasLit) { |
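// An explicit "lit(...)" wrapper, e.g. "lit(1.0)", requests that the value
// be encoded as a literal constant rather than an inline constant.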
3132 | HasLit = trySkipId(Id: "lit" ); |
3133 | if (HasLit) { |
3134 | if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit" )) |
3135 | return ParseStatus::Failure; |
3136 | ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit); |
3137 | if (S.isSuccess() && |
3138 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3139 | return ParseStatus::Failure; |
3140 | return S; |
3141 | } |
3142 | } |
3143 | |
3144 | const auto& Tok = getToken(); |
3145 | const auto& NextTok = peekToken(); |
3146 | bool IsReal = Tok.is(K: AsmToken::Real); |
3147 | SMLoc S = getLoc(); |
3148 | bool Negate = false; |
3149 | |
3150 | if (!IsReal && Tok.is(K: AsmToken::Minus) && NextTok.is(K: AsmToken::Real)) { |
3151 | lex(); |
3152 | IsReal = true; |
3153 | Negate = true; |
3154 | } |
3155 | |
3156 | AMDGPUOperand::Modifiers Mods; |
3157 | Mods.Lit = HasLit; |
3158 | |
3159 | if (IsReal) { |
// Floating-point expressions are not supported; only floating-point
// literals with an optional sign are allowed.
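// For example, "1.5" and "-0.5" are accepted here, but "1.0+2.0" is not.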
3163 | |
3164 | StringRef Num = getTokenStr(); |
3165 | lex(); |
3166 | |
3167 | APFloat RealVal(APFloat::IEEEdouble()); |
3168 | auto roundMode = APFloat::rmNearestTiesToEven; |
3169 | if (errorToBool(Err: RealVal.convertFromString(Num, roundMode).takeError())) |
3170 | return ParseStatus::Failure; |
3171 | if (Negate) |
3172 | RealVal.changeSign(); |
3173 | |
3174 | Operands.push_back( |
3175 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: RealVal.bitcastToAPInt().getZExtValue(), Loc: S, |
3176 | Type: AMDGPUOperand::ImmTyNone, IsFPImm: true)); |
3177 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3178 | Op.setModifiers(Mods); |
3179 | |
3180 | return ParseStatus::Success; |
3181 | |
3182 | } else { |
3183 | int64_t IntVal; |
3184 | const MCExpr *Expr; |
3185 | SMLoc S = getLoc(); |
3186 | |
3187 | if (HasSP3AbsModifier) { |
// This is a workaround for handling expressions
// as arguments of the SP3 'abs' modifier, for example:
//     |1.0|
//     |-1|
//     |1+x|
// This syntax is not compatible with the syntax of standard
// MC expressions (due to the trailing '|').
3195 | SMLoc EndLoc; |
3196 | if (getParser().parsePrimaryExpr(Res&: Expr, EndLoc, TypeInfo: nullptr)) |
3197 | return ParseStatus::Failure; |
3198 | } else { |
3199 | if (Parser.parseExpression(Res&: Expr)) |
3200 | return ParseStatus::Failure; |
3201 | } |
3202 | |
3203 | if (Expr->evaluateAsAbsolute(Res&: IntVal)) { |
3204 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S)); |
3205 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3206 | Op.setModifiers(Mods); |
3207 | } else { |
3208 | if (HasLit) |
3209 | return ParseStatus::NoMatch; |
3210 | Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S)); |
3211 | } |
3212 | |
3213 | return ParseStatus::Success; |
3214 | } |
3215 | |
3216 | return ParseStatus::NoMatch; |
3217 | } |
3218 | |
3219 | ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { |
3220 | if (!isRegister()) |
3221 | return ParseStatus::NoMatch; |
3222 | |
3223 | if (auto R = parseRegister()) { |
3224 | assert(R->isReg()); |
3225 | Operands.push_back(Elt: std::move(R)); |
3226 | return ParseStatus::Success; |
3227 | } |
3228 | return ParseStatus::Failure; |
3229 | } |
3230 | |
3231 | ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, |
3232 | bool HasSP3AbsMod, bool HasLit) { |
3233 | ParseStatus Res = parseReg(Operands); |
3234 | if (!Res.isNoMatch()) |
3235 | return Res; |
3236 | if (isModifier()) |
3237 | return ParseStatus::NoMatch; |
3238 | return parseImm(Operands, HasSP3AbsModifier: HasSP3AbsMod, HasLit); |
3239 | } |
3240 | |
3241 | bool |
3242 | AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3243 | if (Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::LParen)) { |
3244 | const auto &str = Token.getString(); |
3245 | return str == "abs" || str == "neg" || str == "sext" ; |
3246 | } |
3247 | return false; |
3248 | } |
3249 | |
3250 | bool |
3251 | AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { |
3252 | return Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::Colon); |
3253 | } |
3254 | |
3255 | bool |
3256 | AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3257 | return isNamedOperandModifier(Token, NextToken) || Token.is(K: AsmToken::Pipe); |
3258 | } |
3259 | |
3260 | bool |
3261 | AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3262 | return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); |
3263 | } |
3264 | |
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. The currently
// recognized sequences are:
3269 | // |...| |
3270 | // abs(...) |
3271 | // neg(...) |
3272 | // sext(...) |
3273 | // -reg |
3274 | // -|...| |
3275 | // -abs(...) |
3276 | // name:... |
3277 | // |
3278 | bool |
3279 | AMDGPUAsmParser::isModifier() { |
3280 | |
3281 | AsmToken Tok = getToken(); |
3282 | AsmToken NextToken[2]; |
3283 | peekTokens(Tokens: NextToken); |
3284 | |
3285 | return isOperandModifier(Token: Tok, NextToken: NextToken[0]) || |
3286 | (Tok.is(K: AsmToken::Minus) && isRegOrOperandModifier(Token: NextToken[0], NextToken: NextToken[1])) || |
3287 | isOpcodeModifierWithVal(Token: Tok, NextToken: NextToken[0]); |
3288 | } |
3289 | |
3290 | // Check if the current token is an SP3 'neg' modifier. |
3291 | // Currently this modifier is allowed in the following context: |
3292 | // |
3293 | // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". |
3294 | // 2. Before an 'abs' modifier: -abs(...) |
3295 | // 3. Before an SP3 'abs' modifier: -|...| |
3296 | // |
3297 | // In all other cases "-" is handled as a part |
3298 | // of an expression that follows the sign. |
3299 | // |
// Note: When "-" is followed by an integer literal,
// it is interpreted as integer negation rather than
// a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, using the floating-point
// NEG modifier here would give integer literals different
// meanings with VOP1/2/C and VOP3, for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
3312 | bool |
3313 | AMDGPUAsmParser::parseSP3NegModifier() { |
3314 | |
3315 | AsmToken NextToken[2]; |
3316 | peekTokens(Tokens: NextToken); |
3317 | |
3318 | if (isToken(Kind: AsmToken::Minus) && |
3319 | (isRegister(Token: NextToken[0], NextToken: NextToken[1]) || |
3320 | NextToken[0].is(K: AsmToken::Pipe) || |
3321 | isId(Token: NextToken[0], Id: "abs" ))) { |
3322 | lex(); |
3323 | return true; |
3324 | } |
3325 | |
3326 | return false; |
3327 | } |
3328 | |
3329 | ParseStatus |
3330 | AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, |
3331 | bool AllowImm) { |
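// Accepted forms include "abs(v0)", "-v1", "-|v2|", "neg(1.0)" and
// "lit(...)"-wrapped values, in addition to plain registers and
// (when allowed) plain immediates.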
3332 | bool Neg, SP3Neg; |
3333 | bool Abs, SP3Abs; |
3334 | bool Lit; |
3335 | SMLoc Loc; |
3336 | |
3337 | // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. |
3338 | if (isToken(Kind: AsmToken::Minus) && peekToken().is(K: AsmToken::Minus)) |
3339 | return Error(L: getLoc(), Msg: "invalid syntax, expected 'neg' modifier" ); |
3340 | |
3341 | SP3Neg = parseSP3NegModifier(); |
3342 | |
3343 | Loc = getLoc(); |
3344 | Neg = trySkipId(Id: "neg" ); |
3345 | if (Neg && SP3Neg) |
3346 | return Error(L: Loc, Msg: "expected register or immediate" ); |
3347 | if (Neg && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after neg" )) |
3348 | return ParseStatus::Failure; |
3349 | |
3350 | Abs = trySkipId(Id: "abs" ); |
3351 | if (Abs && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after abs" )) |
3352 | return ParseStatus::Failure; |
3353 | |
3354 | Lit = trySkipId(Id: "lit" ); |
3355 | if (Lit && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit" )) |
3356 | return ParseStatus::Failure; |
3357 | |
3358 | Loc = getLoc(); |
3359 | SP3Abs = trySkipToken(Kind: AsmToken::Pipe); |
3360 | if (Abs && SP3Abs) |
3361 | return Error(L: Loc, Msg: "expected register or immediate" ); |
3362 | |
3363 | ParseStatus Res; |
3364 | if (AllowImm) { |
3365 | Res = parseRegOrImm(Operands, HasSP3AbsMod: SP3Abs, HasLit: Lit); |
3366 | } else { |
3367 | Res = parseReg(Operands); |
3368 | } |
3369 | if (!Res.isSuccess()) |
3370 | return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res; |
3371 | |
3372 | if (Lit && !Operands.back()->isImm()) |
3373 | Error(L: Loc, Msg: "expected immediate with lit modifier" ); |
3374 | |
3375 | if (SP3Abs && !skipToken(Kind: AsmToken::Pipe, ErrMsg: "expected vertical bar" )) |
3376 | return ParseStatus::Failure; |
3377 | if (Abs && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3378 | return ParseStatus::Failure; |
3379 | if (Neg && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3380 | return ParseStatus::Failure; |
3381 | if (Lit && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3382 | return ParseStatus::Failure; |
3383 | |
3384 | AMDGPUOperand::Modifiers Mods; |
3385 | Mods.Abs = Abs || SP3Abs; |
3386 | Mods.Neg = Neg || SP3Neg; |
3387 | Mods.Lit = Lit; |
3388 | |
3389 | if (Mods.hasFPModifiers() || Lit) { |
3390 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3391 | if (Op.isExpr()) |
3392 | return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression" ); |
3393 | Op.setModifiers(Mods); |
3394 | } |
3395 | return ParseStatus::Success; |
3396 | } |
3397 | |
3398 | ParseStatus |
3399 | AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, |
3400 | bool AllowImm) { |
3401 | bool Sext = trySkipId(Id: "sext" ); |
3402 | if (Sext && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after sext" )) |
3403 | return ParseStatus::Failure; |
3404 | |
3405 | ParseStatus Res; |
3406 | if (AllowImm) { |
3407 | Res = parseRegOrImm(Operands); |
3408 | } else { |
3409 | Res = parseReg(Operands); |
3410 | } |
3411 | if (!Res.isSuccess()) |
3412 | return Sext ? ParseStatus::Failure : Res; |
3413 | |
3414 | if (Sext && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3415 | return ParseStatus::Failure; |
3416 | |
3417 | AMDGPUOperand::Modifiers Mods; |
3418 | Mods.Sext = Sext; |
3419 | |
3420 | if (Mods.hasIntModifiers()) { |
3421 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3422 | if (Op.isExpr()) |
3423 | return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression" ); |
3424 | Op.setModifiers(Mods); |
3425 | } |
3426 | |
3427 | return ParseStatus::Success; |
3428 | } |
3429 | |
3430 | ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { |
3431 | return parseRegOrImmWithFPInputMods(Operands, AllowImm: false); |
3432 | } |
3433 | |
3434 | ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { |
3435 | return parseRegOrImmWithIntInputMods(Operands, AllowImm: false); |
3436 | } |
3437 | |
3438 | ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { |
3439 | auto Loc = getLoc(); |
3440 | if (trySkipId(Id: "off" )) { |
3441 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: 0, Loc, |
3442 | Type: AMDGPUOperand::ImmTyOff, IsFPImm: false)); |
3443 | return ParseStatus::Success; |
3444 | } |
3445 | |
3446 | if (!isRegister()) |
3447 | return ParseStatus::NoMatch; |
3448 | |
3449 | std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); |
3450 | if (Reg) { |
3451 | Operands.push_back(Elt: std::move(Reg)); |
3452 | return ParseStatus::Success; |
3453 | } |
3454 | |
3455 | return ParseStatus::Failure; |
3456 | } |
3457 | |
3458 | unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { |
3459 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
3460 | |
3461 | if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || |
3462 | (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || |
3463 | (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || |
3464 | (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) |
3465 | return Match_InvalidOperand; |
3466 | |
3467 | if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || |
3468 | Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { |
// v_mac_f32/16 allow only dst_sel == DWORD.
3470 | auto OpNum = |
3471 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::dst_sel); |
3472 | const auto &Op = Inst.getOperand(i: OpNum); |
3473 | if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { |
3474 | return Match_InvalidOperand; |
3475 | } |
3476 | } |
3477 | |
3478 | return Match_Success; |
3479 | } |
3480 | |
3481 | static ArrayRef<unsigned> getAllVariants() { |
3482 | static const unsigned Variants[] = { |
3483 | AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, |
3484 | AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, |
3485 | AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP |
3486 | }; |
3487 | |
3488 | return ArrayRef(Variants); |
3489 | } |
3490 | |
3491 | // What asm variants we should check |
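// For example, a mnemonic with an explicit _e64 suffix is matched only
// against the VOP3 variant, and _e64_dpp only against VOP3_DPP.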
3492 | ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { |
3493 | if (isForcedDPP() && isForcedVOP3()) { |
3494 | static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; |
3495 | return ArrayRef(Variants); |
3496 | } |
3497 | if (getForcedEncodingSize() == 32) { |
3498 | static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; |
3499 | return ArrayRef(Variants); |
3500 | } |
3501 | |
3502 | if (isForcedVOP3()) { |
3503 | static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; |
3504 | return ArrayRef(Variants); |
3505 | } |
3506 | |
3507 | if (isForcedSDWA()) { |
3508 | static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, |
3509 | AMDGPUAsmVariants::SDWA9}; |
3510 | return ArrayRef(Variants); |
3511 | } |
3512 | |
3513 | if (isForcedDPP()) { |
3514 | static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; |
3515 | return ArrayRef(Variants); |
3516 | } |
3517 | |
3518 | return getAllVariants(); |
3519 | } |
3520 | |
3521 | StringRef AMDGPUAsmParser::getMatchedVariantName() const { |
3522 | if (isForcedDPP() && isForcedVOP3()) |
3523 | return "e64_dpp" ; |
3524 | |
3525 | if (getForcedEncodingSize() == 32) |
3526 | return "e32" ; |
3527 | |
3528 | if (isForcedVOP3()) |
3529 | return "e64" ; |
3530 | |
3531 | if (isForcedSDWA()) |
3532 | return "sdwa" ; |
3533 | |
3534 | if (isForcedDPP()) |
3535 | return "dpp" ; |
3536 | |
3537 | return "" ; |
3538 | } |
3539 | |
3540 | unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { |
3541 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
3542 | for (MCPhysReg Reg : Desc.implicit_uses()) { |
3543 | switch (Reg) { |
3544 | case AMDGPU::FLAT_SCR: |
3545 | case AMDGPU::VCC: |
3546 | case AMDGPU::VCC_LO: |
3547 | case AMDGPU::VCC_HI: |
3548 | case AMDGPU::M0: |
3549 | return Reg; |
3550 | default: |
3551 | break; |
3552 | } |
3553 | } |
3554 | return AMDGPU::NoRegister; |
3555 | } |
3556 | |
3557 | // NB: This code is correct only when used to check constant |
// bus limitations because GFX7 supports no f16 inline constants.
3559 | // Note that there are no cases when a GFX7 opcode violates |
3560 | // constant bus limitations due to the use of an f16 constant. |
3561 | bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, |
3562 | unsigned OpIdx) const { |
3563 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
3564 | |
3565 | if (!AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx) || |
3566 | AMDGPU::isKImmOperand(Desc, OpNo: OpIdx)) { |
3567 | return false; |
3568 | } |
3569 | |
3570 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3571 | |
3572 | int64_t Val = MO.getImm(); |
3573 | auto OpSize = AMDGPU::getOperandSize(Desc, OpNo: OpIdx); |
3574 | |
3575 | switch (OpSize) { // expected operand size |
3576 | case 8: |
3577 | return AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3578 | case 4: |
3579 | return AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3580 | case 2: { |
3581 | const unsigned OperandType = Desc.operands()[OpIdx].OperandType; |
3582 | if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || |
3583 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || |
3584 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) |
3585 | return AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3586 | |
3587 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || |
3588 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || |
3589 | OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) |
3590 | return AMDGPU::isInlinableLiteralV2I16(Literal: Val); |
3591 | |
3592 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || |
3593 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || |
3594 | OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) |
3595 | return AMDGPU::isInlinableLiteralV2F16(Literal: Val); |
3596 | |
3597 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 || |
3598 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2BF16 || |
3599 | OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16) |
3600 | return AMDGPU::isInlinableLiteralV2BF16(Literal: Val); |
3601 | |
3602 | if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 || |
3603 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 || |
3604 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 || |
3605 | OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED) |
3606 | return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3607 | |
3608 | if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 || |
3609 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 || |
3610 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 || |
3611 | OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED) |
3612 | return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3613 | |
3614 | llvm_unreachable("invalid operand type" ); |
3615 | } |
3616 | default: |
3617 | llvm_unreachable("invalid operand size" ); |
3618 | } |
3619 | } |
3620 | |
3621 | unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { |
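// Returns how many scalar sources (SGPRs and literal constants) a single
// VALU instruction may read: one before GFX10, and generally two on
// GFX10+, except for 64-bit shifts which are limited to one.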
3622 | if (!isGFX10Plus()) |
3623 | return 1; |
3624 | |
3625 | switch (Opcode) { |
3626 | // 64-bit shift instructions can use only one scalar value input |
3627 | case AMDGPU::V_LSHLREV_B64_e64: |
3628 | case AMDGPU::V_LSHLREV_B64_gfx10: |
3629 | case AMDGPU::V_LSHLREV_B64_e64_gfx11: |
3630 | case AMDGPU::V_LSHLREV_B64_e32_gfx12: |
3631 | case AMDGPU::V_LSHLREV_B64_e64_gfx12: |
3632 | case AMDGPU::V_LSHRREV_B64_e64: |
3633 | case AMDGPU::V_LSHRREV_B64_gfx10: |
3634 | case AMDGPU::V_LSHRREV_B64_e64_gfx11: |
3635 | case AMDGPU::V_LSHRREV_B64_e64_gfx12: |
3636 | case AMDGPU::V_ASHRREV_I64_e64: |
3637 | case AMDGPU::V_ASHRREV_I64_gfx10: |
3638 | case AMDGPU::V_ASHRREV_I64_e64_gfx11: |
3639 | case AMDGPU::V_ASHRREV_I64_e64_gfx12: |
3640 | case AMDGPU::V_LSHL_B64_e64: |
3641 | case AMDGPU::V_LSHR_B64_e64: |
3642 | case AMDGPU::V_ASHR_I64_e64: |
3643 | return 1; |
3644 | default: |
3645 | return 2; |
3646 | } |
3647 | } |
3648 | |
3649 | constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; |
3650 | using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; |
3651 | |
3652 | // Get regular operand indices in the same order as specified |
3653 | // in the instruction (but append mandatory literals to the end). |
3654 | static OperandIndices getSrcOperandIndices(unsigned Opcode, |
3655 | bool AddMandatoryLiterals = false) { |
3656 | |
3657 | int16_t ImmIdx = |
3658 | AddMandatoryLiterals ? getNamedOperandIdx(Opcode, NamedIdx: OpName::imm) : -1; |
3659 | |
3660 | if (isVOPD(Opc: Opcode)) { |
3661 | int16_t ImmDeferredIdx = |
3662 | AddMandatoryLiterals ? getNamedOperandIdx(Opcode, NamedIdx: OpName::immDeferred) |
3663 | : -1; |
3664 | |
3665 | return {getNamedOperandIdx(Opcode, NamedIdx: OpName::src0X), |
3666 | getNamedOperandIdx(Opcode, NamedIdx: OpName::vsrc1X), |
3667 | getNamedOperandIdx(Opcode, NamedIdx: OpName::src0Y), |
3668 | getNamedOperandIdx(Opcode, NamedIdx: OpName::vsrc1Y), |
3669 | ImmDeferredIdx, |
3670 | ImmIdx}; |
3671 | } |
3672 | |
3673 | return {getNamedOperandIdx(Opcode, NamedIdx: OpName::src0), |
3674 | getNamedOperandIdx(Opcode, NamedIdx: OpName::src1), |
3675 | getNamedOperandIdx(Opcode, NamedIdx: OpName::src2), ImmIdx}; |
3676 | } |
3677 | |
3678 | bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { |
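// An operand occupies the constant bus if it is a non-inline immediate,
// an expression, or an SGPR other than null.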
3679 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3680 | if (MO.isImm()) |
3681 | return !isInlineConstant(Inst, OpIdx); |
3682 | if (MO.isReg()) { |
3683 | auto Reg = MO.getReg(); |
3684 | if (!Reg) |
3685 | return false; |
3686 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3687 | auto PReg = mc2PseudoReg(Reg); |
3688 | return isSGPR(Reg: PReg, TRI) && PReg != SGPR_NULL; |
3689 | } |
3690 | return true; |
3691 | } |
3692 | |
3693 | // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`: |
3694 | // Writelane is special in that it can use SGPR and M0 (which would normally |
3695 | // count as using the constant bus twice - but in this case it is allowed since |
3696 | // the lane selector doesn't count as a use of the constant bus). However, it is |
3697 | // still required to abide by the 1 SGPR rule. |
3698 | static bool checkWriteLane(const MCInst &Inst) { |
3699 | const unsigned Opcode = Inst.getOpcode(); |
3700 | if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi) |
3701 | return false; |
3702 | const MCOperand &LaneSelOp = Inst.getOperand(i: 2); |
3703 | if (!LaneSelOp.isReg()) |
3704 | return false; |
3705 | auto LaneSelReg = mc2PseudoReg(Reg: LaneSelOp.getReg()); |
3706 | return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11; |
3707 | } |
3708 | |
3709 | bool AMDGPUAsmParser::validateConstantBusLimitations( |
3710 | const MCInst &Inst, const OperandVector &Operands) { |
3711 | const unsigned Opcode = Inst.getOpcode(); |
3712 | const MCInstrDesc &Desc = MII.get(Opcode); |
3713 | unsigned LastSGPR = AMDGPU::NoRegister; |
3714 | unsigned ConstantBusUseCount = 0; |
3715 | unsigned NumLiterals = 0; |
3716 | unsigned LiteralSize; |
3717 | |
3718 | if (!(Desc.TSFlags & |
3719 | (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | |
3720 | SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) && |
3721 | !isVOPD(Opc: Opcode)) |
3722 | return true; |
3723 | |
3724 | if (checkWriteLane(Inst)) |
3725 | return true; |
3726 | |
3727 | // Check special imm operands (used by madmk, etc) |
3728 | if (AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::imm)) { |
3729 | ++NumLiterals; |
3730 | LiteralSize = 4; |
3731 | } |
3732 | |
3733 | SmallDenseSet<unsigned> SGPRsUsed; |
3734 | unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); |
3735 | if (SGPRUsed != AMDGPU::NoRegister) { |
3736 | SGPRsUsed.insert(V: SGPRUsed); |
3737 | ++ConstantBusUseCount; |
3738 | } |
3739 | |
3740 | OperandIndices OpIndices = getSrcOperandIndices(Opcode); |
3741 | |
3742 | for (int OpIdx : OpIndices) { |
3743 | if (OpIdx == -1) |
3744 | continue; |
3745 | |
3746 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3747 | if (usesConstantBus(Inst, OpIdx)) { |
3748 | if (MO.isReg()) { |
3749 | LastSGPR = mc2PseudoReg(Reg: MO.getReg()); |
// Pairs of registers with a partial intersection like these
3751 | // s0, s[0:1] |
3752 | // flat_scratch_lo, flat_scratch |
3753 | // flat_scratch_lo, flat_scratch_hi |
3754 | // are theoretically valid but they are disabled anyway. |
3755 | // Note that this code mimics SIInstrInfo::verifyInstruction |
3756 | if (SGPRsUsed.insert(V: LastSGPR).second) { |
3757 | ++ConstantBusUseCount; |
3758 | } |
3759 | } else { // Expression or a literal |
3760 | |
3761 | if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) |
3762 | continue; // special operand like VINTERP attr_chan |
3763 | |
// An instruction may use only one literal.
// This has been validated in a previous step.
// See validateVOPLiteral.
// This literal may be used by more than one operand.
// If all these operands have the same size,
// the literal counts as one scalar value.
// Otherwise it counts as 2 scalar values.
3771 | // See "GFX10 Shader Programming", section 3.6.2.3. |
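// For example, the same 32-bit literal used by two 32-bit operands adds
// one to the bus count; reusing it for operands of different sizes adds two.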
3772 | |
3773 | unsigned Size = AMDGPU::getOperandSize(Desc, OpNo: OpIdx); |
3774 | if (Size < 4) |
3775 | Size = 4; |
3776 | |
3777 | if (NumLiterals == 0) { |
3778 | NumLiterals = 1; |
3779 | LiteralSize = Size; |
3780 | } else if (LiteralSize != Size) { |
3781 | NumLiterals = 2; |
3782 | } |
3783 | } |
3784 | } |
3785 | } |
3786 | ConstantBusUseCount += NumLiterals; |
3787 | |
3788 | if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) |
3789 | return true; |
3790 | |
3791 | SMLoc LitLoc = getLitLoc(Operands); |
3792 | SMLoc RegLoc = getRegLoc(Reg: LastSGPR, Operands); |
3793 | SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; |
3794 | Error(L: Loc, Msg: "invalid operand (violates constant bus restrictions)" ); |
3795 | return false; |
3796 | } |
3797 | |
3798 | bool AMDGPUAsmParser::validateVOPDRegBankConstraints( |
3799 | const MCInst &Inst, const OperandVector &Operands) { |
3800 | |
3801 | const unsigned Opcode = Inst.getOpcode(); |
3802 | if (!isVOPD(Opc: Opcode)) |
3803 | return true; |
3804 | |
3805 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3806 | |
3807 | auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { |
3808 | const MCOperand &Opr = Inst.getOperand(i: OperandIdx); |
3809 | return (Opr.isReg() && !isSGPR(Reg: mc2PseudoReg(Reg: Opr.getReg()), TRI)) |
3810 | ? Opr.getReg() |
3811 | : MCRegister::NoRegister; |
3812 | }; |
3813 | |
// On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source cache.
3815 | bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; |
3816 | |
3817 | const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII); |
3818 | auto InvalidCompOprIdx = |
3819 | InstInfo.getInvalidCompOperandIndex(GetRegIdx: getVRegIdx, SkipSrc); |
3820 | if (!InvalidCompOprIdx) |
3821 | return true; |
3822 | |
3823 | auto CompOprIdx = *InvalidCompOprIdx; |
3824 | auto ParsedIdx = |
3825 | std::max(a: InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), |
3826 | b: InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); |
3827 | assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); |
3828 | |
3829 | auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); |
3830 | if (CompOprIdx == VOPD::Component::DST) { |
3831 | Error(L: Loc, Msg: "one dst register must be even and the other odd" ); |
3832 | } else { |
3833 | auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; |
3834 | Error(L: Loc, Msg: Twine("src" ) + Twine(CompSrcIdx) + |
3835 | " operands must use different VGPR banks" ); |
3836 | } |
3837 | |
3838 | return false; |
3839 | } |
3840 | |
3841 | bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { |
3842 | |
3843 | const unsigned Opc = Inst.getOpcode(); |
3844 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3845 | |
3846 | if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { |
3847 | int ClampIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp); |
3848 | assert(ClampIdx != -1); |
3849 | return Inst.getOperand(i: ClampIdx).getImm() == 0; |
3850 | } |
3851 | |
3852 | return true; |
3853 | } |
3854 | |
3855 | constexpr uint64_t MIMGFlags = |
3856 | SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; |
3857 | |
3858 | bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, |
3859 | const SMLoc &IDLoc) { |
3860 | |
3861 | const unsigned Opc = Inst.getOpcode(); |
3862 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3863 | |
3864 | if ((Desc.TSFlags & MIMGFlags) == 0) |
3865 | return true; |
3866 | |
3867 | int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdata); |
3868 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dmask); |
3869 | int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::tfe); |
3870 | |
3871 | if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample |
3872 | return true; |
3873 | |
3874 | if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray |
3875 | return true; |
3876 | |
3877 | unsigned VDataSize = AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VDataIdx); |
3878 | unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(i: TFEIdx).getImm()) ? 1 : 0; |
3879 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
3880 | if (DMask == 0) |
3881 | DMask = 1; |
3882 | |
3883 | bool IsPackedD16 = false; |
3884 | unsigned DataSize = |
3885 | (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(Value: DMask); |
3886 | if (hasPackedD16()) { |
3887 | int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::d16); |
3888 | IsPackedD16 = D16Idx >= 0; |
3889 | if (IsPackedD16 && Inst.getOperand(i: D16Idx).getImm()) |
3890 | DataSize = (DataSize + 1) / 2; |
3891 | } |
3892 | |
3893 | if ((VDataSize / 4) == DataSize + TFESize) |
3894 | return true; |
3895 | |
3896 | StringRef Modifiers; |
3897 | if (isGFX90A()) |
3898 | Modifiers = IsPackedD16 ? "dmask and d16" : "dmask" ; |
3899 | else |
3900 | Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe" ; |
3901 | |
3902 | Error(L: IDLoc, Msg: Twine("image data size does not match " ) + Modifiers); |
3903 | return false; |
3904 | } |
3905 | |
3906 | bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, |
3907 | const SMLoc &IDLoc) { |
3908 | const unsigned Opc = Inst.getOpcode(); |
3909 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3910 | |
3911 | if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) |
3912 | return true; |
3913 | |
3914 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); |
3915 | |
3916 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = |
3917 | AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode); |
3918 | int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::vaddr0); |
3919 | int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc |
3920 | : AMDGPU::OpName::rsrc; |
3921 | int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: RSrcOpName); |
3922 | int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dim); |
3923 | int A16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::a16); |
3924 | |
3925 | assert(VAddr0Idx != -1); |
3926 | assert(SrsrcIdx != -1); |
3927 | assert(SrsrcIdx > VAddr0Idx); |
3928 | |
3929 | bool IsA16 = (A16Idx != -1 && Inst.getOperand(i: A16Idx).getImm()); |
3930 | if (BaseOpcode->BVH) { |
3931 | if (IsA16 == BaseOpcode->A16) |
3932 | return true; |
3933 | Error(L: IDLoc, Msg: "image address size does not match a16" ); |
3934 | return false; |
3935 | } |
3936 | |
3937 | unsigned Dim = Inst.getOperand(i: DimIdx).getImm(); |
3938 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim); |
3939 | bool IsNSA = SrsrcIdx - VAddr0Idx > 1; |
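// In NSA (non-sequential address) form each address VGPR occupies its own
// operand, so the operand distance from vaddr0 to the resource operand
// gives the address size in dwords.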
3940 | unsigned ActualAddrSize = |
3941 | IsNSA ? SrsrcIdx - VAddr0Idx |
3942 | : AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VAddr0Idx) / 4; |
3943 | |
3944 | unsigned ExpectedAddrSize = |
3945 | AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim: DimInfo, IsA16, IsG16Supported: hasG16()); |
3946 | |
3947 | if (IsNSA) { |
3948 | if (hasPartialNSAEncoding() && |
3949 | ExpectedAddrSize > |
3950 | getNSAMaxSize(HasSampler: Desc.TSFlags & SIInstrFlags::VSAMPLE)) { |
3951 | int VAddrLastIdx = SrsrcIdx - 1; |
3952 | unsigned VAddrLastSize = |
3953 | AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VAddrLastIdx) / 4; |
3954 | |
3955 | ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; |
3956 | } |
3957 | } else { |
3958 | if (ExpectedAddrSize > 12) |
3959 | ExpectedAddrSize = 16; |
3960 | |
3961 | // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. |
3962 | // This provides backward compatibility for assembly created |
3963 | // before 160b/192b/224b types were directly supported. |
3964 | if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) |
3965 | return true; |
3966 | } |
3967 | |
3968 | if (ActualAddrSize == ExpectedAddrSize) |
3969 | return true; |
3970 | |
3971 | Error(L: IDLoc, Msg: "image address size does not match dim and a16" ); |
3972 | return false; |
3973 | } |
3974 | |
3975 | bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { |
3976 | |
3977 | const unsigned Opc = Inst.getOpcode(); |
3978 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3979 | |
3980 | if ((Desc.TSFlags & MIMGFlags) == 0) |
3981 | return true; |
3982 | if (!Desc.mayLoad() || !Desc.mayStore()) |
3983 | return true; // Not atomic |
3984 | |
3985 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dmask); |
3986 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
3987 | |
// This is an incomplete check because image_atomic_cmpswap
// may only use 0x3 and 0xf while other atomic operations
// may only use 0x1 and 0x3. However, these limitations are
// verified when we check that dmask matches the dst size.
3992 | return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; |
3993 | } |
3994 | |
3995 | bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { |
3996 | |
3997 | const unsigned Opc = Inst.getOpcode(); |
3998 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3999 | |
4000 | if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) |
4001 | return true; |
4002 | |
4003 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dmask); |
4004 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
4005 | |
4006 | // GATHER4 instructions use dmask in a different fashion compared to |
4007 | // other MIMG instructions. The only useful DMASK values are |
4008 | // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns |
4009 | // (red,red,red,red) etc.) The ISA document doesn't mention |
4010 | // this. |
4011 | return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; |
4012 | } |
4013 | |
4014 | bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { |
4015 | const unsigned Opc = Inst.getOpcode(); |
4016 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4017 | |
4018 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4019 | return true; |
4020 | |
4021 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); |
4022 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = |
4023 | AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode); |
4024 | |
4025 | if (!BaseOpcode->MSAA) |
4026 | return true; |
4027 | |
4028 | int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dim); |
4029 | assert(DimIdx != -1); |
4030 | |
4031 | unsigned Dim = Inst.getOperand(i: DimIdx).getImm(); |
4032 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim); |
4033 | |
4034 | return DimInfo->MSAA; |
4035 | } |
4036 | |
4037 | static bool IsMovrelsSDWAOpcode(const unsigned Opcode) |
4038 | { |
4039 | switch (Opcode) { |
4040 | case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: |
4041 | case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: |
4042 | case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: |
4043 | return true; |
4044 | default: |
4045 | return false; |
4046 | } |
4047 | } |
4048 | |
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td descriptions for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
4052 | bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, |
4053 | const OperandVector &Operands) { |
4054 | |
4055 | const unsigned Opc = Inst.getOpcode(); |
4056 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4057 | |
4058 | if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opcode: Opc)) |
4059 | return true; |
4060 | |
4061 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0); |
4062 | assert(Src0Idx != -1); |
4063 | |
4064 | SMLoc ErrLoc; |
4065 | const MCOperand &Src0 = Inst.getOperand(i: Src0Idx); |
4066 | if (Src0.isReg()) { |
4067 | auto Reg = mc2PseudoReg(Reg: Src0.getReg()); |
4068 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4069 | if (!isSGPR(Reg, TRI)) |
4070 | return true; |
4071 | ErrLoc = getRegLoc(Reg, Operands); |
4072 | } else { |
4073 | ErrLoc = getConstLoc(Operands); |
4074 | } |
4075 | |
4076 | Error(L: ErrLoc, Msg: "source operand must be a VGPR" ); |
4077 | return false; |
4078 | } |
4079 | |
4080 | bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, |
4081 | const OperandVector &Operands) { |
4082 | |
4083 | const unsigned Opc = Inst.getOpcode(); |
4084 | |
4085 | if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) |
4086 | return true; |
4087 | |
4088 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0); |
4089 | assert(Src0Idx != -1); |
4090 | |
4091 | const MCOperand &Src0 = Inst.getOperand(i: Src0Idx); |
4092 | if (!Src0.isReg()) |
4093 | return true; |
4094 | |
4095 | auto Reg = mc2PseudoReg(Reg: Src0.getReg()); |
4096 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4097 | if (!isGFX90A() && isSGPR(Reg, TRI)) { |
4098 | Error(L: getRegLoc(Reg, Operands), |
4099 | Msg: "source operand must be either a VGPR or an inline constant" ); |
4100 | return false; |
4101 | } |
4102 | |
4103 | return true; |
4104 | } |
4105 | |
4106 | bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, |
4107 | const OperandVector &Operands) { |
4108 | unsigned Opcode = Inst.getOpcode(); |
4109 | const MCInstrDesc &Desc = MII.get(Opcode); |
4110 | |
4111 | if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || |
4112 | !getFeatureBits()[FeatureMFMAInlineLiteralBug]) |
4113 | return true; |
4114 | |
4115 | const int Src2Idx = getNamedOperandIdx(Opcode, NamedIdx: OpName::src2); |
4116 | if (Src2Idx == -1) |
4117 | return true; |
4118 | |
4119 | if (Inst.getOperand(i: Src2Idx).isImm() && isInlineConstant(Inst, OpIdx: Src2Idx)) { |
4120 | Error(L: getConstLoc(Operands), |
4121 | Msg: "inline constants are not allowed for this operand" ); |
4122 | return false; |
4123 | } |
4124 | |
4125 | return true; |
4126 | } |
4127 | |
4128 | bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, |
4129 | const OperandVector &Operands) { |
4130 | const unsigned Opc = Inst.getOpcode(); |
4131 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4132 | |
4133 | if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) |
4134 | return true; |
4135 | |
4136 | const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src2); |
4137 | if (Src2Idx == -1) |
4138 | return true; |
4139 | |
4140 | const MCOperand &Src2 = Inst.getOperand(i: Src2Idx); |
4141 | if (!Src2.isReg()) |
4142 | return true; |
4143 | |
4144 | MCRegister Src2Reg = Src2.getReg(); |
4145 | MCRegister DstReg = Inst.getOperand(i: 0).getReg(); |
4146 | if (Src2Reg == DstReg) |
4147 | return true; |
4148 | |
4149 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4150 | if (TRI->getRegClass(i: Desc.operands()[0].RegClass).getSizeInBits() <= 128) |
4151 | return true; |
4152 | |
4153 | if (TRI->regsOverlap(RegA: Src2Reg, RegB: DstReg)) { |
4154 | Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Src2Reg), Operands), |
4155 | Msg: "source 2 operand must not partially overlap with dst" ); |
4156 | return false; |
4157 | } |
4158 | |
4159 | return true; |
4160 | } |
4161 | |
4162 | bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { |
4163 | switch (Inst.getOpcode()) { |
4164 | default: |
4165 | return true; |
4166 | case V_DIV_SCALE_F32_gfx6_gfx7: |
4167 | case V_DIV_SCALE_F32_vi: |
4168 | case V_DIV_SCALE_F32_gfx10: |
4169 | case V_DIV_SCALE_F64_gfx6_gfx7: |
4170 | case V_DIV_SCALE_F64_vi: |
4171 | case V_DIV_SCALE_F64_gfx10: |
4172 | break; |
4173 | } |
4174 | |
4175 | // TODO: Check that src0 = src1 or src2. |
4176 | |
for (auto Name : {AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers}) {
4180 | if (Inst.getOperand(i: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), NamedIdx: Name)) |
4181 | .getImm() & |
4182 | SISrcMods::ABS) { |
4183 | return false; |
4184 | } |
4185 | } |
4186 | |
4187 | return true; |
4188 | } |
4189 | |
4190 | bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { |
4191 | |
4192 | const unsigned Opc = Inst.getOpcode(); |
4193 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4194 | |
4195 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4196 | return true; |
4197 | |
4198 | int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::d16); |
4199 | if (D16Idx >= 0 && Inst.getOperand(i: D16Idx).getImm()) { |
4200 | if (isCI() || isSI()) |
4201 | return false; |
4202 | } |
4203 | |
4204 | return true; |
4205 | } |
4206 | |
4207 | static bool IsRevOpcode(const unsigned Opcode) |
4208 | { |
4209 | switch (Opcode) { |
4210 | case AMDGPU::V_SUBREV_F32_e32: |
4211 | case AMDGPU::V_SUBREV_F32_e64: |
4212 | case AMDGPU::V_SUBREV_F32_e32_gfx10: |
4213 | case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: |
4214 | case AMDGPU::V_SUBREV_F32_e32_vi: |
4215 | case AMDGPU::V_SUBREV_F32_e64_gfx10: |
4216 | case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: |
4217 | case AMDGPU::V_SUBREV_F32_e64_vi: |
4218 | |
4219 | case AMDGPU::V_SUBREV_CO_U32_e32: |
4220 | case AMDGPU::V_SUBREV_CO_U32_e64: |
4221 | case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: |
4222 | case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: |
4223 | |
4224 | case AMDGPU::V_SUBBREV_U32_e32: |
4225 | case AMDGPU::V_SUBBREV_U32_e64: |
4226 | case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: |
4227 | case AMDGPU::V_SUBBREV_U32_e32_vi: |
4228 | case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: |
4229 | case AMDGPU::V_SUBBREV_U32_e64_vi: |
4230 | |
4231 | case AMDGPU::V_SUBREV_U32_e32: |
4232 | case AMDGPU::V_SUBREV_U32_e64: |
4233 | case AMDGPU::V_SUBREV_U32_e32_gfx9: |
4234 | case AMDGPU::V_SUBREV_U32_e32_vi: |
4235 | case AMDGPU::V_SUBREV_U32_e64_gfx9: |
4236 | case AMDGPU::V_SUBREV_U32_e64_vi: |
4237 | |
4238 | case AMDGPU::V_SUBREV_F16_e32: |
4239 | case AMDGPU::V_SUBREV_F16_e64: |
4240 | case AMDGPU::V_SUBREV_F16_e32_gfx10: |
4241 | case AMDGPU::V_SUBREV_F16_e32_vi: |
4242 | case AMDGPU::V_SUBREV_F16_e64_gfx10: |
4243 | case AMDGPU::V_SUBREV_F16_e64_vi: |
4244 | |
4245 | case AMDGPU::V_SUBREV_U16_e32: |
4246 | case AMDGPU::V_SUBREV_U16_e64: |
4247 | case AMDGPU::V_SUBREV_U16_e32_vi: |
4248 | case AMDGPU::V_SUBREV_U16_e64_vi: |
4249 | |
4250 | case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: |
4251 | case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: |
4252 | case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: |
4253 | |
4254 | case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: |
4255 | case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: |
4256 | |
4257 | case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: |
4258 | case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: |
4259 | |
4260 | case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: |
4261 | case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: |
4262 | |
4263 | case AMDGPU::V_LSHRREV_B32_e32: |
4264 | case AMDGPU::V_LSHRREV_B32_e64: |
4265 | case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: |
4266 | case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: |
4267 | case AMDGPU::V_LSHRREV_B32_e32_vi: |
4268 | case AMDGPU::V_LSHRREV_B32_e64_vi: |
4269 | case AMDGPU::V_LSHRREV_B32_e32_gfx10: |
4270 | case AMDGPU::V_LSHRREV_B32_e64_gfx10: |
4271 | |
4272 | case AMDGPU::V_ASHRREV_I32_e32: |
4273 | case AMDGPU::V_ASHRREV_I32_e64: |
4274 | case AMDGPU::V_ASHRREV_I32_e32_gfx10: |
4275 | case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: |
4276 | case AMDGPU::V_ASHRREV_I32_e32_vi: |
4277 | case AMDGPU::V_ASHRREV_I32_e64_gfx10: |
4278 | case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: |
4279 | case AMDGPU::V_ASHRREV_I32_e64_vi: |
4280 | |
4281 | case AMDGPU::V_LSHLREV_B32_e32: |
4282 | case AMDGPU::V_LSHLREV_B32_e64: |
4283 | case AMDGPU::V_LSHLREV_B32_e32_gfx10: |
4284 | case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: |
4285 | case AMDGPU::V_LSHLREV_B32_e32_vi: |
4286 | case AMDGPU::V_LSHLREV_B32_e64_gfx10: |
4287 | case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: |
4288 | case AMDGPU::V_LSHLREV_B32_e64_vi: |
4289 | |
4290 | case AMDGPU::V_LSHLREV_B16_e32: |
4291 | case AMDGPU::V_LSHLREV_B16_e64: |
4292 | case AMDGPU::V_LSHLREV_B16_e32_vi: |
4293 | case AMDGPU::V_LSHLREV_B16_e64_vi: |
4294 | case AMDGPU::V_LSHLREV_B16_gfx10: |
4295 | |
4296 | case AMDGPU::V_LSHRREV_B16_e32: |
4297 | case AMDGPU::V_LSHRREV_B16_e64: |
4298 | case AMDGPU::V_LSHRREV_B16_e32_vi: |
4299 | case AMDGPU::V_LSHRREV_B16_e64_vi: |
4300 | case AMDGPU::V_LSHRREV_B16_gfx10: |
4301 | |
4302 | case AMDGPU::V_ASHRREV_I16_e32: |
4303 | case AMDGPU::V_ASHRREV_I16_e64: |
4304 | case AMDGPU::V_ASHRREV_I16_e32_vi: |
4305 | case AMDGPU::V_ASHRREV_I16_e64_vi: |
4306 | case AMDGPU::V_ASHRREV_I16_gfx10: |
4307 | |
4308 | case AMDGPU::V_LSHLREV_B64_e64: |
4309 | case AMDGPU::V_LSHLREV_B64_gfx10: |
4310 | case AMDGPU::V_LSHLREV_B64_vi: |
4311 | |
4312 | case AMDGPU::V_LSHRREV_B64_e64: |
4313 | case AMDGPU::V_LSHRREV_B64_gfx10: |
4314 | case AMDGPU::V_LSHRREV_B64_vi: |
4315 | |
4316 | case AMDGPU::V_ASHRREV_I64_e64: |
4317 | case AMDGPU::V_ASHRREV_I64_gfx10: |
4318 | case AMDGPU::V_ASHRREV_I64_vi: |
4319 | |
4320 | case AMDGPU::V_PK_LSHLREV_B16: |
4321 | case AMDGPU::V_PK_LSHLREV_B16_gfx10: |
4322 | case AMDGPU::V_PK_LSHLREV_B16_vi: |
4323 | |
4324 | case AMDGPU::V_PK_LSHRREV_B16: |
4325 | case AMDGPU::V_PK_LSHRREV_B16_gfx10: |
4326 | case AMDGPU::V_PK_LSHRREV_B16_vi: |
4327 | case AMDGPU::V_PK_ASHRREV_I16: |
4328 | case AMDGPU::V_PK_ASHRREV_I16_gfx10: |
4329 | case AMDGPU::V_PK_ASHRREV_I16_vi: |
4330 | return true; |
4331 | default: |
4332 | return false; |
4333 | } |
4334 | } |
4335 | |
4336 | std::optional<StringRef> |
4337 | AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { |
4338 | |
4339 | using namespace SIInstrFlags; |
4340 | const unsigned Opcode = Inst.getOpcode(); |
4341 | const MCInstrDesc &Desc = MII.get(Opcode); |
4342 | |
// The lds_direct register is defined so that it can be used
// with 9-bit operands only. Ignore encodings that do not accept these.
4345 | const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; |
4346 | if ((Desc.TSFlags & Enc) == 0) |
4347 | return std::nullopt; |
4348 | |
4349 | for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { |
4350 | auto SrcIdx = getNamedOperandIdx(Opcode, NamedIdx: SrcName); |
4351 | if (SrcIdx == -1) |
4352 | break; |
4353 | const auto &Src = Inst.getOperand(i: SrcIdx); |
4354 | if (Src.isReg() && Src.getReg() == LDS_DIRECT) { |
4355 | |
4356 | if (isGFX90A() || isGFX11Plus()) |
4357 | return StringRef("lds_direct is not supported on this GPU" ); |
4358 | |
4359 | if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) |
4360 | return StringRef("lds_direct cannot be used with this instruction" ); |
4361 | |
4362 | if (SrcName != OpName::src0) |
4363 | return StringRef("lds_direct may be used as src0 only" ); |
4364 | } |
4365 | } |
4366 | |
4367 | return std::nullopt; |
4368 | } |
4369 | |
4370 | SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { |
4371 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
4372 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4373 | if (Op.isFlatOffset()) |
4374 | return Op.getStartLoc(); |
4375 | } |
4376 | return getLoc(); |
4377 | } |
4378 | |
4379 | bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, |
4380 | const OperandVector &Operands) { |
4381 | auto Opcode = Inst.getOpcode(); |
4382 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, NamedIdx: AMDGPU::OpName::offset); |
4383 | if (OpNum == -1) |
4384 | return true; |
4385 | |
4386 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4387 | if ((TSFlags & SIInstrFlags::FLAT)) |
4388 | return validateFlatOffset(Inst, Operands); |
4389 | |
4390 | if ((TSFlags & SIInstrFlags::SMRD)) |
4391 | return validateSMEMOffset(Inst, Operands); |
4392 | |
4393 | const auto &Op = Inst.getOperand(i: OpNum); |
4394 | if (isGFX12Plus() && |
4395 | (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { |
4396 | const unsigned OffsetSize = 24; |
4397 | if (!isIntN(N: OffsetSize, x: Op.getImm())) { |
4398 | Error(L: getFlatOffsetLoc(Operands), |
4399 | Msg: Twine("expected a " ) + Twine(OffsetSize) + "-bit signed offset" ); |
4400 | return false; |
4401 | } |
4402 | } else { |
4403 | const unsigned OffsetSize = 16; |
4404 | if (!isUIntN(N: OffsetSize, x: Op.getImm())) { |
4405 | Error(L: getFlatOffsetLoc(Operands), |
4406 | Msg: Twine("expected a " ) + Twine(OffsetSize) + "-bit unsigned offset" ); |
4407 | return false; |
4408 | } |
4409 | } |
4410 | return true; |
4411 | } |
4412 | |
4413 | bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, |
4414 | const OperandVector &Operands) { |
4415 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4416 | if ((TSFlags & SIInstrFlags::FLAT) == 0) |
4417 | return true; |
4418 | |
4419 | auto Opcode = Inst.getOpcode(); |
4420 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, NamedIdx: AMDGPU::OpName::offset); |
4421 | assert(OpNum != -1); |
4422 | |
4423 | const auto &Op = Inst.getOperand(i: OpNum); |
4424 | if (!hasFlatOffsets() && Op.getImm() != 0) { |
4425 | Error(L: getFlatOffsetLoc(Operands), |
4426 | Msg: "flat offset modifier is not supported on this GPU" ); |
4427 | return false; |
4428 | } |
4429 | |
4430 | // For pre-GFX12 FLAT instructions the offset must be positive; |
4431 | // MSB is ignored and forced to zero. |
4432 | unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(ST: getSTI()); |
4433 | bool AllowNegative = |
4434 | (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || |
4435 | isGFX12Plus(); |
4436 | if (!isIntN(N: OffsetSize, x: Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { |
4437 | Error(L: getFlatOffsetLoc(Operands), |
4438 | Msg: Twine("expected a " ) + |
4439 | (AllowNegative ? Twine(OffsetSize) + "-bit signed offset" |
4440 | : Twine(OffsetSize - 1) + "-bit unsigned offset" )); |
4441 | return false; |
4442 | } |
4443 | |
4444 | return true; |
4445 | } |
4446 | |
4447 | SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { |
4448 | // Start with second operand because SMEM Offset cannot be dst or src0. |
4449 | for (unsigned i = 2, e = Operands.size(); i != e; ++i) { |
4450 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4451 | if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) |
4452 | return Op.getStartLoc(); |
4453 | } |
4454 | return getLoc(); |
4455 | } |
4456 | |
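// Check that an SMEM immediate offset is encodable on the current subtarget
// (20-bit unsigned for VI and buffers, 21-bit signed otherwise, 24-bit signed
// on GFX12+).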
4457 | bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, |
4458 | const OperandVector &Operands) { |
4459 | if (isCI() || isSI()) |
4460 | return true; |
4461 | |
4462 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4463 | if ((TSFlags & SIInstrFlags::SMRD) == 0) |
4464 | return true; |
4465 | |
4466 | auto Opcode = Inst.getOpcode(); |
4467 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, NamedIdx: AMDGPU::OpName::offset); |
4468 | if (OpNum == -1) |
4469 | return true; |
4470 | |
4471 | const auto &Op = Inst.getOperand(i: OpNum); |
4472 | if (!Op.isImm()) |
4473 | return true; |
4474 | |
4475 | uint64_t Offset = Op.getImm(); |
4476 | bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opc: Opcode); |
4477 | if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(ST: getSTI(), EncodedOffset: Offset) || |
4478 | AMDGPU::isLegalSMRDEncodedSignedOffset(ST: getSTI(), EncodedOffset: Offset, IsBuffer)) |
4479 | return true; |
4480 | |
4481 | Error(L: getSMEMOffsetLoc(Operands), |
4482 | Msg: isGFX12Plus() ? "expected a 24-bit signed offset" |
4483 | : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" |
4484 | : "expected a 21-bit signed offset" ); |
4485 | |
4486 | return false; |
4487 | } |
4488 | |
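// SOP2 and SOPC instructions may use at most one unique literal (or
// relocatable expression) across src0 and src1.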
4489 | bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { |
4490 | unsigned Opcode = Inst.getOpcode(); |
4491 | const MCInstrDesc &Desc = MII.get(Opcode); |
4492 | if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) |
4493 | return true; |
4494 | |
4495 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, NamedIdx: AMDGPU::OpName::src0); |
4496 | const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, NamedIdx: AMDGPU::OpName::src1); |
4497 | |
4498 | const int OpIndices[] = { Src0Idx, Src1Idx }; |
4499 | |
4500 | unsigned NumExprs = 0; |
4501 | unsigned NumLiterals = 0; |
4502 | uint32_t LiteralValue; |
4503 | |
4504 | for (int OpIdx : OpIndices) { |
4505 | if (OpIdx == -1) break; |
4506 | |
4507 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
    // Exclude special imm operands (like those used by s_set_gpr_idx_on).
4509 | if (AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx)) { |
4510 | if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { |
4511 | uint32_t Value = static_cast<uint32_t>(MO.getImm()); |
4512 | if (NumLiterals == 0 || LiteralValue != Value) { |
4513 | LiteralValue = Value; |
4514 | ++NumLiterals; |
4515 | } |
4516 | } else if (MO.isExpr()) { |
4517 | ++NumExprs; |
4518 | } |
4519 | } |
4520 | } |
4521 | |
4522 | return NumLiterals + NumExprs <= 1; |
4523 | } |
4524 | |
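// Validate opcode- and subtarget-specific restrictions on the op_sel and
// op_sel_hi modifiers (permlane16, gfx940 dot instructions, and GFX11+ VOP3
// dot instructions).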
4525 | bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { |
4526 | const unsigned Opc = Inst.getOpcode(); |
4527 | if (isPermlane16(Opc)) { |
4528 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
4529 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
4530 | |
4531 | if (OpSel & ~3) |
4532 | return false; |
4533 | } |
4534 | |
4535 | uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags; |
4536 | |
4537 | if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { |
4538 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
4539 | if (OpSelIdx != -1) { |
4540 | if (Inst.getOperand(i: OpSelIdx).getImm() != 0) |
4541 | return false; |
4542 | } |
4543 | int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel_hi); |
4544 | if (OpSelHiIdx != -1) { |
4545 | if (Inst.getOperand(i: OpSelHiIdx).getImm() != -1) |
4546 | return false; |
4547 | } |
4548 | } |
4549 | |
4550 | // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). |
4551 | if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && |
4552 | (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { |
4553 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
4554 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
4555 | if (OpSel & 3) |
4556 | return false; |
4557 | } |
4558 | |
4559 | return true; |
4560 | } |
4561 | |
4562 | bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) { |
4563 | assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi); |
4564 | |
4565 | const unsigned Opc = Inst.getOpcode(); |
4566 | uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags; |
4567 | |
4568 | // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2) |
4569 | // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1) |
4570 | // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1) |
4571 | // other wmma/swmmac instructions don't have neg_lo/neg_hi operand. |
4572 | if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) && |
4573 | !(TSFlags & SIInstrFlags::IsSWMMAC)) |
4574 | return true; |
4575 | |
4576 | int NegIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: OpName); |
4577 | if (NegIdx == -1) |
4578 | return true; |
4579 | |
4580 | unsigned Neg = Inst.getOperand(i: NegIdx).getImm(); |
4581 | |
  // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
  // allowed only on a subset of their src operands. Conveniently, such
  // instructions have no src_modifiers operand for the src operands that
  // disallow neg, because those operands do not allow opsel either.
4586 | |
4587 | int SrcMods[3] = {AMDGPU::OpName::src0_modifiers, |
4588 | AMDGPU::OpName::src1_modifiers, |
4589 | AMDGPU::OpName::src2_modifiers}; |
4590 | |
4591 | for (unsigned i = 0; i < 3; ++i) { |
4592 | if (!AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: SrcMods[i])) { |
4593 | if (Neg & (1 << i)) |
4594 | return false; |
4595 | } |
4596 | } |
4597 | |
4598 | return true; |
4599 | } |
4600 | |
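// Validate DPP restrictions: DP (64-bit) ALU DPP only supports row_newbcast
// controls, and on subtargets without SGPR src1 support for DPP, src1 may be
// neither an SGPR nor an immediate.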
4601 | bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, |
4602 | const OperandVector &Operands) { |
4603 | const unsigned Opc = Inst.getOpcode(); |
4604 | int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dpp_ctrl); |
4605 | if (DppCtrlIdx >= 0) { |
4606 | unsigned DppCtrl = Inst.getOperand(i: DppCtrlIdx).getImm(); |
4607 | |
4608 | if (!AMDGPU::isLegalDPALU_DPPControl(DC: DppCtrl) && |
4609 | AMDGPU::isDPALU_DPP(OpDesc: MII.get(Opcode: Opc))) { |
4610 | // DP ALU DPP is supported for row_newbcast only on GFX9* |
4611 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyDppCtrl, Operands); |
4612 | Error(L: S, Msg: "DP ALU dpp only supports row_newbcast" ); |
4613 | return false; |
4614 | } |
4615 | } |
4616 | |
4617 | int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dpp8); |
4618 | bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; |
4619 | |
4620 | if (IsDPP && !hasDPPSrc1SGPR(STI: getSTI())) { |
4621 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1); |
4622 | if (Src1Idx >= 0) { |
4623 | const MCOperand &Src1 = Inst.getOperand(i: Src1Idx); |
4624 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4625 | if (Src1.isReg() && isSGPR(Reg: mc2PseudoReg(Reg: Src1.getReg()), TRI)) { |
4626 | auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src1Idx).getReg()); |
4627 | SMLoc S = getRegLoc(Reg, Operands); |
4628 | Error(L: S, Msg: "invalid operand for instruction" ); |
4629 | return false; |
4630 | } |
4631 | if (Src1.isImm()) { |
4632 | Error(L: getInstLoc(Operands), |
4633 | Msg: "src1 immediate operand invalid for instruction" ); |
4634 | return false; |
4635 | } |
4636 | } |
4637 | } |
4638 | |
4639 | return true; |
4640 | } |
4641 | |
4642 | // Check if VCC register matches wavefront size |
4643 | bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { |
4644 | auto FB = getFeatureBits(); |
4645 | return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || |
4646 | (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); |
4647 | } |
4648 | |
// Only one unique literal may be used. VOP3 literals are allowed only on GFX10+.
4650 | bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, |
4651 | const OperandVector &Operands) { |
4652 | unsigned Opcode = Inst.getOpcode(); |
4653 | const MCInstrDesc &Desc = MII.get(Opcode); |
4654 | bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, NamedIdx: OpName::imm) != -1; |
4655 | if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && |
4656 | !HasMandatoryLiteral && !isVOPD(Opc: Opcode)) |
4657 | return true; |
4658 | |
4659 | OperandIndices OpIndices = getSrcOperandIndices(Opcode, AddMandatoryLiterals: HasMandatoryLiteral); |
4660 | |
4661 | unsigned NumExprs = 0; |
4662 | unsigned NumLiterals = 0; |
  uint64_t LiteralValue;
4664 | |
4665 | for (int OpIdx : OpIndices) { |
4666 | if (OpIdx == -1) |
4667 | continue; |
4668 | |
4669 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
4670 | if (!MO.isImm() && !MO.isExpr()) |
4671 | continue; |
4672 | if (!isSISrcOperand(Desc, OpNo: OpIdx)) |
4673 | continue; |
4674 | |
4675 | if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { |
4676 | uint64_t Value = static_cast<uint64_t>(MO.getImm()); |
4677 | bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpNo: OpIdx) && |
4678 | AMDGPU::getOperandSize(OpInfo: Desc.operands()[OpIdx]) == 8; |
4679 | bool IsValid32Op = AMDGPU::isValid32BitLiteral(Val: Value, IsFP64); |
4680 | |
4681 | if (!IsValid32Op && !isInt<32>(x: Value) && !isUInt<32>(x: Value)) { |
4682 | Error(L: getLitLoc(Operands), Msg: "invalid operand for instruction" ); |
4683 | return false; |
4684 | } |
4685 | |
4686 | if (IsFP64 && IsValid32Op) |
4687 | Value = Hi_32(Value); |
4688 | |
4689 | if (NumLiterals == 0 || LiteralValue != Value) { |
4690 | LiteralValue = Value; |
4691 | ++NumLiterals; |
4692 | } |
4693 | } else if (MO.isExpr()) { |
4694 | ++NumExprs; |
4695 | } |
4696 | } |
4697 | NumLiterals += NumExprs; |
4698 | |
4699 | if (!NumLiterals) |
4700 | return true; |
4701 | |
4702 | if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { |
4703 | Error(L: getLitLoc(Operands), Msg: "literal operands are not supported" ); |
4704 | return false; |
4705 | } |
4706 | |
4707 | if (NumLiterals > 1) { |
4708 | Error(L: getLitLoc(Operands, SearchMandatoryLiterals: true), Msg: "only one unique literal operand is allowed" ); |
4709 | return false; |
4710 | } |
4711 | |
4712 | return true; |
4713 | } |
4714 | |
4715 | // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. |
4716 | static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, |
4717 | const MCRegisterInfo *MRI) { |
4718 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), NamedIdx: NameIdx); |
4719 | if (OpIdx < 0) |
4720 | return -1; |
4721 | |
4722 | const MCOperand &Op = Inst.getOperand(i: OpIdx); |
4723 | if (!Op.isReg()) |
4724 | return -1; |
4725 | |
4726 | unsigned Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0); |
4727 | auto Reg = Sub ? Sub : Op.getReg(); |
4728 | const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID); |
4729 | return AGPR32.contains(Reg) ? 1 : 0; |
4730 | } |
4731 | |
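// For FLAT/MUBUF/MTBUF/MIMG/DS memory instructions, check that the data and
// destination operands are consistently VGPRs or consistently AGPRs; AGPR
// data is only accepted on subtargets with gfx90a instructions.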
4732 | bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { |
4733 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4734 | if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | |
4735 | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | |
4736 | SIInstrFlags::DS)) == 0) |
4737 | return true; |
4738 | |
4739 | uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 |
4740 | : AMDGPU::OpName::vdata; |
4741 | |
4742 | const MCRegisterInfo *MRI = getMRI(); |
4743 | int DstAreg = IsAGPROperand(Inst, NameIdx: AMDGPU::OpName::vdst, MRI); |
4744 | int DataAreg = IsAGPROperand(Inst, NameIdx: DataNameIdx, MRI); |
4745 | |
4746 | if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { |
4747 | int Data2Areg = IsAGPROperand(Inst, NameIdx: AMDGPU::OpName::data1, MRI); |
4748 | if (Data2Areg >= 0 && Data2Areg != DataAreg) |
4749 | return false; |
4750 | } |
4751 | |
4752 | auto FB = getFeatureBits(); |
4753 | if (FB[AMDGPU::FeatureGFX90AInsts]) { |
4754 | if (DataAreg < 0 || DstAreg < 0) |
4755 | return true; |
4756 | return DstAreg == DataAreg; |
4757 | } |
4758 | |
4759 | return DstAreg < 1 && DataAreg < 1; |
4760 | } |
4761 | |
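// gfx90a requires VGPR and AGPR tuples to start at an even-numbered register.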
4762 | bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { |
4763 | auto FB = getFeatureBits(); |
4764 | if (!FB[AMDGPU::FeatureGFX90AInsts]) |
4765 | return true; |
4766 | |
4767 | const MCRegisterInfo *MRI = getMRI(); |
4768 | const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID); |
4769 | const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID); |
4770 | for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { |
4771 | const MCOperand &Op = Inst.getOperand(i: I); |
4772 | if (!Op.isReg()) |
4773 | continue; |
4774 | |
4775 | unsigned Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0); |
4776 | if (!Sub) |
4777 | continue; |
4778 | |
4779 | if (VGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::VGPR0) & 1)) |
4780 | return false; |
4781 | if (AGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::AGPR0) & 1)) |
4782 | return false; |
4783 | } |
4784 | |
4785 | return true; |
4786 | } |
4787 | |
4788 | SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { |
4789 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
4790 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4791 | if (Op.isBLGP()) |
4792 | return Op.getStartLoc(); |
4793 | } |
4794 | return SMLoc(); |
4795 | } |
4796 | |
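// On gfx940 the F64 MFMA opcodes spell the blgp field as a 'neg:' modifier;
// reject whichever spelling the current opcode does not use.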
4797 | bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, |
4798 | const OperandVector &Operands) { |
4799 | unsigned Opc = Inst.getOpcode(); |
4800 | int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::blgp); |
4801 | if (BlgpIdx == -1) |
4802 | return true; |
4803 | SMLoc BLGPLoc = getBLGPLoc(Operands); |
4804 | if (!BLGPLoc.isValid()) |
4805 | return true; |
4806 | bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with(Prefix: "neg:" ); |
4807 | auto FB = getFeatureBits(); |
4808 | bool UsesNeg = false; |
4809 | if (FB[AMDGPU::FeatureGFX940Insts]) { |
4810 | switch (Opc) { |
4811 | case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: |
4812 | case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: |
4813 | case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: |
4814 | case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: |
4815 | UsesNeg = true; |
4816 | } |
4817 | } |
4818 | |
4819 | if (IsNeg == UsesNeg) |
4820 | return true; |
4821 | |
4822 | Error(L: BLGPLoc, |
4823 | Msg: UsesNeg ? "invalid modifier: blgp is not supported" |
4824 | : "invalid modifier: neg is not supported" ); |
4825 | |
4826 | return false; |
4827 | } |
4828 | |
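// The GFX11 split s_waitcnt_*cnt instructions accept only 'null' as their
// register operand.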
4829 | bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, |
4830 | const OperandVector &Operands) { |
4831 | if (!isGFX11Plus()) |
4832 | return true; |
4833 | |
4834 | unsigned Opc = Inst.getOpcode(); |
4835 | if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && |
4836 | Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && |
4837 | Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && |
4838 | Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) |
4839 | return true; |
4840 | |
4841 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::sdst); |
4842 | assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); |
4843 | auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg()); |
4844 | if (Reg == AMDGPU::SGPR_NULL) |
4845 | return true; |
4846 | |
4847 | SMLoc RegLoc = getRegLoc(Reg, Operands); |
4848 | Error(L: RegLoc, Msg: "src0 must be null" ); |
4849 | return false; |
4850 | } |
4851 | |
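// DS-specific checks: GWS opcodes have extra register alignment rules, and
// the gds modifier is rejected on targets without GDS.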
4852 | bool AMDGPUAsmParser::validateDS(const MCInst &Inst, |
4853 | const OperandVector &Operands) { |
4854 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4855 | if ((TSFlags & SIInstrFlags::DS) == 0) |
4856 | return true; |
4857 | if (TSFlags & SIInstrFlags::GWS) |
4858 | return validateGWS(Inst, Operands); |
4859 | // Only validate GDS for non-GWS instructions. |
4860 | if (hasGDS()) |
4861 | return true; |
4862 | int GDSIdx = |
4863 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::gds); |
4864 | if (GDSIdx < 0) |
4865 | return true; |
4866 | unsigned GDS = Inst.getOperand(i: GDSIdx).getImm(); |
4867 | if (GDS) { |
4868 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyGDS, Operands); |
4869 | Error(L: S, Msg: "gds modifier is not supported on this GPU" ); |
4870 | return false; |
4871 | } |
4872 | return true; |
4873 | } |
4874 | |
4875 | // gfx90a has an undocumented limitation: |
4876 | // DS_GWS opcodes must use even aligned registers. |
4877 | bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, |
4878 | const OperandVector &Operands) { |
4879 | if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) |
4880 | return true; |
4881 | |
4882 | int Opc = Inst.getOpcode(); |
4883 | if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && |
4884 | Opc != AMDGPU::DS_GWS_SEMA_BR_vi) |
4885 | return true; |
4886 | |
4887 | const MCRegisterInfo *MRI = getMRI(); |
4888 | const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID); |
4889 | int Data0Pos = |
4890 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::data0); |
4891 | assert(Data0Pos != -1); |
4892 | auto Reg = Inst.getOperand(i: Data0Pos).getReg(); |
4893 | auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); |
4894 | if (RegIdx & 1) { |
4895 | SMLoc RegLoc = getRegLoc(Reg, Operands); |
4896 | Error(L: RegLoc, Msg: "vgpr must be even aligned" ); |
4897 | return false; |
4898 | } |
4899 | |
4900 | return true; |
4901 | } |
4902 | |
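// Validate the cache policy (cpol) operand. GFX12+ th/scope combinations are
// checked separately; earlier targets enforce the SMRD, SCC and atomic
// glc/sc0 restrictions.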
4903 | bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, |
4904 | const OperandVector &Operands, |
4905 | const SMLoc &IDLoc) { |
4906 | int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), |
4907 | NamedIdx: AMDGPU::OpName::cpol); |
4908 | if (CPolPos == -1) |
4909 | return true; |
4910 | |
4911 | unsigned CPol = Inst.getOperand(i: CPolPos).getImm(); |
4912 | |
4913 | if (isGFX12Plus()) |
4914 | return validateTHAndScopeBits(Inst, Operands, CPol); |
4915 | |
4916 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4917 | if (TSFlags & SIInstrFlags::SMRD) { |
4918 | if (CPol && (isSI() || isCI())) { |
4919 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
4920 | Error(L: S, Msg: "cache policy is not supported for SMRD instructions" ); |
4921 | return false; |
4922 | } |
4923 | if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { |
4924 | Error(L: IDLoc, Msg: "invalid cache policy for SMEM instruction" ); |
4925 | return false; |
4926 | } |
4927 | } |
4928 | |
4929 | if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { |
4930 | const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | |
4931 | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | |
4932 | SIInstrFlags::FLAT; |
4933 | if (!(TSFlags & AllowSCCModifier)) { |
4934 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
4935 | StringRef CStr(S.getPointer()); |
4936 | S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scc" )]); |
4937 | Error(L: S, |
4938 | Msg: "scc modifier is not supported for this instruction on this GPU" ); |
4939 | return false; |
4940 | } |
4941 | } |
4942 | |
4943 | if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) |
4944 | return true; |
4945 | |
4946 | if (TSFlags & SIInstrFlags::IsAtomicRet) { |
4947 | if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { |
4948 | Error(L: IDLoc, Msg: isGFX940() ? "instruction must use sc0" |
4949 | : "instruction must use glc" ); |
4950 | return false; |
4951 | } |
4952 | } else { |
4953 | if (CPol & CPol::GLC) { |
4954 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
4955 | StringRef CStr(S.getPointer()); |
4956 | S = SMLoc::getFromPointer( |
4957 | Ptr: &CStr.data()[CStr.find(Str: isGFX940() ? "sc0" : "glc" )]); |
4958 | Error(L: S, Msg: isGFX940() ? "instruction must not use sc0" |
4959 | : "instruction must not use glc" ); |
4960 | return false; |
4961 | } |
4962 | } |
4963 | |
4964 | return true; |
4965 | } |
4966 | |
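// GFX12+ cache policy: check that the temporal hint (th) and scope values are
// valid for the instruction class (SMEM, atomic, load or store).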
4967 | bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, |
4968 | const OperandVector &Operands, |
4969 | const unsigned CPol) { |
4970 | const unsigned TH = CPol & AMDGPU::CPol::TH; |
4971 | const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; |
4972 | |
4973 | const unsigned Opcode = Inst.getOpcode(); |
4974 | const MCInstrDesc &TID = MII.get(Opcode); |
4975 | |
4976 | auto PrintError = [&](StringRef Msg) { |
4977 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
4978 | Error(L: S, Msg); |
4979 | return false; |
4980 | }; |
4981 | |
4982 | if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && |
4983 | (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && |
4984 | (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) |
4985 | return PrintError("instruction must use th:TH_ATOMIC_RETURN" ); |
4986 | |
4987 | if (TH == 0) |
4988 | return true; |
4989 | |
4990 | if ((TID.TSFlags & SIInstrFlags::SMRD) && |
4991 | ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || |
4992 | (TH == AMDGPU::CPol::TH_NT_HT))) |
4993 | return PrintError("invalid th value for SMEM instruction" ); |
4994 | |
4995 | if (TH == AMDGPU::CPol::TH_BYPASS) { |
4996 | if ((Scope != AMDGPU::CPol::SCOPE_SYS && |
4997 | CPol & AMDGPU::CPol::TH_REAL_BYPASS) || |
4998 | (Scope == AMDGPU::CPol::SCOPE_SYS && |
4999 | !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) |
5000 | return PrintError("scope and th combination is not valid" ); |
5001 | } |
5002 | |
5003 | bool IsStore = TID.mayStore(); |
5004 | bool IsAtomic = |
5005 | TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet); |
5006 | |
5007 | if (IsAtomic) { |
5008 | if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) |
5009 | return PrintError("invalid th value for atomic instructions" ); |
5010 | } else if (IsStore) { |
5011 | if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) |
5012 | return PrintError("invalid th value for store instructions" ); |
5013 | } else { |
5014 | if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) |
5015 | return PrintError("invalid th value for load instructions" ); |
5016 | } |
5017 | |
5018 | return true; |
5019 | } |
5020 | |
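// execz and vccz are no longer available as source operands on GFX11+.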
5021 | bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { |
5022 | if (!isGFX11Plus()) |
5023 | return true; |
5024 | for (auto &Operand : Operands) { |
5025 | if (!Operand->isReg()) |
5026 | continue; |
5027 | unsigned Reg = Operand->getReg(); |
5028 | if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { |
5029 | Error(L: getRegLoc(Reg, Operands), |
5030 | Msg: "execz and vccz are not supported on this GPU" ); |
5031 | return false; |
5032 | } |
5033 | } |
5034 | return true; |
5035 | } |
5036 | |
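// The tfe modifier is meaningless for buffer stores; reject it when it is
// explicitly specified.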
5037 | bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, |
5038 | const OperandVector &Operands) { |
5039 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
5040 | if (Desc.mayStore() && |
5041 | (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { |
5042 | SMLoc Loc = getImmLoc(Type: AMDGPUOperand::ImmTyTFE, Operands); |
5043 | if (Loc != getInstLoc(Operands)) { |
5044 | Error(L: Loc, Msg: "TFE modifier has no meaning for store instructions" ); |
5045 | return false; |
5046 | } |
5047 | } |
5048 | |
5049 | return true; |
5050 | } |
5051 | |
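// Run all semantic checks on a successfully matched instruction. Returns
// false and emits a diagnostic on the first violation.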
5052 | bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, |
5053 | const SMLoc &IDLoc, |
5054 | const OperandVector &Operands) { |
5055 | if (auto ErrMsg = validateLdsDirect(Inst)) { |
5056 | Error(L: getRegLoc(Reg: LDS_DIRECT, Operands), Msg: *ErrMsg); |
5057 | return false; |
5058 | } |
5059 | if (!validateSOPLiteral(Inst)) { |
5060 | Error(L: getLitLoc(Operands), |
5061 | Msg: "only one unique literal operand is allowed" ); |
5062 | return false; |
5063 | } |
5064 | if (!validateVOPLiteral(Inst, Operands)) { |
5065 | return false; |
5066 | } |
5067 | if (!validateConstantBusLimitations(Inst, Operands)) { |
5068 | return false; |
5069 | } |
5070 | if (!validateVOPDRegBankConstraints(Inst, Operands)) { |
5071 | return false; |
5072 | } |
5073 | if (!validateIntClampSupported(Inst)) { |
5074 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyClamp, Operands), |
5075 | Msg: "integer clamping is not supported on this GPU" ); |
5076 | return false; |
5077 | } |
5078 | if (!validateOpSel(Inst)) { |
5079 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands), |
5080 | Msg: "invalid op_sel operand" ); |
5081 | return false; |
5082 | } |
5083 | if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_lo)) { |
5084 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegLo, Operands), |
5085 | Msg: "invalid neg_lo operand" ); |
5086 | return false; |
5087 | } |
5088 | if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_hi)) { |
5089 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegHi, Operands), |
5090 | Msg: "invalid neg_hi operand" ); |
5091 | return false; |
5092 | } |
5093 | if (!validateDPP(Inst, Operands)) { |
5094 | return false; |
5095 | } |
  // For MUBUF and MTBUF, d16 is part of the opcode, so there is nothing to
  // validate.
5097 | if (!validateMIMGD16(Inst)) { |
5098 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands), |
5099 | Msg: "d16 modifier is not supported on this GPU" ); |
5100 | return false; |
5101 | } |
5102 | if (!validateMIMGMSAA(Inst)) { |
5103 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDim, Operands), |
5104 | Msg: "invalid dim; must be MSAA type" ); |
5105 | return false; |
5106 | } |
5107 | if (!validateMIMGDataSize(Inst, IDLoc)) { |
5108 | return false; |
5109 | } |
5110 | if (!validateMIMGAddrSize(Inst, IDLoc)) |
5111 | return false; |
5112 | if (!validateMIMGAtomicDMask(Inst)) { |
5113 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands), |
5114 | Msg: "invalid atomic image dmask" ); |
5115 | return false; |
5116 | } |
5117 | if (!validateMIMGGatherDMask(Inst)) { |
5118 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands), |
5119 | Msg: "invalid image_gather dmask: only one bit must be set" ); |
5120 | return false; |
5121 | } |
5122 | if (!validateMovrels(Inst, Operands)) { |
5123 | return false; |
5124 | } |
5125 | if (!validateOffset(Inst, Operands)) { |
5126 | return false; |
5127 | } |
5128 | if (!validateMAIAccWrite(Inst, Operands)) { |
5129 | return false; |
5130 | } |
5131 | if (!validateMAISrc2(Inst, Operands)) { |
5132 | return false; |
5133 | } |
5134 | if (!validateMFMA(Inst, Operands)) { |
5135 | return false; |
5136 | } |
5137 | if (!validateCoherencyBits(Inst, Operands, IDLoc)) { |
5138 | return false; |
5139 | } |
5140 | |
5141 | if (!validateAGPRLdSt(Inst)) { |
5142 | Error(L: IDLoc, Msg: getFeatureBits()[AMDGPU::FeatureGFX90AInsts] |
5143 | ? "invalid register class: data and dst should be all VGPR or AGPR" |
5144 | : "invalid register class: agpr loads and stores not supported on this GPU" |
5145 | ); |
5146 | return false; |
5147 | } |
5148 | if (!validateVGPRAlign(Inst)) { |
5149 | Error(L: IDLoc, |
5150 | Msg: "invalid register class: vgpr tuples must be 64 bit aligned" ); |
5151 | return false; |
5152 | } |
5153 | if (!validateDS(Inst, Operands)) { |
5154 | return false; |
5155 | } |
5156 | |
5157 | if (!validateBLGP(Inst, Operands)) { |
5158 | return false; |
5159 | } |
5160 | |
5161 | if (!validateDivScale(Inst)) { |
5162 | Error(L: IDLoc, Msg: "ABS not allowed in VOP3B instructions" ); |
5163 | return false; |
5164 | } |
5165 | if (!validateWaitCnt(Inst, Operands)) { |
5166 | return false; |
5167 | } |
5168 | if (!validateExeczVcczOperands(Operands)) { |
5169 | return false; |
5170 | } |
5171 | if (!validateTFE(Inst, Operands)) { |
5172 | return false; |
5173 | } |
5174 | |
5175 | return true; |
5176 | } |
5177 | |
5178 | static std::string AMDGPUMnemonicSpellCheck(StringRef S, |
5179 | const FeatureBitset &FBS, |
5180 | unsigned VariantID = 0); |
5181 | |
5182 | static bool AMDGPUCheckMnemonic(StringRef Mnemonic, |
5183 | const FeatureBitset &AvailableFeatures, |
5184 | unsigned VariantID); |
5185 | |
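// Check whether a mnemonic is recognized for the given feature set, either in
// any assembler variant or only in the given subset of variants.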
5186 | bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, |
5187 | const FeatureBitset &FBS) { |
5188 | return isSupportedMnemo(Mnemo, FBS, Variants: getAllVariants()); |
5189 | } |
5190 | |
5191 | bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, |
5192 | const FeatureBitset &FBS, |
5193 | ArrayRef<unsigned> Variants) { |
5194 | for (auto Variant : Variants) { |
5195 | if (AMDGPUCheckMnemonic(Mnemonic: Mnemo, AvailableFeatures: FBS, VariantID: Variant)) |
5196 | return true; |
5197 | } |
5198 | |
5199 | return false; |
5200 | } |
5201 | |
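// Report the most specific reason why an instruction is unavailable:
// unsupported variant, wrong wavesize, unsupported GPU, or a probable typo
// (with a spelling suggestion).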
5202 | bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, |
5203 | const SMLoc &IDLoc) { |
5204 | FeatureBitset FBS = ComputeAvailableFeatures(FB: getFeatureBits()); |
5205 | |
5206 | // Check if requested instruction variant is supported. |
5207 | if (isSupportedMnemo(Mnemo, FBS, Variants: getMatchedVariants())) |
5208 | return false; |
5209 | |
5210 | // This instruction is not supported. |
5211 | // Clear any other pending errors because they are no longer relevant. |
5212 | getParser().clearPendingErrors(); |
5213 | |
5214 | // Requested instruction variant is not supported. |
5215 | // Check if any other variants are supported. |
5216 | StringRef VariantName = getMatchedVariantName(); |
5217 | if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { |
5218 | return Error(L: IDLoc, |
5219 | Msg: Twine(VariantName, |
5220 | " variant of this instruction is not supported" )); |
5221 | } |
5222 | |
5223 | // Check if this instruction may be used with a different wavesize. |
5224 | if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && |
5225 | !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { |
5226 | |
5227 | FeatureBitset FeaturesWS32 = getFeatureBits(); |
5228 | FeaturesWS32.flip(I: AMDGPU::FeatureWavefrontSize64) |
5229 | .flip(I: AMDGPU::FeatureWavefrontSize32); |
5230 | FeatureBitset AvailableFeaturesWS32 = |
5231 | ComputeAvailableFeatures(FB: FeaturesWS32); |
5232 | |
5233 | if (isSupportedMnemo(Mnemo, FBS: AvailableFeaturesWS32, Variants: getMatchedVariants())) |
5234 | return Error(L: IDLoc, Msg: "instruction requires wavesize=32" ); |
5235 | } |
5236 | |
5237 | // Finally check if this instruction is supported on any other GPU. |
5238 | if (isSupportedMnemo(Mnemo, FBS: FeatureBitset().set())) { |
5239 | return Error(L: IDLoc, Msg: "instruction not supported on this GPU" ); |
5240 | } |
5241 | |
5242 | // Instruction not supported on any GPU. Probably a typo. |
5243 | std::string Suggestion = AMDGPUMnemonicSpellCheck(S: Mnemo, FBS); |
5244 | return Error(L: IDLoc, Msg: "invalid instruction" + Suggestion); |
5245 | } |
5246 | |
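// Return true if the operand at InvalidOprIdx is the mnemonic of the second
// (Y) half of a VOPD instruction, i.e. a token that directly follows '::'.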
5247 | static bool isInvalidVOPDY(const OperandVector &Operands, |
5248 | uint64_t InvalidOprIdx) { |
5249 | assert(InvalidOprIdx < Operands.size()); |
5250 | const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); |
5251 | if (Op.isToken() && InvalidOprIdx > 1) { |
5252 | const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); |
5253 | return PrevOp.isToken() && PrevOp.getToken() == "::" ; |
5254 | } |
5255 | return false; |
5256 | } |
5257 | |
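// Try all matcher variants, keeping the most specific failure status, then
// validate and emit the instruction on success or report a diagnostic.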
5258 | bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
5259 | OperandVector &Operands, |
5260 | MCStreamer &Out, |
5261 | uint64_t &ErrorInfo, |
5262 | bool MatchingInlineAsm) { |
5263 | MCInst Inst; |
5264 | unsigned Result = Match_Success; |
5265 | for (auto Variant : getMatchedVariants()) { |
5266 | uint64_t EI; |
5267 | auto R = MatchInstructionImpl(Operands, Inst, ErrorInfo&: EI, matchingInlineAsm: MatchingInlineAsm, |
5268 | VariantID: Variant); |
    // Match statuses are ordered from least to most specific; keep the most
    // specific one as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5272 | if (R == Match_Success || R == Match_MissingFeature || |
5273 | (R == Match_InvalidOperand && Result != Match_MissingFeature) || |
5274 | (R == Match_MnemonicFail && Result != Match_InvalidOperand && |
5275 | Result != Match_MissingFeature)) { |
5276 | Result = R; |
5277 | ErrorInfo = EI; |
5278 | } |
5279 | if (R == Match_Success) |
5280 | break; |
5281 | } |
5282 | |
5283 | if (Result == Match_Success) { |
5284 | if (!validateInstruction(Inst, IDLoc, Operands)) { |
5285 | return true; |
5286 | } |
5287 | Inst.setLoc(IDLoc); |
5288 | Out.emitInstruction(Inst, STI: getSTI()); |
5289 | return false; |
5290 | } |
5291 | |
5292 | StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); |
5293 | if (checkUnsupportedInstruction(Mnemo, IDLoc)) { |
5294 | return true; |
5295 | } |
5296 | |
5297 | switch (Result) { |
5298 | default: break; |
5299 | case Match_MissingFeature: |
5300 | // It has been verified that the specified instruction |
5301 | // mnemonic is valid. A match was found but it requires |
5302 | // features which are not supported on this GPU. |
5303 | return Error(L: IDLoc, Msg: "operands are not valid for this GPU or mode" ); |
5304 | |
5305 | case Match_InvalidOperand: { |
5306 | SMLoc ErrorLoc = IDLoc; |
5307 | if (ErrorInfo != ~0ULL) { |
5308 | if (ErrorInfo >= Operands.size()) { |
5309 | return Error(L: IDLoc, Msg: "too few operands for instruction" ); |
5310 | } |
5311 | ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); |
5312 | if (ErrorLoc == SMLoc()) |
5313 | ErrorLoc = IDLoc; |
5314 | |
5315 | if (isInvalidVOPDY(Operands, InvalidOprIdx: ErrorInfo)) |
5316 | return Error(L: ErrorLoc, Msg: "invalid VOPDY instruction" ); |
5317 | } |
5318 | return Error(L: ErrorLoc, Msg: "invalid operand for instruction" ); |
5319 | } |
5320 | |
5321 | case Match_MnemonicFail: |
5322 | llvm_unreachable("Invalid instructions should have been handled already" ); |
5323 | } |
5324 | llvm_unreachable("Implement any new match types added!" ); |
5325 | } |
5326 | |
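// Parse an absolute expression and return its value truncated to 32 bits.
// Returns true on failure.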
5327 | bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { |
5328 | int64_t Tmp = -1; |
5329 | if (!isToken(Kind: AsmToken::Integer) && !isToken(Kind: AsmToken::Identifier)) { |
5330 | return true; |
5331 | } |
5332 | if (getParser().parseAbsoluteExpression(Res&: Tmp)) { |
5333 | return true; |
5334 | } |
5335 | Ret = static_cast<uint32_t>(Tmp); |
5336 | return false; |
5337 | } |
5338 | |
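// Handle the .amdgcn_target directive: the quoted target id must match the
// target id the streamer was configured with.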
5339 | bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { |
5340 | if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) |
5341 | return TokError(Msg: "directive only supported for amdgcn architecture" ); |
5342 | |
5343 | std::string TargetIDDirective; |
5344 | SMLoc TargetStart = getTok().getLoc(); |
5345 | if (getParser().parseEscapedString(Data&: TargetIDDirective)) |
5346 | return true; |
5347 | |
5348 | SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); |
5349 | if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) |
5350 | return getParser().Error(L: TargetRange.Start, |
5351 | Msg: (Twine(".amdgcn_target directive's target id " ) + |
5352 | Twine(TargetIDDirective) + |
5353 | Twine(" does not match the specified target id " ) + |
5354 | Twine(getTargetStreamer().getTargetID()->toString())).str()); |
5355 | |
5356 | return false; |
5357 | } |
5358 | |
5359 | bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { |
5360 | return Error(L: Range.Start, Msg: "value out of range" , Range); |
5361 | } |
5362 | |
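// Compute the granulated VGPR and SGPR block counts (as MCExprs) that are
// encoded in compute_pgm_rsrc1 from the .amdhsa_next_free_{v,s}gpr values.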
5363 | bool AMDGPUAsmParser::calculateGPRBlocks( |
5364 | const FeatureBitset &Features, const MCExpr *VCCUsed, |
5365 | const MCExpr *FlatScrUsed, bool XNACKUsed, |
5366 | std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR, |
5367 | SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange, |
5368 | const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) { |
5369 | // TODO(scott.linder): These calculations are duplicated from |
5370 | // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. |
5371 | IsaVersion Version = getIsaVersion(GPU: getSTI().getCPU()); |
5372 | MCContext &Ctx = getContext(); |
5373 | |
5374 | const MCExpr *NumSGPRs = NextFreeSGPR; |
5375 | int64_t EvaluatedSGPRs; |
5376 | |
5377 | if (Version.Major >= 10) |
5378 | NumSGPRs = MCConstantExpr::create(Value: 0, Ctx); |
5379 | else { |
5380 | unsigned MaxAddressableNumSGPRs = |
5381 | IsaInfo::getAddressableNumSGPRs(STI: &getSTI()); |
5382 | |
5383 | if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && Version.Major >= 8 && |
5384 | !Features.test(I: FeatureSGPRInitBug) && |
5385 | static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) |
5386 | return OutOfRangeError(Range: SGPRRange); |
5387 | |
    const MCExpr *ExtraSGPRs =
5389 | AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx); |
5390 | NumSGPRs = MCBinaryExpr::createAdd(LHS: NumSGPRs, RHS: ExtraSGPRs, Ctx); |
5391 | |
5392 | if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && |
5393 | (Version.Major <= 7 || Features.test(I: FeatureSGPRInitBug)) && |
5394 | static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) |
5395 | return OutOfRangeError(Range: SGPRRange); |
5396 | |
5397 | if (Features.test(I: FeatureSGPRInitBug)) |
5398 | NumSGPRs = |
5399 | MCConstantExpr::create(Value: IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx); |
5400 | } |
5401 | |
5402 | // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks: |
5403 | // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1 |
5404 | auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR, |
5405 | unsigned Granule) -> const MCExpr * { |
5406 | const MCExpr *OneConst = MCConstantExpr::create(Value: 1ul, Ctx); |
5407 | const MCExpr *GranuleConst = MCConstantExpr::create(Value: Granule, Ctx); |
5408 | const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax(Args: {NumGPR, OneConst}, Ctx); |
5409 | const MCExpr *AlignToGPR = |
5410 | AMDGPUMCExpr::createAlignTo(Value: MaxNumGPR, Align: GranuleConst, Ctx); |
5411 | const MCExpr *DivGPR = |
5412 | MCBinaryExpr::createDiv(LHS: AlignToGPR, RHS: GranuleConst, Ctx); |
5413 | const MCExpr *SubGPR = MCBinaryExpr::createSub(LHS: DivGPR, RHS: OneConst, Ctx); |
5414 | return SubGPR; |
5415 | }; |
5416 | |
5417 | VGPRBlocks = GetNumGPRBlocks( |
5418 | NextFreeVGPR, |
5419 | IsaInfo::getVGPREncodingGranule(STI: &getSTI(), EnableWavefrontSize32)); |
5420 | SGPRBlocks = |
5421 | GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(STI: &getSTI())); |
5422 | |
5423 | return false; |
5424 | } |
5425 | |
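// Handle the .amdhsa_kernel directive: parse .amdhsa_* sub-directives up to
// .end_amdhsa_kernel and populate the kernel descriptor accordingly.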
5426 | bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { |
5427 | if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) |
5428 | return TokError(Msg: "directive only supported for amdgcn architecture" ); |
5429 | |
5430 | if (!isHsaAbi(STI: getSTI())) |
5431 | return TokError(Msg: "directive only supported for amdhsa OS" ); |
5432 | |
5433 | StringRef KernelName; |
5434 | if (getParser().parseIdentifier(Res&: KernelName)) |
5435 | return true; |
5436 | |
5437 | AMDGPU::MCKernelDescriptor KD = |
5438 | AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor( |
5439 | STI: &getSTI(), Ctx&: getContext()); |
5440 | |
5441 | StringSet<> Seen; |
5442 | |
5443 | IsaVersion IVersion = getIsaVersion(GPU: getSTI().getCPU()); |
5444 | |
5445 | const MCExpr *ZeroExpr = MCConstantExpr::create(Value: 0, Ctx&: getContext()); |
5446 | const MCExpr *OneExpr = MCConstantExpr::create(Value: 1, Ctx&: getContext()); |
5447 | |
5448 | SMRange VGPRRange; |
5449 | const MCExpr *NextFreeVGPR = ZeroExpr; |
5450 | const MCExpr *AccumOffset = MCConstantExpr::create(Value: 0, Ctx&: getContext()); |
5451 | uint64_t SharedVGPRCount = 0; |
5452 | uint64_t PreloadLength = 0; |
5453 | uint64_t PreloadOffset = 0; |
5454 | SMRange SGPRRange; |
5455 | const MCExpr *NextFreeSGPR = ZeroExpr; |
5456 | |
  // Count the number of user SGPRs implied by the enabled user SGPR directives.
5458 | unsigned ImpliedUserSGPRCount = 0; |
5459 | |
5460 | // Track if the asm explicitly contains the directive for the user SGPR |
5461 | // count. |
5462 | std::optional<unsigned> ExplicitUserSGPRCount; |
5463 | const MCExpr *ReserveVCC = OneExpr; |
5464 | const MCExpr *ReserveFlatScr = OneExpr; |
5465 | std::optional<bool> EnableWavefrontSize32; |
5466 | |
5467 | while (true) { |
5468 | while (trySkipToken(Kind: AsmToken::EndOfStatement)); |
5469 | |
5470 | StringRef ID; |
5471 | SMRange IDRange = getTok().getLocRange(); |
5472 | if (!parseId(Val&: ID, ErrMsg: "expected .amdhsa_ directive or .end_amdhsa_kernel" )) |
5473 | return true; |
5474 | |
5475 | if (ID == ".end_amdhsa_kernel" ) |
5476 | break; |
5477 | |
5478 | if (!Seen.insert(key: ID).second) |
5479 | return TokError(Msg: ".amdhsa_ directives cannot be repeated" ); |
5480 | |
5481 | SMLoc ValStart = getLoc(); |
5482 | const MCExpr *ExprVal; |
5483 | if (getParser().parseExpression(Res&: ExprVal)) |
5484 | return true; |
5485 | SMLoc ValEnd = getLoc(); |
5486 | SMRange ValRange = SMRange(ValStart, ValEnd); |
5487 | |
5488 | int64_t IVal = 0; |
5489 | uint64_t Val = IVal; |
5490 | bool EvaluatableExpr; |
5491 | if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(Res&: IVal))) { |
5492 | if (IVal < 0) |
5493 | return OutOfRangeError(Range: ValRange); |
5494 | Val = IVal; |
5495 | } |
5496 | |
5497 | #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ |
5498 | if (!isUInt<ENTRY##_WIDTH>(Val)) \ |
5499 | return OutOfRangeError(RANGE); \ |
5500 | AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \ |
5501 | getContext()); |
5502 | |
5503 | // Some fields use the parsed value immediately which requires the expression to |
5504 | // be solvable. |
5505 | #define EXPR_RESOLVE_OR_ERROR(RESOLVED) \ |
5506 | if (!(RESOLVED)) \ |
5507 | return Error(IDRange.Start, "directive should have resolvable expression", \ |
5508 | IDRange); |
5509 | |
5510 | if (ID == ".amdhsa_group_segment_fixed_size" ) { |
5511 | if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) * |
5512 | CHAR_BIT>(x: Val)) |
5513 | return OutOfRangeError(Range: ValRange); |
5514 | KD.group_segment_fixed_size = ExprVal; |
5515 | } else if (ID == ".amdhsa_private_segment_fixed_size" ) { |
5516 | if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) * |
5517 | CHAR_BIT>(x: Val)) |
5518 | return OutOfRangeError(Range: ValRange); |
5519 | KD.private_segment_fixed_size = ExprVal; |
5520 | } else if (ID == ".amdhsa_kernarg_size" ) { |
5521 | if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(x: Val)) |
5522 | return OutOfRangeError(Range: ValRange); |
5523 | KD.kernarg_size = ExprVal; |
5524 | } else if (ID == ".amdhsa_user_sgpr_count" ) { |
5525 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5526 | ExplicitUserSGPRCount = Val; |
5527 | } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer" ) { |
5528 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5529 | if (hasArchitectedFlatScratch()) |
5530 | return Error(L: IDRange.Start, |
5531 | Msg: "directive is not supported with architected flat scratch" , |
5532 | Range: IDRange); |
5533 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5534 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, |
5535 | ExprVal, ValRange); |
5536 | if (Val) |
5537 | ImpliedUserSGPRCount += 4; |
5538 | } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length" ) { |
5539 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5540 | if (!hasKernargPreload()) |
5541 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5542 | |
5543 | if (Val > getMaxNumUserSGPRs()) |
5544 | return OutOfRangeError(Range: ValRange); |
5545 | PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal, |
5546 | ValRange); |
5547 | if (Val) { |
5548 | ImpliedUserSGPRCount += Val; |
5549 | PreloadLength = Val; |
5550 | } |
5551 | } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset" ) { |
5552 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5553 | if (!hasKernargPreload()) |
5554 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5555 | |
5556 | if (Val >= 1024) |
5557 | return OutOfRangeError(Range: ValRange); |
5558 | PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal, |
5559 | ValRange); |
5560 | if (Val) |
5561 | PreloadOffset = Val; |
5562 | } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr" ) { |
5563 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5564 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5565 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal, |
5566 | ValRange); |
5567 | if (Val) |
5568 | ImpliedUserSGPRCount += 2; |
5569 | } else if (ID == ".amdhsa_user_sgpr_queue_ptr" ) { |
5570 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5571 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5572 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal, |
5573 | ValRange); |
5574 | if (Val) |
5575 | ImpliedUserSGPRCount += 2; |
5576 | } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr" ) { |
5577 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5578 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5579 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, |
5580 | ExprVal, ValRange); |
5581 | if (Val) |
5582 | ImpliedUserSGPRCount += 2; |
5583 | } else if (ID == ".amdhsa_user_sgpr_dispatch_id" ) { |
5584 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5585 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5586 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal, |
5587 | ValRange); |
5588 | if (Val) |
5589 | ImpliedUserSGPRCount += 2; |
5590 | } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init" ) { |
5591 | if (hasArchitectedFlatScratch()) |
5592 | return Error(L: IDRange.Start, |
5593 | Msg: "directive is not supported with architected flat scratch" , |
5594 | Range: IDRange); |
5595 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5596 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5597 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, |
5598 | ExprVal, ValRange); |
5599 | if (Val) |
5600 | ImpliedUserSGPRCount += 2; |
5601 | } else if (ID == ".amdhsa_user_sgpr_private_segment_size" ) { |
5602 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5603 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5604 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, |
5605 | ExprVal, ValRange); |
5606 | if (Val) |
5607 | ImpliedUserSGPRCount += 1; |
5608 | } else if (ID == ".amdhsa_wavefront_size32" ) { |
5609 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5610 | if (IVersion.Major < 10) |
5611 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5612 | EnableWavefrontSize32 = Val; |
5613 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5614 | KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal, |
5615 | ValRange); |
5616 | } else if (ID == ".amdhsa_uses_dynamic_stack" ) { |
5617 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5618 | KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal, |
5619 | ValRange); |
5620 | } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset" ) { |
5621 | if (hasArchitectedFlatScratch()) |
5622 | return Error(L: IDRange.Start, |
5623 | Msg: "directive is not supported with architected flat scratch" , |
5624 | Range: IDRange); |
5625 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5626 | COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, |
5627 | ValRange); |
5628 | } else if (ID == ".amdhsa_enable_private_segment" ) { |
5629 | if (!hasArchitectedFlatScratch()) |
5630 | return Error( |
5631 | L: IDRange.Start, |
5632 | Msg: "directive is not supported without architected flat scratch" , |
5633 | Range: IDRange); |
5634 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5635 | COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, |
5636 | ValRange); |
5637 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x" ) { |
5638 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5639 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal, |
5640 | ValRange); |
5641 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y" ) { |
5642 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5643 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal, |
5644 | ValRange); |
5645 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z" ) { |
5646 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5647 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal, |
5648 | ValRange); |
5649 | } else if (ID == ".amdhsa_system_sgpr_workgroup_info" ) { |
5650 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5651 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal, |
5652 | ValRange); |
5653 | } else if (ID == ".amdhsa_system_vgpr_workitem_id" ) { |
5654 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5655 | COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal, |
5656 | ValRange); |
5657 | } else if (ID == ".amdhsa_next_free_vgpr" ) { |
5658 | VGPRRange = ValRange; |
5659 | NextFreeVGPR = ExprVal; |
5660 | } else if (ID == ".amdhsa_next_free_sgpr" ) { |
5661 | SGPRRange = ValRange; |
5662 | NextFreeSGPR = ExprVal; |
5663 | } else if (ID == ".amdhsa_accum_offset" ) { |
5664 | if (!isGFX90A()) |
5665 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5666 | AccumOffset = ExprVal; |
5667 | } else if (ID == ".amdhsa_reserve_vcc" ) { |
5668 | if (EvaluatableExpr && !isUInt<1>(x: Val)) |
5669 | return OutOfRangeError(Range: ValRange); |
5670 | ReserveVCC = ExprVal; |
5671 | } else if (ID == ".amdhsa_reserve_flat_scratch" ) { |
5672 | if (IVersion.Major < 7) |
5673 | return Error(L: IDRange.Start, Msg: "directive requires gfx7+" , Range: IDRange); |
5674 | if (hasArchitectedFlatScratch()) |
5675 | return Error(L: IDRange.Start, |
5676 | Msg: "directive is not supported with architected flat scratch" , |
5677 | Range: IDRange); |
5678 | if (EvaluatableExpr && !isUInt<1>(x: Val)) |
5679 | return OutOfRangeError(Range: ValRange); |
5680 | ReserveFlatScr = ExprVal; |
5681 | } else if (ID == ".amdhsa_reserve_xnack_mask" ) { |
5682 | if (IVersion.Major < 8) |
5683 | return Error(L: IDRange.Start, Msg: "directive requires gfx8+" , Range: IDRange); |
5684 | if (!isUInt<1>(x: Val)) |
5685 | return OutOfRangeError(Range: ValRange); |
5686 | if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) |
5687 | return getParser().Error(L: IDRange.Start, Msg: ".amdhsa_reserve_xnack_mask does not match target id" , |
5688 | Range: IDRange); |
5689 | } else if (ID == ".amdhsa_float_round_mode_32" ) { |
5690 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5691 | COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, |
5692 | ValRange); |
5693 | } else if (ID == ".amdhsa_float_round_mode_16_64" ) { |
5694 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5695 | COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal, |
5696 | ValRange); |
5697 | } else if (ID == ".amdhsa_float_denorm_mode_32" ) { |
5698 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5699 | COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal, |
5700 | ValRange); |
5701 | } else if (ID == ".amdhsa_float_denorm_mode_16_64" ) { |
5702 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5703 | COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, |
5704 | ValRange); |
5705 | } else if (ID == ".amdhsa_dx10_clamp" ) { |
5706 | if (IVersion.Major >= 12) |
5707 | return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+" , Range: IDRange); |
5708 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5709 | COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, |
5710 | ValRange); |
5711 | } else if (ID == ".amdhsa_ieee_mode" ) { |
5712 | if (IVersion.Major >= 12) |
5713 | return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+" , Range: IDRange); |
5714 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5715 | COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal, |
5716 | ValRange); |
5717 | } else if (ID == ".amdhsa_fp16_overflow" ) { |
5718 | if (IVersion.Major < 9) |
5719 | return Error(L: IDRange.Start, Msg: "directive requires gfx9+" , Range: IDRange); |
5720 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5721 | COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal, |
5722 | ValRange); |
5723 | } else if (ID == ".amdhsa_tg_split" ) { |
5724 | if (!isGFX90A()) |
5725 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5726 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, |
5727 | ExprVal, ValRange); |
5728 | } else if (ID == ".amdhsa_workgroup_processor_mode" ) { |
5729 | if (IVersion.Major < 10) |
5730 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5731 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5732 | COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal, |
5733 | ValRange); |
5734 | } else if (ID == ".amdhsa_memory_ordered" ) { |
5735 | if (IVersion.Major < 10) |
5736 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5737 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5738 | COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal, |
5739 | ValRange); |
5740 | } else if (ID == ".amdhsa_forward_progress" ) { |
5741 | if (IVersion.Major < 10) |
5742 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5743 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5744 | COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal, |
5745 | ValRange); |
5746 | } else if (ID == ".amdhsa_shared_vgpr_count" ) { |
5747 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5748 | if (IVersion.Major < 10 || IVersion.Major >= 12) |
5749 | return Error(L: IDRange.Start, Msg: "directive requires gfx10 or gfx11" , |
5750 | Range: IDRange); |
5751 | SharedVGPRCount = Val; |
5752 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, |
5753 | COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal, |
5754 | ValRange); |
5755 | } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op" ) { |
5756 | PARSE_BITS_ENTRY( |
5757 | KD.compute_pgm_rsrc2, |
5758 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, |
5759 | ExprVal, ValRange); |
5760 | } else if (ID == ".amdhsa_exception_fp_denorm_src" ) { |
5761 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5762 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, |
5763 | ExprVal, ValRange); |
5764 | } else if (ID == ".amdhsa_exception_fp_ieee_div_zero" ) { |
5765 | PARSE_BITS_ENTRY( |
5766 | KD.compute_pgm_rsrc2, |
5767 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, |
5768 | ExprVal, ValRange); |
5769 | } else if (ID == ".amdhsa_exception_fp_ieee_overflow" ) { |
5770 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5771 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, |
5772 | ExprVal, ValRange); |
5773 | } else if (ID == ".amdhsa_exception_fp_ieee_underflow" ) { |
5774 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5775 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, |
5776 | ExprVal, ValRange); |
5777 | } else if (ID == ".amdhsa_exception_fp_ieee_inexact" ) { |
5778 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5779 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, |
5780 | ExprVal, ValRange); |
5781 | } else if (ID == ".amdhsa_exception_int_div_zero" ) { |
5782 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5783 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, |
5784 | ExprVal, ValRange); |
5785 | } else if (ID == ".amdhsa_round_robin_scheduling" ) { |
5786 | if (IVersion.Major < 12) |
5787 | return Error(L: IDRange.Start, Msg: "directive requires gfx12+" , Range: IDRange); |
5788 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5789 | COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal, |
5790 | ValRange); |
5791 | } else { |
5792 | return Error(L: IDRange.Start, Msg: "unknown .amdhsa_kernel directive" , Range: IDRange); |
5793 | } |
5794 | |
5795 | #undef PARSE_BITS_ENTRY |
5796 | } |
5797 | |
5798 | if (!Seen.contains(key: ".amdhsa_next_free_vgpr" )) |
5799 | return TokError(Msg: ".amdhsa_next_free_vgpr directive is required" ); |
5800 | |
5801 | if (!Seen.contains(key: ".amdhsa_next_free_sgpr" )) |
5802 | return TokError(Msg: ".amdhsa_next_free_sgpr directive is required" ); |
5803 | |
5804 | const MCExpr *VGPRBlocks; |
5805 | const MCExpr *SGPRBlocks; |
5806 | if (calculateGPRBlocks(Features: getFeatureBits(), VCCUsed: ReserveVCC, FlatScrUsed: ReserveFlatScr, |
5807 | XNACKUsed: getTargetStreamer().getTargetID()->isXnackOnOrAny(), |
5808 | EnableWavefrontSize32, NextFreeVGPR, |
5809 | VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, |
5810 | SGPRBlocks)) |
5811 | return true; |
5812 | |
5813 | int64_t EvaluatedVGPRBlocks; |
5814 | bool VGPRBlocksEvaluatable = |
5815 | VGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedVGPRBlocks); |
5816 | if (VGPRBlocksEvaluatable && |
5817 | !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( |
5818 | x: static_cast<uint64_t>(EvaluatedVGPRBlocks))) { |
5819 | return OutOfRangeError(Range: VGPRRange); |
5820 | } |
5821 | AMDGPU::MCKernelDescriptor::bits_set( |
5822 | Dst&: KD.compute_pgm_rsrc1, Value: VGPRBlocks, |
5823 | Shift: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT, |
5824 | Mask: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, Ctx&: getContext()); |
5825 | |
5826 | int64_t EvaluatedSGPRBlocks; |
5827 | if (SGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedSGPRBlocks) && |
5828 | !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( |
5829 | x: static_cast<uint64_t>(EvaluatedSGPRBlocks))) |
5830 | return OutOfRangeError(Range: SGPRRange); |
5831 | AMDGPU::MCKernelDescriptor::bits_set( |
5832 | Dst&: KD.compute_pgm_rsrc1, Value: SGPRBlocks, |
5833 | Shift: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT, |
5834 | Mask: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, Ctx&: getContext()); |
5835 | |
5836 | if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) |
    return TokError(Msg: "amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs" );
5839 | |
5840 | unsigned UserSGPRCount = |
5841 | ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; |
5842 | |
5843 | if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(x: UserSGPRCount)) |
5844 | return TokError(Msg: "too many user SGPRs enabled" ); |
5845 | AMDGPU::MCKernelDescriptor::bits_set( |
5846 | Dst&: KD.compute_pgm_rsrc2, Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()), |
5847 | Shift: COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, |
5848 | Mask: COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, Ctx&: getContext()); |
5849 | |
5850 | int64_t IVal = 0; |
5851 | if (!KD.kernarg_size->evaluateAsAbsolute(Res&: IVal)) |
5852 | return TokError(Msg: "Kernarg size should be resolvable" ); |
5853 | uint64_t kernarg_size = IVal; |
5854 | if (PreloadLength && kernarg_size && |
5855 | (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size)) |
5856 | return TokError(Msg: "Kernarg preload length + offset is larger than the " |
5857 | "kernarg segment size" ); |
5858 | |
5859 | if (isGFX90A()) { |
5860 | if (!Seen.contains(key: ".amdhsa_accum_offset" )) |
5861 | return TokError(Msg: ".amdhsa_accum_offset directive is required" ); |
5862 | int64_t EvaluatedAccum; |
5863 | bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(Res&: EvaluatedAccum); |
5864 | uint64_t UEvaluatedAccum = EvaluatedAccum; |
5865 | if (AccumEvaluatable && |
5866 | (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3))) |
5867 | return TokError(Msg: "accum_offset should be in range [4..256] in " |
5868 | "increments of 4" ); |
5869 | |
5870 | int64_t EvaluatedNumVGPR; |
5871 | if (NextFreeVGPR->evaluateAsAbsolute(Res&: EvaluatedNumVGPR) && |
5872 | AccumEvaluatable && |
5873 | UEvaluatedAccum > |
5874 | alignTo(Value: std::max(a: (uint64_t)1, b: (uint64_t)EvaluatedNumVGPR), Align: 4)) |
5875 | return TokError(Msg: "accum_offset exceeds total VGPR allocation" ); |
5876 | const MCExpr *AdjustedAccum = MCBinaryExpr::createSub( |
5877 | LHS: MCBinaryExpr::createDiv( |
5878 | LHS: AccumOffset, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext()), |
5879 | RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext()); |
5880 | MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: AdjustedAccum, |
5881 | Shift: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, |
5882 | Mask: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, |
5883 | Ctx&: getContext()); |
5884 | } |
5885 | |
5886 | if (IVersion.Major >= 10 && IVersion.Major < 12) { |
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5888 | if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { |
5889 | return TokError(Msg: "shared_vgpr_count directive not valid on " |
5890 | "wavefront size 32" ); |
5891 | } |
5892 | |
5893 | if (VGPRBlocksEvaluatable && |
5894 | (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) > |
5895 | 63)) { |
5896 | return TokError(Msg: "shared_vgpr_count*2 + " |
5897 | "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " |
5898 | "exceed 63\n" ); |
5899 | } |
5900 | } |
5901 | |
5902 | getTargetStreamer().EmitAmdhsaKernelDescriptor(STI: getSTI(), KernelName, KernelDescriptor: KD, |
5903 | NextVGPR: NextFreeVGPR, NextSGPR: NextFreeSGPR, |
5904 | ReserveVCC, ReserveFlatScr); |
5905 | return false; |
5906 | } |
5907 | |
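/// Parse the .amdhsa_code_object_version directive, e.g.
///   .amdhsa_code_object_version 5
/// The parsed value is forwarded to the target streamer.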
5908 | bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() { |
5909 | uint32_t Version; |
5910 | if (ParseAsAbsoluteExpression(Ret&: Version)) |
5911 | return true; |
5912 | |
5913 | getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(COV: Version); |
5914 | return false; |
5915 | } |
5916 | |
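/// Parse a single "key = value" entry of an amd_kernel_code_t block into \p C,
/// rejecting settings (e.g. wavefront size) that conflict with the subtarget.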
5917 | bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, |
5918 | AMDGPUMCKernelCodeT &C) { |
5919 | // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing |
5920 | // assembly for backwards compatibility. |
5921 | if (ID == "max_scratch_backing_memory_byte_size" ) { |
5922 | Parser.eatToEndOfStatement(); |
5923 | return false; |
5924 | } |
5925 | |
5926 | SmallString<40> ErrStr; |
5927 | raw_svector_ostream Err(ErrStr); |
5928 | if (!C.ParseKernelCodeT(ID, MCParser&: getParser(), Err)) { |
5929 | return TokError(Msg: Err.str()); |
5930 | } |
5931 | Lex(); |
5932 | |
5933 | if (ID == "enable_wavefront_size32" ) { |
5934 | if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { |
5935 | if (!isGFX10Plus()) |
5936 | return TokError(Msg: "enable_wavefront_size32=1 is only allowed on GFX10+" ); |
5937 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) |
5938 | return TokError(Msg: "enable_wavefront_size32=1 requires +WavefrontSize32" ); |
5939 | } else { |
5940 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) |
5941 | return TokError(Msg: "enable_wavefront_size32=0 requires +WavefrontSize64" ); |
5942 | } |
5943 | } |
5944 | |
5945 | if (ID == "wavefront_size" ) { |
5946 | if (C.wavefront_size == 5) { |
5947 | if (!isGFX10Plus()) |
5948 | return TokError(Msg: "wavefront_size=5 is only allowed on GFX10+" ); |
5949 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) |
5950 | return TokError(Msg: "wavefront_size=5 requires +WavefrontSize32" ); |
5951 | } else if (C.wavefront_size == 6) { |
5952 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) |
5953 | return TokError(Msg: "wavefront_size=6 requires +WavefrontSize64" ); |
5954 | } |
5955 | } |
5956 | |
5957 | return false; |
5958 | } |
5959 | |
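/// ParseDirectiveAMDKernelCodeT
/// Informally: .amd_kernel_code_t <key = value entries> .end_amd_kernel_code_t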
5960 | bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { |
5961 | AMDGPUMCKernelCodeT KernelCode; |
5962 | KernelCode.initDefault(STI: &getSTI(), Ctx&: getContext()); |
5963 | |
5964 | while (true) { |
5965 | // Lex EndOfStatement. This is in a while loop, because lexing a comment |
5966 | // will set the current token to EndOfStatement. |
    while (trySkipToken(Kind: AsmToken::EndOfStatement));
5968 | |
5969 | StringRef ID; |
5970 | if (!parseId(Val&: ID, ErrMsg: "expected value identifier or .end_amd_kernel_code_t" )) |
5971 | return true; |
5972 | |
5973 | if (ID == ".end_amd_kernel_code_t" ) |
5974 | break; |
5975 | |
5976 | if (ParseAMDKernelCodeTValue(ID, C&: KernelCode)) |
5977 | return true; |
5978 | } |
5979 | |
5980 | KernelCode.validate(STI: &getSTI(), Ctx&: getContext()); |
5981 | getTargetStreamer().EmitAMDKernelCodeT(Header&: KernelCode); |
5982 | |
5983 | return false; |
5984 | } |
5985 | |
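/// ParseDirectiveAMDGPUHsaKernel
/// ::= .amdgpu_hsa_kernel symbol_name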
5986 | bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { |
5987 | StringRef KernelName; |
5988 | if (!parseId(Val&: KernelName, ErrMsg: "expected symbol name" )) |
5989 | return true; |
5990 | |
5991 | getTargetStreamer().EmitAMDGPUSymbolType(SymbolName: KernelName, |
5992 | Type: ELF::STT_AMDGPU_HSA_KERNEL); |
5993 | |
5994 | KernelScope.initialize(Context&: getContext()); |
5995 | return false; |
5996 | } |
5997 | |
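/// Parse the .amd_amdgpu_isa directive; the quoted target-id string must match
/// the target id implied by the assembler options.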
5998 | bool AMDGPUAsmParser::ParseDirectiveISAVersion() { |
5999 | if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { |
6000 | return Error(L: getLoc(), |
6001 | Msg: ".amd_amdgpu_isa directive is not available on non-amdgcn " |
6002 | "architectures" ); |
6003 | } |
6004 | |
6005 | auto TargetIDDirective = getLexer().getTok().getStringContents(); |
6006 | if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) |
6007 | return Error(L: getParser().getTok().getLoc(), Msg: "target id must match options" ); |
6008 | |
6009 | getTargetStreamer().EmitISAVersion(); |
6010 | Lex(); |
6011 | |
6012 | return false; |
6013 | } |
6014 | |
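/// Parse the HSA metadata block bracketed by the V3 begin/end directives and
/// hand the collected YAML string to the target streamer.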
6015 | bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { |
6016 | assert(isHsaAbi(getSTI())); |
6017 | |
6018 | std::string HSAMetadataString; |
6019 | if (ParseToEndDirective(AssemblerDirectiveBegin: HSAMD::V3::AssemblerDirectiveBegin, |
6020 | AssemblerDirectiveEnd: HSAMD::V3::AssemblerDirectiveEnd, CollectString&: HSAMetadataString)) |
6021 | return true; |
6022 | |
6023 | if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) |
6024 | return Error(L: getLoc(), Msg: "invalid HSA metadata" ); |
6025 | |
6026 | return false; |
6027 | } |
6028 | |
6029 | /// Common code to parse out a block of text (typically YAML) between start and |
6030 | /// end directives. |
6031 | bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, |
6032 | const char *AssemblerDirectiveEnd, |
6033 | std::string &CollectString) { |
6034 | |
6035 | raw_string_ostream CollectStream(CollectString); |
6036 | |
6037 | getLexer().setSkipSpace(false); |
6038 | |
6039 | bool FoundEnd = false; |
6040 | while (!isToken(Kind: AsmToken::Eof)) { |
6041 | while (isToken(Kind: AsmToken::Space)) { |
6042 | CollectStream << getTokenStr(); |
6043 | Lex(); |
6044 | } |
6045 | |
6046 | if (trySkipId(Id: AssemblerDirectiveEnd)) { |
6047 | FoundEnd = true; |
6048 | break; |
6049 | } |
6050 | |
6051 | CollectStream << Parser.parseStringToEndOfStatement() |
6052 | << getContext().getAsmInfo()->getSeparatorString(); |
6053 | |
6054 | Parser.eatToEndOfStatement(); |
6055 | } |
6056 | |
6057 | getLexer().setSkipSpace(true); |
6058 | |
6059 | if (isToken(Kind: AsmToken::Eof) && !FoundEnd) { |
6060 | return TokError(Msg: Twine("expected directive " ) + |
6061 | Twine(AssemblerDirectiveEnd) + Twine(" not found" )); |
6062 | } |
6063 | |
6064 | CollectStream.flush(); |
6065 | return false; |
6066 | } |
6067 | |
6068 | /// Parse the assembler directive for new MsgPack-format PAL metadata. |
6069 | bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { |
6070 | std::string String; |
6071 | if (ParseToEndDirective(AssemblerDirectiveBegin: AMDGPU::PALMD::AssemblerDirectiveBegin, |
6072 | AssemblerDirectiveEnd: AMDGPU::PALMD::AssemblerDirectiveEnd, CollectString&: String)) |
6073 | return true; |
6074 | |
6075 | auto PALMetadata = getTargetStreamer().getPALMetadata(); |
6076 | if (!PALMetadata->setFromString(String)) |
6077 | return Error(L: getLoc(), Msg: "invalid PAL metadata" ); |
6078 | return false; |
6079 | } |
6080 | |
6081 | /// Parse the assembler directive for old linear-format PAL metadata. |
6082 | bool AMDGPUAsmParser::ParseDirectivePALMetadata() { |
6083 | if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { |
6084 | return Error(L: getLoc(), |
6085 | Msg: (Twine(PALMD::AssemblerDirective) + Twine(" directive is " |
6086 | "not available on non-amdpal OSes" )).str()); |
6087 | } |
6088 | |
6089 | auto PALMetadata = getTargetStreamer().getPALMetadata(); |
6090 | PALMetadata->setLegacy(); |
6091 | for (;;) { |
6092 | uint32_t Key, Value; |
6093 | if (ParseAsAbsoluteExpression(Ret&: Key)) { |
6094 | return TokError(Msg: Twine("invalid value in " ) + |
6095 | Twine(PALMD::AssemblerDirective)); |
6096 | } |
6097 | if (!trySkipToken(Kind: AsmToken::Comma)) { |
6098 | return TokError(Msg: Twine("expected an even number of values in " ) + |
6099 | Twine(PALMD::AssemblerDirective)); |
6100 | } |
6101 | if (ParseAsAbsoluteExpression(Ret&: Value)) { |
6102 | return TokError(Msg: Twine("invalid value in " ) + |
6103 | Twine(PALMD::AssemblerDirective)); |
6104 | } |
6105 | PALMetadata->setRegister(Reg: Key, Val: Value); |
6106 | if (!trySkipToken(Kind: AsmToken::Comma)) |
6107 | break; |
6108 | } |
6109 | return false; |
6110 | } |
6111 | |
6112 | /// ParseDirectiveAMDGPULDS |
6113 | /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] |
6114 | bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { |
6115 | if (getParser().checkForValidSection()) |
6116 | return true; |
6117 | |
6118 | StringRef Name; |
6119 | SMLoc NameLoc = getLoc(); |
6120 | if (getParser().parseIdentifier(Res&: Name)) |
6121 | return TokError(Msg: "expected identifier in directive" ); |
6122 | |
6123 | MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); |
6124 | if (getParser().parseComma()) |
6125 | return true; |
6126 | |
6127 | unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(STI: &getSTI()); |
6128 | |
6129 | int64_t Size; |
6130 | SMLoc SizeLoc = getLoc(); |
6131 | if (getParser().parseAbsoluteExpression(Res&: Size)) |
6132 | return true; |
6133 | if (Size < 0) |
6134 | return Error(L: SizeLoc, Msg: "size must be non-negative" ); |
6135 | if (Size > LocalMemorySize) |
6136 | return Error(L: SizeLoc, Msg: "size is too large" ); |
6137 | |
6138 | int64_t Alignment = 4; |
6139 | if (trySkipToken(Kind: AsmToken::Comma)) { |
6140 | SMLoc AlignLoc = getLoc(); |
6141 | if (getParser().parseAbsoluteExpression(Res&: Alignment)) |
6142 | return true; |
6143 | if (Alignment < 0 || !isPowerOf2_64(Value: Alignment)) |
6144 | return Error(L: AlignLoc, Msg: "alignment must be a power of two" ); |
6145 | |
6146 | // Alignment larger than the size of LDS is possible in theory, as long |
    // as the linker manages to place the symbol at address 0, but we do want
6148 | // to make sure the alignment fits nicely into a 32-bit integer. |
6149 | if (Alignment >= 1u << 31) |
6150 | return Error(L: AlignLoc, Msg: "alignment is too large" ); |
6151 | } |
6152 | |
6153 | if (parseEOL()) |
6154 | return true; |
6155 | |
6156 | Symbol->redefineIfPossible(); |
6157 | if (!Symbol->isUndefined()) |
6158 | return Error(L: NameLoc, Msg: "invalid symbol redefinition" ); |
6159 | |
6160 | getTargetStreamer().emitAMDGPULDS(Symbol, Size, Alignment: Align(Alignment)); |
6161 | return false; |
6162 | } |
6163 | |
6164 | bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { |
6165 | StringRef IDVal = DirectiveID.getString(); |
6166 | |
6167 | if (isHsaAbi(STI: getSTI())) { |
6168 | if (IDVal == ".amdhsa_kernel" ) |
6169 | return ParseDirectiveAMDHSAKernel(); |
6170 | |
6171 | if (IDVal == ".amdhsa_code_object_version" ) |
6172 | return ParseDirectiveAMDHSACodeObjectVersion(); |
6173 | |
6174 | // TODO: Restructure/combine with PAL metadata directive. |
6175 | if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) |
6176 | return ParseDirectiveHSAMetadata(); |
6177 | } else { |
6178 | if (IDVal == ".amd_kernel_code_t" ) |
6179 | return ParseDirectiveAMDKernelCodeT(); |
6180 | |
6181 | if (IDVal == ".amdgpu_hsa_kernel" ) |
6182 | return ParseDirectiveAMDGPUHsaKernel(); |
6183 | |
6184 | if (IDVal == ".amd_amdgpu_isa" ) |
6185 | return ParseDirectiveISAVersion(); |
6186 | |
6187 | if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) { |
6188 | return Error(L: getLoc(), Msg: (Twine(HSAMD::AssemblerDirectiveBegin) + |
6189 | Twine(" directive is " |
6190 | "not available on non-amdhsa OSes" )) |
6191 | .str()); |
6192 | } |
6193 | } |
6194 | |
6195 | if (IDVal == ".amdgcn_target" ) |
6196 | return ParseDirectiveAMDGCNTarget(); |
6197 | |
6198 | if (IDVal == ".amdgpu_lds" ) |
6199 | return ParseDirectiveAMDGPULDS(); |
6200 | |
6201 | if (IDVal == PALMD::AssemblerDirectiveBegin) |
6202 | return ParseDirectivePALMetadataBegin(); |
6203 | |
6204 | if (IDVal == PALMD::AssemblerDirective) |
6205 | return ParseDirectivePALMetadata(); |
6206 | |
6207 | return true; |
6208 | } |
6209 | |
6210 | bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, |
6211 | unsigned RegNo) { |
6212 | |
6213 | if (MRI.regsOverlap(RegA: AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegB: RegNo)) |
6214 | return isGFX9Plus(); |
6215 | |
6216 | // GFX10+ has 2 more SGPRs 104 and 105. |
6217 | if (MRI.regsOverlap(RegA: AMDGPU::SGPR104_SGPR105, RegB: RegNo)) |
6218 | return hasSGPR104_SGPR105(); |
6219 | |
6220 | switch (RegNo) { |
6221 | case AMDGPU::SRC_SHARED_BASE_LO: |
6222 | case AMDGPU::SRC_SHARED_BASE: |
6223 | case AMDGPU::SRC_SHARED_LIMIT_LO: |
6224 | case AMDGPU::SRC_SHARED_LIMIT: |
6225 | case AMDGPU::SRC_PRIVATE_BASE_LO: |
6226 | case AMDGPU::SRC_PRIVATE_BASE: |
6227 | case AMDGPU::SRC_PRIVATE_LIMIT_LO: |
6228 | case AMDGPU::SRC_PRIVATE_LIMIT: |
6229 | return isGFX9Plus(); |
6230 | case AMDGPU::SRC_POPS_EXITING_WAVE_ID: |
6231 | return isGFX9Plus() && !isGFX11Plus(); |
6232 | case AMDGPU::TBA: |
6233 | case AMDGPU::TBA_LO: |
6234 | case AMDGPU::TBA_HI: |
6235 | case AMDGPU::TMA: |
6236 | case AMDGPU::TMA_LO: |
6237 | case AMDGPU::TMA_HI: |
6238 | return !isGFX9Plus(); |
6239 | case AMDGPU::XNACK_MASK: |
6240 | case AMDGPU::XNACK_MASK_LO: |
6241 | case AMDGPU::XNACK_MASK_HI: |
6242 | return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); |
6243 | case AMDGPU::SGPR_NULL: |
6244 | return isGFX10Plus(); |
6245 | default: |
6246 | break; |
6247 | } |
6248 | |
6249 | if (isCI()) |
6250 | return true; |
6251 | |
6252 | if (isSI() || isGFX10Plus()) { |
6253 | // No flat_scr on SI. |
6254 | // On GFX10Plus flat scratch is not a valid register operand and can only be |
6255 | // accessed with s_setreg/s_getreg. |
6256 | switch (RegNo) { |
6257 | case AMDGPU::FLAT_SCR: |
6258 | case AMDGPU::FLAT_SCR_LO: |
6259 | case AMDGPU::FLAT_SCR_HI: |
6260 | return false; |
6261 | default: |
6262 | return true; |
6263 | } |
6264 | } |
6265 | |
6266 | // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that |
6267 | // SI/CI have. |
6268 | if (MRI.regsOverlap(RegA: AMDGPU::SGPR102_SGPR103, RegB: RegNo)) |
6269 | return hasSGPR102_SGPR103(); |
6270 | |
6271 | return true; |
6272 | } |
6273 | |
6274 | ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands, |
6275 | StringRef Mnemonic, |
6276 | OperandMode Mode) { |
6277 | ParseStatus Res = parseVOPD(Operands); |
6278 | if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement)) |
6279 | return Res; |
6280 | |
6281 | // Try to parse with a custom parser |
6282 | Res = MatchOperandParserImpl(Operands, Mnemonic); |
6283 | |
  // If we successfully parsed the operand or if there was an error parsing,
6285 | // we are done. |
6286 | // |
6287 | // If we are parsing after we reach EndOfStatement then this means we |
6288 | // are appending default values to the Operands list. This is only done |
6289 | // by custom parser, so we shouldn't continue on to the generic parsing. |
6290 | if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement)) |
6291 | return Res; |
6292 | |
6293 | SMLoc RBraceLoc; |
6294 | SMLoc LBraceLoc = getLoc(); |
6295 | if (Mode == OperandMode_NSA && trySkipToken(Kind: AsmToken::LBrac)) { |
6296 | unsigned Prefix = Operands.size(); |
6297 | |
6298 | for (;;) { |
6299 | auto Loc = getLoc(); |
6300 | Res = parseReg(Operands); |
6301 | if (Res.isNoMatch()) |
6302 | Error(L: Loc, Msg: "expected a register" ); |
6303 | if (!Res.isSuccess()) |
6304 | return ParseStatus::Failure; |
6305 | |
6306 | RBraceLoc = getLoc(); |
6307 | if (trySkipToken(Kind: AsmToken::RBrac)) |
6308 | break; |
6309 | |
6310 | if (!skipToken(Kind: AsmToken::Comma, |
6311 | ErrMsg: "expected a comma or a closing square bracket" )) |
6312 | return ParseStatus::Failure; |
6313 | } |
6314 | |
6315 | if (Operands.size() - Prefix > 1) { |
6316 | Operands.insert(I: Operands.begin() + Prefix, |
6317 | Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "[" , Loc: LBraceLoc)); |
6318 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "]" , Loc: RBraceLoc)); |
6319 | } |
6320 | |
6321 | return ParseStatus::Success; |
6322 | } |
6323 | |
6324 | return parseRegOrImm(Operands); |
6325 | } |
6326 | |
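// Strip a trailing encoding suffix from the mnemonic and remember it as a
// forced encoding, e.g. "v_add_f32_e64" is matched as "v_add_f32" with a
// forced 64-bit encoding.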
6327 | StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { |
6328 | // Clear any forced encodings from the previous instruction. |
6329 | setForcedEncodingSize(0); |
6330 | setForcedDPP(false); |
6331 | setForcedSDWA(false); |
6332 | |
6333 | if (Name.ends_with(Suffix: "_e64_dpp" )) { |
6334 | setForcedDPP(true); |
6335 | setForcedEncodingSize(64); |
6336 | return Name.substr(Start: 0, N: Name.size() - 8); |
6337 | } |
6338 | if (Name.ends_with(Suffix: "_e64" )) { |
6339 | setForcedEncodingSize(64); |
6340 | return Name.substr(Start: 0, N: Name.size() - 4); |
6341 | } |
6342 | if (Name.ends_with(Suffix: "_e32" )) { |
6343 | setForcedEncodingSize(32); |
6344 | return Name.substr(Start: 0, N: Name.size() - 4); |
6345 | } |
6346 | if (Name.ends_with(Suffix: "_dpp" )) { |
6347 | setForcedDPP(true); |
6348 | return Name.substr(Start: 0, N: Name.size() - 4); |
6349 | } |
6350 | if (Name.ends_with(Suffix: "_sdwa" )) { |
6351 | setForcedSDWA(true); |
6352 | return Name.substr(Start: 0, N: Name.size() - 5); |
6353 | } |
6354 | return Name; |
6355 | } |
6356 | |
6357 | static void applyMnemonicAliases(StringRef &Mnemonic, |
6358 | const FeatureBitset &Features, |
6359 | unsigned VariantID); |
6360 | |
6361 | bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, |
6362 | StringRef Name, |
6363 | SMLoc NameLoc, OperandVector &Operands) { |
6364 | // Add the instruction mnemonic |
6365 | Name = parseMnemonicSuffix(Name); |
6366 | |
6367 | // If the target architecture uses MnemonicAlias, call it here to parse |
6368 | // operands correctly. |
6369 | applyMnemonicAliases(Mnemonic&: Name, Features: getAvailableFeatures(), VariantID: 0); |
6370 | |
6371 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: NameLoc)); |
6372 | |
6373 | bool IsMIMG = Name.starts_with(Prefix: "image_" ); |
6374 | |
6375 | while (!trySkipToken(Kind: AsmToken::EndOfStatement)) { |
6376 | OperandMode Mode = OperandMode_Default; |
6377 | if (IsMIMG && isGFX10Plus() && Operands.size() == 2) |
6378 | Mode = OperandMode_NSA; |
6379 | ParseStatus Res = parseOperand(Operands, Mnemonic: Name, Mode); |
6380 | |
6381 | if (!Res.isSuccess()) { |
6382 | checkUnsupportedInstruction(Mnemo: Name, IDLoc: NameLoc); |
6383 | if (!Parser.hasPendingError()) { |
6384 | // FIXME: use real operand location rather than the current location. |
6385 | StringRef Msg = Res.isFailure() ? "failed parsing operand." |
6386 | : "not a valid operand." ; |
6387 | Error(L: getLoc(), Msg); |
6388 | } |
6389 | while (!trySkipToken(Kind: AsmToken::EndOfStatement)) { |
6390 | lex(); |
6391 | } |
6392 | return true; |
6393 | } |
6394 | |
6395 | // Eat the comma or space if there is one. |
6396 | trySkipToken(Kind: AsmToken::Comma); |
6397 | } |
6398 | |
6399 | return false; |
6400 | } |
6401 | |
6402 | //===----------------------------------------------------------------------===// |
6403 | // Utility functions |
6404 | //===----------------------------------------------------------------------===// |
6405 | |
6406 | ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, |
6407 | OperandVector &Operands) { |
6408 | SMLoc S = getLoc(); |
6409 | if (!trySkipId(Id: Name)) |
6410 | return ParseStatus::NoMatch; |
6411 | |
6412 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: S)); |
6413 | return ParseStatus::Success; |
6414 | } |
6415 | |
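// Parse an integer that follows a "Prefix:" introducer, e.g. "offset:16".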
6416 | ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, |
6417 | int64_t &IntVal) { |
6418 | |
6419 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6420 | return ParseStatus::NoMatch; |
6421 | |
6422 | return parseExpr(Imm&: IntVal) ? ParseStatus::Success : ParseStatus::Failure; |
6423 | } |
6424 | |
6425 | ParseStatus AMDGPUAsmParser::parseIntWithPrefix( |
6426 | const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, |
6427 | std::function<bool(int64_t &)> ConvertResult) { |
6428 | SMLoc S = getLoc(); |
6429 | int64_t Value = 0; |
6430 | |
6431 | ParseStatus Res = parseIntWithPrefix(Prefix, IntVal&: Value); |
6432 | if (!Res.isSuccess()) |
6433 | return Res; |
6434 | |
6435 | if (ConvertResult && !ConvertResult(Value)) { |
6436 | Error(L: S, Msg: "invalid " + StringRef(Prefix) + " value." ); |
6437 | } |
6438 | |
6439 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Value, Loc: S, Type: ImmTy)); |
6440 | return ParseStatus::Success; |
6441 | } |
6442 | |
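// Parse a bracketed list of 0/1 flags following "Prefix:", e.g. "neg:[0,1,1,0]",
// packing element I into bit I of the resulting immediate.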
6443 | ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( |
6444 | const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, |
6445 | bool (*ConvertResult)(int64_t &)) { |
6446 | SMLoc S = getLoc(); |
6447 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6448 | return ParseStatus::NoMatch; |
6449 | |
6450 | if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected a left square bracket" )) |
6451 | return ParseStatus::Failure; |
6452 | |
6453 | unsigned Val = 0; |
6454 | const unsigned MaxSize = 4; |
6455 | |
6456 | // FIXME: How to verify the number of elements matches the number of src |
6457 | // operands? |
6458 | for (int I = 0; ; ++I) { |
6459 | int64_t Op; |
6460 | SMLoc Loc = getLoc(); |
6461 | if (!parseExpr(Imm&: Op)) |
6462 | return ParseStatus::Failure; |
6463 | |
6464 | if (Op != 0 && Op != 1) |
6465 | return Error(L: Loc, Msg: "invalid " + StringRef(Prefix) + " value." ); |
6466 | |
6467 | Val |= (Op << I); |
6468 | |
6469 | if (trySkipToken(Kind: AsmToken::RBrac)) |
6470 | break; |
6471 | |
6472 | if (I + 1 == MaxSize) |
6473 | return Error(L: getLoc(), Msg: "expected a closing square bracket" ); |
6474 | |
6475 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) |
6476 | return ParseStatus::Failure; |
6477 | } |
6478 | |
6479 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: ImmTy)); |
6480 | return ParseStatus::Success; |
6481 | } |
6482 | |
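// Parse a boolean modifier given by its bare name or with a "no" prefix,
// e.g. "r128" sets the bit and "nor128" clears it.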
6483 | ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, |
6484 | OperandVector &Operands, |
6485 | AMDGPUOperand::ImmTy ImmTy) { |
6486 | int64_t Bit; |
6487 | SMLoc S = getLoc(); |
6488 | |
6489 | if (trySkipId(Id: Name)) { |
6490 | Bit = 1; |
6491 | } else if (trySkipId(Pref: "no" , Id: Name)) { |
6492 | Bit = 0; |
6493 | } else { |
6494 | return ParseStatus::NoMatch; |
6495 | } |
6496 | |
6497 | if (Name == "r128" && !hasMIMG_R128()) |
6498 | return Error(L: S, Msg: "r128 modifier is not supported on this GPU" ); |
6499 | if (Name == "a16" && !hasA16()) |
6500 | return Error(L: S, Msg: "a16 modifier is not supported on this GPU" ); |
6501 | |
6502 | if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) |
6503 | ImmTy = AMDGPUOperand::ImmTyR128A16; |
6504 | |
6505 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Bit, Loc: S, Type: ImmTy)); |
6506 | return ParseStatus::Success; |
6507 | } |
6508 | |
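// Map a cache-policy token, minus an optional "no" prefix, to its CPol bit;
// e.g. both "glc" and "noglc" yield CPol::GLC, with Disabling set for the
// latter.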
6509 | unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, |
6510 | bool &Disabling) const { |
6511 | Disabling = Id.consume_front(Prefix: "no" ); |
6512 | |
6513 | if (isGFX940() && !Mnemo.starts_with(Prefix: "s_" )) { |
6514 | return StringSwitch<unsigned>(Id) |
6515 | .Case(S: "nt" , Value: AMDGPU::CPol::NT) |
6516 | .Case(S: "sc0" , Value: AMDGPU::CPol::SC0) |
6517 | .Case(S: "sc1" , Value: AMDGPU::CPol::SC1) |
6518 | .Default(Value: 0); |
6519 | } |
6520 | |
6521 | return StringSwitch<unsigned>(Id) |
6522 | .Case(S: "dlc" , Value: AMDGPU::CPol::DLC) |
6523 | .Case(S: "glc" , Value: AMDGPU::CPol::GLC) |
6524 | .Case(S: "scc" , Value: AMDGPU::CPol::SCC) |
6525 | .Case(S: "slc" , Value: AMDGPU::CPol::SLC) |
6526 | .Default(Value: 0); |
6527 | } |
6528 | |
6529 | ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { |
6530 | if (isGFX12Plus()) { |
6531 | SMLoc StringLoc = getLoc(); |
6532 | |
6533 | int64_t CPolVal = 0; |
6534 | ParseStatus ResTH = ParseStatus::NoMatch; |
6535 | ParseStatus ResScope = ParseStatus::NoMatch; |
6536 | |
6537 | for (;;) { |
6538 | if (ResTH.isNoMatch()) { |
6539 | int64_t TH; |
6540 | ResTH = parseTH(Operands, TH); |
6541 | if (ResTH.isFailure()) |
6542 | return ResTH; |
6543 | if (ResTH.isSuccess()) { |
6544 | CPolVal |= TH; |
6545 | continue; |
6546 | } |
6547 | } |
6548 | |
6549 | if (ResScope.isNoMatch()) { |
6550 | int64_t Scope; |
6551 | ResScope = parseScope(Operands, Scope); |
6552 | if (ResScope.isFailure()) |
6553 | return ResScope; |
6554 | if (ResScope.isSuccess()) { |
6555 | CPolVal |= Scope; |
6556 | continue; |
6557 | } |
6558 | } |
6559 | |
6560 | break; |
6561 | } |
6562 | |
6563 | if (ResTH.isNoMatch() && ResScope.isNoMatch()) |
6564 | return ParseStatus::NoMatch; |
6565 | |
6566 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: CPolVal, Loc: StringLoc, |
6567 | Type: AMDGPUOperand::ImmTyCPol)); |
6568 | return ParseStatus::Success; |
6569 | } |
6570 | |
6571 | StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); |
6572 | SMLoc OpLoc = getLoc(); |
6573 | unsigned Enabled = 0, Seen = 0; |
6574 | for (;;) { |
6575 | SMLoc S = getLoc(); |
6576 | bool Disabling; |
6577 | unsigned CPol = getCPolKind(Id: getId(), Mnemo, Disabling); |
6578 | if (!CPol) |
6579 | break; |
6580 | |
6581 | lex(); |
6582 | |
6583 | if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) |
6584 | return Error(L: S, Msg: "dlc modifier is not supported on this GPU" ); |
6585 | |
6586 | if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) |
6587 | return Error(L: S, Msg: "scc modifier is not supported on this GPU" ); |
6588 | |
6589 | if (Seen & CPol) |
6590 | return Error(L: S, Msg: "duplicate cache policy modifier" ); |
6591 | |
6592 | if (!Disabling) |
6593 | Enabled |= CPol; |
6594 | |
6595 | Seen |= CPol; |
6596 | } |
6597 | |
6598 | if (!Seen) |
6599 | return ParseStatus::NoMatch; |
6600 | |
6601 | Operands.push_back( |
6602 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Enabled, Loc: OpLoc, Type: AMDGPUOperand::ImmTyCPol)); |
6603 | return ParseStatus::Success; |
6604 | } |
6605 | |
6606 | ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, |
6607 | int64_t &Scope) { |
  Scope = AMDGPU::CPol::SCOPE_CU; // default
6609 | |
6610 | StringRef Value; |
6611 | SMLoc StringLoc; |
6612 | ParseStatus Res; |
6613 | |
6614 | Res = parseStringWithPrefix(Prefix: "scope" , Value, StringLoc); |
6615 | if (!Res.isSuccess()) |
6616 | return Res; |
6617 | |
6618 | Scope = StringSwitch<int64_t>(Value) |
6619 | .Case(S: "SCOPE_CU" , Value: AMDGPU::CPol::SCOPE_CU) |
6620 | .Case(S: "SCOPE_SE" , Value: AMDGPU::CPol::SCOPE_SE) |
6621 | .Case(S: "SCOPE_DEV" , Value: AMDGPU::CPol::SCOPE_DEV) |
6622 | .Case(S: "SCOPE_SYS" , Value: AMDGPU::CPol::SCOPE_SYS) |
6623 | .Default(Value: 0xffffffff); |
6624 | |
6625 | if (Scope == 0xffffffff) |
6626 | return Error(L: StringLoc, Msg: "invalid scope value" ); |
6627 | |
6628 | return ParseStatus::Success; |
6629 | } |
6630 | |
6631 | ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { |
6632 | TH = AMDGPU::CPol::TH_RT; // default |
6633 | |
6634 | StringRef Value; |
6635 | SMLoc StringLoc; |
6636 | ParseStatus Res = parseStringWithPrefix(Prefix: "th" , Value, StringLoc); |
6637 | if (!Res.isSuccess()) |
6638 | return Res; |
6639 | |
6640 | if (Value == "TH_DEFAULT" ) |
6641 | TH = AMDGPU::CPol::TH_RT; |
6642 | else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" || |
6643 | Value == "TH_LOAD_NT_WB" ) { |
6644 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6645 | } else if (Value.consume_front(Prefix: "TH_ATOMIC_" )) { |
6646 | TH = AMDGPU::CPol::TH_TYPE_ATOMIC; |
6647 | } else if (Value.consume_front(Prefix: "TH_LOAD_" )) { |
6648 | TH = AMDGPU::CPol::TH_TYPE_LOAD; |
6649 | } else if (Value.consume_front(Prefix: "TH_STORE_" )) { |
6650 | TH = AMDGPU::CPol::TH_TYPE_STORE; |
6651 | } else { |
6652 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6653 | } |
6654 | |
6655 | if (Value == "BYPASS" ) |
6656 | TH |= AMDGPU::CPol::TH_REAL_BYPASS; |
6657 | |
6658 | if (TH != 0) { |
6659 | if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) |
6660 | TH |= StringSwitch<int64_t>(Value) |
6661 | .Case(S: "RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_RETURN) |
6662 | .Case(S: "RT" , Value: AMDGPU::CPol::TH_RT) |
6663 | .Case(S: "RT_RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_RETURN) |
6664 | .Case(S: "NT" , Value: AMDGPU::CPol::TH_ATOMIC_NT) |
6665 | .Case(S: "NT_RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_NT | |
6666 | AMDGPU::CPol::TH_ATOMIC_RETURN) |
6667 | .Case(S: "CASCADE_RT" , Value: AMDGPU::CPol::TH_ATOMIC_CASCADE) |
6668 | .Case(S: "CASCADE_NT" , Value: AMDGPU::CPol::TH_ATOMIC_CASCADE | |
6669 | AMDGPU::CPol::TH_ATOMIC_NT) |
6670 | .Default(Value: 0xffffffff); |
6671 | else |
6672 | TH |= StringSwitch<int64_t>(Value) |
6673 | .Case(S: "RT" , Value: AMDGPU::CPol::TH_RT) |
6674 | .Case(S: "NT" , Value: AMDGPU::CPol::TH_NT) |
6675 | .Case(S: "HT" , Value: AMDGPU::CPol::TH_HT) |
6676 | .Case(S: "LU" , Value: AMDGPU::CPol::TH_LU) |
6677 | .Case(S: "RT_WB" , Value: AMDGPU::CPol::TH_RT_WB) |
6678 | .Case(S: "NT_RT" , Value: AMDGPU::CPol::TH_NT_RT) |
6679 | .Case(S: "RT_NT" , Value: AMDGPU::CPol::TH_RT_NT) |
6680 | .Case(S: "NT_HT" , Value: AMDGPU::CPol::TH_NT_HT) |
6681 | .Case(S: "NT_WB" , Value: AMDGPU::CPol::TH_NT_WB) |
6682 | .Case(S: "BYPASS" , Value: AMDGPU::CPol::TH_BYPASS) |
6683 | .Default(Value: 0xffffffff); |
6684 | } |
6685 | |
6686 | if (TH == 0xffffffff) |
6687 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6688 | |
6689 | return ParseStatus::Success; |
6690 | } |
6691 | |
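// If the optional operand was parsed, add its immediate to the instruction;
// otherwise add the provided default value.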
6692 | static void addOptionalImmOperand( |
6693 | MCInst& Inst, const OperandVector& Operands, |
6694 | AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, |
6695 | AMDGPUOperand::ImmTy ImmT, |
6696 | int64_t Default = 0) { |
6697 | auto i = OptionalIdx.find(x: ImmT); |
6698 | if (i != OptionalIdx.end()) { |
6699 | unsigned Idx = i->second; |
6700 | ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, N: 1); |
6701 | } else { |
6702 | Inst.addOperand(Op: MCOperand::createImm(Val: Default)); |
6703 | } |
6704 | } |
6705 | |
6706 | ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, |
6707 | StringRef &Value, |
6708 | SMLoc &StringLoc) { |
6709 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6710 | return ParseStatus::NoMatch; |
6711 | |
6712 | StringLoc = getLoc(); |
6713 | return parseId(Val&: Value, ErrMsg: "expected an identifier" ) ? ParseStatus::Success |
6714 | : ParseStatus::Failure; |
6715 | } |
6716 | |
6717 | //===----------------------------------------------------------------------===// |
6718 | // MTBUF format |
6719 | //===----------------------------------------------------------------------===// |
6720 | |
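// Parse a "Pref:<value>" format field into Fmt. Returns false only on a hard
// error (parse failure or out-of-range value); a missing prefix leaves Fmt
// untouched and still returns true.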
6721 | bool AMDGPUAsmParser::tryParseFmt(const char *Pref, |
6722 | int64_t MaxVal, |
6723 | int64_t &Fmt) { |
6724 | int64_t Val; |
6725 | SMLoc Loc = getLoc(); |
6726 | |
6727 | auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: Val); |
6728 | if (Res.isFailure()) |
6729 | return false; |
6730 | if (Res.isNoMatch()) |
6731 | return true; |
6732 | |
6733 | if (Val < 0 || Val > MaxVal) { |
6734 | Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6735 | return false; |
6736 | } |
6737 | |
6738 | Fmt = Val; |
6739 | return true; |
6740 | } |
6741 | |
6742 | ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands, |
6743 | AMDGPUOperand::ImmTy ImmTy) { |
6744 | const char *Pref = "index_key" ; |
6745 | int64_t ImmVal = 0; |
6746 | SMLoc Loc = getLoc(); |
6747 | auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: ImmVal); |
6748 | if (!Res.isSuccess()) |
6749 | return Res; |
6750 | |
6751 | if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1)) |
6752 | return Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6753 | |
6754 | if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3)) |
6755 | return Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6756 | |
6757 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: ImmTy)); |
6758 | return ParseStatus::Success; |
6759 | } |
6760 | |
6761 | ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) { |
6762 | return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey8bit); |
6763 | } |
6764 | |
6765 | ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) { |
6766 | return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey16bit); |
6767 | } |
6768 | |
6769 | // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their |
6770 | // values to live in a joint format operand in the MCInst encoding. |
6771 | ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { |
6772 | using namespace llvm::AMDGPU::MTBUFFormat; |
6773 | |
6774 | int64_t Dfmt = DFMT_UNDEF; |
6775 | int64_t Nfmt = NFMT_UNDEF; |
6776 | |
6777 | // dfmt and nfmt can appear in either order, and each is optional. |
6778 | for (int I = 0; I < 2; ++I) { |
6779 | if (Dfmt == DFMT_UNDEF && !tryParseFmt(Pref: "dfmt" , MaxVal: DFMT_MAX, Fmt&: Dfmt)) |
6780 | return ParseStatus::Failure; |
6781 | |
6782 | if (Nfmt == NFMT_UNDEF && !tryParseFmt(Pref: "nfmt" , MaxVal: NFMT_MAX, Fmt&: Nfmt)) |
6783 | return ParseStatus::Failure; |
6784 | |
6785 | // Skip optional comma between dfmt/nfmt |
6786 | // but guard against 2 commas following each other. |
6787 | if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && |
6788 | !peekToken().is(K: AsmToken::Comma)) { |
6789 | trySkipToken(Kind: AsmToken::Comma); |
6790 | } |
6791 | } |
6792 | |
6793 | if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) |
6794 | return ParseStatus::NoMatch; |
6795 | |
6796 | Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; |
6797 | Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; |
6798 | |
6799 | Format = encodeDfmtNfmt(Dfmt, Nfmt); |
6800 | return ParseStatus::Success; |
6801 | } |
6802 | |
6803 | ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { |
6804 | using namespace llvm::AMDGPU::MTBUFFormat; |
6805 | |
6806 | int64_t Fmt = UFMT_UNDEF; |
6807 | |
6808 | if (!tryParseFmt(Pref: "format" , MaxVal: UFMT_MAX, Fmt)) |
6809 | return ParseStatus::Failure; |
6810 | |
6811 | if (Fmt == UFMT_UNDEF) |
6812 | return ParseStatus::NoMatch; |
6813 | |
6814 | Format = Fmt; |
6815 | return ParseStatus::Success; |
6816 | } |
6817 | |
6818 | bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, |
6819 | int64_t &Nfmt, |
6820 | StringRef FormatStr, |
6821 | SMLoc Loc) { |
6822 | using namespace llvm::AMDGPU::MTBUFFormat; |
6823 | int64_t Format; |
6824 | |
6825 | Format = getDfmt(Name: FormatStr); |
6826 | if (Format != DFMT_UNDEF) { |
6827 | Dfmt = Format; |
6828 | return true; |
6829 | } |
6830 | |
6831 | Format = getNfmt(Name: FormatStr, STI: getSTI()); |
6832 | if (Format != NFMT_UNDEF) { |
6833 | Nfmt = Format; |
6834 | return true; |
6835 | } |
6836 | |
6837 | Error(L: Loc, Msg: "unsupported format" ); |
6838 | return false; |
6839 | } |
6840 | |
6841 | ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, |
6842 | SMLoc FormatLoc, |
6843 | int64_t &Format) { |
6844 | using namespace llvm::AMDGPU::MTBUFFormat; |
6845 | |
6846 | int64_t Dfmt = DFMT_UNDEF; |
6847 | int64_t Nfmt = NFMT_UNDEF; |
6848 | if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, Loc: FormatLoc)) |
6849 | return ParseStatus::Failure; |
6850 | |
6851 | if (trySkipToken(Kind: AsmToken::Comma)) { |
6852 | StringRef Str; |
6853 | SMLoc Loc = getLoc(); |
6854 | if (!parseId(Val&: Str, ErrMsg: "expected a format string" ) || |
6855 | !matchDfmtNfmt(Dfmt, Nfmt, FormatStr: Str, Loc)) |
6856 | return ParseStatus::Failure; |
6857 | if (Dfmt == DFMT_UNDEF) |
6858 | return Error(L: Loc, Msg: "duplicate numeric format" ); |
6859 | if (Nfmt == NFMT_UNDEF) |
6860 | return Error(L: Loc, Msg: "duplicate data format" ); |
6861 | } |
6862 | |
6863 | Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; |
6864 | Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; |
6865 | |
6866 | if (isGFX10Plus()) { |
6867 | auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, STI: getSTI()); |
6868 | if (Ufmt == UFMT_UNDEF) |
6869 | return Error(L: FormatLoc, Msg: "unsupported format" ); |
6870 | Format = Ufmt; |
6871 | } else { |
6872 | Format = encodeDfmtNfmt(Dfmt, Nfmt); |
6873 | } |
6874 | |
6875 | return ParseStatus::Success; |
6876 | } |
6877 | |
6878 | ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, |
6879 | SMLoc Loc, |
6880 | int64_t &Format) { |
6881 | using namespace llvm::AMDGPU::MTBUFFormat; |
6882 | |
6883 | auto Id = getUnifiedFormat(Name: FormatStr, STI: getSTI()); |
6884 | if (Id == UFMT_UNDEF) |
6885 | return ParseStatus::NoMatch; |
6886 | |
6887 | if (!isGFX10Plus()) |
6888 | return Error(L: Loc, Msg: "unified format is not supported on this GPU" ); |
6889 | |
6890 | Format = Id; |
6891 | return ParseStatus::Success; |
6892 | } |
6893 | |
6894 | ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { |
6895 | using namespace llvm::AMDGPU::MTBUFFormat; |
6896 | SMLoc Loc = getLoc(); |
6897 | |
6898 | if (!parseExpr(Imm&: Format)) |
6899 | return ParseStatus::Failure; |
6900 | if (!isValidFormatEncoding(Val: Format, STI: getSTI())) |
6901 | return Error(L: Loc, Msg: "out of range format" ); |
6902 | |
6903 | return ParseStatus::Success; |
6904 | } |
6905 | |
6906 | ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { |
6907 | using namespace llvm::AMDGPU::MTBUFFormat; |
6908 | |
6909 | if (!trySkipId(Id: "format" , Kind: AsmToken::Colon)) |
6910 | return ParseStatus::NoMatch; |
6911 | |
6912 | if (trySkipToken(Kind: AsmToken::LBrac)) { |
6913 | StringRef FormatStr; |
6914 | SMLoc Loc = getLoc(); |
6915 | if (!parseId(Val&: FormatStr, ErrMsg: "expected a format string" )) |
6916 | return ParseStatus::Failure; |
6917 | |
6918 | auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); |
6919 | if (Res.isNoMatch()) |
6920 | Res = parseSymbolicSplitFormat(FormatStr, FormatLoc: Loc, Format); |
6921 | if (!Res.isSuccess()) |
6922 | return Res; |
6923 | |
6924 | if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket" )) |
6925 | return ParseStatus::Failure; |
6926 | |
6927 | return ParseStatus::Success; |
6928 | } |
6929 | |
6930 | return parseNumericFormat(Format); |
6931 | } |
6932 | |
6933 | ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { |
6934 | using namespace llvm::AMDGPU::MTBUFFormat; |
6935 | |
6936 | int64_t Format = getDefaultFormatEncoding(STI: getSTI()); |
6937 | ParseStatus Res; |
6938 | SMLoc Loc = getLoc(); |
6939 | |
6940 | // Parse legacy format syntax. |
6941 | Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); |
6942 | if (Res.isFailure()) |
6943 | return Res; |
6944 | |
6945 | bool FormatFound = Res.isSuccess(); |
6946 | |
6947 | Operands.push_back( |
6948 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Format, Loc, Type: AMDGPUOperand::ImmTyFORMAT)); |
6949 | |
6950 | if (FormatFound) |
6951 | trySkipToken(Kind: AsmToken::Comma); |
6952 | |
6953 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
6954 | // We are expecting an soffset operand, |
6955 | // but let matcher handle the error. |
6956 | return ParseStatus::Success; |
6957 | } |
6958 | |
6959 | // Parse soffset. |
6960 | Res = parseRegOrImm(Operands); |
6961 | if (!Res.isSuccess()) |
6962 | return Res; |
6963 | |
6964 | trySkipToken(Kind: AsmToken::Comma); |
6965 | |
6966 | if (!FormatFound) { |
6967 | Res = parseSymbolicOrNumericFormat(Format); |
6968 | if (Res.isFailure()) |
6969 | return Res; |
6970 | if (Res.isSuccess()) { |
6971 | auto Size = Operands.size(); |
6972 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); |
6973 | assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); |
6974 | Op.setImm(Format); |
6975 | } |
6976 | return ParseStatus::Success; |
6977 | } |
6978 | |
6979 | if (isId(Id: "format" ) && peekToken().is(K: AsmToken::Colon)) |
6980 | return Error(L: getLoc(), Msg: "duplicate format" ); |
6981 | return ParseStatus::Success; |
6982 | } |
6983 | |
6984 | ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { |
6985 | ParseStatus Res = |
6986 | parseIntWithPrefix(Prefix: "offset" , Operands, ImmTy: AMDGPUOperand::ImmTyOffset); |
6987 | if (Res.isNoMatch()) { |
6988 | Res = parseIntWithPrefix(Prefix: "inst_offset" , Operands, |
6989 | ImmTy: AMDGPUOperand::ImmTyInstOffset); |
6990 | } |
6991 | return Res; |
6992 | } |
6993 | |
6994 | ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { |
6995 | ParseStatus Res = |
6996 | parseNamedBit(Name: "r128" , Operands, ImmTy: AMDGPUOperand::ImmTyR128A16); |
6997 | if (Res.isNoMatch()) |
6998 | Res = parseNamedBit(Name: "a16" , Operands, ImmTy: AMDGPUOperand::ImmTyA16); |
6999 | return Res; |
7000 | } |
7001 | |
7002 | ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { |
7003 | ParseStatus Res = |
7004 | parseIntWithPrefix(Prefix: "blgp" , Operands, ImmTy: AMDGPUOperand::ImmTyBLGP); |
7005 | if (Res.isNoMatch()) { |
7006 | Res = |
7007 | parseOperandArrayWithPrefix(Prefix: "neg" , Operands, ImmTy: AMDGPUOperand::ImmTyBLGP); |
7008 | } |
7009 | return Res; |
7010 | } |
7011 | |
7012 | //===----------------------------------------------------------------------===// |
7013 | // Exp |
7014 | //===----------------------------------------------------------------------===// |
7015 | |
7016 | void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { |
7017 | OptionalImmIndexMap OptionalIdx; |
7018 | |
7019 | unsigned OperandIdx[4]; |
7020 | unsigned EnMask = 0; |
7021 | int SrcIdx = 0; |
7022 | |
7023 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
7024 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
7025 | |
7026 | // Add the register arguments |
7027 | if (Op.isReg()) { |
7028 | assert(SrcIdx < 4); |
7029 | OperandIdx[SrcIdx] = Inst.size(); |
7030 | Op.addRegOperands(Inst, N: 1); |
7031 | ++SrcIdx; |
7032 | continue; |
7033 | } |
7034 | |
7035 | if (Op.isOff()) { |
7036 | assert(SrcIdx < 4); |
7037 | OperandIdx[SrcIdx] = Inst.size(); |
7038 | Inst.addOperand(Op: MCOperand::createReg(Reg: AMDGPU::NoRegister)); |
7039 | ++SrcIdx; |
7040 | continue; |
7041 | } |
7042 | |
7043 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { |
7044 | Op.addImmOperands(Inst, N: 1); |
7045 | continue; |
7046 | } |
7047 | |
7048 | if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en" )) |
7049 | continue; |
7050 | |
7051 | // Handle optional arguments |
7052 | OptionalIdx[Op.getImmTy()] = i; |
7053 | } |
7054 | |
7055 | assert(SrcIdx == 4); |
7056 | |
7057 | bool Compr = false; |
7058 | if (OptionalIdx.find(x: AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { |
7059 | Compr = true; |
7060 | Inst.getOperand(i: OperandIdx[1]) = Inst.getOperand(i: OperandIdx[2]); |
7061 | Inst.getOperand(i: OperandIdx[2]).setReg(AMDGPU::NoRegister); |
7062 | Inst.getOperand(i: OperandIdx[3]).setReg(AMDGPU::NoRegister); |
7063 | } |
7064 | |
7065 | for (auto i = 0; i < SrcIdx; ++i) { |
7066 | if (Inst.getOperand(i: OperandIdx[i]).getReg() != AMDGPU::NoRegister) { |
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
7068 | } |
7069 | } |
7070 | |
7071 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpVM); |
7072 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpCompr); |
7073 | |
7074 | Inst.addOperand(Op: MCOperand::createImm(Val: EnMask)); |
7075 | } |
7076 | |
7077 | //===----------------------------------------------------------------------===// |
7078 | // s_waitcnt |
7079 | //===----------------------------------------------------------------------===// |
7080 | |
7081 | static bool |
7082 | encodeCnt( |
7083 | const AMDGPU::IsaVersion ISA, |
7084 | int64_t &IntVal, |
7085 | int64_t CntVal, |
7086 | bool Saturate, |
7087 | unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), |
7088 | unsigned (*decode)(const IsaVersion &Version, unsigned)) |
7089 | { |
7090 | bool Failed = false; |
7091 | |
7092 | IntVal = encode(ISA, IntVal, CntVal); |
7093 | if (CntVal != decode(ISA, IntVal)) { |
7094 | if (Saturate) { |
7095 | IntVal = encode(ISA, IntVal, -1); |
7096 | } else { |
7097 | Failed = true; |
7098 | } |
7099 | } |
7100 | return Failed; |
7101 | } |
7102 | |
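// Parse one "name(value)" term of an s_waitcnt operand and fold it into
// IntVal, e.g. "vmcnt(0)" in "s_waitcnt vmcnt(0) & lgkmcnt(0)".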
7103 | bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { |
7104 | |
7105 | SMLoc CntLoc = getLoc(); |
7106 | StringRef CntName = getTokenStr(); |
7107 | |
7108 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name" ) || |
7109 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7110 | return false; |
7111 | |
7112 | int64_t CntVal; |
7113 | SMLoc ValLoc = getLoc(); |
7114 | if (!parseExpr(Imm&: CntVal)) |
7115 | return false; |
7116 | |
7117 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU()); |
7118 | |
7119 | bool Failed = true; |
7120 | bool Sat = CntName.ends_with(Suffix: "_sat" ); |
7121 | |
7122 | if (CntName == "vmcnt" || CntName == "vmcnt_sat" ) { |
7123 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeVmcnt, decode: decodeVmcnt); |
7124 | } else if (CntName == "expcnt" || CntName == "expcnt_sat" ) { |
7125 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeExpcnt, decode: decodeExpcnt); |
7126 | } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat" ) { |
7127 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeLgkmcnt, decode: decodeLgkmcnt); |
7128 | } else { |
7129 | Error(L: CntLoc, Msg: "invalid counter name " + CntName); |
7130 | return false; |
7131 | } |
7132 | |
7133 | if (Failed) { |
7134 | Error(L: ValLoc, Msg: "too large value for " + CntName); |
7135 | return false; |
7136 | } |
7137 | |
7138 | if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7139 | return false; |
7140 | |
7141 | if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) { |
7142 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
7143 | Error(L: getLoc(), Msg: "expected a counter name" ); |
7144 | return false; |
7145 | } |
7146 | } |
7147 | |
7148 | return true; |
7149 | } |
7150 | |
7151 | ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { |
7152 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU()); |
7153 | int64_t Waitcnt = getWaitcntBitMask(Version: ISA); |
7154 | SMLoc S = getLoc(); |
7155 | |
7156 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7157 | while (!isToken(Kind: AsmToken::EndOfStatement)) { |
7158 | if (!parseCnt(IntVal&: Waitcnt)) |
7159 | return ParseStatus::Failure; |
7160 | } |
7161 | } else { |
7162 | if (!parseExpr(Imm&: Waitcnt)) |
7163 | return ParseStatus::Failure; |
7164 | } |
7165 | |
7166 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Waitcnt, Loc: S)); |
7167 | return ParseStatus::Success; |
7168 | } |
7169 | |
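// Parse one "field(VALUE)" term of an s_delay_alu operand, e.g.
// "instid0(VALU_DEP_1)" or "instskip(NEXT)", and merge it into Delay.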
7170 | bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { |
7171 | SMLoc FieldLoc = getLoc(); |
7172 | StringRef FieldName = getTokenStr(); |
7173 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a field name" ) || |
7174 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7175 | return false; |
7176 | |
7177 | SMLoc ValueLoc = getLoc(); |
7178 | StringRef ValueName = getTokenStr(); |
7179 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a value name" ) || |
7180 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a right parenthesis" )) |
7181 | return false; |
7182 | |
7183 | unsigned Shift; |
7184 | if (FieldName == "instid0" ) { |
7185 | Shift = 0; |
7186 | } else if (FieldName == "instskip" ) { |
7187 | Shift = 4; |
7188 | } else if (FieldName == "instid1" ) { |
7189 | Shift = 7; |
7190 | } else { |
7191 | Error(L: FieldLoc, Msg: "invalid field name " + FieldName); |
7192 | return false; |
7193 | } |
7194 | |
7195 | int Value; |
7196 | if (Shift == 4) { |
7197 | // Parse values for instskip. |
7198 | Value = StringSwitch<int>(ValueName) |
7199 | .Case(S: "SAME" , Value: 0) |
7200 | .Case(S: "NEXT" , Value: 1) |
7201 | .Case(S: "SKIP_1" , Value: 2) |
7202 | .Case(S: "SKIP_2" , Value: 3) |
7203 | .Case(S: "SKIP_3" , Value: 4) |
7204 | .Case(S: "SKIP_4" , Value: 5) |
7205 | .Default(Value: -1); |
7206 | } else { |
7207 | // Parse values for instid0 and instid1. |
7208 | Value = StringSwitch<int>(ValueName) |
7209 | .Case(S: "NO_DEP" , Value: 0) |
7210 | .Case(S: "VALU_DEP_1" , Value: 1) |
7211 | .Case(S: "VALU_DEP_2" , Value: 2) |
7212 | .Case(S: "VALU_DEP_3" , Value: 3) |
7213 | .Case(S: "VALU_DEP_4" , Value: 4) |
7214 | .Case(S: "TRANS32_DEP_1" , Value: 5) |
7215 | .Case(S: "TRANS32_DEP_2" , Value: 6) |
7216 | .Case(S: "TRANS32_DEP_3" , Value: 7) |
7217 | .Case(S: "FMA_ACCUM_CYCLE_1" , Value: 8) |
7218 | .Case(S: "SALU_CYCLE_1" , Value: 9) |
7219 | .Case(S: "SALU_CYCLE_2" , Value: 10) |
7220 | .Case(S: "SALU_CYCLE_3" , Value: 11) |
7221 | .Default(Value: -1); |
7222 | } |
7223 | if (Value < 0) { |
7224 | Error(L: ValueLoc, Msg: "invalid value name " + ValueName); |
7225 | return false; |
7226 | } |
7227 | |
7228 | Delay |= Value << Shift; |
7229 | return true; |
7230 | } |
7231 | |
7232 | ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { |
7233 | int64_t Delay = 0; |
7234 | SMLoc S = getLoc(); |
7235 | |
7236 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7237 | do { |
7238 | if (!parseDelay(Delay)) |
7239 | return ParseStatus::Failure; |
7240 | } while (trySkipToken(Kind: AsmToken::Pipe)); |
7241 | } else { |
7242 | if (!parseExpr(Imm&: Delay)) |
7243 | return ParseStatus::Failure; |
7244 | } |
7245 | |
7246 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Delay, Loc: S)); |
7247 | return ParseStatus::Success; |
7248 | } |
7249 | |
7250 | bool |
7251 | AMDGPUOperand::isSWaitCnt() const { |
7252 | return isImm(); |
7253 | } |
7254 | |
7255 | bool AMDGPUOperand::isSDelayALU() const { return isImm(); } |
7256 | |
7257 | //===----------------------------------------------------------------------===// |
7258 | // DepCtr |
7259 | //===----------------------------------------------------------------------===// |
7260 | |
7261 | void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, |
7262 | StringRef DepCtrName) { |
7263 | switch (ErrorId) { |
7264 | case OPR_ID_UNKNOWN: |
7265 | Error(L: Loc, Msg: Twine("invalid counter name " , DepCtrName)); |
7266 | return; |
7267 | case OPR_ID_UNSUPPORTED: |
7268 | Error(L: Loc, Msg: Twine(DepCtrName, " is not supported on this GPU" )); |
7269 | return; |
7270 | case OPR_ID_DUPLICATE: |
7271 | Error(L: Loc, Msg: Twine("duplicate counter name " , DepCtrName)); |
7272 | return; |
7273 | case OPR_VAL_INVALID: |
7274 | Error(L: Loc, Msg: Twine("invalid value for " , DepCtrName)); |
7275 | return; |
7276 | default: |
7277 | assert(false); |
7278 | } |
7279 | } |
7280 | |
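// Parse one "name(value)" term of an s_waitcnt_depctr operand, e.g.
// "depctr_va_vdst(0)", and merge the encoded field into DepCtr.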
7281 | bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { |
7282 | |
7283 | using namespace llvm::AMDGPU::DepCtr; |
7284 | |
7285 | SMLoc DepCtrLoc = getLoc(); |
7286 | StringRef DepCtrName = getTokenStr(); |
7287 | |
7288 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name" ) || |
7289 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7290 | return false; |
7291 | |
7292 | int64_t ExprVal; |
7293 | if (!parseExpr(Imm&: ExprVal)) |
7294 | return false; |
7295 | |
7296 | unsigned PrevOprMask = UsedOprMask; |
7297 | int CntVal = encodeDepCtr(Name: DepCtrName, Val: ExprVal, UsedOprMask, STI: getSTI()); |
7298 | |
7299 | if (CntVal < 0) { |
7300 | depCtrError(Loc: DepCtrLoc, ErrorId: CntVal, DepCtrName); |
7301 | return false; |
7302 | } |
7303 | |
7304 | if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7305 | return false; |
7306 | |
7307 | if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) { |
7308 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
7309 | Error(L: getLoc(), Msg: "expected a counter name" ); |
7310 | return false; |
7311 | } |
7312 | } |
7313 | |
7314 | unsigned CntValMask = PrevOprMask ^ UsedOprMask; |
7315 | DepCtr = (DepCtr & ~CntValMask) | CntVal; |
7316 | return true; |
7317 | } |
7318 | |
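// A depctr operand is either a raw immediate expression or one or more
// <counter>(<value>) terms separated by '&' or ','; each term is folded into
// the default encoding by the parseDepCtr helper above via encodeDepCtr.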
7319 | ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { |
7320 | using namespace llvm::AMDGPU::DepCtr; |
7321 | |
7322 | int64_t DepCtr = getDefaultDepCtrEncoding(STI: getSTI()); |
7323 | SMLoc Loc = getLoc(); |
7324 | |
7325 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7326 | unsigned UsedOprMask = 0; |
7327 | while (!isToken(Kind: AsmToken::EndOfStatement)) { |
7328 | if (!parseDepCtr(DepCtr, UsedOprMask)) |
7329 | return ParseStatus::Failure; |
7330 | } |
7331 | } else { |
7332 | if (!parseExpr(Imm&: DepCtr)) |
7333 | return ParseStatus::Failure; |
7334 | } |
7335 | |
7336 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DepCtr, Loc)); |
7337 | return ParseStatus::Success; |
7338 | } |
7339 | |
7340 | bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } |
7341 | |
7342 | //===----------------------------------------------------------------------===// |
7343 | // hwreg |
7344 | //===----------------------------------------------------------------------===// |
7345 | |
7346 | ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg, |
7347 | OperandInfoTy &Offset, |
7348 | OperandInfoTy &Width) { |
7349 | using namespace llvm::AMDGPU::Hwreg; |
7350 | |
7351 | if (!trySkipId(Id: "hwreg" , Kind: AsmToken::LParen)) |
7352 | return ParseStatus::NoMatch; |
7353 | |
7354 | // The register may be specified by name or using a numeric code |
7355 | HwReg.Loc = getLoc(); |
7356 | if (isToken(Kind: AsmToken::Identifier) && |
7357 | (HwReg.Val = getHwregId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) { |
7358 | HwReg.IsSymbolic = true; |
7359 | lex(); // skip register name |
7360 | } else if (!parseExpr(Imm&: HwReg.Val, Expected: "a register name" )) { |
7361 | return ParseStatus::Failure; |
7362 | } |
7363 | |
7364 | if (trySkipToken(Kind: AsmToken::RParen)) |
7365 | return ParseStatus::Success; |
7366 | |
7367 | // parse optional params |
7368 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma or a closing parenthesis" )) |
7369 | return ParseStatus::Failure; |
7370 | |
7371 | Offset.Loc = getLoc(); |
7372 | if (!parseExpr(Imm&: Offset.Val)) |
7373 | return ParseStatus::Failure; |
7374 | |
7375 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) |
7376 | return ParseStatus::Failure; |
7377 | |
7378 | Width.Loc = getLoc(); |
7379 | if (!parseExpr(Imm&: Width.Val) || |
7380 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7381 | return ParseStatus::Failure; |
7382 | |
7383 | return ParseStatus::Success; |
7384 | } |
7385 | |
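// A hwreg operand may be written in three forms, all handled below: the
// hwreg(<name or code>[, <bit offset>, <bitfield width>]) macro, the
// structured form {id: <expr>, offset: <expr>, size: <expr>}, or a plain
// 16-bit immediate expression holding the already-encoded value.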
7386 | ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { |
7387 | using namespace llvm::AMDGPU::Hwreg; |
7388 | |
7389 | int64_t ImmVal = 0; |
7390 | SMLoc Loc = getLoc(); |
7391 | |
7392 | StructuredOpField HwReg("id" , "hardware register" , HwregId::Width, |
7393 | HwregId::Default); |
7394 | StructuredOpField Offset("offset" , "bit offset" , HwregOffset::Width, |
7395 | HwregOffset::Default); |
7396 | struct : StructuredOpField { |
7397 | using StructuredOpField::StructuredOpField; |
7398 | bool validate(AMDGPUAsmParser &Parser) const override { |
7399 | if (!isUIntN(N: Width, x: Val - 1)) |
7400 | return Error(Parser, Err: "only values from 1 to 32 are legal" ); |
7401 | return true; |
7402 | } |
7403 | } Width("size" , "bitfield width" , HwregSize::Width, HwregSize::Default); |
7404 | ParseStatus Res = parseStructuredOpFields(Fields: {&HwReg, &Offset, &Width}); |
7405 | |
7406 | if (Res.isNoMatch()) |
7407 | Res = parseHwregFunc(HwReg, Offset, Width); |
7408 | |
7409 | if (Res.isSuccess()) { |
7410 | if (!validateStructuredOpFields(Fields: {&HwReg, &Offset, &Width})) |
7411 | return ParseStatus::Failure; |
7412 | ImmVal = HwregEncoding::encode(Values: HwReg.Val, Values: Offset.Val, Values: Width.Val); |
7413 | } |
7414 | |
7415 | if (Res.isNoMatch() && |
7416 | parseExpr(Imm&: ImmVal, Expected: "a hwreg macro, structured immediate" )) |
7417 | Res = ParseStatus::Success; |
7418 | |
7419 | if (!Res.isSuccess()) |
7420 | return ParseStatus::Failure; |
7421 | |
7422 | if (!isUInt<16>(x: ImmVal)) |
7423 | return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal" ); |
7424 | Operands.push_back( |
7425 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTyHwreg)); |
7426 | return ParseStatus::Success; |
7427 | } |
7428 | |
7429 | bool AMDGPUOperand::isHwreg() const { |
7430 | return isImmTy(ImmT: ImmTyHwreg); |
7431 | } |
7432 | |
7433 | //===----------------------------------------------------------------------===// |
7434 | // sendmsg |
7435 | //===----------------------------------------------------------------------===// |
7436 | |
7437 | bool |
7438 | AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, |
7439 | OperandInfoTy &Op, |
7440 | OperandInfoTy &Stream) { |
7441 | using namespace llvm::AMDGPU::SendMsg; |
7442 | |
7443 | Msg.Loc = getLoc(); |
7444 | if (isToken(Kind: AsmToken::Identifier) && |
7445 | (Msg.Val = getMsgId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) { |
7446 | Msg.IsSymbolic = true; |
7447 | lex(); // skip message name |
7448 | } else if (!parseExpr(Imm&: Msg.Val, Expected: "a message name" )) { |
7449 | return false; |
7450 | } |
7451 | |
7452 | if (trySkipToken(Kind: AsmToken::Comma)) { |
7453 | Op.IsDefined = true; |
7454 | Op.Loc = getLoc(); |
7455 | if (isToken(Kind: AsmToken::Identifier) && |
7456 | (Op.Val = getMsgOpId(MsgId: Msg.Val, Name: getTokenStr(), STI: getSTI())) != |
7457 | OPR_ID_UNKNOWN) { |
7458 | lex(); // skip operation name |
7459 | } else if (!parseExpr(Imm&: Op.Val, Expected: "an operation name" )) { |
7460 | return false; |
7461 | } |
7462 | |
7463 | if (trySkipToken(Kind: AsmToken::Comma)) { |
7464 | Stream.IsDefined = true; |
7465 | Stream.Loc = getLoc(); |
7466 | if (!parseExpr(Imm&: Stream.Val)) |
7467 | return false; |
7468 | } |
7469 | } |
7470 | |
7471 | return skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" ); |
7472 | } |
7473 | |
7474 | bool |
7475 | AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, |
7476 | const OperandInfoTy &Op, |
7477 | const OperandInfoTy &Stream) { |
7478 | using namespace llvm::AMDGPU::SendMsg; |
7479 | |
// Validation strictness depends on whether the message is specified
// in a symbolic or in a numeric form. In the latter case only the
// possibility of encoding is checked.
7483 | bool Strict = Msg.IsSymbolic; |
7484 | |
7485 | if (Strict) { |
7486 | if (Msg.Val == OPR_ID_UNSUPPORTED) { |
7487 | Error(L: Msg.Loc, Msg: "specified message id is not supported on this GPU" ); |
7488 | return false; |
7489 | } |
7490 | } else { |
7491 | if (!isValidMsgId(MsgId: Msg.Val, STI: getSTI())) { |
7492 | Error(L: Msg.Loc, Msg: "invalid message id" ); |
7493 | return false; |
7494 | } |
7495 | } |
7496 | if (Strict && (msgRequiresOp(MsgId: Msg.Val, STI: getSTI()) != Op.IsDefined)) { |
7497 | if (Op.IsDefined) { |
7498 | Error(L: Op.Loc, Msg: "message does not support operations" ); |
7499 | } else { |
7500 | Error(L: Msg.Loc, Msg: "missing message operation" ); |
7501 | } |
7502 | return false; |
7503 | } |
7504 | if (!isValidMsgOp(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI(), Strict)) { |
7505 | if (Op.Val == OPR_ID_UNSUPPORTED) |
7506 | Error(L: Op.Loc, Msg: "specified operation id is not supported on this GPU" ); |
7507 | else |
7508 | Error(L: Op.Loc, Msg: "invalid operation id" ); |
7509 | return false; |
7510 | } |
7511 | if (Strict && !msgSupportsStream(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI()) && |
7512 | Stream.IsDefined) { |
7513 | Error(L: Stream.Loc, Msg: "message operation does not support streams" ); |
7514 | return false; |
7515 | } |
7516 | if (!isValidMsgStream(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val, STI: getSTI(), Strict)) { |
7517 | Error(L: Stream.Loc, Msg: "invalid message stream id" ); |
7518 | return false; |
7519 | } |
7520 | return true; |
7521 | } |
7522 | |
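// A sendmsg operand is either the sendmsg(<msg>[, <op>[, <stream>]]) macro,
// checked by validateSendMsg above, or a plain 16-bit immediate expression
// holding the already-encoded value.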
7523 | ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { |
7524 | using namespace llvm::AMDGPU::SendMsg; |
7525 | |
7526 | int64_t ImmVal = 0; |
7527 | SMLoc Loc = getLoc(); |
7528 | |
7529 | if (trySkipId(Id: "sendmsg" , Kind: AsmToken::LParen)) { |
7530 | OperandInfoTy Msg(OPR_ID_UNKNOWN); |
7531 | OperandInfoTy Op(OP_NONE_); |
7532 | OperandInfoTy Stream(STREAM_ID_NONE_); |
7533 | if (parseSendMsgBody(Msg, Op, Stream) && |
7534 | validateSendMsg(Msg, Op, Stream)) { |
7535 | ImmVal = encodeMsg(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val); |
7536 | } else { |
7537 | return ParseStatus::Failure; |
7538 | } |
7539 | } else if (parseExpr(Imm&: ImmVal, Expected: "a sendmsg macro" )) { |
7540 | if (ImmVal < 0 || !isUInt<16>(x: ImmVal)) |
7541 | return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal" ); |
7542 | } else { |
7543 | return ParseStatus::Failure; |
7544 | } |
7545 | |
7546 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTySendMsg)); |
7547 | return ParseStatus::Success; |
7548 | } |
7549 | |
7550 | bool AMDGPUOperand::isSendMsg() const { |
7551 | return isImmTy(ImmT: ImmTySendMsg); |
7552 | } |
7553 | |
7554 | //===----------------------------------------------------------------------===// |
7555 | // v_interp |
7556 | //===----------------------------------------------------------------------===// |
7557 | |
7558 | ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { |
7559 | StringRef Str; |
7560 | SMLoc S = getLoc(); |
7561 | |
7562 | if (!parseId(Val&: Str)) |
7563 | return ParseStatus::NoMatch; |
7564 | |
7565 | int Slot = StringSwitch<int>(Str) |
7566 | .Case(S: "p10" , Value: 0) |
7567 | .Case(S: "p20" , Value: 1) |
7568 | .Case(S: "p0" , Value: 2) |
7569 | .Default(Value: -1); |
7570 | |
7571 | if (Slot == -1) |
7572 | return Error(L: S, Msg: "invalid interpolation slot" ); |
7573 | |
7574 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Slot, Loc: S, |
7575 | Type: AMDGPUOperand::ImmTyInterpSlot)); |
7576 | return ParseStatus::Success; |
7577 | } |
7578 | |
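// Parses an interpolation attribute operand of the form attr<N>.<chan>, where
// <chan> is one of x, y, z or w and <N> is at most 32. Two immediate operands
// are produced: the attribute number and the channel.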
7579 | ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { |
7580 | StringRef Str; |
7581 | SMLoc S = getLoc(); |
7582 | |
7583 | if (!parseId(Val&: Str)) |
7584 | return ParseStatus::NoMatch; |
7585 | |
7586 | if (!Str.starts_with(Prefix: "attr" )) |
7587 | return Error(L: S, Msg: "invalid interpolation attribute" ); |
7588 | |
7589 | StringRef Chan = Str.take_back(N: 2); |
7590 | int AttrChan = StringSwitch<int>(Chan) |
7591 | .Case(S: ".x" , Value: 0) |
7592 | .Case(S: ".y" , Value: 1) |
7593 | .Case(S: ".z" , Value: 2) |
7594 | .Case(S: ".w" , Value: 3) |
7595 | .Default(Value: -1); |
7596 | if (AttrChan == -1) |
7597 | return Error(L: S, Msg: "invalid or missing interpolation attribute channel" ); |
7598 | |
7599 | Str = Str.drop_back(N: 2).drop_front(N: 4); |
7600 | |
7601 | uint8_t Attr; |
7602 | if (Str.getAsInteger(Radix: 10, Result&: Attr)) |
7603 | return Error(L: S, Msg: "invalid or missing interpolation attribute number" ); |
7604 | |
7605 | if (Attr > 32) |
7606 | return Error(L: S, Msg: "out of bounds interpolation attribute number" ); |
7607 | |
7608 | SMLoc SChan = SMLoc::getFromPointer(Ptr: Chan.data()); |
7609 | |
7610 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Attr, Loc: S, |
7611 | Type: AMDGPUOperand::ImmTyInterpAttr)); |
7612 | Operands.push_back(Elt: AMDGPUOperand::CreateImm( |
7613 | AsmParser: this, Val: AttrChan, Loc: SChan, Type: AMDGPUOperand::ImmTyInterpAttrChan)); |
7614 | return ParseStatus::Success; |
7615 | } |
7616 | |
7617 | //===----------------------------------------------------------------------===// |
7618 | // exp |
7619 | //===----------------------------------------------------------------------===// |
7620 | |
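// Parses an export target operand by name (resolved through getTgtId) and
// rejects targets that are invalid or not supported on the current GPU.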
7621 | ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { |
7622 | using namespace llvm::AMDGPU::Exp; |
7623 | |
7624 | StringRef Str; |
7625 | SMLoc S = getLoc(); |
7626 | |
7627 | if (!parseId(Val&: Str)) |
7628 | return ParseStatus::NoMatch; |
7629 | |
7630 | unsigned Id = getTgtId(Name: Str); |
7631 | if (Id == ET_INVALID || !isSupportedTgtId(Id, STI: getSTI())) |
7632 | return Error(L: S, Msg: (Id == ET_INVALID) |
7633 | ? "invalid exp target" |
7634 | : "exp target is not supported on this GPU" ); |
7635 | |
7636 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Id, Loc: S, |
7637 | Type: AMDGPUOperand::ImmTyExpTgt)); |
7638 | return ParseStatus::Success; |
7639 | } |
7640 | |
7641 | //===----------------------------------------------------------------------===// |
7642 | // parser helpers |
7643 | //===----------------------------------------------------------------------===// |
7644 | |
7645 | bool |
7646 | AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { |
7647 | return Token.is(K: AsmToken::Identifier) && Token.getString() == Id; |
7648 | } |
7649 | |
7650 | bool |
7651 | AMDGPUAsmParser::isId(const StringRef Id) const { |
7652 | return isId(Token: getToken(), Id); |
7653 | } |
7654 | |
7655 | bool |
7656 | AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { |
7657 | return getTokenKind() == Kind; |
7658 | } |
7659 | |
7660 | StringRef AMDGPUAsmParser::getId() const { |
7661 | return isToken(Kind: AsmToken::Identifier) ? getTokenStr() : StringRef(); |
7662 | } |
7663 | |
7664 | bool |
7665 | AMDGPUAsmParser::trySkipId(const StringRef Id) { |
7666 | if (isId(Id)) { |
7667 | lex(); |
7668 | return true; |
7669 | } |
7670 | return false; |
7671 | } |
7672 | |
7673 | bool |
7674 | AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { |
7675 | if (isToken(Kind: AsmToken::Identifier)) { |
7676 | StringRef Tok = getTokenStr(); |
7677 | if (Tok.starts_with(Prefix: Pref) && Tok.drop_front(N: Pref.size()) == Id) { |
7678 | lex(); |
7679 | return true; |
7680 | } |
7681 | } |
7682 | return false; |
7683 | } |
7684 | |
7685 | bool |
7686 | AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { |
7687 | if (isId(Id) && peekToken().is(K: Kind)) { |
7688 | lex(); |
7689 | lex(); |
7690 | return true; |
7691 | } |
7692 | return false; |
7693 | } |
7694 | |
7695 | bool |
7696 | AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { |
7697 | if (isToken(Kind)) { |
7698 | lex(); |
7699 | return true; |
7700 | } |
7701 | return false; |
7702 | } |
7703 | |
7704 | bool |
7705 | AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, |
7706 | const StringRef ErrMsg) { |
7707 | if (!trySkipToken(Kind)) { |
7708 | Error(L: getLoc(), Msg: ErrMsg); |
7709 | return false; |
7710 | } |
7711 | return true; |
7712 | } |
7713 | |
7714 | bool |
7715 | AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { |
7716 | SMLoc S = getLoc(); |
7717 | |
7718 | const MCExpr *Expr; |
7719 | if (Parser.parseExpression(Res&: Expr)) |
7720 | return false; |
7721 | |
7722 | if (Expr->evaluateAsAbsolute(Res&: Imm)) |
7723 | return true; |
7724 | |
7725 | if (Expected.empty()) { |
7726 | Error(L: S, Msg: "expected absolute expression" ); |
7727 | } else { |
7728 | Error(L: S, Msg: Twine("expected " , Expected) + |
7729 | Twine(" or an absolute expression" )); |
7730 | } |
7731 | return false; |
7732 | } |
7733 | |
7734 | bool |
7735 | AMDGPUAsmParser::parseExpr(OperandVector &Operands) { |
7736 | SMLoc S = getLoc(); |
7737 | |
7738 | const MCExpr *Expr; |
7739 | if (Parser.parseExpression(Res&: Expr)) |
7740 | return false; |
7741 | |
7742 | int64_t IntVal; |
7743 | if (Expr->evaluateAsAbsolute(Res&: IntVal)) { |
7744 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S)); |
7745 | } else { |
7746 | Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S)); |
7747 | } |
7748 | return true; |
7749 | } |
7750 | |
7751 | bool |
7752 | AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { |
7753 | if (isToken(Kind: AsmToken::String)) { |
7754 | Val = getToken().getStringContents(); |
7755 | lex(); |
7756 | return true; |
7757 | } |
7758 | Error(L: getLoc(), Msg: ErrMsg); |
7759 | return false; |
7760 | } |
7761 | |
7762 | bool |
7763 | AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { |
7764 | if (isToken(Kind: AsmToken::Identifier)) { |
7765 | Val = getTokenStr(); |
7766 | lex(); |
7767 | return true; |
7768 | } |
7769 | if (!ErrMsg.empty()) |
7770 | Error(L: getLoc(), Msg: ErrMsg); |
7771 | return false; |
7772 | } |
7773 | |
7774 | AsmToken |
7775 | AMDGPUAsmParser::getToken() const { |
7776 | return Parser.getTok(); |
7777 | } |
7778 | |
7779 | AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { |
7780 | return isToken(Kind: AsmToken::EndOfStatement) |
7781 | ? getToken() |
7782 | : getLexer().peekTok(ShouldSkipSpace); |
7783 | } |
7784 | |
7785 | void |
7786 | AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { |
7787 | auto TokCount = getLexer().peekTokens(Buf: Tokens); |
7788 | |
7789 | for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) |
7790 | Tokens[Idx] = AsmToken(AsmToken::Error, "" ); |
7791 | } |
7792 | |
7793 | AsmToken::TokenKind |
7794 | AMDGPUAsmParser::getTokenKind() const { |
7795 | return getLexer().getKind(); |
7796 | } |
7797 | |
7798 | SMLoc |
7799 | AMDGPUAsmParser::getLoc() const { |
7800 | return getToken().getLoc(); |
7801 | } |
7802 | |
7803 | StringRef |
7804 | AMDGPUAsmParser::getTokenStr() const { |
7805 | return getToken().getString(); |
7806 | } |
7807 | |
7808 | void |
7809 | AMDGPUAsmParser::lex() { |
7810 | Parser.Lex(); |
7811 | } |
7812 | |
7813 | SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { |
7814 | return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); |
7815 | } |
7816 | |
7817 | SMLoc |
7818 | AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, |
7819 | const OperandVector &Operands) const { |
7820 | for (unsigned i = Operands.size() - 1; i > 0; --i) { |
7821 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
7822 | if (Test(Op)) |
7823 | return Op.getStartLoc(); |
7824 | } |
7825 | return getInstLoc(Operands); |
7826 | } |
7827 | |
7828 | SMLoc |
7829 | AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, |
7830 | const OperandVector &Operands) const { |
7831 | auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(ImmT: Type); }; |
7832 | return getOperandLoc(Test, Operands); |
7833 | } |
7834 | |
7835 | SMLoc |
7836 | AMDGPUAsmParser::getRegLoc(unsigned Reg, |
7837 | const OperandVector &Operands) const { |
7838 | auto Test = [=](const AMDGPUOperand& Op) { |
7839 | return Op.isRegKind() && Op.getReg() == Reg; |
7840 | }; |
7841 | return getOperandLoc(Test, Operands); |
7842 | } |
7843 | |
7844 | SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, |
7845 | bool SearchMandatoryLiterals) const { |
7846 | auto Test = [](const AMDGPUOperand& Op) { |
7847 | return Op.IsImmKindLiteral() || Op.isExpr(); |
7848 | }; |
7849 | SMLoc Loc = getOperandLoc(Test, Operands); |
7850 | if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) |
7851 | Loc = getMandatoryLitLoc(Operands); |
7852 | return Loc; |
7853 | } |
7854 | |
7855 | SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { |
7856 | auto Test = [](const AMDGPUOperand &Op) { |
7857 | return Op.IsImmKindMandatoryLiteral(); |
7858 | }; |
7859 | return getOperandLoc(Test, Operands); |
7860 | } |
7861 | |
7862 | SMLoc |
7863 | AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { |
7864 | auto Test = [](const AMDGPUOperand& Op) { |
7865 | return Op.isImmKindConst(); |
7866 | }; |
7867 | return getOperandLoc(Test, Operands); |
7868 | } |
7869 | |
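// Parses a brace-enclosed, comma-separated list of <field>: <expr> pairs into
// the given field descriptors. Unknown and duplicate field names are
// diagnosed; fields that are not mentioned keep their default values.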
7870 | ParseStatus |
7871 | AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) { |
7872 | if (!trySkipToken(Kind: AsmToken::LCurly)) |
7873 | return ParseStatus::NoMatch; |
7874 | |
7875 | bool First = true; |
7876 | while (!trySkipToken(Kind: AsmToken::RCurly)) { |
7877 | if (!First && |
7878 | !skipToken(Kind: AsmToken::Comma, ErrMsg: "comma or closing brace expected" )) |
7879 | return ParseStatus::Failure; |
7880 | |
7881 | StringRef Id = getTokenStr(); |
7882 | SMLoc IdLoc = getLoc(); |
7883 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "field name expected" ) || |
7884 | !skipToken(Kind: AsmToken::Colon, ErrMsg: "colon expected" )) |
7885 | return ParseStatus::Failure; |
7886 | |
7887 | auto I = |
7888 | find_if(Range&: Fields, P: [Id](StructuredOpField *F) { return F->Id == Id; }); |
7889 | if (I == Fields.end()) |
7890 | return Error(L: IdLoc, Msg: "unknown field" ); |
7891 | if ((*I)->IsDefined) |
7892 | return Error(L: IdLoc, Msg: "duplicate field" ); |
7893 | |
7894 | // TODO: Support symbolic values. |
7895 | (*I)->Loc = getLoc(); |
7896 | if (!parseExpr(Imm&: (*I)->Val)) |
7897 | return ParseStatus::Failure; |
7898 | (*I)->IsDefined = true; |
7899 | |
7900 | First = false; |
7901 | } |
7902 | return ParseStatus::Success; |
7903 | } |
7904 | |
7905 | bool AMDGPUAsmParser::validateStructuredOpFields( |
7906 | ArrayRef<const StructuredOpField *> Fields) { |
7907 | return all_of(Range&: Fields, P: [this](const StructuredOpField *F) { |
7908 | return F->validate(Parser&: *this); |
7909 | }); |
7910 | } |
7911 | |
7912 | //===----------------------------------------------------------------------===// |
7913 | // swizzle |
7914 | //===----------------------------------------------------------------------===// |
7915 | |
7916 | LLVM_READNONE |
7917 | static unsigned |
7918 | encodeBitmaskPerm(const unsigned AndMask, |
7919 | const unsigned OrMask, |
7920 | const unsigned XorMask) { |
7921 | using namespace llvm::AMDGPU::Swizzle; |
7922 | |
7923 | return BITMASK_PERM_ENC | |
7924 | (AndMask << BITMASK_AND_SHIFT) | |
7925 | (OrMask << BITMASK_OR_SHIFT) | |
7926 | (XorMask << BITMASK_XOR_SHIFT); |
7927 | } |
7928 | |
7929 | bool |
7930 | AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, |
7931 | const unsigned MinVal, |
7932 | const unsigned MaxVal, |
7933 | const StringRef ErrMsg, |
7934 | SMLoc &Loc) { |
7935 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) { |
7936 | return false; |
7937 | } |
7938 | Loc = getLoc(); |
7939 | if (!parseExpr(Imm&: Op)) { |
7940 | return false; |
7941 | } |
7942 | if (Op < MinVal || Op > MaxVal) { |
7943 | Error(L: Loc, Msg: ErrMsg); |
7944 | return false; |
7945 | } |
7946 | |
7947 | return true; |
7948 | } |
7949 | |
7950 | bool |
7951 | AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, |
7952 | const unsigned MinVal, |
7953 | const unsigned MaxVal, |
7954 | const StringRef ErrMsg) { |
7955 | SMLoc Loc; |
7956 | for (unsigned i = 0; i < OpNum; ++i) { |
7957 | if (!parseSwizzleOperand(Op&: Op[i], MinVal, MaxVal, ErrMsg, Loc)) |
7958 | return false; |
7959 | } |
7960 | |
7961 | return true; |
7962 | } |
7963 | |
7964 | bool |
7965 | AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { |
7966 | using namespace llvm::AMDGPU::Swizzle; |
7967 | |
7968 | int64_t Lane[LANE_NUM]; |
7969 | if (parseSwizzleOperands(OpNum: LANE_NUM, Op: Lane, MinVal: 0, MaxVal: LANE_MAX, |
7970 | ErrMsg: "expected a 2-bit lane id" )) { |
7971 | Imm = QUAD_PERM_ENC; |
7972 | for (unsigned I = 0; I < LANE_NUM; ++I) { |
7973 | Imm |= Lane[I] << (LANE_SHIFT * I); |
7974 | } |
7975 | return true; |
7976 | } |
7977 | return false; |
7978 | } |
7979 | |
7980 | bool |
7981 | AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { |
7982 | using namespace llvm::AMDGPU::Swizzle; |
7983 | |
7984 | SMLoc Loc; |
7985 | int64_t GroupSize; |
7986 | int64_t LaneIdx; |
7987 | |
7988 | if (!parseSwizzleOperand(Op&: GroupSize, |
7989 | MinVal: 2, MaxVal: 32, |
7990 | ErrMsg: "group size must be in the interval [2,32]" , |
7991 | Loc)) { |
7992 | return false; |
7993 | } |
7994 | if (!isPowerOf2_64(Value: GroupSize)) { |
7995 | Error(L: Loc, Msg: "group size must be a power of two" ); |
7996 | return false; |
7997 | } |
7998 | if (parseSwizzleOperand(Op&: LaneIdx, |
7999 | MinVal: 0, MaxVal: GroupSize - 1, |
8000 | ErrMsg: "lane id must be in the interval [0,group size - 1]" , |
8001 | Loc)) { |
8002 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX - GroupSize + 1, OrMask: LaneIdx, XorMask: 0); |
8003 | return true; |
8004 | } |
8005 | return false; |
8006 | } |
8007 | |
8008 | bool |
8009 | AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { |
8010 | using namespace llvm::AMDGPU::Swizzle; |
8011 | |
8012 | SMLoc Loc; |
8013 | int64_t GroupSize; |
8014 | |
8015 | if (!parseSwizzleOperand(Op&: GroupSize, |
8016 | MinVal: 2, MaxVal: 32, |
8017 | ErrMsg: "group size must be in the interval [2,32]" , |
8018 | Loc)) { |
8019 | return false; |
8020 | } |
8021 | if (!isPowerOf2_64(Value: GroupSize)) { |
8022 | Error(L: Loc, Msg: "group size must be a power of two" ); |
8023 | return false; |
8024 | } |
8025 | |
8026 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: 0, XorMask: GroupSize - 1); |
8027 | return true; |
8028 | } |
8029 | |
8030 | bool |
8031 | AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { |
8032 | using namespace llvm::AMDGPU::Swizzle; |
8033 | |
8034 | SMLoc Loc; |
8035 | int64_t GroupSize; |
8036 | |
8037 | if (!parseSwizzleOperand(Op&: GroupSize, |
8038 | MinVal: 1, MaxVal: 16, |
8039 | ErrMsg: "group size must be in the interval [1,16]" , |
8040 | Loc)) { |
8041 | return false; |
8042 | } |
8043 | if (!isPowerOf2_64(Value: GroupSize)) { |
8044 | Error(L: Loc, Msg: "group size must be a power of two" ); |
8045 | return false; |
8046 | } |
8047 | |
8048 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: 0, XorMask: GroupSize); |
8049 | return true; |
8050 | } |
8051 | |
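// The BITMASK_PERM control is a 5-character string with one character per
// lane-id bit, most significant bit first. Each character selects how that
// bit of the source lane id is formed: '0' forces it to 0, '1' forces it to
// 1, 'p' preserves it, and 'i' inverts it; the and/or/xor masks built below
// encode exactly this.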
8052 | bool |
8053 | AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { |
8054 | using namespace llvm::AMDGPU::Swizzle; |
8055 | |
8056 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) { |
8057 | return false; |
8058 | } |
8059 | |
8060 | StringRef Ctl; |
8061 | SMLoc StrLoc = getLoc(); |
8062 | if (!parseString(Val&: Ctl)) { |
8063 | return false; |
8064 | } |
8065 | if (Ctl.size() != BITMASK_WIDTH) { |
8066 | Error(L: StrLoc, Msg: "expected a 5-character mask" ); |
8067 | return false; |
8068 | } |
8069 | |
8070 | unsigned AndMask = 0; |
8071 | unsigned OrMask = 0; |
8072 | unsigned XorMask = 0; |
8073 | |
8074 | for (size_t i = 0; i < Ctl.size(); ++i) { |
8075 | unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); |
8076 | switch(Ctl[i]) { |
8077 | default: |
8078 | Error(L: StrLoc, Msg: "invalid mask" ); |
8079 | return false; |
8080 | case '0': |
8081 | break; |
8082 | case '1': |
8083 | OrMask |= Mask; |
8084 | break; |
8085 | case 'p': |
8086 | AndMask |= Mask; |
8087 | break; |
8088 | case 'i': |
8089 | AndMask |= Mask; |
8090 | XorMask |= Mask; |
8091 | break; |
8092 | } |
8093 | } |
8094 | |
8095 | Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); |
8096 | return true; |
8097 | } |
8098 | |
8099 | bool |
8100 | AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { |
8101 | |
8102 | SMLoc OffsetLoc = getLoc(); |
8103 | |
8104 | if (!parseExpr(Imm, Expected: "a swizzle macro" )) { |
8105 | return false; |
8106 | } |
8107 | if (!isUInt<16>(x: Imm)) { |
8108 | Error(L: OffsetLoc, Msg: "expected a 16-bit offset" ); |
8109 | return false; |
8110 | } |
8111 | return true; |
8112 | } |
8113 | |
8114 | bool |
8115 | AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { |
8116 | using namespace llvm::AMDGPU::Swizzle; |
8117 | |
if (skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) {
8119 | |
8120 | SMLoc ModeLoc = getLoc(); |
8121 | bool Ok = false; |
8122 | |
8123 | if (trySkipId(Id: IdSymbolic[ID_QUAD_PERM])) { |
8124 | Ok = parseSwizzleQuadPerm(Imm); |
8125 | } else if (trySkipId(Id: IdSymbolic[ID_BITMASK_PERM])) { |
8126 | Ok = parseSwizzleBitmaskPerm(Imm); |
8127 | } else if (trySkipId(Id: IdSymbolic[ID_BROADCAST])) { |
8128 | Ok = parseSwizzleBroadcast(Imm); |
8129 | } else if (trySkipId(Id: IdSymbolic[ID_SWAP])) { |
8130 | Ok = parseSwizzleSwap(Imm); |
8131 | } else if (trySkipId(Id: IdSymbolic[ID_REVERSE])) { |
8132 | Ok = parseSwizzleReverse(Imm); |
8133 | } else { |
8134 | Error(L: ModeLoc, Msg: "expected a swizzle mode" ); |
8135 | } |
8136 | |
return Ok && skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" );
8138 | } |
8139 | |
8140 | return false; |
8141 | } |
8142 | |
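// A swizzle operand is written either as offset:<16-bit imm> or as
// offset:swizzle(<mode>, ...), where <mode> is one of the names listed in
// IdSymbolic (QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE) and is
// dispatched by parseSwizzleMacro above.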
8143 | ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { |
8144 | SMLoc S = getLoc(); |
8145 | int64_t Imm = 0; |
8146 | |
8147 | if (trySkipId(Id: "offset" )) { |
8148 | |
8149 | bool Ok = false; |
8150 | if (skipToken(Kind: AsmToken::Colon, ErrMsg: "expected a colon" )) { |
8151 | if (trySkipId(Id: "swizzle" )) { |
8152 | Ok = parseSwizzleMacro(Imm); |
8153 | } else { |
8154 | Ok = parseSwizzleOffset(Imm); |
8155 | } |
8156 | } |
8157 | |
8158 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTySwizzle)); |
8159 | |
8160 | return Ok ? ParseStatus::Success : ParseStatus::Failure; |
8161 | } |
8162 | return ParseStatus::NoMatch; |
8163 | } |
8164 | |
8165 | bool |
8166 | AMDGPUOperand::isSwizzle() const { |
8167 | return isImmTy(ImmT: ImmTySwizzle); |
8168 | } |
8169 | |
8170 | //===----------------------------------------------------------------------===// |
8171 | // VGPR Index Mode |
8172 | //===----------------------------------------------------------------------===// |
8173 | |
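// Parses the body of a gpr_idx(...) macro: either an empty list, meaning OFF,
// or a comma-separated list of distinct VGPR index mode names. Returns the
// resulting mode bitmask, or UNDEF after reporting an error.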
8174 | int64_t AMDGPUAsmParser::parseGPRIdxMacro() { |
8175 | |
8176 | using namespace llvm::AMDGPU::VGPRIndexMode; |
8177 | |
8178 | if (trySkipToken(Kind: AsmToken::RParen)) { |
8179 | return OFF; |
8180 | } |
8181 | |
8182 | int64_t Imm = 0; |
8183 | |
8184 | while (true) { |
8185 | unsigned Mode = 0; |
8186 | SMLoc S = getLoc(); |
8187 | |
8188 | for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { |
8189 | if (trySkipId(Id: IdSymbolic[ModeId])) { |
8190 | Mode = 1 << ModeId; |
8191 | break; |
8192 | } |
8193 | } |
8194 | |
8195 | if (Mode == 0) { |
8196 | Error(L: S, Msg: (Imm == 0)? |
8197 | "expected a VGPR index mode or a closing parenthesis" : |
8198 | "expected a VGPR index mode" ); |
8199 | return UNDEF; |
8200 | } |
8201 | |
8202 | if (Imm & Mode) { |
8203 | Error(L: S, Msg: "duplicate VGPR index mode" ); |
8204 | return UNDEF; |
8205 | } |
8206 | Imm |= Mode; |
8207 | |
8208 | if (trySkipToken(Kind: AsmToken::RParen)) |
8209 | break; |
8210 | if (!skipToken(Kind: AsmToken::Comma, |
8211 | ErrMsg: "expected a comma or a closing parenthesis" )) |
8212 | return UNDEF; |
8213 | } |
8214 | |
8215 | return Imm; |
8216 | } |
8217 | |
8218 | ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { |
8219 | |
8220 | using namespace llvm::AMDGPU::VGPRIndexMode; |
8221 | |
8222 | int64_t Imm = 0; |
8223 | SMLoc S = getLoc(); |
8224 | |
8225 | if (trySkipId(Id: "gpr_idx" , Kind: AsmToken::LParen)) { |
8226 | Imm = parseGPRIdxMacro(); |
8227 | if (Imm == UNDEF) |
8228 | return ParseStatus::Failure; |
8229 | } else { |
8230 | if (getParser().parseAbsoluteExpression(Res&: Imm)) |
8231 | return ParseStatus::Failure; |
8232 | if (Imm < 0 || !isUInt<4>(x: Imm)) |
8233 | return Error(L: S, Msg: "invalid immediate: only 4-bit values are legal" ); |
8234 | } |
8235 | |
8236 | Operands.push_back( |
8237 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTyGprIdxMode)); |
8238 | return ParseStatus::Success; |
8239 | } |
8240 | |
8241 | bool AMDGPUOperand::isGPRIdxMode() const { |
8242 | return isImmTy(ImmT: ImmTyGprIdxMode); |
8243 | } |
8244 | |
8245 | //===----------------------------------------------------------------------===// |
8246 | // sopp branch targets |
8247 | //===----------------------------------------------------------------------===// |
8248 | |
8249 | ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { |
8250 | |
8251 | // Make sure we are not parsing something |
8252 | // that looks like a label or an expression but is not. |
8253 | // This will improve error messages. |
8254 | if (isRegister() || isModifier()) |
8255 | return ParseStatus::NoMatch; |
8256 | |
8257 | if (!parseExpr(Operands)) |
8258 | return ParseStatus::Failure; |
8259 | |
8260 | AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); |
8261 | assert(Opr.isImm() || Opr.isExpr()); |
8262 | SMLoc Loc = Opr.getStartLoc(); |
8263 | |
8264 | // Currently we do not support arbitrary expressions as branch targets. |
8265 | // Only labels and absolute expressions are accepted. |
8266 | if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { |
8267 | Error(L: Loc, Msg: "expected an absolute expression or a label" ); |
8268 | } else if (Opr.isImm() && !Opr.isS16Imm()) { |
8269 | Error(L: Loc, Msg: "expected a 16-bit signed jump offset" ); |
8270 | } |
8271 | |
8272 | return ParseStatus::Success; |
8273 | } |
8274 | |
8275 | //===----------------------------------------------------------------------===// |
8276 | // Boolean holding registers |
8277 | //===----------------------------------------------------------------------===// |
8278 | |
8279 | ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { |
8280 | return parseReg(Operands); |
8281 | } |
8282 | |
8283 | //===----------------------------------------------------------------------===// |
8284 | // mubuf |
8285 | //===----------------------------------------------------------------------===// |
8286 | |
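// Convert parsed MUBUF operands into MCInst operands: registers are added in
// order (with a tied source inserted after the destination for atomic-return
// opcodes), an immediate soffset is added directly, hard-coded tokens are
// skipped, and the remaining optional immediates are recorded and then
// appended as offset and cpol.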
8287 | void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, |
8288 | const OperandVector &Operands, |
8289 | bool IsAtomic) { |
8290 | OptionalImmIndexMap OptionalIdx; |
8291 | unsigned FirstOperandIdx = 1; |
8292 | bool IsAtomicReturn = false; |
8293 | |
8294 | if (IsAtomic) { |
8295 | IsAtomicReturn = MII.get(Opcode: Inst.getOpcode()).TSFlags & |
8296 | SIInstrFlags::IsAtomicRet; |
8297 | } |
8298 | |
8299 | for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { |
8300 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
8301 | |
8302 | // Add the register arguments |
8303 | if (Op.isReg()) { |
8304 | Op.addRegOperands(Inst, N: 1); |
8305 | // Insert a tied src for atomic return dst. |
// This cannot be postponed as subsequent calls to
// addImmOperands rely on the correct number of MC operands.
8308 | if (IsAtomicReturn && i == FirstOperandIdx) |
8309 | Op.addRegOperands(Inst, N: 1); |
8310 | continue; |
8311 | } |
8312 | |
8313 | // Handle the case where soffset is an immediate |
8314 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { |
8315 | Op.addImmOperands(Inst, N: 1); |
8316 | continue; |
8317 | } |
8318 | |
8319 | // Handle tokens like 'offen' which are sometimes hard-coded into the |
8320 | // asm string. There are no MCInst operands for these. |
8321 | if (Op.isToken()) { |
8322 | continue; |
8323 | } |
8324 | assert(Op.isImm()); |
8325 | |
8326 | // Handle optional arguments |
8327 | OptionalIdx[Op.getImmTy()] = i; |
8328 | } |
8329 | |
8330 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOffset); |
8331 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyCPol, Default: 0); |
8332 | } |
8333 | |
8334 | //===----------------------------------------------------------------------===// |
8335 | // smrd |
8336 | //===----------------------------------------------------------------------===// |
8337 | |
8338 | bool AMDGPUOperand::isSMRDOffset8() const { |
8339 | return isImmLiteral() && isUInt<8>(x: getImm()); |
8340 | } |
8341 | |
8342 | bool AMDGPUOperand::isSMEMOffset() const { |
8343 | // Offset range is checked later by validator. |
8344 | return isImmLiteral(); |
8345 | } |
8346 | |
8347 | bool AMDGPUOperand::isSMRDLiteralOffset() const { |
// 32-bit literals are only supported on CI, and we only want to use them
// when the offset does not fit in 8 bits.
8350 | return isImmLiteral() && !isUInt<8>(x: getImm()) && isUInt<32>(x: getImm()); |
8351 | } |
8352 | |
8353 | //===----------------------------------------------------------------------===// |
8354 | // vop3 |
8355 | //===----------------------------------------------------------------------===// |
8356 | |
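// Convert the value parsed after mul:/div: into the omod operand encoding:
// mul:2 -> 1, mul:4 -> 2, div:2 -> 3; a factor of 1 means no output modifier.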
8357 | static bool ConvertOmodMul(int64_t &Mul) { |
8358 | if (Mul != 1 && Mul != 2 && Mul != 4) |
8359 | return false; |
8360 | |
8361 | Mul >>= 1; |
8362 | return true; |
8363 | } |
8364 | |
8365 | static bool ConvertOmodDiv(int64_t &Div) { |
8366 | if (Div == 1) { |
8367 | Div = 0; |
8368 | return true; |
8369 | } |
8370 | |
8371 | if (Div == 2) { |
8372 | Div = 3; |
8373 | return true; |
8374 | } |
8375 | |
8376 | return false; |
8377 | } |
8378 | |
8379 | // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. |
8380 | // This is intentional and ensures compatibility with sp3. |
8381 | // See bug 35397 for details. |
8382 | bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { |
8383 | if (BoundCtrl == 0 || BoundCtrl == 1) { |
8384 | if (!isGFX11Plus()) |
8385 | BoundCtrl = 1; |
8386 | return true; |
8387 | } |
8388 | return false; |
8389 | } |
8390 | |
8391 | void AMDGPUAsmParser::onBeginOfFile() { |
8392 | if (!getParser().getStreamer().getTargetStreamer() || |
8393 | getSTI().getTargetTriple().getArch() == Triple::r600) |
8394 | return; |
8395 | |
8396 | if (!getTargetStreamer().getTargetID()) |
8397 | getTargetStreamer().initializeTargetID(STI: getSTI(), |
8398 | FeatureString: getSTI().getFeatureString()); |
8399 | |
8400 | if (isHsaAbi(STI: getSTI())) |
8401 | getTargetStreamer().EmitDirectiveAMDGCNTarget(); |
8402 | } |
8403 | |
/// Parse AMDGPU specific expressions.
///
/// expr ::= or(expr, ...) | max(expr, ...) | extrasgprs(expr, ...) |
///          totalnumvgprs(expr, ...) | alignto(expr, ...) | occupancy(expr, ...)
///
8409 | bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { |
8410 | using AGVK = AMDGPUMCExpr::VariantKind; |
8411 | |
8412 | if (isToken(Kind: AsmToken::Identifier)) { |
8413 | StringRef TokenId = getTokenStr(); |
8414 | AGVK VK = StringSwitch<AGVK>(TokenId) |
8415 | .Case(S: "max" , Value: AGVK::AGVK_Max) |
8416 | .Case(S: "or" , Value: AGVK::AGVK_Or) |
8417 | .Case(S: "extrasgprs" , Value: AGVK::AGVK_ExtraSGPRs) |
8418 | .Case(S: "totalnumvgprs" , Value: AGVK::AGVK_TotalNumVGPRs) |
8419 | .Case(S: "alignto" , Value: AGVK::AGVK_AlignTo) |
8420 | .Case(S: "occupancy" , Value: AGVK::AGVK_Occupancy) |
8421 | .Default(Value: AGVK::AGVK_None); |
8422 | |
8423 | if (VK != AGVK::AGVK_None && peekToken().is(K: AsmToken::LParen)) { |
8424 | SmallVector<const MCExpr *, 4> Exprs; |
8425 | uint64_t CommaCount = 0; |
8426 | lex(); // Eat Arg ('or', 'max', 'occupancy', etc.) |
8427 | lex(); // Eat '(' |
8428 | while (true) { |
8429 | if (trySkipToken(Kind: AsmToken::RParen)) { |
8430 | if (Exprs.empty()) { |
8431 | Error(L: getToken().getLoc(), |
8432 | Msg: "empty " + Twine(TokenId) + " expression" ); |
8433 | return true; |
8434 | } |
8435 | if (CommaCount + 1 != Exprs.size()) { |
8436 | Error(L: getToken().getLoc(), |
8437 | Msg: "mismatch of commas in " + Twine(TokenId) + " expression" ); |
8438 | return true; |
8439 | } |
8440 | Res = AMDGPUMCExpr::create(Kind: VK, Args: Exprs, Ctx&: getContext()); |
8441 | return false; |
8442 | } |
8443 | const MCExpr *Expr; |
8444 | if (getParser().parseExpression(Res&: Expr, EndLoc)) |
8445 | return true; |
8446 | Exprs.push_back(Elt: Expr); |
8447 | bool LastTokenWasComma = trySkipToken(Kind: AsmToken::Comma); |
8448 | if (LastTokenWasComma) |
8449 | CommaCount++; |
8450 | if (!LastTokenWasComma && !isToken(Kind: AsmToken::RParen)) { |
8451 | Error(L: getToken().getLoc(), |
8452 | Msg: "unexpected token in " + Twine(TokenId) + " expression" ); |
8453 | return true; |
8454 | } |
8455 | } |
8456 | } |
8457 | } |
8458 | return getParser().parsePrimaryExpr(Res, EndLoc, TypeInfo: nullptr); |
8459 | } |
8460 | |
8461 | ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { |
8462 | StringRef Name = getTokenStr(); |
8463 | if (Name == "mul" ) { |
8464 | return parseIntWithPrefix(Prefix: "mul" , Operands, |
8465 | ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodMul); |
8466 | } |
8467 | |
8468 | if (Name == "div" ) { |
8469 | return parseIntWithPrefix(Prefix: "div" , Operands, |
8470 | ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodDiv); |
8471 | } |
8472 | |
8473 | return ParseStatus::NoMatch; |
8474 | } |
8475 | |
8476 | // Determines which bit DST_OP_SEL occupies in the op_sel operand according to |
8477 | // the number of src operands present, then copies that bit into src0_modifiers. |
8478 | static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) { |
8479 | int Opc = Inst.getOpcode(); |
8480 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
8481 | if (OpSelIdx == -1) |
8482 | return; |
8483 | |
8484 | int SrcNum; |
8485 | const int Ops[] = { AMDGPU::OpName::src0, |
8486 | AMDGPU::OpName::src1, |
8487 | AMDGPU::OpName::src2 }; |
8488 | for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: Ops[SrcNum]); |
8489 | ++SrcNum) |
8490 | ; |
8491 | assert(SrcNum > 0); |
8492 | |
8493 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
8494 | |
8495 | int DstIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst); |
8496 | if (DstIdx == -1) |
8497 | return; |
8498 | |
8499 | const MCOperand &DstOp = Inst.getOperand(i: DstIdx); |
8500 | int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers); |
8501 | uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm(); |
8502 | if (DstOp.isReg() && |
8503 | MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: DstOp.getReg())) { |
8504 | if (AMDGPU::isHi(Reg: DstOp.getReg(), MRI)) |
8505 | ModVal |= SISrcMods::DST_OP_SEL; |
8506 | } else { |
8507 | if ((OpSel & (1 << SrcNum)) != 0) |
8508 | ModVal |= SISrcMods::DST_OP_SEL; |
8509 | } |
8510 | Inst.getOperand(i: ModIdx).setImm(ModVal); |
8511 | } |
8512 | |
8513 | void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, |
8514 | const OperandVector &Operands) { |
8515 | cvtVOP3P(Inst, Operands); |
8516 | cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI()); |
8517 | } |
8518 | |
8519 | void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, |
8520 | OptionalImmIndexMap &OptionalIdx) { |
8521 | cvtVOP3P(Inst, Operands, OptionalIdx); |
8522 | cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI()); |
8523 | } |
8524 | |
8525 | static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { |
8526 | return |
8527 | // 1. This operand is input modifiers |
8528 | Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS |
8529 | // 2. This is not last operand |
8530 | && Desc.NumOperands > (OpNum + 1) |
8531 | // 3. Next operand is register class |
8532 | && Desc.operands()[OpNum + 1].RegClass != -1 |
8533 | // 4. Next register is not tied to any other operand |
8534 | && Desc.getOperandConstraint(OpNum: OpNum + 1, |
8535 | Constraint: MCOI::OperandConstraint::TIED_TO) == -1; |
8536 | } |
8537 | |
8538 | void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) |
8539 | { |
8540 | OptionalImmIndexMap OptionalIdx; |
8541 | unsigned Opc = Inst.getOpcode(); |
8542 | |
8543 | unsigned I = 1; |
8544 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
8545 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8546 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1); |
8547 | } |
8548 | |
8549 | for (unsigned E = Operands.size(); I != E; ++I) { |
8550 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8551 | if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) { |
8552 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8553 | } else if (Op.isInterpSlot() || Op.isInterpAttr() || |
8554 | Op.isInterpAttrChan()) { |
8555 | Inst.addOperand(Op: MCOperand::createImm(Val: Op.getImm())); |
8556 | } else if (Op.isImmModifier()) { |
8557 | OptionalIdx[Op.getImmTy()] = I; |
8558 | } else { |
8559 | llvm_unreachable("unhandled operand type" ); |
8560 | } |
8561 | } |
8562 | |
8563 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::high)) |
8564 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8565 | ImmT: AMDGPUOperand::ImmTyHigh); |
8566 | |
8567 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp)) |
8568 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8569 | ImmT: AMDGPUOperand::ImmTyClamp); |
8570 | |
8571 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod)) |
8572 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8573 | ImmT: AMDGPUOperand::ImmTyOModSI); |
8574 | } |
8575 | |
8576 | void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) |
8577 | { |
8578 | OptionalImmIndexMap OptionalIdx; |
8579 | unsigned Opc = Inst.getOpcode(); |
8580 | |
8581 | unsigned I = 1; |
8582 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
8583 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8584 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1); |
8585 | } |
8586 | |
8587 | for (unsigned E = Operands.size(); I != E; ++I) { |
8588 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8589 | if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) { |
8590 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8591 | } else if (Op.isImmModifier()) { |
8592 | OptionalIdx[Op.getImmTy()] = I; |
8593 | } else { |
8594 | llvm_unreachable("unhandled operand type" ); |
8595 | } |
8596 | } |
8597 | |
8598 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyClamp); |
8599 | |
8600 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
8601 | if (OpSelIdx != -1) |
8602 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel); |
8603 | |
8604 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyWaitEXP); |
8605 | |
8606 | if (OpSelIdx == -1) |
8607 | return; |
8608 | |
8609 | const int Ops[] = { AMDGPU::OpName::src0, |
8610 | AMDGPU::OpName::src1, |
8611 | AMDGPU::OpName::src2 }; |
8612 | const int ModOps[] = { AMDGPU::OpName::src0_modifiers, |
8613 | AMDGPU::OpName::src1_modifiers, |
8614 | AMDGPU::OpName::src2_modifiers }; |
8615 | |
8616 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
8617 | |
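// Fold the op_sel bits into the source modifier operands: bit J selects
// OP_SEL_0 for srcJ, and bit 3 selects DST_OP_SEL, which is carried on
// src0_modifiers.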
8618 | for (int J = 0; J < 3; ++J) { |
8619 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: Ops[J]); |
8620 | if (OpIdx == -1) |
8621 | break; |
8622 | |
8623 | int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: ModOps[J]); |
8624 | uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm(); |
8625 | |
8626 | if ((OpSel & (1 << J)) != 0) |
8627 | ModVal |= SISrcMods::OP_SEL_0; |
8628 | if (ModOps[J] == AMDGPU::OpName::src0_modifiers && |
8629 | (OpSel & (1 << 3)) != 0) |
8630 | ModVal |= SISrcMods::DST_OP_SEL; |
8631 | |
8632 | Inst.getOperand(i: ModIdx).setImm(ModVal); |
8633 | } |
8634 | } |
8635 | |
8636 | void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, |
8637 | OptionalImmIndexMap &OptionalIdx) { |
8638 | unsigned Opc = Inst.getOpcode(); |
8639 | |
8640 | unsigned I = 1; |
8641 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
8642 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8643 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1); |
8644 | } |
8645 | |
8646 | for (unsigned E = Operands.size(); I != E; ++I) { |
8647 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8648 | if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) { |
8649 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8650 | } else if (Op.isImmModifier()) { |
8651 | OptionalIdx[Op.getImmTy()] = I; |
8652 | } else if (Op.isRegOrImm()) { |
8653 | Op.addRegOrImmOperands(Inst, N: 1); |
8654 | } else { |
8655 | llvm_unreachable("unhandled operand type" ); |
8656 | } |
8657 | } |
8658 | |
8659 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::byte_sel)) { |
8660 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in)) |
8661 | Inst.addOperand(Op: Inst.getOperand(i: 0)); |
8662 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8663 | ImmT: AMDGPUOperand::ImmTyByteSel); |
8664 | } |
8665 | |
8666 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp)) |
8667 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8668 | ImmT: AMDGPUOperand::ImmTyClamp); |
8669 | |
8670 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod)) |
8671 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8672 | ImmT: AMDGPUOperand::ImmTyOModSI); |
8673 | |
// Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
// they have a src2 register operand that is tied to the dst operand.
// We do not allow modifiers for this operand in the assembler, so
// src2_modifiers should be 0.
8678 | if (isMAC(Opc)) { |
8679 | auto it = Inst.begin(); |
8680 | std::advance(i&: it, n: AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src2_modifiers)); |
8681 | it = Inst.insert(I: it, Op: MCOperand::createImm(Val: 0)); // no modifiers for src2 |
8682 | ++it; |
8683 | // Copy the operand to ensure it's not invalidated when Inst grows. |
8684 | Inst.insert(I: it, Op: MCOperand(Inst.getOperand(i: 0))); // src2 = dst |
8685 | } |
8686 | } |
8687 | |
8688 | void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { |
8689 | OptionalImmIndexMap OptionalIdx; |
8690 | cvtVOP3(Inst, Operands, OptionalIdx); |
8691 | } |
8692 | |
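// Convert a parsed VOP3P instruction: add the tied or placeholder operands
// required by a few FP8/BF8 conversion opcodes, append the op_sel, op_sel_hi,
// neg_lo and neg_hi immediates when the opcode has them, and then fold their
// per-source bits into the corresponding srcN_modifiers operands.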
8693 | void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, |
8694 | OptionalImmIndexMap &OptIdx) { |
8695 | const int Opc = Inst.getOpcode(); |
8696 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
8697 | |
8698 | const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; |
8699 | |
8700 | if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || |
8701 | Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || |
8702 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 || |
8703 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) { |
8704 | Inst.addOperand(Op: MCOperand::createImm(Val: 0)); // Placeholder for src2_mods |
8705 | Inst.addOperand(Op: Inst.getOperand(i: 0)); |
8706 | } |
8707 | |
// Adding the vdst_in operand is already covered for these DPP instructions
// in cvtVOP3DPP.
8710 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in) && |
8711 | !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 || |
8712 | Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 || |
8713 | Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 || |
8714 | Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 || |
8715 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 || |
8716 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || |
8717 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || |
8718 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) { |
8719 | assert(!IsPacked); |
8720 | Inst.addOperand(Op: Inst.getOperand(i: 0)); |
8721 | } |
8722 | |
// FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
// instruction, and then figure out where to actually put the modifiers.
8725 | |
8726 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
8727 | if (OpSelIdx != -1) { |
8728 | addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSel); |
8729 | } |
8730 | |
8731 | int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel_hi); |
8732 | if (OpSelHiIdx != -1) { |
8733 | int DefaultVal = IsPacked ? -1 : 0; |
8734 | addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSelHi, |
8735 | Default: DefaultVal); |
8736 | } |
8737 | |
8738 | int NegLoIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_lo); |
8739 | if (NegLoIdx != -1) |
8740 | addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegLo); |
8741 | |
8742 | int NegHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_hi); |
8743 | if (NegHiIdx != -1) |
8744 | addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegHi); |
8745 | |
8746 | const int Ops[] = { AMDGPU::OpName::src0, |
8747 | AMDGPU::OpName::src1, |
8748 | AMDGPU::OpName::src2 }; |
8749 | const int ModOps[] = { AMDGPU::OpName::src0_modifiers, |
8750 | AMDGPU::OpName::src1_modifiers, |
8751 | AMDGPU::OpName::src2_modifiers }; |
8752 | |
8753 | unsigned OpSel = 0; |
8754 | unsigned OpSelHi = 0; |
8755 | unsigned NegLo = 0; |
8756 | unsigned NegHi = 0; |
8757 | |
8758 | if (OpSelIdx != -1) |
8759 | OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
8760 | |
8761 | if (OpSelHiIdx != -1) |
8762 | OpSelHi = Inst.getOperand(i: OpSelHiIdx).getImm(); |
8763 | |
8764 | if (NegLoIdx != -1) |
8765 | NegLo = Inst.getOperand(i: NegLoIdx).getImm(); |
8766 | |
8767 | if (NegHiIdx != -1) |
8768 | NegHi = Inst.getOperand(i: NegHiIdx).getImm(); |
8769 | |
8770 | for (int J = 0; J < 3; ++J) { |
8771 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: Ops[J]); |
8772 | if (OpIdx == -1) |
8773 | break; |
8774 | |
8775 | int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: ModOps[J]); |
8776 | |
8777 | if (ModIdx == -1) |
8778 | continue; |
8779 | |
8780 | uint32_t ModVal = 0; |
8781 | |
8782 | const MCOperand &SrcOp = Inst.getOperand(i: OpIdx); |
8783 | if (SrcOp.isReg() && getMRI() |
8784 | ->getRegClass(i: AMDGPU::VGPR_16RegClassID) |
8785 | .contains(Reg: SrcOp.getReg())) { |
8786 | bool VGPRSuffixIsHi = AMDGPU::isHi(Reg: SrcOp.getReg(), MRI: *getMRI()); |
8787 | if (VGPRSuffixIsHi) |
8788 | ModVal |= SISrcMods::OP_SEL_0; |
8789 | } else { |
8790 | if ((OpSel & (1 << J)) != 0) |
8791 | ModVal |= SISrcMods::OP_SEL_0; |
8792 | } |
8793 | |
8794 | if ((OpSelHi & (1 << J)) != 0) |
8795 | ModVal |= SISrcMods::OP_SEL_1; |
8796 | |
8797 | if ((NegLo & (1 << J)) != 0) |
8798 | ModVal |= SISrcMods::NEG; |
8799 | |
8800 | if ((NegHi & (1 << J)) != 0) |
8801 | ModVal |= SISrcMods::NEG_HI; |
8802 | |
8803 | Inst.getOperand(i: ModIdx).setImm(Inst.getOperand(i: ModIdx).getImm() | ModVal); |
8804 | } |
8805 | } |
8806 | |
8807 | void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { |
8808 | OptionalImmIndexMap OptIdx; |
8809 | cvtVOP3(Inst, Operands, OptionalIdx&: OptIdx); |
8810 | cvtVOP3P(Inst, Operands, OptIdx); |
8811 | } |
8812 | |
8813 | static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, |
8814 | unsigned i, unsigned Opc, unsigned OpName) { |
8815 | if (AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: OpName) != -1) |
8816 | ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8817 | else |
8818 | ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, N: 1); |
8819 | } |
8820 | |
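// Convert a parsed SWMMAC instruction: dst, then src0 and src1 (with FP input
// modifiers when the opcode has them), a tied copy of dst, then src2,
// followed by the optional index_key and clamp immediates; cvtVOP3P handles
// the remaining packed-operand modifiers.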
8821 | void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) { |
8822 | unsigned Opc = Inst.getOpcode(); |
8823 | |
8824 | ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, N: 1); |
8825 | addSrcModifiersAndSrc(Inst, Operands, i: 2, Opc, OpName: AMDGPU::OpName::src0_modifiers); |
8826 | addSrcModifiersAndSrc(Inst, Operands, i: 3, Opc, OpName: AMDGPU::OpName::src1_modifiers); |
8827 | ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, N: 1); // srcTiedDef |
8828 | ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, N: 1); // src2 |
8829 | |
8830 | OptionalImmIndexMap OptIdx; |
8831 | for (unsigned i = 5; i < Operands.size(); ++i) { |
8832 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
8833 | OptIdx[Op.getImmTy()] = i; |
8834 | } |
8835 | |
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey8bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey16bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
8846 | |
8847 | cvtVOP3P(Inst, Operands, OptIdx); |
8848 | } |
8849 | |
8850 | //===----------------------------------------------------------------------===// |
8851 | // VOPD |
8852 | //===----------------------------------------------------------------------===// |
8853 | |
8854 | ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { |
  if (!hasVOPD(getSTI()))
8856 | return ParseStatus::NoMatch; |
8857 | |
  if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8859 | SMLoc S = getLoc(); |
8860 | lex(); |
8861 | lex(); |
    Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8863 | SMLoc OpYLoc = getLoc(); |
8864 | StringRef OpYName; |
    if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
      Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8867 | return ParseStatus::Success; |
8868 | } |
    return Error(OpYLoc, "expected a VOPDY instruction after ::");
8870 | } |
8871 | return ParseStatus::NoMatch; |
8872 | } |
8873 | |
8874 | // Create VOPD MCInst operands using parsed assembler operands. |
8875 | void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { |
8876 | auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer |
8877 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); |
8878 | if (Op.isReg()) { |
      Op.addRegOperands(Inst, 1);
8880 | return; |
8881 | } |
8882 | if (Op.isImm()) { |
      Op.addImmOperands(Inst, 1);
8884 | return; |
8885 | } |
    llvm_unreachable("Unhandled operand type in cvtVOPD");
8887 | }; |
8888 | |
  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8890 | |
8891 | // MCInst operands are ordered as follows: |
8892 | // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] |
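  // For example, for a dual instruction of the (illustrative) shape
  //   OpX vX0, sX0, sX1 :: OpY vY0, sY0
  // the operands are emitted in the order: vX0, vY0, sX0, sX1, sY0.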
8893 | |
8894 | for (auto CompIdx : VOPD::COMPONENTS) { |
8895 | addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); |
8896 | } |
8897 | |
8898 | for (auto CompIdx : VOPD::COMPONENTS) { |
8899 | const auto &CInfo = InstInfo[CompIdx]; |
8900 | auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); |
8901 | for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) |
8902 | addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); |
8903 | if (CInfo.hasSrc2Acc()) |
8904 | addOp(CInfo.getIndexOfDstInParsedOperands()); |
8905 | } |
8906 | } |
8907 | |
8908 | //===----------------------------------------------------------------------===// |
8909 | // dpp |
8910 | //===----------------------------------------------------------------------===// |
8911 | |
8912 | bool AMDGPUOperand::isDPP8() const { |
  return isImmTy(ImmTyDPP8);
8914 | } |
8915 | |
8916 | bool AMDGPUOperand::isDPPCtrl() const { |
8917 | using namespace AMDGPU::DPP; |
8918 | |
  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8920 | if (result) { |
8921 | int64_t Imm = getImm(); |
8922 | return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || |
8923 | (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || |
8924 | (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || |
8925 | (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || |
8926 | (Imm == DppCtrl::WAVE_SHL1) || |
8927 | (Imm == DppCtrl::WAVE_ROL1) || |
8928 | (Imm == DppCtrl::WAVE_SHR1) || |
8929 | (Imm == DppCtrl::WAVE_ROR1) || |
8930 | (Imm == DppCtrl::ROW_MIRROR) || |
8931 | (Imm == DppCtrl::ROW_HALF_MIRROR) || |
8932 | (Imm == DppCtrl::BCAST15) || |
8933 | (Imm == DppCtrl::BCAST31) || |
8934 | (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || |
8935 | (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); |
8936 | } |
8937 | return false; |
8938 | } |
8939 | |
8940 | //===----------------------------------------------------------------------===// |
8941 | // mAI |
8942 | //===----------------------------------------------------------------------===// |
8943 | |
8944 | bool AMDGPUOperand::isBLGP() const { |
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8946 | } |
8947 | |
8948 | bool AMDGPUOperand::isS16Imm() const { |
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8950 | } |
8951 | |
8952 | bool AMDGPUOperand::isU16Imm() const { |
  return isImmLiteral() && isUInt<16>(getImm());
8954 | } |
8955 | |
8956 | //===----------------------------------------------------------------------===// |
8957 | // dim |
8958 | //===----------------------------------------------------------------------===// |
8959 | |
8960 | bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { |
8961 | // We want to allow "dim:1D" etc., |
8962 | // but the initial 1 is tokenized as an integer. |
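  // E.g. for "dim:2D" the lexer produces the integer token "2" followed by
  // the identifier "D"; the two are re-joined below, provided they are
  // adjacent in the input.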
8963 | std::string Token; |
  if (isToken(AsmToken::Integer)) {
8965 | SMLoc Loc = getToken().getEndLoc(); |
8966 | Token = std::string(getTokenStr()); |
8967 | lex(); |
8968 | if (getLoc() != Loc) |
8969 | return false; |
8970 | } |
8971 | |
8972 | StringRef Suffix; |
  if (!parseId(Suffix))
8974 | return false; |
8975 | Token += Suffix; |
8976 | |
8977 | StringRef DimId = Token; |
  if (DimId.starts_with("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8982 | if (!DimInfo) |
8983 | return false; |
8984 | |
8985 | Encoding = DimInfo->Encoding; |
8986 | return true; |
8987 | } |
8988 | |
8989 | ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { |
8990 | if (!isGFX10Plus()) |
8991 | return ParseStatus::NoMatch; |
8992 | |
8993 | SMLoc S = getLoc(); |
8994 | |
  if (!trySkipId("dim", AsmToken::Colon))
8996 | return ParseStatus::NoMatch; |
8997 | |
8998 | unsigned Encoding; |
8999 | SMLoc Loc = getLoc(); |
9000 | if (!parseDimId(Encoding)) |
    return Error(Loc, "invalid dim value");
9002 | |
  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
9005 | return ParseStatus::Success; |
9006 | } |
9007 | |
9008 | //===----------------------------------------------------------------------===// |
9009 | // dpp |
9010 | //===----------------------------------------------------------------------===// |
9011 | |
9012 | ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { |
9013 | SMLoc S = getLoc(); |
9014 | |
  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9016 | return ParseStatus::NoMatch; |
9017 | |
9018 | // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] |
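  // E.g. dpp8:[7,6,5,4,3,2,1,0] selects the lanes of each group of eight in
  // reverse order; the eight 3-bit selects are packed below as
  // Sels[i] << (i * 3).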
9019 | |
9020 | int64_t Sels[8]; |
9021 | |
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9023 | return ParseStatus::Failure; |
9024 | |
9025 | for (size_t i = 0; i < 8; ++i) { |
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9027 | return ParseStatus::Failure; |
9028 | |
9029 | SMLoc Loc = getLoc(); |
    if (getParser().parseAbsoluteExpression(Sels[i]))
9031 | return ParseStatus::Failure; |
9032 | if (0 > Sels[i] || 7 < Sels[i]) |
      return Error(Loc, "expected a 3-bit value");
9034 | } |
9035 | |
  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9037 | return ParseStatus::Failure; |
9038 | |
9039 | unsigned DPP8 = 0; |
9040 | for (size_t i = 0; i < 8; ++i) |
9041 | DPP8 |= (Sels[i] << (i * 3)); |
9042 | |
  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9044 | return ParseStatus::Success; |
9045 | } |
9046 | |
9047 | bool |
9048 | AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, |
9049 | const OperandVector &Operands) { |
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
9070 | } |
9071 | |
9072 | int64_t |
9073 | AMDGPUAsmParser::parseDPPCtrlPerm() { |
9074 | // quad_perm:[%d,%d,%d,%d] |
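  // E.g. quad_perm:[0,1,2,3] is the identity permutation; each 2-bit select
  // is packed below as Temp << (i * 2).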
9075 | |
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9077 | return -1; |
9078 | |
9079 | int64_t Val = 0; |
9080 | for (int i = 0; i < 4; ++i) { |
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9082 | return -1; |
9083 | |
9084 | int64_t Temp; |
9085 | SMLoc Loc = getLoc(); |
    if (getParser().parseAbsoluteExpression(Temp))
9087 | return -1; |
9088 | if (Temp < 0 || Temp > 3) { |
      Error(Loc, "expected a 2-bit value");
9090 | return -1; |
9091 | } |
9092 | |
9093 | Val += (Temp << i * 2); |
9094 | } |
9095 | |
  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9097 | return -1; |
9098 | |
9099 | return Val; |
9100 | } |
9101 | |
9102 | int64_t |
9103 | AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { |
9104 | using namespace AMDGPU::DPP; |
9105 | |
9106 | // sel:%d |
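  // E.g. row_shl:1 encodes as DppCtrl::ROW_SHL0 | 1, while row_bcast accepts
  // only 15 or 31 (BCAST15 / BCAST31); the table below lists the valid range
  // for each control.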
9107 | |
9108 | int64_t Val; |
9109 | SMLoc Loc = getLoc(); |
9110 | |
  if (getParser().parseAbsoluteExpression(Val))
9112 | return -1; |
9113 | |
9114 | struct DppCtrlCheck { |
9115 | int64_t Ctrl; |
9116 | int Lo; |
9117 | int Hi; |
9118 | }; |
9119 | |
  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});
9132 | |
9133 | bool Valid; |
9134 | if (Check.Ctrl == -1) { |
9135 | Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); |
9136 | Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; |
9137 | } else { |
9138 | Valid = Check.Lo <= Val && Val <= Check.Hi; |
9139 | Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); |
9140 | } |
9141 | |
9142 | if (!Valid) { |
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9144 | return -1; |
9145 | } |
9146 | |
9147 | return Val; |
9148 | } |
9149 | |
9150 | ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { |
9151 | using namespace AMDGPU::DPP; |
9152 | |
  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
9155 | return ParseStatus::NoMatch; |
9156 | |
9157 | SMLoc S = getLoc(); |
9158 | int64_t Val = -1; |
9159 | StringRef Ctrl; |
9160 | |
  parseId(Ctrl);
9162 | |
  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
9170 | Val = parseDPPCtrlPerm(); |
9171 | } else { |
9172 | Val = parseDPPCtrlSel(Ctrl); |
9173 | } |
9174 | } |
9175 | } |
9176 | |
9177 | if (Val == -1) |
9178 | return ParseStatus::Failure; |
9179 | |
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9182 | return ParseStatus::Success; |
9183 | } |
9184 | |
9185 | void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, |
9186 | bool IsDPP8) { |
9187 | OptionalImmIndexMap OptionalIdx; |
9188 | unsigned Opc = Inst.getOpcode(); |
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // MAC instructions are special because they have 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have dummy unused src2_modifiers.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9199 | |
9200 | unsigned I = 1; |
9201 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9203 | } |
9204 | |
9205 | int Fi = 0; |
9206 | for (unsigned E = Operands.size(); I != E; ++I) { |
9207 | |
9208 | if (IsMAC) { |
9209 | int NumOperands = Inst.getNumOperands(); |
9210 | if (OldIdx == NumOperands) { |
9211 | // Handle old operand |
9212 | constexpr int DST_IDX = 0; |
        Inst.addOperand(Inst.getOperand(DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add unused dummy src2_modifiers
        Inst.addOperand(MCOperand::createImm(0));
9217 | } |
9218 | } |
9219 | |
    int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
      Inst.addOperand(Inst.getOperand(0));
9223 | } |
9224 | |
9225 | bool IsVOP3CvtSrDpp = |
9226 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 || |
9227 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || |
9228 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || |
9229 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12; |
9230 | if (IsVOP3CvtSrDpp) { |
9231 | if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) { |
        Inst.addOperand(MCOperand::createImm(0));
        Inst.addOperand(MCOperand::createReg(0));
9234 | } |
9235 | } |
9236 | |
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
9239 | if (TiedTo != -1) { |
9240 | assert((unsigned)TiedTo < Inst.getNumOperands()); |
9241 | // handle tied old or src2 for MAC instructions |
      Inst.addOperand(Inst.getOperand(TiedTo));
9243 | } |
9244 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
9245 | // Add the register arguments |
9246 | if (IsDPP8 && Op.isDppFI()) { |
9247 | Fi = Op.getImm(); |
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
9256 | } else if (Op.isImm()) { |
9257 | OptionalIdx[Op.getImmTy()] = I; |
9258 | } else { |
9259 | llvm_unreachable("unhandled operand type" ); |
9260 | } |
9261 | } |
9262 | |
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9273 | |
9274 | if (Desc.TSFlags & SIInstrFlags::VOP3P) |
    cvtVOP3P(Inst, Operands, OptionalIdx);
9276 | else if (Desc.TSFlags & SIInstrFlags::VOP3) |
9277 | cvtVOP3OpSel(Inst, Operands, OptionalIdx); |
  else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9280 | } |
9281 | |
9282 | if (IsDPP8) { |
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
9295 | } |
9296 | } |
9297 | |
9298 | void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { |
9299 | OptionalImmIndexMap OptionalIdx; |
9300 | |
9301 | unsigned I = 1; |
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9305 | } |
9306 | |
9307 | int Fi = 0; |
9308 | for (unsigned E = Operands.size(); I != E; ++I) { |
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
9319 | // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. |
9320 | // Skip it. |
9321 | continue; |
9322 | } |
9323 | |
9324 | if (IsDPP8) { |
9325 | if (Op.isDPP8()) { |
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
9348 | } |
9349 | } |
9350 | } |
9351 | |
9352 | if (IsDPP8) { |
9353 | using namespace llvm::AMDGPU::DPP; |
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
9362 | } |
9363 | } |
9364 | } |
9365 | |
9366 | //===----------------------------------------------------------------------===// |
9367 | // sdwa |
9368 | //===----------------------------------------------------------------------===// |
9369 | |
9370 | ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, |
9371 | StringRef Prefix, |
9372 | AMDGPUOperand::ImmTy Type) { |
9373 | using namespace llvm::AMDGPU::SDWA; |
9374 | |
9375 | SMLoc S = getLoc(); |
9376 | StringRef Value; |
9377 | |
9378 | SMLoc StringLoc; |
9379 | ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc); |
9380 | if (!Res.isSuccess()) |
9381 | return Res; |
9382 | |
9383 | int64_t Int; |
9384 | Int = StringSwitch<int64_t>(Value) |
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff)
    return Error(StringLoc, "invalid " + Twine(Prefix) + " value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9398 | return ParseStatus::Success; |
9399 | } |
9400 | |
9401 | ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { |
9402 | using namespace llvm::AMDGPU::SDWA; |
9403 | |
9404 | SMLoc S = getLoc(); |
9405 | StringRef Value; |
9406 | |
9407 | SMLoc StringLoc; |
  ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9409 | if (!Res.isSuccess()) |
9410 | return Res; |
9411 | |
9412 | int64_t Int; |
9413 | Int = StringSwitch<int64_t>(Value) |
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff)
    return Error(StringLoc, "invalid dst_unused value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9423 | return ParseStatus::Success; |
9424 | } |
9425 | |
9426 | void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { |
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9428 | } |
9429 | |
9430 | void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { |
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9432 | } |
9433 | |
9434 | void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { |
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9436 | } |
9437 | |
9438 | void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { |
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9440 | } |
9441 | |
9442 | void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { |
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9444 | } |
9445 | |
9446 | void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, |
9447 | uint64_t BasicInstType, |
9448 | bool SkipDstVcc, |
9449 | bool SkipSrcVcc) { |
9450 | using namespace llvm::AMDGPU::SDWA; |
9451 | |
9452 | OptionalImmIndexMap OptionalIdx; |
9453 | bool SkipVcc = SkipDstVcc || SkipSrcVcc; |
9454 | bool SkippedVcc = false; |
9455 | |
9456 | unsigned I = 1; |
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9460 | } |
9461 | |
9462 | for (unsigned E = Operands.size(); I != E; ++I) { |
9463 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
9464 | if (SkipVcc && !SkippedVcc && Op.isReg() && |
9465 | (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { |
9466 | // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. |
9467 | // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) |
9468 | // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. |
9469 | // Skip VCC only if we didn't skip it on previous iteration. |
9470 | // Note that src0 and src1 occupy 2 slots each because of modifiers. |
9471 | if (BasicInstType == SIInstrFlags::VOP2 && |
9472 | ((SkipDstVcc && Inst.getNumOperands() == 1) || |
9473 | (SkipSrcVcc && Inst.getNumOperands() == 5))) { |
9474 | SkippedVcc = true; |
9475 | continue; |
9476 | } |
9477 | if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) { |
9478 | SkippedVcc = true; |
9479 | continue; |
9480 | } |
9481 | } |
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
9484 | } else if (Op.isImm()) { |
9485 | // Handle optional arguments |
9486 | OptionalIdx[Op.getImmTy()] = I; |
9487 | } else { |
      llvm_unreachable("Invalid operand type");
9489 | } |
9490 | SkippedVcc = false; |
9491 | } |
9492 | |
9493 | const unsigned Opc = Inst.getOpcode(); |
9494 | if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && |
9495 | Opc != AMDGPU::V_NOP_sdwa_vi) { |
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
9497 | switch (BasicInstType) { |
9498 | case SIInstrFlags::VOP1: |
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9517 | break; |
9518 | |
9519 | case SIInstrFlags::VOP2: |
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9530 | break; |
9531 | |
9532 | case SIInstrFlags::VOPC: |
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9538 | break; |
9539 | |
9540 | default: |
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9542 | } |
9543 | } |
9544 | |
9545 | // special case v_mac_{f16, f32}: |
9546 | // it has src2 register operand that is tied to dst operand |
9547 | if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || |
9548 | Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { |
9549 | auto it = Inst.begin(); |
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9553 | } |
9554 | } |
9555 | |
9556 | /// Force static initialization. |
9557 | extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { |
9558 | RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target()); |
9559 | RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); |
9560 | } |
9561 | |
9562 | #define GET_REGISTER_MATCHER |
9563 | #define GET_MATCHER_IMPLEMENTATION |
9564 | #define GET_MNEMONIC_SPELL_CHECKER |
9565 | #define GET_MNEMONIC_CHECKER |
9566 | #include "AMDGPUGenAsmMatcher.inc" |
9567 | |
9568 | ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, |
9569 | unsigned MCK) { |
9570 | switch (MCK) { |
9571 | case MCK_addr64: |
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9589 | } |
9590 | return tryCustomParseOperand(Operands, MCK); |
9591 | } |
9592 | |
9593 | // This function should be defined after auto-generated include so that we have |
9594 | // MatchClassKind enum defined |
9595 | unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, |
9596 | unsigned Kind) { |
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // but expected the corresponding token instead.
9601 | AMDGPUOperand &Operand = (AMDGPUOperand&)Op; |
9602 | switch (Kind) { |
9603 | case MCK_addr64: |
9604 | return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; |
9605 | case MCK_gds: |
9606 | return Operand.isGDS() ? Match_Success : Match_InvalidOperand; |
9607 | case MCK_lds: |
9608 | return Operand.isLDS() ? Match_Success : Match_InvalidOperand; |
9609 | case MCK_idxen: |
9610 | return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; |
9611 | case MCK_offen: |
9612 | return Operand.isOffen() ? Match_Success : Match_InvalidOperand; |
9613 | case MCK_tfe: |
9614 | return Operand.isTFE() ? Match_Success : Match_InvalidOperand; |
9615 | case MCK_SSrc_b32: |
9616 | // When operands have expression values, they will return true for isToken, |
9617 | // because it is not possible to distinguish between a token and an |
9618 | // expression at parse time. MatchInstructionImpl() will always try to |
9619 | // match an operand as a token, when isToken returns true, and when the |
9620 | // name of the expression is not a valid token, the match will fail, |
9621 | // so we need to handle it here. |
9622 | return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand; |
9623 | case MCK_SSrc_f32: |
9624 | return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand; |
9625 | case MCK_SOPPBrTarget: |
9626 | return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand; |
9627 | case MCK_VReg32OrOff: |
9628 | return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; |
9629 | case MCK_InterpSlot: |
9630 | return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; |
9631 | case MCK_InterpAttr: |
9632 | return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; |
9633 | case MCK_InterpAttrChan: |
9634 | return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand; |
9635 | case MCK_SReg_64: |
9636 | case MCK_SReg_64_XEXEC: |
9637 | // Null is defined as a 32-bit register but |
9638 | // it should also be enabled with 64-bit operands. |
9639 | // The following code enables it for SReg_64 operands |
9640 | // used as source and destination. Remaining source |
9641 | // operands are handled in isInlinableImm. |
9642 | return Operand.isNull() ? Match_Success : Match_InvalidOperand; |
9643 | default: |
9644 | return Match_InvalidOperand; |
9645 | } |
9646 | } |
9647 | |
9648 | //===----------------------------------------------------------------------===// |
9649 | // endpgm |
9650 | //===----------------------------------------------------------------------===// |
9651 | |
9652 | ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) { |
9653 | SMLoc S = getLoc(); |
9654 | int64_t Imm = 0; |
9655 | |
9656 | if (!parseExpr(Imm)) { |
    // The operand is optional; if not present, default to 0.
9658 | Imm = 0; |
9659 | } |
9660 | |
  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9666 | return ParseStatus::Success; |
9667 | } |
9668 | |
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9670 | |
9671 | //===----------------------------------------------------------------------===// |
9672 | // Split Barrier |
9673 | //===----------------------------------------------------------------------===// |
9674 | |
bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9676 | |