//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCAsmInfo.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

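  // Source-operand modifiers parsed from the assembly text: Abs/Neg are the
  // floating-point modifiers (|x|, -x), Sext is the integer sext() modifier,
  // and Lit records a request to force a literal encoding.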
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    bool Lit = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

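  // Type of an immediate operand. ImmTyNone is a plain literal value; the
  // remaining entries identify named instruction fields and modifiers
  // (offsets, cache policy, DPP/SDWA controls, etc.) parsed as immediates.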
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyByteSel,
    ImmTyBitOp3,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyMandatoryLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    MCRegister RegNo;
    Modifiers Mods;
  };

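  // Operand payload; the member that is valid is selected by Kind.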
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

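  // Whether this immediate fits an inline constant of the given type, or can
  // only be encoded as a literal of that type.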
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedVGPRFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrcB64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

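  // KImm operands are mandatory literals embedded in the instruction word
  // (e.g. the v_madmk/v_madak constants), as opposed to optional inline values.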
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [=](){ return P(*this); };
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

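  // Emit the modifier bitmask operand first, then the register or immediate it
  // applies to, matching the src_modifiers/src operand order in the MCInst.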
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

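  // Record that the register with the given index is used and publish the
  // updated count through the corresponding .kernel.*_count symbol.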
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

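  // Mark every 32-bit dword covered by the given register range as used.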
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

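  // Encoding variant forced by a mnemonic suffix (e.g. _e64, _dpp, _sdwa),
  // if any; a ForcedEncodingSize of 64 means the VOP3 encoding is required.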
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // If there is no default wave size it must be a generation before gfx10,
      // these have FeatureWavefrontSize64 in their definition already. For
      // gfx10+ set wave32 as a default.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
1622 | StringRef parseMnemonicSuffix(StringRef Name); |
1623 | bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, |
1624 | SMLoc NameLoc, OperandVector &Operands) override; |
1625 | //bool ProcessInstruction(MCInst &Inst); |
1626 | |
1627 | ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands); |
1628 | |
1629 | ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int); |
1630 | |
1631 | ParseStatus |
1632 | parseIntWithPrefix(const char *Prefix, OperandVector &Operands, |
1633 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, |
1634 | std::function<bool(int64_t &)> ConvertResult = nullptr); |
1635 | |
1636 | ParseStatus parseOperandArrayWithPrefix( |
1637 | const char *Prefix, OperandVector &Operands, |
1638 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, |
1639 | bool (*ConvertResult)(int64_t &) = nullptr); |
1640 | |
1641 | ParseStatus |
1642 | parseNamedBit(StringRef Name, OperandVector &Operands, |
1643 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); |
1644 | unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const; |
1645 | ParseStatus parseCPol(OperandVector &Operands); |
1646 | ParseStatus parseScope(OperandVector &Operands, int64_t &Scope); |
1647 | ParseStatus parseTH(OperandVector &Operands, int64_t &TH); |
1648 | ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value, |
1649 | SMLoc &StringLoc); |
1650 | ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands, |
1651 | StringRef Name, |
1652 | ArrayRef<const char *> Ids, |
1653 | int64_t &IntVal); |
1654 | ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands, |
1655 | StringRef Name, |
1656 | ArrayRef<const char *> Ids, |
1657 | AMDGPUOperand::ImmTy Type); |
1658 | |
1659 | bool isModifier(); |
1660 | bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1661 | bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1662 | bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1663 | bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; |
1664 | bool parseSP3NegModifier(); |
1665 | ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false, |
1666 | bool HasLit = false); |
1667 | ParseStatus parseReg(OperandVector &Operands); |
1668 | ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false, |
1669 | bool HasLit = false); |
1670 | ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands, |
1671 | bool AllowImm = true); |
1672 | ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands, |
1673 | bool AllowImm = true); |
1674 | ParseStatus parseRegWithFPInputMods(OperandVector &Operands); |
1675 | ParseStatus parseRegWithIntInputMods(OperandVector &Operands); |
1676 | ParseStatus parseVReg32OrOff(OperandVector &Operands); |
1677 | ParseStatus tryParseIndexKey(OperandVector &Operands, |
1678 | AMDGPUOperand::ImmTy ImmTy); |
1679 | ParseStatus parseIndexKey8bit(OperandVector &Operands); |
1680 | ParseStatus parseIndexKey16bit(OperandVector &Operands); |
1681 | |
1682 | ParseStatus parseDfmtNfmt(int64_t &Format); |
1683 | ParseStatus parseUfmt(int64_t &Format); |
1684 | ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, |
1685 | int64_t &Format); |
1686 | ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, |
1687 | int64_t &Format); |
1688 | ParseStatus parseFORMAT(OperandVector &Operands); |
1689 | ParseStatus parseSymbolicOrNumericFormat(int64_t &Format); |
1690 | ParseStatus parseNumericFormat(int64_t &Format); |
1691 | ParseStatus parseFlatOffset(OperandVector &Operands); |
1692 | ParseStatus parseR128A16(OperandVector &Operands); |
1693 | ParseStatus parseBLGP(OperandVector &Operands); |
1694 | bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); |
1695 | bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); |
1696 | |
1697 | void cvtExp(MCInst &Inst, const OperandVector &Operands); |
1698 | |
1699 | bool parseCnt(int64_t &IntVal); |
1700 | ParseStatus parseSWaitCnt(OperandVector &Operands); |
1701 | |
1702 | bool parseDepCtr(int64_t &IntVal, unsigned &Mask); |
1703 | void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); |
1704 | ParseStatus parseDepCtr(OperandVector &Operands); |
1705 | |
1706 | bool parseDelay(int64_t &Delay); |
1707 | ParseStatus parseSDelayALU(OperandVector &Operands); |
1708 | |
1709 | ParseStatus parseHwreg(OperandVector &Operands); |
1710 | |
1711 | private: |
1712 | struct OperandInfoTy { |
1713 | SMLoc Loc; |
1714 | int64_t Val; |
1715 | bool IsSymbolic = false; |
1716 | bool IsDefined = false; |
1717 | |
1718 | OperandInfoTy(int64_t Val) : Val(Val) {} |
1719 | }; |
1720 | |
1721 | struct StructuredOpField : OperandInfoTy { |
1722 | StringLiteral Id; |
1723 | StringLiteral Desc; |
1724 | unsigned Width; |
1725 | bool IsDefined = false; |
1726 | |
1727 | StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width, |
1728 | int64_t Default) |
1729 | : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {} |
1730 | virtual ~StructuredOpField() = default; |
1731 | |
1732 | bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const { |
1733 | Parser.Error(L: Loc, Msg: "invalid " + Desc + ": " + Err); |
1734 | return false; |
1735 | } |
1736 | |
1737 | virtual bool validate(AMDGPUAsmParser &Parser) const { |
1738 | if (IsSymbolic && Val == OPR_ID_UNSUPPORTED) |
1739 | return Error(Parser, Err: "not supported on this GPU" ); |
1740 | if (!isUIntN(N: Width, x: Val)) |
1741 | return Error(Parser, Err: "only " + Twine(Width) + "-bit values are legal" ); |
1742 | return true; |
1743 | } |
1744 | }; |
1745 | |
1746 | ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields); |
1747 | bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields); |
1748 | |
1749 | bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); |
1750 | bool validateSendMsg(const OperandInfoTy &Msg, |
1751 | const OperandInfoTy &Op, |
1752 | const OperandInfoTy &Stream); |
1753 | |
1754 | ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset, |
1755 | OperandInfoTy &Width); |
1756 | |
1757 | SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; |
1758 | SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; |
1759 | SMLoc getBLGPLoc(const OperandVector &Operands) const; |
1760 | |
1761 | SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, |
1762 | const OperandVector &Operands) const; |
1763 | SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; |
1764 | SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const; |
1765 | SMLoc getLitLoc(const OperandVector &Operands, |
1766 | bool SearchMandatoryLiterals = false) const; |
1767 | SMLoc getMandatoryLitLoc(const OperandVector &Operands) const; |
1768 | SMLoc getConstLoc(const OperandVector &Operands) const; |
1769 | SMLoc getInstLoc(const OperandVector &Operands) const; |
1770 | |
1771 | bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); |
1772 | bool validateOffset(const MCInst &Inst, const OperandVector &Operands); |
1773 | bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); |
1774 | bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); |
1775 | bool validateSOPLiteral(const MCInst &Inst) const; |
1776 | bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); |
1777 | bool validateVOPDRegBankConstraints(const MCInst &Inst, |
1778 | const OperandVector &Operands); |
1779 | bool validateIntClampSupported(const MCInst &Inst); |
1780 | bool validateMIMGAtomicDMask(const MCInst &Inst); |
1781 | bool validateMIMGGatherDMask(const MCInst &Inst); |
1782 | bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); |
1783 | bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); |
1784 | bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc); |
1785 | bool validateMIMGD16(const MCInst &Inst); |
1786 | bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands); |
1787 | bool validateTensorR128(const MCInst &Inst); |
1788 | bool validateMIMGMSAA(const MCInst &Inst); |
1789 | bool validateOpSel(const MCInst &Inst); |
1790 | bool validateTrue16OpSel(const MCInst &Inst); |
1791 | bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName); |
1792 | bool validateDPP(const MCInst &Inst, const OperandVector &Operands); |
1793 | bool validateVccOperand(MCRegister Reg) const; |
1794 | bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); |
1795 | bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); |
1796 | bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands); |
1797 | bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); |
1798 | bool validateAGPRLdSt(const MCInst &Inst) const; |
1799 | bool validateVGPRAlign(const MCInst &Inst) const; |
1800 | bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); |
1801 | bool validateDS(const MCInst &Inst, const OperandVector &Operands); |
1802 | bool validateGWS(const MCInst &Inst, const OperandVector &Operands); |
1803 | bool validateDivScale(const MCInst &Inst); |
1804 | bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands); |
1805 | bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, |
1806 | const SMLoc &IDLoc); |
1807 | bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, |
1808 | const unsigned CPol); |
1809 | bool validateTFE(const MCInst &Inst, const OperandVector &Operands); |
1810 | std::optional<StringRef> validateLdsDirect(const MCInst &Inst); |
1811 | unsigned getConstantBusLimit(unsigned Opcode) const; |
1812 | bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); |
1813 | bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; |
1814 | unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; |
1815 | |
1816 | bool isSupportedMnemo(StringRef Mnemo, |
1817 | const FeatureBitset &FBS); |
1818 | bool isSupportedMnemo(StringRef Mnemo, |
1819 | const FeatureBitset &FBS, |
1820 | ArrayRef<unsigned> Variants); |
1821 | bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); |
1822 | |
1823 | bool isId(const StringRef Id) const; |
1824 | bool isId(const AsmToken &Token, const StringRef Id) const; |
1825 | bool isToken(const AsmToken::TokenKind Kind) const; |
1826 | StringRef getId() const; |
1827 | bool trySkipId(const StringRef Id); |
1828 | bool trySkipId(const StringRef Pref, const StringRef Id); |
1829 | bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); |
1830 | bool trySkipToken(const AsmToken::TokenKind Kind); |
1831 | bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); |
1832 | bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string" ); |
1833 | bool parseId(StringRef &Val, const StringRef ErrMsg = "" ); |
1834 | |
1835 | void peekTokens(MutableArrayRef<AsmToken> Tokens); |
1836 | AsmToken::TokenKind getTokenKind() const; |
1837 | bool parseExpr(int64_t &Imm, StringRef Expected = "" ); |
1838 | bool parseExpr(OperandVector &Operands); |
1839 | StringRef getTokenStr() const; |
1840 | AsmToken peekToken(bool ShouldSkipSpace = true); |
1841 | AsmToken getToken() const; |
1842 | SMLoc getLoc() const; |
1843 | void lex(); |
1844 | |
1845 | public: |
1846 | void onBeginOfFile() override; |
1847 | bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; |
1848 | |
1849 | ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); |
1850 | |
1851 | ParseStatus parseExpTgt(OperandVector &Operands); |
1852 | ParseStatus parseSendMsg(OperandVector &Operands); |
1853 | ParseStatus parseInterpSlot(OperandVector &Operands); |
1854 | ParseStatus parseInterpAttr(OperandVector &Operands); |
1855 | ParseStatus parseSOPPBrTarget(OperandVector &Operands); |
1856 | ParseStatus parseBoolReg(OperandVector &Operands); |
1857 | |
1858 | bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal, |
1859 | const unsigned MaxVal, const Twine &ErrMsg, |
1860 | SMLoc &Loc); |
1861 | bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, |
1862 | const unsigned MinVal, |
1863 | const unsigned MaxVal, |
1864 | const StringRef ErrMsg); |
1865 | ParseStatus parseSwizzle(OperandVector &Operands); |
1866 | bool parseSwizzleOffset(int64_t &Imm); |
1867 | bool parseSwizzleMacro(int64_t &Imm); |
1868 | bool parseSwizzleQuadPerm(int64_t &Imm); |
1869 | bool parseSwizzleBitmaskPerm(int64_t &Imm); |
1870 | bool parseSwizzleBroadcast(int64_t &Imm); |
1871 | bool parseSwizzleSwap(int64_t &Imm); |
1872 | bool parseSwizzleReverse(int64_t &Imm); |
1873 | bool parseSwizzleFFT(int64_t &Imm); |
1874 | bool parseSwizzleRotate(int64_t &Imm); |
1875 | |
1876 | ParseStatus parseGPRIdxMode(OperandVector &Operands); |
1877 | int64_t parseGPRIdxMacro(); |
1878 | |
1879 | void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: false); } |
1880 | void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: true); } |
1881 | |
1882 | ParseStatus parseOModSI(OperandVector &Operands); |
1883 | |
1884 | void cvtVOP3(MCInst &Inst, const OperandVector &Operands, |
1885 | OptionalImmIndexMap &OptionalIdx); |
1886 | void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands); |
1887 | void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); |
1888 | void cvtVOP3(MCInst &Inst, const OperandVector &Operands); |
1889 | void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); |
1890 | void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands); |
1891 | |
1892 | void cvtVOPD(MCInst &Inst, const OperandVector &Operands); |
1893 | void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, |
1894 | OptionalImmIndexMap &OptionalIdx); |
1895 | void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, |
1896 | OptionalImmIndexMap &OptionalIdx); |
1897 | |
1898 | void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); |
1899 | void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); |
1900 | |
1901 | bool parseDimId(unsigned &Encoding); |
1902 | ParseStatus parseDim(OperandVector &Operands); |
1903 | bool convertDppBoundCtrl(int64_t &BoundCtrl); |
1904 | ParseStatus parseDPP8(OperandVector &Operands); |
1905 | ParseStatus parseDPPCtrl(OperandVector &Operands); |
1906 | bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); |
1907 | int64_t parseDPPCtrlSel(StringRef Ctrl); |
1908 | int64_t parseDPPCtrlPerm(); |
1909 | void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); |
1910 | void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { |
1911 | cvtDPP(Inst, Operands, IsDPP8: true); |
1912 | } |
1913 | void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, |
1914 | bool IsDPP8 = false); |
1915 | void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { |
1916 | cvtVOP3DPP(Inst, Operands, IsDPP8: true); |
1917 | } |
1918 | |
1919 | ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, |
1920 | AMDGPUOperand::ImmTy Type); |
1921 | ParseStatus parseSDWADstUnused(OperandVector &Operands); |
1922 | void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); |
1923 | void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); |
1924 | void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); |
1925 | void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); |
1926 | void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); |
1927 | void cvtSDWA(MCInst &Inst, const OperandVector &Operands, |
1928 | uint64_t BasicInstType, |
1929 | bool SkipDstVcc = false, |
1930 | bool SkipSrcVcc = false); |
1931 | |
1932 | ParseStatus parseEndpgm(OperandVector &Operands); |
1933 | |
1934 | ParseStatus parseVOPD(OperandVector &Operands); |
1935 | }; |
1936 | |
1937 | } // end anonymous namespace |
1938 | |
// May be called with an integer type of equivalent bit width.
1940 | static const fltSemantics *getFltSemantics(unsigned Size) { |
1941 | switch (Size) { |
1942 | case 4: |
1943 | return &APFloat::IEEEsingle(); |
1944 | case 8: |
1945 | return &APFloat::IEEEdouble(); |
1946 | case 2: |
1947 | return &APFloat::IEEEhalf(); |
1948 | default: |
1949 | llvm_unreachable("unsupported fp type" ); |
1950 | } |
1951 | } |
1952 | |
1953 | static const fltSemantics *getFltSemantics(MVT VT) { |
1954 | return getFltSemantics(Size: VT.getSizeInBits() / 8); |
1955 | } |
1956 | |
1957 | static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { |
1958 | switch (OperandType) { |
  // When a floating-point immediate is used as an operand of type i16, the
  // 32-bit representation of the constant, truncated to the 16 LSBs, should
  // be used.
1961 | case AMDGPU::OPERAND_REG_IMM_INT16: |
1962 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
1963 | case AMDGPU::OPERAND_REG_IMM_INT32: |
1964 | case AMDGPU::OPERAND_REG_IMM_FP32: |
1965 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
1966 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
1967 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
1968 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
1969 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
1970 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
1971 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
1972 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
1973 | case AMDGPU::OPERAND_KIMM32: |
1974 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: |
1975 | return &APFloat::IEEEsingle(); |
1976 | case AMDGPU::OPERAND_REG_IMM_INT64: |
1977 | case AMDGPU::OPERAND_REG_IMM_FP64: |
1978 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
1979 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
1980 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
1981 | return &APFloat::IEEEdouble(); |
1982 | case AMDGPU::OPERAND_REG_IMM_FP16: |
1983 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
1984 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
1985 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
1986 | case AMDGPU::OPERAND_KIMM16: |
1987 | return &APFloat::IEEEhalf(); |
1988 | case AMDGPU::OPERAND_REG_IMM_BF16: |
1989 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
1990 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
1991 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
1992 | return &APFloat::BFloat(); |
1993 | default: |
1994 | llvm_unreachable("unsupported fp type" ); |
1995 | } |
1996 | } |
1997 | |
1998 | //===----------------------------------------------------------------------===// |
1999 | // Operand |
2000 | //===----------------------------------------------------------------------===// |
2001 | |
2002 | static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { |
2003 | bool Lost; |
2004 | |
  // Convert the literal to the operand's floating-point semantics.
2006 | APFloat::opStatus Status = FPLiteral.convert(ToSemantics: *getFltSemantics(VT), |
2007 | RM: APFloat::rmNearestTiesToEven, |
2008 | losesInfo: &Lost); |
  // We allow precision loss but not overflow or underflow.
2010 | if (Status != APFloat::opOK && |
2011 | Lost && |
2012 | ((Status & APFloat::opOverflow) != 0 || |
2013 | (Status & APFloat::opUnderflow) != 0)) { |
2014 | return false; |
2015 | } |
2016 | |
2017 | return true; |
2018 | } |
2019 | |
2020 | static bool isSafeTruncation(int64_t Val, unsigned Size) { |
2021 | return isUIntN(N: Size, x: Val) || isIntN(N: Size, x: Val); |
2022 | } |
2023 | |
2024 | static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { |
2025 | if (VT.getScalarType() == MVT::i16) |
2026 | return isInlinableLiteral32(Literal: Val, HasInv2Pi); |
2027 | |
2028 | if (VT.getScalarType() == MVT::f16) |
2029 | return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi); |
2030 | |
2031 | assert(VT.getScalarType() == MVT::bf16); |
2032 | |
2033 | return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi); |
2034 | } |
2035 | |
2036 | bool AMDGPUOperand::isInlinableImm(MVT type) const { |
2037 | |
  // This is a hack to allow named inline values like shared_base to be used
  // with both 32-bit and 64-bit operands, even though these values are
  // defined as 32-bit operands only.
2042 | if (isInlineValue()) { |
2043 | return true; |
2044 | } |
2045 | |
2046 | if (!isImmTy(ImmT: ImmTyNone)) { |
2047 | // Only plain immediates are inlinable (e.g. "clamp" attribute is not) |
2048 | return false; |
2049 | } |
  // TODO: We should avoid using host floats here. It would be better to check
  // the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
2053 | |
2054 | APInt Literal(64, Imm.Val); |
2055 | |
2056 | if (Imm.IsFPImm) { // We got fp literal token |
2057 | if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand |
2058 | return AMDGPU::isInlinableLiteral64(Literal: Imm.Val, |
2059 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2060 | } |
2061 | |
2062 | APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); |
2063 | if (!canLosslesslyConvertToFPType(FPLiteral, VT: type)) |
2064 | return false; |
2065 | |
2066 | if (type.getScalarSizeInBits() == 16) { |
2067 | bool Lost = false; |
2068 | switch (type.getScalarType().SimpleTy) { |
2069 | default: |
2070 | llvm_unreachable("unknown 16-bit type" ); |
2071 | case MVT::bf16: |
2072 | FPLiteral.convert(ToSemantics: APFloatBase::BFloat(), RM: APFloat::rmNearestTiesToEven, |
2073 | losesInfo: &Lost); |
2074 | break; |
2075 | case MVT::f16: |
2076 | FPLiteral.convert(ToSemantics: APFloatBase::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, |
2077 | losesInfo: &Lost); |
2078 | break; |
2079 | case MVT::i16: |
2080 | FPLiteral.convert(ToSemantics: APFloatBase::IEEEsingle(), |
2081 | RM: APFloat::rmNearestTiesToEven, losesInfo: &Lost); |
2082 | break; |
2083 | } |
      // We need to use the 32-bit representation here because when a
      // floating-point inline constant is used as an i16 operand, its 32-bit
      // representation will be used. We need the 32-bit value to check whether
      // it is an FP inline constant.
2088 | uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); |
2089 | return isInlineableLiteralOp16(Val: ImmVal, VT: type, |
2090 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2091 | } |
2092 | |
2093 | // Check if single precision literal is inlinable |
2094 | return AMDGPU::isInlinableLiteral32( |
2095 | Literal: static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), |
2096 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2097 | } |
2098 | |
2099 | // We got int literal token. |
2100 | if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand |
2101 | return AMDGPU::isInlinableLiteral64(Literal: Imm.Val, |
2102 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2103 | } |
2104 | |
2105 | if (!isSafeTruncation(Val: Imm.Val, Size: type.getScalarSizeInBits())) { |
2106 | return false; |
2107 | } |
2108 | |
2109 | if (type.getScalarSizeInBits() == 16) { |
2110 | return isInlineableLiteralOp16( |
2111 | Val: static_cast<int16_t>(Literal.getLoBits(numBits: 16).getSExtValue()), |
2112 | VT: type, HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2113 | } |
2114 | |
2115 | return AMDGPU::isInlinableLiteral32( |
2116 | Literal: static_cast<int32_t>(Literal.getLoBits(numBits: 32).getZExtValue()), |
2117 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2118 | } |
2119 | |
2120 | bool AMDGPUOperand::isLiteralImm(MVT type) const { |
2121 | // Check that this immediate can be added as literal |
2122 | if (!isImmTy(ImmT: ImmTyNone)) { |
2123 | return false; |
2124 | } |
2125 | |
2126 | if (!Imm.IsFPImm) { |
2127 | // We got int literal token. |
2128 | |
2129 | if (type == MVT::f64 && hasFPModifiers()) { |
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, disable these cases.
2133 | return false; |
2134 | } |
2135 | |
2136 | unsigned Size = type.getSizeInBits(); |
2137 | if (Size == 64) |
2138 | Size = 32; |
2139 | |
2140 | // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP |
2141 | // types. |
2142 | return isSafeTruncation(Val: Imm.Val, Size); |
2143 | } |
2144 | |
2145 | // We got fp literal token |
2146 | if (type == MVT::f64) { // Expected 64-bit fp operand |
    // The low 32 bits of the literal would be set to zero, but we accept such
    // literals.
2148 | return true; |
2149 | } |
2150 | |
2151 | if (type == MVT::i64) { // Expected 64-bit int operand |
2152 | // We don't allow fp literals in 64-bit integer instructions. It is |
2153 | // unclear how we should encode them. |
2154 | return false; |
2155 | } |
2156 | |
2157 | // We allow fp literals with f16x2 operands assuming that the specified |
2158 | // literal goes into the lower half and the upper half is zero. We also |
2159 | // require that the literal may be losslessly converted to f16. |
2160 | // |
2161 | // For i16x2 operands, we assume that the specified literal is encoded as a |
2162 | // single-precision float. This is pretty odd, but it matches SP3 and what |
2163 | // happens in hardware. |
2164 | MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 |
2165 | : (type == MVT::v2i16) ? MVT::f32 |
2166 | : (type == MVT::v2f32) ? MVT::f32 |
2167 | : type; |
2168 | |
2169 | APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); |
2170 | return canLosslesslyConvertToFPType(FPLiteral, VT: ExpectedType); |
2171 | } |
2172 | |
2173 | bool AMDGPUOperand::isRegClass(unsigned RCID) const { |
2174 | return isRegKind() && AsmParser->getMRI()->getRegClass(i: RCID).contains(Reg: getReg()); |
2175 | } |
2176 | |
2177 | bool AMDGPUOperand::isVRegWithInputMods() const { |
2178 | return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) || |
2179 | // GFX90A allows DPP on 64-bit operands. |
2180 | (isRegClass(RCID: AMDGPU::VReg_64RegClassID) && |
2181 | AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]); |
2182 | } |
2183 | |
2184 | template <bool IsFake16> |
2185 | bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const { |
2186 | return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID |
2187 | : AMDGPU::VGPR_16_Lo128RegClassID); |
2188 | } |
2189 | |
2190 | template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const { |
2191 | return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32RegClassID |
2192 | : AMDGPU::VGPR_16RegClassID); |
2193 | } |
2194 | |
2195 | bool AMDGPUOperand::isSDWAOperand(MVT type) const { |
2196 | if (AsmParser->isVI()) |
2197 | return isVReg32(); |
2198 | if (AsmParser->isGFX9Plus()) |
2199 | return isRegClass(RCID: AMDGPU::VS_32RegClassID) || isInlinableImm(type); |
2200 | return false; |
2201 | } |
2202 | |
2203 | bool AMDGPUOperand::isSDWAFP16Operand() const { |
2204 | return isSDWAOperand(type: MVT::f16); |
2205 | } |
2206 | |
2207 | bool AMDGPUOperand::isSDWAFP32Operand() const { |
2208 | return isSDWAOperand(type: MVT::f32); |
2209 | } |
2210 | |
2211 | bool AMDGPUOperand::isSDWAInt16Operand() const { |
2212 | return isSDWAOperand(type: MVT::i16); |
2213 | } |
2214 | |
2215 | bool AMDGPUOperand::isSDWAInt32Operand() const { |
2216 | return isSDWAOperand(type: MVT::i32); |
2217 | } |
2218 | |
2219 | bool AMDGPUOperand::isBoolReg() const { |
2220 | auto FB = AsmParser->getFeatureBits(); |
2221 | return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) || |
2222 | (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32())); |
2223 | } |
2224 | |
2225 | uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const |
2226 | { |
2227 | assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); |
2228 | assert(Size == 2 || Size == 4 || Size == 8); |
2229 | |
2230 | const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); |
2231 | |
2232 | if (Imm.Mods.Abs) { |
2233 | Val &= ~FpSignMask; |
2234 | } |
2235 | if (Imm.Mods.Neg) { |
2236 | Val ^= FpSignMask; |
2237 | } |
2238 | |
2239 | return Val; |
2240 | } |
2241 | |
2242 | void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { |
2243 | if (isExpr()) { |
2244 | Inst.addOperand(Op: MCOperand::createExpr(Val: Expr)); |
2245 | return; |
2246 | } |
2247 | |
2248 | if (AMDGPU::isSISrcOperand(Desc: AsmParser->getMII()->get(Opcode: Inst.getOpcode()), |
2249 | OpNo: Inst.getNumOperands())) { |
2250 | addLiteralImmOperand(Inst, Val: Imm.Val, |
2251 | ApplyModifiers: ApplyModifiers & |
2252 | isImmTy(ImmT: ImmTyNone) && Imm.Mods.hasFPModifiers()); |
2253 | } else { |
2254 | assert(!isImmTy(ImmTyNone) || !hasModifiers()); |
2255 | Inst.addOperand(Op: MCOperand::createImm(Val: Imm.Val)); |
2256 | setImmKindNone(); |
2257 | } |
2258 | } |
2259 | |
2260 | void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { |
2261 | const auto& InstDesc = AsmParser->getMII()->get(Opcode: Inst.getOpcode()); |
2262 | auto OpNum = Inst.getNumOperands(); |
2263 | // Check that this operand accepts literals |
2264 | assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); |
2265 | |
2266 | if (ApplyModifiers) { |
2267 | assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); |
2268 | const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(Desc: InstDesc, OpNo: OpNum); |
2269 | Val = applyInputFPModifiers(Val, Size); |
2270 | } |
2271 | |
2272 | APInt Literal(64, Val); |
2273 | uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; |
2274 | |
2275 | if (Imm.IsFPImm) { // We got fp literal token |
2276 | switch (OpTy) { |
2277 | case AMDGPU::OPERAND_REG_IMM_INT64: |
2278 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2279 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
2280 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2281 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2282 | if (AMDGPU::isInlinableLiteral64(Literal: Literal.getZExtValue(), |
2283 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2284 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getZExtValue())); |
2285 | setImmKindConst(); |
2286 | return; |
2287 | } |
2288 | |
2289 | // Non-inlineable |
2290 | if (AMDGPU::isSISrcFPOperand(Desc: InstDesc, OpNo: OpNum)) { // Expected 64-bit fp operand |
2291 | // For fp operands we check if low 32 bits are zeros |
2292 | if (Literal.getLoBits(numBits: 32) != 0) { |
2293 | const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(L: Inst.getLoc(), |
2294 | Msg: "Can't encode literal as exact 64-bit floating-point operand. " |
2295 | "Low 32-bits will be set to zero" ); |
2296 | Val &= 0xffffffff00000000u; |
2297 | } |
2298 | |
2299 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2300 | setImmKindLiteral(); |
2301 | return; |
2302 | } |
2303 | |
2304 | // We don't allow fp literals in 64-bit integer instructions. It is |
2305 | // unclear how we should encode them. This case should be checked earlier |
2306 | // in predicate methods (isLiteralImm()) |
2307 | llvm_unreachable("fp literal in 64-bit integer instruction." ); |
2308 | |
2309 | case AMDGPU::OPERAND_REG_IMM_BF16: |
2310 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
2311 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
2312 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2313 | if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) { |
      // This is 1/(2*pi), which is going to be truncated to bf16 with a loss
      // of precision. The constant represents the idiomatic fp32 value of
      // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
      // bits cleared. Prevent rounding below.
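      // For reference: the fp32 pattern of 1/(2*pi) is 0x3e22f983; clearing
      // its low 16 bits gives the bf16 value 0x3e22 used here, whereas
      // round-to-nearest conversion would produce 0x3e23.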
2318 | Inst.addOperand(Op: MCOperand::createImm(Val: 0x3e22)); |
2319 | setImmKindLiteral(); |
2320 | return; |
2321 | } |
2322 | [[fallthrough]]; |
2323 | |
2324 | case AMDGPU::OPERAND_REG_IMM_INT32: |
2325 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2326 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
2327 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2328 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
2329 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2330 | case AMDGPU::OPERAND_REG_IMM_INT16: |
2331 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2332 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
2333 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2334 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
2335 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2336 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2337 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2338 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2339 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
2340 | case AMDGPU::OPERAND_KIMM32: |
2341 | case AMDGPU::OPERAND_KIMM16: |
2342 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { |
2343 | bool lost; |
2344 | APFloat FPLiteral(APFloat::IEEEdouble(), Literal); |
    // Convert the literal to the operand's floating-point semantics.
2346 | FPLiteral.convert(ToSemantics: *getOpFltSemantics(OperandType: OpTy), |
2347 | RM: APFloat::rmNearestTiesToEven, losesInfo: &lost); |
    // We allow precision loss but not overflow or underflow; this should have
    // been checked earlier in isLiteralImm().
2350 | |
2351 | uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); |
2352 | Inst.addOperand(Op: MCOperand::createImm(Val: ImmVal)); |
2353 | if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { |
2354 | setImmKindMandatoryLiteral(); |
2355 | } else { |
2356 | setImmKindLiteral(); |
2357 | } |
2358 | return; |
2359 | } |
2360 | default: |
2361 | llvm_unreachable("invalid operand size" ); |
2362 | } |
2363 | |
2364 | return; |
2365 | } |
2366 | |
2367 | // We got int literal token. |
2368 | // Only sign extend inline immediates. |
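  // E.g. the int token -1 falls in the inline range [-16, 64] for a 32-bit
  // operand and is added as an inline constant, while 0x12345678 is not
  // inlinable and is added through the literal path below.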
2369 | switch (OpTy) { |
2370 | case AMDGPU::OPERAND_REG_IMM_INT32: |
2371 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2372 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
2373 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2374 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
2375 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2376 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2377 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2378 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2379 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2380 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
2381 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: |
2382 | if (isSafeTruncation(Val, Size: 32) && |
2383 | AMDGPU::isInlinableLiteral32(Literal: static_cast<int32_t>(Val), |
2384 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2385 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2386 | setImmKindConst(); |
2387 | return; |
2388 | } |
2389 | |
2390 | Inst.addOperand(Op: MCOperand::createImm(Val: Lo_32(Value: Val))); |
2391 | setImmKindLiteral(); |
2392 | return; |
2393 | |
2394 | case AMDGPU::OPERAND_REG_IMM_INT64: |
2395 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2396 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
2397 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2398 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2399 | if (AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2400 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2401 | setImmKindConst(); |
2402 | return; |
2403 | } |
2404 | |
2405 | Val = AMDGPU::isSISrcFPOperand(Desc: InstDesc, OpNo: OpNum) ? (uint64_t)Val << 32 |
2406 | : Lo_32(Value: Val); |
2407 | |
2408 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2409 | setImmKindLiteral(); |
2410 | return; |
2411 | |
2412 | case AMDGPU::OPERAND_REG_IMM_INT16: |
2413 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
2414 | if (isSafeTruncation(Val, Size: 16) && |
2415 | AMDGPU::isInlinableIntLiteral(Literal: static_cast<int16_t>(Val))) { |
2416 | Inst.addOperand(Op: MCOperand::createImm(Val: Lo_32(Value: Val))); |
2417 | setImmKindConst(); |
2418 | return; |
2419 | } |
2420 | |
2421 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2422 | setImmKindLiteral(); |
2423 | return; |
2424 | |
2425 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2426 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2427 | if (isSafeTruncation(Val, Size: 16) && |
2428 | AMDGPU::isInlinableLiteralFP16(Literal: static_cast<int16_t>(Val), |
2429 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2430 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2431 | setImmKindConst(); |
2432 | return; |
2433 | } |
2434 | |
2435 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2436 | setImmKindLiteral(); |
2437 | return; |
2438 | |
2439 | case AMDGPU::OPERAND_REG_IMM_BF16: |
2440 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
2441 | if (isSafeTruncation(Val, Size: 16) && |
2442 | AMDGPU::isInlinableLiteralBF16(Literal: static_cast<int16_t>(Val), |
2443 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2444 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2445 | setImmKindConst(); |
2446 | return; |
2447 | } |
2448 | |
2449 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2450 | setImmKindLiteral(); |
2451 | return; |
2452 | |
2453 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: { |
2454 | assert(isSafeTruncation(Val, 16)); |
2455 | assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))); |
2456 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2457 | return; |
2458 | } |
2459 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { |
2460 | assert(isSafeTruncation(Val, 16)); |
2461 | assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val), |
2462 | AsmParser->hasInv2PiInlineImm())); |
2463 | |
2464 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2465 | return; |
2466 | } |
2467 | |
2468 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: { |
2469 | assert(isSafeTruncation(Val, 16)); |
2470 | assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val), |
2471 | AsmParser->hasInv2PiInlineImm())); |
2472 | |
2473 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2474 | return; |
2475 | } |
2476 | |
2477 | case AMDGPU::OPERAND_KIMM32: |
2478 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getLoBits(numBits: 32).getZExtValue())); |
2479 | setImmKindMandatoryLiteral(); |
2480 | return; |
2481 | case AMDGPU::OPERAND_KIMM16: |
2482 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getLoBits(numBits: 16).getZExtValue())); |
2483 | setImmKindMandatoryLiteral(); |
2484 | return; |
2485 | default: |
2486 | llvm_unreachable("invalid operand size" ); |
2487 | } |
2488 | } |
2489 | |
2490 | void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { |
2491 | Inst.addOperand(Op: MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: getReg(), STI: AsmParser->getSTI()))); |
2492 | } |
2493 | |
2494 | bool AMDGPUOperand::isInlineValue() const { |
2495 | return isRegKind() && ::isInlineValue(Reg: getReg()); |
2496 | } |
2497 | |
2498 | //===----------------------------------------------------------------------===// |
2499 | // AsmParser |
2500 | //===----------------------------------------------------------------------===// |
2501 | |
2502 | void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) { |
  // TODO: make these pre-defined variables read-only.
  // Currently there is no suitable machinery in core llvm-mc for this.
  // MCSymbol::isRedefinable is intended for another purpose, and
  // AsmParser::parseDirectiveSet() cannot be specialized for a specific
  // target.
2507 | MCContext &Ctx = getContext(); |
2508 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id); |
2509 | Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx)); |
2510 | } |
2511 | |
2512 | static int getRegClass(RegisterKind Is, unsigned RegWidth) { |
2513 | if (Is == IS_VGPR) { |
2514 | switch (RegWidth) { |
2515 | default: return -1; |
2516 | case 32: |
2517 | return AMDGPU::VGPR_32RegClassID; |
2518 | case 64: |
2519 | return AMDGPU::VReg_64RegClassID; |
2520 | case 96: |
2521 | return AMDGPU::VReg_96RegClassID; |
2522 | case 128: |
2523 | return AMDGPU::VReg_128RegClassID; |
2524 | case 160: |
2525 | return AMDGPU::VReg_160RegClassID; |
2526 | case 192: |
2527 | return AMDGPU::VReg_192RegClassID; |
2528 | case 224: |
2529 | return AMDGPU::VReg_224RegClassID; |
2530 | case 256: |
2531 | return AMDGPU::VReg_256RegClassID; |
2532 | case 288: |
2533 | return AMDGPU::VReg_288RegClassID; |
2534 | case 320: |
2535 | return AMDGPU::VReg_320RegClassID; |
2536 | case 352: |
2537 | return AMDGPU::VReg_352RegClassID; |
2538 | case 384: |
2539 | return AMDGPU::VReg_384RegClassID; |
2540 | case 512: |
2541 | return AMDGPU::VReg_512RegClassID; |
2542 | case 1024: |
2543 | return AMDGPU::VReg_1024RegClassID; |
2544 | } |
2545 | } else if (Is == IS_TTMP) { |
2546 | switch (RegWidth) { |
2547 | default: return -1; |
2548 | case 32: |
2549 | return AMDGPU::TTMP_32RegClassID; |
2550 | case 64: |
2551 | return AMDGPU::TTMP_64RegClassID; |
2552 | case 128: |
2553 | return AMDGPU::TTMP_128RegClassID; |
2554 | case 256: |
2555 | return AMDGPU::TTMP_256RegClassID; |
2556 | case 512: |
2557 | return AMDGPU::TTMP_512RegClassID; |
2558 | } |
2559 | } else if (Is == IS_SGPR) { |
2560 | switch (RegWidth) { |
2561 | default: return -1; |
2562 | case 32: |
2563 | return AMDGPU::SGPR_32RegClassID; |
2564 | case 64: |
2565 | return AMDGPU::SGPR_64RegClassID; |
2566 | case 96: |
2567 | return AMDGPU::SGPR_96RegClassID; |
2568 | case 128: |
2569 | return AMDGPU::SGPR_128RegClassID; |
2570 | case 160: |
2571 | return AMDGPU::SGPR_160RegClassID; |
2572 | case 192: |
2573 | return AMDGPU::SGPR_192RegClassID; |
2574 | case 224: |
2575 | return AMDGPU::SGPR_224RegClassID; |
2576 | case 256: |
2577 | return AMDGPU::SGPR_256RegClassID; |
2578 | case 288: |
2579 | return AMDGPU::SGPR_288RegClassID; |
2580 | case 320: |
2581 | return AMDGPU::SGPR_320RegClassID; |
2582 | case 352: |
2583 | return AMDGPU::SGPR_352RegClassID; |
2584 | case 384: |
2585 | return AMDGPU::SGPR_384RegClassID; |
2586 | case 512: |
2587 | return AMDGPU::SGPR_512RegClassID; |
2588 | } |
2589 | } else if (Is == IS_AGPR) { |
2590 | switch (RegWidth) { |
2591 | default: return -1; |
2592 | case 32: |
2593 | return AMDGPU::AGPR_32RegClassID; |
2594 | case 64: |
2595 | return AMDGPU::AReg_64RegClassID; |
2596 | case 96: |
2597 | return AMDGPU::AReg_96RegClassID; |
2598 | case 128: |
2599 | return AMDGPU::AReg_128RegClassID; |
2600 | case 160: |
2601 | return AMDGPU::AReg_160RegClassID; |
2602 | case 192: |
2603 | return AMDGPU::AReg_192RegClassID; |
2604 | case 224: |
2605 | return AMDGPU::AReg_224RegClassID; |
2606 | case 256: |
2607 | return AMDGPU::AReg_256RegClassID; |
2608 | case 288: |
2609 | return AMDGPU::AReg_288RegClassID; |
2610 | case 320: |
2611 | return AMDGPU::AReg_320RegClassID; |
2612 | case 352: |
2613 | return AMDGPU::AReg_352RegClassID; |
2614 | case 384: |
2615 | return AMDGPU::AReg_384RegClassID; |
2616 | case 512: |
2617 | return AMDGPU::AReg_512RegClassID; |
2618 | case 1024: |
2619 | return AMDGPU::AReg_1024RegClassID; |
2620 | } |
2621 | } |
2622 | return -1; |
2623 | } |
2624 | |
2625 | static MCRegister getSpecialRegForName(StringRef RegName) { |
2626 | return StringSwitch<unsigned>(RegName) |
2627 | .Case(S: "exec" , Value: AMDGPU::EXEC) |
2628 | .Case(S: "vcc" , Value: AMDGPU::VCC) |
2629 | .Case(S: "flat_scratch" , Value: AMDGPU::FLAT_SCR) |
2630 | .Case(S: "xnack_mask" , Value: AMDGPU::XNACK_MASK) |
2631 | .Case(S: "shared_base" , Value: AMDGPU::SRC_SHARED_BASE) |
2632 | .Case(S: "src_shared_base" , Value: AMDGPU::SRC_SHARED_BASE) |
2633 | .Case(S: "shared_limit" , Value: AMDGPU::SRC_SHARED_LIMIT) |
2634 | .Case(S: "src_shared_limit" , Value: AMDGPU::SRC_SHARED_LIMIT) |
2635 | .Case(S: "private_base" , Value: AMDGPU::SRC_PRIVATE_BASE) |
2636 | .Case(S: "src_private_base" , Value: AMDGPU::SRC_PRIVATE_BASE) |
2637 | .Case(S: "private_limit" , Value: AMDGPU::SRC_PRIVATE_LIMIT) |
2638 | .Case(S: "src_private_limit" , Value: AMDGPU::SRC_PRIVATE_LIMIT) |
2639 | .Case(S: "pops_exiting_wave_id" , Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID) |
2640 | .Case(S: "src_pops_exiting_wave_id" , Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID) |
2641 | .Case(S: "lds_direct" , Value: AMDGPU::LDS_DIRECT) |
2642 | .Case(S: "src_lds_direct" , Value: AMDGPU::LDS_DIRECT) |
2643 | .Case(S: "m0" , Value: AMDGPU::M0) |
2644 | .Case(S: "vccz" , Value: AMDGPU::SRC_VCCZ) |
2645 | .Case(S: "src_vccz" , Value: AMDGPU::SRC_VCCZ) |
2646 | .Case(S: "execz" , Value: AMDGPU::SRC_EXECZ) |
2647 | .Case(S: "src_execz" , Value: AMDGPU::SRC_EXECZ) |
2648 | .Case(S: "scc" , Value: AMDGPU::SRC_SCC) |
2649 | .Case(S: "src_scc" , Value: AMDGPU::SRC_SCC) |
2650 | .Case(S: "tba" , Value: AMDGPU::TBA) |
2651 | .Case(S: "tma" , Value: AMDGPU::TMA) |
2652 | .Case(S: "flat_scratch_lo" , Value: AMDGPU::FLAT_SCR_LO) |
2653 | .Case(S: "flat_scratch_hi" , Value: AMDGPU::FLAT_SCR_HI) |
2654 | .Case(S: "xnack_mask_lo" , Value: AMDGPU::XNACK_MASK_LO) |
2655 | .Case(S: "xnack_mask_hi" , Value: AMDGPU::XNACK_MASK_HI) |
2656 | .Case(S: "vcc_lo" , Value: AMDGPU::VCC_LO) |
2657 | .Case(S: "vcc_hi" , Value: AMDGPU::VCC_HI) |
2658 | .Case(S: "exec_lo" , Value: AMDGPU::EXEC_LO) |
2659 | .Case(S: "exec_hi" , Value: AMDGPU::EXEC_HI) |
2660 | .Case(S: "tma_lo" , Value: AMDGPU::TMA_LO) |
2661 | .Case(S: "tma_hi" , Value: AMDGPU::TMA_HI) |
2662 | .Case(S: "tba_lo" , Value: AMDGPU::TBA_LO) |
2663 | .Case(S: "tba_hi" , Value: AMDGPU::TBA_HI) |
2664 | .Case(S: "pc" , Value: AMDGPU::PC_REG) |
2665 | .Case(S: "null" , Value: AMDGPU::SGPR_NULL) |
2666 | .Default(Value: AMDGPU::NoRegister); |
2667 | } |
2668 | |
2669 | bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, |
2670 | SMLoc &EndLoc, bool RestoreOnFailure) { |
2671 | auto R = parseRegister(); |
2672 | if (!R) return true; |
2673 | assert(R->isReg()); |
2674 | RegNo = R->getReg(); |
2675 | StartLoc = R->getStartLoc(); |
2676 | EndLoc = R->getEndLoc(); |
2677 | return false; |
2678 | } |
2679 | |
2680 | bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, |
2681 | SMLoc &EndLoc) { |
2682 | return ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); |
2683 | } |
2684 | |
2685 | ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, |
2686 | SMLoc &EndLoc) { |
2687 | bool Result = ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); |
2688 | bool PendingErrors = getParser().hasPendingError(); |
2689 | getParser().clearPendingErrors(); |
2690 | if (PendingErrors) |
2691 | return ParseStatus::Failure; |
2692 | if (Result) |
2693 | return ParseStatus::NoMatch; |
2694 | return ParseStatus::Success; |
2695 | } |
2696 | |
2697 | bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth, |
2698 | RegisterKind RegKind, |
2699 | MCRegister Reg1, SMLoc Loc) { |
2700 | switch (RegKind) { |
2701 | case IS_SPECIAL: |
2702 | if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { |
2703 | Reg = AMDGPU::EXEC; |
2704 | RegWidth = 64; |
2705 | return true; |
2706 | } |
2707 | if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { |
2708 | Reg = AMDGPU::FLAT_SCR; |
2709 | RegWidth = 64; |
2710 | return true; |
2711 | } |
2712 | if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { |
2713 | Reg = AMDGPU::XNACK_MASK; |
2714 | RegWidth = 64; |
2715 | return true; |
2716 | } |
2717 | if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { |
2718 | Reg = AMDGPU::VCC; |
2719 | RegWidth = 64; |
2720 | return true; |
2721 | } |
2722 | if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { |
2723 | Reg = AMDGPU::TBA; |
2724 | RegWidth = 64; |
2725 | return true; |
2726 | } |
2727 | if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { |
2728 | Reg = AMDGPU::TMA; |
2729 | RegWidth = 64; |
2730 | return true; |
2731 | } |
2732 | Error(L: Loc, Msg: "register does not fit in the list" ); |
2733 | return false; |
2734 | case IS_VGPR: |
2735 | case IS_SGPR: |
2736 | case IS_AGPR: |
2737 | case IS_TTMP: |
2738 | if (Reg1 != Reg + RegWidth / 32) { |
2739 | Error(L: Loc, Msg: "registers in a list must have consecutive indices" ); |
2740 | return false; |
2741 | } |
2742 | RegWidth += 32; |
2743 | return true; |
2744 | default: |
2745 | llvm_unreachable("unexpected register kind" ); |
2746 | } |
2747 | } |
2748 | |
2749 | struct RegInfo { |
2750 | StringLiteral Name; |
2751 | RegisterKind Kind; |
2752 | }; |
2753 | |
2754 | static constexpr RegInfo RegularRegisters[] = { |
2755 | {.Name: {"v" }, .Kind: IS_VGPR}, |
2756 | {.Name: {"s" }, .Kind: IS_SGPR}, |
2757 | {.Name: {"ttmp" }, .Kind: IS_TTMP}, |
2758 | {.Name: {"acc" }, .Kind: IS_AGPR}, |
2759 | {.Name: {"a" }, .Kind: IS_AGPR}, |
2760 | }; |
2761 | |
2762 | static bool isRegularReg(RegisterKind Kind) { |
2763 | return Kind == IS_VGPR || |
2764 | Kind == IS_SGPR || |
2765 | Kind == IS_TTMP || |
2766 | Kind == IS_AGPR; |
2767 | } |
2768 | |
2769 | static const RegInfo* getRegularRegInfo(StringRef Str) { |
2770 | for (const RegInfo &Reg : RegularRegisters) |
2771 | if (Str.starts_with(Prefix: Reg.Name)) |
2772 | return &Reg; |
2773 | return nullptr; |
2774 | } |
2775 | |
2776 | static bool getRegNum(StringRef Str, unsigned& Num) { |
2777 | return !Str.getAsInteger(Radix: 10, Result&: Num); |
2778 | } |
2779 | |
2780 | bool |
2781 | AMDGPUAsmParser::isRegister(const AsmToken &Token, |
2782 | const AsmToken &NextToken) const { |
2783 | |
2784 | // A list of consecutive registers: [s0,s1,s2,s3] |
2785 | if (Token.is(K: AsmToken::LBrac)) |
2786 | return true; |
2787 | |
2788 | if (!Token.is(K: AsmToken::Identifier)) |
2789 | return false; |
2790 | |
2791 | // A single register like s0 or a range of registers like s[0:1] |
2792 | |
2793 | StringRef Str = Token.getString(); |
2794 | const RegInfo *Reg = getRegularRegInfo(Str); |
2795 | if (Reg) { |
2796 | StringRef RegName = Reg->Name; |
2797 | StringRef RegSuffix = Str.substr(Start: RegName.size()); |
2798 | if (!RegSuffix.empty()) { |
2799 | RegSuffix.consume_back(Suffix: ".l" ); |
2800 | RegSuffix.consume_back(Suffix: ".h" ); |
2801 | unsigned Num; |
2802 | // A single register with an index: rXX |
2803 | if (getRegNum(Str: RegSuffix, Num)) |
2804 | return true; |
2805 | } else { |
2806 | // A range of registers: r[XX:YY]. |
2807 | if (NextToken.is(K: AsmToken::LBrac)) |
2808 | return true; |
2809 | } |
2810 | } |
2811 | |
2812 | return getSpecialRegForName(RegName: Str).isValid(); |
2813 | } |
2814 | |
2815 | bool |
2816 | AMDGPUAsmParser::isRegister() |
2817 | { |
2818 | return isRegister(Token: getToken(), NextToken: peekToken()); |
2819 | } |
2820 | |
2821 | MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, |
2822 | unsigned SubReg, unsigned RegWidth, |
2823 | SMLoc Loc) { |
2824 | assert(isRegularReg(RegKind)); |
2825 | |
2826 | unsigned AlignSize = 1; |
2827 | if (RegKind == IS_SGPR || RegKind == IS_TTMP) { |
2828 | // SGPR and TTMP registers must be aligned. |
2829 | // Max required alignment is 4 dwords. |
2830 | AlignSize = std::min(a: llvm::bit_ceil(Value: RegWidth / 32), b: 4u); |
2831 | } |
2832 | |
2833 | if (RegNum % AlignSize != 0) { |
2834 | Error(L: Loc, Msg: "invalid register alignment" ); |
2835 | return MCRegister(); |
2836 | } |
2837 | |
2838 | unsigned RegIdx = RegNum / AlignSize; |
2839 | int RCID = getRegClass(Is: RegKind, RegWidth); |
2840 | if (RCID == -1) { |
2841 | Error(L: Loc, Msg: "invalid or unsupported register size" ); |
2842 | return MCRegister(); |
2843 | } |
2844 | |
2845 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
2846 | const MCRegisterClass RC = TRI->getRegClass(i: RCID); |
2847 | if (RegIdx >= RC.getNumRegs()) { |
2848 | Error(L: Loc, Msg: "register index is out of range" ); |
2849 | return MCRegister(); |
2850 | } |
2851 | |
2852 | MCRegister Reg = RC.getRegister(i: RegIdx); |
2853 | |
2854 | if (SubReg) { |
2855 | Reg = TRI->getSubReg(Reg, Idx: SubReg); |
2856 | |
2857 | // Currently all regular registers have their .l and .h subregisters, so |
2858 | // we should never need to generate an error here. |
2859 | assert(Reg && "Invalid subregister!" ); |
2860 | } |
2861 | |
2862 | return Reg; |
2863 | } |
2864 | |
2865 | bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth, |
2866 | unsigned &SubReg) { |
2867 | int64_t RegLo, RegHi; |
2868 | if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "missing register index" )) |
2869 | return false; |
2870 | |
2871 | SMLoc FirstIdxLoc = getLoc(); |
2872 | SMLoc SecondIdxLoc; |
2873 | |
2874 | if (!parseExpr(Imm&: RegLo)) |
2875 | return false; |
2876 | |
2877 | if (trySkipToken(Kind: AsmToken::Colon)) { |
2878 | SecondIdxLoc = getLoc(); |
2879 | if (!parseExpr(Imm&: RegHi)) |
2880 | return false; |
2881 | } else { |
2882 | RegHi = RegLo; |
2883 | } |
2884 | |
2885 | if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket" )) |
2886 | return false; |
2887 | |
2888 | if (!isUInt<32>(x: RegLo)) { |
2889 | Error(L: FirstIdxLoc, Msg: "invalid register index" ); |
2890 | return false; |
2891 | } |
2892 | |
2893 | if (!isUInt<32>(x: RegHi)) { |
2894 | Error(L: SecondIdxLoc, Msg: "invalid register index" ); |
2895 | return false; |
2896 | } |
2897 | |
2898 | if (RegLo > RegHi) { |
2899 | Error(L: FirstIdxLoc, Msg: "first register index should not exceed second index" ); |
2900 | return false; |
2901 | } |
2902 | |
2903 | if (RegHi == RegLo) { |
2904 | StringRef RegSuffix = getTokenStr(); |
2905 | if (RegSuffix == ".l" ) { |
2906 | SubReg = AMDGPU::lo16; |
2907 | lex(); |
2908 | } else if (RegSuffix == ".h" ) { |
2909 | SubReg = AMDGPU::hi16; |
2910 | lex(); |
2911 | } |
2912 | } |
2913 | |
2914 | Num = static_cast<unsigned>(RegLo); |
2915 | RegWidth = 32 * ((RegHi - RegLo) + 1); |
2916 | |
2917 | return true; |
2918 | } |
2919 | |
2920 | MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, |
2921 | unsigned &RegNum, |
2922 | unsigned &RegWidth, |
2923 | SmallVectorImpl<AsmToken> &Tokens) { |
2924 | assert(isToken(AsmToken::Identifier)); |
2925 | MCRegister Reg = getSpecialRegForName(RegName: getTokenStr()); |
2926 | if (Reg) { |
2927 | RegNum = 0; |
2928 | RegWidth = 32; |
2929 | RegKind = IS_SPECIAL; |
2930 | Tokens.push_back(Elt: getToken()); |
2931 | lex(); // skip register name |
2932 | } |
2933 | return Reg; |
2934 | } |
2935 | |
2936 | MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, |
2937 | unsigned &RegNum, |
2938 | unsigned &RegWidth, |
2939 | SmallVectorImpl<AsmToken> &Tokens) { |
2940 | assert(isToken(AsmToken::Identifier)); |
2941 | StringRef RegName = getTokenStr(); |
2942 | auto Loc = getLoc(); |
2943 | |
2944 | const RegInfo *RI = getRegularRegInfo(Str: RegName); |
2945 | if (!RI) { |
2946 | Error(L: Loc, Msg: "invalid register name" ); |
2947 | return MCRegister(); |
2948 | } |
2949 | |
2950 | Tokens.push_back(Elt: getToken()); |
2951 | lex(); // skip register name |
2952 | |
2953 | RegKind = RI->Kind; |
2954 | StringRef RegSuffix = RegName.substr(Start: RI->Name.size()); |
2955 | unsigned SubReg = NoSubRegister; |
2956 | if (!RegSuffix.empty()) { |
2957 | if (RegSuffix.consume_back(Suffix: ".l" )) |
2958 | SubReg = AMDGPU::lo16; |
2959 | else if (RegSuffix.consume_back(Suffix: ".h" )) |
2960 | SubReg = AMDGPU::hi16; |
2961 | |
2962 | // Single 32-bit register: vXX. |
2963 | if (!getRegNum(Str: RegSuffix, Num&: RegNum)) { |
2964 | Error(L: Loc, Msg: "invalid register index" ); |
2965 | return MCRegister(); |
2966 | } |
2967 | RegWidth = 32; |
2968 | } else { |
2969 | // Range of registers: v[XX:YY]. ":YY" is optional. |
2970 | if (!ParseRegRange(Num&: RegNum, RegWidth, SubReg)) |
2971 | return MCRegister(); |
2972 | } |
2973 | |
2974 | return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); |
2975 | } |
2976 | |
2977 | MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, |
2978 | unsigned &RegNum, unsigned &RegWidth, |
2979 | SmallVectorImpl<AsmToken> &Tokens) { |
2980 | MCRegister Reg; |
2981 | auto ListLoc = getLoc(); |
2982 | |
2983 | if (!skipToken(Kind: AsmToken::LBrac, |
2984 | ErrMsg: "expected a register or a list of registers" )) { |
2985 | return MCRegister(); |
2986 | } |
2987 | |
2988 | // List of consecutive registers, e.g.: [s0,s1,s2,s3] |
2989 | |
2990 | auto Loc = getLoc(); |
2991 | if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) |
2992 | return MCRegister(); |
2993 | if (RegWidth != 32) { |
2994 | Error(L: Loc, Msg: "expected a single 32-bit register" ); |
2995 | return MCRegister(); |
2996 | } |
2997 | |
2998 | for (; trySkipToken(Kind: AsmToken::Comma); ) { |
2999 | RegisterKind NextRegKind; |
3000 | MCRegister NextReg; |
3001 | unsigned NextRegNum, NextRegWidth; |
3002 | Loc = getLoc(); |
3003 | |
3004 | if (!ParseAMDGPURegister(RegKind&: NextRegKind, Reg&: NextReg, |
3005 | RegNum&: NextRegNum, RegWidth&: NextRegWidth, |
3006 | Tokens)) { |
3007 | return MCRegister(); |
3008 | } |
3009 | if (NextRegWidth != 32) { |
3010 | Error(L: Loc, Msg: "expected a single 32-bit register" ); |
3011 | return MCRegister(); |
3012 | } |
3013 | if (NextRegKind != RegKind) { |
3014 | Error(L: Loc, Msg: "registers in a list must be of the same kind" ); |
3015 | return MCRegister(); |
3016 | } |
3017 | if (!AddNextRegisterToList(Reg, RegWidth, RegKind, Reg1: NextReg, Loc)) |
3018 | return MCRegister(); |
3019 | } |
3020 | |
3021 | if (!skipToken(Kind: AsmToken::RBrac, |
3022 | ErrMsg: "expected a comma or a closing square bracket" )) { |
3023 | return MCRegister(); |
3024 | } |
3025 | |
3026 | if (isRegularReg(Kind: RegKind)) |
3027 | Reg = getRegularReg(RegKind, RegNum, SubReg: NoSubRegister, RegWidth, Loc: ListLoc); |
3028 | |
3029 | return Reg; |
3030 | } |
3031 | |
3032 | bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, |
3033 | MCRegister &Reg, unsigned &RegNum, |
3034 | unsigned &RegWidth, |
3035 | SmallVectorImpl<AsmToken> &Tokens) { |
3036 | auto Loc = getLoc(); |
3037 | Reg = MCRegister(); |
3038 | |
3039 | if (isToken(Kind: AsmToken::Identifier)) { |
3040 | Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); |
3041 | if (!Reg) |
3042 | Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); |
3043 | } else { |
3044 | Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); |
3045 | } |
3046 | |
3047 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3048 | if (!Reg) { |
3049 | assert(Parser.hasPendingError()); |
3050 | return false; |
3051 | } |
3052 | |
3053 | if (!subtargetHasRegister(MRI: *TRI, Reg)) { |
3054 | if (Reg == AMDGPU::SGPR_NULL) { |
3055 | Error(L: Loc, Msg: "'null' operand is not supported on this GPU" ); |
3056 | } else { |
3057 | Error(L: Loc, Msg: Twine(AMDGPUInstPrinter::getRegisterName(Reg)) + |
3058 | " register not available on this GPU" ); |
3059 | } |
3060 | return false; |
3061 | } |
3062 | |
3063 | return true; |
3064 | } |
3065 | |
3066 | bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, |
3067 | MCRegister &Reg, unsigned &RegNum, |
3068 | unsigned &RegWidth, |
3069 | bool RestoreOnFailure /*=false*/) { |
3070 | Reg = MCRegister(); |
3071 | |
3072 | SmallVector<AsmToken, 1> Tokens; |
3073 | if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { |
3074 | if (RestoreOnFailure) { |
3075 | while (!Tokens.empty()) { |
3076 | getLexer().UnLex(Token: Tokens.pop_back_val()); |
3077 | } |
3078 | } |
3079 | return true; |
3080 | } |
3081 | return false; |
3082 | } |
3083 | |
3084 | std::optional<StringRef> |
3085 | AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { |
3086 | switch (RegKind) { |
3087 | case IS_VGPR: |
3088 | return StringRef(".amdgcn.next_free_vgpr" ); |
3089 | case IS_SGPR: |
3090 | return StringRef(".amdgcn.next_free_sgpr" ); |
3091 | default: |
3092 | return std::nullopt; |
3093 | } |
3094 | } |
3095 | |
3096 | void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { |
3097 | auto SymbolName = getGprCountSymbolName(RegKind); |
3098 | assert(SymbolName && "initializing invalid register kind" ); |
3099 | MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName); |
3100 | Sym->setVariableValue(MCConstantExpr::create(Value: 0, Ctx&: getContext())); |
3101 | Sym->setRedefinable(true); |
3102 | } |
3103 | |
3104 | bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, |
3105 | unsigned DwordRegIndex, |
3106 | unsigned RegWidth) { |
3107 | // Symbols are only defined for GCN targets |
3108 | if (AMDGPU::getIsaVersion(GPU: getSTI().getCPU()).Major < 6) |
3109 | return true; |
3110 | |
3111 | auto SymbolName = getGprCountSymbolName(RegKind); |
3112 | if (!SymbolName) |
3113 | return true; |
3114 | MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName); |
3115 | |
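     |   // NewMax is the highest dword register index touched by this use; e.g.
     |   // v[2:3] (DwordRegIndex = 2, RegWidth = 64) gives NewMax = 3, so the
     |   // next-free symbol below becomes 4.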
3116 | int64_t NewMax = DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1; |
3117 | int64_t OldCount; |
3118 | |
3119 | if (!Sym->isVariable()) |
3120 | return !Error(L: getLoc(), |
3121 | Msg: ".amdgcn.next_free_{v,s}gpr symbols must be variable" ); |
3122 | if (!Sym->getVariableValue()->evaluateAsAbsolute(Res&: OldCount)) |
3123 | return !Error( |
3124 | L: getLoc(), |
3125 | Msg: ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions" ); |
3126 | |
3127 | if (OldCount <= NewMax) |
3128 | Sym->setVariableValue(MCConstantExpr::create(Value: NewMax + 1, Ctx&: getContext())); |
3129 | |
3130 | return true; |
3131 | } |
3132 | |
3133 | std::unique_ptr<AMDGPUOperand> |
3134 | AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { |
3135 | const auto &Tok = getToken(); |
3136 | SMLoc StartLoc = Tok.getLoc(); |
3137 | SMLoc EndLoc = Tok.getEndLoc(); |
3138 | RegisterKind RegKind; |
3139 | MCRegister Reg; |
3140 | unsigned RegNum, RegWidth; |
3141 | |
3142 | if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { |
3143 | return nullptr; |
3144 | } |
3145 | if (isHsaAbi(STI: getSTI())) { |
3146 | if (!updateGprCountSymbols(RegKind, DwordRegIndex: RegNum, RegWidth)) |
3147 | return nullptr; |
3148 | } else |
3149 | KernelScope.usesRegister(RegKind, DwordRegIndex: RegNum, RegWidth); |
3150 | return AMDGPUOperand::CreateReg(AsmParser: this, Reg, S: StartLoc, E: EndLoc); |
3151 | } |
3152 | |
3153 | ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, |
3154 | bool HasSP3AbsModifier, bool HasLit) { |
3155 | // TODO: add syntactic sugar for 1/(2*PI) |
3156 | |
3157 | if (isRegister()) |
3158 | return ParseStatus::NoMatch; |
3159 | assert(!isModifier()); |
3160 | |
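     |   // An explicit lit(...) wrapper forces the operand to be encoded as a
     |   // literal; parse the wrapped immediate recursively and require the
     |   // closing parenthesis.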
3161 | if (!HasLit) { |
3162 | HasLit = trySkipId(Id: "lit" ); |
3163 | if (HasLit) { |
3164 | if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit" )) |
3165 | return ParseStatus::Failure; |
3166 | ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit); |
3167 | if (S.isSuccess() && |
3168 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3169 | return ParseStatus::Failure; |
3170 | return S; |
3171 | } |
3172 | } |
3173 | |
3174 | const auto& Tok = getToken(); |
3175 | const auto& NextTok = peekToken(); |
3176 | bool IsReal = Tok.is(K: AsmToken::Real); |
3177 | SMLoc S = getLoc(); |
3178 | bool Negate = false; |
3179 | |
3180 | if (!IsReal && Tok.is(K: AsmToken::Minus) && NextTok.is(K: AsmToken::Real)) { |
3181 | lex(); |
3182 | IsReal = true; |
3183 | Negate = true; |
3184 | } |
3185 | |
3186 | AMDGPUOperand::Modifiers Mods; |
3187 | Mods.Lit = HasLit; |
3188 | |
3189 | if (IsReal) { |
3190 |     // Floating-point expressions are not supported; only a
3191 |     // floating-point literal with an optional leading sign
3192 |     // can be accepted here.
3193 | |
3194 | StringRef Num = getTokenStr(); |
3195 | lex(); |
3196 | |
3197 | APFloat RealVal(APFloat::IEEEdouble()); |
3198 | auto roundMode = APFloat::rmNearestTiesToEven; |
3199 | if (errorToBool(Err: RealVal.convertFromString(Num, roundMode).takeError())) |
3200 | return ParseStatus::Failure; |
3201 | if (Negate) |
3202 | RealVal.changeSign(); |
3203 | |
3204 | Operands.push_back( |
3205 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: RealVal.bitcastToAPInt().getZExtValue(), Loc: S, |
3206 | Type: AMDGPUOperand::ImmTyNone, IsFPImm: true)); |
3207 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3208 | Op.setModifiers(Mods); |
3209 | |
3210 | return ParseStatus::Success; |
3211 | |
3212 | } else { |
3213 | int64_t IntVal; |
3214 | const MCExpr *Expr; |
3215 | SMLoc S = getLoc(); |
3216 | |
3217 | if (HasSP3AbsModifier) { |
3218 | // This is a workaround for handling expressions |
3219 | // as arguments of SP3 'abs' modifier, for example: |
3220 | // |1.0| |
3221 | // |-1| |
3222 | // |1+x| |
3223 |       // This syntax is not compatible with the syntax of standard
3224 |       // MC expressions (due to the trailing '|').
3225 | SMLoc EndLoc; |
3226 | if (getParser().parsePrimaryExpr(Res&: Expr, EndLoc, TypeInfo: nullptr)) |
3227 | return ParseStatus::Failure; |
3228 | } else { |
3229 | if (Parser.parseExpression(Res&: Expr)) |
3230 | return ParseStatus::Failure; |
3231 | } |
3232 | |
3233 | if (Expr->evaluateAsAbsolute(Res&: IntVal)) { |
3234 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S)); |
3235 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3236 | Op.setModifiers(Mods); |
3237 | } else { |
3238 | if (HasLit) |
3239 | return ParseStatus::NoMatch; |
3240 | Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S)); |
3241 | } |
3242 | |
3243 | return ParseStatus::Success; |
3244 | } |
3245 | |
3246 | return ParseStatus::NoMatch; |
3247 | } |
3248 | |
3249 | ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { |
3250 | if (!isRegister()) |
3251 | return ParseStatus::NoMatch; |
3252 | |
3253 | if (auto R = parseRegister()) { |
3254 | assert(R->isReg()); |
3255 | Operands.push_back(Elt: std::move(R)); |
3256 | return ParseStatus::Success; |
3257 | } |
3258 | return ParseStatus::Failure; |
3259 | } |
3260 | |
3261 | ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, |
3262 | bool HasSP3AbsMod, bool HasLit) { |
3263 | ParseStatus Res = parseReg(Operands); |
3264 | if (!Res.isNoMatch()) |
3265 | return Res; |
3266 | if (isModifier()) |
3267 | return ParseStatus::NoMatch; |
3268 | return parseImm(Operands, HasSP3AbsModifier: HasSP3AbsMod, HasLit); |
3269 | } |
3270 | |
3271 | bool |
3272 | AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3273 | if (Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::LParen)) { |
3274 | const auto &str = Token.getString(); |
3275 | return str == "abs" || str == "neg" || str == "sext" ; |
3276 | } |
3277 | return false; |
3278 | } |
3279 | |
3280 | bool |
3281 | AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { |
3282 | return Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::Colon); |
3283 | } |
3284 | |
3285 | bool |
3286 | AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3287 | return isNamedOperandModifier(Token, NextToken) || Token.is(K: AsmToken::Pipe); |
3288 | } |
3289 | |
3290 | bool |
3291 | AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3292 | return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); |
3293 | } |
3294 | |
3295 | // Check if this is an operand modifier or an opcode modifier |
3296 | // which may look like an expression but is not. We should
3297 | // avoid parsing these modifiers as expressions. Currently |
3298 | // recognized sequences are: |
3299 | // |...| |
3300 | // abs(...) |
3301 | // neg(...) |
3302 | // sext(...) |
3303 | // -reg |
3304 | // -|...| |
3305 | // -abs(...) |
3306 | // name:... |
3307 | // |
3308 | bool |
3309 | AMDGPUAsmParser::isModifier() { |
3310 | |
3311 | AsmToken Tok = getToken(); |
3312 | AsmToken NextToken[2]; |
3313 | peekTokens(Tokens: NextToken); |
3314 | |
3315 | return isOperandModifier(Token: Tok, NextToken: NextToken[0]) || |
3316 | (Tok.is(K: AsmToken::Minus) && isRegOrOperandModifier(Token: NextToken[0], NextToken: NextToken[1])) || |
3317 | isOpcodeModifierWithVal(Token: Tok, NextToken: NextToken[0]); |
3318 | } |
3319 | |
3320 | // Check if the current token is an SP3 'neg' modifier. |
3321 | // Currently this modifier is allowed in the following contexts:
3322 | // |
3323 | // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". |
3324 | // 2. Before an 'abs' modifier: -abs(...) |
3325 | // 3. Before an SP3 'abs' modifier: -|...| |
3326 | // |
3327 | // In all other cases "-" is handled as a part |
3328 | // of an expression that follows the sign. |
3329 | // |
3330 | // Note: When "-" is followed by an integer literal N,
3331 | // this is interpreted as integer negation rather
3332 | // than a floating-point NEG modifier applied to N.
3333 | // Besides being counter-intuitive, such use of a floating-point
3334 | // NEG modifier would have resulted in different meanings
3335 | // of integer literals used with VOP1/2/C and VOP3,
3336 | // for example:
3337 | //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3338 | //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3339 | // Negative fp literals with a preceding "-" are
3340 | // handled likewise for uniformity.
3341 | // |
3342 | bool |
3343 | AMDGPUAsmParser::parseSP3NegModifier() { |
3344 | |
3345 | AsmToken NextToken[2]; |
3346 | peekTokens(Tokens: NextToken); |
3347 | |
3348 | if (isToken(Kind: AsmToken::Minus) && |
3349 | (isRegister(Token: NextToken[0], NextToken: NextToken[1]) || |
3350 | NextToken[0].is(K: AsmToken::Pipe) || |
3351 | isId(Token: NextToken[0], Id: "abs" ))) { |
3352 | lex(); |
3353 | return true; |
3354 | } |
3355 | |
3356 | return false; |
3357 | } |
3358 | |
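     | // Parse an operand with optional floating-point input modifiers.
     | // Accepted forms include, for example, abs(v0), neg(v1), lit(1.0) and
     | // the SP3 spellings |v2| and -v3. Combining the named and SP3 spellings
     | // of the same modifier (e.g. abs(|v2|)) is rejected below.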
3359 | ParseStatus |
3360 | AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, |
3361 | bool AllowImm) { |
3362 | bool Neg, SP3Neg; |
3363 | bool Abs, SP3Abs; |
3364 | bool Lit; |
3365 | SMLoc Loc; |
3366 | |
3367 | // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. |
3368 | if (isToken(Kind: AsmToken::Minus) && peekToken().is(K: AsmToken::Minus)) |
3369 | return Error(L: getLoc(), Msg: "invalid syntax, expected 'neg' modifier" ); |
3370 | |
3371 | SP3Neg = parseSP3NegModifier(); |
3372 | |
3373 | Loc = getLoc(); |
3374 | Neg = trySkipId(Id: "neg" ); |
3375 | if (Neg && SP3Neg) |
3376 | return Error(L: Loc, Msg: "expected register or immediate" ); |
3377 | if (Neg && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after neg" )) |
3378 | return ParseStatus::Failure; |
3379 | |
3380 | Abs = trySkipId(Id: "abs" ); |
3381 | if (Abs && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after abs" )) |
3382 | return ParseStatus::Failure; |
3383 | |
3384 | Lit = trySkipId(Id: "lit" ); |
3385 | if (Lit && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit" )) |
3386 | return ParseStatus::Failure; |
3387 | |
3388 | Loc = getLoc(); |
3389 | SP3Abs = trySkipToken(Kind: AsmToken::Pipe); |
3390 | if (Abs && SP3Abs) |
3391 | return Error(L: Loc, Msg: "expected register or immediate" ); |
3392 | |
3393 | ParseStatus Res; |
3394 | if (AllowImm) { |
3395 | Res = parseRegOrImm(Operands, HasSP3AbsMod: SP3Abs, HasLit: Lit); |
3396 | } else { |
3397 | Res = parseReg(Operands); |
3398 | } |
3399 | if (!Res.isSuccess()) |
3400 | return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res; |
3401 | |
3402 | if (Lit && !Operands.back()->isImm()) |
3403 | Error(L: Loc, Msg: "expected immediate with lit modifier" ); |
3404 | |
3405 | if (SP3Abs && !skipToken(Kind: AsmToken::Pipe, ErrMsg: "expected vertical bar" )) |
3406 | return ParseStatus::Failure; |
3407 | if (Abs && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3408 | return ParseStatus::Failure; |
3409 | if (Neg && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3410 | return ParseStatus::Failure; |
3411 | if (Lit && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3412 | return ParseStatus::Failure; |
3413 | |
3414 | AMDGPUOperand::Modifiers Mods; |
3415 | Mods.Abs = Abs || SP3Abs; |
3416 | Mods.Neg = Neg || SP3Neg; |
3417 | Mods.Lit = Lit; |
3418 | |
3419 | if (Mods.hasFPModifiers() || Lit) { |
3420 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3421 | if (Op.isExpr()) |
3422 | return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression" ); |
3423 | Op.setModifiers(Mods); |
3424 | } |
3425 | return ParseStatus::Success; |
3426 | } |
3427 | |
3428 | ParseStatus |
3429 | AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, |
3430 | bool AllowImm) { |
3431 | bool Sext = trySkipId(Id: "sext" ); |
3432 | if (Sext && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after sext" )) |
3433 | return ParseStatus::Failure; |
3434 | |
3435 | ParseStatus Res; |
3436 | if (AllowImm) { |
3437 | Res = parseRegOrImm(Operands); |
3438 | } else { |
3439 | Res = parseReg(Operands); |
3440 | } |
3441 | if (!Res.isSuccess()) |
3442 | return Sext ? ParseStatus::Failure : Res; |
3443 | |
3444 | if (Sext && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3445 | return ParseStatus::Failure; |
3446 | |
3447 | AMDGPUOperand::Modifiers Mods; |
3448 | Mods.Sext = Sext; |
3449 | |
3450 | if (Mods.hasIntModifiers()) { |
3451 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3452 | if (Op.isExpr()) |
3453 | return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression" ); |
3454 | Op.setModifiers(Mods); |
3455 | } |
3456 | |
3457 | return ParseStatus::Success; |
3458 | } |
3459 | |
3460 | ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { |
3461 | return parseRegOrImmWithFPInputMods(Operands, AllowImm: false); |
3462 | } |
3463 | |
3464 | ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { |
3465 | return parseRegOrImmWithIntInputMods(Operands, AllowImm: false); |
3466 | } |
3467 | |
3468 | ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { |
3469 | auto Loc = getLoc(); |
3470 | if (trySkipId(Id: "off" )) { |
3471 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: 0, Loc, |
3472 | Type: AMDGPUOperand::ImmTyOff, IsFPImm: false)); |
3473 | return ParseStatus::Success; |
3474 | } |
3475 | |
3476 | if (!isRegister()) |
3477 | return ParseStatus::NoMatch; |
3478 | |
3479 | std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); |
3480 | if (Reg) { |
3481 | Operands.push_back(Elt: std::move(Reg)); |
3482 | return ParseStatus::Success; |
3483 | } |
3484 | |
3485 | return ParseStatus::Failure; |
3486 | } |
3487 | |
3488 | unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { |
3489 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
3490 | |
3491 | if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || |
3492 | (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || |
3493 | (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || |
3494 | (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) |
3495 | return Match_InvalidOperand; |
3496 | |
3497 | if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || |
3498 | Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { |
3499 | // v_mac_f32/16 allow only dst_sel == DWORD; |
3500 | auto OpNum = |
3501 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::dst_sel); |
3502 | const auto &Op = Inst.getOperand(i: OpNum); |
3503 | if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { |
3504 | return Match_InvalidOperand; |
3505 | } |
3506 | } |
3507 | |
3508 | return Match_Success; |
3509 | } |
3510 | |
3511 | static ArrayRef<unsigned> getAllVariants() { |
3512 | static const unsigned Variants[] = { |
3513 | AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, |
3514 | AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, |
3515 | AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP |
3516 | }; |
3517 | |
3518 | return ArrayRef(Variants); |
3519 | } |
3520 | |
3521 | // What asm variants we should check |
3522 | ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { |
3523 | if (isForcedDPP() && isForcedVOP3()) { |
3524 | static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; |
3525 | return ArrayRef(Variants); |
3526 | } |
3527 | if (getForcedEncodingSize() == 32) { |
3528 | static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; |
3529 | return ArrayRef(Variants); |
3530 | } |
3531 | |
3532 | if (isForcedVOP3()) { |
3533 | static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; |
3534 | return ArrayRef(Variants); |
3535 | } |
3536 | |
3537 | if (isForcedSDWA()) { |
3538 | static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, |
3539 | AMDGPUAsmVariants::SDWA9}; |
3540 | return ArrayRef(Variants); |
3541 | } |
3542 | |
3543 | if (isForcedDPP()) { |
3544 | static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; |
3545 | return ArrayRef(Variants); |
3546 | } |
3547 | |
3548 | return getAllVariants(); |
3549 | } |
3550 | |
3551 | StringRef AMDGPUAsmParser::getMatchedVariantName() const { |
3552 | if (isForcedDPP() && isForcedVOP3()) |
3553 | return "e64_dpp" ; |
3554 | |
3555 | if (getForcedEncodingSize() == 32) |
3556 | return "e32" ; |
3557 | |
3558 | if (isForcedVOP3()) |
3559 | return "e64" ; |
3560 | |
3561 | if (isForcedSDWA()) |
3562 | return "sdwa" ; |
3563 | |
3564 | if (isForcedDPP()) |
3565 | return "dpp" ; |
3566 | |
3567 | return "" ; |
3568 | } |
3569 | |
3570 | unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { |
3571 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
3572 | for (MCPhysReg Reg : Desc.implicit_uses()) { |
3573 | switch (Reg) { |
3574 | case AMDGPU::FLAT_SCR: |
3575 | case AMDGPU::VCC: |
3576 | case AMDGPU::VCC_LO: |
3577 | case AMDGPU::VCC_HI: |
3578 | case AMDGPU::M0: |
3579 | return Reg; |
3580 | default: |
3581 | break; |
3582 | } |
3583 | } |
3584 | return AMDGPU::NoRegister; |
3585 | } |
3586 | |
3587 | // NB: This code is correct only when used to check constant |
3588 | // bus limitations because GFX7 has no f16 inline constants.
3589 | // Note that there are no cases when a GFX7 opcode violates |
3590 | // constant bus limitations due to the use of an f16 constant. |
3591 | bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, |
3592 | unsigned OpIdx) const { |
3593 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
3594 | |
3595 | if (!AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx) || |
3596 | AMDGPU::isKImmOperand(Desc, OpNo: OpIdx)) { |
3597 | return false; |
3598 | } |
3599 | |
3600 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3601 | |
3602 | int64_t Val = MO.getImm(); |
3603 | auto OpSize = AMDGPU::getOperandSize(Desc, OpNo: OpIdx); |
3604 | |
3605 | switch (OpSize) { // expected operand size |
3606 | case 8: |
3607 | return AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3608 | case 4: |
3609 | return AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3610 | case 2: { |
3611 | const unsigned OperandType = Desc.operands()[OpIdx].OperandType; |
3612 | if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || |
3613 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16) |
3614 | return AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3615 | |
3616 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || |
3617 | OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) |
3618 | return AMDGPU::isInlinableLiteralV2I16(Literal: Val); |
3619 | |
3620 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || |
3621 | OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) |
3622 | return AMDGPU::isInlinableLiteralV2F16(Literal: Val); |
3623 | |
3624 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 || |
3625 | OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16) |
3626 | return AMDGPU::isInlinableLiteralV2BF16(Literal: Val); |
3627 | |
3628 | if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 || |
3629 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16) |
3630 | return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3631 | |
3632 | if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 || |
3633 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16) |
3634 | return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3635 | |
3636 | llvm_unreachable("invalid operand type" ); |
3637 | } |
3638 | default: |
3639 | llvm_unreachable("invalid operand size" ); |
3640 | } |
3641 | } |
3642 | |
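     | // Maximum number of distinct scalar values (SGPRs and literals) that the
     | // given opcode may read via the constant bus.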
3643 | unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { |
3644 | if (!isGFX10Plus()) |
3645 | return 1; |
3646 | |
3647 | switch (Opcode) { |
3648 | // 64-bit shift instructions can use only one scalar value input |
3649 | case AMDGPU::V_LSHLREV_B64_e64: |
3650 | case AMDGPU::V_LSHLREV_B64_gfx10: |
3651 | case AMDGPU::V_LSHLREV_B64_e64_gfx11: |
3652 | case AMDGPU::V_LSHLREV_B64_e32_gfx12: |
3653 | case AMDGPU::V_LSHLREV_B64_e64_gfx12: |
3654 | case AMDGPU::V_LSHRREV_B64_e64: |
3655 | case AMDGPU::V_LSHRREV_B64_gfx10: |
3656 | case AMDGPU::V_LSHRREV_B64_e64_gfx11: |
3657 | case AMDGPU::V_LSHRREV_B64_e64_gfx12: |
3658 | case AMDGPU::V_ASHRREV_I64_e64: |
3659 | case AMDGPU::V_ASHRREV_I64_gfx10: |
3660 | case AMDGPU::V_ASHRREV_I64_e64_gfx11: |
3661 | case AMDGPU::V_ASHRREV_I64_e64_gfx12: |
3662 | case AMDGPU::V_LSHL_B64_e64: |
3663 | case AMDGPU::V_LSHR_B64_e64: |
3664 | case AMDGPU::V_ASHR_I64_e64: |
3665 | return 1; |
3666 | default: |
3667 | return 2; |
3668 | } |
3669 | } |
3670 | |
3671 | constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; |
3672 | using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; |
3673 | |
3674 | // Get regular operand indices in the same order as specified |
3675 | // in the instruction (but append mandatory literals to the end). |
3676 | static OperandIndices getSrcOperandIndices(unsigned Opcode, |
3677 | bool AddMandatoryLiterals = false) { |
3678 | |
3679 | int16_t ImmIdx = |
3680 | AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::imm) : -1; |
3681 | |
3682 | if (isVOPD(Opc: Opcode)) { |
3683 | int16_t ImmXIdx = |
3684 | AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::immX) : -1; |
3685 | |
3686 | return {getNamedOperandIdx(Opcode, Name: OpName::src0X), |
3687 | getNamedOperandIdx(Opcode, Name: OpName::vsrc1X), |
3688 | getNamedOperandIdx(Opcode, Name: OpName::src0Y), |
3689 | getNamedOperandIdx(Opcode, Name: OpName::vsrc1Y), |
3690 | ImmXIdx, |
3691 | ImmIdx}; |
3692 | } |
3693 | |
3694 | return {getNamedOperandIdx(Opcode, Name: OpName::src0), |
3695 | getNamedOperandIdx(Opcode, Name: OpName::src1), |
3696 | getNamedOperandIdx(Opcode, Name: OpName::src2), ImmIdx}; |
3697 | } |
3698 | |
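     | // An operand occupies the constant bus if it is a non-null SGPR, an
     | // expression, or a literal that cannot be encoded as an inline constant.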
3699 | bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { |
3700 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3701 | if (MO.isImm()) |
3702 | return !isInlineConstant(Inst, OpIdx); |
3703 | if (MO.isReg()) { |
3704 | auto Reg = MO.getReg(); |
3705 | if (!Reg) |
3706 | return false; |
3707 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3708 | auto PReg = mc2PseudoReg(Reg); |
3709 | return isSGPR(Reg: PReg, TRI) && PReg != SGPR_NULL; |
3710 | } |
3711 | return true; |
3712 | } |
3713 | |
3714 | // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`: |
3715 | // Writelane is special in that it can use SGPR and M0 (which would normally |
3716 | // count as using the constant bus twice - but in this case it is allowed since |
3717 | // the lane selector doesn't count as a use of the constant bus). However, it is |
3718 | // still required to abide by the 1 SGPR rule. |
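     | // For example, "v_writelane_b32 v1, s2, m0" reads both s2 and m0 but is
     | // accepted because m0 is only used as the lane selector.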
3719 | static bool checkWriteLane(const MCInst &Inst) { |
3720 | const unsigned Opcode = Inst.getOpcode(); |
3721 | if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi) |
3722 | return false; |
3723 | const MCOperand &LaneSelOp = Inst.getOperand(i: 2); |
3724 | if (!LaneSelOp.isReg()) |
3725 | return false; |
3726 | auto LaneSelReg = mc2PseudoReg(Reg: LaneSelOp.getReg()); |
3727 | return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11; |
3728 | } |
3729 | |
3730 | bool AMDGPUAsmParser::validateConstantBusLimitations( |
3731 | const MCInst &Inst, const OperandVector &Operands) { |
3732 | const unsigned Opcode = Inst.getOpcode(); |
3733 | const MCInstrDesc &Desc = MII.get(Opcode); |
3734 | MCRegister LastSGPR; |
3735 | unsigned ConstantBusUseCount = 0; |
3736 | unsigned NumLiterals = 0; |
3737 | unsigned LiteralSize; |
3738 | |
3739 | if (!(Desc.TSFlags & |
3740 | (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | |
3741 | SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) && |
3742 | !isVOPD(Opc: Opcode)) |
3743 | return true; |
3744 | |
3745 | if (checkWriteLane(Inst)) |
3746 | return true; |
3747 | |
3748 | // Check special imm operands (used by madmk, etc) |
3749 | if (AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::imm)) { |
3750 | ++NumLiterals; |
3751 | LiteralSize = 4; |
3752 | } |
3753 | |
3754 | SmallDenseSet<unsigned> SGPRsUsed; |
3755 | unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); |
3756 | if (SGPRUsed != AMDGPU::NoRegister) { |
3757 | SGPRsUsed.insert(V: SGPRUsed); |
3758 | ++ConstantBusUseCount; |
3759 | } |
3760 | |
3761 | OperandIndices OpIndices = getSrcOperandIndices(Opcode); |
3762 | |
3763 | for (int OpIdx : OpIndices) { |
3764 | if (OpIdx == -1) |
3765 | continue; |
3766 | |
3767 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3768 | if (usesConstantBus(Inst, OpIdx)) { |
3769 | if (MO.isReg()) { |
3770 | LastSGPR = mc2PseudoReg(Reg: MO.getReg()); |
3771 |         // Pairs of registers with partial intersections like these
3772 | // s0, s[0:1] |
3773 | // flat_scratch_lo, flat_scratch |
3774 | // flat_scratch_lo, flat_scratch_hi |
3775 | // are theoretically valid but they are disabled anyway. |
3776 | // Note that this code mimics SIInstrInfo::verifyInstruction |
3777 | if (SGPRsUsed.insert(V: LastSGPR).second) { |
3778 | ++ConstantBusUseCount; |
3779 | } |
3780 | } else { // Expression or a literal |
3781 | |
3782 | if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) |
3783 | continue; // special operand like VINTERP attr_chan |
3784 | |
3785 | // An instruction may use only one literal. |
3786 |         // This has been validated in a previous step.
3787 | // See validateVOPLiteral. |
3788 | // This literal may be used as more than one operand. |
3789 | // If all these operands are of the same size, |
3790 | // this literal counts as one scalar value. |
3791 | // Otherwise it counts as 2 scalar values. |
3792 | // See "GFX10 Shader Programming", section 3.6.2.3. |
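     |         // For example, a 32-bit literal reused as both a 32-bit and a
     |         // 64-bit source has mismatched operand sizes and therefore
     |         // counts as 2 scalar values here.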
3793 | |
3794 | unsigned Size = AMDGPU::getOperandSize(Desc, OpNo: OpIdx); |
3795 | if (Size < 4) |
3796 | Size = 4; |
3797 | |
3798 | if (NumLiterals == 0) { |
3799 | NumLiterals = 1; |
3800 | LiteralSize = Size; |
3801 | } else if (LiteralSize != Size) { |
3802 | NumLiterals = 2; |
3803 | } |
3804 | } |
3805 | } |
3806 | } |
3807 | ConstantBusUseCount += NumLiterals; |
3808 | |
3809 | if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) |
3810 | return true; |
3811 | |
3812 | SMLoc LitLoc = getLitLoc(Operands); |
3813 | SMLoc RegLoc = getRegLoc(Reg: LastSGPR, Operands); |
3814 | SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; |
3815 | Error(L: Loc, Msg: "invalid operand (violates constant bus restrictions)" ); |
3816 | return false; |
3817 | } |
3818 | |
3819 | bool AMDGPUAsmParser::validateVOPDRegBankConstraints( |
3820 | const MCInst &Inst, const OperandVector &Operands) { |
3821 | |
3822 | const unsigned Opcode = Inst.getOpcode(); |
3823 | if (!isVOPD(Opc: Opcode)) |
3824 | return true; |
3825 | |
3826 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3827 | |
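     |   // Map each component operand to its VGPR (or a null register for SGPRs
     |   // and immediates) so that getInvalidCompOperandIndex below can verify
     |   // the even/odd dst constraint and the per-bank src constraints.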
3828 | auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { |
3829 | const MCOperand &Opr = Inst.getOperand(i: OperandIdx); |
3830 | return (Opr.isReg() && !isSGPR(Reg: mc2PseudoReg(Reg: Opr.getReg()), TRI)) |
3831 | ? Opr.getReg() |
3832 | : MCRegister(); |
3833 | }; |
3834 | |
3835 |   // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source cache.
3836 | bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; |
3837 | |
3838 | const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII); |
3839 | auto InvalidCompOprIdx = |
3840 | InstInfo.getInvalidCompOperandIndex(GetRegIdx: getVRegIdx, SkipSrc); |
3841 | if (!InvalidCompOprIdx) |
3842 | return true; |
3843 | |
3844 | auto CompOprIdx = *InvalidCompOprIdx; |
3845 | auto ParsedIdx = |
3846 | std::max(a: InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), |
3847 | b: InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); |
3848 | assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); |
3849 | |
3850 | auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); |
3851 | if (CompOprIdx == VOPD::Component::DST) { |
3852 | Error(L: Loc, Msg: "one dst register must be even and the other odd" ); |
3853 | } else { |
3854 | auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; |
3855 | Error(L: Loc, Msg: Twine("src" ) + Twine(CompSrcIdx) + |
3856 | " operands must use different VGPR banks" ); |
3857 | } |
3858 | |
3859 | return false; |
3860 | } |
3861 | |
3862 | bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { |
3863 | |
3864 | const unsigned Opc = Inst.getOpcode(); |
3865 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3866 | |
3867 | if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { |
3868 | int ClampIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::clamp); |
3869 | assert(ClampIdx != -1); |
3870 | return Inst.getOperand(i: ClampIdx).getImm() == 0; |
3871 | } |
3872 | |
3873 | return true; |
3874 | } |
3875 | |
3876 | constexpr uint64_t MIMGFlags = |
3877 | SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; |
3878 | |
3879 | bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, |
3880 | const SMLoc &IDLoc) { |
3881 | |
3882 | const unsigned Opc = Inst.getOpcode(); |
3883 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3884 | |
3885 | if ((Desc.TSFlags & MIMGFlags) == 0) |
3886 | return true; |
3887 | |
3888 | int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdata); |
3889 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask); |
3890 | int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::tfe); |
3891 | |
3892 |   if (VDataIdx == -1 && isGFX10Plus()) // no-return image_sample
3893 | return true; |
3894 | |
3895 | if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray |
3896 | return true; |
3897 | |
3898 | unsigned VDataSize = AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VDataIdx); |
3899 | unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(i: TFEIdx).getImm()) ? 1 : 0; |
3900 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
3901 | if (DMask == 0) |
3902 | DMask = 1; |
3903 | |
3904 | bool IsPackedD16 = false; |
3905 | unsigned DataSize = |
3906 | (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(Value: DMask); |
3907 | if (hasPackedD16()) { |
3908 | int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16); |
3909 | IsPackedD16 = D16Idx >= 0; |
3910 | if (IsPackedD16 && Inst.getOperand(i: D16Idx).getImm()) |
3911 | DataSize = (DataSize + 1) / 2; |
3912 | } |
3913 | |
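     |   // E.g. dmask = 0xe selects three components, so three data registers
     |   // are expected (plus one more when tfe is set).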
3914 | if ((VDataSize / 4) == DataSize + TFESize) |
3915 | return true; |
3916 | |
3917 | StringRef Modifiers; |
3918 | if (isGFX90A()) |
3919 | Modifiers = IsPackedD16 ? "dmask and d16" : "dmask" ; |
3920 | else |
3921 | Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe" ; |
3922 | |
3923 | Error(L: IDLoc, Msg: Twine("image data size does not match " ) + Modifiers); |
3924 | return false; |
3925 | } |
3926 | |
3927 | bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, |
3928 | const SMLoc &IDLoc) { |
3929 | const unsigned Opc = Inst.getOpcode(); |
3930 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3931 | |
3932 | if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) |
3933 | return true; |
3934 | |
3935 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); |
3936 | |
3937 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = |
3938 | AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode); |
3939 | int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vaddr0); |
3940 | AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) |
3941 | ? AMDGPU::OpName::srsrc |
3942 | : AMDGPU::OpName::rsrc; |
3943 | int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: RSrcOpName); |
3944 | int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim); |
3945 | int A16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::a16); |
3946 | |
3947 | assert(VAddr0Idx != -1); |
3948 | assert(SrsrcIdx != -1); |
3949 | assert(SrsrcIdx > VAddr0Idx); |
3950 | |
3951 | bool IsA16 = (A16Idx != -1 && Inst.getOperand(i: A16Idx).getImm()); |
3952 | if (BaseOpcode->BVH) { |
3953 | if (IsA16 == BaseOpcode->A16) |
3954 | return true; |
3955 | Error(L: IDLoc, Msg: "image address size does not match a16" ); |
3956 | return false; |
3957 | } |
3958 | |
3959 | unsigned Dim = Inst.getOperand(i: DimIdx).getImm(); |
3960 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim); |
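     |   // In NSA form each address component occupies its own VGPR operand, so
     |   // the address size is the number of vaddr operands between vaddr0 and
     |   // srsrc; otherwise it is the dword size of the single vaddr tuple.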
3961 | bool IsNSA = SrsrcIdx - VAddr0Idx > 1; |
3962 | unsigned ActualAddrSize = |
3963 | IsNSA ? SrsrcIdx - VAddr0Idx |
3964 | : AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VAddr0Idx) / 4; |
3965 | |
3966 | unsigned ExpectedAddrSize = |
3967 | AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim: DimInfo, IsA16, IsG16Supported: hasG16()); |
3968 | |
3969 | if (IsNSA) { |
3970 | if (hasPartialNSAEncoding() && |
3971 | ExpectedAddrSize > |
3972 | getNSAMaxSize(HasSampler: Desc.TSFlags & SIInstrFlags::VSAMPLE)) { |
3973 | int VAddrLastIdx = SrsrcIdx - 1; |
3974 | unsigned VAddrLastSize = |
3975 | AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VAddrLastIdx) / 4; |
3976 | |
3977 | ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; |
3978 | } |
3979 | } else { |
3980 | if (ExpectedAddrSize > 12) |
3981 | ExpectedAddrSize = 16; |
3982 | |
3983 | // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. |
3984 | // This provides backward compatibility for assembly created |
3985 | // before 160b/192b/224b types were directly supported. |
3986 | if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) |
3987 | return true; |
3988 | } |
3989 | |
3990 | if (ActualAddrSize == ExpectedAddrSize) |
3991 | return true; |
3992 | |
3993 | Error(L: IDLoc, Msg: "image address size does not match dim and a16" ); |
3994 | return false; |
3995 | } |
3996 | |
3997 | bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { |
3998 | |
3999 | const unsigned Opc = Inst.getOpcode(); |
4000 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4001 | |
4002 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4003 | return true; |
4004 | if (!Desc.mayLoad() || !Desc.mayStore()) |
4005 | return true; // Not atomic |
4006 | |
4007 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask); |
4008 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
4009 | |
4010 | // This is an incomplete check because image_atomic_cmpswap |
4011 | // may only use 0x3 and 0xf while other atomic operations |
4012 | // may use 0x1 and 0x3. However these limitations are |
4013 | // verified when we check that dmask matches dst size. |
4014 | return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; |
4015 | } |
4016 | |
4017 | bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { |
4018 | |
4019 | const unsigned Opc = Inst.getOpcode(); |
4020 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4021 | |
4022 | if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) |
4023 | return true; |
4024 | |
4025 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask); |
4026 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
4027 | |
4028 | // GATHER4 instructions use dmask in a different fashion compared to |
4029 | // other MIMG instructions. The only useful DMASK values are |
4030 | // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns |
4031 | // (red,red,red,red) etc.) The ISA document doesn't mention |
4032 | // this. |
4033 | return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; |
4034 | } |
4035 | |
4036 | bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst, |
4037 | const OperandVector &Operands) { |
4038 | if (!isGFX10Plus()) |
4039 | return true; |
4040 | |
4041 | const unsigned Opc = Inst.getOpcode(); |
4042 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4043 | |
4044 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4045 | return true; |
4046 | |
4047 | // image_bvh_intersect_ray instructions do not have dim |
4048 | if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH) |
4049 | return true; |
4050 | |
4051 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
4052 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4053 | if (Op.isDim()) |
4054 | return true; |
4055 | } |
4056 | return false; |
4057 | } |
4058 | |
4059 | bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { |
4060 | const unsigned Opc = Inst.getOpcode(); |
4061 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4062 | |
4063 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4064 | return true; |
4065 | |
4066 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); |
4067 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = |
4068 | AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode); |
4069 | |
4070 | if (!BaseOpcode->MSAA) |
4071 | return true; |
4072 | |
4073 | int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim); |
4074 | assert(DimIdx != -1); |
4075 | |
4076 | unsigned Dim = Inst.getOperand(i: DimIdx).getImm(); |
4077 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim); |
4078 | |
4079 | return DimInfo->MSAA; |
4080 | } |
4081 | |
4082 | static bool IsMovrelsSDWAOpcode(const unsigned Opcode) |
4083 | { |
4084 | switch (Opcode) { |
4085 | case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: |
4086 | case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: |
4087 | case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: |
4088 | return true; |
4089 | default: |
4090 | return false; |
4091 | } |
4092 | } |
4093 | |
4094 | // movrels* opcodes should only allow VGPRs as src0.
4095 | // This is specified in the .td description for vop1/vop3,
4096 | // but sdwa is handled differently. See isSDWAOperand.
4097 | bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, |
4098 | const OperandVector &Operands) { |
4099 | |
4100 | const unsigned Opc = Inst.getOpcode(); |
4101 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4102 | |
4103 | if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opcode: Opc)) |
4104 | return true; |
4105 | |
4106 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0); |
4107 | assert(Src0Idx != -1); |
4108 | |
4109 | SMLoc ErrLoc; |
4110 | const MCOperand &Src0 = Inst.getOperand(i: Src0Idx); |
4111 | if (Src0.isReg()) { |
4112 | auto Reg = mc2PseudoReg(Reg: Src0.getReg()); |
4113 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4114 | if (!isSGPR(Reg, TRI)) |
4115 | return true; |
4116 | ErrLoc = getRegLoc(Reg, Operands); |
4117 | } else { |
4118 | ErrLoc = getConstLoc(Operands); |
4119 | } |
4120 | |
4121 | Error(L: ErrLoc, Msg: "source operand must be a VGPR" ); |
4122 | return false; |
4123 | } |
4124 | |
4125 | bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, |
4126 | const OperandVector &Operands) { |
4127 | |
4128 | const unsigned Opc = Inst.getOpcode(); |
4129 | |
4130 | if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) |
4131 | return true; |
4132 | |
4133 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0); |
4134 | assert(Src0Idx != -1); |
4135 | |
4136 | const MCOperand &Src0 = Inst.getOperand(i: Src0Idx); |
4137 | if (!Src0.isReg()) |
4138 | return true; |
4139 | |
4140 | auto Reg = mc2PseudoReg(Reg: Src0.getReg()); |
4141 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4142 | if (!isGFX90A() && isSGPR(Reg, TRI)) { |
4143 | Error(L: getRegLoc(Reg, Operands), |
4144 | Msg: "source operand must be either a VGPR or an inline constant" ); |
4145 | return false; |
4146 | } |
4147 | |
4148 | return true; |
4149 | } |
4150 | |
4151 | bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, |
4152 | const OperandVector &Operands) { |
4153 | unsigned Opcode = Inst.getOpcode(); |
4154 | const MCInstrDesc &Desc = MII.get(Opcode); |
4155 | |
4156 | if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || |
4157 | !getFeatureBits()[FeatureMFMAInlineLiteralBug]) |
4158 | return true; |
4159 | |
4160 | const int Src2Idx = getNamedOperandIdx(Opcode, Name: OpName::src2); |
4161 | if (Src2Idx == -1) |
4162 | return true; |
4163 | |
4164 | if (Inst.getOperand(i: Src2Idx).isImm() && isInlineConstant(Inst, OpIdx: Src2Idx)) { |
4165 | Error(L: getConstLoc(Operands), |
4166 | Msg: "inline constants are not allowed for this operand" ); |
4167 | return false; |
4168 | } |
4169 | |
4170 | return true; |
4171 | } |
4172 | |
4173 | bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, |
4174 | const OperandVector &Operands) { |
4175 | const unsigned Opc = Inst.getOpcode(); |
4176 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4177 | |
4178 | if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) |
4179 | return true; |
4180 | |
4181 | int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp); |
4182 | if (BlgpIdx != -1) { |
4183 | if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opcode: Opc)) { |
4184 | int CbszIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz); |
4185 | |
4186 | unsigned CBSZ = Inst.getOperand(i: CbszIdx).getImm(); |
4187 | unsigned BLGP = Inst.getOperand(i: BlgpIdx).getImm(); |
4188 | |
4189 |       // Validate that the correct register size was used for the
4190 |       // floating-point format operands.
4191 | |
4192 | bool Success = true; |
4193 | if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: CBSZ)) { |
4194 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0); |
4195 | Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg()), |
4196 | Operands), |
4197 | Msg: "wrong register tuple size for cbsz value " + Twine(CBSZ)); |
4198 | Success = false; |
4199 | } |
4200 | |
4201 | if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: BLGP)) { |
4202 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1); |
4203 | Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Inst.getOperand(i: Src1Idx).getReg()), |
4204 | Operands), |
4205 | Msg: "wrong register tuple size for blgp value " + Twine(BLGP)); |
4206 | Success = false; |
4207 | } |
4208 | |
4209 | return Success; |
4210 | } |
4211 | } |
4212 | |
4213 | const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2); |
4214 | if (Src2Idx == -1) |
4215 | return true; |
4216 | |
4217 | const MCOperand &Src2 = Inst.getOperand(i: Src2Idx); |
4218 | if (!Src2.isReg()) |
4219 | return true; |
4220 | |
4221 | MCRegister Src2Reg = Src2.getReg(); |
4222 | MCRegister DstReg = Inst.getOperand(i: 0).getReg(); |
4223 | if (Src2Reg == DstReg) |
4224 | return true; |
4225 | |
4226 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4227 | if (TRI->getRegClass(i: Desc.operands()[0].RegClass).getSizeInBits() <= 128) |
4228 | return true; |
4229 | |
4230 | if (TRI->regsOverlap(RegA: Src2Reg, RegB: DstReg)) { |
4231 | Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Src2Reg), Operands), |
4232 | Msg: "source 2 operand must not partially overlap with dst" ); |
4233 | return false; |
4234 | } |
4235 | |
4236 | return true; |
4237 | } |
4238 | |
4239 | bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { |
4240 | switch (Inst.getOpcode()) { |
4241 | default: |
4242 | return true; |
4243 | case V_DIV_SCALE_F32_gfx6_gfx7: |
4244 | case V_DIV_SCALE_F32_vi: |
4245 | case V_DIV_SCALE_F32_gfx10: |
4246 | case V_DIV_SCALE_F64_gfx6_gfx7: |
4247 | case V_DIV_SCALE_F64_vi: |
4248 | case V_DIV_SCALE_F64_gfx10: |
4249 | break; |
4250 | } |
4251 | |
4252 | // TODO: Check that src0 = src1 or src2. |
4253 | |
4254 |   for (auto Name : {AMDGPU::OpName::src0_modifiers,
4255 |                     AMDGPU::OpName::src1_modifiers,
4256 |                     AMDGPU::OpName::src2_modifiers}) {
4257 | if (Inst.getOperand(i: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name)) |
4258 | .getImm() & |
4259 | SISrcMods::ABS) { |
4260 | return false; |
4261 | } |
4262 | } |
4263 | |
4264 | return true; |
4265 | } |
4266 | |
4267 | bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { |
4268 | |
4269 | const unsigned Opc = Inst.getOpcode(); |
4270 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4271 | |
4272 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4273 | return true; |
4274 | |
4275 | int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16); |
4276 | if (D16Idx >= 0 && Inst.getOperand(i: D16Idx).getImm()) { |
4277 | if (isCI() || isSI()) |
4278 | return false; |
4279 | } |
4280 | |
4281 | return true; |
4282 | } |
4283 | |
4284 | bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) { |
4285 | const unsigned Opc = Inst.getOpcode(); |
4286 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4287 | |
4288 | if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0) |
4289 | return true; |
4290 | |
4291 | int R128Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::r128); |
4292 | |
4293 | return R128Idx < 0 || !Inst.getOperand(i: R128Idx).getImm(); |
4294 | } |
4295 | |
4296 | static bool IsRevOpcode(const unsigned Opcode) |
4297 | { |
4298 | switch (Opcode) { |
4299 | case AMDGPU::V_SUBREV_F32_e32: |
4300 | case AMDGPU::V_SUBREV_F32_e64: |
4301 | case AMDGPU::V_SUBREV_F32_e32_gfx10: |
4302 | case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: |
4303 | case AMDGPU::V_SUBREV_F32_e32_vi: |
4304 | case AMDGPU::V_SUBREV_F32_e64_gfx10: |
4305 | case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: |
4306 | case AMDGPU::V_SUBREV_F32_e64_vi: |
4307 | |
4308 | case AMDGPU::V_SUBREV_CO_U32_e32: |
4309 | case AMDGPU::V_SUBREV_CO_U32_e64: |
4310 | case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: |
4311 | case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: |
4312 | |
4313 | case AMDGPU::V_SUBBREV_U32_e32: |
4314 | case AMDGPU::V_SUBBREV_U32_e64: |
4315 | case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: |
4316 | case AMDGPU::V_SUBBREV_U32_e32_vi: |
4317 | case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: |
4318 | case AMDGPU::V_SUBBREV_U32_e64_vi: |
4319 | |
4320 | case AMDGPU::V_SUBREV_U32_e32: |
4321 | case AMDGPU::V_SUBREV_U32_e64: |
4322 | case AMDGPU::V_SUBREV_U32_e32_gfx9: |
4323 | case AMDGPU::V_SUBREV_U32_e32_vi: |
4324 | case AMDGPU::V_SUBREV_U32_e64_gfx9: |
4325 | case AMDGPU::V_SUBREV_U32_e64_vi: |
4326 | |
4327 | case AMDGPU::V_SUBREV_F16_e32: |
4328 | case AMDGPU::V_SUBREV_F16_e64: |
4329 | case AMDGPU::V_SUBREV_F16_e32_gfx10: |
4330 | case AMDGPU::V_SUBREV_F16_e32_vi: |
4331 | case AMDGPU::V_SUBREV_F16_e64_gfx10: |
4332 | case AMDGPU::V_SUBREV_F16_e64_vi: |
4333 | |
4334 | case AMDGPU::V_SUBREV_U16_e32: |
4335 | case AMDGPU::V_SUBREV_U16_e64: |
4336 | case AMDGPU::V_SUBREV_U16_e32_vi: |
4337 | case AMDGPU::V_SUBREV_U16_e64_vi: |
4338 | |
4339 | case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: |
4340 | case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: |
4341 | case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: |
4342 | |
4343 | case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: |
4344 | case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: |
4345 | |
4346 | case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: |
4347 | case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: |
4348 | |
4349 | case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: |
4350 | case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: |
4351 | |
4352 | case AMDGPU::V_LSHRREV_B32_e32: |
4353 | case AMDGPU::V_LSHRREV_B32_e64: |
4354 | case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: |
4355 | case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: |
4356 | case AMDGPU::V_LSHRREV_B32_e32_vi: |
4357 | case AMDGPU::V_LSHRREV_B32_e64_vi: |
4358 | case AMDGPU::V_LSHRREV_B32_e32_gfx10: |
4359 | case AMDGPU::V_LSHRREV_B32_e64_gfx10: |
4360 | |
4361 | case AMDGPU::V_ASHRREV_I32_e32: |
4362 | case AMDGPU::V_ASHRREV_I32_e64: |
4363 | case AMDGPU::V_ASHRREV_I32_e32_gfx10: |
4364 | case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: |
4365 | case AMDGPU::V_ASHRREV_I32_e32_vi: |
4366 | case AMDGPU::V_ASHRREV_I32_e64_gfx10: |
4367 | case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: |
4368 | case AMDGPU::V_ASHRREV_I32_e64_vi: |
4369 | |
4370 | case AMDGPU::V_LSHLREV_B32_e32: |
4371 | case AMDGPU::V_LSHLREV_B32_e64: |
4372 | case AMDGPU::V_LSHLREV_B32_e32_gfx10: |
4373 | case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: |
4374 | case AMDGPU::V_LSHLREV_B32_e32_vi: |
4375 | case AMDGPU::V_LSHLREV_B32_e64_gfx10: |
4376 | case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: |
4377 | case AMDGPU::V_LSHLREV_B32_e64_vi: |
4378 | |
4379 | case AMDGPU::V_LSHLREV_B16_e32: |
4380 | case AMDGPU::V_LSHLREV_B16_e64: |
4381 | case AMDGPU::V_LSHLREV_B16_e32_vi: |
4382 | case AMDGPU::V_LSHLREV_B16_e64_vi: |
4383 | case AMDGPU::V_LSHLREV_B16_gfx10: |
4384 | |
4385 | case AMDGPU::V_LSHRREV_B16_e32: |
4386 | case AMDGPU::V_LSHRREV_B16_e64: |
4387 | case AMDGPU::V_LSHRREV_B16_e32_vi: |
4388 | case AMDGPU::V_LSHRREV_B16_e64_vi: |
4389 | case AMDGPU::V_LSHRREV_B16_gfx10: |
4390 | |
4391 | case AMDGPU::V_ASHRREV_I16_e32: |
4392 | case AMDGPU::V_ASHRREV_I16_e64: |
4393 | case AMDGPU::V_ASHRREV_I16_e32_vi: |
4394 | case AMDGPU::V_ASHRREV_I16_e64_vi: |
4395 | case AMDGPU::V_ASHRREV_I16_gfx10: |
4396 | |
4397 | case AMDGPU::V_LSHLREV_B64_e64: |
4398 | case AMDGPU::V_LSHLREV_B64_gfx10: |
4399 | case AMDGPU::V_LSHLREV_B64_vi: |
4400 | |
4401 | case AMDGPU::V_LSHRREV_B64_e64: |
4402 | case AMDGPU::V_LSHRREV_B64_gfx10: |
4403 | case AMDGPU::V_LSHRREV_B64_vi: |
4404 | |
4405 | case AMDGPU::V_ASHRREV_I64_e64: |
4406 | case AMDGPU::V_ASHRREV_I64_gfx10: |
4407 | case AMDGPU::V_ASHRREV_I64_vi: |
4408 | |
4409 | case AMDGPU::V_PK_LSHLREV_B16: |
4410 | case AMDGPU::V_PK_LSHLREV_B16_gfx10: |
4411 | case AMDGPU::V_PK_LSHLREV_B16_vi: |
4412 | |
4413 | case AMDGPU::V_PK_LSHRREV_B16: |
4414 | case AMDGPU::V_PK_LSHRREV_B16_gfx10: |
4415 | case AMDGPU::V_PK_LSHRREV_B16_vi: |
4416 | case AMDGPU::V_PK_ASHRREV_I16: |
4417 | case AMDGPU::V_PK_ASHRREV_I16_gfx10: |
4418 | case AMDGPU::V_PK_ASHRREV_I16_vi: |
4419 | return true; |
4420 | default: |
4421 | return false; |
4422 | } |
4423 | } |
4424 | |
4425 | std::optional<StringRef> |
4426 | AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { |
4427 | |
4428 | using namespace SIInstrFlags; |
4429 | const unsigned Opcode = Inst.getOpcode(); |
4430 | const MCInstrDesc &Desc = MII.get(Opcode); |
4431 | |
4432 |   // The lds_direct register is defined so that it can be used
4433 |   // with 9-bit source operands only. Ignore encodings which do not accept these.
4434 | const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; |
4435 | if ((Desc.TSFlags & Enc) == 0) |
4436 | return std::nullopt; |
4437 | |
4438 | for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { |
4439 | auto SrcIdx = getNamedOperandIdx(Opcode, Name: SrcName); |
4440 | if (SrcIdx == -1) |
4441 | break; |
4442 | const auto &Src = Inst.getOperand(i: SrcIdx); |
4443 | if (Src.isReg() && Src.getReg() == LDS_DIRECT) { |
4444 | |
4445 | if (isGFX90A() || isGFX11Plus()) |
4446 | return StringRef("lds_direct is not supported on this GPU" ); |
4447 | |
4448 | if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) |
4449 | return StringRef("lds_direct cannot be used with this instruction" ); |
4450 | |
4451 | if (SrcName != OpName::src0) |
4452 | return StringRef("lds_direct may be used as src0 only" ); |
4453 | } |
4454 | } |
4455 | |
4456 | return std::nullopt; |
4457 | } |
4458 | |
4459 | SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { |
4460 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
4461 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4462 | if (Op.isFlatOffset()) |
4463 | return Op.getStartLoc(); |
4464 | } |
4465 | return getLoc(); |
4466 | } |
4467 | |
4468 | bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, |
4469 | const OperandVector &Operands) { |
4470 | auto Opcode = Inst.getOpcode(); |
4471 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset); |
4472 | if (OpNum == -1) |
4473 | return true; |
4474 | |
4475 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4476 | if ((TSFlags & SIInstrFlags::FLAT)) |
4477 | return validateFlatOffset(Inst, Operands); |
4478 | |
4479 | if ((TSFlags & SIInstrFlags::SMRD)) |
4480 | return validateSMEMOffset(Inst, Operands); |
4481 | |
4482 | const auto &Op = Inst.getOperand(i: OpNum); |
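  // For example, on GFX12+ a MUBUF/MTBUF offset must fit in 24 signed bits
  // (-8388608..8388607); the remaining cases handled here take a 16-bit
  // unsigned offset (0..65535).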
4483 | if (isGFX12Plus() && |
4484 | (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { |
4485 | const unsigned OffsetSize = 24; |
4486 | if (!isIntN(N: OffsetSize, x: Op.getImm())) { |
4487 | Error(L: getFlatOffsetLoc(Operands), |
4488 | Msg: Twine("expected a " ) + Twine(OffsetSize) + "-bit signed offset" ); |
4489 | return false; |
4490 | } |
4491 | } else { |
4492 | const unsigned OffsetSize = 16; |
4493 | if (!isUIntN(N: OffsetSize, x: Op.getImm())) { |
4494 | Error(L: getFlatOffsetLoc(Operands), |
4495 | Msg: Twine("expected a " ) + Twine(OffsetSize) + "-bit unsigned offset" ); |
4496 | return false; |
4497 | } |
4498 | } |
4499 | return true; |
4500 | } |
4501 | |
4502 | bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, |
4503 | const OperandVector &Operands) { |
4504 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4505 | if ((TSFlags & SIInstrFlags::FLAT) == 0) |
4506 | return true; |
4507 | |
4508 | auto Opcode = Inst.getOpcode(); |
4509 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset); |
4510 | assert(OpNum != -1); |
4511 | |
4512 | const auto &Op = Inst.getOperand(i: OpNum); |
4513 | if (!hasFlatOffsets() && Op.getImm() != 0) { |
4514 | Error(L: getFlatOffsetLoc(Operands), |
4515 | Msg: "flat offset modifier is not supported on this GPU" ); |
4516 | return false; |
4517 | } |
4518 | |
  // For pre-GFX12 FLAT instructions the offset must be positive; the MSB is
  // ignored and forced to zero.
4521 | unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(ST: getSTI()); |
4522 | bool AllowNegative = |
4523 | (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || |
4524 | isGFX12Plus(); |
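  // With an N-bit offset field this accepts -2^(N-1)..2^(N-1)-1 when negative
  // offsets are allowed, and only 0..2^(N-1)-1 otherwise (hence the
  // "(N-1)-bit unsigned" wording in the error below).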
4525 | if (!isIntN(N: OffsetSize, x: Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { |
4526 | Error(L: getFlatOffsetLoc(Operands), |
4527 | Msg: Twine("expected a " ) + |
4528 | (AllowNegative ? Twine(OffsetSize) + "-bit signed offset" |
4529 | : Twine(OffsetSize - 1) + "-bit unsigned offset" )); |
4530 | return false; |
4531 | } |
4532 | |
4533 | return true; |
4534 | } |
4535 | |
4536 | SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { |
4537 | // Start with second operand because SMEM Offset cannot be dst or src0. |
4538 | for (unsigned i = 2, e = Operands.size(); i != e; ++i) { |
4539 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4540 | if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) |
4541 | return Op.getStartLoc(); |
4542 | } |
4543 | return getLoc(); |
4544 | } |
4545 | |
4546 | bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, |
4547 | const OperandVector &Operands) { |
4548 | if (isCI() || isSI()) |
4549 | return true; |
4550 | |
4551 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4552 | if ((TSFlags & SIInstrFlags::SMRD) == 0) |
4553 | return true; |
4554 | |
4555 | auto Opcode = Inst.getOpcode(); |
4556 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset); |
4557 | if (OpNum == -1) |
4558 | return true; |
4559 | |
4560 | const auto &Op = Inst.getOperand(i: OpNum); |
4561 | if (!Op.isImm()) |
4562 | return true; |
4563 | |
4564 | uint64_t Offset = Op.getImm(); |
4565 | bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opc: Opcode); |
4566 | if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(ST: getSTI(), EncodedOffset: Offset) || |
4567 | AMDGPU::isLegalSMRDEncodedSignedOffset(ST: getSTI(), EncodedOffset: Offset, IsBuffer)) |
4568 | return true; |
4569 | |
4570 | Error(L: getSMEMOffsetLoc(Operands), |
4571 | Msg: isGFX12Plus() ? "expected a 24-bit signed offset" |
4572 | : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" |
4573 | : "expected a 21-bit signed offset" ); |
4574 | |
4575 | return false; |
4576 | } |
4577 | |
4578 | bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { |
4579 | unsigned Opcode = Inst.getOpcode(); |
4580 | const MCInstrDesc &Desc = MII.get(Opcode); |
4581 | if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) |
4582 | return true; |
4583 | |
4584 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src0); |
4585 | const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src1); |
4586 | |
4587 | const int OpIndices[] = { Src0Idx, Src1Idx }; |
4588 | |
4589 | unsigned NumExprs = 0; |
4590 | unsigned NumLiterals = 0; |
4591 | uint32_t LiteralValue; |
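  // Count distinct non-inline literal values and relocatable expressions among
  // the sources below; a repeated identical literal (the same 32-bit constant
  // used for both src0 and src1) counts only once and is accepted.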
4592 | |
4593 | for (int OpIdx : OpIndices) { |
4594 | if (OpIdx == -1) break; |
4595 | |
4596 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
    // Exclude special imm operands (like the one used by s_set_gpr_idx_on).
4598 | if (AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx)) { |
4599 | if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { |
4600 | uint32_t Value = static_cast<uint32_t>(MO.getImm()); |
4601 | if (NumLiterals == 0 || LiteralValue != Value) { |
4602 | LiteralValue = Value; |
4603 | ++NumLiterals; |
4604 | } |
4605 | } else if (MO.isExpr()) { |
4606 | ++NumExprs; |
4607 | } |
4608 | } |
4609 | } |
4610 | |
4611 | return NumLiterals + NumExprs <= 1; |
4612 | } |
4613 | |
4614 | bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { |
4615 | const unsigned Opc = Inst.getOpcode(); |
4616 | if (isPermlane16(Opc)) { |
4617 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
4618 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
4619 | |
4620 | if (OpSel & ~3) |
4621 | return false; |
4622 | } |
4623 | |
4624 | uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags; |
4625 | |
4626 | if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { |
4627 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
4628 | if (OpSelIdx != -1) { |
4629 | if (Inst.getOperand(i: OpSelIdx).getImm() != 0) |
4630 | return false; |
4631 | } |
4632 | int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi); |
4633 | if (OpSelHiIdx != -1) { |
4634 | if (Inst.getOperand(i: OpSelHiIdx).getImm() != -1) |
4635 | return false; |
4636 | } |
4637 | } |
4638 | |
4639 | // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). |
4640 | if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && |
4641 | (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { |
4642 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
4643 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
4644 | if (OpSel & 3) |
4645 | return false; |
4646 | } |
4647 | |
4648 | return true; |
4649 | } |
4650 | |
4651 | bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) { |
4652 | if (!hasTrue16Insts()) |
4653 | return true; |
4654 | const MCRegisterInfo *MRI = getMRI(); |
4655 | const unsigned Opc = Inst.getOpcode(); |
4656 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
4657 | if (OpSelIdx == -1) |
4658 | return true; |
4659 | unsigned OpSelOpValue = Inst.getOperand(i: OpSelIdx).getImm(); |
  // If the value is 0, this may be a default op_sel operand, so conservatively
  // allow it.
4662 | if (OpSelOpValue == 0) |
4663 | return true; |
4664 | unsigned OpCount = 0; |
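  // Each op_sel bit is matched against the corresponding present operand,
  // scanning src0, src1, src2, vdst in order: for a 16-bit VGPR the bit must
  // agree with whether the high (.h) or low (.l) half register was written.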
4665 | for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1, |
4666 | AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) { |
4667 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: OpName); |
4668 | if (OpIdx == -1) |
4669 | continue; |
4670 | const MCOperand &Op = Inst.getOperand(i: OpIdx); |
4671 | if (Op.isReg() && |
4672 | MRI->getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: Op.getReg())) { |
4673 | bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Reg: Op.getReg(), MRI: *MRI); |
4674 | bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0); |
4675 | if (OpSelOpIsHi != VGPRSuffixIsHi) |
4676 | return false; |
4677 | } |
4678 | ++OpCount; |
4679 | } |
4680 | |
4681 | return true; |
4682 | } |
4683 | |
4684 | bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) { |
4685 | assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi); |
4686 | |
4687 | const unsigned Opc = Inst.getOpcode(); |
4688 | uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags; |
4689 | |
4690 | // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2) |
4691 | // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1) |
4692 | // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1) |
  // Other wmma/swmmac instructions don't have neg_lo/neg_hi operands.
4694 | if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) && |
4695 | !(TSFlags & SIInstrFlags::IsSWMMAC)) |
4696 | return true; |
4697 | |
4698 | int NegIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName); |
4699 | if (NegIdx == -1) |
4700 | return true; |
4701 | |
4702 | unsigned Neg = Inst.getOperand(i: NegIdx).getImm(); |
4703 | |
  // Some instructions have a neg_lo or neg_hi operand, yet allow the neg
  // modifier only on certain src operands. Conveniently, such instructions
  // have no src_modifiers operand for the src operands that disallow neg,
  // because those operands also disallow opsel.
4708 | |
4709 | const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers, |
4710 | AMDGPU::OpName::src1_modifiers, |
4711 | AMDGPU::OpName::src2_modifiers}; |
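  // Bit i of neg_lo/neg_hi corresponds to src{i}; if src{i} has no
  // src{i}_modifiers operand, negation is not encodable for that source and
  // the corresponding bit must be clear.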
4712 | |
4713 | for (unsigned i = 0; i < 3; ++i) { |
4714 | if (!AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: SrcMods[i])) { |
4715 | if (Neg & (1 << i)) |
4716 | return false; |
4717 | } |
4718 | } |
4719 | |
4720 | return true; |
4721 | } |
4722 | |
4723 | bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, |
4724 | const OperandVector &Operands) { |
4725 | const unsigned Opc = Inst.getOpcode(); |
4726 | int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp_ctrl); |
4727 | if (DppCtrlIdx >= 0) { |
4728 | unsigned DppCtrl = Inst.getOperand(i: DppCtrlIdx).getImm(); |
4729 | |
4730 | if (!AMDGPU::isLegalDPALU_DPPControl(DC: DppCtrl) && |
4731 | AMDGPU::isDPALU_DPP(OpDesc: MII.get(Opcode: Opc))) { |
4732 | // DP ALU DPP is supported for row_newbcast only on GFX9* |
4733 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyDppCtrl, Operands); |
4734 | Error(L: S, Msg: "DP ALU dpp only supports row_newbcast" ); |
4735 | return false; |
4736 | } |
4737 | } |
4738 | |
4739 | int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp8); |
4740 | bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; |
4741 | |
4742 | if (IsDPP && !hasDPPSrc1SGPR(STI: getSTI())) { |
4743 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1); |
4744 | if (Src1Idx >= 0) { |
4745 | const MCOperand &Src1 = Inst.getOperand(i: Src1Idx); |
4746 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4747 | if (Src1.isReg() && isSGPR(Reg: mc2PseudoReg(Reg: Src1.getReg()), TRI)) { |
4748 | auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src1Idx).getReg()); |
4749 | SMLoc S = getRegLoc(Reg, Operands); |
4750 | Error(L: S, Msg: "invalid operand for instruction" ); |
4751 | return false; |
4752 | } |
4753 | if (Src1.isImm()) { |
4754 | Error(L: getInstLoc(Operands), |
4755 | Msg: "src1 immediate operand invalid for instruction" ); |
4756 | return false; |
4757 | } |
4758 | } |
4759 | } |
4760 | |
4761 | return true; |
4762 | } |
4763 | |
4764 | // Check if VCC register matches wavefront size |
4765 | bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const { |
4766 | auto FB = getFeatureBits(); |
4767 | return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || |
4768 | (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); |
4769 | } |
4770 | |
// Only one unique literal may be used. A VOP3 literal is allowed only on
// GFX10+.
4772 | bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, |
4773 | const OperandVector &Operands) { |
4774 | unsigned Opcode = Inst.getOpcode(); |
4775 | const MCInstrDesc &Desc = MII.get(Opcode); |
4776 | bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, Name: OpName::imm) != -1; |
4777 | if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && |
4778 | !HasMandatoryLiteral && !isVOPD(Opc: Opcode)) |
4779 | return true; |
4780 | |
4781 | OperandIndices OpIndices = getSrcOperandIndices(Opcode, AddMandatoryLiterals: HasMandatoryLiteral); |
4782 | |
4783 | unsigned NumExprs = 0; |
4784 | unsigned NumLiterals = 0; |
4785 | uint32_t LiteralValue; |
4786 | |
4787 | for (int OpIdx : OpIndices) { |
4788 | if (OpIdx == -1) |
4789 | continue; |
4790 | |
4791 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
4792 | if (!MO.isImm() && !MO.isExpr()) |
4793 | continue; |
4794 | if (!isSISrcOperand(Desc, OpNo: OpIdx)) |
4795 | continue; |
4796 | |
4797 | if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { |
4798 | uint64_t Value = static_cast<uint64_t>(MO.getImm()); |
4799 | bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpNo: OpIdx) && |
4800 | AMDGPU::getOperandSize(OpInfo: Desc.operands()[OpIdx]) == 8; |
4801 | bool IsValid32Op = AMDGPU::isValid32BitLiteral(Val: Value, IsFP64); |
4802 | |
4803 | if (!IsValid32Op && !isInt<32>(x: Value) && !isUInt<32>(x: Value)) { |
4804 | Error(L: getLitLoc(Operands), Msg: "invalid operand for instruction" ); |
4805 | return false; |
4806 | } |
4807 | |
4808 | if (IsFP64 && IsValid32Op) |
4809 | Value = Hi_32(Value); |
4810 | |
4811 | if (NumLiterals == 0 || LiteralValue != Value) { |
4812 | LiteralValue = Value; |
4813 | ++NumLiterals; |
4814 | } |
4815 | } else if (MO.isExpr()) { |
4816 | ++NumExprs; |
4817 | } |
4818 | } |
4819 | NumLiterals += NumExprs; |
4820 | |
4821 | if (!NumLiterals) |
4822 | return true; |
4823 | |
4824 | if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { |
4825 | Error(L: getLitLoc(Operands), Msg: "literal operands are not supported" ); |
4826 | return false; |
4827 | } |
4828 | |
4829 | if (NumLiterals > 1) { |
4830 | Error(L: getLitLoc(Operands, SearchMandatoryLiterals: true), Msg: "only one unique literal operand is allowed" ); |
4831 | return false; |
4832 | } |
4833 | |
4834 | return true; |
4835 | } |
4836 | |
4837 | // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. |
4838 | static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, |
4839 | const MCRegisterInfo *MRI) { |
4840 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name); |
4841 | if (OpIdx < 0) |
4842 | return -1; |
4843 | |
4844 | const MCOperand &Op = Inst.getOperand(i: OpIdx); |
4845 | if (!Op.isReg()) |
4846 | return -1; |
4847 | |
4848 | MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0); |
4849 | auto Reg = Sub ? Sub : Op.getReg(); |
4850 | const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID); |
4851 | return AGPR32.contains(Reg) ? 1 : 0; |
4852 | } |
4853 | |
4854 | bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { |
4855 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4856 | if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | |
4857 | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | |
4858 | SIInstrFlags::DS)) == 0) |
4859 | return true; |
4860 | |
4861 | AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS) |
4862 | ? AMDGPU::OpName::data0 |
4863 | : AMDGPU::OpName::vdata; |
4864 | |
4865 | const MCRegisterInfo *MRI = getMRI(); |
4866 | int DstAreg = IsAGPROperand(Inst, Name: AMDGPU::OpName::vdst, MRI); |
4867 | int DataAreg = IsAGPROperand(Inst, Name: DataName, MRI); |
4868 | |
4869 | if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { |
4870 | int Data2Areg = IsAGPROperand(Inst, Name: AMDGPU::OpName::data1, MRI); |
4871 | if (Data2Areg >= 0 && Data2Areg != DataAreg) |
4872 | return false; |
4873 | } |
4874 | |
4875 | auto FB = getFeatureBits(); |
4876 | if (FB[AMDGPU::FeatureGFX90AInsts]) { |
4877 | if (DataAreg < 0 || DstAreg < 0) |
4878 | return true; |
4879 | return DstAreg == DataAreg; |
4880 | } |
4881 | |
4882 | return DstAreg < 1 && DataAreg < 1; |
4883 | } |
4884 | |
4885 | bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { |
4886 | auto FB = getFeatureBits(); |
4887 | unsigned Opc = Inst.getOpcode(); |
  // DS_READ_B96_TR_B6 is the only GFX950 DS instruction that allows an
  // unaligned VGPR. All other instructions allow only even-aligned VGPRs.
4890 | if (!(FB[AMDGPU::FeatureGFX90AInsts]) || Opc == AMDGPU::DS_READ_B96_TR_B6_vi) |
4891 | return true; |
4892 | |
4893 | const MCRegisterInfo *MRI = getMRI(); |
4894 | const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID); |
4895 | const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID); |
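  // E.g. a register pair starting at an odd VGPR such as v[1:2] is rejected,
  // while an even-aligned pair such as v[2:3] is accepted.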
4896 | for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { |
4897 | const MCOperand &Op = Inst.getOperand(i: I); |
4898 | if (!Op.isReg()) |
4899 | continue; |
4900 | |
4901 | MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0); |
4902 | if (!Sub) |
4903 | continue; |
4904 | |
4905 | if (VGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::VGPR0) & 1)) |
4906 | return false; |
4907 | if (AGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::AGPR0) & 1)) |
4908 | return false; |
4909 | } |
4910 | |
4911 | return true; |
4912 | } |
4913 | |
4914 | SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { |
4915 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
4916 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4917 | if (Op.isBLGP()) |
4918 | return Op.getStartLoc(); |
4919 | } |
4920 | return SMLoc(); |
4921 | } |
4922 | |
4923 | bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, |
4924 | const OperandVector &Operands) { |
4925 | unsigned Opc = Inst.getOpcode(); |
4926 | int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp); |
4927 | if (BlgpIdx == -1) |
4928 | return true; |
4929 | SMLoc BLGPLoc = getBLGPLoc(Operands); |
4930 | if (!BLGPLoc.isValid()) |
4931 | return true; |
4932 | bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with(Prefix: "neg:" ); |
4933 | auto FB = getFeatureBits(); |
4934 | bool UsesNeg = false; |
4935 | if (FB[AMDGPU::FeatureGFX940Insts]) { |
4936 | switch (Opc) { |
4937 | case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: |
4938 | case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: |
4939 | case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: |
4940 | case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: |
4941 | UsesNeg = true; |
4942 | } |
4943 | } |
4944 | |
4945 | if (IsNeg == UsesNeg) |
4946 | return true; |
4947 | |
4948 | Error(L: BLGPLoc, |
4949 | Msg: UsesNeg ? "invalid modifier: blgp is not supported" |
4950 | : "invalid modifier: neg is not supported" ); |
4951 | |
4952 | return false; |
4953 | } |
4954 | |
4955 | bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, |
4956 | const OperandVector &Operands) { |
4957 | if (!isGFX11Plus()) |
4958 | return true; |
4959 | |
4960 | unsigned Opc = Inst.getOpcode(); |
4961 | if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && |
4962 | Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && |
4963 | Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && |
4964 | Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) |
4965 | return true; |
4966 | |
4967 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::sdst); |
4968 | assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); |
4969 | auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg()); |
4970 | if (Reg == AMDGPU::SGPR_NULL) |
4971 | return true; |
4972 | |
4973 | SMLoc RegLoc = getRegLoc(Reg, Operands); |
4974 | Error(L: RegLoc, Msg: "src0 must be null" ); |
4975 | return false; |
4976 | } |
4977 | |
4978 | bool AMDGPUAsmParser::validateDS(const MCInst &Inst, |
4979 | const OperandVector &Operands) { |
4980 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4981 | if ((TSFlags & SIInstrFlags::DS) == 0) |
4982 | return true; |
4983 | if (TSFlags & SIInstrFlags::GWS) |
4984 | return validateGWS(Inst, Operands); |
4985 | // Only validate GDS for non-GWS instructions. |
4986 | if (hasGDS()) |
4987 | return true; |
4988 | int GDSIdx = |
4989 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::gds); |
4990 | if (GDSIdx < 0) |
4991 | return true; |
4992 | unsigned GDS = Inst.getOperand(i: GDSIdx).getImm(); |
4993 | if (GDS) { |
4994 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyGDS, Operands); |
4995 | Error(L: S, Msg: "gds modifier is not supported on this GPU" ); |
4996 | return false; |
4997 | } |
4998 | return true; |
4999 | } |
5000 | |
5001 | // gfx90a has an undocumented limitation: |
5002 | // DS_GWS opcodes must use even aligned registers. |
5003 | bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, |
5004 | const OperandVector &Operands) { |
5005 | if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) |
5006 | return true; |
5007 | |
5008 | int Opc = Inst.getOpcode(); |
5009 | if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && |
5010 | Opc != AMDGPU::DS_GWS_SEMA_BR_vi) |
5011 | return true; |
5012 | |
5013 | const MCRegisterInfo *MRI = getMRI(); |
5014 | const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID); |
5015 | int Data0Pos = |
5016 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::data0); |
5017 | assert(Data0Pos != -1); |
5018 | auto Reg = Inst.getOperand(i: Data0Pos).getReg(); |
5019 | auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); |
5020 | if (RegIdx & 1) { |
5021 | SMLoc RegLoc = getRegLoc(Reg, Operands); |
5022 | Error(L: RegLoc, Msg: "vgpr must be even aligned" ); |
5023 | return false; |
5024 | } |
5025 | |
5026 | return true; |
5027 | } |
5028 | |
5029 | bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, |
5030 | const OperandVector &Operands, |
5031 | const SMLoc &IDLoc) { |
5032 | int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), |
5033 | Name: AMDGPU::OpName::cpol); |
5034 | if (CPolPos == -1) |
5035 | return true; |
5036 | |
5037 | unsigned CPol = Inst.getOperand(i: CPolPos).getImm(); |
5038 | |
5039 | if (isGFX12Plus()) |
5040 | return validateTHAndScopeBits(Inst, Operands, CPol); |
5041 | |
5042 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
5043 | if (TSFlags & SIInstrFlags::SMRD) { |
5044 | if (CPol && (isSI() || isCI())) { |
5045 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
5046 | Error(L: S, Msg: "cache policy is not supported for SMRD instructions" ); |
5047 | return false; |
5048 | } |
5049 | if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { |
5050 | Error(L: IDLoc, Msg: "invalid cache policy for SMEM instruction" ); |
5051 | return false; |
5052 | } |
5053 | } |
5054 | |
5055 | if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { |
5056 | const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | |
5057 | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | |
5058 | SIInstrFlags::FLAT; |
5059 | if (!(TSFlags & AllowSCCModifier)) { |
5060 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
5061 | StringRef CStr(S.getPointer()); |
5062 | S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scc" )]); |
5063 | Error(L: S, |
5064 | Msg: "scc modifier is not supported for this instruction on this GPU" ); |
5065 | return false; |
5066 | } |
5067 | } |
5068 | |
5069 | if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) |
5070 | return true; |
5071 | |
5072 | if (TSFlags & SIInstrFlags::IsAtomicRet) { |
5073 | if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { |
5074 | Error(L: IDLoc, Msg: isGFX940() ? "instruction must use sc0" |
5075 | : "instruction must use glc" ); |
5076 | return false; |
5077 | } |
5078 | } else { |
5079 | if (CPol & CPol::GLC) { |
5080 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
5081 | StringRef CStr(S.getPointer()); |
5082 | S = SMLoc::getFromPointer( |
5083 | Ptr: &CStr.data()[CStr.find(Str: isGFX940() ? "sc0" : "glc" )]); |
5084 | Error(L: S, Msg: isGFX940() ? "instruction must not use sc0" |
5085 | : "instruction must not use glc" ); |
5086 | return false; |
5087 | } |
5088 | } |
5089 | |
5090 | return true; |
5091 | } |
5092 | |
5093 | bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, |
5094 | const OperandVector &Operands, |
5095 | const unsigned CPol) { |
5096 | const unsigned TH = CPol & AMDGPU::CPol::TH; |
5097 | const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; |
5098 | |
5099 | const unsigned Opcode = Inst.getOpcode(); |
5100 | const MCInstrDesc &TID = MII.get(Opcode); |
5101 | |
5102 | auto PrintError = [&](StringRef Msg) { |
5103 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
5104 | Error(L: S, Msg); |
5105 | return false; |
5106 | }; |
5107 | |
5108 | if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && |
5109 | (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && |
5110 | (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) |
5111 | return PrintError("instruction must use th:TH_ATOMIC_RETURN" ); |
5112 | |
5113 | if (TH == 0) |
5114 | return true; |
5115 | |
5116 | if ((TID.TSFlags & SIInstrFlags::SMRD) && |
5117 | ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || |
5118 | (TH == AMDGPU::CPol::TH_NT_HT))) |
5119 | return PrintError("invalid th value for SMEM instruction" ); |
5120 | |
5121 | if (TH == AMDGPU::CPol::TH_BYPASS) { |
5122 | if ((Scope != AMDGPU::CPol::SCOPE_SYS && |
5123 | CPol & AMDGPU::CPol::TH_REAL_BYPASS) || |
5124 | (Scope == AMDGPU::CPol::SCOPE_SYS && |
5125 | !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) |
5126 | return PrintError("scope and th combination is not valid" ); |
5127 | } |
5128 | |
5129 | unsigned THType = AMDGPU::getTemporalHintType(TID); |
5130 | if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) { |
5131 | if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) |
5132 | return PrintError("invalid th value for atomic instructions" ); |
5133 | } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) { |
5134 | if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) |
5135 | return PrintError("invalid th value for store instructions" ); |
5136 | } else { |
5137 | if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) |
5138 | return PrintError("invalid th value for load instructions" ); |
5139 | } |
5140 | |
5141 | return true; |
5142 | } |
5143 | |
5144 | bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, |
5145 | const OperandVector &Operands) { |
5146 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
5147 | if (Desc.mayStore() && |
5148 | (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { |
5149 | SMLoc Loc = getImmLoc(Type: AMDGPUOperand::ImmTyTFE, Operands); |
5150 | if (Loc != getInstLoc(Operands)) { |
5151 | Error(L: Loc, Msg: "TFE modifier has no meaning for store instructions" ); |
5152 | return false; |
5153 | } |
5154 | } |
5155 | |
5156 | return true; |
5157 | } |
5158 | |
5159 | bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, |
5160 | const SMLoc &IDLoc, |
5161 | const OperandVector &Operands) { |
5162 | if (auto ErrMsg = validateLdsDirect(Inst)) { |
5163 | Error(L: getRegLoc(Reg: LDS_DIRECT, Operands), Msg: *ErrMsg); |
5164 | return false; |
5165 | } |
5166 | if (!validateTrue16OpSel(Inst)) { |
5167 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands), |
5168 | Msg: "op_sel operand conflicts with 16-bit operand suffix" ); |
5169 | return false; |
5170 | } |
5171 | if (!validateSOPLiteral(Inst)) { |
5172 | Error(L: getLitLoc(Operands), |
5173 | Msg: "only one unique literal operand is allowed" ); |
5174 | return false; |
5175 | } |
5176 | if (!validateVOPLiteral(Inst, Operands)) { |
5177 | return false; |
5178 | } |
5179 | if (!validateConstantBusLimitations(Inst, Operands)) { |
5180 | return false; |
5181 | } |
5182 | if (!validateVOPDRegBankConstraints(Inst, Operands)) { |
5183 | return false; |
5184 | } |
5185 | if (!validateIntClampSupported(Inst)) { |
5186 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyClamp, Operands), |
5187 | Msg: "integer clamping is not supported on this GPU" ); |
5188 | return false; |
5189 | } |
5190 | if (!validateOpSel(Inst)) { |
5191 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands), |
5192 | Msg: "invalid op_sel operand" ); |
5193 | return false; |
5194 | } |
5195 | if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_lo)) { |
5196 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegLo, Operands), |
5197 | Msg: "invalid neg_lo operand" ); |
5198 | return false; |
5199 | } |
5200 | if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_hi)) { |
5201 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegHi, Operands), |
5202 | Msg: "invalid neg_hi operand" ); |
5203 | return false; |
5204 | } |
5205 | if (!validateDPP(Inst, Operands)) { |
5206 | return false; |
5207 | } |
  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to
  // validate.
5209 | if (!validateMIMGD16(Inst)) { |
5210 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands), |
5211 | Msg: "d16 modifier is not supported on this GPU" ); |
5212 | return false; |
5213 | } |
5214 | if (!validateMIMGDim(Inst, Operands)) { |
5215 | Error(L: IDLoc, Msg: "missing dim operand" ); |
5216 | return false; |
5217 | } |
5218 | if (!validateTensorR128(Inst)) { |
5219 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands), |
5220 | Msg: "instruction must set modifier r128=0" ); |
5221 | return false; |
5222 | } |
5223 | if (!validateMIMGMSAA(Inst)) { |
5224 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDim, Operands), |
5225 | Msg: "invalid dim; must be MSAA type" ); |
5226 | return false; |
5227 | } |
5228 | if (!validateMIMGDataSize(Inst, IDLoc)) { |
5229 | return false; |
5230 | } |
5231 | if (!validateMIMGAddrSize(Inst, IDLoc)) |
5232 | return false; |
5233 | if (!validateMIMGAtomicDMask(Inst)) { |
5234 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands), |
5235 | Msg: "invalid atomic image dmask" ); |
5236 | return false; |
5237 | } |
5238 | if (!validateMIMGGatherDMask(Inst)) { |
5239 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands), |
5240 | Msg: "invalid image_gather dmask: only one bit must be set" ); |
5241 | return false; |
5242 | } |
5243 | if (!validateMovrels(Inst, Operands)) { |
5244 | return false; |
5245 | } |
5246 | if (!validateOffset(Inst, Operands)) { |
5247 | return false; |
5248 | } |
5249 | if (!validateMAIAccWrite(Inst, Operands)) { |
5250 | return false; |
5251 | } |
5252 | if (!validateMAISrc2(Inst, Operands)) { |
5253 | return false; |
5254 | } |
5255 | if (!validateMFMA(Inst, Operands)) { |
5256 | return false; |
5257 | } |
5258 | if (!validateCoherencyBits(Inst, Operands, IDLoc)) { |
5259 | return false; |
5260 | } |
5261 | |
5262 | if (!validateAGPRLdSt(Inst)) { |
5263 | Error(L: IDLoc, Msg: getFeatureBits()[AMDGPU::FeatureGFX90AInsts] |
5264 | ? "invalid register class: data and dst should be all VGPR or AGPR" |
5265 | : "invalid register class: agpr loads and stores not supported on this GPU" |
5266 | ); |
5267 | return false; |
5268 | } |
5269 | if (!validateVGPRAlign(Inst)) { |
5270 | Error(L: IDLoc, |
5271 | Msg: "invalid register class: vgpr tuples must be 64 bit aligned" ); |
5272 | return false; |
5273 | } |
5274 | if (!validateDS(Inst, Operands)) { |
5275 | return false; |
5276 | } |
5277 | |
5278 | if (!validateBLGP(Inst, Operands)) { |
5279 | return false; |
5280 | } |
5281 | |
5282 | if (!validateDivScale(Inst)) { |
5283 | Error(L: IDLoc, Msg: "ABS not allowed in VOP3B instructions" ); |
5284 | return false; |
5285 | } |
5286 | if (!validateWaitCnt(Inst, Operands)) { |
5287 | return false; |
5288 | } |
5289 | if (!validateTFE(Inst, Operands)) { |
5290 | return false; |
5291 | } |
5292 | |
5293 | return true; |
5294 | } |
5295 | |
5296 | static std::string AMDGPUMnemonicSpellCheck(StringRef S, |
5297 | const FeatureBitset &FBS, |
5298 | unsigned VariantID = 0); |
5299 | |
5300 | static bool AMDGPUCheckMnemonic(StringRef Mnemonic, |
5301 | const FeatureBitset &AvailableFeatures, |
5302 | unsigned VariantID); |
5303 | |
5304 | bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, |
5305 | const FeatureBitset &FBS) { |
5306 | return isSupportedMnemo(Mnemo, FBS, Variants: getAllVariants()); |
5307 | } |
5308 | |
5309 | bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, |
5310 | const FeatureBitset &FBS, |
5311 | ArrayRef<unsigned> Variants) { |
5312 | for (auto Variant : Variants) { |
5313 | if (AMDGPUCheckMnemonic(Mnemonic: Mnemo, AvailableFeatures: FBS, VariantID: Variant)) |
5314 | return true; |
5315 | } |
5316 | |
5317 | return false; |
5318 | } |
5319 | |
5320 | bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, |
5321 | const SMLoc &IDLoc) { |
5322 | FeatureBitset FBS = ComputeAvailableFeatures(FB: getFeatureBits()); |
5323 | |
5324 | // Check if requested instruction variant is supported. |
5325 | if (isSupportedMnemo(Mnemo, FBS, Variants: getMatchedVariants())) |
5326 | return false; |
5327 | |
5328 | // This instruction is not supported. |
5329 | // Clear any other pending errors because they are no longer relevant. |
5330 | getParser().clearPendingErrors(); |
5331 | |
5332 | // Requested instruction variant is not supported. |
5333 | // Check if any other variants are supported. |
5334 | StringRef VariantName = getMatchedVariantName(); |
5335 | if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { |
5336 | return Error(L: IDLoc, |
5337 | Msg: Twine(VariantName, |
5338 | " variant of this instruction is not supported" )); |
5339 | } |
5340 | |
5341 | // Check if this instruction may be used with a different wavesize. |
5342 | if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && |
5343 | !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { |
5344 | |
5345 | FeatureBitset FeaturesWS32 = getFeatureBits(); |
5346 | FeaturesWS32.flip(I: AMDGPU::FeatureWavefrontSize64) |
5347 | .flip(I: AMDGPU::FeatureWavefrontSize32); |
5348 | FeatureBitset AvailableFeaturesWS32 = |
5349 | ComputeAvailableFeatures(FB: FeaturesWS32); |
5350 | |
5351 | if (isSupportedMnemo(Mnemo, FBS: AvailableFeaturesWS32, Variants: getMatchedVariants())) |
5352 | return Error(L: IDLoc, Msg: "instruction requires wavesize=32" ); |
5353 | } |
5354 | |
5355 | // Finally check if this instruction is supported on any other GPU. |
5356 | if (isSupportedMnemo(Mnemo, FBS: FeatureBitset().set())) { |
5357 | return Error(L: IDLoc, Msg: "instruction not supported on this GPU" ); |
5358 | } |
5359 | |
5360 | // Instruction not supported on any GPU. Probably a typo. |
5361 | std::string Suggestion = AMDGPUMnemonicSpellCheck(S: Mnemo, FBS); |
5362 | return Error(L: IDLoc, Msg: "invalid instruction" + Suggestion); |
5363 | } |
5364 | |
5365 | static bool isInvalidVOPDY(const OperandVector &Operands, |
5366 | uint64_t InvalidOprIdx) { |
5367 | assert(InvalidOprIdx < Operands.size()); |
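  // In VOPD syntax the "::" token separates the X and Y component
  // instructions, so an operand error immediately following "::" is reported
  // against the VOPDY half.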
5368 | const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); |
5369 | if (Op.isToken() && InvalidOprIdx > 1) { |
5370 | const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); |
5371 | return PrevOp.isToken() && PrevOp.getToken() == "::" ; |
5372 | } |
5373 | return false; |
5374 | } |
5375 | |
5376 | bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
5377 | OperandVector &Operands, |
5378 | MCStreamer &Out, |
5379 | uint64_t &ErrorInfo, |
5380 | bool MatchingInlineAsm) { |
5381 | MCInst Inst; |
5382 | unsigned Result = Match_Success; |
5383 | for (auto Variant : getMatchedVariants()) { |
5384 | uint64_t EI; |
5385 | auto R = MatchInstructionImpl(Operands, Inst, ErrorInfo&: EI, matchingInlineAsm: MatchingInlineAsm, |
5386 | VariantID: Variant); |
    // Match statuses are ordered from least to most specific; keep the most
    // specific status seen so far as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5390 | if (R == Match_Success || R == Match_MissingFeature || |
5391 | (R == Match_InvalidOperand && Result != Match_MissingFeature) || |
5392 | (R == Match_MnemonicFail && Result != Match_InvalidOperand && |
5393 | Result != Match_MissingFeature)) { |
5394 | Result = R; |
5395 | ErrorInfo = EI; |
5396 | } |
5397 | if (R == Match_Success) |
5398 | break; |
5399 | } |
5400 | |
5401 | if (Result == Match_Success) { |
5402 | if (!validateInstruction(Inst, IDLoc, Operands)) { |
5403 | return true; |
5404 | } |
5405 | Inst.setLoc(IDLoc); |
5406 | Out.emitInstruction(Inst, STI: getSTI()); |
5407 | return false; |
5408 | } |
5409 | |
5410 | StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); |
5411 | if (checkUnsupportedInstruction(Mnemo, IDLoc)) { |
5412 | return true; |
5413 | } |
5414 | |
5415 | switch (Result) { |
5416 | default: break; |
5417 | case Match_MissingFeature: |
5418 | // It has been verified that the specified instruction |
5419 | // mnemonic is valid. A match was found but it requires |
5420 | // features which are not supported on this GPU. |
5421 | return Error(L: IDLoc, Msg: "operands are not valid for this GPU or mode" ); |
5422 | |
5423 | case Match_InvalidOperand: { |
5424 | SMLoc ErrorLoc = IDLoc; |
5425 | if (ErrorInfo != ~0ULL) { |
5426 | if (ErrorInfo >= Operands.size()) { |
5427 | return Error(L: IDLoc, Msg: "too few operands for instruction" ); |
5428 | } |
5429 | ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); |
5430 | if (ErrorLoc == SMLoc()) |
5431 | ErrorLoc = IDLoc; |
5432 | |
5433 | if (isInvalidVOPDY(Operands, InvalidOprIdx: ErrorInfo)) |
5434 | return Error(L: ErrorLoc, Msg: "invalid VOPDY instruction" ); |
5435 | } |
5436 | return Error(L: ErrorLoc, Msg: "invalid operand for instruction" ); |
5437 | } |
5438 | |
5439 | case Match_MnemonicFail: |
5440 | llvm_unreachable("Invalid instructions should have been handled already" ); |
5441 | } |
5442 | llvm_unreachable("Implement any new match types added!" ); |
5443 | } |
5444 | |
5445 | bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { |
5446 | int64_t Tmp = -1; |
5447 | if (!isToken(Kind: AsmToken::Integer) && !isToken(Kind: AsmToken::Identifier)) { |
5448 | return true; |
5449 | } |
5450 | if (getParser().parseAbsoluteExpression(Res&: Tmp)) { |
5451 | return true; |
5452 | } |
5453 | Ret = static_cast<uint32_t>(Tmp); |
5454 | return false; |
5455 | } |
5456 | |
5457 | bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { |
5458 | if (!getSTI().getTargetTriple().isAMDGCN()) |
5459 | return TokError(Msg: "directive only supported for amdgcn architecture" ); |
5460 | |
5461 | std::string TargetIDDirective; |
5462 | SMLoc TargetStart = getTok().getLoc(); |
5463 | if (getParser().parseEscapedString(Data&: TargetIDDirective)) |
5464 | return true; |
5465 | |
5466 | SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); |
5467 | if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) |
5468 | return getParser().Error(L: TargetRange.Start, |
5469 | Msg: (Twine(".amdgcn_target directive's target id " ) + |
5470 | Twine(TargetIDDirective) + |
5471 | Twine(" does not match the specified target id " ) + |
5472 | Twine(getTargetStreamer().getTargetID()->toString())).str()); |
5473 | |
5474 | return false; |
5475 | } |
5476 | |
5477 | bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { |
5478 | return Error(L: Range.Start, Msg: "value out of range" , Range); |
5479 | } |
5480 | |
5481 | bool AMDGPUAsmParser::calculateGPRBlocks( |
5482 | const FeatureBitset &Features, const MCExpr *VCCUsed, |
5483 | const MCExpr *FlatScrUsed, bool XNACKUsed, |
5484 | std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR, |
5485 | SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange, |
5486 | const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) { |
5487 | // TODO(scott.linder): These calculations are duplicated from |
5488 | // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. |
5489 | IsaVersion Version = getIsaVersion(GPU: getSTI().getCPU()); |
5490 | MCContext &Ctx = getContext(); |
5491 | |
5492 | const MCExpr *NumSGPRs = NextFreeSGPR; |
5493 | int64_t EvaluatedSGPRs; |
5494 | |
5495 | if (Version.Major >= 10) |
5496 | NumSGPRs = MCConstantExpr::create(Value: 0, Ctx); |
5497 | else { |
5498 | unsigned MaxAddressableNumSGPRs = |
5499 | IsaInfo::getAddressableNumSGPRs(STI: &getSTI()); |
5500 | |
5501 | if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && Version.Major >= 8 && |
5502 | !Features.test(I: FeatureSGPRInitBug) && |
5503 | static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) |
5504 | return OutOfRangeError(Range: SGPRRange); |
5505 | |
    const MCExpr *ExtraSGPRs =
5507 | AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx); |
5508 | NumSGPRs = MCBinaryExpr::createAdd(LHS: NumSGPRs, RHS: ExtraSGPRs, Ctx); |
5509 | |
5510 | if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && |
5511 | (Version.Major <= 7 || Features.test(I: FeatureSGPRInitBug)) && |
5512 | static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) |
5513 | return OutOfRangeError(Range: SGPRRange); |
5514 | |
5515 | if (Features.test(I: FeatureSGPRInitBug)) |
5516 | NumSGPRs = |
5517 | MCConstantExpr::create(Value: IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx); |
5518 | } |
5519 | |
5520 | // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks: |
5521 | // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1 |
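  // Worked example: NumGPR = 10 with an encoding granule of 4 gives
  // alignTo(10, 4) = 12, 12 / 4 = 3, and 3 - 1 = 2 encoded blocks.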
5522 | auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR, |
5523 | unsigned Granule) -> const MCExpr * { |
5524 | const MCExpr *OneConst = MCConstantExpr::create(Value: 1ul, Ctx); |
5525 | const MCExpr *GranuleConst = MCConstantExpr::create(Value: Granule, Ctx); |
5526 | const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax(Args: {NumGPR, OneConst}, Ctx); |
5527 | const MCExpr *AlignToGPR = |
5528 | AMDGPUMCExpr::createAlignTo(Value: MaxNumGPR, Align: GranuleConst, Ctx); |
5529 | const MCExpr *DivGPR = |
5530 | MCBinaryExpr::createDiv(LHS: AlignToGPR, RHS: GranuleConst, Ctx); |
5531 | const MCExpr *SubGPR = MCBinaryExpr::createSub(LHS: DivGPR, RHS: OneConst, Ctx); |
5532 | return SubGPR; |
5533 | }; |
5534 | |
5535 | VGPRBlocks = GetNumGPRBlocks( |
5536 | NextFreeVGPR, |
5537 | IsaInfo::getVGPREncodingGranule(STI: &getSTI(), EnableWavefrontSize32)); |
5538 | SGPRBlocks = |
5539 | GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(STI: &getSTI())); |
5540 | |
5541 | return false; |
5542 | } |
5543 | |
5544 | bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { |
5545 | if (!getSTI().getTargetTriple().isAMDGCN()) |
5546 | return TokError(Msg: "directive only supported for amdgcn architecture" ); |
5547 | |
5548 | if (!isHsaAbi(STI: getSTI())) |
5549 | return TokError(Msg: "directive only supported for amdhsa OS" ); |
5550 | |
5551 | StringRef KernelName; |
5552 | if (getParser().parseIdentifier(Res&: KernelName)) |
5553 | return true; |
5554 | |
5555 | AMDGPU::MCKernelDescriptor KD = |
5556 | AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor( |
5557 | STI: &getSTI(), Ctx&: getContext()); |
5558 | |
5559 | StringSet<> Seen; |
5560 | |
5561 | IsaVersion IVersion = getIsaVersion(GPU: getSTI().getCPU()); |
5562 | |
5563 | const MCExpr *ZeroExpr = MCConstantExpr::create(Value: 0, Ctx&: getContext()); |
5564 | const MCExpr *OneExpr = MCConstantExpr::create(Value: 1, Ctx&: getContext()); |
5565 | |
5566 | SMRange VGPRRange; |
5567 | const MCExpr *NextFreeVGPR = ZeroExpr; |
5568 | const MCExpr *AccumOffset = MCConstantExpr::create(Value: 0, Ctx&: getContext()); |
5569 | uint64_t SharedVGPRCount = 0; |
5570 | uint64_t PreloadLength = 0; |
5571 | uint64_t PreloadOffset = 0; |
5572 | SMRange SGPRRange; |
5573 | const MCExpr *NextFreeSGPR = ZeroExpr; |
5574 | |
5575 | // Count the number of user SGPRs implied from the enabled feature bits. |
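  // (The private segment buffer occupies 4 user SGPRs, each enabled 64-bit
  // pointer and the flat scratch init occupy 2, the private segment size
  // occupies 1, and kernarg preload adds its preload length.)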
5576 | unsigned ImpliedUserSGPRCount = 0; |
5577 | |
5578 | // Track if the asm explicitly contains the directive for the user SGPR |
5579 | // count. |
5580 | std::optional<unsigned> ExplicitUserSGPRCount; |
5581 | const MCExpr *ReserveVCC = OneExpr; |
5582 | const MCExpr *ReserveFlatScr = OneExpr; |
5583 | std::optional<bool> EnableWavefrontSize32; |
5584 | |
5585 | while (true) { |
5586 | while (trySkipToken(Kind: AsmToken::EndOfStatement)); |
5587 | |
5588 | StringRef ID; |
5589 | SMRange IDRange = getTok().getLocRange(); |
5590 | if (!parseId(Val&: ID, ErrMsg: "expected .amdhsa_ directive or .end_amdhsa_kernel" )) |
5591 | return true; |
5592 | |
5593 | if (ID == ".end_amdhsa_kernel" ) |
5594 | break; |
5595 | |
5596 | if (!Seen.insert(key: ID).second) |
5597 | return TokError(Msg: ".amdhsa_ directives cannot be repeated" ); |
5598 | |
5599 | SMLoc ValStart = getLoc(); |
5600 | const MCExpr *ExprVal; |
5601 | if (getParser().parseExpression(Res&: ExprVal)) |
5602 | return true; |
5603 | SMLoc ValEnd = getLoc(); |
5604 | SMRange ValRange = SMRange(ValStart, ValEnd); |
5605 | |
5606 | int64_t IVal = 0; |
5607 | uint64_t Val = IVal; |
5608 | bool EvaluatableExpr; |
5609 | if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(Res&: IVal))) { |
5610 | if (IVal < 0) |
5611 | return OutOfRangeError(Range: ValRange); |
5612 | Val = IVal; |
5613 | } |
5614 | |
5615 | #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ |
5616 | if (!isUInt<ENTRY##_WIDTH>(Val)) \ |
5617 | return OutOfRangeError(RANGE); \ |
5618 | AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \ |
5619 | getContext()); |
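// For example, PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
// COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, ValRange) first
// range-checks the evaluated value against the field's bit width and then
// splices ExprVal into compute_pgm_rsrc1 at the field's shift and width.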
5620 | |
// Some fields use the parsed value immediately, which requires the expression
// to be resolvable.
5623 | #define EXPR_RESOLVE_OR_ERROR(RESOLVED) \ |
5624 | if (!(RESOLVED)) \ |
5625 | return Error(IDRange.Start, "directive should have resolvable expression", \ |
5626 | IDRange); |
5627 | |
5628 | if (ID == ".amdhsa_group_segment_fixed_size" ) { |
5629 | if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) * |
5630 | CHAR_BIT>(x: Val)) |
5631 | return OutOfRangeError(Range: ValRange); |
5632 | KD.group_segment_fixed_size = ExprVal; |
5633 | } else if (ID == ".amdhsa_private_segment_fixed_size" ) { |
5634 | if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) * |
5635 | CHAR_BIT>(x: Val)) |
5636 | return OutOfRangeError(Range: ValRange); |
5637 | KD.private_segment_fixed_size = ExprVal; |
5638 | } else if (ID == ".amdhsa_kernarg_size" ) { |
5639 | if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(x: Val)) |
5640 | return OutOfRangeError(Range: ValRange); |
5641 | KD.kernarg_size = ExprVal; |
5642 | } else if (ID == ".amdhsa_user_sgpr_count" ) { |
5643 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5644 | ExplicitUserSGPRCount = Val; |
5645 | } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer" ) { |
5646 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5647 | if (hasArchitectedFlatScratch()) |
5648 | return Error(L: IDRange.Start, |
5649 | Msg: "directive is not supported with architected flat scratch" , |
5650 | Range: IDRange); |
5651 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5652 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, |
5653 | ExprVal, ValRange); |
5654 | if (Val) |
5655 | ImpliedUserSGPRCount += 4; |
5656 | } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length" ) { |
5657 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5658 | if (!hasKernargPreload()) |
5659 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5660 | |
5661 | if (Val > getMaxNumUserSGPRs()) |
5662 | return OutOfRangeError(Range: ValRange); |
5663 | PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal, |
5664 | ValRange); |
5665 | if (Val) { |
5666 | ImpliedUserSGPRCount += Val; |
5667 | PreloadLength = Val; |
5668 | } |
5669 | } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset" ) { |
5670 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5671 | if (!hasKernargPreload()) |
5672 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5673 | |
5674 | if (Val >= 1024) |
5675 | return OutOfRangeError(Range: ValRange); |
5676 | PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal, |
5677 | ValRange); |
5678 | if (Val) |
5679 | PreloadOffset = Val; |
5680 | } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr" ) { |
5681 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5682 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5683 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal, |
5684 | ValRange); |
5685 | if (Val) |
5686 | ImpliedUserSGPRCount += 2; |
5687 | } else if (ID == ".amdhsa_user_sgpr_queue_ptr" ) { |
5688 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5689 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5690 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal, |
5691 | ValRange); |
5692 | if (Val) |
5693 | ImpliedUserSGPRCount += 2; |
5694 | } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr" ) { |
5695 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5696 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5697 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, |
5698 | ExprVal, ValRange); |
5699 | if (Val) |
5700 | ImpliedUserSGPRCount += 2; |
5701 | } else if (ID == ".amdhsa_user_sgpr_dispatch_id" ) { |
5702 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5703 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5704 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal, |
5705 | ValRange); |
5706 | if (Val) |
5707 | ImpliedUserSGPRCount += 2; |
5708 | } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init" ) { |
5709 | if (hasArchitectedFlatScratch()) |
5710 | return Error(L: IDRange.Start, |
5711 | Msg: "directive is not supported with architected flat scratch" , |
5712 | Range: IDRange); |
5713 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5714 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5715 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, |
5716 | ExprVal, ValRange); |
5717 | if (Val) |
5718 | ImpliedUserSGPRCount += 2; |
5719 | } else if (ID == ".amdhsa_user_sgpr_private_segment_size" ) { |
5720 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5721 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5722 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, |
5723 | ExprVal, ValRange); |
5724 | if (Val) |
5725 | ImpliedUserSGPRCount += 1; |
5726 | } else if (ID == ".amdhsa_wavefront_size32" ) { |
5727 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5728 | if (IVersion.Major < 10) |
5729 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5730 | EnableWavefrontSize32 = Val; |
5731 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5732 | KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal, |
5733 | ValRange); |
5734 | } else if (ID == ".amdhsa_uses_dynamic_stack" ) { |
5735 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5736 | KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal, |
5737 | ValRange); |
5738 | } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset" ) { |
5739 | if (hasArchitectedFlatScratch()) |
5740 | return Error(L: IDRange.Start, |
5741 | Msg: "directive is not supported with architected flat scratch" , |
5742 | Range: IDRange); |
5743 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5744 | COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, |
5745 | ValRange); |
5746 | } else if (ID == ".amdhsa_enable_private_segment" ) { |
5747 | if (!hasArchitectedFlatScratch()) |
5748 | return Error( |
5749 | L: IDRange.Start, |
5750 | Msg: "directive is not supported without architected flat scratch" , |
5751 | Range: IDRange); |
5752 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5753 | COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, |
5754 | ValRange); |
5755 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x" ) { |
5756 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5757 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal, |
5758 | ValRange); |
5759 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y" ) { |
5760 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5761 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal, |
5762 | ValRange); |
5763 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z" ) { |
5764 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5765 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal, |
5766 | ValRange); |
5767 | } else if (ID == ".amdhsa_system_sgpr_workgroup_info" ) { |
5768 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5769 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal, |
5770 | ValRange); |
5771 | } else if (ID == ".amdhsa_system_vgpr_workitem_id" ) { |
5772 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5773 | COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal, |
5774 | ValRange); |
5775 | } else if (ID == ".amdhsa_next_free_vgpr" ) { |
5776 | VGPRRange = ValRange; |
5777 | NextFreeVGPR = ExprVal; |
5778 | } else if (ID == ".amdhsa_next_free_sgpr" ) { |
5779 | SGPRRange = ValRange; |
5780 | NextFreeSGPR = ExprVal; |
5781 | } else if (ID == ".amdhsa_accum_offset" ) { |
5782 | if (!isGFX90A()) |
5783 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5784 | AccumOffset = ExprVal; |
5785 | } else if (ID == ".amdhsa_reserve_vcc" ) { |
5786 | if (EvaluatableExpr && !isUInt<1>(x: Val)) |
5787 | return OutOfRangeError(Range: ValRange); |
5788 | ReserveVCC = ExprVal; |
5789 | } else if (ID == ".amdhsa_reserve_flat_scratch" ) { |
5790 | if (IVersion.Major < 7) |
5791 | return Error(L: IDRange.Start, Msg: "directive requires gfx7+" , Range: IDRange); |
5792 | if (hasArchitectedFlatScratch()) |
5793 | return Error(L: IDRange.Start, |
5794 | Msg: "directive is not supported with architected flat scratch" , |
5795 | Range: IDRange); |
5796 | if (EvaluatableExpr && !isUInt<1>(x: Val)) |
5797 | return OutOfRangeError(Range: ValRange); |
5798 | ReserveFlatScr = ExprVal; |
5799 | } else if (ID == ".amdhsa_reserve_xnack_mask" ) { |
5800 | if (IVersion.Major < 8) |
5801 | return Error(L: IDRange.Start, Msg: "directive requires gfx8+" , Range: IDRange); |
5802 | if (!isUInt<1>(x: Val)) |
5803 | return OutOfRangeError(Range: ValRange); |
5804 | if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) |
5805 | return getParser().Error(L: IDRange.Start, Msg: ".amdhsa_reserve_xnack_mask does not match target id" , |
5806 | Range: IDRange); |
5807 | } else if (ID == ".amdhsa_float_round_mode_32" ) { |
5808 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5809 | COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, |
5810 | ValRange); |
5811 | } else if (ID == ".amdhsa_float_round_mode_16_64" ) { |
5812 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5813 | COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal, |
5814 | ValRange); |
5815 | } else if (ID == ".amdhsa_float_denorm_mode_32" ) { |
5816 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5817 | COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal, |
5818 | ValRange); |
5819 | } else if (ID == ".amdhsa_float_denorm_mode_16_64" ) { |
5820 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5821 | COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, |
5822 | ValRange); |
5823 | } else if (ID == ".amdhsa_dx10_clamp" ) { |
5824 | if (IVersion.Major >= 12) |
5825 | return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+" , Range: IDRange); |
5826 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5827 | COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, |
5828 | ValRange); |
5829 | } else if (ID == ".amdhsa_ieee_mode" ) { |
5830 | if (IVersion.Major >= 12) |
5831 | return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+" , Range: IDRange); |
5832 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5833 | COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal, |
5834 | ValRange); |
5835 | } else if (ID == ".amdhsa_fp16_overflow" ) { |
5836 | if (IVersion.Major < 9) |
5837 | return Error(L: IDRange.Start, Msg: "directive requires gfx9+" , Range: IDRange); |
5838 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5839 | COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal, |
5840 | ValRange); |
5841 | } else if (ID == ".amdhsa_tg_split" ) { |
5842 | if (!isGFX90A()) |
5843 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5844 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, |
5845 | ExprVal, ValRange); |
5846 | } else if (ID == ".amdhsa_workgroup_processor_mode" ) { |
5847 | if (IVersion.Major < 10) |
5848 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5849 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5850 | COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal, |
5851 | ValRange); |
5852 | } else if (ID == ".amdhsa_memory_ordered" ) { |
5853 | if (IVersion.Major < 10) |
5854 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5855 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5856 | COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal, |
5857 | ValRange); |
5858 | } else if (ID == ".amdhsa_forward_progress" ) { |
5859 | if (IVersion.Major < 10) |
5860 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5861 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5862 | COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal, |
5863 | ValRange); |
5864 | } else if (ID == ".amdhsa_shared_vgpr_count" ) { |
5865 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5866 | if (IVersion.Major < 10 || IVersion.Major >= 12) |
5867 | return Error(L: IDRange.Start, Msg: "directive requires gfx10 or gfx11" , |
5868 | Range: IDRange); |
5869 | SharedVGPRCount = Val; |
5870 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, |
5871 | COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal, |
5872 | ValRange); |
5873 | } else if (ID == ".amdhsa_inst_pref_size" ) { |
5874 | if (IVersion.Major < 11) |
5875 | return Error(L: IDRange.Start, Msg: "directive requires gfx11+" , Range: IDRange); |
5876 | if (IVersion.Major == 11) { |
5877 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, |
5878 | COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal, |
5879 | ValRange); |
5880 | } else { |
5881 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, |
5882 | COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal, |
5883 | ValRange); |
5884 | } |
5885 | } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op" ) { |
5886 | PARSE_BITS_ENTRY( |
5887 | KD.compute_pgm_rsrc2, |
5888 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, |
5889 | ExprVal, ValRange); |
5890 | } else if (ID == ".amdhsa_exception_fp_denorm_src" ) { |
5891 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5892 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, |
5893 | ExprVal, ValRange); |
5894 | } else if (ID == ".amdhsa_exception_fp_ieee_div_zero" ) { |
5895 | PARSE_BITS_ENTRY( |
5896 | KD.compute_pgm_rsrc2, |
5897 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, |
5898 | ExprVal, ValRange); |
5899 | } else if (ID == ".amdhsa_exception_fp_ieee_overflow" ) { |
5900 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5901 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, |
5902 | ExprVal, ValRange); |
5903 | } else if (ID == ".amdhsa_exception_fp_ieee_underflow" ) { |
5904 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5905 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, |
5906 | ExprVal, ValRange); |
5907 | } else if (ID == ".amdhsa_exception_fp_ieee_inexact" ) { |
5908 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5909 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, |
5910 | ExprVal, ValRange); |
5911 | } else if (ID == ".amdhsa_exception_int_div_zero" ) { |
5912 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5913 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, |
5914 | ExprVal, ValRange); |
5915 | } else if (ID == ".amdhsa_round_robin_scheduling" ) { |
5916 | if (IVersion.Major < 12) |
5917 | return Error(L: IDRange.Start, Msg: "directive requires gfx12+" , Range: IDRange); |
5918 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5919 | COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal, |
5920 | ValRange); |
5921 | } else { |
5922 | return Error(L: IDRange.Start, Msg: "unknown .amdhsa_kernel directive" , Range: IDRange); |
5923 | } |
5924 | |
5925 | #undef PARSE_BITS_ENTRY |
5926 | } |
5927 | |
5928 | if (!Seen.contains(key: ".amdhsa_next_free_vgpr" )) |
5929 | return TokError(Msg: ".amdhsa_next_free_vgpr directive is required" ); |
5930 | |
5931 | if (!Seen.contains(key: ".amdhsa_next_free_sgpr" )) |
5932 | return TokError(Msg: ".amdhsa_next_free_sgpr directive is required" ); |
5933 | |
5934 | unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(u&: ImpliedUserSGPRCount); |
5935 | |
5936 |   // Consider the case where the total number of UserSGPRs, including the |
5937 |   // trailing allocated preload SGPRs, is greater than the number of |
5938 |   // explicitly referenced SGPRs. |
5939 | if (PreloadLength) { |
5940 | MCContext &Ctx = getContext(); |
5941 | NextFreeSGPR = AMDGPUMCExpr::createMax( |
5942 | Args: {NextFreeSGPR, MCConstantExpr::create(Value: UserSGPRCount, Ctx)}, Ctx); |
5943 | } |
5944 | |
5945 | const MCExpr *VGPRBlocks; |
5946 | const MCExpr *SGPRBlocks; |
5947 | if (calculateGPRBlocks(Features: getFeatureBits(), VCCUsed: ReserveVCC, FlatScrUsed: ReserveFlatScr, |
5948 | XNACKUsed: getTargetStreamer().getTargetID()->isXnackOnOrAny(), |
5949 | EnableWavefrontSize32, NextFreeVGPR, |
5950 | VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, |
5951 | SGPRBlocks)) |
5952 | return true; |
5953 | |
5954 | int64_t EvaluatedVGPRBlocks; |
5955 | bool VGPRBlocksEvaluatable = |
5956 | VGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedVGPRBlocks); |
5957 | if (VGPRBlocksEvaluatable && |
5958 | !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( |
5959 | x: static_cast<uint64_t>(EvaluatedVGPRBlocks))) { |
5960 | return OutOfRangeError(Range: VGPRRange); |
5961 | } |
5962 | AMDGPU::MCKernelDescriptor::bits_set( |
5963 | Dst&: KD.compute_pgm_rsrc1, Value: VGPRBlocks, |
5964 | Shift: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT, |
5965 | Mask: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, Ctx&: getContext()); |
5966 | |
5967 | int64_t EvaluatedSGPRBlocks; |
5968 | if (SGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedSGPRBlocks) && |
5969 | !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( |
5970 | x: static_cast<uint64_t>(EvaluatedSGPRBlocks))) |
5971 | return OutOfRangeError(Range: SGPRRange); |
5972 | AMDGPU::MCKernelDescriptor::bits_set( |
5973 | Dst&: KD.compute_pgm_rsrc1, Value: SGPRBlocks, |
5974 | Shift: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT, |
5975 | Mask: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, Ctx&: getContext()); |
5976 | |
5977 | if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) |
5978 |     return TokError(Msg: "amdgpu_user_sgpr_count smaller than implied by " |
5979 | "enabled user SGPRs" ); |
5980 | |
5981 | if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(x: UserSGPRCount)) |
5982 | return TokError(Msg: "too many user SGPRs enabled" ); |
5983 | AMDGPU::MCKernelDescriptor::bits_set( |
5984 | Dst&: KD.compute_pgm_rsrc2, Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()), |
5985 | Shift: COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, |
5986 | Mask: COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, Ctx&: getContext()); |
5987 | |
5988 | int64_t IVal = 0; |
5989 | if (!KD.kernarg_size->evaluateAsAbsolute(Res&: IVal)) |
5990 | return TokError(Msg: "Kernarg size should be resolvable" ); |
5991 | uint64_t kernarg_size = IVal; |
5992 | if (PreloadLength && kernarg_size && |
5993 | (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size)) |
5994 | return TokError(Msg: "Kernarg preload length + offset is larger than the " |
5995 | "kernarg segment size" ); |
5996 | |
5997 | if (isGFX90A()) { |
5998 | if (!Seen.contains(key: ".amdhsa_accum_offset" )) |
5999 | return TokError(Msg: ".amdhsa_accum_offset directive is required" ); |
6000 | int64_t EvaluatedAccum; |
6001 | bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(Res&: EvaluatedAccum); |
6002 | uint64_t UEvaluatedAccum = EvaluatedAccum; |
6003 | if (AccumEvaluatable && |
6004 | (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3))) |
6005 | return TokError(Msg: "accum_offset should be in range [4..256] in " |
6006 | "increments of 4" ); |
6007 | |
6008 | int64_t EvaluatedNumVGPR; |
6009 | if (NextFreeVGPR->evaluateAsAbsolute(Res&: EvaluatedNumVGPR) && |
6010 | AccumEvaluatable && |
6011 | UEvaluatedAccum > |
6012 | alignTo(Value: std::max(a: (uint64_t)1, b: (uint64_t)EvaluatedNumVGPR), Align: 4)) |
6013 | return TokError(Msg: "accum_offset exceeds total VGPR allocation" ); |
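    // The ACCUM_OFFSET field holds (accum_offset / 4) - 1, so rescale the
    // parsed byte offset expression before writing it into compute_pgm_rsrc3.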
6014 | const MCExpr *AdjustedAccum = MCBinaryExpr::createSub( |
6015 | LHS: MCBinaryExpr::createDiv( |
6016 | LHS: AccumOffset, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext()), |
6017 | RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext()); |
6018 | MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: AdjustedAccum, |
6019 | Shift: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, |
6020 | Mask: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, |
6021 | Ctx&: getContext()); |
6022 | } |
6023 | |
6024 | if (IVersion.Major >= 10 && IVersion.Major < 12) { |
6025 |     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY |
6026 | if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { |
6027 | return TokError(Msg: "shared_vgpr_count directive not valid on " |
6028 | "wavefront size 32" ); |
6029 | } |
6030 | |
6031 | if (VGPRBlocksEvaluatable && |
6032 | (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) > |
6033 | 63)) { |
6034 | return TokError(Msg: "shared_vgpr_count*2 + " |
6035 | "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " |
6036 | "exceed 63\n" ); |
6037 | } |
6038 | } |
6039 | |
6040 | getTargetStreamer().EmitAmdhsaKernelDescriptor(STI: getSTI(), KernelName, KernelDescriptor: KD, |
6041 | NextVGPR: NextFreeVGPR, NextSGPR: NextFreeSGPR, |
6042 | ReserveVCC, ReserveFlatScr); |
6043 | return false; |
6044 | } |
6045 | |
6046 | bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() { |
6047 | uint32_t Version; |
6048 | if (ParseAsAbsoluteExpression(Ret&: Version)) |
6049 | return true; |
6050 | |
6051 | getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(COV: Version); |
6052 | return false; |
6053 | } |
6054 | |
6055 | bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, |
6056 | AMDGPUMCKernelCodeT &C) { |
6057 | // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing |
6058 | // assembly for backwards compatibility. |
6059 | if (ID == "max_scratch_backing_memory_byte_size" ) { |
6060 | Parser.eatToEndOfStatement(); |
6061 | return false; |
6062 | } |
6063 | |
6064 | SmallString<40> ErrStr; |
6065 | raw_svector_ostream Err(ErrStr); |
6066 | if (!C.ParseKernelCodeT(ID, MCParser&: getParser(), Err)) { |
6067 | return TokError(Msg: Err.str()); |
6068 | } |
6069 | Lex(); |
6070 | |
6071 | if (ID == "enable_wavefront_size32" ) { |
6072 | if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { |
6073 | if (!isGFX10Plus()) |
6074 | return TokError(Msg: "enable_wavefront_size32=1 is only allowed on GFX10+" ); |
6075 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) |
6076 | return TokError(Msg: "enable_wavefront_size32=1 requires +WavefrontSize32" ); |
6077 | } else { |
6078 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) |
6079 | return TokError(Msg: "enable_wavefront_size32=0 requires +WavefrontSize64" ); |
6080 | } |
6081 | } |
6082 | |
6083 | if (ID == "wavefront_size" ) { |
6084 | if (C.wavefront_size == 5) { |
6085 | if (!isGFX10Plus()) |
6086 | return TokError(Msg: "wavefront_size=5 is only allowed on GFX10+" ); |
6087 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) |
6088 | return TokError(Msg: "wavefront_size=5 requires +WavefrontSize32" ); |
6089 | } else if (C.wavefront_size == 6) { |
6090 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) |
6091 | return TokError(Msg: "wavefront_size=6 requires +WavefrontSize64" ); |
6092 | } |
6093 | } |
6094 | |
6095 | return false; |
6096 | } |
6097 | |
6098 | bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { |
6099 | AMDGPUMCKernelCodeT KernelCode; |
6100 | KernelCode.initDefault(STI: &getSTI(), Ctx&: getContext()); |
6101 | |
6102 | while (true) { |
6103 |     // Lex EndOfStatement. This is in a while loop because lexing a comment |
6104 | // will set the current token to EndOfStatement. |
6105 | while(trySkipToken(Kind: AsmToken::EndOfStatement)); |
6106 | |
6107 | StringRef ID; |
6108 | if (!parseId(Val&: ID, ErrMsg: "expected value identifier or .end_amd_kernel_code_t" )) |
6109 | return true; |
6110 | |
6111 | if (ID == ".end_amd_kernel_code_t" ) |
6112 | break; |
6113 | |
6114 | if (ParseAMDKernelCodeTValue(ID, C&: KernelCode)) |
6115 | return true; |
6116 | } |
6117 | |
6118 | KernelCode.validate(STI: &getSTI(), Ctx&: getContext()); |
6119 | getTargetStreamer().EmitAMDKernelCodeT(Header&: KernelCode); |
6120 | |
6121 | return false; |
6122 | } |
6123 | |
6124 | bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { |
6125 | StringRef KernelName; |
6126 | if (!parseId(Val&: KernelName, ErrMsg: "expected symbol name" )) |
6127 | return true; |
6128 | |
6129 | getTargetStreamer().EmitAMDGPUSymbolType(SymbolName: KernelName, |
6130 | Type: ELF::STT_AMDGPU_HSA_KERNEL); |
6131 | |
6132 | KernelScope.initialize(Context&: getContext()); |
6133 | return false; |
6134 | } |
6135 | |
6136 | bool AMDGPUAsmParser::ParseDirectiveISAVersion() { |
6137 | if (!getSTI().getTargetTriple().isAMDGCN()) { |
6138 | return Error(L: getLoc(), |
6139 | Msg: ".amd_amdgpu_isa directive is not available on non-amdgcn " |
6140 | "architectures" ); |
6141 | } |
6142 | |
6143 | auto TargetIDDirective = getLexer().getTok().getStringContents(); |
6144 | if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) |
6145 | return Error(L: getParser().getTok().getLoc(), Msg: "target id must match options" ); |
6146 | |
6147 | getTargetStreamer().EmitISAVersion(); |
6148 | Lex(); |
6149 | |
6150 | return false; |
6151 | } |
6152 | |
6153 | bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { |
6154 | assert(isHsaAbi(getSTI())); |
6155 | |
6156 | std::string HSAMetadataString; |
6157 | if (ParseToEndDirective(AssemblerDirectiveBegin: HSAMD::V3::AssemblerDirectiveBegin, |
6158 | AssemblerDirectiveEnd: HSAMD::V3::AssemblerDirectiveEnd, CollectString&: HSAMetadataString)) |
6159 | return true; |
6160 | |
6161 | if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) |
6162 | return Error(L: getLoc(), Msg: "invalid HSA metadata" ); |
6163 | |
6164 | return false; |
6165 | } |
6166 | |
6167 | /// Common code to parse out a block of text (typically YAML) between start and |
6168 | /// end directives. |
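/// Used by the HSA metadata and MsgPack PAL metadata directives: everything up
/// to the matching end directive is appended verbatim (with statement
/// separators) to CollectString.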
6169 | bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, |
6170 | const char *AssemblerDirectiveEnd, |
6171 | std::string &CollectString) { |
6172 | |
6173 | raw_string_ostream CollectStream(CollectString); |
6174 | |
6175 | getLexer().setSkipSpace(false); |
6176 | |
6177 | bool FoundEnd = false; |
6178 | while (!isToken(Kind: AsmToken::Eof)) { |
6179 | while (isToken(Kind: AsmToken::Space)) { |
6180 | CollectStream << getTokenStr(); |
6181 | Lex(); |
6182 | } |
6183 | |
6184 | if (trySkipId(Id: AssemblerDirectiveEnd)) { |
6185 | FoundEnd = true; |
6186 | break; |
6187 | } |
6188 | |
6189 | CollectStream << Parser.parseStringToEndOfStatement() |
6190 | << getContext().getAsmInfo()->getSeparatorString(); |
6191 | |
6192 | Parser.eatToEndOfStatement(); |
6193 | } |
6194 | |
6195 | getLexer().setSkipSpace(true); |
6196 | |
6197 | if (isToken(Kind: AsmToken::Eof) && !FoundEnd) { |
6198 | return TokError(Msg: Twine("expected directive " ) + |
6199 | Twine(AssemblerDirectiveEnd) + Twine(" not found" )); |
6200 | } |
6201 | |
6202 | return false; |
6203 | } |
6204 | |
6205 | /// Parse the assembler directive for new MsgPack-format PAL metadata. |
6206 | bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { |
6207 | std::string String; |
6208 | if (ParseToEndDirective(AssemblerDirectiveBegin: AMDGPU::PALMD::AssemblerDirectiveBegin, |
6209 | AssemblerDirectiveEnd: AMDGPU::PALMD::AssemblerDirectiveEnd, CollectString&: String)) |
6210 | return true; |
6211 | |
6212 | auto *PALMetadata = getTargetStreamer().getPALMetadata(); |
6213 | if (!PALMetadata->setFromString(String)) |
6214 | return Error(L: getLoc(), Msg: "invalid PAL metadata" ); |
6215 | return false; |
6216 | } |
6217 | |
6218 | /// Parse the assembler directive for old linear-format PAL metadata. |
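/// The legacy form is a comma-separated list of register/value pairs. A purely
/// illustrative example (directive name assumed to match
/// PALMD::AssemblerDirective, values hypothetical):
///   .amd_amdgpu_pal_metadata 0x2c0a, 0x0, 0x2c0b, 0x42000000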
6219 | bool AMDGPUAsmParser::ParseDirectivePALMetadata() { |
6220 | if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { |
6221 | return Error(L: getLoc(), |
6222 | Msg: (Twine(PALMD::AssemblerDirective) + Twine(" directive is " |
6223 | "not available on non-amdpal OSes" )).str()); |
6224 | } |
6225 | |
6226 | auto *PALMetadata = getTargetStreamer().getPALMetadata(); |
6227 | PALMetadata->setLegacy(); |
6228 | for (;;) { |
6229 | uint32_t Key, Value; |
6230 | if (ParseAsAbsoluteExpression(Ret&: Key)) { |
6231 | return TokError(Msg: Twine("invalid value in " ) + |
6232 | Twine(PALMD::AssemblerDirective)); |
6233 | } |
6234 | if (!trySkipToken(Kind: AsmToken::Comma)) { |
6235 | return TokError(Msg: Twine("expected an even number of values in " ) + |
6236 | Twine(PALMD::AssemblerDirective)); |
6237 | } |
6238 | if (ParseAsAbsoluteExpression(Ret&: Value)) { |
6239 | return TokError(Msg: Twine("invalid value in " ) + |
6240 | Twine(PALMD::AssemblerDirective)); |
6241 | } |
6242 | PALMetadata->setRegister(Reg: Key, Val: Value); |
6243 | if (!trySkipToken(Kind: AsmToken::Comma)) |
6244 | break; |
6245 | } |
6246 | return false; |
6247 | } |
6248 | |
6249 | /// ParseDirectiveAMDGPULDS |
6250 | /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] |
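/// An illustrative example: .amdgpu_lds my_lds_var, 4096, 16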
6251 | bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { |
6252 | if (getParser().checkForValidSection()) |
6253 | return true; |
6254 | |
6255 | StringRef Name; |
6256 | SMLoc NameLoc = getLoc(); |
6257 | if (getParser().parseIdentifier(Res&: Name)) |
6258 | return TokError(Msg: "expected identifier in directive" ); |
6259 | |
6260 | MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); |
6261 | if (getParser().parseComma()) |
6262 | return true; |
6263 | |
6264 | unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(STI: &getSTI()); |
6265 | |
6266 | int64_t Size; |
6267 | SMLoc SizeLoc = getLoc(); |
6268 | if (getParser().parseAbsoluteExpression(Res&: Size)) |
6269 | return true; |
6270 | if (Size < 0) |
6271 | return Error(L: SizeLoc, Msg: "size must be non-negative" ); |
6272 | if (Size > LocalMemorySize) |
6273 | return Error(L: SizeLoc, Msg: "size is too large" ); |
6274 | |
6275 | int64_t Alignment = 4; |
6276 | if (trySkipToken(Kind: AsmToken::Comma)) { |
6277 | SMLoc AlignLoc = getLoc(); |
6278 | if (getParser().parseAbsoluteExpression(Res&: Alignment)) |
6279 | return true; |
6280 | if (Alignment < 0 || !isPowerOf2_64(Value: Alignment)) |
6281 | return Error(L: AlignLoc, Msg: "alignment must be a power of two" ); |
6282 | |
6283 | // Alignment larger than the size of LDS is possible in theory, as long |
6284 |     // as the linker manages to place the symbol at address 0, but we do want |
6285 | // to make sure the alignment fits nicely into a 32-bit integer. |
6286 | if (Alignment >= 1u << 31) |
6287 | return Error(L: AlignLoc, Msg: "alignment is too large" ); |
6288 | } |
6289 | |
6290 | if (parseEOL()) |
6291 | return true; |
6292 | |
6293 | Symbol->redefineIfPossible(); |
6294 | if (!Symbol->isUndefined()) |
6295 | return Error(L: NameLoc, Msg: "invalid symbol redefinition" ); |
6296 | |
6297 | getTargetStreamer().emitAMDGPULDS(Symbol, Size, Alignment: Align(Alignment)); |
6298 | return false; |
6299 | } |
6300 | |
6301 | bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { |
6302 | StringRef IDVal = DirectiveID.getString(); |
6303 | |
6304 | if (isHsaAbi(STI: getSTI())) { |
6305 | if (IDVal == ".amdhsa_kernel" ) |
6306 | return ParseDirectiveAMDHSAKernel(); |
6307 | |
6308 | if (IDVal == ".amdhsa_code_object_version" ) |
6309 | return ParseDirectiveAMDHSACodeObjectVersion(); |
6310 | |
6311 | // TODO: Restructure/combine with PAL metadata directive. |
6312 | if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) |
6313 | return ParseDirectiveHSAMetadata(); |
6314 | } else { |
6315 | if (IDVal == ".amd_kernel_code_t" ) |
6316 | return ParseDirectiveAMDKernelCodeT(); |
6317 | |
6318 | if (IDVal == ".amdgpu_hsa_kernel" ) |
6319 | return ParseDirectiveAMDGPUHsaKernel(); |
6320 | |
6321 | if (IDVal == ".amd_amdgpu_isa" ) |
6322 | return ParseDirectiveISAVersion(); |
6323 | |
6324 | if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) { |
6325 | return Error(L: getLoc(), Msg: (Twine(HSAMD::AssemblerDirectiveBegin) + |
6326 | Twine(" directive is " |
6327 | "not available on non-amdhsa OSes" )) |
6328 | .str()); |
6329 | } |
6330 | } |
6331 | |
6332 | if (IDVal == ".amdgcn_target" ) |
6333 | return ParseDirectiveAMDGCNTarget(); |
6334 | |
6335 | if (IDVal == ".amdgpu_lds" ) |
6336 | return ParseDirectiveAMDGPULDS(); |
6337 | |
6338 | if (IDVal == PALMD::AssemblerDirectiveBegin) |
6339 | return ParseDirectivePALMetadataBegin(); |
6340 | |
6341 | if (IDVal == PALMD::AssemblerDirective) |
6342 | return ParseDirectivePALMetadata(); |
6343 | |
6344 | return true; |
6345 | } |
6346 | |
6347 | bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, |
6348 | MCRegister Reg) { |
6349 | if (MRI.regsOverlap(RegA: TTMP12_TTMP13_TTMP14_TTMP15, RegB: Reg)) |
6350 | return isGFX9Plus(); |
6351 | |
6352 |   // GFX10+ has two more SGPRs, 104 and 105. |
6353 | if (MRI.regsOverlap(RegA: SGPR104_SGPR105, RegB: Reg)) |
6354 | return hasSGPR104_SGPR105(); |
6355 | |
6356 | switch (Reg.id()) { |
6357 | case SRC_SHARED_BASE_LO: |
6358 | case SRC_SHARED_BASE: |
6359 | case SRC_SHARED_LIMIT_LO: |
6360 | case SRC_SHARED_LIMIT: |
6361 | case SRC_PRIVATE_BASE_LO: |
6362 | case SRC_PRIVATE_BASE: |
6363 | case SRC_PRIVATE_LIMIT_LO: |
6364 | case SRC_PRIVATE_LIMIT: |
6365 | return isGFX9Plus(); |
6366 | case SRC_POPS_EXITING_WAVE_ID: |
6367 | return isGFX9Plus() && !isGFX11Plus(); |
6368 | case TBA: |
6369 | case TBA_LO: |
6370 | case TBA_HI: |
6371 | case TMA: |
6372 | case TMA_LO: |
6373 | case TMA_HI: |
6374 | return !isGFX9Plus(); |
6375 | case XNACK_MASK: |
6376 | case XNACK_MASK_LO: |
6377 | case XNACK_MASK_HI: |
6378 | return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); |
6379 | case SGPR_NULL: |
6380 | return isGFX10Plus(); |
6381 | case SRC_EXECZ: |
6382 | case SRC_VCCZ: |
6383 | return !isGFX11Plus(); |
6384 | default: |
6385 | break; |
6386 | } |
6387 | |
6388 | if (isCI()) |
6389 | return true; |
6390 | |
6391 | if (isSI() || isGFX10Plus()) { |
6392 | // No flat_scr on SI. |
6393 | // On GFX10Plus flat scratch is not a valid register operand and can only be |
6394 | // accessed with s_setreg/s_getreg. |
6395 | switch (Reg.id()) { |
6396 | case FLAT_SCR: |
6397 | case FLAT_SCR_LO: |
6398 | case FLAT_SCR_HI: |
6399 | return false; |
6400 | default: |
6401 | return true; |
6402 | } |
6403 | } |
6404 | |
6405 | // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that |
6406 | // SI/CI have. |
6407 | if (MRI.regsOverlap(RegA: SGPR102_SGPR103, RegB: Reg)) |
6408 | return hasSGPR102_SGPR103(); |
6409 | |
6410 | return true; |
6411 | } |
6412 | |
6413 | ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands, |
6414 | StringRef Mnemonic, |
6415 | OperandMode Mode) { |
6416 | ParseStatus Res = parseVOPD(Operands); |
6417 | if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement)) |
6418 | return Res; |
6419 | |
6420 | // Try to parse with a custom parser |
6421 | Res = MatchOperandParserImpl(Operands, Mnemonic); |
6422 | |
6423 |   // If we successfully parsed the operand or if there was an error parsing, |
6424 | // we are done. |
6425 | // |
6426 | // If we are parsing after we reach EndOfStatement then this means we |
6427 | // are appending default values to the Operands list. This is only done |
6428 | // by custom parser, so we shouldn't continue on to the generic parsing. |
6429 | if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement)) |
6430 | return Res; |
6431 | |
6432 | SMLoc RBraceLoc; |
6433 | SMLoc LBraceLoc = getLoc(); |
6434 | if (Mode == OperandMode_NSA && trySkipToken(Kind: AsmToken::LBrac)) { |
6435 | unsigned Prefix = Operands.size(); |
6436 | |
6437 | for (;;) { |
6438 | auto Loc = getLoc(); |
6439 | Res = parseReg(Operands); |
6440 | if (Res.isNoMatch()) |
6441 | Error(L: Loc, Msg: "expected a register" ); |
6442 | if (!Res.isSuccess()) |
6443 | return ParseStatus::Failure; |
6444 | |
6445 | RBraceLoc = getLoc(); |
6446 | if (trySkipToken(Kind: AsmToken::RBrac)) |
6447 | break; |
6448 | |
6449 | if (!skipToken(Kind: AsmToken::Comma, |
6450 | ErrMsg: "expected a comma or a closing square bracket" )) |
6451 | return ParseStatus::Failure; |
6452 | } |
6453 | |
6454 | if (Operands.size() - Prefix > 1) { |
6455 | Operands.insert(I: Operands.begin() + Prefix, |
6456 | Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "[" , Loc: LBraceLoc)); |
6457 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "]" , Loc: RBraceLoc)); |
6458 | } |
6459 | |
6460 | return ParseStatus::Success; |
6461 | } |
6462 | |
6463 | return parseRegOrImm(Operands); |
6464 | } |
6465 | |
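// Strip an explicit encoding/DPP/SDWA suffix from the mnemonic and record it
// as a forced encoding for instruction matching, e.g. "v_add_f32_e64" is
// matched as "v_add_f32" with a forced 64-bit encoding (illustrative example).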
6466 | StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { |
6467 | // Clear any forced encodings from the previous instruction. |
6468 | setForcedEncodingSize(0); |
6469 | setForcedDPP(false); |
6470 | setForcedSDWA(false); |
6471 | |
6472 | if (Name.consume_back(Suffix: "_e64_dpp" )) { |
6473 | setForcedDPP(true); |
6474 | setForcedEncodingSize(64); |
6475 | return Name; |
6476 | } |
6477 | if (Name.consume_back(Suffix: "_e64" )) { |
6478 | setForcedEncodingSize(64); |
6479 | return Name; |
6480 | } |
6481 | if (Name.consume_back(Suffix: "_e32" )) { |
6482 | setForcedEncodingSize(32); |
6483 | return Name; |
6484 | } |
6485 | if (Name.consume_back(Suffix: "_dpp" )) { |
6486 | setForcedDPP(true); |
6487 | return Name; |
6488 | } |
6489 | if (Name.consume_back(Suffix: "_sdwa" )) { |
6490 | setForcedSDWA(true); |
6491 | return Name; |
6492 | } |
6493 | return Name; |
6494 | } |
6495 | |
6496 | static void applyMnemonicAliases(StringRef &Mnemonic, |
6497 | const FeatureBitset &Features, |
6498 | unsigned VariantID); |
6499 | |
6500 | bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info, |
6501 | StringRef Name, SMLoc NameLoc, |
6502 | OperandVector &Operands) { |
6503 | // Add the instruction mnemonic |
6504 | Name = parseMnemonicSuffix(Name); |
6505 | |
6506 | // If the target architecture uses MnemonicAlias, call it here to parse |
6507 | // operands correctly. |
6508 | applyMnemonicAliases(Mnemonic&: Name, Features: getAvailableFeatures(), VariantID: 0); |
6509 | |
6510 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: NameLoc)); |
6511 | |
6512 | bool IsMIMG = Name.starts_with(Prefix: "image_" ); |
6513 | |
6514 | while (!trySkipToken(Kind: AsmToken::EndOfStatement)) { |
6515 | OperandMode Mode = OperandMode_Default; |
6516 | if (IsMIMG && isGFX10Plus() && Operands.size() == 2) |
6517 | Mode = OperandMode_NSA; |
6518 | ParseStatus Res = parseOperand(Operands, Mnemonic: Name, Mode); |
6519 | |
6520 | if (!Res.isSuccess()) { |
6521 | checkUnsupportedInstruction(Mnemo: Name, IDLoc: NameLoc); |
6522 | if (!Parser.hasPendingError()) { |
6523 | // FIXME: use real operand location rather than the current location. |
6524 | StringRef Msg = Res.isFailure() ? "failed parsing operand." |
6525 | : "not a valid operand." ; |
6526 | Error(L: getLoc(), Msg); |
6527 | } |
6528 | while (!trySkipToken(Kind: AsmToken::EndOfStatement)) { |
6529 | lex(); |
6530 | } |
6531 | return true; |
6532 | } |
6533 | |
6534 | // Eat the comma or space if there is one. |
6535 | trySkipToken(Kind: AsmToken::Comma); |
6536 | } |
6537 | |
6538 | return false; |
6539 | } |
6540 | |
6541 | //===----------------------------------------------------------------------===// |
6542 | // Utility functions |
6543 | //===----------------------------------------------------------------------===// |
6544 | |
6545 | ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, |
6546 | OperandVector &Operands) { |
6547 | SMLoc S = getLoc(); |
6548 | if (!trySkipId(Id: Name)) |
6549 | return ParseStatus::NoMatch; |
6550 | |
6551 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: S)); |
6552 | return ParseStatus::Success; |
6553 | } |
6554 | |
6555 | ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, |
6556 | int64_t &IntVal) { |
6557 | |
6558 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6559 | return ParseStatus::NoMatch; |
6560 | |
6561 | return parseExpr(Imm&: IntVal) ? ParseStatus::Success : ParseStatus::Failure; |
6562 | } |
6563 | |
6564 | ParseStatus AMDGPUAsmParser::parseIntWithPrefix( |
6565 | const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, |
6566 | std::function<bool(int64_t &)> ConvertResult) { |
6567 | SMLoc S = getLoc(); |
6568 | int64_t Value = 0; |
6569 | |
6570 | ParseStatus Res = parseIntWithPrefix(Prefix, IntVal&: Value); |
6571 | if (!Res.isSuccess()) |
6572 | return Res; |
6573 | |
6574 | if (ConvertResult && !ConvertResult(Value)) { |
6575 | Error(L: S, Msg: "invalid " + StringRef(Prefix) + " value." ); |
6576 | } |
6577 | |
6578 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Value, Loc: S, Type: ImmTy)); |
6579 | return ParseStatus::Success; |
6580 | } |
6581 | |
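// Parse an operand of the form "<prefix>:[a,b,...]" where each element must be
// 0 or 1 and up to four elements are packed into a bitmask, e.g. "neg:[0,1,1]"
// (illustrative example).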
6582 | ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( |
6583 | const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, |
6584 | bool (*ConvertResult)(int64_t &)) { |
6585 | SMLoc S = getLoc(); |
6586 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6587 | return ParseStatus::NoMatch; |
6588 | |
6589 | if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected a left square bracket" )) |
6590 | return ParseStatus::Failure; |
6591 | |
6592 | unsigned Val = 0; |
6593 | const unsigned MaxSize = 4; |
6594 | |
6595 | // FIXME: How to verify the number of elements matches the number of src |
6596 | // operands? |
6597 | for (int I = 0; ; ++I) { |
6598 | int64_t Op; |
6599 | SMLoc Loc = getLoc(); |
6600 | if (!parseExpr(Imm&: Op)) |
6601 | return ParseStatus::Failure; |
6602 | |
6603 | if (Op != 0 && Op != 1) |
6604 | return Error(L: Loc, Msg: "invalid " + StringRef(Prefix) + " value." ); |
6605 | |
6606 | Val |= (Op << I); |
6607 | |
6608 | if (trySkipToken(Kind: AsmToken::RBrac)) |
6609 | break; |
6610 | |
6611 | if (I + 1 == MaxSize) |
6612 | return Error(L: getLoc(), Msg: "expected a closing square bracket" ); |
6613 | |
6614 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) |
6615 | return ParseStatus::Failure; |
6616 | } |
6617 | |
6618 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: ImmTy)); |
6619 | return ParseStatus::Success; |
6620 | } |
6621 | |
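// Parse a boolean modifier spelled either as "<name>" (sets the bit) or
// "no<name>" (clears it), e.g. "r128" / "nor128" (illustrative example).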
6622 | ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, |
6623 | OperandVector &Operands, |
6624 | AMDGPUOperand::ImmTy ImmTy) { |
6625 | int64_t Bit; |
6626 | SMLoc S = getLoc(); |
6627 | |
6628 | if (trySkipId(Id: Name)) { |
6629 | Bit = 1; |
6630 | } else if (trySkipId(Pref: "no" , Id: Name)) { |
6631 | Bit = 0; |
6632 | } else { |
6633 | return ParseStatus::NoMatch; |
6634 | } |
6635 | |
6636 | if (Name == "r128" && !hasMIMG_R128()) |
6637 | return Error(L: S, Msg: "r128 modifier is not supported on this GPU" ); |
6638 | if (Name == "a16" && !hasA16()) |
6639 | return Error(L: S, Msg: "a16 modifier is not supported on this GPU" ); |
6640 | |
6641 | if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) |
6642 | ImmTy = AMDGPUOperand::ImmTyR128A16; |
6643 | |
6644 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Bit, Loc: S, Type: ImmTy)); |
6645 | return ParseStatus::Success; |
6646 | } |
6647 | |
6648 | unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, |
6649 | bool &Disabling) const { |
6650 | Disabling = Id.consume_front(Prefix: "no" ); |
6651 | |
6652 | if (isGFX940() && !Mnemo.starts_with(Prefix: "s_" )) { |
6653 | return StringSwitch<unsigned>(Id) |
6654 | .Case(S: "nt" , Value: AMDGPU::CPol::NT) |
6655 | .Case(S: "sc0" , Value: AMDGPU::CPol::SC0) |
6656 | .Case(S: "sc1" , Value: AMDGPU::CPol::SC1) |
6657 | .Default(Value: 0); |
6658 | } |
6659 | |
6660 | return StringSwitch<unsigned>(Id) |
6661 | .Case(S: "dlc" , Value: AMDGPU::CPol::DLC) |
6662 | .Case(S: "glc" , Value: AMDGPU::CPol::GLC) |
6663 | .Case(S: "scc" , Value: AMDGPU::CPol::SCC) |
6664 | .Case(S: "slc" , Value: AMDGPU::CPol::SLC) |
6665 | .Default(Value: 0); |
6666 | } |
6667 | |
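// Parse cache-policy modifiers. On GFX12+ these are the th:... and scope:...
// forms, e.g. "th:TH_LOAD_NT scope:SCOPE_SYS"; on earlier targets they are
// named bits such as glc, slc and dlc, with a "no" prefix disabling the bit.
// The examples are illustrative only.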
6668 | ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { |
6669 | if (isGFX12Plus()) { |
6670 | SMLoc StringLoc = getLoc(); |
6671 | |
6672 | int64_t CPolVal = 0; |
6673 | ParseStatus ResTH = ParseStatus::NoMatch; |
6674 | ParseStatus ResScope = ParseStatus::NoMatch; |
6675 | |
6676 | for (;;) { |
6677 | if (ResTH.isNoMatch()) { |
6678 | int64_t TH; |
6679 | ResTH = parseTH(Operands, TH); |
6680 | if (ResTH.isFailure()) |
6681 | return ResTH; |
6682 | if (ResTH.isSuccess()) { |
6683 | CPolVal |= TH; |
6684 | continue; |
6685 | } |
6686 | } |
6687 | |
6688 | if (ResScope.isNoMatch()) { |
6689 | int64_t Scope; |
6690 | ResScope = parseScope(Operands, Scope); |
6691 | if (ResScope.isFailure()) |
6692 | return ResScope; |
6693 | if (ResScope.isSuccess()) { |
6694 | CPolVal |= Scope; |
6695 | continue; |
6696 | } |
6697 | } |
6698 | |
6699 | break; |
6700 | } |
6701 | |
6702 | if (ResTH.isNoMatch() && ResScope.isNoMatch()) |
6703 | return ParseStatus::NoMatch; |
6704 | |
6705 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: CPolVal, Loc: StringLoc, |
6706 | Type: AMDGPUOperand::ImmTyCPol)); |
6707 | return ParseStatus::Success; |
6708 | } |
6709 | |
6710 | StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); |
6711 | SMLoc OpLoc = getLoc(); |
6712 | unsigned Enabled = 0, Seen = 0; |
6713 | for (;;) { |
6714 | SMLoc S = getLoc(); |
6715 | bool Disabling; |
6716 | unsigned CPol = getCPolKind(Id: getId(), Mnemo, Disabling); |
6717 | if (!CPol) |
6718 | break; |
6719 | |
6720 | lex(); |
6721 | |
6722 | if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) |
6723 | return Error(L: S, Msg: "dlc modifier is not supported on this GPU" ); |
6724 | |
6725 | if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) |
6726 | return Error(L: S, Msg: "scc modifier is not supported on this GPU" ); |
6727 | |
6728 | if (Seen & CPol) |
6729 | return Error(L: S, Msg: "duplicate cache policy modifier" ); |
6730 | |
6731 | if (!Disabling) |
6732 | Enabled |= CPol; |
6733 | |
6734 | Seen |= CPol; |
6735 | } |
6736 | |
6737 | if (!Seen) |
6738 | return ParseStatus::NoMatch; |
6739 | |
6740 | Operands.push_back( |
6741 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Enabled, Loc: OpLoc, Type: AMDGPUOperand::ImmTyCPol)); |
6742 | return ParseStatus::Success; |
6743 | } |
6744 | |
6745 | ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, |
6746 | int64_t &Scope) { |
6747 | static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE, |
6748 | CPol::SCOPE_DEV, CPol::SCOPE_SYS}; |
6749 | |
6750 | ParseStatus Res = parseStringOrIntWithPrefix( |
6751 | Operands, Name: "scope" , Ids: {"SCOPE_CU" , "SCOPE_SE" , "SCOPE_DEV" , "SCOPE_SYS" }, |
6752 | IntVal&: Scope); |
6753 | |
6754 | if (Res.isSuccess()) |
6755 | Scope = Scopes[Scope]; |
6756 | |
6757 | return Res; |
6758 | } |
6759 | |
6760 | ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { |
6761 | TH = AMDGPU::CPol::TH_RT; // default |
6762 | |
6763 | StringRef Value; |
6764 | SMLoc StringLoc; |
6765 | ParseStatus Res = parseStringWithPrefix(Prefix: "th" , Value, StringLoc); |
6766 | if (!Res.isSuccess()) |
6767 | return Res; |
6768 | |
6769 | if (Value == "TH_DEFAULT" ) |
6770 | TH = AMDGPU::CPol::TH_RT; |
6771 | else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" || |
6772 | Value == "TH_LOAD_NT_WB" ) { |
6773 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6774 | } else if (Value.consume_front(Prefix: "TH_ATOMIC_" )) { |
6775 | TH = AMDGPU::CPol::TH_TYPE_ATOMIC; |
6776 | } else if (Value.consume_front(Prefix: "TH_LOAD_" )) { |
6777 | TH = AMDGPU::CPol::TH_TYPE_LOAD; |
6778 | } else if (Value.consume_front(Prefix: "TH_STORE_" )) { |
6779 | TH = AMDGPU::CPol::TH_TYPE_STORE; |
6780 | } else { |
6781 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6782 | } |
6783 | |
6784 | if (Value == "BYPASS" ) |
6785 | TH |= AMDGPU::CPol::TH_REAL_BYPASS; |
6786 | |
6787 | if (TH != 0) { |
6788 | if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) |
6789 | TH |= StringSwitch<int64_t>(Value) |
6790 | .Case(S: "RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_RETURN) |
6791 | .Case(S: "RT" , Value: AMDGPU::CPol::TH_RT) |
6792 | .Case(S: "RT_RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_RETURN) |
6793 | .Case(S: "NT" , Value: AMDGPU::CPol::TH_ATOMIC_NT) |
6794 | .Case(S: "NT_RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_NT | |
6795 | AMDGPU::CPol::TH_ATOMIC_RETURN) |
6796 | .Case(S: "CASCADE_RT" , Value: AMDGPU::CPol::TH_ATOMIC_CASCADE) |
6797 | .Case(S: "CASCADE_NT" , Value: AMDGPU::CPol::TH_ATOMIC_CASCADE | |
6798 | AMDGPU::CPol::TH_ATOMIC_NT) |
6799 | .Default(Value: 0xffffffff); |
6800 | else |
6801 | TH |= StringSwitch<int64_t>(Value) |
6802 | .Case(S: "RT" , Value: AMDGPU::CPol::TH_RT) |
6803 | .Case(S: "NT" , Value: AMDGPU::CPol::TH_NT) |
6804 | .Case(S: "HT" , Value: AMDGPU::CPol::TH_HT) |
6805 | .Case(S: "LU" , Value: AMDGPU::CPol::TH_LU) |
6806 | .Case(S: "WB" , Value: AMDGPU::CPol::TH_WB) |
6807 | .Case(S: "NT_RT" , Value: AMDGPU::CPol::TH_NT_RT) |
6808 | .Case(S: "RT_NT" , Value: AMDGPU::CPol::TH_RT_NT) |
6809 | .Case(S: "NT_HT" , Value: AMDGPU::CPol::TH_NT_HT) |
6810 | .Case(S: "NT_WB" , Value: AMDGPU::CPol::TH_NT_WB) |
6811 | .Case(S: "BYPASS" , Value: AMDGPU::CPol::TH_BYPASS) |
6812 | .Default(Value: 0xffffffff); |
6813 | } |
6814 | |
6815 | if (TH == 0xffffffff) |
6816 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6817 | |
6818 | return ParseStatus::Success; |
6819 | } |
6820 | |
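// Append an optional immediate operand to Inst: use the parsed value if the
// operand was seen (recorded in OptionalIdx), otherwise use Default. InsertAt
// places the operand at a specific position instead of appending it.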
6821 | static void |
6822 | addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, |
6823 | AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, |
6824 | AMDGPUOperand::ImmTy ImmT, int64_t Default = 0, |
6825 | std::optional<unsigned> InsertAt = std::nullopt) { |
6826 | auto i = OptionalIdx.find(x: ImmT); |
6827 | if (i != OptionalIdx.end()) { |
6828 | unsigned Idx = i->second; |
6829 | const AMDGPUOperand &Op = |
6830 | static_cast<const AMDGPUOperand &>(*Operands[Idx]); |
6831 | if (InsertAt) |
6832 | Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Op.getImm())); |
6833 | else |
6834 | Op.addImmOperands(Inst, N: 1); |
6835 | } else { |
6836 | if (InsertAt.has_value()) |
6837 | Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Default)); |
6838 | else |
6839 | Inst.addOperand(Op: MCOperand::createImm(Val: Default)); |
6840 | } |
6841 | } |
6842 | |
6843 | ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, |
6844 | StringRef &Value, |
6845 | SMLoc &StringLoc) { |
6846 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6847 | return ParseStatus::NoMatch; |
6848 | |
6849 | StringLoc = getLoc(); |
6850 | return parseId(Val&: Value, ErrMsg: "expected an identifier" ) ? ParseStatus::Success |
6851 | : ParseStatus::Failure; |
6852 | } |
6853 | |
6854 | ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix( |
6855 | OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids, |
6856 | int64_t &IntVal) { |
6857 | if (!trySkipId(Id: Name, Kind: AsmToken::Colon)) |
6858 | return ParseStatus::NoMatch; |
6859 | |
6860 | SMLoc StringLoc = getLoc(); |
6861 | |
6862 | StringRef Value; |
6863 | if (isToken(Kind: AsmToken::Identifier)) { |
6864 | Value = getTokenStr(); |
6865 | lex(); |
6866 | |
6867 | for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal) |
6868 | if (Value == Ids[IntVal]) |
6869 | break; |
6870 | } else if (!parseExpr(Imm&: IntVal)) |
6871 | return ParseStatus::Failure; |
6872 | |
6873 | if (IntVal < 0 || IntVal >= (int64_t)Ids.size()) |
6874 | return Error(L: StringLoc, Msg: "invalid " + Twine(Name) + " value" ); |
6875 | |
6876 | return ParseStatus::Success; |
6877 | } |
6878 | |
6879 | ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix( |
6880 | OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids, |
6881 | AMDGPUOperand::ImmTy Type) { |
6882 | SMLoc S = getLoc(); |
6883 | int64_t IntVal; |
6884 | |
6885 | ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal); |
6886 | if (Res.isSuccess()) |
6887 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S, Type)); |
6888 | |
6889 | return Res; |
6890 | } |
6891 | |
6892 | //===----------------------------------------------------------------------===// |
6893 | // MTBUF format |
6894 | //===----------------------------------------------------------------------===// |
6895 | |
6896 | bool AMDGPUAsmParser::tryParseFmt(const char *Pref, |
6897 | int64_t MaxVal, |
6898 | int64_t &Fmt) { |
6899 | int64_t Val; |
6900 | SMLoc Loc = getLoc(); |
6901 | |
6902 | auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: Val); |
6903 | if (Res.isFailure()) |
6904 | return false; |
6905 | if (Res.isNoMatch()) |
6906 | return true; |
6907 | |
6908 | if (Val < 0 || Val > MaxVal) { |
6909 | Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6910 | return false; |
6911 | } |
6912 | |
6913 | Fmt = Val; |
6914 | return true; |
6915 | } |
6916 | |
6917 | ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands, |
6918 | AMDGPUOperand::ImmTy ImmTy) { |
6919 | const char *Pref = "index_key" ; |
6920 | int64_t ImmVal = 0; |
6921 | SMLoc Loc = getLoc(); |
6922 | auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: ImmVal); |
6923 | if (!Res.isSuccess()) |
6924 | return Res; |
6925 | |
6926 | if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1)) |
6927 | return Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6928 | |
6929 | if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3)) |
6930 | return Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6931 | |
6932 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: ImmTy)); |
6933 | return ParseStatus::Success; |
6934 | } |
6935 | |
6936 | ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) { |
6937 | return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey8bit); |
6938 | } |
6939 | |
6940 | ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) { |
6941 | return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey16bit); |
6942 | } |
6943 | |
6944 | // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their |
6945 | // values to live in a joint format operand in the MCInst encoding. |
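// The legacy split-format syntax looks like "dfmt:1, nfmt:7" (illustrative
// values); either field may be omitted and they may appear in either order.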
6946 | ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { |
6947 | using namespace llvm::AMDGPU::MTBUFFormat; |
6948 | |
6949 | int64_t Dfmt = DFMT_UNDEF; |
6950 | int64_t Nfmt = NFMT_UNDEF; |
6951 | |
6952 | // dfmt and nfmt can appear in either order, and each is optional. |
6953 | for (int I = 0; I < 2; ++I) { |
6954 | if (Dfmt == DFMT_UNDEF && !tryParseFmt(Pref: "dfmt" , MaxVal: DFMT_MAX, Fmt&: Dfmt)) |
6955 | return ParseStatus::Failure; |
6956 | |
6957 | if (Nfmt == NFMT_UNDEF && !tryParseFmt(Pref: "nfmt" , MaxVal: NFMT_MAX, Fmt&: Nfmt)) |
6958 | return ParseStatus::Failure; |
6959 | |
6960 | // Skip optional comma between dfmt/nfmt |
6961 | // but guard against 2 commas following each other. |
6962 | if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && |
6963 | !peekToken().is(K: AsmToken::Comma)) { |
6964 | trySkipToken(Kind: AsmToken::Comma); |
6965 | } |
6966 | } |
6967 | |
6968 | if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) |
6969 | return ParseStatus::NoMatch; |
6970 | |
6971 | Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; |
6972 | Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; |
6973 | |
6974 | Format = encodeDfmtNfmt(Dfmt, Nfmt); |
6975 | return ParseStatus::Success; |
6976 | } |
6977 | |
6978 | ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { |
6979 | using namespace llvm::AMDGPU::MTBUFFormat; |
6980 | |
6981 | int64_t Fmt = UFMT_UNDEF; |
6982 | |
6983 | if (!tryParseFmt(Pref: "format" , MaxVal: UFMT_MAX, Fmt)) |
6984 | return ParseStatus::Failure; |
6985 | |
6986 | if (Fmt == UFMT_UNDEF) |
6987 | return ParseStatus::NoMatch; |
6988 | |
6989 | Format = Fmt; |
6990 | return ParseStatus::Success; |
6991 | } |
6992 | |
6993 | bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, |
6994 | int64_t &Nfmt, |
6995 | StringRef FormatStr, |
6996 | SMLoc Loc) { |
6997 | using namespace llvm::AMDGPU::MTBUFFormat; |
6998 | int64_t Format; |
6999 | |
7000 | Format = getDfmt(Name: FormatStr); |
7001 | if (Format != DFMT_UNDEF) { |
7002 | Dfmt = Format; |
7003 | return true; |
7004 | } |
7005 | |
7006 | Format = getNfmt(Name: FormatStr, STI: getSTI()); |
7007 | if (Format != NFMT_UNDEF) { |
7008 | Nfmt = Format; |
7009 | return true; |
7010 | } |
7011 | |
7012 | Error(L: Loc, Msg: "unsupported format" ); |
7013 | return false; |
7014 | } |
7015 | |
7016 | ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, |
7017 | SMLoc FormatLoc, |
7018 | int64_t &Format) { |
7019 | using namespace llvm::AMDGPU::MTBUFFormat; |
7020 | |
7021 | int64_t Dfmt = DFMT_UNDEF; |
7022 | int64_t Nfmt = NFMT_UNDEF; |
7023 | if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, Loc: FormatLoc)) |
7024 | return ParseStatus::Failure; |
7025 | |
7026 | if (trySkipToken(Kind: AsmToken::Comma)) { |
7027 | StringRef Str; |
7028 | SMLoc Loc = getLoc(); |
7029 | if (!parseId(Val&: Str, ErrMsg: "expected a format string" ) || |
7030 | !matchDfmtNfmt(Dfmt, Nfmt, FormatStr: Str, Loc)) |
7031 | return ParseStatus::Failure; |
7032 | if (Dfmt == DFMT_UNDEF) |
7033 | return Error(L: Loc, Msg: "duplicate numeric format" ); |
7034 | if (Nfmt == NFMT_UNDEF) |
7035 | return Error(L: Loc, Msg: "duplicate data format" ); |
7036 | } |
7037 | |
7038 | Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; |
7039 | Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; |
7040 | |
7041 | if (isGFX10Plus()) { |
7042 | auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, STI: getSTI()); |
7043 | if (Ufmt == UFMT_UNDEF) |
7044 | return Error(L: FormatLoc, Msg: "unsupported format" ); |
7045 | Format = Ufmt; |
7046 | } else { |
7047 | Format = encodeDfmtNfmt(Dfmt, Nfmt); |
7048 | } |
7049 | |
7050 | return ParseStatus::Success; |
7051 | } |
7052 | |
7053 | ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, |
7054 | SMLoc Loc, |
7055 | int64_t &Format) { |
7056 | using namespace llvm::AMDGPU::MTBUFFormat; |
7057 | |
7058 | auto Id = getUnifiedFormat(Name: FormatStr, STI: getSTI()); |
7059 | if (Id == UFMT_UNDEF) |
7060 | return ParseStatus::NoMatch; |
7061 | |
7062 | if (!isGFX10Plus()) |
7063 | return Error(L: Loc, Msg: "unified format is not supported on this GPU" ); |
7064 | |
7065 | Format = Id; |
7066 | return ParseStatus::Success; |
7067 | } |
7068 | |
7069 | ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { |
7070 | using namespace llvm::AMDGPU::MTBUFFormat; |
7071 | SMLoc Loc = getLoc(); |
7072 | |
7073 | if (!parseExpr(Imm&: Format)) |
7074 | return ParseStatus::Failure; |
7075 | if (!isValidFormatEncoding(Val: Format, STI: getSTI())) |
7076 | return Error(L: Loc, Msg: "out of range format" ); |
7077 | |
7078 | return ParseStatus::Success; |
7079 | } |
7080 | |
7081 | ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { |
7082 | using namespace llvm::AMDGPU::MTBUFFormat; |
7083 | |
7084 | if (!trySkipId(Id: "format" , Kind: AsmToken::Colon)) |
7085 | return ParseStatus::NoMatch; |
7086 | |
7087 | if (trySkipToken(Kind: AsmToken::LBrac)) { |
7088 | StringRef FormatStr; |
7089 | SMLoc Loc = getLoc(); |
7090 | if (!parseId(Val&: FormatStr, ErrMsg: "expected a format string" )) |
7091 | return ParseStatus::Failure; |
7092 | |
7093 | auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); |
7094 | if (Res.isNoMatch()) |
7095 | Res = parseSymbolicSplitFormat(FormatStr, FormatLoc: Loc, Format); |
7096 | if (!Res.isSuccess()) |
7097 | return Res; |
7098 | |
7099 | if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket" )) |
7100 | return ParseStatus::Failure; |
7101 | |
7102 | return ParseStatus::Success; |
7103 | } |
7104 | |
7105 | return parseNumericFormat(Format); |
7106 | } |
7107 | |
7108 | ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { |
7109 | using namespace llvm::AMDGPU::MTBUFFormat; |
7110 | |
7111 | int64_t Format = getDefaultFormatEncoding(STI: getSTI()); |
7112 | ParseStatus Res; |
7113 | SMLoc Loc = getLoc(); |
7114 | |
7115 | // Parse legacy format syntax. |
7116 | Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); |
7117 | if (Res.isFailure()) |
7118 | return Res; |
7119 | |
7120 | bool FormatFound = Res.isSuccess(); |
7121 | |
7122 | Operands.push_back( |
7123 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Format, Loc, Type: AMDGPUOperand::ImmTyFORMAT)); |
7124 | |
7125 | if (FormatFound) |
7126 | trySkipToken(Kind: AsmToken::Comma); |
7127 | |
7128 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
7129 | // We are expecting an soffset operand, |
7130 |     // but let the matcher handle the error. |
7131 | return ParseStatus::Success; |
7132 | } |
7133 | |
7134 | // Parse soffset. |
7135 | Res = parseRegOrImm(Operands); |
7136 | if (!Res.isSuccess()) |
7137 | return Res; |
7138 | |
7139 | trySkipToken(Kind: AsmToken::Comma); |
7140 | |
7141 | if (!FormatFound) { |
7142 | Res = parseSymbolicOrNumericFormat(Format); |
7143 | if (Res.isFailure()) |
7144 | return Res; |
7145 | if (Res.isSuccess()) { |
7146 | auto Size = Operands.size(); |
7147 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); |
7148 | assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); |
7149 | Op.setImm(Format); |
7150 | } |
7151 | return ParseStatus::Success; |
7152 | } |
7153 | |
7154 | if (isId(Id: "format" ) && peekToken().is(K: AsmToken::Colon)) |
7155 | return Error(L: getLoc(), Msg: "duplicate format" ); |
7156 | return ParseStatus::Success; |
7157 | } |
7158 | |
7159 | ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { |
7160 | ParseStatus Res = |
7161 | parseIntWithPrefix(Prefix: "offset" , Operands, ImmTy: AMDGPUOperand::ImmTyOffset); |
7162 | if (Res.isNoMatch()) { |
7163 | Res = parseIntWithPrefix(Prefix: "inst_offset" , Operands, |
7164 | ImmTy: AMDGPUOperand::ImmTyInstOffset); |
7165 | } |
7166 | return Res; |
7167 | } |
7168 | |
7169 | ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { |
7170 | ParseStatus Res = |
7171 | parseNamedBit(Name: "r128" , Operands, ImmTy: AMDGPUOperand::ImmTyR128A16); |
7172 | if (Res.isNoMatch()) |
7173 | Res = parseNamedBit(Name: "a16" , Operands, ImmTy: AMDGPUOperand::ImmTyA16); |
7174 | return Res; |
7175 | } |
7176 | |
7177 | ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { |
7178 | ParseStatus Res = |
7179 | parseIntWithPrefix(Prefix: "blgp" , Operands, ImmTy: AMDGPUOperand::ImmTyBLGP); |
7180 | if (Res.isNoMatch()) { |
7181 | Res = |
7182 | parseOperandArrayWithPrefix(Prefix: "neg" , Operands, ImmTy: AMDGPUOperand::ImmTyBLGP); |
7183 | } |
7184 | return Res; |
7185 | } |
7186 | |
7187 | //===----------------------------------------------------------------------===// |
7188 | // Exp |
7189 | //===----------------------------------------------------------------------===// |
7190 | |
7191 | void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { |
7192 | OptionalImmIndexMap OptionalIdx; |
7193 | |
7194 | unsigned OperandIdx[4]; |
7195 | unsigned EnMask = 0; |
7196 | int SrcIdx = 0; |
7197 | |
7198 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
7199 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
7200 | |
7201 | // Add the register arguments |
7202 | if (Op.isReg()) { |
7203 | assert(SrcIdx < 4); |
7204 | OperandIdx[SrcIdx] = Inst.size(); |
7205 | Op.addRegOperands(Inst, N: 1); |
7206 | ++SrcIdx; |
7207 | continue; |
7208 | } |
7209 | |
7210 | if (Op.isOff()) { |
7211 | assert(SrcIdx < 4); |
7212 | OperandIdx[SrcIdx] = Inst.size(); |
7213 | Inst.addOperand(Op: MCOperand::createReg(Reg: MCRegister())); |
7214 | ++SrcIdx; |
7215 | continue; |
7216 | } |
7217 | |
7218 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { |
7219 | Op.addImmOperands(Inst, N: 1); |
7220 | continue; |
7221 | } |
7222 | |
7223 | if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en" )) |
7224 | continue; |
7225 | |
7226 | // Handle optional arguments |
7227 | OptionalIdx[Op.getImmTy()] = i; |
7228 | } |
7229 | |
7230 | assert(SrcIdx == 4); |
7231 | |
7232 | bool Compr = false; |
7233 | if (OptionalIdx.find(x: AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { |
7234 | Compr = true; |
7235 | Inst.getOperand(i: OperandIdx[1]) = Inst.getOperand(i: OperandIdx[2]); |
7236 | Inst.getOperand(i: OperandIdx[2]).setReg(MCRegister()); |
7237 | Inst.getOperand(i: OperandIdx[3]).setReg(MCRegister()); |
7238 | } |
7239 | |
7240 | for (auto i = 0; i < SrcIdx; ++i) { |
7241 | if (Inst.getOperand(i: OperandIdx[i]).getReg()) { |
7242 | EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); |
7243 | } |
7244 | } |
7245 | |
7246 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpVM); |
7247 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpCompr); |
7248 | |
7249 | Inst.addOperand(Op: MCOperand::createImm(Val: EnMask)); |
7250 | } |
7251 | |
7252 | //===----------------------------------------------------------------------===// |
7253 | // s_waitcnt |
7254 | //===----------------------------------------------------------------------===// |
7255 | |
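// Encode one counter value into the packed s_waitcnt immediate. Returns true
// on failure, i.e. when the requested value does not round-trip through
// encode/decode and saturation was not requested.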
7256 | static bool |
7257 | encodeCnt( |
7258 | const AMDGPU::IsaVersion ISA, |
7259 | int64_t &IntVal, |
7260 | int64_t CntVal, |
7261 | bool Saturate, |
7262 | unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), |
7263 | unsigned (*decode)(const IsaVersion &Version, unsigned)) |
7264 | { |
7265 | bool Failed = false; |
7266 | |
7267 | IntVal = encode(ISA, IntVal, CntVal); |
7268 | if (CntVal != decode(ISA, IntVal)) { |
7269 | if (Saturate) { |
7270 | IntVal = encode(ISA, IntVal, -1); |
7271 | } else { |
7272 | Failed = true; |
7273 | } |
7274 | } |
7275 | return Failed; |
7276 | } |
7277 | |
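// Parse one "<counter>(<value>)" field of an s_waitcnt operand, e.g.
// "s_waitcnt vmcnt(0) & lgkmcnt(0)" (illustrative). Fields may be separated by
// '&' or ',', and a "_sat" counter-name suffix clamps an out-of-range value.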
7278 | bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { |
7279 | |
7280 | SMLoc CntLoc = getLoc(); |
7281 | StringRef CntName = getTokenStr(); |
7282 | |
7283 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name" ) || |
7284 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7285 | return false; |
7286 | |
7287 | int64_t CntVal; |
7288 | SMLoc ValLoc = getLoc(); |
7289 | if (!parseExpr(Imm&: CntVal)) |
7290 | return false; |
7291 | |
7292 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU()); |
7293 | |
7294 | bool Failed = true; |
7295 | bool Sat = CntName.ends_with(Suffix: "_sat" ); |
7296 | |
7297 | if (CntName == "vmcnt" || CntName == "vmcnt_sat" ) { |
7298 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeVmcnt, decode: decodeVmcnt); |
7299 | } else if (CntName == "expcnt" || CntName == "expcnt_sat" ) { |
7300 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeExpcnt, decode: decodeExpcnt); |
7301 | } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat" ) { |
7302 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeLgkmcnt, decode: decodeLgkmcnt); |
7303 | } else { |
7304 | Error(L: CntLoc, Msg: "invalid counter name " + CntName); |
7305 | return false; |
7306 | } |
7307 | |
7308 | if (Failed) { |
7309 | Error(L: ValLoc, Msg: "too large value for " + CntName); |
7310 | return false; |
7311 | } |
7312 | |
7313 | if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7314 | return false; |
7315 | |
7316 | if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) { |
7317 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
7318 | Error(L: getLoc(), Msg: "expected a counter name" ); |
7319 | return false; |
7320 | } |
7321 | } |
7322 | |
7323 | return true; |
7324 | } |
7325 | |
7326 | ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { |
7327 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU()); |
7328 | int64_t Waitcnt = getWaitcntBitMask(Version: ISA); |
7329 | SMLoc S = getLoc(); |
7330 | |
7331 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7332 | while (!isToken(Kind: AsmToken::EndOfStatement)) { |
7333 | if (!parseCnt(IntVal&: Waitcnt)) |
7334 | return ParseStatus::Failure; |
7335 | } |
7336 | } else { |
7337 | if (!parseExpr(Imm&: Waitcnt)) |
7338 | return ParseStatus::Failure; |
7339 | } |
7340 | |
7341 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Waitcnt, Loc: S)); |
7342 | return ParseStatus::Success; |
7343 | } |
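// Illustrative examples of the forms accepted above (a sketch, not an
// exhaustive list; counter names follow the vmcnt/expcnt/lgkmcnt spellings
// handled by parseCnt):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt vmcnt(1) & lgkmcnt(2)
//   s_waitcnt 0                       ; a plain integer expression also works
// A "_sat" suffix (e.g. vmcnt_sat) clamps an out-of-range value instead of
// reporting an error.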
7344 | |
7345 | bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { |
7346 | SMLoc FieldLoc = getLoc(); |
7347 | StringRef FieldName = getTokenStr(); |
7348 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a field name" ) || |
7349 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7350 | return false; |
7351 | |
7352 | SMLoc ValueLoc = getLoc(); |
7353 | StringRef ValueName = getTokenStr(); |
7354 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a value name" ) || |
7355 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a right parenthesis" )) |
7356 | return false; |
7357 | |
7358 | unsigned Shift; |
7359 | if (FieldName == "instid0" ) { |
7360 | Shift = 0; |
7361 | } else if (FieldName == "instskip" ) { |
7362 | Shift = 4; |
7363 | } else if (FieldName == "instid1" ) { |
7364 | Shift = 7; |
7365 | } else { |
7366 | Error(L: FieldLoc, Msg: "invalid field name " + FieldName); |
7367 | return false; |
7368 | } |
7369 | |
7370 | int Value; |
7371 | if (Shift == 4) { |
7372 | // Parse values for instskip. |
7373 | Value = StringSwitch<int>(ValueName) |
7374 | .Case(S: "SAME" , Value: 0) |
7375 | .Case(S: "NEXT" , Value: 1) |
7376 | .Case(S: "SKIP_1" , Value: 2) |
7377 | .Case(S: "SKIP_2" , Value: 3) |
7378 | .Case(S: "SKIP_3" , Value: 4) |
7379 | .Case(S: "SKIP_4" , Value: 5) |
7380 | .Default(Value: -1); |
7381 | } else { |
7382 | // Parse values for instid0 and instid1. |
7383 | Value = StringSwitch<int>(ValueName) |
7384 | .Case(S: "NO_DEP" , Value: 0) |
7385 | .Case(S: "VALU_DEP_1" , Value: 1) |
7386 | .Case(S: "VALU_DEP_2" , Value: 2) |
7387 | .Case(S: "VALU_DEP_3" , Value: 3) |
7388 | .Case(S: "VALU_DEP_4" , Value: 4) |
7389 | .Case(S: "TRANS32_DEP_1" , Value: 5) |
7390 | .Case(S: "TRANS32_DEP_2" , Value: 6) |
7391 | .Case(S: "TRANS32_DEP_3" , Value: 7) |
7392 | .Case(S: "FMA_ACCUM_CYCLE_1" , Value: 8) |
7393 | .Case(S: "SALU_CYCLE_1" , Value: 9) |
7394 | .Case(S: "SALU_CYCLE_2" , Value: 10) |
7395 | .Case(S: "SALU_CYCLE_3" , Value: 11) |
7396 | .Default(Value: -1); |
7397 | } |
7398 | if (Value < 0) { |
7399 | Error(L: ValueLoc, Msg: "invalid value name " + ValueName); |
7400 | return false; |
7401 | } |
7402 | |
7403 | Delay |= Value << Shift; |
7404 | return true; |
7405 | } |
7406 | |
7407 | ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { |
7408 | int64_t Delay = 0; |
7409 | SMLoc S = getLoc(); |
7410 | |
7411 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7412 | do { |
7413 | if (!parseDelay(Delay)) |
7414 | return ParseStatus::Failure; |
7415 | } while (trySkipToken(Kind: AsmToken::Pipe)); |
7416 | } else { |
7417 | if (!parseExpr(Imm&: Delay)) |
7418 | return ParseStatus::Failure; |
7419 | } |
7420 | |
7421 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Delay, Loc: S)); |
7422 | return ParseStatus::Success; |
7423 | } |
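// Illustrative example of the field syntax handled by parseDelay (a sketch;
// the field and value names are the ones listed above):
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// A plain integer expression may be used instead of the field form.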
7424 | |
7425 | bool |
7426 | AMDGPUOperand::isSWaitCnt() const { |
7427 | return isImm(); |
7428 | } |
7429 | |
7430 | bool AMDGPUOperand::isSDelayALU() const { return isImm(); } |
7431 | |
7432 | //===----------------------------------------------------------------------===// |
7433 | // DepCtr |
7434 | //===----------------------------------------------------------------------===// |
7435 | |
7436 | void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, |
7437 | StringRef DepCtrName) { |
7438 | switch (ErrorId) { |
7439 | case OPR_ID_UNKNOWN: |
7440 | Error(L: Loc, Msg: Twine("invalid counter name " , DepCtrName)); |
7441 | return; |
7442 | case OPR_ID_UNSUPPORTED: |
7443 | Error(L: Loc, Msg: Twine(DepCtrName, " is not supported on this GPU" )); |
7444 | return; |
7445 | case OPR_ID_DUPLICATE: |
7446 | Error(L: Loc, Msg: Twine("duplicate counter name " , DepCtrName)); |
7447 | return; |
7448 | case OPR_VAL_INVALID: |
7449 | Error(L: Loc, Msg: Twine("invalid value for " , DepCtrName)); |
7450 | return; |
7451 | default: |
7452 | assert(false); |
7453 | } |
7454 | } |
7455 | |
7456 | bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { |
7457 | |
7458 | using namespace llvm::AMDGPU::DepCtr; |
7459 | |
7460 | SMLoc DepCtrLoc = getLoc(); |
7461 | StringRef DepCtrName = getTokenStr(); |
7462 | |
7463 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name" ) || |
7464 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7465 | return false; |
7466 | |
7467 | int64_t ExprVal; |
7468 | if (!parseExpr(Imm&: ExprVal)) |
7469 | return false; |
7470 | |
7471 | unsigned PrevOprMask = UsedOprMask; |
7472 | int CntVal = encodeDepCtr(Name: DepCtrName, Val: ExprVal, UsedOprMask, STI: getSTI()); |
7473 | |
7474 | if (CntVal < 0) { |
7475 | depCtrError(Loc: DepCtrLoc, ErrorId: CntVal, DepCtrName); |
7476 | return false; |
7477 | } |
7478 | |
7479 | if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7480 | return false; |
7481 | |
7482 | if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) { |
7483 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
7484 | Error(L: getLoc(), Msg: "expected a counter name" ); |
7485 | return false; |
7486 | } |
7487 | } |
7488 | |
7489 | unsigned CntValMask = PrevOprMask ^ UsedOprMask; |
7490 | DepCtr = (DepCtr & ~CntValMask) | CntVal; |
7491 | return true; |
7492 | } |
7493 | |
7494 | ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { |
7495 | using namespace llvm::AMDGPU::DepCtr; |
7496 | |
7497 | int64_t DepCtr = getDefaultDepCtrEncoding(STI: getSTI()); |
7498 | SMLoc Loc = getLoc(); |
7499 | |
7500 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7501 | unsigned UsedOprMask = 0; |
7502 | while (!isToken(Kind: AsmToken::EndOfStatement)) { |
7503 | if (!parseDepCtr(DepCtr, UsedOprMask)) |
7504 | return ParseStatus::Failure; |
7505 | } |
7506 | } else { |
7507 | if (!parseExpr(Imm&: DepCtr)) |
7508 | return ParseStatus::Failure; |
7509 | } |
7510 | |
7511 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DepCtr, Loc)); |
7512 | return ParseStatus::Success; |
7513 | } |
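// Illustrative sketch, assuming counter names such as depctr_va_vdst and
// depctr_sa_sdst from the DepCtr tables (availability depends on the
// subtarget):
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_sa_sdst(0)
// A plain integer expression may be used instead of the counter form.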
7514 | |
7515 | bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } |
7516 | |
7517 | //===----------------------------------------------------------------------===// |
7518 | // hwreg |
7519 | //===----------------------------------------------------------------------===// |
7520 | |
7521 | ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg, |
7522 | OperandInfoTy &Offset, |
7523 | OperandInfoTy &Width) { |
7524 | using namespace llvm::AMDGPU::Hwreg; |
7525 | |
7526 | if (!trySkipId(Id: "hwreg" , Kind: AsmToken::LParen)) |
7527 | return ParseStatus::NoMatch; |
7528 | |
7529 | // The register may be specified by name or using a numeric code |
7530 | HwReg.Loc = getLoc(); |
7531 | if (isToken(Kind: AsmToken::Identifier) && |
7532 | (HwReg.Val = getHwregId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) { |
7533 | HwReg.IsSymbolic = true; |
7534 | lex(); // skip register name |
7535 | } else if (!parseExpr(Imm&: HwReg.Val, Expected: "a register name" )) { |
7536 | return ParseStatus::Failure; |
7537 | } |
7538 | |
7539 | if (trySkipToken(Kind: AsmToken::RParen)) |
7540 | return ParseStatus::Success; |
7541 | |
7542 | // parse optional params |
7543 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma or a closing parenthesis" )) |
7544 | return ParseStatus::Failure; |
7545 | |
7546 | Offset.Loc = getLoc(); |
7547 | if (!parseExpr(Imm&: Offset.Val)) |
7548 | return ParseStatus::Failure; |
7549 | |
7550 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) |
7551 | return ParseStatus::Failure; |
7552 | |
7553 | Width.Loc = getLoc(); |
7554 | if (!parseExpr(Imm&: Width.Val) || |
7555 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7556 | return ParseStatus::Failure; |
7557 | |
7558 | return ParseStatus::Success; |
7559 | } |
7560 | |
7561 | ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { |
7562 | using namespace llvm::AMDGPU::Hwreg; |
7563 | |
7564 | int64_t ImmVal = 0; |
7565 | SMLoc Loc = getLoc(); |
7566 | |
7567 | StructuredOpField HwReg("id" , "hardware register" , HwregId::Width, |
7568 | HwregId::Default); |
7569 | StructuredOpField Offset("offset" , "bit offset" , HwregOffset::Width, |
7570 | HwregOffset::Default); |
7571 | struct : StructuredOpField { |
7572 | using StructuredOpField::StructuredOpField; |
7573 | bool validate(AMDGPUAsmParser &Parser) const override { |
7574 | if (!isUIntN(N: Width, x: Val - 1)) |
7575 | return Error(Parser, Err: "only values from 1 to 32 are legal" ); |
7576 | return true; |
7577 | } |
7578 | } Width("size" , "bitfield width" , HwregSize::Width, HwregSize::Default); |
7579 | ParseStatus Res = parseStructuredOpFields(Fields: {&HwReg, &Offset, &Width}); |
7580 | |
7581 | if (Res.isNoMatch()) |
7582 | Res = parseHwregFunc(HwReg, Offset, Width); |
7583 | |
7584 | if (Res.isSuccess()) { |
7585 | if (!validateStructuredOpFields(Fields: {&HwReg, &Offset, &Width})) |
7586 | return ParseStatus::Failure; |
7587 | ImmVal = HwregEncoding::encode(Values: HwReg.Val, Values: Offset.Val, Values: Width.Val); |
7588 | } |
7589 | |
7590 | if (Res.isNoMatch() && |
7591 | parseExpr(Imm&: ImmVal, Expected: "a hwreg macro, structured immediate" )) |
7592 | Res = ParseStatus::Success; |
7593 | |
7594 | if (!Res.isSuccess()) |
7595 | return ParseStatus::Failure; |
7596 | |
7597 | if (!isUInt<16>(x: ImmVal)) |
7598 | return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal" ); |
7599 | Operands.push_back( |
7600 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTyHwreg)); |
7601 | return ParseStatus::Success; |
7602 | } |
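// Illustrative examples of hwreg operand forms (a sketch; symbolic names such
// as HW_REG_MODE come from the Hwreg tables and vary by subtarget):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)            ; register only
//   s_setreg_b32 hwreg(HW_REG_MODE, 0, 32), s0     ; register, offset, width
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}  ; structured form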
7603 | |
7604 | bool AMDGPUOperand::isHwreg() const { |
7605 | return isImmTy(ImmT: ImmTyHwreg); |
7606 | } |
7607 | |
7608 | //===----------------------------------------------------------------------===// |
7609 | // sendmsg |
7610 | //===----------------------------------------------------------------------===// |
7611 | |
7612 | bool |
7613 | AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, |
7614 | OperandInfoTy &Op, |
7615 | OperandInfoTy &Stream) { |
7616 | using namespace llvm::AMDGPU::SendMsg; |
7617 | |
7618 | Msg.Loc = getLoc(); |
7619 | if (isToken(Kind: AsmToken::Identifier) && |
7620 | (Msg.Val = getMsgId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) { |
7621 | Msg.IsSymbolic = true; |
7622 | lex(); // skip message name |
7623 | } else if (!parseExpr(Imm&: Msg.Val, Expected: "a message name" )) { |
7624 | return false; |
7625 | } |
7626 | |
7627 | if (trySkipToken(Kind: AsmToken::Comma)) { |
7628 | Op.IsDefined = true; |
7629 | Op.Loc = getLoc(); |
7630 | if (isToken(Kind: AsmToken::Identifier) && |
7631 | (Op.Val = getMsgOpId(MsgId: Msg.Val, Name: getTokenStr(), STI: getSTI())) != |
7632 | OPR_ID_UNKNOWN) { |
7633 | lex(); // skip operation name |
7634 | } else if (!parseExpr(Imm&: Op.Val, Expected: "an operation name" )) { |
7635 | return false; |
7636 | } |
7637 | |
7638 | if (trySkipToken(Kind: AsmToken::Comma)) { |
7639 | Stream.IsDefined = true; |
7640 | Stream.Loc = getLoc(); |
7641 | if (!parseExpr(Imm&: Stream.Val)) |
7642 | return false; |
7643 | } |
7644 | } |
7645 | |
7646 | return skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" ); |
7647 | } |
7648 | |
7649 | bool |
7650 | AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, |
7651 | const OperandInfoTy &Op, |
7652 | const OperandInfoTy &Stream) { |
7653 | using namespace llvm::AMDGPU::SendMsg; |
7654 | |
7655 | // Validation strictness depends on whether message is specified |
7656 | // in a symbolic or in a numeric form. In the latter case |
7657 | // only the encoding possibility is checked. |

7658 | bool Strict = Msg.IsSymbolic; |
7659 | |
7660 | if (Strict) { |
7661 | if (Msg.Val == OPR_ID_UNSUPPORTED) { |
7662 | Error(L: Msg.Loc, Msg: "specified message id is not supported on this GPU" ); |
7663 | return false; |
7664 | } |
7665 | } else { |
7666 | if (!isValidMsgId(MsgId: Msg.Val, STI: getSTI())) { |
7667 | Error(L: Msg.Loc, Msg: "invalid message id" ); |
7668 | return false; |
7669 | } |
7670 | } |
7671 | if (Strict && (msgRequiresOp(MsgId: Msg.Val, STI: getSTI()) != Op.IsDefined)) { |
7672 | if (Op.IsDefined) { |
7673 | Error(L: Op.Loc, Msg: "message does not support operations" ); |
7674 | } else { |
7675 | Error(L: Msg.Loc, Msg: "missing message operation" ); |
7676 | } |
7677 | return false; |
7678 | } |
7679 | if (!isValidMsgOp(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI(), Strict)) { |
7680 | if (Op.Val == OPR_ID_UNSUPPORTED) |
7681 | Error(L: Op.Loc, Msg: "specified operation id is not supported on this GPU" ); |
7682 | else |
7683 | Error(L: Op.Loc, Msg: "invalid operation id" ); |
7684 | return false; |
7685 | } |
7686 | if (Strict && !msgSupportsStream(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI()) && |
7687 | Stream.IsDefined) { |
7688 | Error(L: Stream.Loc, Msg: "message operation does not support streams" ); |
7689 | return false; |
7690 | } |
7691 | if (!isValidMsgStream(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val, STI: getSTI(), Strict)) { |
7692 | Error(L: Stream.Loc, Msg: "invalid message stream id" ); |
7693 | return false; |
7694 | } |
7695 | return true; |
7696 | } |
7697 | |
7698 | ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { |
7699 | using namespace llvm::AMDGPU::SendMsg; |
7700 | |
7701 | int64_t ImmVal = 0; |
7702 | SMLoc Loc = getLoc(); |
7703 | |
7704 | if (trySkipId(Id: "sendmsg" , Kind: AsmToken::LParen)) { |
7705 | OperandInfoTy Msg(OPR_ID_UNKNOWN); |
7706 | OperandInfoTy Op(OP_NONE_); |
7707 | OperandInfoTy Stream(STREAM_ID_NONE_); |
7708 | if (parseSendMsgBody(Msg, Op, Stream) && |
7709 | validateSendMsg(Msg, Op, Stream)) { |
7710 | ImmVal = encodeMsg(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val); |
7711 | } else { |
7712 | return ParseStatus::Failure; |
7713 | } |
7714 | } else if (parseExpr(Imm&: ImmVal, Expected: "a sendmsg macro" )) { |
7715 | if (ImmVal < 0 || !isUInt<16>(x: ImmVal)) |
7716 | return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal" ); |
7717 | } else { |
7718 | return ParseStatus::Failure; |
7719 | } |
7720 | |
7721 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTySendMsg)); |
7722 | return ParseStatus::Success; |
7723 | } |
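// Illustrative examples of sendmsg operand forms (a sketch; message and
// operation names such as MSG_GS and GS_OP_EMIT come from the SendMsg tables
// and depend on the subtarget):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// A plain 16-bit immediate may be used instead of the sendmsg macro.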
7724 | |
7725 | bool AMDGPUOperand::isSendMsg() const { |
7726 | return isImmTy(ImmT: ImmTySendMsg); |
7727 | } |
7728 | |
7729 | //===----------------------------------------------------------------------===// |
7730 | // v_interp |
7731 | //===----------------------------------------------------------------------===// |
7732 | |
7733 | ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { |
7734 | StringRef Str; |
7735 | SMLoc S = getLoc(); |
7736 | |
7737 | if (!parseId(Val&: Str)) |
7738 | return ParseStatus::NoMatch; |
7739 | |
7740 | int Slot = StringSwitch<int>(Str) |
7741 | .Case(S: "p10" , Value: 0) |
7742 | .Case(S: "p20" , Value: 1) |
7743 | .Case(S: "p0" , Value: 2) |
7744 | .Default(Value: -1); |
7745 | |
7746 | if (Slot == -1) |
7747 | return Error(L: S, Msg: "invalid interpolation slot" ); |
7748 | |
7749 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Slot, Loc: S, |
7750 | Type: AMDGPUOperand::ImmTyInterpSlot)); |
7751 | return ParseStatus::Success; |
7752 | } |
7753 | |
7754 | ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { |
7755 | StringRef Str; |
7756 | SMLoc S = getLoc(); |
7757 | |
7758 | if (!parseId(Val&: Str)) |
7759 | return ParseStatus::NoMatch; |
7760 | |
7761 | if (!Str.starts_with(Prefix: "attr" )) |
7762 | return Error(L: S, Msg: "invalid interpolation attribute" ); |
7763 | |
7764 | StringRef Chan = Str.take_back(N: 2); |
7765 | int AttrChan = StringSwitch<int>(Chan) |
7766 | .Case(S: ".x" , Value: 0) |
7767 | .Case(S: ".y" , Value: 1) |
7768 | .Case(S: ".z" , Value: 2) |
7769 | .Case(S: ".w" , Value: 3) |
7770 | .Default(Value: -1); |
7771 | if (AttrChan == -1) |
7772 | return Error(L: S, Msg: "invalid or missing interpolation attribute channel" ); |
7773 | |
7774 | Str = Str.drop_back(N: 2).drop_front(N: 4); |
7775 | |
7776 | uint8_t Attr; |
7777 | if (Str.getAsInteger(Radix: 10, Result&: Attr)) |
7778 | return Error(L: S, Msg: "invalid or missing interpolation attribute number" ); |
7779 | |
7780 | if (Attr > 32) |
7781 | return Error(L: S, Msg: "out of bounds interpolation attribute number" ); |
7782 | |
7783 | SMLoc SChan = SMLoc::getFromPointer(Ptr: Chan.data()); |
7784 | |
7785 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Attr, Loc: S, |
7786 | Type: AMDGPUOperand::ImmTyInterpAttr)); |
7787 | Operands.push_back(Elt: AMDGPUOperand::CreateImm( |
7788 | AsmParser: this, Val: AttrChan, Loc: SChan, Type: AMDGPUOperand::ImmTyInterpAttrChan)); |
7789 | return ParseStatus::Success; |
7790 | } |
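// Illustrative examples of the interpolation operands parsed above (a sketch):
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v0, p10, attr3.y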
7791 | |
7792 | //===----------------------------------------------------------------------===// |
7793 | // exp |
7794 | //===----------------------------------------------------------------------===// |
7795 | |
7796 | ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { |
7797 | using namespace llvm::AMDGPU::Exp; |
7798 | |
7799 | StringRef Str; |
7800 | SMLoc S = getLoc(); |
7801 | |
7802 | if (!parseId(Val&: Str)) |
7803 | return ParseStatus::NoMatch; |
7804 | |
7805 | unsigned Id = getTgtId(Name: Str); |
7806 | if (Id == ET_INVALID || !isSupportedTgtId(Id, STI: getSTI())) |
7807 | return Error(L: S, Msg: (Id == ET_INVALID) |
7808 | ? "invalid exp target" |
7809 | : "exp target is not supported on this GPU" ); |
7810 | |
7811 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Id, Loc: S, |
7812 | Type: AMDGPUOperand::ImmTyExpTgt)); |
7813 | return ParseStatus::Success; |
7814 | } |
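// Illustrative example of an export instruction using these target names
// (a sketch; valid targets such as mrt0, mrtz, pos0 and param0 depend on the
// subtarget):
//   exp mrt0 v0, v1, v2, v3 done vm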
7815 | |
7816 | //===----------------------------------------------------------------------===// |
7817 | // parser helpers |
7818 | //===----------------------------------------------------------------------===// |
7819 | |
7820 | bool |
7821 | AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { |
7822 | return Token.is(K: AsmToken::Identifier) && Token.getString() == Id; |
7823 | } |
7824 | |
7825 | bool |
7826 | AMDGPUAsmParser::isId(const StringRef Id) const { |
7827 | return isId(Token: getToken(), Id); |
7828 | } |
7829 | |
7830 | bool |
7831 | AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { |
7832 | return getTokenKind() == Kind; |
7833 | } |
7834 | |
7835 | StringRef AMDGPUAsmParser::getId() const { |
7836 | return isToken(Kind: AsmToken::Identifier) ? getTokenStr() : StringRef(); |
7837 | } |
7838 | |
7839 | bool |
7840 | AMDGPUAsmParser::trySkipId(const StringRef Id) { |
7841 | if (isId(Id)) { |
7842 | lex(); |
7843 | return true; |
7844 | } |
7845 | return false; |
7846 | } |
7847 | |
7848 | bool |
7849 | AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { |
7850 | if (isToken(Kind: AsmToken::Identifier)) { |
7851 | StringRef Tok = getTokenStr(); |
7852 | if (Tok.starts_with(Prefix: Pref) && Tok.drop_front(N: Pref.size()) == Id) { |
7853 | lex(); |
7854 | return true; |
7855 | } |
7856 | } |
7857 | return false; |
7858 | } |
7859 | |
7860 | bool |
7861 | AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { |
7862 | if (isId(Id) && peekToken().is(K: Kind)) { |
7863 | lex(); |
7864 | lex(); |
7865 | return true; |
7866 | } |
7867 | return false; |
7868 | } |
7869 | |
7870 | bool |
7871 | AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { |
7872 | if (isToken(Kind)) { |
7873 | lex(); |
7874 | return true; |
7875 | } |
7876 | return false; |
7877 | } |
7878 | |
7879 | bool |
7880 | AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, |
7881 | const StringRef ErrMsg) { |
7882 | if (!trySkipToken(Kind)) { |
7883 | Error(L: getLoc(), Msg: ErrMsg); |
7884 | return false; |
7885 | } |
7886 | return true; |
7887 | } |
7888 | |
7889 | bool |
7890 | AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { |
7891 | SMLoc S = getLoc(); |
7892 | |
7893 | const MCExpr *Expr; |
7894 | if (Parser.parseExpression(Res&: Expr)) |
7895 | return false; |
7896 | |
7897 | if (Expr->evaluateAsAbsolute(Res&: Imm)) |
7898 | return true; |
7899 | |
7900 | if (Expected.empty()) { |
7901 | Error(L: S, Msg: "expected absolute expression" ); |
7902 | } else { |
7903 | Error(L: S, Msg: Twine("expected " , Expected) + |
7904 | Twine(" or an absolute expression" )); |
7905 | } |
7906 | return false; |
7907 | } |
7908 | |
7909 | bool |
7910 | AMDGPUAsmParser::parseExpr(OperandVector &Operands) { |
7911 | SMLoc S = getLoc(); |
7912 | |
7913 | const MCExpr *Expr; |
7914 | if (Parser.parseExpression(Res&: Expr)) |
7915 | return false; |
7916 | |
7917 | int64_t IntVal; |
7918 | if (Expr->evaluateAsAbsolute(Res&: IntVal)) { |
7919 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S)); |
7920 | } else { |
7921 | Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S)); |
7922 | } |
7923 | return true; |
7924 | } |
7925 | |
7926 | bool |
7927 | AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { |
7928 | if (isToken(Kind: AsmToken::String)) { |
7929 | Val = getToken().getStringContents(); |
7930 | lex(); |
7931 | return true; |
7932 | } |
7933 | Error(L: getLoc(), Msg: ErrMsg); |
7934 | return false; |
7935 | } |
7936 | |
7937 | bool |
7938 | AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { |
7939 | if (isToken(Kind: AsmToken::Identifier)) { |
7940 | Val = getTokenStr(); |
7941 | lex(); |
7942 | return true; |
7943 | } |
7944 | if (!ErrMsg.empty()) |
7945 | Error(L: getLoc(), Msg: ErrMsg); |
7946 | return false; |
7947 | } |
7948 | |
7949 | AsmToken |
7950 | AMDGPUAsmParser::getToken() const { |
7951 | return Parser.getTok(); |
7952 | } |
7953 | |
7954 | AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { |
7955 | return isToken(Kind: AsmToken::EndOfStatement) |
7956 | ? getToken() |
7957 | : getLexer().peekTok(ShouldSkipSpace); |
7958 | } |
7959 | |
7960 | void |
7961 | AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { |
7962 | auto TokCount = getLexer().peekTokens(Buf: Tokens); |
7963 | |
7964 | for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) |
7965 | Tokens[Idx] = AsmToken(AsmToken::Error, "" ); |
7966 | } |
7967 | |
7968 | AsmToken::TokenKind |
7969 | AMDGPUAsmParser::getTokenKind() const { |
7970 | return getLexer().getKind(); |
7971 | } |
7972 | |
7973 | SMLoc |
7974 | AMDGPUAsmParser::getLoc() const { |
7975 | return getToken().getLoc(); |
7976 | } |
7977 | |
7978 | StringRef |
7979 | AMDGPUAsmParser::getTokenStr() const { |
7980 | return getToken().getString(); |
7981 | } |
7982 | |
7983 | void |
7984 | AMDGPUAsmParser::lex() { |
7985 | Parser.Lex(); |
7986 | } |
7987 | |
7988 | SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { |
7989 | return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); |
7990 | } |
7991 | |
7992 | SMLoc |
7993 | AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, |
7994 | const OperandVector &Operands) const { |
7995 | for (unsigned i = Operands.size() - 1; i > 0; --i) { |
7996 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
7997 | if (Test(Op)) |
7998 | return Op.getStartLoc(); |
7999 | } |
8000 | return getInstLoc(Operands); |
8001 | } |
8002 | |
8003 | SMLoc |
8004 | AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, |
8005 | const OperandVector &Operands) const { |
8006 | auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(ImmT: Type); }; |
8007 | return getOperandLoc(Test, Operands); |
8008 | } |
8009 | |
8010 | SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg, |
8011 | const OperandVector &Operands) const { |
8012 | auto Test = [=](const AMDGPUOperand& Op) { |
8013 | return Op.isRegKind() && Op.getReg() == Reg; |
8014 | }; |
8015 | return getOperandLoc(Test, Operands); |
8016 | } |
8017 | |
8018 | SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, |
8019 | bool SearchMandatoryLiterals) const { |
8020 | auto Test = [](const AMDGPUOperand& Op) { |
8021 | return Op.IsImmKindLiteral() || Op.isExpr(); |
8022 | }; |
8023 | SMLoc Loc = getOperandLoc(Test, Operands); |
8024 | if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) |
8025 | Loc = getMandatoryLitLoc(Operands); |
8026 | return Loc; |
8027 | } |
8028 | |
8029 | SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { |
8030 | auto Test = [](const AMDGPUOperand &Op) { |
8031 | return Op.IsImmKindMandatoryLiteral(); |
8032 | }; |
8033 | return getOperandLoc(Test, Operands); |
8034 | } |
8035 | |
8036 | SMLoc |
8037 | AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { |
8038 | auto Test = [](const AMDGPUOperand& Op) { |
8039 | return Op.isImmKindConst(); |
8040 | }; |
8041 | return getOperandLoc(Test, Operands); |
8042 | } |
8043 | |
8044 | ParseStatus |
8045 | AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) { |
8046 | if (!trySkipToken(Kind: AsmToken::LCurly)) |
8047 | return ParseStatus::NoMatch; |
8048 | |
8049 | bool First = true; |
8050 | while (!trySkipToken(Kind: AsmToken::RCurly)) { |
8051 | if (!First && |
8052 | !skipToken(Kind: AsmToken::Comma, ErrMsg: "comma or closing brace expected" )) |
8053 | return ParseStatus::Failure; |
8054 | |
8055 | StringRef Id = getTokenStr(); |
8056 | SMLoc IdLoc = getLoc(); |
8057 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "field name expected" ) || |
8058 | !skipToken(Kind: AsmToken::Colon, ErrMsg: "colon expected" )) |
8059 | return ParseStatus::Failure; |
8060 | |
8061 | const auto *I = |
8062 | find_if(Range&: Fields, P: [Id](StructuredOpField *F) { return F->Id == Id; }); |
8063 | if (I == Fields.end()) |
8064 | return Error(L: IdLoc, Msg: "unknown field" ); |
8065 | if ((*I)->IsDefined) |
8066 | return Error(L: IdLoc, Msg: "duplicate field" ); |
8067 | |
8068 | // TODO: Support symbolic values. |
8069 | (*I)->Loc = getLoc(); |
8070 | if (!parseExpr(Imm&: (*I)->Val)) |
8071 | return ParseStatus::Failure; |
8072 | (*I)->IsDefined = true; |
8073 | |
8074 | First = false; |
8075 | } |
8076 | return ParseStatus::Success; |
8077 | } |
8078 | |
8079 | bool AMDGPUAsmParser::validateStructuredOpFields( |
8080 | ArrayRef<const StructuredOpField *> Fields) { |
8081 | return all_of(Range&: Fields, P: [this](const StructuredOpField *F) { |
8082 | return F->validate(Parser&: *this); |
8083 | }); |
8084 | } |
8085 | |
8086 | //===----------------------------------------------------------------------===// |
8087 | // swizzle |
8088 | //===----------------------------------------------------------------------===// |
8089 | |
8090 | LLVM_READNONE |
8091 | static unsigned |
8092 | encodeBitmaskPerm(const unsigned AndMask, |
8093 | const unsigned OrMask, |
8094 | const unsigned XorMask) { |
8095 | using namespace llvm::AMDGPU::Swizzle; |
8096 | |
8097 | return BITMASK_PERM_ENC | |
8098 | (AndMask << BITMASK_AND_SHIFT) | |
8099 | (OrMask << BITMASK_OR_SHIFT) | |
8100 | (XorMask << BITMASK_XOR_SHIFT); |
8101 | } |
8102 | |
8103 | bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal, |
8104 | const unsigned MaxVal, |
8105 | const Twine &ErrMsg, SMLoc &Loc) { |
8106 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) { |
8107 | return false; |
8108 | } |
8109 | Loc = getLoc(); |
8110 | if (!parseExpr(Imm&: Op)) { |
8111 | return false; |
8112 | } |
8113 | if (Op < MinVal || Op > MaxVal) { |
8114 | Error(L: Loc, Msg: ErrMsg); |
8115 | return false; |
8116 | } |
8117 | |
8118 | return true; |
8119 | } |
8120 | |
8121 | bool |
8122 | AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, |
8123 | const unsigned MinVal, |
8124 | const unsigned MaxVal, |
8125 | const StringRef ErrMsg) { |
8126 | SMLoc Loc; |
8127 | for (unsigned i = 0; i < OpNum; ++i) { |
8128 | if (!parseSwizzleOperand(Op&: Op[i], MinVal, MaxVal, ErrMsg, Loc)) |
8129 | return false; |
8130 | } |
8131 | |
8132 | return true; |
8133 | } |
8134 | |
8135 | bool |
8136 | AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { |
8137 | using namespace llvm::AMDGPU::Swizzle; |
8138 | |
8139 | int64_t Lane[LANE_NUM]; |
8140 | if (parseSwizzleOperands(OpNum: LANE_NUM, Op: Lane, MinVal: 0, MaxVal: LANE_MAX, |
8141 | ErrMsg: "expected a 2-bit lane id" )) { |
8142 | Imm = QUAD_PERM_ENC; |
8143 | for (unsigned I = 0; I < LANE_NUM; ++I) { |
8144 | Imm |= Lane[I] << (LANE_SHIFT * I); |
8145 | } |
8146 | return true; |
8147 | } |
8148 | return false; |
8149 | } |
8150 | |
8151 | bool |
8152 | AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { |
8153 | using namespace llvm::AMDGPU::Swizzle; |
8154 | |
8155 | SMLoc Loc; |
8156 | int64_t GroupSize; |
8157 | int64_t LaneIdx; |
8158 | |
8159 | if (!parseSwizzleOperand(Op&: GroupSize, |
8160 | MinVal: 2, MaxVal: 32, |
8161 | ErrMsg: "group size must be in the interval [2,32]" , |
8162 | Loc)) { |
8163 | return false; |
8164 | } |
8165 | if (!isPowerOf2_64(Value: GroupSize)) { |
8166 | Error(L: Loc, Msg: "group size must be a power of two" ); |
8167 | return false; |
8168 | } |
8169 | if (parseSwizzleOperand(Op&: LaneIdx, |
8170 | MinVal: 0, MaxVal: GroupSize - 1, |
8171 | ErrMsg: "lane id must be in the interval [0,group size - 1]" , |
8172 | Loc)) { |
8173 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX - GroupSize + 1, OrMask: LaneIdx, XorMask: 0); |
8174 | return true; |
8175 | } |
8176 | return false; |
8177 | } |
8178 | |
8179 | bool |
8180 | AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { |
8181 | using namespace llvm::AMDGPU::Swizzle; |
8182 | |
8183 | SMLoc Loc; |
8184 | int64_t GroupSize; |
8185 | |
8186 | if (!parseSwizzleOperand(Op&: GroupSize, |
8187 | MinVal: 2, MaxVal: 32, |
8188 | ErrMsg: "group size must be in the interval [2,32]" , |
8189 | Loc)) { |
8190 | return false; |
8191 | } |
8192 | if (!isPowerOf2_64(Value: GroupSize)) { |
8193 | Error(L: Loc, Msg: "group size must be a power of two" ); |
8194 | return false; |
8195 | } |
8196 | |
8197 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: 0, XorMask: GroupSize - 1); |
8198 | return true; |
8199 | } |
8200 | |
8201 | bool |
8202 | AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { |
8203 | using namespace llvm::AMDGPU::Swizzle; |
8204 | |
8205 | SMLoc Loc; |
8206 | int64_t GroupSize; |
8207 | |
8208 | if (!parseSwizzleOperand(Op&: GroupSize, |
8209 | MinVal: 1, MaxVal: 16, |
8210 | ErrMsg: "group size must be in the interval [1,16]" , |
8211 | Loc)) { |
8212 | return false; |
8213 | } |
8214 | if (!isPowerOf2_64(Value: GroupSize)) { |
8215 | Error(L: Loc, Msg: "group size must be a power of two" ); |
8216 | return false; |
8217 | } |
8218 | |
8219 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: 0, XorMask: GroupSize); |
8220 | return true; |
8221 | } |
8222 | |
8223 | bool |
8224 | AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { |
8225 | using namespace llvm::AMDGPU::Swizzle; |
8226 | |
8227 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) { |
8228 | return false; |
8229 | } |
8230 | |
8231 | StringRef Ctl; |
8232 | SMLoc StrLoc = getLoc(); |
8233 | if (!parseString(Val&: Ctl)) { |
8234 | return false; |
8235 | } |
8236 | if (Ctl.size() != BITMASK_WIDTH) { |
8237 | Error(L: StrLoc, Msg: "expected a 5-character mask" ); |
8238 | return false; |
8239 | } |
8240 | |
8241 | unsigned AndMask = 0; |
8242 | unsigned OrMask = 0; |
8243 | unsigned XorMask = 0; |
8244 | |
8245 | for (size_t i = 0; i < Ctl.size(); ++i) { |
8246 | unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); |
8247 | switch(Ctl[i]) { |
8248 | default: |
8249 | Error(L: StrLoc, Msg: "invalid mask" ); |
8250 | return false; |
8251 | case '0': |
8252 | break; |
8253 | case '1': |
8254 | OrMask |= Mask; |
8255 | break; |
8256 | case 'p': |
8257 | AndMask |= Mask; |
8258 | break; |
8259 | case 'i': |
8260 | AndMask |= Mask; |
8261 | XorMask |= Mask; |
8262 | break; |
8263 | } |
8264 | } |
8265 | |
8266 | Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); |
8267 | return true; |
8268 | } |
8269 | |
8270 | bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) { |
8271 | using namespace llvm::AMDGPU::Swizzle; |
8272 | |
8273 | if (!AMDGPU::isGFX9Plus(STI: getSTI())) { |
8274 | Error(L: getLoc(), Msg: "FFT mode swizzle not supported on this GPU" ); |
8275 | return false; |
8276 | } |
8277 | |
8278 | int64_t Swizzle; |
8279 | SMLoc Loc; |
8280 | if (!parseSwizzleOperand(Op&: Swizzle, MinVal: 0, MaxVal: FFT_SWIZZLE_MAX, |
8281 | ErrMsg: "FFT swizzle must be in the interval [0," + |
8282 | Twine(FFT_SWIZZLE_MAX) + Twine(']'), |
8283 | Loc)) |
8284 | return false; |
8285 | |
8286 | Imm = FFT_MODE_ENC | Swizzle; |
8287 | return true; |
8288 | } |
8289 | |
8290 | bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) { |
8291 | using namespace llvm::AMDGPU::Swizzle; |
8292 | |
8293 | if (!AMDGPU::isGFX9Plus(STI: getSTI())) { |
8294 | Error(L: getLoc(), Msg: "Rotate mode swizzle not supported on this GPU" ); |
8295 | return false; |
8296 | } |
8297 | |
8298 | SMLoc Loc; |
8299 | int64_t Direction; |
8300 | |
8301 | if (!parseSwizzleOperand(Op&: Direction, MinVal: 0, MaxVal: 1, |
8302 | ErrMsg: "direction must be 0 (left) or 1 (right)" , Loc)) |
8303 | return false; |
8304 | |
8305 | int64_t RotateSize; |
8306 | if (!parseSwizzleOperand( |
8307 | Op&: RotateSize, MinVal: 0, MaxVal: ROTATE_MAX_SIZE, |
8308 | ErrMsg: "number of threads to rotate must be in the interval [0," + |
8309 | Twine(ROTATE_MAX_SIZE) + Twine(']'), |
8310 | Loc)) |
8311 | return false; |
8312 | |
8313 | Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) | |
8314 | (RotateSize << ROTATE_SIZE_SHIFT); |
8315 | return true; |
8316 | } |
8317 | |
8318 | bool |
8319 | AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { |
8320 | |
8321 | SMLoc OffsetLoc = getLoc(); |
8322 | |
8323 | if (!parseExpr(Imm, Expected: "a swizzle macro" )) { |
8324 | return false; |
8325 | } |
8326 | if (!isUInt<16>(x: Imm)) { |
8327 | Error(L: OffsetLoc, Msg: "expected a 16-bit offset" ); |
8328 | return false; |
8329 | } |
8330 | return true; |
8331 | } |
8332 | |
8333 | bool |
8334 | AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { |
8335 | using namespace llvm::AMDGPU::Swizzle; |
8336 | |
8337 | if (skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) { |

8338 | |
8339 | SMLoc ModeLoc = getLoc(); |
8340 | bool Ok = false; |
8341 | |
8342 | if (trySkipId(Id: IdSymbolic[ID_QUAD_PERM])) { |
8343 | Ok = parseSwizzleQuadPerm(Imm); |
8344 | } else if (trySkipId(Id: IdSymbolic[ID_BITMASK_PERM])) { |
8345 | Ok = parseSwizzleBitmaskPerm(Imm); |
8346 | } else if (trySkipId(Id: IdSymbolic[ID_BROADCAST])) { |
8347 | Ok = parseSwizzleBroadcast(Imm); |
8348 | } else if (trySkipId(Id: IdSymbolic[ID_SWAP])) { |
8349 | Ok = parseSwizzleSwap(Imm); |
8350 | } else if (trySkipId(Id: IdSymbolic[ID_REVERSE])) { |
8351 | Ok = parseSwizzleReverse(Imm); |
8352 | } else if (trySkipId(Id: IdSymbolic[ID_FFT])) { |
8353 | Ok = parseSwizzleFFT(Imm); |
8354 | } else if (trySkipId(Id: IdSymbolic[ID_ROTATE])) { |
8355 | Ok = parseSwizzleRotate(Imm); |
8356 | } else { |
8357 | Error(L: ModeLoc, Msg: "expected a swizzle mode" ); |
8358 | } |
8359 | |
8360 | return Ok && skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" ); |
8361 | } |
8362 | |
8363 | return false; |
8364 | } |
8365 | |
8366 | ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { |
8367 | SMLoc S = getLoc(); |
8368 | int64_t Imm = 0; |
8369 | |
8370 | if (trySkipId(Id: "offset" )) { |
8371 | |
8372 | bool Ok = false; |
8373 | if (skipToken(Kind: AsmToken::Colon, ErrMsg: "expected a colon" )) { |
8374 | if (trySkipId(Id: "swizzle" )) { |
8375 | Ok = parseSwizzleMacro(Imm); |
8376 | } else { |
8377 | Ok = parseSwizzleOffset(Imm); |
8378 | } |
8379 | } |
8380 | |
8381 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTySwizzle)); |
8382 | |
8383 | return Ok ? ParseStatus::Success : ParseStatus::Failure; |
8384 | } |
8385 | return ParseStatus::NoMatch; |
8386 | } |
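// Illustrative examples of the swizzle forms parsed above (a sketch; FFT and
// ROTATE modes additionally require gfx9+):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "00p11")
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 2)
//   ds_swizzle_b32 v0, v1 offset:0x1234            ; raw 16-bit offset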
8387 | |
8388 | bool |
8389 | AMDGPUOperand::isSwizzle() const { |
8390 | return isImmTy(ImmT: ImmTySwizzle); |
8391 | } |
8392 | |
8393 | //===----------------------------------------------------------------------===// |
8394 | // VGPR Index Mode |
8395 | //===----------------------------------------------------------------------===// |
8396 | |
8397 | int64_t AMDGPUAsmParser::parseGPRIdxMacro() { |
8398 | |
8399 | using namespace llvm::AMDGPU::VGPRIndexMode; |
8400 | |
8401 | if (trySkipToken(Kind: AsmToken::RParen)) { |
8402 | return OFF; |
8403 | } |
8404 | |
8405 | int64_t Imm = 0; |
8406 | |
8407 | while (true) { |
8408 | unsigned Mode = 0; |
8409 | SMLoc S = getLoc(); |
8410 | |
8411 | for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { |
8412 | if (trySkipId(Id: IdSymbolic[ModeId])) { |
8413 | Mode = 1 << ModeId; |
8414 | break; |
8415 | } |
8416 | } |
8417 | |
8418 | if (Mode == 0) { |
8419 | Error(L: S, Msg: (Imm == 0)? |
8420 | "expected a VGPR index mode or a closing parenthesis" : |
8421 | "expected a VGPR index mode" ); |
8422 | return UNDEF; |
8423 | } |
8424 | |
8425 | if (Imm & Mode) { |
8426 | Error(L: S, Msg: "duplicate VGPR index mode" ); |
8427 | return UNDEF; |
8428 | } |
8429 | Imm |= Mode; |
8430 | |
8431 | if (trySkipToken(Kind: AsmToken::RParen)) |
8432 | break; |
8433 | if (!skipToken(Kind: AsmToken::Comma, |
8434 | ErrMsg: "expected a comma or a closing parenthesis" )) |
8435 | return UNDEF; |
8436 | } |
8437 | |
8438 | return Imm; |
8439 | } |
8440 | |
8441 | ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { |
8442 | |
8443 | using namespace llvm::AMDGPU::VGPRIndexMode; |
8444 | |
8445 | int64_t Imm = 0; |
8446 | SMLoc S = getLoc(); |
8447 | |
8448 | if (trySkipId(Id: "gpr_idx" , Kind: AsmToken::LParen)) { |
8449 | Imm = parseGPRIdxMacro(); |
8450 | if (Imm == UNDEF) |
8451 | return ParseStatus::Failure; |
8452 | } else { |
8453 | if (getParser().parseAbsoluteExpression(Res&: Imm)) |
8454 | return ParseStatus::Failure; |
8455 | if (Imm < 0 || !isUInt<4>(x: Imm)) |
8456 | return Error(L: S, Msg: "invalid immediate: only 4-bit values are legal" ); |
8457 | } |
8458 | |
8459 | Operands.push_back( |
8460 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTyGprIdxMode)); |
8461 | return ParseStatus::Success; |
8462 | } |
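// Illustrative example (a sketch; mode names such as SRC0 and DST come from
// the VGPRIndexMode IdSymbolic table):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// A plain 4-bit immediate may be used instead of the gpr_idx macro.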
8463 | |
8464 | bool AMDGPUOperand::isGPRIdxMode() const { |
8465 | return isImmTy(ImmT: ImmTyGprIdxMode); |
8466 | } |
8467 | |
8468 | //===----------------------------------------------------------------------===// |
8469 | // sopp branch targets |
8470 | //===----------------------------------------------------------------------===// |
8471 | |
8472 | ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { |
8473 | |
8474 | // Make sure we are not parsing something |
8475 | // that looks like a label or an expression but is not. |
8476 | // This will improve error messages. |
8477 | if (isRegister() || isModifier()) |
8478 | return ParseStatus::NoMatch; |
8479 | |
8480 | if (!parseExpr(Operands)) |
8481 | return ParseStatus::Failure; |
8482 | |
8483 | AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); |
8484 | assert(Opr.isImm() || Opr.isExpr()); |
8485 | SMLoc Loc = Opr.getStartLoc(); |
8486 | |
8487 | // Currently we do not support arbitrary expressions as branch targets. |
8488 | // Only labels and absolute expressions are accepted. |
8489 | if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { |
8490 | Error(L: Loc, Msg: "expected an absolute expression or a label" ); |
8491 | } else if (Opr.isImm() && !Opr.isS16Imm()) { |
8492 | Error(L: Loc, Msg: "expected a 16-bit signed jump offset" ); |
8493 | } |
8494 | |
8495 | return ParseStatus::Success; |
8496 | } |
8497 | |
8498 | //===----------------------------------------------------------------------===// |
8499 | // Boolean holding registers |
8500 | //===----------------------------------------------------------------------===// |
8501 | |
8502 | ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { |
8503 | return parseReg(Operands); |
8504 | } |
8505 | |
8506 | //===----------------------------------------------------------------------===// |
8507 | // mubuf |
8508 | //===----------------------------------------------------------------------===// |
8509 | |
8510 | void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, |
8511 | const OperandVector &Operands, |
8512 | bool IsAtomic) { |
8513 | OptionalImmIndexMap OptionalIdx; |
8514 | unsigned FirstOperandIdx = 1; |
8515 | bool IsAtomicReturn = false; |
8516 | |
8517 | if (IsAtomic) { |
8518 | IsAtomicReturn = MII.get(Opcode: Inst.getOpcode()).TSFlags & |
8519 | SIInstrFlags::IsAtomicRet; |
8520 | } |
8521 | |
8522 | for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { |
8523 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
8524 | |
8525 | // Add the register arguments |
8526 | if (Op.isReg()) { |
8527 | Op.addRegOperands(Inst, N: 1); |
8528 | // Insert a tied src for atomic return dst. |
8529 | // This cannot be postponed as subsequent calls to |
8530 | // addImmOperands rely on the correct number of MC operands. |
8531 | if (IsAtomicReturn && i == FirstOperandIdx) |
8532 | Op.addRegOperands(Inst, N: 1); |
8533 | continue; |
8534 | } |
8535 | |
8536 | // Handle the case where soffset is an immediate |
8537 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { |
8538 | Op.addImmOperands(Inst, N: 1); |
8539 | continue; |
8540 | } |
8541 | |
8542 | // Handle tokens like 'offen' which are sometimes hard-coded into the |
8543 | // asm string. There are no MCInst operands for these. |
8544 | if (Op.isToken()) { |
8545 | continue; |
8546 | } |
8547 | assert(Op.isImm()); |
8548 | |
8549 | // Handle optional arguments |
8550 | OptionalIdx[Op.getImmTy()] = i; |
8551 | } |
8552 | |
8553 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOffset); |
8554 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyCPol, Default: 0); |
8555 | } |
8556 | |
8557 | //===----------------------------------------------------------------------===// |
8558 | // smrd |
8559 | //===----------------------------------------------------------------------===// |
8560 | |
8561 | bool AMDGPUOperand::isSMRDOffset8() const { |
8562 | return isImmLiteral() && isUInt<8>(x: getImm()); |
8563 | } |
8564 | |
8565 | bool AMDGPUOperand::isSMEMOffset() const { |
8566 | // Offset range is checked later by validator. |
8567 | return isImmLiteral(); |
8568 | } |
8569 | |
8570 | bool AMDGPUOperand::isSMRDLiteralOffset() const { |
8571 | // 32-bit literals are only supported on CI and we only want to use them |
8572 | // when the offset is > 8 bits. |
8573 | return isImmLiteral() && !isUInt<8>(x: getImm()) && isUInt<32>(x: getImm()); |
8574 | } |
8575 | |
8576 | //===----------------------------------------------------------------------===// |
8577 | // vop3 |
8578 | //===----------------------------------------------------------------------===// |
8579 | |
8580 | static bool ConvertOmodMul(int64_t &Mul) { |
8581 | if (Mul != 1 && Mul != 2 && Mul != 4) |
8582 | return false; |
8583 | |
8584 | Mul >>= 1; |
8585 | return true; |
8586 | } |
8587 | |
8588 | static bool ConvertOmodDiv(int64_t &Div) { |
8589 | if (Div == 1) { |
8590 | Div = 0; |
8591 | return true; |
8592 | } |
8593 | |
8594 | if (Div == 2) { |
8595 | Div = 3; |
8596 | return true; |
8597 | } |
8598 | |
8599 | return false; |
8600 | } |
8601 | |
8602 | // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. |
8603 | // This is intentional and ensures compatibility with sp3. |
8604 | // See bug 35397 for details. |
8605 | bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { |
8606 | if (BoundCtrl == 0 || BoundCtrl == 1) { |
8607 | if (!isGFX11Plus()) |
8608 | BoundCtrl = 1; |
8609 | return true; |
8610 | } |
8611 | return false; |
8612 | } |
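// Illustrative sketch of the behaviour described above: on pre-gfx11 targets
// both of these DPP operands select the same encoding.
//   v_mov_b32 v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:0
//   v_mov_b32 v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1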
8613 | |
8614 | void AMDGPUAsmParser::onBeginOfFile() { |
8615 | if (!getParser().getStreamer().getTargetStreamer() || |
8616 | getSTI().getTargetTriple().getArch() == Triple::r600) |
8617 | return; |
8618 | |
8619 | if (!getTargetStreamer().getTargetID()) |
8620 | getTargetStreamer().initializeTargetID(STI: getSTI(), |
8621 | FeatureString: getSTI().getFeatureString()); |
8622 | |
8623 | if (isHsaAbi(STI: getSTI())) |
8624 | getTargetStreamer().EmitDirectiveAMDGCNTarget(); |
8625 | } |
8626 | |
8627 | /// Parse AMDGPU-specific expressions. |
8628 | /// |
8629 | /// expr ::= or(expr, ...) | |
8630 | /// max(expr, ...) |
8631 | /// |
8632 | bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { |
8633 | using AGVK = AMDGPUMCExpr::VariantKind; |
8634 | |
8635 | if (isToken(Kind: AsmToken::Identifier)) { |
8636 | StringRef TokenId = getTokenStr(); |
8637 | AGVK VK = StringSwitch<AGVK>(TokenId) |
8638 | .Case(S: "max" , Value: AGVK::AGVK_Max) |
8639 | .Case(S: "or" , Value: AGVK::AGVK_Or) |
8640 | .Case(S: "extrasgprs" , Value: AGVK::AGVK_ExtraSGPRs) |
8641 | .Case(S: "totalnumvgprs" , Value: AGVK::AGVK_TotalNumVGPRs) |
8642 | .Case(S: "alignto" , Value: AGVK::AGVK_AlignTo) |
8643 | .Case(S: "occupancy" , Value: AGVK::AGVK_Occupancy) |
8644 | .Default(Value: AGVK::AGVK_None); |
8645 | |
8646 | if (VK != AGVK::AGVK_None && peekToken().is(K: AsmToken::LParen)) { |
8647 | SmallVector<const MCExpr *, 4> Exprs; |
8648 | uint64_t CommaCount = 0; |
8649 | lex(); // Eat Arg ('or', 'max', 'occupancy', etc.) |
8650 | lex(); // Eat '(' |
8651 | while (true) { |
8652 | if (trySkipToken(Kind: AsmToken::RParen)) { |
8653 | if (Exprs.empty()) { |
8654 | Error(L: getToken().getLoc(), |
8655 | Msg: "empty " + Twine(TokenId) + " expression" ); |
8656 | return true; |
8657 | } |
8658 | if (CommaCount + 1 != Exprs.size()) { |
8659 | Error(L: getToken().getLoc(), |
8660 | Msg: "mismatch of commas in " + Twine(TokenId) + " expression" ); |
8661 | return true; |
8662 | } |
8663 | Res = AMDGPUMCExpr::create(Kind: VK, Args: Exprs, Ctx&: getContext()); |
8664 | return false; |
8665 | } |
8666 | const MCExpr *Expr; |
8667 | if (getParser().parseExpression(Res&: Expr, EndLoc)) |
8668 | return true; |
8669 | Exprs.push_back(Elt: Expr); |
8670 | bool LastTokenWasComma = trySkipToken(Kind: AsmToken::Comma); |
8671 | if (LastTokenWasComma) |
8672 | CommaCount++; |
8673 | if (!LastTokenWasComma && !isToken(Kind: AsmToken::RParen)) { |
8674 | Error(L: getToken().getLoc(), |
8675 | Msg: "unexpected token in " + Twine(TokenId) + " expression" ); |
8676 | return true; |
8677 | } |
8678 | } |
8679 | } |
8680 | } |
8681 | return getParser().parsePrimaryExpr(Res, EndLoc, TypeInfo: nullptr); |
8682 | } |
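// Illustrative sketch of the extended expressions (the symbol names here are
// hypothetical):
//   .set agg_vgprs, max(kernel_a.num_vgpr, kernel_b.num_vgpr)
//   .set any_trap,  or(uses_trap_a, uses_trap_b)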
8683 | |
8684 | ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { |
8685 | StringRef Name = getTokenStr(); |
8686 | if (Name == "mul" ) { |
8687 | return parseIntWithPrefix(Prefix: "mul" , Operands, |
8688 | ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodMul); |
8689 | } |
8690 | |
8691 | if (Name == "div" ) { |
8692 | return parseIntWithPrefix(Prefix: "div" , Operands, |
8693 | ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodDiv); |
8694 | } |
8695 | |
8696 | return ParseStatus::NoMatch; |
8697 | } |
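// Illustrative examples of output-modifier syntax (a sketch): ConvertOmodMul
// maps mul:1/2/4 to OMOD 0/1/2 and ConvertOmodDiv maps div:1/2 to OMOD 0/3.
//   v_add_f32 v0, v1, v2 mul:2
//   v_add_f32 v0, v1, v2 div:2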
8698 | |
8699 | // Determines which bit DST_OP_SEL occupies in the op_sel operand according to |
8700 | // the number of src operands present, then copies that bit into src0_modifiers. |
8701 | static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) { |
8702 | int Opc = Inst.getOpcode(); |
8703 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
8704 | if (OpSelIdx == -1) |
8705 | return; |
8706 | |
8707 | int SrcNum; |
8708 | const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1, |
8709 | AMDGPU::OpName::src2}; |
8710 | for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: Ops[SrcNum]); |
8711 | ++SrcNum) |
8712 | ; |
8713 | assert(SrcNum > 0); |
8714 | |
8715 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
8716 | |
8717 | int DstIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst); |
8718 | if (DstIdx == -1) |
8719 | return; |
8720 | |
8721 | const MCOperand &DstOp = Inst.getOperand(i: DstIdx); |
8722 | int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0_modifiers); |
8723 | uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm(); |
8724 | if (DstOp.isReg() && |
8725 | MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: DstOp.getReg())) { |
8726 | if (AMDGPU::isHi16Reg(Reg: DstOp.getReg(), MRI)) |
8727 | ModVal |= SISrcMods::DST_OP_SEL; |
8728 | } else { |
8729 | if ((OpSel & (1 << SrcNum)) != 0) |
8730 | ModVal |= SISrcMods::DST_OP_SEL; |
8731 | } |
8732 | Inst.getOperand(i: ModIdx).setImm(ModVal); |
8733 | } |
8734 | |
8735 | void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, |
8736 | const OperandVector &Operands) { |
8737 | cvtVOP3P(Inst, Operands); |
8738 | cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI()); |
8739 | } |
8740 | |
8741 | void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, |
8742 | OptionalImmIndexMap &OptionalIdx) { |
8743 | cvtVOP3P(Inst, Operands, OptionalIdx); |
8744 | cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI()); |
8745 | } |
8746 | |
8747 | static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { |
8748 | return |
8749 | // 1. This operand is input modifiers |
8750 | Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS |
8751 | // 2. This is not last operand |
8752 | && Desc.NumOperands > (OpNum + 1) |
8753 | // 3. Next operand is register class |
8754 | && Desc.operands()[OpNum + 1].RegClass != -1 |
8755 | // 4. Next register is not tied to any other operand |
8756 | && Desc.getOperandConstraint(OpNum: OpNum + 1, |
8757 | Constraint: MCOI::OperandConstraint::TIED_TO) == -1; |
8758 | } |
8759 | |
8760 | void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) |
8761 | { |
8762 | OptionalImmIndexMap OptionalIdx; |
8763 | unsigned Opc = Inst.getOpcode(); |
8764 | |
8765 | unsigned I = 1; |
8766 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
8767 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8768 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1); |
8769 | } |
8770 | |
8771 | for (unsigned E = Operands.size(); I != E; ++I) { |
8772 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8773 | if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) { |
8774 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8775 | } else if (Op.isInterpSlot() || Op.isInterpAttr() || |
8776 | Op.isInterpAttrChan()) { |
8777 | Inst.addOperand(Op: MCOperand::createImm(Val: Op.getImm())); |
8778 | } else if (Op.isImmModifier()) { |
8779 | OptionalIdx[Op.getImmTy()] = I; |
8780 | } else { |
8781 | llvm_unreachable("unhandled operand type" ); |
8782 | } |
8783 | } |
8784 | |
8785 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::high)) |
8786 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8787 | ImmT: AMDGPUOperand::ImmTyHigh); |
8788 | |
8789 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp)) |
8790 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8791 | ImmT: AMDGPUOperand::ImmTyClamp); |
8792 | |
8793 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod)) |
8794 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8795 | ImmT: AMDGPUOperand::ImmTyOModSI); |
8796 | } |
8797 | |
8798 | void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) |
8799 | { |
8800 | OptionalImmIndexMap OptionalIdx; |
8801 | unsigned Opc = Inst.getOpcode(); |
8802 | |
8803 | unsigned I = 1; |
8804 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
8805 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8806 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1); |
8807 | } |
8808 | |
8809 | for (unsigned E = Operands.size(); I != E; ++I) { |
8810 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8811 | if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) { |
8812 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8813 | } else if (Op.isImmModifier()) { |
8814 | OptionalIdx[Op.getImmTy()] = I; |
8815 | } else { |
8816 | llvm_unreachable("unhandled operand type" ); |
8817 | } |
8818 | } |
8819 | |
8820 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyClamp); |
8821 | |
8822 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
8823 | if (OpSelIdx != -1) |
8824 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel); |
8825 | |
8826 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyWaitEXP); |
8827 | |
8828 | if (OpSelIdx == -1) |
8829 | return; |
8830 | |
8831 | const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1, |
8832 | AMDGPU::OpName::src2}; |
8833 | const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers, |
8834 | AMDGPU::OpName::src1_modifiers, |
8835 | AMDGPU::OpName::src2_modifiers}; |
8836 | |
8837 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
8838 | |
8839 | for (int J = 0; J < 3; ++J) { |
8840 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: Ops[J]); |
8841 | if (OpIdx == -1) |
8842 | break; |
8843 | |
8844 | int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]); |
8845 | uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm(); |
8846 | |
8847 | if ((OpSel & (1 << J)) != 0) |
8848 | ModVal |= SISrcMods::OP_SEL_0; |
8849 | if (ModOps[J] == AMDGPU::OpName::src0_modifiers && |
8850 | (OpSel & (1 << 3)) != 0) |
8851 | ModVal |= SISrcMods::DST_OP_SEL; |
8852 | |
8853 | Inst.getOperand(i: ModIdx).setImm(ModVal); |
8854 | } |
8855 | } |
8856 | void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst, |
8857 | const OperandVector &Operands) { |
8858 | OptionalImmIndexMap OptionalIdx; |
8859 | unsigned Opc = Inst.getOpcode(); |
8860 | unsigned I = 1; |
8861 | int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz); |
8862 | |
8863 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
8864 | |
8865 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) |
8866 | static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, N: 1); |
8867 | |
8868 | for (unsigned E = Operands.size(); I != E; ++I) { |
8869 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]); |
8870 | int NumOperands = Inst.getNumOperands(); |
8871 | // The order of operands in the MCInst differs from the order of the |
8872 | // parsed operands. Add dummy cbsz and blgp operands at the corresponding |
8873 | // MCInst operand indices so that the scale values are parsed correctly. |
8874 | if (NumOperands == CbszOpIdx) { |
8875 | Inst.addOperand(Op: MCOperand::createImm(Val: 0)); |
8876 | Inst.addOperand(Op: MCOperand::createImm(Val: 0)); |
8877 | } |
8878 | if (isRegOrImmWithInputMods(Desc, OpNum: NumOperands)) { |
8879 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8880 | } else if (Op.isImmModifier()) { |
8881 | OptionalIdx[Op.getImmTy()] = I; |
8882 | } else { |
8883 | Op.addRegOrImmOperands(Inst, N: 1); |
8884 | } |
8885 | } |
8886 | |
8887 | // Insert CBSZ and BLGP operands for F8F6F4 variants |
8888 | auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ); |
8889 | if (CbszIdx != OptionalIdx.end()) { |
8890 | int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm(); |
8891 | Inst.getOperand(CbszOpIdx).setImm(CbszVal); |
8892 | } |
8893 | |
8894 | int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); |
8895 | auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP); |
8896 | if (BlgpIdx != OptionalIdx.end()) { |
8897 | int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm(); |
8898 | Inst.getOperand(BlgpOpIdx).setImm(BlgpVal); |
8899 | } |
8900 | |
8901 | // Add dummy src_modifiers |
8902 | Inst.addOperand(MCOperand::createImm(0)); |
8903 | Inst.addOperand(MCOperand::createImm(0)); |
8904 | |
8905 | // Handle op_sel fields |
8906 | |
8907 | unsigned OpSel = 0; |
8908 | auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel); |
8909 | if (OpselIdx != OptionalIdx.end()) { |
8910 | OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second]) |
8911 | .getImm(); |
8912 | } |
8913 | |
8914 | unsigned OpSelHi = 0; |
8915 | auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi); |
8916 | if (OpselHiIdx != OptionalIdx.end()) { |
8917 | OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second]) |
8918 | .getImm(); |
8919 | } |
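// Fold the op_sel and op_sel_hi bits for src0 and src1 into their
// modifier operands.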
8920 | const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers, |
8921 | AMDGPU::OpName::src1_modifiers}; |
8922 | |
8923 | for (unsigned J = 0; J < 2; ++J) { |
8924 | unsigned ModVal = 0; |
8925 | if (OpSel & (1 << J)) |
8926 | ModVal |= SISrcMods::OP_SEL_0; |
8927 | if (OpSelHi & (1 << J)) |
8928 | ModVal |= SISrcMods::OP_SEL_1; |
8929 | |
8930 | const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); |
8931 | Inst.getOperand(ModIdx).setImm(ModVal); |
8932 | } |
8933 | } |
8934 | |
8935 | void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, |
8936 | OptionalImmIndexMap &OptionalIdx) { |
8937 | unsigned Opc = Inst.getOpcode(); |
8938 | |
8939 | unsigned I = 1; |
8940 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); |
8941 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8942 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); |
8943 | } |
8944 | |
8945 | for (unsigned E = Operands.size(); I != E; ++I) { |
8946 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8947 | if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { |
8948 | Op.addRegOrImmWithFPInputModsOperands(Inst, 2); |
8949 | } else if (Op.isImmModifier()) { |
8950 | OptionalIdx[Op.getImmTy()] = I; |
8951 | } else { |
8952 | Op.addRegOrImmOperands(Inst, 1); |
8953 | } |
8954 | } |
8955 | |
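// Append the optional modifier operands in the order the MCInst expects:
// byte_sel (preceded by a tied vdst_in copy when present), clamp, then omod.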
8956 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) { |
8957 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) |
8958 | Inst.addOperand(Inst.getOperand(0)); |
8959 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8960 | AMDGPUOperand::ImmTyByteSel); |
8961 | } |
8962 | |
8963 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) |
8964 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8965 | AMDGPUOperand::ImmTyClamp); |
8966 | |
8967 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) |
8968 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8969 | AMDGPUOperand::ImmTyOModSI); |
8970 | |
8971 | // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): |
8972 | // they have a src2 register operand that is tied to the dst operand. |
8973 | // The assembler does not allow modifiers on this operand, so |
8974 | // src2_modifiers must be 0. |
8975 | if (isMAC(Opc)) { |
8976 | auto *it = Inst.begin(); |
8977 | std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); |
8978 | it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 |
8979 | ++it; |
8980 | // Copy the operand to ensure it's not invalidated when Inst grows. |
8981 | Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst |
8982 | } |
8983 | } |
8984 | |
8985 | void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { |
8986 | OptionalImmIndexMap OptionalIdx; |
8987 | cvtVOP3(Inst, Operands, OptionalIdx); |
8988 | } |
8989 | |
8990 | void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, |
8991 | OptionalImmIndexMap &OptIdx) { |
8992 | const int Opc = Inst.getOpcode(); |
8993 | const MCInstrDesc &Desc = MII.get(Opc); |
8994 | |
8995 | const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; |
8996 | |
8997 | if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi || |
8998 | Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi || |
8999 | Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || |
9000 | Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || |
9001 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 || |
9002 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) { |
9003 | Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods |
9004 | Inst.addOperand(Inst.getOperand(0)); |
9005 | } |
9006 | |
9007 | // Adding vdst_in operand is already covered for these DPP instructions in |
9008 | // cvtVOP3DPP. |
9009 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) && |
9010 | !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 || |
9011 | Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 || |
9012 | Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 || |
9013 | Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 || |
9014 | Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 || |
9015 | Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 || |
9016 | Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 || |
9017 | Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 || |
9018 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 || |
9019 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || |
9020 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || |
9021 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) { |
9022 | Inst.addOperand(Inst.getOperand(0)); |
9023 | } |
9024 | |
9025 | int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3); |
9026 | if (BitOp3Idx != -1) { |
9027 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3); |
9028 | } |
9029 | |
9030 | // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 |
9031 | // instruction, and then figure out where to actually put the modifiers |
9032 | |
9033 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); |
9034 | if (OpSelIdx != -1) { |
9035 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); |
9036 | } |
9037 | |
9038 | int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); |
9039 | if (OpSelHiIdx != -1) { |
9040 | int DefaultVal = IsPacked ? -1 : 0; |
9041 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, |
9042 | DefaultVal); |
9043 | } |
9044 | |
9045 | int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); |
9046 | if (NegLoIdx != -1) |
9047 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); |
9048 | |
9049 | int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); |
9050 | if (NegHiIdx != -1) |
9051 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); |
9052 | |
9053 | const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1, |
9054 | AMDGPU::OpName::src2}; |
9055 | const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers, |
9056 | AMDGPU::OpName::src1_modifiers, |
9057 | AMDGPU::OpName::src2_modifiers}; |
9058 | |
9059 | unsigned OpSel = 0; |
9060 | unsigned OpSelHi = 0; |
9061 | unsigned NegLo = 0; |
9062 | unsigned NegHi = 0; |
9063 | |
9064 | if (OpSelIdx != -1) |
9065 | OpSel = Inst.getOperand(OpSelIdx).getImm(); |
9066 | |
9067 | if (OpSelHiIdx != -1) |
9068 | OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); |
9069 | |
9070 | if (NegLoIdx != -1) |
9071 | NegLo = Inst.getOperand(NegLoIdx).getImm(); |
9072 | |
9073 | if (NegHiIdx != -1) |
9074 | NegHi = Inst.getOperand(NegHiIdx).getImm(); |
9075 | |
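// Merge op_sel, op_sel_hi, neg_lo and neg_hi into the per-source modifier
// operands. True 16-bit VGPR sources take their hi/lo selection from the
// register itself rather than from op_sel.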
9076 | for (int J = 0; J < 3; ++J) { |
9077 | int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); |
9078 | if (OpIdx == -1) |
9079 | break; |
9080 | |
9081 | int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); |
9082 | |
9083 | if (ModIdx == -1) |
9084 | continue; |
9085 | |
9086 | uint32_t ModVal = 0; |
9087 | |
9088 | const MCOperand &SrcOp = Inst.getOperand(OpIdx); |
9089 | if (SrcOp.isReg() && getMRI() |
9090 | ->getRegClass(AMDGPU::VGPR_16RegClassID) |
9091 | .contains(SrcOp.getReg())) { |
9092 | bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI()); |
9093 | if (VGPRSuffixIsHi) |
9094 | ModVal |= SISrcMods::OP_SEL_0; |
9095 | } else { |
9096 | if ((OpSel & (1 << J)) != 0) |
9097 | ModVal |= SISrcMods::OP_SEL_0; |
9098 | } |
9099 | |
9100 | if ((OpSelHi & (1 << J)) != 0) |
9101 | ModVal |= SISrcMods::OP_SEL_1; |
9102 | |
9103 | if ((NegLo & (1 << J)) != 0) |
9104 | ModVal |= SISrcMods::NEG; |
9105 | |
9106 | if ((NegHi & (1 << J)) != 0) |
9107 | ModVal |= SISrcMods::NEG_HI; |
9108 | |
9109 | Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); |
9110 | } |
9111 | } |
9112 | |
9113 | void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { |
9114 | OptionalImmIndexMap OptIdx; |
9115 | cvtVOP3(Inst, Operands, OptIdx); |
9116 | cvtVOP3P(Inst, Operands, OptIdx); |
9117 | } |
9118 | |
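// Helper: add a source operand, including its FP modifiers when the opcode
// has a matching *_modifiers operand.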
9119 | static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, |
9120 | unsigned i, unsigned Opc, |
9121 | AMDGPU::OpName OpName) { |
9122 | if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1) |
9123 | ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2); |
9124 | else |
9125 | ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1); |
9126 | } |
9127 | |
9128 | void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) { |
9129 | unsigned Opc = Inst.getOpcode(); |
9130 | |
9131 | ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); |
9132 | addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers); |
9133 | addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers); |
9134 | ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef |
9135 | ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2 |
9136 | |
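// The remaining parsed operands are optional immediate modifiers; record
// their indices for addOptionalImmOperand below.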
9137 | OptionalImmIndexMap OptIdx; |
9138 | for (unsigned i = 5; i < Operands.size(); ++i) { |
9139 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
9140 | OptIdx[Op.getImmTy()] = i; |
9141 | } |
9142 | |
9143 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit)) |
9144 | addOptionalImmOperand(Inst, Operands, OptIdx, |
9145 | AMDGPUOperand::ImmTyIndexKey8bit); |
9146 | |
9147 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit)) |
9148 | addOptionalImmOperand(Inst, Operands, OptIdx, |
9149 | AMDGPUOperand::ImmTyIndexKey16bit); |
9150 | |
9151 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) |
9152 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp); |
9153 | |
9154 | cvtVOP3P(Inst, Operands, OptIdx); |
9155 | } |
9156 | |
9157 | //===----------------------------------------------------------------------===// |
9158 | // VOPD |
9159 | //===----------------------------------------------------------------------===// |
9160 | |
9161 | ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { |
9162 | if (!hasVOPD(getSTI())) |
9163 | return ParseStatus::NoMatch; |
9164 | |
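// A VOPD instruction is written as <OpX mnemonic> <OpX operands> ::
// <OpY mnemonic> <OpY operands>; consume the '::' separator and the OpY
// mnemonic here.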
9165 | if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { |
9166 | SMLoc S = getLoc(); |
9167 | lex(); |
9168 | lex(); |
9169 | Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); |
9170 | SMLoc OpYLoc = getLoc(); |
9171 | StringRef OpYName; |
9172 | if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) { |
9173 | Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc)); |
9174 | return ParseStatus::Success; |
9175 | } |
9176 | return Error(OpYLoc, "expected a VOPDY instruction after ::"); |
9177 | } |
9178 | return ParseStatus::NoMatch; |
9179 | } |
9180 | |
9181 | // Create VOPD MCInst operands using parsed assembler operands. |
9182 | void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { |
9183 | auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer |
9184 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); |
9185 | if (Op.isReg()) { |
9186 | Op.addRegOperands(Inst, 1); |
9187 | return; |
9188 | } |
9189 | if (Op.isImm()) { |
9190 | Op.addImmOperands(Inst, 1); |
9191 | return; |
9192 | } |
9193 | llvm_unreachable("Unhandled operand type in cvtVOPD"); |
9194 | }; |
9195 | |
9196 | const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); |
9197 | |
9198 | // MCInst operands are ordered as follows: |
9199 | // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] |
9200 | |
9201 | for (auto CompIdx : VOPD::COMPONENTS) { |
9202 | addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); |
9203 | } |
9204 | |
9205 | for (auto CompIdx : VOPD::COMPONENTS) { |
9206 | const auto &CInfo = InstInfo[CompIdx]; |
9207 | auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); |
9208 | for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) |
9209 | addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); |
9210 | if (CInfo.hasSrc2Acc()) |
9211 | addOp(CInfo.getIndexOfDstInParsedOperands()); |
9212 | } |
9213 | } |
9214 | |
9215 | //===----------------------------------------------------------------------===// |
9216 | // dpp |
9217 | //===----------------------------------------------------------------------===// |
9218 | |
9219 | bool AMDGPUOperand::isDPP8() const { |
9220 | return isImmTy(ImmTyDPP8); |
9221 | } |
9222 | |
9223 | bool AMDGPUOperand::isDPPCtrl() const { |
9224 | using namespace AMDGPU::DPP; |
9225 | |
9226 | bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); |
9227 | if (result) { |
9228 | int64_t Imm = getImm(); |
9229 | return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || |
9230 | (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || |
9231 | (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || |
9232 | (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || |
9233 | (Imm == DppCtrl::WAVE_SHL1) || |
9234 | (Imm == DppCtrl::WAVE_ROL1) || |
9235 | (Imm == DppCtrl::WAVE_SHR1) || |
9236 | (Imm == DppCtrl::WAVE_ROR1) || |
9237 | (Imm == DppCtrl::ROW_MIRROR) || |
9238 | (Imm == DppCtrl::ROW_HALF_MIRROR) || |
9239 | (Imm == DppCtrl::BCAST15) || |
9240 | (Imm == DppCtrl::BCAST31) || |
9241 | (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || |
9242 | (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); |
9243 | } |
9244 | return false; |
9245 | } |
9246 | |
9247 | //===----------------------------------------------------------------------===// |
9248 | // mAI |
9249 | //===----------------------------------------------------------------------===// |
9250 | |
9251 | bool AMDGPUOperand::isBLGP() const { |
9252 | return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); |
9253 | } |
9254 | |
9255 | bool AMDGPUOperand::isS16Imm() const { |
9256 | return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm())); |
9257 | } |
9258 | |
9259 | bool AMDGPUOperand::isU16Imm() const { |
9260 | return isImmLiteral() && isUInt<16>(getImm()); |
9261 | } |
9262 | |
9263 | //===----------------------------------------------------------------------===// |
9264 | // dim |
9265 | //===----------------------------------------------------------------------===// |
9266 | |
9267 | bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { |
9268 | // We want to allow "dim:1D" etc., |
9269 | // but the initial 1 is tokenized as an integer. |
9270 | std::string Token; |
9271 | if (isToken(AsmToken::Integer)) { |
9272 | SMLoc Loc = getToken().getEndLoc(); |
9273 | Token = std::string(getTokenStr()); |
9274 | lex(); |
9275 | if (getLoc() != Loc) |
9276 | return false; |
9277 | } |
9278 | |
9279 | StringRef Suffix; |
9280 | if (!parseId(Suffix)) |
9281 | return false; |
9282 | Token += Suffix; |
9283 | |
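// Also accept the SQ_RSRC_IMG_* spelling by stripping the prefix before the
// table lookup.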
9284 | StringRef DimId = Token; |
9285 | DimId.consume_front("SQ_RSRC_IMG_"); |
9286 | |
9287 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); |
9288 | if (!DimInfo) |
9289 | return false; |
9290 | |
9291 | Encoding = DimInfo->Encoding; |
9292 | return true; |
9293 | } |
9294 | |
9295 | ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { |
9296 | if (!isGFX10Plus()) |
9297 | return ParseStatus::NoMatch; |
9298 | |
9299 | SMLoc S = getLoc(); |
9300 | |
9301 | if (!trySkipId("dim", AsmToken::Colon)) |
9302 | return ParseStatus::NoMatch; |
9303 | |
9304 | unsigned Encoding; |
9305 | SMLoc Loc = getLoc(); |
9306 | if (!parseDimId(Encoding)) |
9307 | return Error(Loc, "invalid dim value"); |
9308 | |
9309 | Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, |
9310 | AMDGPUOperand::ImmTyDim)); |
9311 | return ParseStatus::Success; |
9312 | } |
9313 | |
9314 | //===----------------------------------------------------------------------===// |
9315 | // dpp |
9316 | //===----------------------------------------------------------------------===// |
9317 | |
9318 | ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { |
9319 | SMLoc S = getLoc(); |
9320 | |
9321 | if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) |
9322 | return ParseStatus::NoMatch; |
9323 | |
9324 | // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] |
9325 | |
9326 | int64_t Sels[8]; |
9327 | |
9328 | if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) |
9329 | return ParseStatus::Failure; |
9330 | |
9331 | for (size_t i = 0; i < 8; ++i) { |
9332 | if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) |
9333 | return ParseStatus::Failure; |
9334 | |
9335 | SMLoc Loc = getLoc(); |
9336 | if (getParser().parseAbsoluteExpression(Sels[i])) |
9337 | return ParseStatus::Failure; |
9338 | if (0 > Sels[i] || 7 < Sels[i]) |
9339 | return Error(Loc, "expected a 3-bit value"); |
9340 | } |
9341 | |
9342 | if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) |
9343 | return ParseStatus::Failure; |
9344 | |
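// Pack the eight 3-bit lane selectors into a single immediate; selector i
// occupies bits [3*i+2 : 3*i].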
9345 | unsigned DPP8 = 0; |
9346 | for (size_t i = 0; i < 8; ++i) |
9347 | DPP8 |= (Sels[i] << (i * 3)); |
9348 | |
9349 | Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); |
9350 | return ParseStatus::Success; |
9351 | } |
9352 | |
9353 | bool |
9354 | AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, |
9355 | const OperandVector &Operands) { |
9356 | if (Ctrl == "row_newbcast") |
9357 | return isGFX90A(); |
9358 | |
9359 | if (Ctrl == "row_share" || |
9360 | Ctrl == "row_xmask") |
9361 | return isGFX10Plus(); |
9362 | |
9363 | if (Ctrl == "wave_shl" || |
9364 | Ctrl == "wave_shr" || |
9365 | Ctrl == "wave_rol" || |
9366 | Ctrl == "wave_ror" || |
9367 | Ctrl == "row_bcast") |
9368 | return isVI() || isGFX9(); |
9369 | |
9370 | return Ctrl == "row_mirror" || |
9371 | Ctrl == "row_half_mirror" || |
9372 | Ctrl == "quad_perm" || |
9373 | Ctrl == "row_shl" || |
9374 | Ctrl == "row_shr" || |
9375 | Ctrl == "row_ror"; |
9376 | } |
9377 | |
9378 | int64_t |
9379 | AMDGPUAsmParser::parseDPPCtrlPerm() { |
9380 | // quad_perm:[%d,%d,%d,%d] |
9381 | |
9382 | if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) |
9383 | return -1; |
9384 | |
9385 | int64_t Val = 0; |
9386 | for (int i = 0; i < 4; ++i) { |
9387 | if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) |
9388 | return -1; |
9389 | |
9390 | int64_t Temp; |
9391 | SMLoc Loc = getLoc(); |
9392 | if (getParser().parseAbsoluteExpression(Temp)) |
9393 | return -1; |
9394 | if (Temp < 0 || Temp > 3) { |
9395 | Error(Loc, "expected a 2-bit value"); |
9396 | return -1; |
9397 | } |
9398 | |
9399 | Val += (Temp << i * 2); |
9400 | } |
9401 | |
9402 | if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) |
9403 | return -1; |
9404 | |
9405 | return Val; |
9406 | } |
9407 | |
9408 | int64_t |
9409 | AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { |
9410 | using namespace AMDGPU::DPP; |
9411 | |
9412 | // sel:%d |
9413 | |
9414 | int64_t Val; |
9415 | SMLoc Loc = getLoc(); |
9416 | |
9417 | if (getParser().parseAbsoluteExpression(Val)) |
9418 | return -1; |
9419 | |
9420 | struct DppCtrlCheck { |
9421 | int64_t Ctrl; |
9422 | int Lo; |
9423 | int Hi; |
9424 | }; |
9425 | |
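// Map the dpp ctrl keyword to its base encoding and the range of values it
// accepts; row_bcast is validated separately below.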
9426 | DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) |
9427 | .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) |
9428 | .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) |
9429 | .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) |
9430 | .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) |
9431 | .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) |
9432 | .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) |
9433 | .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) |
9434 | .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) |
9435 | .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) |
9436 | .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) |
9437 | .Default({-1, 0, 0}); |
9438 | |
9439 | bool Valid; |
9440 | if (Check.Ctrl == -1) { |
9441 | Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); |
9442 | Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; |
9443 | } else { |
9444 | Valid = Check.Lo <= Val && Val <= Check.Hi; |
9445 | Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); |
9446 | } |
9447 | |
9448 | if (!Valid) { |
9449 | Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); |
9450 | return -1; |
9451 | } |
9452 | |
9453 | return Val; |
9454 | } |
9455 | |
9456 | ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { |
9457 | using namespace AMDGPU::DPP; |
9458 | |
9459 | if (!isToken(AsmToken::Identifier) || |
9460 | !isSupportedDPPCtrl(getTokenStr(), Operands)) |
9461 | return ParseStatus::NoMatch; |
9462 | |
9463 | SMLoc S = getLoc(); |
9464 | int64_t Val = -1; |
9465 | StringRef Ctrl; |
9466 | |
9467 | parseId(Ctrl); |
9468 | |
9469 | if (Ctrl == "row_mirror") { |
9470 | Val = DppCtrl::ROW_MIRROR; |
9471 | } else if (Ctrl == "row_half_mirror") { |
9472 | Val = DppCtrl::ROW_HALF_MIRROR; |
9473 | } else { |
9474 | if (skipToken(AsmToken::Colon, "expected a colon")) { |
9475 | if (Ctrl == "quad_perm") { |
9476 | Val = parseDPPCtrlPerm(); |
9477 | } else { |
9478 | Val = parseDPPCtrlSel(Ctrl); |
9479 | } |
9480 | } |
9481 | } |
9482 | |
9483 | if (Val == -1) |
9484 | return ParseStatus::Failure; |
9485 | |
9486 | Operands.push_back( |
9487 | AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); |
9488 | return ParseStatus::Success; |
9489 | } |
9490 | |
9491 | void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, |
9492 | bool IsDPP8) { |
9493 | OptionalImmIndexMap OptionalIdx; |
9494 | unsigned Opc = Inst.getOpcode(); |
9495 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); |
9496 | |
9497 | // MAC instructions are special because they have an 'old' |
9498 | // operand which is not tied to dst (but assumed to be). |
9499 | // They also have a dummy, unused src2_modifiers operand. |
9500 | int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); |
9501 | int Src2ModIdx = |
9502 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); |
9503 | bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && |
9504 | Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; |
9505 | |
9506 | unsigned I = 1; |
9507 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
9508 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); |
9509 | } |
9510 | |
9511 | int Fi = 0; |
9512 | int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); |
9513 | bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 || |
9514 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || |
9515 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || |
9516 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12; |
9517 | |
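// Walk the parsed operands, inserting the implicit MCInst operands (tied dst
// copies and dummy modifiers) that have no parsed counterpart.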
9518 | for (unsigned E = Operands.size(); I != E; ++I) { |
9519 | |
9520 | if (IsMAC) { |
9521 | int NumOperands = Inst.getNumOperands(); |
9522 | if (OldIdx == NumOperands) { |
9523 | // Handle old operand |
9524 | constexpr int DST_IDX = 0; |
9525 | Inst.addOperand(Inst.getOperand(DST_IDX)); |
9526 | } else if (Src2ModIdx == NumOperands) { |
9527 | // Add unused dummy src2_modifiers |
9528 | Inst.addOperand(MCOperand::createImm(0)); |
9529 | } |
9530 | } |
9531 | |
9532 | if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) { |
9533 | Inst.addOperand(Inst.getOperand(0)); |
9534 | } |
9535 | |
9536 | if (IsVOP3CvtSrDpp) { |
9537 | if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) { |
9538 | Inst.addOperand(MCOperand::createImm(0)); |
9539 | Inst.addOperand(MCOperand::createReg(MCRegister())); |
9540 | } |
9541 | } |
9542 | |
9543 | auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), |
9544 | MCOI::TIED_TO); |
9545 | if (TiedTo != -1) { |
9546 | assert((unsigned)TiedTo < Inst.getNumOperands()); |
9547 | // handle tied old or src2 for MAC instructions |
9548 | Inst.addOperand(Inst.getOperand(TiedTo)); |
9549 | } |
9550 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
9551 | // Add the register arguments |
9552 | if (IsDPP8 && Op.isDppFI()) { |
9553 | Fi = Op.getImm(); |
9554 | } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { |
9555 | Op.addRegOrImmWithFPInputModsOperands(Inst, 2); |
9556 | } else if (Op.isReg()) { |
9557 | Op.addRegOperands(Inst, 1); |
9558 | } else if (Op.isImm() && |
9559 | Desc.operands()[Inst.getNumOperands()].RegClass != -1) { |
9560 | assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); |
9561 | Op.addImmOperands(Inst, 1); |
9562 | } else if (Op.isImm()) { |
9563 | OptionalIdx[Op.getImmTy()] = I; |
9564 | } else { |
9565 | llvm_unreachable("unhandled operand type"); |
9566 | } |
9567 | } |
9568 | |
9569 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) |
9570 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9571 | AMDGPUOperand::ImmTyByteSel); |
9572 | |
9573 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) |
9574 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9575 | AMDGPUOperand::ImmTyClamp); |
9576 | |
9577 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) |
9578 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); |
9579 | |
9580 | if (Desc.TSFlags & SIInstrFlags::VOP3P) |
9581 | cvtVOP3P(Inst, Operands, OptionalIdx); |
9582 | else if (Desc.TSFlags & SIInstrFlags::VOP3) |
9583 | cvtVOP3OpSel(Inst, Operands, OptionalIdx); |
9584 | else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { |
9585 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); |
9586 | } |
9587 | |
9588 | if (IsDPP8) { |
9589 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); |
9590 | using namespace llvm::AMDGPU::DPP; |
9591 | Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0)); |
9592 | } else { |
9593 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); |
9594 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); |
9595 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); |
9596 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); |
9597 | |
9598 | if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) |
9599 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9600 | AMDGPUOperand::ImmTyDppFI); |
9601 | } |
9602 | } |
9603 | |
9604 | void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { |
9605 | OptionalImmIndexMap OptionalIdx; |
9606 | |
9607 | unsigned I = 1; |
9608 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); |
9609 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
9610 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); |
9611 | } |
9612 | |
9613 | int Fi = 0; |
9614 | for (unsigned E = Operands.size(); I != E; ++I) { |
9615 | auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), |
9616 | MCOI::TIED_TO); |
9617 | if (TiedTo != -1) { |
9618 | assert((unsigned)TiedTo < Inst.getNumOperands()); |
9619 | // handle tied old or src2 for MAC instructions |
9620 | Inst.addOperand(Inst.getOperand(TiedTo)); |
9621 | } |
9622 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
9623 | // Add the register arguments |
9624 | if (Op.isReg() && validateVccOperand(Op.getReg())) { |
9625 | // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. |
9626 | // Skip it. |
9627 | continue; |
9628 | } |
9629 | |
9630 | if (IsDPP8) { |
9631 | if (Op.isDPP8()) { |
9632 | Op.addImmOperands(Inst, 1); |
9633 | } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { |
9634 | Op.addRegWithFPInputModsOperands(Inst, 2); |
9635 | } else if (Op.isDppFI()) { |
9636 | Fi = Op.getImm(); |
9637 | } else if (Op.isReg()) { |
9638 | Op.addRegOperands(Inst, 1); |
9639 | } else { |
9640 | llvm_unreachable("Invalid operand type"); |
9641 | } |
9642 | } else { |
9643 | if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { |
9644 | Op.addRegWithFPInputModsOperands(Inst, 2); |
9645 | } else if (Op.isReg()) { |
9646 | Op.addRegOperands(Inst, 1); |
9647 | } else if (Op.isDPPCtrl()) { |
9648 | Op.addImmOperands(Inst, 1); |
9649 | } else if (Op.isImm()) { |
9650 | // Handle optional arguments |
9651 | OptionalIdx[Op.getImmTy()] = I; |
9652 | } else { |
9653 | llvm_unreachable("Invalid operand type"); |
9654 | } |
9655 | } |
9656 | } |
9657 | |
9658 | if (IsDPP8) { |
9659 | using namespace llvm::AMDGPU::DPP; |
9660 | Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0)); |
9661 | } else { |
9662 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); |
9663 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); |
9664 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); |
9665 | if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { |
9666 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9667 | AMDGPUOperand::ImmTyDppFI); |
9668 | } |
9669 | } |
9670 | } |
9671 | |
9672 | //===----------------------------------------------------------------------===// |
9673 | // sdwa |
9674 | //===----------------------------------------------------------------------===// |
9675 | |
9676 | ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, |
9677 | StringRef Prefix, |
9678 | AMDGPUOperand::ImmTy Type) { |
9679 | return parseStringOrIntWithPrefix( |
9680 | Operands, Prefix, |
9681 | {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"}, |
9682 | Type); |
9683 | } |
9684 | |
9685 | ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { |
9686 | return parseStringOrIntWithPrefix( |
9687 | Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"}, |
9688 | AMDGPUOperand::ImmTySDWADstUnused); |
9689 | } |
9690 | |
9691 | void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { |
9692 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); |
9693 | } |
9694 | |
9695 | void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { |
9696 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); |
9697 | } |
9698 | |
9699 | void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { |
9700 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); |
9701 | } |
9702 | |
9703 | void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { |
9704 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); |
9705 | } |
9706 | |
9707 | void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { |
9708 | cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); |
9709 | } |
9710 | |
9711 | void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, |
9712 | uint64_t BasicInstType, |
9713 | bool SkipDstVcc, |
9714 | bool SkipSrcVcc) { |
9715 | using namespace llvm::AMDGPU::SDWA; |
9716 | |
9717 | OptionalImmIndexMap OptionalIdx; |
9718 | bool SkipVcc = SkipDstVcc || SkipSrcVcc; |
9719 | bool SkippedVcc = false; |
9720 | |
9721 | unsigned I = 1; |
9722 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); |
9723 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
9724 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); |
9725 | } |
9726 | |
9727 | for (unsigned E = Operands.size(); I != E; ++I) { |
9728 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
9729 | if (SkipVcc && !SkippedVcc && Op.isReg() && |
9730 | (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { |
9731 | // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. |
9732 | // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) |
9733 | // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. |
9734 | // Skip VCC only if we didn't skip it on previous iteration. |
9735 | // Note that src0 and src1 occupy 2 slots each because of modifiers. |
9736 | if (BasicInstType == SIInstrFlags::VOP2 && |
9737 | ((SkipDstVcc && Inst.getNumOperands() == 1) || |
9738 | (SkipSrcVcc && Inst.getNumOperands() == 5))) { |
9739 | SkippedVcc = true; |
9740 | continue; |
9741 | } |
9742 | if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) { |
9743 | SkippedVcc = true; |
9744 | continue; |
9745 | } |
9746 | } |
9747 | if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { |
9748 | Op.addRegOrImmWithInputModsOperands(Inst, 2); |
9749 | } else if (Op.isImm()) { |
9750 | // Handle optional arguments |
9751 | OptionalIdx[Op.getImmTy()] = I; |
9752 | } else { |
9753 | llvm_unreachable("Invalid operand type"); |
9754 | } |
9755 | SkippedVcc = false; |
9756 | } |
9757 | |
9758 | const unsigned Opc = Inst.getOpcode(); |
9759 | if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && |
9760 | Opc != AMDGPU::V_NOP_sdwa_vi) { |
9761 | // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments |
9762 | switch (BasicInstType) { |
9763 | case SIInstrFlags::VOP1: |
9764 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) |
9765 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9766 | AMDGPUOperand::ImmTyClamp, 0); |
9767 | |
9768 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) |
9769 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9770 | AMDGPUOperand::ImmTyOModSI, 0); |
9771 | |
9772 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel)) |
9773 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9774 | AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); |
9775 | |
9776 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused)) |
9777 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9778 | AMDGPUOperand::ImmTySDWADstUnused, |
9779 | DstUnused::UNUSED_PRESERVE); |
9780 | |
9781 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); |
9782 | break; |
9783 | |
9784 | case SIInstrFlags::VOP2: |
9785 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9786 | AMDGPUOperand::ImmTyClamp, 0); |
9787 | |
9788 | if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod)) |
9789 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); |
9790 | |
9791 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); |
9792 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE); |
9793 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); |
9794 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); |
9795 | break; |
9796 | |
9797 | case SIInstrFlags::VOPC: |
9798 | if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) |
9799 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9800 | AMDGPUOperand::ImmTyClamp, 0); |
9801 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); |
9802 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); |
9803 | break; |
9804 | |
9805 | default: |
9806 | llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); |
9807 | } |
9808 | } |
9809 | |
9810 | // Special case v_mac_{f16, f32}: |
9811 | // they have a src2 register operand that is tied to the dst operand. |
9812 | if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || |
9813 | Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { |
9814 | auto *it = Inst.begin(); |
9815 | std::advance( |
9816 | it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); |
9817 | Inst.insert(it, Inst.getOperand(0)); // src2 = dst |
9818 | } |
9819 | } |
9820 | |
9821 | /// Force static initialization. |
9822 | extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void |
9823 | LLVMInitializeAMDGPUAsmParser() { |
9824 | RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target()); |
9825 | RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); |
9826 | } |
9827 | |
9828 | #define GET_REGISTER_MATCHER |
9829 | #define GET_MATCHER_IMPLEMENTATION |
9830 | #define GET_MNEMONIC_SPELL_CHECKER |
9831 | #define GET_MNEMONIC_CHECKER |
9832 | #include "AMDGPUGenAsmMatcher.inc" |
9833 | |
9834 | ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, |
9835 | unsigned MCK) { |
9836 | switch (MCK) { |
9837 | case MCK_addr64: |
9838 | return parseTokenOp("addr64", Operands); |
9839 | case MCK_done: |
9840 | return parseTokenOp("done", Operands); |
9841 | case MCK_idxen: |
9842 | return parseTokenOp("idxen", Operands); |
9843 | case MCK_lds: |
9844 | return parseTokenOp("lds", Operands); |
9845 | case MCK_offen: |
9846 | return parseTokenOp("offen", Operands); |
9847 | case MCK_off: |
9848 | return parseTokenOp("off", Operands); |
9849 | case MCK_row_95_en: |
9850 | return parseTokenOp("row_en", Operands); |
9851 | case MCK_gds: |
9852 | return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS); |
9853 | case MCK_tfe: |
9854 | return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE); |
9855 | } |
9856 | return tryCustomParseOperand(Operands, MCK); |
9857 | } |
9858 | |
9859 | // This function should be defined after the auto-generated include so that |
9860 | // the MatchClassKind enum is defined. |
9861 | unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, |
9862 | unsigned Kind) { |
9863 | // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). |
9864 | // But MatchInstructionImpl() expects a token and fails to validate the |
9865 | // operand. This method checks whether we were given an immediate operand |
9866 | // where the matcher expects the corresponding token. |
9867 | AMDGPUOperand &Operand = (AMDGPUOperand&)Op; |
9868 | switch (Kind) { |
9869 | case MCK_addr64: |
9870 | return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; |
9871 | case MCK_gds: |
9872 | return Operand.isGDS() ? Match_Success : Match_InvalidOperand; |
9873 | case MCK_lds: |
9874 | return Operand.isLDS() ? Match_Success : Match_InvalidOperand; |
9875 | case MCK_idxen: |
9876 | return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; |
9877 | case MCK_offen: |
9878 | return Operand.isOffen() ? Match_Success : Match_InvalidOperand; |
9879 | case MCK_tfe: |
9880 | return Operand.isTFE() ? Match_Success : Match_InvalidOperand; |
9881 | case MCK_SSrc_b32: |
9882 | // When operands have expression values, they will return true for isToken, |
9883 | // because it is not possible to distinguish between a token and an |
9884 | // expression at parse time. MatchInstructionImpl() will always try to |
9885 | // match an operand as a token, when isToken returns true, and when the |
9886 | // name of the expression is not a valid token, the match will fail, |
9887 | // so we need to handle it here. |
9888 | return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand; |
9889 | case MCK_SSrc_f32: |
9890 | return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand; |
9891 | case MCK_SOPPBrTarget: |
9892 | return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand; |
9893 | case MCK_VReg32OrOff: |
9894 | return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; |
9895 | case MCK_InterpSlot: |
9896 | return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; |
9897 | case MCK_InterpAttr: |
9898 | return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; |
9899 | case MCK_InterpAttrChan: |
9900 | return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand; |
9901 | case MCK_SReg_64: |
9902 | case MCK_SReg_64_XEXEC: |
9903 | // Null is defined as a 32-bit register but |
9904 | // it should also be enabled with 64-bit operands or larger. |
9905 | // The following code enables it for SReg_64 and larger operands |
9906 | // used as source and destination. Remaining source |
9907 | // operands are handled in isInlinableImm. |
9908 | case MCK_SReg_96: |
9909 | case MCK_SReg_128: |
9910 | case MCK_SReg_256: |
9911 | case MCK_SReg_512: |
9912 | return Operand.isNull() ? Match_Success : Match_InvalidOperand; |
9913 | default: |
9914 | return Match_InvalidOperand; |
9915 | } |
9916 | } |
9917 | |
9918 | //===----------------------------------------------------------------------===// |
9919 | // endpgm |
9920 | //===----------------------------------------------------------------------===// |
9921 | |
9922 | ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) { |
9923 | SMLoc S = getLoc(); |
9924 | int64_t Imm = 0; |
9925 | |
9926 | if (!parseExpr(Imm)) { |
9927 | // The operand is optional, if not present default to 0 |
9928 | Imm = 0; |
9929 | } |
9930 | |
9931 | if (!isUInt<16>(Imm)) |
9932 | return Error(S, "expected a 16-bit value"); |
9933 | |
9934 | Operands.push_back( |
9935 | AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); |
9936 | return ParseStatus::Success; |
9937 | } |
9938 | |
9939 | bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } |
9940 | |
9941 | //===----------------------------------------------------------------------===// |
9942 | // Split Barrier |
9943 | //===----------------------------------------------------------------------===// |
9944 | |
9945 | bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); } |
9946 | |