AMDGPUAsmParser.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp]

1	//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "AMDKernelCodeT.h"
10	#include "MCTargetDesc/AMDGPUInstPrinter.h"
11	#include "MCTargetDesc/AMDGPUMCAsmInfo.h"
12	#include "MCTargetDesc/AMDGPUMCExpr.h"
13	#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
14	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15	#include "MCTargetDesc/AMDGPUTargetStreamer.h"
16	#include "SIDefines.h"
17	#include "SIInstrInfo.h"
18	#include "TargetInfo/AMDGPUTargetInfo.h"
19	#include "Utils/AMDGPUAsmUtils.h"
20	#include "Utils/AMDGPUBaseInfo.h"
21	#include "Utils/AMDKernelCodeTUtils.h"
22	#include "llvm/ADT/APFloat.h"
23	#include "llvm/ADT/SmallBitVector.h"
24	#include "llvm/ADT/StringSet.h"
25	#include "llvm/ADT/Twine.h"
26	#include "llvm/BinaryFormat/ELF.h"
27	#include "llvm/CodeGenTypes/MachineValueType.h"
28	#include "llvm/MC/MCAsmInfo.h"
29	#include "llvm/MC/MCContext.h"
30	#include "llvm/MC/MCExpr.h"
31	#include "llvm/MC/MCInst.h"
32	#include "llvm/MC/MCInstrDesc.h"
33	#include "llvm/MC/MCParser/AsmLexer.h"
34	#include "llvm/MC/MCParser/MCAsmParser.h"
35	#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
36	#include "llvm/MC/MCParser/MCTargetAsmParser.h"
37	#include "llvm/MC/MCRegisterInfo.h"
38	#include "llvm/MC/MCSymbol.h"
39	#include "llvm/MC/TargetRegistry.h"
40	#include "llvm/Support/AMDGPUMetadata.h"
41	#include "llvm/Support/AMDGPUObjLinkingInfo.h"
42	#include "llvm/Support/AMDHSAKernelDescriptor.h"
43	#include "llvm/Support/Casting.h"
44	#include "llvm/Support/Compiler.h"
45	#include "llvm/Support/MathExtras.h"
46	#include "llvm/TargetParser/AMDGPUTargetParser.h"
47	#include <optional>
48
49	using namespace llvm;
50	using namespace llvm::AMDGPU;
51	using namespace llvm::amdhsa;
52
53	namespace {
54
55	class AMDGPUAsmParser;
56
57	enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
58
59	//===----------------------------------------------------------------------===//
60	// Operand
61	//===----------------------------------------------------------------------===//
62
63	class AMDGPUOperand : public MCParsedAsmOperand {
64	enum KindTy {
65	Token,
66	Immediate,
67	Register,
68	Expression
69	} Kind;
70
71	SMLoc StartLoc, EndLoc;
72	const AMDGPUAsmParser *AsmParser;
73
74	public:
75	AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
76	: Kind(Kind_), AsmParser(AsmParser_) {}
77
78	using Ptr = std::unique_ptr<AMDGPUOperand>;
79
80	struct Modifiers {
81	bool Abs = false;
82	bool Neg = false;
83	bool Sext = false;
84	LitModifier Lit = LitModifier::None;
85
86	bool hasFPModifiers() const { return Abs \|\| Neg; }
87	bool hasIntModifiers() const { return Sext; }
88	bool hasModifiers() const { return hasFPModifiers() \|\| hasIntModifiers(); }
89	bool isForcedLit() const { return Lit == LitModifier::Lit; }
90	bool isForcedLit64() const { return Lit == LitModifier::Lit64; }
91
92	int64_t getFPModifiersOperand() const {
93	int64_t Operand = `0`;
94	Operand \|= Abs ? SISrcMods::ABS : `0u`;
95	Operand \|= Neg ? SISrcMods::NEG : `0u`;
96	return Operand;
97	}
98
99	int64_t getIntModifiersOperand() const {
100	int64_t Operand = `0`;
101	Operand \|= Sext ? SISrcMods::SEXT : `0u`;
102	return Operand;
103	}
104
105	int64_t getModifiersOperand() const {
106	assert(!(hasFPModifiers() && hasIntModifiers())
107	&& "fp and int modifiers should not be used simultaneously");
108	if (hasFPModifiers())
109	return getFPModifiersOperand();
110	if (hasIntModifiers())
111	return getIntModifiersOperand();
112	return `0`;
113	}
114
115	friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
116	};
117
118	enum ImmTy {
119	ImmTyNone,
120	ImmTyGDS,
121	ImmTyLDS,
122	ImmTyOffen,
123	ImmTyIdxen,
124	ImmTyAddr64,
125	ImmTyOffset,
126	ImmTyInstOffset,
127	ImmTyOffset0,
128	ImmTyOffset1,
129	ImmTySMEMOffsetMod,
130	ImmTyCPol,
131	ImmTyTFE,
132	ImmTyIsAsync,
133	ImmTyD16,
134	ImmTyClamp,
135	ImmTyOModSI,
136	ImmTySDWADstSel,
137	ImmTySDWASrc0Sel,
138	ImmTySDWASrc1Sel,
139	ImmTySDWADstUnused,
140	ImmTyDMask,
141	ImmTyDim,
142	ImmTyUNorm,
143	ImmTyDA,
144	ImmTyR128A16,
145	ImmTyA16,
146	ImmTyLWE,
147	ImmTyExpTgt,
148	ImmTyExpCompr,
149	ImmTyExpVM,
150	ImmTyDone,
151	ImmTyRowEn,
152	ImmTyFORMAT,
153	ImmTyHwreg,
154	ImmTyOff,
155	ImmTySendMsg,
156	ImmTyWaitEvent,
157	ImmTyInterpSlot,
158	ImmTyInterpAttr,
159	ImmTyInterpAttrChan,
160	ImmTyOpSel,
161	ImmTyOpSelHi,
162	ImmTyNegLo,
163	ImmTyNegHi,
164	ImmTyIndexKey8bit,
165	ImmTyIndexKey16bit,
166	ImmTyIndexKey32bit,
167	ImmTyDPP8,
168	ImmTyDppCtrl,
169	ImmTyDppRowMask,
170	ImmTyDppBankMask,
171	ImmTyDppBoundCtrl,
172	ImmTyDppFI,
173	ImmTySwizzle,
174	ImmTyGprIdxMode,
175	ImmTyHigh,
176	ImmTyBLGP,
177	ImmTyCBSZ,
178	ImmTyABID,
179	ImmTyEndpgm,
180	ImmTyWaitVDST,
181	ImmTyWaitEXP,
182	ImmTyWaitVAVDst,
183	ImmTyWaitVMVSrc,
184	ImmTyBitOp3,
185	ImmTyMatrixAFMT,
186	ImmTyMatrixBFMT,
187	ImmTyMatrixAScale,
188	ImmTyMatrixBScale,
189	ImmTyMatrixAScaleFmt,
190	ImmTyMatrixBScaleFmt,
191	ImmTyMatrixAReuse,
192	ImmTyMatrixBReuse,
193	ImmTyScaleSel,
194	ImmTyByteSel,
195	};
196
197	private:
198	struct TokOp {
199	const char *Data;
200	unsigned Length;
201	};
202
203	struct ImmOp {
204	int64_t Val;
205	ImmTy Type;
206	bool IsFPImm;
207	Modifiers Mods;
208	};
209
210	struct RegOp {
211	MCRegister RegNo;
212	Modifiers Mods;
213	};
214
215	union {
216	TokOp Tok;
217	ImmOp Imm;
218	RegOp Reg;
219	const MCExpr *Expr;
220	};
221
222	// The index of the associated MCInst operand.
223	mutable int MCOpIdx = -`1`;
224
225	public:
226	bool isToken() const override { return Kind == Token; }
227
228	bool isSymbolRefExpr() const {
229	return isExpr() && Expr && isa<MCSymbolRefExpr>(Val: Expr);
230	}
231
232	bool isImm() const override {
233	return Kind == Immediate;
234	}
235
236	bool isInlinableImm(MVT type) const;
237	bool isLiteralImm(MVT type) const;
238
239	bool isRegKind() const {
240	return Kind == Register;
241	}
242
243	bool isReg() const override {
244	return isRegKind() && !hasModifiers();
245	}
246
247	bool isRegOrInline(unsigned RCID, MVT type) const {
248	return isRegClass(RCID) \|\| isInlinableImm(type);
249	}
250
251	bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
252	return isRegOrInline(RCID, type) \|\| isLiteralImm(type);
253	}
254
255	bool isRegOrImmWithInt16InputMods() const {
256	return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
257	}
258
259	template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
260	return isRegOrImmWithInputMods(
261	RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::i16);
262	}
263
264	bool isRegOrImmWithInt32InputMods() const {
265	return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
266	}
267
268	bool isRegOrInlineImmWithInt16InputMods() const {
269	return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
270	}
271
272	template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
273	return isRegOrInline(
274	RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::i16);
275	}
276
277	bool isRegOrInlineImmWithInt32InputMods() const {
278	return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
279	}
280
281	bool isRegOrImmWithInt64InputMods() const {
282	return isRegOrImmWithInputMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::i64);
283	}
284
285	bool isRegOrImmWithFP16InputMods() const {
286	return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f16);
287	}
288
289	template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
290	return isRegOrImmWithInputMods(
291	RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::f16);
292	}
293
294	bool isRegOrImmWithFP32InputMods() const {
295	return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
296	}
297
298	bool isRegOrImmWithFP64InputMods() const {
299	return isRegOrImmWithInputMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
300	}
301
302	template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
303	return isRegOrInline(
304	RCID: IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, type: MVT::f16);
305	}
306
307	bool isRegOrInlineImmWithFP32InputMods() const {
308	return isRegOrInline(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
309	}
310
311	bool isRegOrInlineImmWithFP64InputMods() const {
312	return isRegOrInline(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
313	}
314
315	bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }
316
317	bool isVRegWithFP32InputMods() const {
318	return isVRegWithInputMods(RCID: AMDGPU::VGPR_32RegClassID);
319	}
320
321	bool isVRegWithFP64InputMods() const {
322	return isVRegWithInputMods(RCID: AMDGPU::VReg_64RegClassID);
323	}
324
325	bool isPackedFP16InputMods() const {
326	return isRegOrImmWithInputMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::v2f16);
327	}
328
329	bool isPackedVGPRFP32InputMods() const {
330	return isRegOrImmWithInputMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::v2f32);
331	}
332
333	bool isVReg() const {
334	return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) \|\|
335	isRegClass(RCID: AMDGPU::VReg_64RegClassID) \|\|
336	isRegClass(RCID: AMDGPU::VReg_96RegClassID) \|\|
337	isRegClass(RCID: AMDGPU::VReg_128RegClassID) \|\|
338	isRegClass(RCID: AMDGPU::VReg_160RegClassID) \|\|
339	isRegClass(RCID: AMDGPU::VReg_192RegClassID) \|\|
340	isRegClass(RCID: AMDGPU::VReg_256RegClassID) \|\|
341	isRegClass(RCID: AMDGPU::VReg_512RegClassID) \|\|
342	isRegClass(RCID: AMDGPU::VReg_1024RegClassID);
343	}
344
345	bool isVReg32() const {
346	return isRegClass(RCID: AMDGPU::VGPR_32RegClassID);
347	}
348
349	bool isVReg32OrOff() const {
350	return isOff() \|\| isVReg32();
351	}
352
353	bool isNull() const {
354	return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
355	}
356
357	bool isAV_LdSt_32_Align2_RegOp() const {
358	return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) \|\|
359	isRegClass(RCID: AMDGPU::AGPR_32RegClassID);
360	}
361
362	bool isVRegWithInputMods() const;
363	template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
364	template <bool IsFake16> bool isT16VRegWithInputMods() const;
365
366	bool isSDWAOperand(MVT type) const;
367	bool isSDWAFP16Operand() const;
368	bool isSDWAFP32Operand() const;
369	bool isSDWAInt16Operand() const;
370	bool isSDWAInt32Operand() const;
371
372	bool isImmTy(ImmTy ImmT) const {
373	return isImm() && Imm.Type == ImmT;
374	}
375
376	template <ImmTy Ty> bool isImmTy() const { return isImmTy(ImmT: Ty); }
377
378	bool isImmLiteral() const { return isImmTy(ImmT: ImmTyNone); }
379
380	bool isImmModifier() const {
381	return isImm() && Imm.Type != ImmTyNone;
382	}
383
384	bool isOModSI() const { return isImmTy(ImmT: ImmTyOModSI); }
385	bool isDim() const { return isImmTy(ImmT: ImmTyDim); }
386	bool isR128A16() const { return isImmTy(ImmT: ImmTyR128A16); }
387	bool isOff() const { return isImmTy(ImmT: ImmTyOff); }
388	bool isExpTgt() const { return isImmTy(ImmT: ImmTyExpTgt); }
389	bool isOffen() const { return isImmTy(ImmT: ImmTyOffen); }
390	bool isIdxen() const { return isImmTy(ImmT: ImmTyIdxen); }
391	bool isAddr64() const { return isImmTy(ImmT: ImmTyAddr64); }
392	bool isSMEMOffsetMod() const { return isImmTy(ImmT: ImmTySMEMOffsetMod); }
393	bool isFlatOffset() const { return isImmTy(ImmT: ImmTyOffset) \|\| isImmTy(ImmT: ImmTyInstOffset); }
394	bool isGDS() const { return isImmTy(ImmT: ImmTyGDS); }
395	bool isLDS() const { return isImmTy(ImmT: ImmTyLDS); }
396	bool isCPol() const { return isImmTy(ImmT: ImmTyCPol); }
397	bool isIndexKey8bit() const { return isImmTy(ImmT: ImmTyIndexKey8bit); }
398	bool isIndexKey16bit() const { return isImmTy(ImmT: ImmTyIndexKey16bit); }
399	bool isIndexKey32bit() const { return isImmTy(ImmT: ImmTyIndexKey32bit); }
400	bool isMatrixAFMT() const { return isImmTy(ImmT: ImmTyMatrixAFMT); }
401	bool isMatrixBFMT() const { return isImmTy(ImmT: ImmTyMatrixBFMT); }
402	bool isMatrixAScale() const { return isImmTy(ImmT: ImmTyMatrixAScale); }
403	bool isMatrixBScale() const { return isImmTy(ImmT: ImmTyMatrixBScale); }
404	bool isMatrixAScaleFmt() const { return isImmTy(ImmT: ImmTyMatrixAScaleFmt); }
405	bool isMatrixBScaleFmt() const { return isImmTy(ImmT: ImmTyMatrixBScaleFmt); }
406	bool isMatrixAReuse() const { return isImmTy(ImmT: ImmTyMatrixAReuse); }
407	bool isMatrixBReuse() const { return isImmTy(ImmT: ImmTyMatrixBReuse); }
408	bool isTFE() const { return isImmTy(ImmT: ImmTyTFE); }
409	bool isFORMAT() const { return isImmTy(ImmT: ImmTyFORMAT) && isUInt<`7`>(x: getImm()); }
410	bool isDppFI() const { return isImmTy(ImmT: ImmTyDppFI); }
411	bool isSDWADstSel() const { return isImmTy(ImmT: ImmTySDWADstSel); }
412	bool isSDWASrc0Sel() const { return isImmTy(ImmT: ImmTySDWASrc0Sel); }
413	bool isSDWASrc1Sel() const { return isImmTy(ImmT: ImmTySDWASrc1Sel); }
414	bool isSDWADstUnused() const { return isImmTy(ImmT: ImmTySDWADstUnused); }
415	bool isInterpSlot() const { return isImmTy(ImmT: ImmTyInterpSlot); }
416	bool isInterpAttr() const { return isImmTy(ImmT: ImmTyInterpAttr); }
417	bool isInterpAttrChan() const { return isImmTy(ImmT: ImmTyInterpAttrChan); }
418	bool isOpSel() const { return isImmTy(ImmT: ImmTyOpSel); }
419	bool isOpSelHi() const { return isImmTy(ImmT: ImmTyOpSelHi); }
420	bool isNegLo() const { return isImmTy(ImmT: ImmTyNegLo); }
421	bool isNegHi() const { return isImmTy(ImmT: ImmTyNegHi); }
422	bool isBitOp3() const { return isImmTy(ImmT: ImmTyBitOp3) && isUInt<`8`>(x: getImm()); }
423	bool isDone() const { return isImmTy(ImmT: ImmTyDone); }
424	bool isRowEn() const { return isImmTy(ImmT: ImmTyRowEn); }
425
426	bool isRegOrImm() const {
427	return isReg() \|\| isImm();
428	}
429
430	bool isRegClass(unsigned RCID) const;
431
432	bool isInlineValue() const;
433
434	bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
435	return isRegOrInline(RCID, type) && !hasModifiers();
436	}
437
438	bool isSCSrcB16() const {
439	return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::i16);
440	}
441
442	bool isSCSrcV2B16() const {
443	return isSCSrcB16();
444	}
445
446	bool isSCSrc_b32() const {
447	return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::i32);
448	}
449
450	bool isSCSrc_b64() const {
451	return isRegOrInlineNoMods(RCID: AMDGPU::SReg_64RegClassID, type: MVT::i64);
452	}
453
454	bool isBoolReg() const;
455
456	bool isSCSrcF16() const {
457	return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::f16);
458	}
459
460	bool isSCSrcV2F16() const {
461	return isSCSrcF16();
462	}
463
464	bool isSCSrcF32() const {
465	return isRegOrInlineNoMods(RCID: AMDGPU::SReg_32RegClassID, type: MVT::f32);
466	}
467
468	bool isSCSrcF64() const {
469	return isRegOrInlineNoMods(RCID: AMDGPU::SReg_64RegClassID, type: MVT::f64);
470	}
471
472	bool isSSrc_b32() const {
473	return isSCSrc_b32() \|\| isLiteralImm(type: MVT::i32) \|\| isExpr();
474	}
475
476	bool isSSrc_b16() const { return isSCSrcB16() \|\| isLiteralImm(type: MVT::i16); }
477
478	bool isSSrcV2B16() const {
479	llvm_unreachable("cannot happen");
480	return isSSrc_b16();
481	}
482
483	bool isSSrc_b64() const {
484	// TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
485	// See isVSrc64().
486	return isSCSrc_b64() \|\| isLiteralImm(type: MVT::i64) \|\|
487	(((const MCTargetAsmParser *)AsmParser)
488	->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
489	isExpr());
490	}
491
492	bool isSSrc_f32() const {
493	return isSCSrc_b32() \|\| isLiteralImm(type: MVT::f32) \|\| isExpr();
494	}
495
496	bool isSSrcF64() const { return isSCSrc_b64() \|\| isLiteralImm(type: MVT::f64); }
497
498	bool isSSrc_bf16() const { return isSCSrcB16() \|\| isLiteralImm(type: MVT::bf16); }
499
500	bool isSSrc_f16() const { return isSCSrcB16() \|\| isLiteralImm(type: MVT::f16); }
501
502	bool isSSrcV2F16() const {
503	llvm_unreachable("cannot happen");
504	return isSSrc_f16();
505	}
506
507	bool isSSrcV2FP32() const {
508	llvm_unreachable("cannot happen");
509	return isSSrc_f32();
510	}
511
512	bool isSCSrcV2FP32() const {
513	llvm_unreachable("cannot happen");
514	return isSCSrcF32();
515	}
516
517	bool isSSrcV2INT32() const {
518	llvm_unreachable("cannot happen");
519	return isSSrc_b32();
520	}
521
522	bool isSCSrcV2INT32() const {
523	llvm_unreachable("cannot happen");
524	return isSCSrc_b32();
525	}
526
527	bool isSSrcOrLds_b32() const {
528	return isRegOrInlineNoMods(RCID: AMDGPU::SRegOrLds_32RegClassID, type: MVT::i32) \|\|
529	isLiteralImm(type: MVT::i32) \|\| isExpr();
530	}
531
532	bool isVCSrc_b32() const {
533	return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32);
534	}
535
536	bool isVCSrc_b32_Lo256() const {
537	return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo256RegClassID, type: MVT::i32);
538	}
539
540	bool isVCSrc_b64_Lo256() const {
541	return isRegOrInlineNoMods(RCID: AMDGPU::VS_64_Lo256RegClassID, type: MVT::i64);
542	}
543
544	bool isVCSrc_b64() const {
545	return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::i64);
546	}
547
548	bool isVCSrcT_b16() const {
549	return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::i16);
550	}
551
552	bool isVCSrcTB16_Lo128() const {
553	return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::i16);
554	}
555
556	bool isVCSrcFake16B16_Lo128() const {
557	return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::i16);
558	}
559
560	bool isVCSrc_b16() const {
561	return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16);
562	}
563
564	bool isVCSrc_v2b16() const { return isVCSrc_b16(); }
565
566	bool isVCSrc_f32() const {
567	return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32);
568	}
569
570	bool isVCSrc_f64() const {
571	return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64);
572	}
573
574	bool isVCSrcTBF16() const {
575	return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::bf16);
576	}
577
578	bool isVCSrcT_f16() const {
579	return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::f16);
580	}
581
582	bool isVCSrcT_bf16() const {
583	return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::f16);
584	}
585
586	bool isVCSrcTBF16_Lo128() const {
587	return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::bf16);
588	}
589
590	bool isVCSrcTF16_Lo128() const {
591	return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::f16);
592	}
593
594	bool isVCSrcFake16BF16_Lo128() const {
595	return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::bf16);
596	}
597
598	bool isVCSrcFake16F16_Lo128() const {
599	return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::f16);
600	}
601
602	bool isVCSrc_bf16() const {
603	return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::bf16);
604	}
605
606	bool isVCSrc_f16() const {
607	return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f16);
608	}
609
610	bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }
611
612	bool isVCSrc_v2f16() const { return isVCSrc_f16(); }
613
614	bool isVSrc_b32() const {
615	return isVCSrc_f32() \|\| isLiteralImm(type: MVT::i32) \|\| isExpr();
616	}
617
618	bool isVSrc_b64() const { return isVCSrc_f64() \|\| isLiteralImm(type: MVT::i64); }
619
620	bool isVSrc_v2b64() const {
621	return isRegOrInlineNoMods(RCID: AMDGPU::VS_128RegClassID, type: MVT::i64) \|\|
622	isLiteralImm(type: MVT::i64);
623	}
624
625	bool isVSrc_v2f64() const {
626	return isRegOrInlineNoMods(RCID: AMDGPU::VS_128RegClassID, type: MVT::f64) \|\|
627	isLiteralImm(type: MVT::f64);
628	}
629
630	bool isVSrcT_b16() const { return isVCSrcT_b16() \|\| isLiteralImm(type: MVT::i16); }
631
632	bool isVSrcT_b16_Lo128() const {
633	return isVCSrcTB16_Lo128() \|\| isLiteralImm(type: MVT::i16);
634	}
635
636	bool isVSrcFake16_b16_Lo128() const {
637	return isVCSrcFake16B16_Lo128() \|\| isLiteralImm(type: MVT::i16);
638	}
639
640	bool isVSrc_b16() const { return isVCSrc_b16() \|\| isLiteralImm(type: MVT::i16); }
641
642	bool isVSrc_v2b16() const { return isVSrc_b16() \|\| isLiteralImm(type: MVT::v2i16); }
643
644	bool isVCSrcV2FP32() const { return isVCSrc_f64(); }
645
646	bool isVSrc_v2f32() const { return isVSrc_f64() \|\| isLiteralImm(type: MVT::v2f32); }
647
648	bool isVCSrc_v2b32() const { return isVCSrc_b64(); }
649
650	bool isVSrc_v2b32() const { return isVSrc_b64() \|\| isLiteralImm(type: MVT::v2i32); }
651
652	bool isVSrc_f32() const {
653	return isVCSrc_f32() \|\| isLiteralImm(type: MVT::f32) \|\| isExpr();
654	}
655
656	bool isVSrc_f64() const { return isVCSrc_f64() \|\| isLiteralImm(type: MVT::f64); }
657
658	bool isVSrcT_bf16() const { return isVCSrcTBF16() \|\| isLiteralImm(type: MVT::bf16); }
659
660	bool isVSrcT_f16() const { return isVCSrcT_f16() \|\| isLiteralImm(type: MVT::f16); }
661
662	bool isVSrcT_bf16_Lo128() const {
663	return isVCSrcTBF16_Lo128() \|\| isLiteralImm(type: MVT::bf16);
664	}
665
666	bool isVSrcT_f16_Lo128() const {
667	return isVCSrcTF16_Lo128() \|\| isLiteralImm(type: MVT::f16);
668	}
669
670	bool isVSrcFake16_bf16_Lo128() const {
671	return isVCSrcFake16BF16_Lo128() \|\| isLiteralImm(type: MVT::bf16);
672	}
673
674	bool isVSrcFake16_f16_Lo128() const {
675	return isVCSrcFake16F16_Lo128() \|\| isLiteralImm(type: MVT::f16);
676	}
677
678	bool isVSrc_bf16() const { return isVCSrc_bf16() \|\| isLiteralImm(type: MVT::bf16); }
679
680	bool isVSrc_f16() const { return isVCSrc_f16() \|\| isLiteralImm(type: MVT::f16); }
681
682	bool isVSrc_v2bf16() const {
683	return isVSrc_bf16() \|\| isLiteralImm(type: MVT::v2bf16);
684	}
685
686	bool isVSrc_v2f16() const { return isVSrc_f16() \|\| isLiteralImm(type: MVT::v2f16); }
687
688	bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
689
690	bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
691
692	bool isVISrcB32() const {
693	return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i32);
694	}
695
696	bool isVISrcB16() const {
697	return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i16);
698	}
699
700	bool isVISrcV2B16() const {
701	return isVISrcB16();
702	}
703
704	bool isVISrcF32() const {
705	return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f32);
706	}
707
708	bool isVISrcF16() const {
709	return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f16);
710	}
711
712	bool isVISrcV2F16() const {
713	return isVISrcF16() \|\| isVISrcB32();
714	}
715
716	bool isVISrc_64_bf16() const {
717	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::bf16);
718	}
719
720	bool isVISrc_64_f16() const {
721	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f16);
722	}
723
724	bool isVISrc_64_b32() const {
725	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32);
726	}
727
728	bool isVISrc_64B64() const {
729	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i64);
730	}
731
732	bool isVISrc_64_f64() const {
733	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f64);
734	}
735
736	bool isVISrc_64V2FP32() const {
737	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f32);
738	}
739
740	bool isVISrc_64V2INT32() const {
741	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32);
742	}
743
744	bool isVISrc_256_b32() const {
745	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32);
746	}
747
748	bool isVISrc_256_f32() const {
749	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32);
750	}
751
752	bool isVISrc_256B64() const {
753	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i64);
754	}
755
756	bool isVISrc_256_f64() const {
757	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f64);
758	}
759
760	bool isVISrc_512_f64() const {
761	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f64);
762	}
763
764	bool isVISrc_128B16() const {
765	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i16);
766	}
767
768	bool isVISrc_128V2B16() const {
769	return isVISrc_128B16();
770	}
771
772	bool isVISrc_128_b32() const {
773	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i32);
774	}
775
776	bool isVISrc_128_f32() const {
777	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f32);
778	}
779
780	bool isVISrc_256V2FP32() const {
781	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32);
782	}
783
784	bool isVISrc_256V2INT32() const {
785	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32);
786	}
787
788	bool isVISrc_512_b32() const {
789	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i32);
790	}
791
792	bool isVISrc_512B16() const {
793	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i16);
794	}
795
796	bool isVISrc_512V2B16() const {
797	return isVISrc_512B16();
798	}
799
800	bool isVISrc_512_f32() const {
801	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f32);
802	}
803
804	bool isVISrc_512F16() const {
805	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f16);
806	}
807
808	bool isVISrc_512V2F16() const {
809	return isVISrc_512F16() \|\| isVISrc_512_b32();
810	}
811
812	bool isVISrc_1024_b32() const {
813	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i32);
814	}
815
816	bool isVISrc_1024B16() const {
817	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i16);
818	}
819
820	bool isVISrc_1024V2B16() const {
821	return isVISrc_1024B16();
822	}
823
824	bool isVISrc_1024_f32() const {
825	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f32);
826	}
827
828	bool isVISrc_1024F16() const {
829	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f16);
830	}
831
832	bool isVISrc_1024V2F16() const {
833	return isVISrc_1024F16() \|\| isVISrc_1024_b32();
834	}
835
836	bool isAISrcB32() const {
837	return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i32);
838	}
839
840	bool isAISrcB16() const {
841	return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i16);
842	}
843
844	bool isAISrcV2B16() const {
845	return isAISrcB16();
846	}
847
848	bool isAISrcF32() const {
849	return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f32);
850	}
851
852	bool isAISrcF16() const {
853	return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f16);
854	}
855
856	bool isAISrcV2F16() const {
857	return isAISrcF16() \|\| isAISrcB32();
858	}
859
860	bool isAISrc_64B64() const {
861	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::i64);
862	}
863
864	bool isAISrc_64_f64() const {
865	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::f64);
866	}
867
868	bool isAISrc_128_b32() const {
869	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i32);
870	}
871
872	bool isAISrc_128B16() const {
873	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i16);
874	}
875
876	bool isAISrc_128V2B16() const {
877	return isAISrc_128B16();
878	}
879
880	bool isAISrc_128_f32() const {
881	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f32);
882	}
883
884	bool isAISrc_128F16() const {
885	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f16);
886	}
887
888	bool isAISrc_128V2F16() const {
889	return isAISrc_128F16() \|\| isAISrc_128_b32();
890	}
891
892	bool isVISrc_128_bf16() const {
893	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::bf16);
894	}
895
896	bool isVISrc_128_f16() const {
897	return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f16);
898	}
899
900	bool isVISrc_128V2F16() const {
901	return isVISrc_128_f16() \|\| isVISrc_128_b32();
902	}
903
904	bool isAISrc_256B64() const {
905	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::i64);
906	}
907
908	bool isAISrc_256_f64() const {
909	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::f64);
910	}
911
912	bool isAISrc_512_b32() const {
913	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i32);
914	}
915
916	bool isAISrc_512B16() const {
917	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i16);
918	}
919
920	bool isAISrc_512V2B16() const {
921	return isAISrc_512B16();
922	}
923
924	bool isAISrc_512_f32() const {
925	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f32);
926	}
927
928	bool isAISrc_512F16() const {
929	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f16);
930	}
931
932	bool isAISrc_512V2F16() const {
933	return isAISrc_512F16() \|\| isAISrc_512_b32();
934	}
935
936	bool isAISrc_1024_b32() const {
937	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i32);
938	}
939
940	bool isAISrc_1024B16() const {
941	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i16);
942	}
943
944	bool isAISrc_1024V2B16() const {
945	return isAISrc_1024B16();
946	}
947
948	bool isAISrc_1024_f32() const {
949	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f32);
950	}
951
952	bool isAISrc_1024F16() const {
953	return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f16);
954	}
955
956	bool isAISrc_1024V2F16() const {
957	return isAISrc_1024F16() \|\| isAISrc_1024_b32();
958	}
959
960	bool isKImmFP32() const {
961	return isLiteralImm(type: MVT::f32);
962	}
963
964	bool isKImmFP16() const {
965	return isLiteralImm(type: MVT::f16);
966	}
967
968	bool isKImmFP64() const { return isLiteralImm(type: MVT::f64); }
969
970	bool isMem() const override {
971	return false;
972	}
973
974	bool isExpr() const {
975	return Kind == Expression;
976	}
977
978	bool isSOPPBrTarget() const { return isExpr() \|\| isImm(); }
979
980	bool isSWaitCnt() const;
981	bool isDepCtr() const;
982	bool isSDelayALU() const;
983	bool isHwreg() const;
984	bool isSendMsg() const;
985	bool isWaitEvent() const;
986	bool isSplitBarrier() const;
987	bool isSwizzle() const;
988	bool isSMRDOffset8() const;
989	bool isSMEMOffset() const;
990	bool isSMRDLiteralOffset() const;
991	bool isDPP8() const;
992	bool isDPPCtrl() const;
993	bool isBLGP() const;
994	bool isGPRIdxMode() const;
995	bool isS16Imm() const;
996	bool isU16Imm() const;
997	bool isEndpgm() const;
998
999	auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
1000	return [this, P]() { return P (*this); };
1001	}
1002
1003	StringRef getToken() const {
1004	assert(isToken());
1005	return StringRef (Tok.Data, Tok.Length);
1006	}
1007
1008	int64_t getImm() const {
1009	assert(isImm());
1010	return Imm.Val;
1011	}
1012
1013	void setImm(int64_t Val) {
1014	assert(isImm());
1015	Imm.Val = Val;
1016	}
1017
1018	ImmTy getImmTy() const {
1019	assert(isImm());
1020	return Imm.Type;
1021	}
1022
1023	MCRegister getReg() const override {
1024	assert(isRegKind());
1025	return Reg.RegNo;
1026	}
1027
1028	SMLoc getStartLoc() const override {
1029	return StartLoc;
1030	}
1031
1032	SMLoc getEndLoc() const override {
1033	return EndLoc;
1034	}
1035
1036	SMRange getLocRange() const {
1037	return SMRange (StartLoc, EndLoc);
1038	}
1039
1040	int getMCOpIdx() const { return MCOpIdx; }
1041
1042	Modifiers getModifiers() const {
1043	assert(isRegKind() \|\| isImmTy(ImmTyNone));
1044	return isRegKind() ? Reg.Mods : Imm.Mods;
1045	}
1046
1047	void setModifiers(Modifiers Mods) {
1048	assert(isRegKind() \|\| isImmTy(ImmTyNone));
1049	if (isRegKind())
1050	Reg.Mods = Mods;
1051	else
1052	Imm.Mods = Mods;
1053	}
1054
1055	bool hasModifiers() const {
1056	return getModifiers().hasModifiers();
1057	}
1058
1059	bool hasFPModifiers() const {
1060	return getModifiers().hasFPModifiers();
1061	}
1062
1063	bool hasIntModifiers() const {
1064	return getModifiers().hasIntModifiers();
1065	}
1066
1067	bool isForcedLit() const {
1068	return isImmLiteral() && getModifiers().isForcedLit();
1069	}
1070
1071	bool isForcedLit64() const {
1072	return isImmLiteral() && getModifiers().isForcedLit64();
1073	}
1074
1075	uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
1076
1077	void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
1078
1079	void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
1080
1081	void addRegOperands(MCInst &Inst, unsigned N) const;
1082
1083	void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1084	if (isRegKind())
1085	addRegOperands(Inst, N);
1086	else
1087	addImmOperands(Inst, N);
1088	}
1089
1090	void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
1091	Modifiers Mods = getModifiers();
1092	Inst.addOperand(Op: MCOperand::createImm(Val: Mods.getModifiersOperand()));
1093	if (isRegKind()) {
1094	addRegOperands(Inst, N);
1095	} else {
1096	addImmOperands(Inst, N, ApplyModifiers: false);
1097	}
1098	}
1099
1100	void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1101	assert(!hasIntModifiers());
1102	addRegOrImmWithInputModsOperands(Inst, N);
1103	}
1104
1105	void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1106	assert(!hasFPModifiers());
1107	addRegOrImmWithInputModsOperands(Inst, N);
1108	}
1109
1110	void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1111	Modifiers Mods = getModifiers();
1112	Inst.addOperand(Op: MCOperand::createImm(Val: Mods.getModifiersOperand()));
1113	assert(isRegKind());
1114	addRegOperands(Inst, N);
1115	}
1116
1117	void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1118	assert(!hasIntModifiers());
1119	addRegWithInputModsOperands(Inst, N);
1120	}
1121
1122	void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1123	assert(!hasFPModifiers());
1124	addRegWithInputModsOperands(Inst, N);
1125	}
1126
1127	static void printImmTy(raw_ostream& OS, ImmTy Type) {
1128	// clang-format off
1129	switch (Type) {
1130	case ImmTyNone: OS << "None"; break;
1131	case ImmTyGDS: OS << "GDS"; break;
1132	case ImmTyLDS: OS << "LDS"; break;
1133	case ImmTyOffen: OS << "Offen"; break;
1134	case ImmTyIdxen: OS << "Idxen"; break;
1135	case ImmTyAddr64: OS << "Addr64"; break;
1136	case ImmTyOffset: OS << "Offset"; break;
1137	case ImmTyInstOffset: OS << "InstOffset"; break;
1138	case ImmTyOffset0: OS << "Offset0"; break;
1139	case ImmTyOffset1: OS << "Offset1"; break;
1140	case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1141	case ImmTyCPol: OS << "CPol"; break;
1142	case ImmTyIndexKey8bit: OS << "index_key"; break;
1143	case ImmTyIndexKey16bit: OS << "index_key"; break;
1144	case ImmTyIndexKey32bit: OS << "index_key"; break;
1145	case ImmTyTFE: OS << "TFE"; break;
1146	case ImmTyIsAsync: OS << "IsAsync"; break;
1147	case ImmTyD16: OS << "D16"; break;
1148	case ImmTyFORMAT: OS << "FORMAT"; break;
1149	case ImmTyClamp: OS << "Clamp"; break;
1150	case ImmTyOModSI: OS << "OModSI"; break;
1151	case ImmTyDPP8: OS << "DPP8"; break;
1152	case ImmTyDppCtrl: OS << "DppCtrl"; break;
1153	case ImmTyDppRowMask: OS << "DppRowMask"; break;
1154	case ImmTyDppBankMask: OS << "DppBankMask"; break;
1155	case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1156	case ImmTyDppFI: OS << "DppFI"; break;
1157	case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1158	case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1159	case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1160	case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1161	case ImmTyDMask: OS << "DMask"; break;
1162	case ImmTyDim: OS << "Dim"; break;
1163	case ImmTyUNorm: OS << "UNorm"; break;
1164	case ImmTyDA: OS << "DA"; break;
1165	case ImmTyR128A16: OS << "R128A16"; break;
1166	case ImmTyA16: OS << "A16"; break;
1167	case ImmTyLWE: OS << "LWE"; break;
1168	case ImmTyOff: OS << "Off"; break;
1169	case ImmTyExpTgt: OS << "ExpTgt"; break;
1170	case ImmTyExpCompr: OS << "ExpCompr"; break;
1171	case ImmTyExpVM: OS << "ExpVM"; break;
1172	case ImmTyDone: OS << "Done"; break;
1173	case ImmTyRowEn: OS << "RowEn"; break;
1174	case ImmTyHwreg: OS << "Hwreg"; break;
1175	case ImmTySendMsg: OS << "SendMsg"; break;
1176	case ImmTyWaitEvent: OS << "WaitEvent"; break;
1177	case ImmTyInterpSlot: OS << "InterpSlot"; break;
1178	case ImmTyInterpAttr: OS << "InterpAttr"; break;
1179	case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1180	case ImmTyOpSel: OS << "OpSel"; break;
1181	case ImmTyOpSelHi: OS << "OpSelHi"; break;
1182	case ImmTyNegLo: OS << "NegLo"; break;
1183	case ImmTyNegHi: OS << "NegHi"; break;
1184	case ImmTySwizzle: OS << "Swizzle"; break;
1185	case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1186	case ImmTyHigh: OS << "High"; break;
1187	case ImmTyBLGP: OS << "BLGP"; break;
1188	case ImmTyCBSZ: OS << "CBSZ"; break;
1189	case ImmTyABID: OS << "ABID"; break;
1190	case ImmTyEndpgm: OS << "Endpgm"; break;
1191	case ImmTyWaitVDST: OS << "WaitVDST"; break;
1192	case ImmTyWaitEXP: OS << "WaitEXP"; break;
1193	case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
1194	case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
1195	case ImmTyBitOp3: OS << "BitOp3"; break;
1196	case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
1197	case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
1198	case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
1199	case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
1200	case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
1201	case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
1202	case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
1203	case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
1204	case ImmTyScaleSel: OS << "ScaleSel" ; break;
1205	case ImmTyByteSel: OS << "ByteSel" ; break;
1206	}
1207	// clang-format on
1208	}
1209
1210	void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
1211	switch (Kind) {
1212	case Register:
1213	OS << "<register " << AMDGPUInstPrinter::getRegisterName(Reg: getReg())
1214	<< " mods: " << Reg.Mods << `'>'`;
1215	break;
1216	case Immediate:
1217	OS << `'<'` << getImm();
1218	if (getImmTy() != ImmTyNone) {
1219	OS << " type: "; printImmTy(OS, Type: getImmTy());
1220	}
1221	OS << " mods: " << Imm.Mods << `'>'`;
1222	break;
1223	case Token:
1224	OS << `'\''` << getToken() << `'\''`;
1225	break;
1226	case Expression:
1227	OS << "<expr ";
1228	MAI.printExpr(OS, *Expr);
1229	OS << `'>'`;
1230	break;
1231	}
1232	}
1233
1234	static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1235	int64_t Val, SMLoc Loc,
1236	ImmTy Type = ImmTyNone,
1237	bool IsFPImm = false) {
1238	auto Op = std::make_unique<AMDGPUOperand>(args: Immediate, args&: AsmParser);
1239	Op ->Imm.Val = Val;
1240	Op ->Imm.IsFPImm = IsFPImm;
1241	Op ->Imm.Type = Type;
1242	Op ->Imm.Mods = Modifiers ();
1243	Op ->StartLoc = Loc;
1244	Op ->EndLoc = Loc;
1245	return Op;
1246	}
1247
1248	static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1249	StringRef Str, SMLoc Loc,
1250	bool HasExplicitEncodingSize = true) {
1251	auto Res = std::make_unique<AMDGPUOperand>(args: Token, args&: AsmParser);
1252	Res ->Tok.Data = Str.data();
1253	Res ->Tok.Length = Str.size();
1254	Res ->StartLoc = Loc;
1255	Res ->EndLoc = Loc;
1256	return Res;
1257	}
1258
1259	static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1260	MCRegister Reg, SMLoc S, SMLoc E) {
1261	auto Op = std::make_unique<AMDGPUOperand>(args: Register, args&: AsmParser);
1262	Op ->Reg.RegNo = Reg;
1263	Op ->Reg.Mods = Modifiers ();
1264	Op ->StartLoc = S;
1265	Op ->EndLoc = E;
1266	return Op;
1267	}
1268
1269	static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1270	const class MCExpr *Expr, SMLoc S) {
1271	auto Op = std::make_unique<AMDGPUOperand>(args: Expression, args&: AsmParser);
1272	Op ->Expr = Expr;
1273	Op ->StartLoc = S;
1274	Op ->EndLoc = S;
1275	return Op;
1276	}
1277	};
1278
1279	raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1280	OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1281	return OS;
1282	}
1283
1284	//===----------------------------------------------------------------------===//
1285	// AsmParser
1286	//===----------------------------------------------------------------------===//
1287
1288	// TODO: define GET_SUBTARGET_FEATURE_NAME
1289	#define GET_REGISTER_MATCHER
1290	#include "AMDGPUGenAsmMatcher.inc"
1291	#undef GET_REGISTER_MATCHER
1292	#undef GET_SUBTARGET_FEATURE_NAME
1293
1294	// Holds info related to the current kernel, e.g. count of SGPRs used.
1295	// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1296	// .amdgpu_hsa_kernel or at EOF.
1297	class KernelScopeInfo {
1298	int SgprIndexUnusedMin = -`1`;
1299	int VgprIndexUnusedMin = -`1`;
1300	int AgprIndexUnusedMin = -`1`;
1301	MCContext Ctx = nullptr*;
1302	MCSubtargetInfo const MSTI = nullptr*;
1303
1304	void usesSgprAt(int i) {
1305	if (i >= SgprIndexUnusedMin) {
1306	SgprIndexUnusedMin = ++i;
1307	if (Ctx) {
1308	MCSymbol* const Sym =
1309	Ctx->getOrCreateSymbol(Name: Twine (".kernel.sgpr_count"));
1310	Sym->setVariableValue(MCConstantExpr::create(Value: SgprIndexUnusedMin, Ctx&: *Ctx));
1311	}
1312	}
1313	}
1314
1315	void usesVgprAt(int i) {
1316	if (i >= VgprIndexUnusedMin) {
1317	VgprIndexUnusedMin = ++i;
1318	if (Ctx) {
1319	MCSymbol* const Sym =
1320	Ctx->getOrCreateSymbol(Name: Twine (".kernel.vgpr_count"));
1321	int totalVGPR = getTotalNumVGPRs(has90AInsts: isGFX90A(STI: *MSTI), ArgNumAGPR: AgprIndexUnusedMin,
1322	ArgNumVGPR: VgprIndexUnusedMin);
1323	Sym->setVariableValue(MCConstantExpr::create(Value: totalVGPR, Ctx&: *Ctx));
1324	}
1325	}
1326	}
1327
1328	void usesAgprAt(int i) {
1329	// Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1330	if (!hasMAIInsts(STI: *MSTI))
1331	return;
1332
1333	if (i >= AgprIndexUnusedMin) {
1334	AgprIndexUnusedMin = ++i;
1335	if (Ctx) {
1336	MCSymbol* const Sym =
1337	Ctx->getOrCreateSymbol(Name: Twine (".kernel.agpr_count"));
1338	Sym->setVariableValue(MCConstantExpr::create(Value: AgprIndexUnusedMin, Ctx&: *Ctx));
1339
1340	// Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1341	MCSymbol* const vSym =
1342	Ctx->getOrCreateSymbol(Name: Twine (".kernel.vgpr_count"));
1343	int totalVGPR = getTotalNumVGPRs(has90AInsts: isGFX90A(STI: *MSTI), ArgNumAGPR: AgprIndexUnusedMin,
1344	ArgNumVGPR: VgprIndexUnusedMin);
1345	vSym->setVariableValue(MCConstantExpr::create(Value: totalVGPR, Ctx&: *Ctx));
1346	}
1347	}
1348	}
1349
1350	public:
1351	KernelScopeInfo() = default;
1352
1353	void initialize(MCContext &Context) {
1354	Ctx = &Context;
1355	MSTI = Ctx->getSubtargetInfo();
1356
1357	usesSgprAt(i: SgprIndexUnusedMin = -`1`);
1358	usesVgprAt(i: VgprIndexUnusedMin = -`1`);
1359	if (hasMAIInsts(STI: *MSTI)) {
1360	usesAgprAt(i: AgprIndexUnusedMin = -`1`);
1361	}
1362	}
1363
1364	void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1365	unsigned RegWidth) {
1366	switch (RegKind) {
1367	case IS_SGPR:
1368	usesSgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: `32`) - `1`);
1369	break;
1370	case IS_AGPR:
1371	usesAgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: `32`) - `1`);
1372	break;
1373	case IS_VGPR:
1374	usesVgprAt(i: DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: `32`) - `1`);
1375	break;
1376	default:
1377	break;
1378	}
1379	}
1380	};
1381
1382	class AMDGPUAsmParser : public MCTargetAsmParser {
1383	MCAsmParser &Parser;
1384
1385	unsigned ForcedEncodingSize = `0`;
1386	bool ForcedDPP = false;
1387	bool ForcedSDWA = false;
1388	KernelScopeInfo KernelScope;
1389	const unsigned HwMode;
1390
1391	/// @name Auto-generated Match Functions
1392	/// {
1393
1394	#define GET_ASSEMBLER_HEADER
1395	#include "AMDGPUGenAsmMatcher.inc"
1396
1397	/// }
1398
1399	/// Get size of register operand
1400	unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1401	assert(OpNo < Desc.NumOperands);
1402	int16_t RCID = MII.getOpRegClassID(OpInfo: Desc.operands()[OpNo], HwModeId: HwMode);
1403	return getRegBitWidth(RCID) / `8`;
1404	}
1405
1406	std::optional<AMDGPU::InfoSectionData> InfoData;
1407
1408	/// Whether the leading .amdgcn_target directive has been emitted to the
1409	/// output streamer yet. The emission is deferred until the first piece of
1410	/// content (instruction or kernel descriptor) so that any leading
1411	/// .amdgcn_target/.amd_amdgpu_isa directive in the source has had a chance to
1412	/// update the target ID first.
1413	bool TargetDirectiveEmitted = false;
1414
1415	/// State for checking that every kernel named in a .amdhsa_kernel directive
1416	/// begins with the required prologue instruction sequence. Because the
1417	/// directive may appear either before or after the kernel's label (it is
1418	/// normally emitted after the function body, in .rodata), validation is
1419	/// deferred to onEndOfFile(). We record an order-independent timeline of
1420	/// parsed labels and emitted instruction opcodes, plus the set of symbols
1421	/// named by .amdhsa_kernel directives, and match them up at end of file.
1422	SmallVector<unsigned> OpcodeStream;
1423	SmallVector<std::tuple<const MCSymbol , SMLoc, unsigned*>>
1424	OpcodeStreamSymbols;
1425	SmallPtrSet<const MCSymbol *, `8`> AMDHSAKernelSymbols;
1426
1427	/// Verify recorded kernel prologues.
1428	void checkKernelPrologues();
1429
1430	private:
1431	void createConstantSymbol(StringRef Id, int64_t Val);
1432
1433	bool ParseAsAbsoluteExpression(uint32_t &Ret);
1434	bool OutOfRangeError(SMRange Range);
1435	/// Calculate VGPR/SGPR blocks required for given target, reserved
1436	/// registers, and user-specified NextFreeXGPR values.
1437	///
1438	/// \param Features [in] Target features, used for bug corrections.
1439	/// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1440	/// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1441	/// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1442	/// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1443	/// descriptor field, if valid.
1444	/// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1445	/// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1446	/// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1447	/// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1448	/// \param VGPRBlocks [out] Result VGPR block count.
1449	/// \param SGPRBlocks [out] Result SGPR block count.
1450	bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1451	const MCExpr FlatScrUsed, bool* XNACKUsed,
1452	std::optional<bool> EnableWavefrontSize32,
1453	const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1454	const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1455	const MCExpr &VGPRBlocks, const* MCExpr *&SGPRBlocks);
1456	bool ParseDirectiveAMDGCNTarget();
1457	bool ParseDirectiveAMDHSACodeObjectVersion();
1458	bool ParseDirectiveAMDHSAKernel();
1459	bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1460	bool ParseDirectiveAMDKernelCodeT();
1461	// TODO: Possibly make subtargetHasRegister const.
1462	bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1463	bool ParseDirectiveAMDGPUHsaKernel();
1464
1465	bool ParseDirectiveISAVersion();
1466	bool ParseDirectiveHSAMetadata();
1467	bool ParseDirectivePALMetadataBegin();
1468	bool ParseDirectivePALMetadata();
1469	bool ParseDirectiveAMDGPULDS();
1470	bool ParseDirectiveAMDGPUInfo();
1471
1472	/// Common code to parse out a block of text (typically YAML) between start and
1473	/// end directives.
1474	bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1475	const char *AssemblerDirectiveEnd,
1476	std::string &CollectString);
1477
1478	bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1479	RegisterKind RegKind, MCRegister Reg1,
1480	RegisterKind RegKind1, SMLoc Loc);
1481	bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1482	unsigned &RegNum, unsigned &RegWidth,
1483	bool RestoreOnFailure = false);
1484	bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1485	unsigned &RegNum, unsigned &RegWidth,
1486	SmallVectorImpl<AsmToken> &Tokens);
1487	MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1488	unsigned &RegWidth,
1489	SmallVectorImpl<AsmToken> &Tokens);
1490	MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1491	unsigned &RegWidth,
1492	SmallVectorImpl<AsmToken> &Tokens);
1493	MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1494	unsigned &RegWidth,
1495	SmallVectorImpl<AsmToken> &Tokens);
1496	bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1497	MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1498	unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1499
1500	bool isRegister();
1501	bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1502	std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1503	void initializeGprCountSymbol(RegisterKind RegKind);
1504	bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1505	unsigned RegWidth);
1506	void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1507	bool IsAtomic);
1508
1509	public:
1510	enum OperandMode {
1511	OperandMode_Default,
1512	OperandMode_NSA,
1513	};
1514
1515	using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1516
1517	AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1518	const MCInstrInfo &MII)
1519	: MCTargetAsmParser (STI, MII), Parser(_Parser),
1520	HwMode(STI.getHwMode(type: MCSubtargetInfo::HwMode_RegInfo)) {
1521	MCAsmParserExtension::Initialize(Parser);
1522
1523	setAvailableFeatures(ComputeAvailableFeatures(FB: getFeatureBits()));
1524
1525	AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
1526	if (ISA.Major >= `6` && isHsaAbi(STI: getSTI())) {
1527	createConstantSymbol(Id: ".amdgcn.gfx_generation_number", Val: ISA.Major);
1528	createConstantSymbol(Id: ".amdgcn.gfx_generation_minor", Val: ISA.Minor);
1529	createConstantSymbol(Id: ".amdgcn.gfx_generation_stepping", Val: ISA.Stepping);
1530	} else {
1531	createConstantSymbol(Id: ".option.machine_version_major", Val: ISA.Major);
1532	createConstantSymbol(Id: ".option.machine_version_minor", Val: ISA.Minor);
1533	createConstantSymbol(Id: ".option.machine_version_stepping", Val: ISA.Stepping);
1534	}
1535	if (ISA.Major >= `6` && isHsaAbi(STI: getSTI())) {
1536	initializeGprCountSymbol(RegKind: IS_VGPR);
1537	initializeGprCountSymbol(RegKind: IS_SGPR);
1538	} else
1539	KernelScope.initialize(Context&: getContext());
1540
1541	for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1542	createConstantSymbol(Id: Symbol, Val: Code);
1543
1544	createConstantSymbol(Id: "UC_VERSION_W64_BIT", Val: `0x2000`);
1545	createConstantSymbol(Id: "UC_VERSION_W32_BIT", Val: `0x4000`);
1546	createConstantSymbol(Id: "UC_VERSION_MDP_BIT", Val: `0x8000`);
1547	}
1548
1549	bool hasMIMG_R128() const {
1550	return AMDGPU::hasMIMG_R128(STI: getSTI());
1551	}
1552
1553	bool hasPackedD16() const {
1554	return AMDGPU::hasPackedD16(STI: getSTI());
1555	}
1556
1557	bool hasA16() const { return AMDGPU::hasA16(STI: getSTI()); }
1558
1559	bool hasG16() const { return AMDGPU::hasG16(STI: getSTI()); }
1560
1561	bool hasGDS() const { return AMDGPU::hasGDS(STI: getSTI()); }
1562
1563	bool isSI() const {
1564	return AMDGPU::isSI(STI: getSTI());
1565	}
1566
1567	bool isCI() const {
1568	return AMDGPU::isCI(STI: getSTI());
1569	}
1570
1571	bool isVI() const {
1572	return AMDGPU::isVI(STI: getSTI());
1573	}
1574
1575	bool isGFX9() const {
1576	return AMDGPU::isGFX9(STI: getSTI());
1577	}
1578
1579	// TODO: isGFX90A is also true for GFX940. We need to clean it.
1580	bool isGFX90A() const {
1581	return AMDGPU::isGFX90A(STI: getSTI());
1582	}
1583
1584	bool isGFX940() const {
1585	return AMDGPU::isGFX940(STI: getSTI());
1586	}
1587
1588	bool isGFX9Plus() const {
1589	return AMDGPU::isGFX9Plus(STI: getSTI());
1590	}
1591
1592	bool isGFX10() const {
1593	return AMDGPU::isGFX10(STI: getSTI());
1594	}
1595
1596	bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(STI: getSTI()); }
1597
1598	bool isGFX11() const {
1599	return AMDGPU::isGFX11(STI: getSTI());
1600	}
1601
1602	bool isGFX11Plus() const {
1603	return AMDGPU::isGFX11Plus(STI: getSTI());
1604	}
1605
1606	bool isGFX12() const { return AMDGPU::isGFX12(STI: getSTI()); }
1607
1608	bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(STI: getSTI()); }
1609
1610	bool isGFX1250() const { return AMDGPU::isGFX1250(STI: getSTI()); }
1611
1612	bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(STI: getSTI()); }
1613
1614	bool isGFX13() const { return AMDGPU::isGFX13(STI: getSTI()); }
1615
1616	bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(STI: getSTI()); }
1617
1618	bool hasBVHRayTracingInsts() const {
1619	return getFeatureBits()[AMDGPU::FeatureBVHRayTracingInsts];
1620	}
1621
1622	bool isGFX10_BEncoding() const {
1623	return AMDGPU::isGFX10_BEncoding(STI: getSTI());
1624	}
1625
1626	bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1627
1628	bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1629
1630	bool hasInv2PiInlineImm() const {
1631	return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1632	}
1633
1634	bool has64BitLiterals() const {
1635	return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1636	}
1637
1638	bool hasFlatOffsets() const {
1639	return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1640	}
1641
1642	bool hasTrue16Insts() const {
1643	return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1644	}
1645
1646	bool hasArchitectedFlatScratch() const {
1647	return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1648	}
1649
1650	bool hasSGPR102_SGPR103() const {
1651	return !isVI() && !isGFX9();
1652	}
1653
1654	bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1655
1656	bool hasIntClamp() const {
1657	return getFeatureBits()[AMDGPU::FeatureIntClamp];
1658	}
1659
1660	bool hasPartialNSAEncoding() const {
1661	return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1662	}
1663
1664	bool hasGloballyAddressableScratch() const {
1665	return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1666	}
1667
1668	unsigned getNSAMaxSize(bool HasSampler = false) const {
1669	return AMDGPU::getNSAMaxSize(STI: getSTI(), HasSampler);
1670	}
1671
1672	unsigned getMaxNumUserSGPRs() const {
1673	return AMDGPU::getMaxNumUserSGPRs(STI: getSTI());
1674	}
1675
1676	bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(STI: getSTI()); }
1677
1678	AMDGPUTargetStreamer &getTargetStreamer() {
1679	MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1680	return static_cast<AMDGPUTargetStreamer &>(TS);
1681	}
1682
1683	MCContext &getContext() const {
1684	// We need this const_cast because for some reason getContext() is not const
1685	// in MCAsmParser.
1686	return const_cast<AMDGPUAsmParser >(this*)->MCTargetAsmParser::getContext();
1687	}
1688
1689	const MCRegisterInfo getMRI() const* {
1690	return getContext().getRegisterInfo();
1691	}
1692
1693	const MCInstrInfo getMII() const* {
1694	return &MII;
1695	}
1696
1697	// FIXME: This should not be used. Instead, should use queries derived from
1698	// getAvailableFeatures().
1699	const FeatureBitset &getFeatureBits() const {
1700	return getSTI().getFeatureBits();
1701	}
1702
1703	void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1704	void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1705	void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1706
1707	unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1708	bool isForcedVOP3() const { return ForcedEncodingSize == `64`; }
1709	bool isForcedDPP() const { return ForcedDPP; }
1710	bool isForcedSDWA() const { return ForcedSDWA; }
1711	ArrayRef<unsigned> getMatchedVariants() const;
1712	StringRef getMatchedVariantName() const;
1713
1714	std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1715	bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1716	bool RestoreOnFailure);
1717	bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1718	ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1719	SMLoc &EndLoc) override;
1720	unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1721	unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1722	unsigned Kind) override;
1723	bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1724	OperandVector &Operands, MCStreamer &Out,
1725	uint64_t &ErrorInfo,
1726	bool MatchingInlineAsm) override;
1727	bool ParseDirective(AsmToken DirectiveID) override;
1728	void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) override;
1729	void onEndOfFile() override;
1730	ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1731	OperandMode Mode = OperandMode_Default);
1732	StringRef parseMnemonicSuffix(StringRef Name);
1733	bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1734	SMLoc NameLoc, OperandVector &Operands) override;
1735	//bool ProcessInstruction(MCInst &Inst);
1736
1737	ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1738
1739	ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1740
1741	ParseStatus
1742	parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1743	AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1744	std::function<bool(int64_t &)> ConvertResult = nullptr);
1745
1746	ParseStatus parseOperandArrayWithPrefix(
1747	const char *Prefix, OperandVector &Operands,
1748	AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1749	bool (ConvertResult)(int64_t &) = nullptr*);
1750
1751	ParseStatus
1752	parseNamedBit(StringRef Name, OperandVector &Operands,
1753	AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1754	bool IgnoreNegative = false);
1755	unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1756	ParseStatus parseCPol(OperandVector &Operands);
1757	ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1758	ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1759	ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1760	SMLoc &StringLoc);
1761	ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1762	StringRef Name,
1763	ArrayRef<const char *> Ids,
1764	int64_t &IntVal);
1765	ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1766	StringRef Name,
1767	ArrayRef<const char *> Ids,
1768	AMDGPUOperand::ImmTy Type);
1769
1770	bool isModifier();
1771	bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1772	bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1773	bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1774	bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1775	bool parseSP3NegModifier();
1776	ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1777	LitModifier Lit = LitModifier::None);
1778	ParseStatus parseReg(OperandVector &Operands);
1779	ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1780	LitModifier Lit = LitModifier::None);
1781	ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1782	bool AllowImm = true);
1783	ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1784	bool AllowImm = true);
1785	ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1786	ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1787	ParseStatus parseVReg32OrOff(OperandVector &Operands);
1788	ParseStatus tryParseIndexKey(OperandVector &Operands,
1789	AMDGPUOperand::ImmTy ImmTy);
1790	ParseStatus parseIndexKey8bit(OperandVector &Operands);
1791	ParseStatus parseIndexKey16bit(OperandVector &Operands);
1792	ParseStatus parseIndexKey32bit(OperandVector &Operands);
1793	ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1794	AMDGPUOperand::ImmTy Type);
1795	ParseStatus parseMatrixAFMT(OperandVector &Operands);
1796	ParseStatus parseMatrixBFMT(OperandVector &Operands);
1797	ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1798	AMDGPUOperand::ImmTy Type);
1799	ParseStatus parseMatrixAScale(OperandVector &Operands);
1800	ParseStatus parseMatrixBScale(OperandVector &Operands);
1801	ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1802	AMDGPUOperand::ImmTy Type);
1803	ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1804	ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1805
1806	ParseStatus parseDfmtNfmt(int64_t &Format);
1807	ParseStatus parseUfmt(int64_t &Format);
1808	ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1809	int64_t &Format);
1810	ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1811	int64_t &Format);
1812	ParseStatus parseFORMAT(OperandVector &Operands);
1813	ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1814	ParseStatus parseNumericFormat(int64_t &Format);
1815	ParseStatus parseFlatOffset(OperandVector &Operands);
1816	ParseStatus parseR128A16(OperandVector &Operands);
1817	ParseStatus parseBLGP(OperandVector &Operands);
1818	bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1819	bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1820
1821	void cvtExp(MCInst &Inst, const OperandVector &Operands);
1822
1823	bool parseCnt(int64_t &IntVal);
1824	ParseStatus parseSWaitCnt(OperandVector &Operands);
1825
1826	bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1827	void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1828	ParseStatus parseDepCtr(OperandVector &Operands);
1829
1830	bool parseDelay(int64_t &Delay);
1831	ParseStatus parseSDelayALU(OperandVector &Operands);
1832
1833	ParseStatus parseHwreg(OperandVector &Operands);
1834
1835	private:
1836	struct OperandInfoTy {
1837	SMLoc Loc;
1838	int64_t Val;
1839	bool IsSymbolic = false;
1840	bool IsDefined = false;
1841
1842	constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
1843	};
1844
1845	struct StructuredOpField : OperandInfoTy {
1846	StringLiteral Id;
1847	StringLiteral Desc;
1848	unsigned Width;
1849	bool IsDefined = false;
1850
1851	constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1852	unsigned Width, int64_t Default)
1853	: OperandInfoTy (Default), Id (Id), Desc (Desc), Width(Width) {}
1854	virtual ~StructuredOpField() = default;
1855
1856	bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1857	Parser.Error(L: Loc, Msg: "invalid " + Desc + ": " + Err);
1858	return false;
1859	}
1860
1861	virtual bool validate(AMDGPUAsmParser &Parser) const {
1862	if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1863	return Error(Parser, Err: "not supported on this GPU");
1864	if (!isUIntN(N: Width, x: Val))
1865	return Error(Parser, Err: "only " + Twine (Width) + "-bit values are legal");
1866	return true;
1867	}
1868	};
1869
1870	ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1871	bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1872
1873	bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1874	bool validateSendMsg(const OperandInfoTy &Msg,
1875	const OperandInfoTy &Op,
1876	const OperandInfoTy &Stream);
1877
1878	ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1879	OperandInfoTy &Width);
1880
1881	const AMDGPUOperand &findMCOperand(const OperandVector &Operands,
1882	int MCOpIdx) const;
1883
1884	static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1885
1886	SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1887	SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1888	SMLoc getBLGPLoc(const OperandVector &Operands) const;
1889
1890	SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1891	SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1892	const OperandVector &Operands) const;
1893	SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1894	const OperandVector &Operands) const;
1895	SMLoc getInstLoc(const OperandVector &Operands) const;
1896
1897	bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1898	const OperandVector &Operands);
1899	bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1900	bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1901	bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1902	bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1903	bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1904	std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1905	bool AsVOPD3);
1906	bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1907	bool tryVOPD(const MCInst &Inst);
1908	bool tryVOPD3(const MCInst &Inst);
1909	bool tryAnotherVOPDEncoding(const MCInst &Inst);
1910
1911	bool validateIntClampSupported(const MCInst &Inst);
1912	bool validateMIMGAtomicDMask(const MCInst &Inst);
1913	bool validateMIMGGatherDMask(const MCInst &Inst);
1914	bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1915	bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1916	bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1917	bool validateMIMGD16(const MCInst &Inst);
1918	bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1919	bool validateTensorR128(const MCInst &Inst);
1920	bool validateMIMGMSAA(const MCInst &Inst);
1921	bool validateOpSel(const MCInst &Inst);
1922	bool validateTrue16OpSel(const MCInst &Inst);
1923	bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1924	bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1925	bool validateVccOperand(MCRegister Reg) const;
1926	bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1927	bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1928	bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1929	bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1930	bool validateAGPRLdSt(const MCInst &Inst) const;
1931	bool validateVGPRAlign(const MCInst &Inst) const;
1932	bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1933	bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1934	bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1935	bool validateDivScale(const MCInst &Inst);
1936	bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1937	bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1938	SMLoc IDLoc);
1939	bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1940	const unsigned CPol);
1941	bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1942	bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1943	bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1944	unsigned getConstantBusLimit(unsigned Opcode) const;
1945	bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1946	bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1947	MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1948
1949	bool isSupportedMnemo(StringRef Mnemo,
1950	const FeatureBitset &FBS);
1951	bool isSupportedMnemo(StringRef Mnemo,
1952	const FeatureBitset &FBS,
1953	ArrayRef<unsigned> Variants);
1954	bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1955
1956	bool isId(const StringRef Id) const;
1957	bool isId(const AsmToken &Token, const StringRef Id) const;
1958	bool isToken(const AsmToken::TokenKind Kind) const;
1959	StringRef getId() const;
1960	bool trySkipId(const StringRef Id);
1961	bool trySkipId(const StringRef Pref, const StringRef Id);
1962	bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1963	bool trySkipToken(const AsmToken::TokenKind Kind);
1964	bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1965	bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1966	bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1967
1968	void peekTokens(MutableArrayRef<AsmToken> Tokens);
1969	AsmToken::TokenKind getTokenKind() const;
1970	bool parseExpr(int64_t &Imm, StringRef Expected = "");
1971	bool parseExpr(OperandVector &Operands);
1972	StringRef getTokenStr() const;
1973	AsmToken peekToken(bool ShouldSkipSpace = true);
1974	AsmToken getToken() const;
1975	SMLoc getLoc() const;
1976	void lex();
1977
1978	public:
1979	void onBeginOfFile() override;
1980	/// Emit the deferred leading .amdgcn_target directive if it has not been
1981	/// emitted yet. Called before emitting the first instruction or kernel
1982	/// descriptor.
1983	void emitTargetDirective();
1984	bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1985
1986	ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1987
1988	ParseStatus parseExpTgt(OperandVector &Operands);
1989	ParseStatus parseSendMsg(OperandVector &Operands);
1990	ParseStatus parseWaitEvent(OperandVector &Operands);
1991	ParseStatus parseInterpSlot(OperandVector &Operands);
1992	ParseStatus parseInterpAttr(OperandVector &Operands);
1993	ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1994	ParseStatus parseBoolReg(OperandVector &Operands);
1995
1996	bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1997	const unsigned MaxVal, const Twine &ErrMsg,
1998	SMLoc &Loc);
1999	bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
2000	const unsigned MinVal,
2001	const unsigned MaxVal,
2002	const StringRef ErrMsg);
2003	ParseStatus parseSwizzle(OperandVector &Operands);
2004	bool parseSwizzleOffset(int64_t &Imm);
2005	bool parseSwizzleMacro(int64_t &Imm);
2006	bool parseSwizzleQuadPerm(int64_t &Imm);
2007	bool parseSwizzleBitmaskPerm(int64_t &Imm);
2008	bool parseSwizzleBroadcast(int64_t &Imm);
2009	bool parseSwizzleSwap(int64_t &Imm);
2010	bool parseSwizzleReverse(int64_t &Imm);
2011	bool parseSwizzleFFT(int64_t &Imm);
2012	bool parseSwizzleRotate(int64_t &Imm);
2013
2014	ParseStatus parseGPRIdxMode(OperandVector &Operands);
2015	int64_t parseGPRIdxMacro();
2016
2017	void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: false); }
2018	void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: true); }
2019
2020	ParseStatus parseOModSI(OperandVector &Operands);
2021
2022	void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
2023	OptionalImmIndexMap &OptionalIdx);
2024	void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
2025	void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
2026	void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
2027	void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
2028	void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
2029
2030	void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
2031	void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
2032	OptionalImmIndexMap &OptionalIdx);
2033	void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
2034	OptionalImmIndexMap &OptionalIdx);
2035
2036	void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
2037	void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
2038	void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
2039
2040	bool parseDimId(unsigned &Encoding);
2041	ParseStatus parseDim(OperandVector &Operands);
2042	bool convertDppBoundCtrl(int64_t &BoundCtrl);
2043	ParseStatus parseDPP8(OperandVector &Operands);
2044	ParseStatus parseDPPCtrl(OperandVector &Operands);
2045	bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
2046	int64_t parseDPPCtrlSel(StringRef Ctrl);
2047	int64_t parseDPPCtrlPerm();
2048	void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
2049	void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
2050	cvtDPP(Inst, Operands, IsDPP8: true);
2051	}
2052	void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
2053	bool IsDPP8 = false);
2054	void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
2055	cvtVOP3DPP(Inst, Operands, IsDPP8: true);
2056	}
2057
2058	ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2059	AMDGPUOperand::ImmTy Type);
2060	ParseStatus parseSDWADstUnused(OperandVector &Operands);
2061	void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2062	void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2063	void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2064	void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2065	void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2066
2067	enum class SDWAInstType : unsigned { VOP1 = `0`, VOP2 = `1`, VOPC = `2` };
2068
2069	void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2070	SDWAInstType BasicInstType, bool SkipDstVcc = false,
2071	bool SkipSrcVcc = false);
2072
2073	ParseStatus parseEndpgm(OperandVector &Operands);
2074
2075	ParseStatus parseVOPD(OperandVector &Operands);
2076	};
2077
2078	} // end anonymous namespace
2079
2080	// May be called with integer type with equivalent bitwidth.
2081	static const fltSemantics getFltSemantics(unsigned* Size) {
2082	switch (Size) {
2083	case `4`:
2084	return &APFloat::IEEEsingle();
2085	case `8`:
2086	return &APFloat::IEEEdouble();
2087	case `2`:
2088	return &APFloat::IEEEhalf();
2089	default:
2090	llvm_unreachable("unsupported fp type");
2091	}
2092	}
2093
2094	static const fltSemantics *getFltSemantics(MVT VT) {
2095	return getFltSemantics(Size: VT.getScalarSizeInBits() / `8`);
2096	}
2097
2098	static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
2099	switch (OperandType) {
2100	// When floating-point immediate is used as operand of type i16, the 32-bit
2101	// representation of the constant truncated to the 16 LSBs should be used.
2102	case AMDGPU::OPERAND_REG_IMM_INT16:
2103	case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2104	case AMDGPU::OPERAND_REG_IMM_INT32:
2105	case AMDGPU::OPERAND_REG_IMM_FP32:
2106	case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2107	case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2108	case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2109	case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2110	case AMDGPU::OPERAND_REG_IMM_V2FP32:
2111	case AMDGPU::OPERAND_REG_IMM_V2INT32:
2112	case AMDGPU::OPERAND_REG_IMM_V2INT16:
2113	case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2114	case AMDGPU::OPERAND_KIMM32:
2115	case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2116	return &APFloat::IEEEsingle();
2117	case AMDGPU::OPERAND_REG_IMM_INT64:
2118	case AMDGPU::OPERAND_REG_IMM_FP64:
2119	case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2120	case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2121	case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2122	case AMDGPU::OPERAND_REG_IMM_V2FP64:
2123	case AMDGPU::OPERAND_REG_IMM_V2INT64:
2124	case AMDGPU::OPERAND_KIMM64:
2125	return &APFloat::IEEEdouble();
2126	case AMDGPU::OPERAND_REG_IMM_FP16:
2127	case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2128	case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2129	case AMDGPU::OPERAND_REG_IMM_V2FP16:
2130	case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
2131	case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
2132	case AMDGPU::OPERAND_KIMM16:
2133	return &APFloat::IEEEhalf();
2134	case AMDGPU::OPERAND_REG_IMM_BF16:
2135	case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2136	case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2137	case AMDGPU::OPERAND_REG_IMM_V2BF16:
2138	return &APFloat::BFloat();
2139	default:
2140	llvm_unreachable("unsupported fp type");
2141	}
2142	}
2143
2144	//===----------------------------------------------------------------------===//
2145	// Operand
2146	//===----------------------------------------------------------------------===//
2147
2148	static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
2149	bool Lost;
2150
2151	// Convert literal to single precision
2152	APFloat::opStatus Status = FPLiteral.convert(ToSemantics: *getFltSemantics(VT),
2153	RM: APFloat::rmNearestTiesToEven,
2154	losesInfo: &Lost);
2155	// We allow precision lost but not overflow or underflow
2156	if (Status != APFloat::opOK &&
2157	Lost &&
2158	((Status & APFloat::opOverflow) != `0` \|\|
2159	(Status & APFloat::opUnderflow) != `0`)) {
2160	return false;
2161	}
2162
2163	return true;
2164	}
2165
2166	static bool isSafeTruncation(int64_t Val, unsigned Size) {
2167	return isUIntN(N: Size, x: Val) \|\| isIntN(N: Size, x: Val);
2168	}
2169
2170	static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2171	if (VT.getScalarType() == MVT::i16)
2172	return isInlinableLiteral32(Literal: Val, HasInv2Pi);
2173
2174	if (VT.getScalarType() == MVT::f16)
2175	return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi);
2176
2177	assert(VT.getScalarType() == MVT::bf16);
2178
2179	return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi);
2180	}
2181
2182	bool AMDGPUOperand::isInlinableImm(MVT type) const {
2183
2184	// This is a hack to enable named inline values like
2185	// shared_base with both 32-bit and 64-bit operands.
2186	// Note that these values are defined as
2187	// 32-bit operands only.
2188	if (isInlineValue()) {
2189	return true;
2190	}
2191
2192	if (!isImmTy(ImmT: ImmTyNone)) {
2193	// Only plain immediates are inlinable (e.g. "clamp" attribute is not)
2194	return false;
2195	}
2196
2197	if (getModifiers().Lit != LitModifier::None)
2198	return false;
2199
2200	// TODO: We should avoid using host float here. It would be better to
2201	// check the float bit values which is what a few other places do.
2202	// We've had bot failures before due to weird NaN support on mips hosts.
2203
2204	APInt Literal(`64`, Imm.Val);
2205
2206	if (Imm.IsFPImm) { // We got fp literal token
2207	if (type == MVT::f64 \|\| type == MVT::i64) { // Expected 64-bit operand
2208	return AMDGPU::isInlinableLiteral64(Literal: Imm.Val,
2209	HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2210	}
2211
2212	APFloat FPLiteral(APFloat::IEEEdouble(), APInt (`64`, Imm.Val));
2213	if (!canLosslesslyConvertToFPType(FPLiteral, VT: type))
2214	return false;
2215
2216	if (type.getScalarSizeInBits() == `16`) {
2217	bool Lost = false;
2218	switch (type.getScalarType().SimpleTy) {
2219	default:
2220	llvm_unreachable("unknown 16-bit type");
2221	case MVT::bf16:
2222	FPLiteral.convert(ToSemantics: APFloatBase::BFloat(), RM: APFloat::rmNearestTiesToEven,
2223	losesInfo: &Lost);
2224	break;
2225	case MVT::f16:
2226	FPLiteral.convert(ToSemantics: APFloatBase::IEEEhalf(), RM: APFloat::rmNearestTiesToEven,
2227	losesInfo: &Lost);
2228	break;
2229	case MVT::i16:
2230	FPLiteral.convert(ToSemantics: APFloatBase::IEEEsingle(),
2231	RM: APFloat::rmNearestTiesToEven, losesInfo: &Lost);
2232	break;
2233	}
2234	// We need to use 32-bit representation here because when a floating-point
2235	// inline constant is used as an i16 operand, its 32-bit representation
2236	// representation will be used. We will need the 32-bit value to check if
2237	// it is FP inline constant.
2238	uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2239	return isInlineableLiteralOp16(Val: ImmVal, VT: type,
2240	HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2241	}
2242
2243	// Check if single precision literal is inlinable
2244	return AMDGPU::isInlinableLiteral32(
2245	Literal: static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
2246	HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2247	}
2248
2249	// We got int literal token.
2250	if (type == MVT::f64 \|\| type == MVT::i64) { // Expected 64-bit operand
2251	return AMDGPU::isInlinableLiteral64(Literal: Imm.Val,
2252	HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2253	}
2254
2255	if (!isSafeTruncation(Val: Imm.Val, Size: type.getScalarSizeInBits())) {
2256	return false;
2257	}
2258
2259	if (type.getScalarSizeInBits() == `16`) {
2260	return isInlineableLiteralOp16(
2261	Val: static_cast<int16_t>(Literal.getLoBits(numBits: `16`).getSExtValue()),
2262	VT: type, HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2263	}
2264
2265	return AMDGPU::isInlinableLiteral32(
2266	Literal: static_cast<int32_t>(Literal.getLoBits(numBits: `32`).getZExtValue()),
2267	HasInv2Pi: AsmParser->hasInv2PiInlineImm());
2268	}
2269
2270	bool AMDGPUOperand::isLiteralImm(MVT type) const {
2271	// Check that this immediate can be added as literal
2272	if (!isImmTy(ImmT: ImmTyNone)) {
2273	return false;
2274	}
2275
2276	bool Allow64Bit =
2277	(type == MVT::i64 \|\| type == MVT::f64) && AsmParser->has64BitLiterals();
2278
2279	if (!Imm.IsFPImm) {
2280	// We got int literal token.
2281
2282	if (type == MVT::f64 && hasFPModifiers()) {
2283	// Cannot apply fp modifiers to int literals preserving the same semantics
2284	// for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2285	// disable these cases.
2286	return false;
2287	}
2288
2289	unsigned Size = type.getSizeInBits();
2290	if (Size == `64`) {
2291	if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Val: Imm.Val, IsFP64: false))
2292	return true;
2293	Size = `32`;
2294	}
2295
2296	// FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2297	// types.
2298	return isSafeTruncation(Val: Imm.Val, Size);
2299	}
2300
2301	// We got fp literal token
2302	if (type == MVT::f64) { // Expected 64-bit fp operand
2303	// We would set low 64-bits of literal to zeroes but we accept this literals
2304	return true;
2305	}
2306
2307	if (type == MVT::i64) { // Expected 64-bit int operand
2308	// We don't allow fp literals in 64-bit integer instructions. It is
2309	// unclear how we should encode them.
2310	return false;
2311	}
2312
2313	// We allow fp literals with f16x2 operands assuming that the specified
2314	// literal goes into the lower half and the upper half is zero. We also
2315	// require that the literal may be losslessly converted to f16.
2316	//
2317	// For i16x2 operands, we assume that the specified literal is encoded as a
2318	// single-precision float. This is pretty odd, but it matches SP3 and what
2319	// happens in hardware.
2320	MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2321	: (type == MVT::v2i16) ? MVT::f32
2322	: (type == MVT::v2f32) ? MVT::f32
2323	: type;
2324
2325	APFloat FPLiteral(APFloat::IEEEdouble(), APInt (`64`, Imm.Val));
2326	return canLosslesslyConvertToFPType(FPLiteral, VT: ExpectedType);
2327	}
2328
2329	bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2330	return isRegKind() && AsmParser->getMRI()->getRegClass(i: RCID).contains(Reg: getReg());
2331	}
2332
2333	bool AMDGPUOperand::isVRegWithInputMods() const {
2334	return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) \|\|
2335	// GFX90A allows DPP on 64-bit operands.
2336	(isRegClass(RCID: AMDGPU::VReg_64RegClassID) &&
2337	AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2338	}
2339
2340	template <bool IsFake16>
2341	bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2342	return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2343	: AMDGPU::VGPR_16_Lo128RegClassID);
2344	}
2345
2346	template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2347	return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32RegClassID
2348	: AMDGPU::VGPR_16RegClassID);
2349	}
2350
2351	bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2352	if (AsmParser->isVI())
2353	return isVReg32();
2354	if (AsmParser->isGFX9Plus())
2355	return isRegClass(RCID: AMDGPU::VS_32RegClassID) \|\| isInlinableImm(type);
2356	return false;
2357	}
2358
2359	bool AMDGPUOperand::isSDWAFP16Operand() const {
2360	return isSDWAOperand(type: MVT::f16);
2361	}
2362
2363	bool AMDGPUOperand::isSDWAFP32Operand() const {
2364	return isSDWAOperand(type: MVT::f32);
2365	}
2366
2367	bool AMDGPUOperand::isSDWAInt16Operand() const {
2368	return isSDWAOperand(type: MVT::i16);
2369	}
2370
2371	bool AMDGPUOperand::isSDWAInt32Operand() const {
2372	return isSDWAOperand(type: MVT::i32);
2373	}
2374
2375	bool AMDGPUOperand::isBoolReg() const {
2376	return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) \|\|
2377	(AsmParser->isWave32() && isSCSrc_b32()));
2378	}
2379
2380	uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2381	{
2382	assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2383	assert(Size == `2` \|\| Size == `4` \|\| Size == `8`);
2384
2385	const uint64_t FpSignMask = (`1ULL` << (Size * `8` - `1`));
2386
2387	if (Imm.Mods.Abs) {
2388	Val &= ~FpSignMask;
2389	}
2390	if (Imm.Mods.Neg) {
2391	Val ^= FpSignMask;
2392	}
2393
2394	return Val;
2395	}
2396
2397	void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2398	MCOpIdx = Inst.getNumOperands();
2399
2400	if (isExpr()) {
2401	Inst.addOperand(Op: MCOperand::createExpr(Val: Expr));
2402	return;
2403	}
2404
2405	if (AMDGPU::isSISrcOperand(Desc: AsmParser->getMII()->get(Opcode: Inst.getOpcode()),
2406	OpNo: Inst.getNumOperands())) {
2407	addLiteralImmOperand(Inst, Val: Imm.Val,
2408	ApplyModifiers: ApplyModifiers &
2409	isImmTy(ImmT: ImmTyNone) && Imm.Mods.hasFPModifiers());
2410	} else {
2411	assert(!isImmTy(ImmTyNone) \|\| !hasModifiers());
2412	Inst.addOperand(Op: MCOperand::createImm(Val: Imm.Val));
2413	}
2414	}
2415
2416	void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2417	const auto& InstDesc = AsmParser->getMII()->get(Opcode: Inst.getOpcode());
2418	auto OpNum = Inst.getNumOperands();
2419	// Check that this operand accepts literals
2420	assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2421
2422	if (ApplyModifiers) {
2423	assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2424	const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(Desc: InstDesc, OpNo: OpNum);
2425	Val = applyInputFPModifiers(Val, Size);
2426	}
2427
2428	APInt Literal(`64`, Val);
2429	uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2430
2431	bool CanUse64BitLiterals =
2432	AsmParser->has64BitLiterals() &&
2433	!(InstDesc.TSFlags & (SIInstrFlags::VOP3 \| SIInstrFlags::VOP3P));
2434	LitModifier Lit = getModifiers().Lit;
2435	MCContext &Ctx = AsmParser->getContext();
2436
2437	if (Imm.IsFPImm) { // We got fp literal token
2438	switch (OpTy) {
2439	case AMDGPU::OPERAND_REG_IMM_INT64:
2440	case AMDGPU::OPERAND_REG_IMM_FP64:
2441	case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2442	case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2443	case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2444	case AMDGPU::OPERAND_REG_IMM_V2FP64:
2445	case AMDGPU::OPERAND_REG_IMM_V2INT64:
2446	if (Lit == LitModifier::None &&
2447	AMDGPU::isInlinableLiteral64(Literal: Literal.getZExtValue(),
2448	HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
2449	Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getZExtValue()));
2450	return;
2451	}
2452
2453	// Non-inlineable
2454	if (AMDGPU::isSISrcFPOperand(Desc: InstDesc,
2455	OpNo: OpNum)) { // Expected 64-bit fp operand
2456	bool HasMandatoryLiteral =
2457	AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::imm);
2458	// For fp operands we check if low 32 bits are zeros
2459	if (Literal.getLoBits(numBits: `32`) != `0` &&
2460	(InstDesc.getSize() != `4` \|\| !AsmParser->has64BitLiterals()) &&
2461	!HasMandatoryLiteral) {
2462	const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
2463	L: Inst.getLoc(),
2464	Msg: "Can't encode literal as exact 64-bit floating-point operand. "
2465	"Low 32-bits will be set to zero");
2466	Val &= `0xffffffff00000000u`;
2467	}
2468
2469	if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 \|\|
2470	OpTy == AMDGPU::OPERAND_REG_INLINE_C_FP64 \|\|
2471	OpTy == AMDGPU::OPERAND_REG_INLINE_AC_FP64)) {
2472	if (CanUse64BitLiterals && Lit == LitModifier::None &&
2473	(isInt<`32`>(x: Val) \|\| isUInt<`32`>(x: Val))) {
2474	// The floating-point operand will be verbalized as an
2475	// integer one. If that integer happens to fit 32 bits, on
2476	// re-assembling it will be intepreted as the high half of
2477	// the actual value, so we have to wrap it into lit64().
2478	Lit = LitModifier::Lit64;
2479	} else if (Lit == LitModifier::Lit) {
2480	// For FP64 operands lit() specifies the high half of the value.
2481	Val = Hi_32(Value: Val);
2482	}
2483	}
2484	break;
2485	}
2486
2487	// We don't allow fp literals in 64-bit integer instructions. It is
2488	// unclear how we should encode them. This case should be checked earlier
2489	// in predicate methods (isLiteralImm())
2490	llvm_unreachable("fp literal in 64-bit integer instruction.");
2491
2492	case AMDGPU::OPERAND_KIMM64:
2493	if (CanUse64BitLiterals && Lit == LitModifier::None &&
2494	(isInt<`32`>(x: Val) \|\| isUInt<`32`>(x: Val)))
2495	Lit = LitModifier::Lit64;
2496	break;
2497
2498	case AMDGPU::OPERAND_REG_IMM_BF16:
2499	case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2500	case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2501	case AMDGPU::OPERAND_REG_IMM_V2BF16:
2502	if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
2503	Literal == `0x3fc45f306725feed`) {
2504	// This is the 1/(2pi) which is going to be truncated to bf16 with the*
2505	// loss of precision. The constant represents ideomatic fp32 value of
2506	// 1/(2pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16*
2507	// bits. Prevent rounding below.
2508	Inst.addOperand(Op: MCOperand::createImm(Val: `0x3e22`));
2509	return;
2510	}
2511	[[fallthrough]];
2512
2513	case AMDGPU::OPERAND_REG_IMM_INT32:
2514	case AMDGPU::OPERAND_REG_IMM_FP32:
2515	case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2516	case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2517	case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2518	case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2519	case AMDGPU::OPERAND_REG_IMM_INT16:
2520	case AMDGPU::OPERAND_REG_IMM_FP16:
2521	case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2522	case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2523	case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2524	case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2525	case AMDGPU::OPERAND_REG_IMM_V2INT16:
2526	case AMDGPU::OPERAND_REG_IMM_V2FP16:
2527	case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
2528	case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
2529	case AMDGPU::OPERAND_REG_IMM_V2FP32:
2530	case AMDGPU::OPERAND_REG_IMM_V2INT32:
2531	case AMDGPU::OPERAND_KIMM32:
2532	case AMDGPU::OPERAND_KIMM16:
2533	case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
2534	bool lost;
2535	APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2536	// Convert literal to single precision
2537	FPLiteral.convert(ToSemantics: *getOpFltSemantics(OperandType: OpTy),
2538	RM: APFloat::rmNearestTiesToEven, losesInfo: &lost);
2539	// We allow precision lost but not overflow or underflow. This should be
2540	// checked earlier in isLiteralImm()
2541
2542	Val = FPLiteral.bitcastToAPInt().getZExtValue();
2543	break;
2544	}
2545	default:
2546	llvm_unreachable("invalid operand size");
2547	}
2548
2549	if (Lit != LitModifier::None) {
2550	Inst.addOperand(
2551	Op: MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(Lit, Value: Val, Ctx)));
2552	} else {
2553	Inst.addOperand(Op: MCOperand::createImm(Val));
2554	}
2555	return;
2556	}
2557
2558	// We got int literal token.
2559	// Only sign extend inline immediates.
2560	switch (OpTy) {
2561	case AMDGPU::OPERAND_REG_IMM_INT32:
2562	case AMDGPU::OPERAND_REG_IMM_FP32:
2563	case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2564	case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2565	case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2566	case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2567	case AMDGPU::OPERAND_REG_IMM_V2INT16:
2568	case AMDGPU::OPERAND_REG_IMM_V2BF16:
2569	case AMDGPU::OPERAND_REG_IMM_V2FP16:
2570	case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
2571	case AMDGPU::OPERAND_REG_IMM_V2FP32:
2572	case AMDGPU::OPERAND_REG_IMM_V2INT32:
2573	case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2574	case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
2575	break;
2576
2577	case AMDGPU::OPERAND_REG_IMM_INT64:
2578	case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2579	case AMDGPU::OPERAND_REG_IMM_V2INT64:
2580	if (Lit == LitModifier::None &&
2581	AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
2582	Inst.addOperand(Op: MCOperand::createImm(Val));
2583	return;
2584	}
2585
2586	// When the 32 MSBs are not zero (effectively means it can't be safely
2587	// truncated to uint32_t), if the target doesn't support 64-bit literals, or
2588	// the lit modifier is explicitly used, we need to truncate it to the 32
2589	// LSBs.
2590	if (!AsmParser->has64BitLiterals() \|\| Lit == LitModifier::Lit)
2591	Val = Lo_32(Value: Val);
2592	break;
2593
2594	case AMDGPU::OPERAND_REG_IMM_FP64:
2595	case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2596	case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2597	case AMDGPU::OPERAND_REG_IMM_V2FP64:
2598	if (Lit == LitModifier::None &&
2599	AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) {
2600	Inst.addOperand(Op: MCOperand::createImm(Val));
2601	return;
2602	}
2603
2604	// If the target doesn't support 64-bit literals, we need to use the
2605	// constant as the high 32 MSBs of a double-precision floating point value.
2606	if (!AsmParser->has64BitLiterals()) {
2607	Val = static_cast<uint64_t>(Val) << `32`;
2608	} else {
2609	// Now the target does support 64-bit literals, there are two cases
2610	// where we still want to use src_literal encoding:
2611	// 1) explicitly forced by using lit modifier;
2612	// 2) the value is a valid 32-bit representation (signed or unsigned),
2613	// meanwhile not forced by lit64 modifier.
2614	if (Lit == LitModifier::Lit \|\|
2615	(Lit != LitModifier::Lit64 && (isInt<`32`>(x: Val) \|\| isUInt<`32`>(x: Val))))
2616	Val = static_cast<uint64_t>(Val) << `32`;
2617	}
2618
2619	// For FP64 operands lit() specifies the high half of the value.
2620	if (Lit == LitModifier::Lit)
2621	Val = Hi_32(Value: Val);
2622	break;
2623
2624	case AMDGPU::OPERAND_REG_IMM_INT16:
2625	case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2626	case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2627	case AMDGPU::OPERAND_REG_IMM_FP16:
2628	case AMDGPU::OPERAND_REG_IMM_BF16:
2629	case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2630	case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2631	case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2632	case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2633	case AMDGPU::OPERAND_KIMM32:
2634	case AMDGPU::OPERAND_KIMM16:
2635	break;
2636
2637	case AMDGPU::OPERAND_KIMM64:
2638	if ((isInt<`32`>(x: Val) \|\| isUInt<`32`>(x: Val)) && Lit != LitModifier::Lit64)
2639	Val <<= `32`;
2640	break;
2641
2642	default:
2643	llvm_unreachable("invalid operand type");
2644	}
2645
2646	if (Lit != LitModifier::None) {
2647	Inst.addOperand(
2648	Op: MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(Lit, Value: Val, Ctx)));
2649	} else {
2650	Inst.addOperand(Op: MCOperand::createImm(Val));
2651	}
2652	}
2653
2654	void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2655	MCOpIdx = Inst.getNumOperands();
2656	Inst.addOperand(Op: MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: getReg(), STI: AsmParser->getSTI())));
2657	}
2658
2659	bool AMDGPUOperand::isInlineValue() const {
2660	return isRegKind() && ::isInlineValue(Reg: getReg());
2661	}
2662
2663	//===----------------------------------------------------------------------===//
2664	// AsmParser
2665	//===----------------------------------------------------------------------===//
2666
2667	void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
2668	// TODO: make those pre-defined variables read-only.
2669	// Currently there is none suitable machinery in the core llvm-mc for this.
2670	// MCSymbol::isRedefinable is intended for another purpose, and
2671	// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
2672	MCContext &Ctx = getContext();
2673	MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id);
2674	Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx));
2675	}
2676
2677	static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2678	if (Is == IS_VGPR) {
2679	switch (RegWidth) {
2680	default: return -`1`;
2681	case `32`:
2682	return AMDGPU::VGPR_32RegClassID;
2683	case `64`:
2684	return AMDGPU::VReg_64RegClassID;
2685	case `96`:
2686	return AMDGPU::VReg_96RegClassID;
2687	case `128`:
2688	return AMDGPU::VReg_128RegClassID;
2689	case `160`:
2690	return AMDGPU::VReg_160RegClassID;
2691	case `192`:
2692	return AMDGPU::VReg_192RegClassID;
2693	case `224`:
2694	return AMDGPU::VReg_224RegClassID;
2695	case `256`:
2696	return AMDGPU::VReg_256RegClassID;
2697	case `288`:
2698	return AMDGPU::VReg_288RegClassID;
2699	case `320`:
2700	return AMDGPU::VReg_320RegClassID;
2701	case `352`:
2702	return AMDGPU::VReg_352RegClassID;
2703	case `384`:
2704	return AMDGPU::VReg_384RegClassID;
2705	case `512`:
2706	return AMDGPU::VReg_512RegClassID;
2707	case `1024`:
2708	return AMDGPU::VReg_1024RegClassID;
2709	}
2710	} else if (Is == IS_TTMP) {
2711	switch (RegWidth) {
2712	default: return -`1`;
2713	case `32`:
2714	return AMDGPU::TTMP_32RegClassID;
2715	case `64`:
2716	return AMDGPU::TTMP_64RegClassID;
2717	case `128`:
2718	return AMDGPU::TTMP_128RegClassID;
2719	case `256`:
2720	return AMDGPU::TTMP_256RegClassID;
2721	case `512`:
2722	return AMDGPU::TTMP_512RegClassID;
2723	}
2724	} else if (Is == IS_SGPR) {
2725	switch (RegWidth) {
2726	default: return -`1`;
2727	case `32`:
2728	return AMDGPU::SGPR_32RegClassID;
2729	case `64`:
2730	return AMDGPU::SGPR_64RegClassID;
2731	case `96`:
2732	return AMDGPU::SGPR_96RegClassID;
2733	case `128`:
2734	return AMDGPU::SGPR_128RegClassID;
2735	case `160`:
2736	return AMDGPU::SGPR_160RegClassID;
2737	case `192`:
2738	return AMDGPU::SGPR_192RegClassID;
2739	case `224`:
2740	return AMDGPU::SGPR_224RegClassID;
2741	case `256`:
2742	return AMDGPU::SGPR_256RegClassID;
2743	case `288`:
2744	return AMDGPU::SGPR_288RegClassID;
2745	case `320`:
2746	return AMDGPU::SGPR_320RegClassID;
2747	case `352`:
2748	return AMDGPU::SGPR_352RegClassID;
2749	case `384`:
2750	return AMDGPU::SGPR_384RegClassID;
2751	case `512`:
2752	return AMDGPU::SGPR_512RegClassID;
2753	}
2754	} else if (Is == IS_AGPR) {
2755	switch (RegWidth) {
2756	default: return -`1`;
2757	case `32`:
2758	return AMDGPU::AGPR_32RegClassID;
2759	case `64`:
2760	return AMDGPU::AReg_64RegClassID;
2761	case `96`:
2762	return AMDGPU::AReg_96RegClassID;
2763	case `128`:
2764	return AMDGPU::AReg_128RegClassID;
2765	case `160`:
2766	return AMDGPU::AReg_160RegClassID;
2767	case `192`:
2768	return AMDGPU::AReg_192RegClassID;
2769	case `224`:
2770	return AMDGPU::AReg_224RegClassID;
2771	case `256`:
2772	return AMDGPU::AReg_256RegClassID;
2773	case `288`:
2774	return AMDGPU::AReg_288RegClassID;
2775	case `320`:
2776	return AMDGPU::AReg_320RegClassID;
2777	case `352`:
2778	return AMDGPU::AReg_352RegClassID;
2779	case `384`:
2780	return AMDGPU::AReg_384RegClassID;
2781	case `512`:
2782	return AMDGPU::AReg_512RegClassID;
2783	case `1024`:
2784	return AMDGPU::AReg_1024RegClassID;
2785	}
2786	}
2787	return -`1`;
2788	}
2789
2790	static MCRegister getSpecialRegForName(StringRef RegName) {
2791	return StringSwitch<unsigned>(RegName)
2792	.Case(S: "exec", Value: AMDGPU::EXEC)
2793	.Case(S: "vcc", Value: AMDGPU::VCC)
2794	.Case(S: "flat_scratch", Value: AMDGPU::FLAT_SCR)
2795	.Case(S: "xnack_mask", Value: AMDGPU::XNACK_MASK)
2796	.Case(S: "shared_base", Value: AMDGPU::SRC_SHARED_BASE)
2797	.Case(S: "src_shared_base", Value: AMDGPU::SRC_SHARED_BASE)
2798	.Case(S: "shared_limit", Value: AMDGPU::SRC_SHARED_LIMIT)
2799	.Case(S: "src_shared_limit", Value: AMDGPU::SRC_SHARED_LIMIT)
2800	.Case(S: "private_base", Value: AMDGPU::SRC_PRIVATE_BASE)
2801	.Case(S: "src_private_base", Value: AMDGPU::SRC_PRIVATE_BASE)
2802	.Case(S: "private_limit", Value: AMDGPU::SRC_PRIVATE_LIMIT)
2803	.Case(S: "src_private_limit", Value: AMDGPU::SRC_PRIVATE_LIMIT)
2804	.Case(S: "src_flat_scratch_base_lo", Value: AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
2805	.Case(S: "src_flat_scratch_base_hi", Value: AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
2806	.Case(S: "pops_exiting_wave_id", Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2807	.Case(S: "src_pops_exiting_wave_id", Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2808	.Case(S: "lds_direct", Value: AMDGPU::LDS_DIRECT)
2809	.Case(S: "src_lds_direct", Value: AMDGPU::LDS_DIRECT)
2810	.Case(S: "m0", Value: AMDGPU::M0)
2811	.Case(S: "vccz", Value: AMDGPU::SRC_VCCZ)
2812	.Case(S: "src_vccz", Value: AMDGPU::SRC_VCCZ)
2813	.Case(S: "execz", Value: AMDGPU::SRC_EXECZ)
2814	.Case(S: "src_execz", Value: AMDGPU::SRC_EXECZ)
2815	.Case(S: "scc", Value: AMDGPU::SRC_SCC)
2816	.Case(S: "src_scc", Value: AMDGPU::SRC_SCC)
2817	.Case(S: "tba", Value: AMDGPU::TBA)
2818	.Case(S: "tma", Value: AMDGPU::TMA)
2819	.Case(S: "flat_scratch_lo", Value: AMDGPU::FLAT_SCR_LO)
2820	.Case(S: "flat_scratch_hi", Value: AMDGPU::FLAT_SCR_HI)
2821	.Case(S: "xnack_mask_lo", Value: AMDGPU::XNACK_MASK_LO)
2822	.Case(S: "xnack_mask_hi", Value: AMDGPU::XNACK_MASK_HI)
2823	.Case(S: "vcc_lo", Value: AMDGPU::VCC_LO)
2824	.Case(S: "vcc_hi", Value: AMDGPU::VCC_HI)
2825	.Case(S: "exec_lo", Value: AMDGPU::EXEC_LO)
2826	.Case(S: "exec_hi", Value: AMDGPU::EXEC_HI)
2827	.Case(S: "tma_lo", Value: AMDGPU::TMA_LO)
2828	.Case(S: "tma_hi", Value: AMDGPU::TMA_HI)
2829	.Case(S: "tba_lo", Value: AMDGPU::TBA_LO)
2830	.Case(S: "tba_hi", Value: AMDGPU::TBA_HI)
2831	.Case(S: "pc", Value: AMDGPU::PC_REG)
2832	.Case(S: "null", Value: AMDGPU::SGPR_NULL)
2833	.Default(Value: AMDGPU::NoRegister);
2834	}
2835
2836	bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2837	SMLoc &EndLoc, bool RestoreOnFailure) {
2838	auto R = parseRegister();
2839	if (!R) return true;
2840	assert(R->isReg());
2841	RegNo = R ->getReg();
2842	StartLoc = R ->getStartLoc();
2843	EndLoc = R ->getEndLoc();
2844	return false;
2845	}
2846
2847	bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2848	SMLoc &EndLoc) {
2849	return ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /RestoreOnFailure=/false);
2850	}
2851
2852	ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2853	SMLoc &EndLoc) {
2854	bool Result = ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /RestoreOnFailure=/true);
2855	bool PendingErrors = getParser().hasPendingError();
2856	getParser().clearPendingErrors();
2857	if (PendingErrors)
2858	return ParseStatus::Failure;
2859	if (Result)
2860	return ParseStatus::NoMatch;
2861	return ParseStatus::Success;
2862	}
2863
2864	bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2865	RegisterKind RegKind,
2866	MCRegister Reg1,
2867	RegisterKind RegKind1, SMLoc Loc) {
2868	// Allow VCC_LO/HI at the end of SGPR lists.
2869	if (RegKind == IS_SGPR) {
2870	unsigned RegIdx = (Reg - AMDGPU::SGPR0) + RegWidth / `32`;
2871	if ((RegIdx == `106` && Reg1 == AMDGPU::VCC_LO) \|\|
2872	(RegIdx == `107` && Reg1 == AMDGPU::VCC_HI)) {
2873	RegWidth += `32`;
2874	return true;
2875	}
2876	}
2877
2878	if (RegKind != RegKind1) {
2879	Error(L: Loc, Msg: "registers in a list must be of the same kind");
2880	return MCRegister ();
2881	}
2882
2883	switch (RegKind) {
2884	case IS_SPECIAL:
2885	if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2886	Reg = AMDGPU::EXEC;
2887	RegWidth = `64`;
2888	return true;
2889	}
2890	if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2891	Reg = AMDGPU::FLAT_SCR;
2892	RegWidth = `64`;
2893	return true;
2894	}
2895	if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2896	Reg = AMDGPU::XNACK_MASK;
2897	RegWidth = `64`;
2898	return true;
2899	}
2900	if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2901	Reg = AMDGPU::VCC;
2902	RegWidth = `64`;
2903	return true;
2904	}
2905	if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2906	Reg = AMDGPU::TBA;
2907	RegWidth = `64`;
2908	return true;
2909	}
2910	if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2911	Reg = AMDGPU::TMA;
2912	RegWidth = `64`;
2913	return true;
2914	}
2915	Error(L: Loc, Msg: "register does not fit in the list");
2916	return false;
2917	case IS_VGPR:
2918	case IS_SGPR:
2919	case IS_AGPR:
2920	case IS_TTMP:
2921	if (Reg1 != Reg + RegWidth / `32`) {
2922	Error(L: Loc, Msg: "registers in a list must have consecutive indices");
2923	return false;
2924	}
2925	RegWidth += `32`;
2926	return true;
2927	default:
2928	llvm_unreachable("unexpected register kind");
2929	}
2930	}
2931
2932	struct RegInfo {
2933	StringLiteral Name;
2934	RegisterKind Kind;
2935	};
2936
2937	static constexpr RegInfo RegularRegisters[] = {
2938	{.Name: {"v"}, .Kind: IS_VGPR},
2939	{.Name: {"s"}, .Kind: IS_SGPR},
2940	{.Name: {"ttmp"}, .Kind: IS_TTMP},
2941	{.Name: {"acc"}, .Kind: IS_AGPR},
2942	{.Name: {"a"}, .Kind: IS_AGPR},
2943	};
2944
2945	static bool isRegularReg(RegisterKind Kind) {
2946	return Kind == IS_VGPR \|\|
2947	Kind == IS_SGPR \|\|
2948	Kind == IS_TTMP \|\|
2949	Kind == IS_AGPR;
2950	}
2951
2952	static const RegInfo* getRegularRegInfo(StringRef Str) {
2953	for (const RegInfo &Reg : RegularRegisters)
2954	if (Str.starts_with(Prefix: Reg.Name))
2955	return &Reg;
2956	return nullptr;
2957	}
2958
2959	static bool getRegNum(StringRef Str, unsigned& Num) {
2960	return !Str.getAsInteger(Radix: `10`, Result&: Num);
2961	}
2962
2963	bool
2964	AMDGPUAsmParser::isRegister(const AsmToken &Token,
2965	const AsmToken &NextToken) const {
2966
2967	// A list of consecutive registers: [s0,s1,s2,s3]
2968	if (Token.is(K: AsmToken::LBrac))
2969	return true;
2970
2971	if (!Token.is(K: AsmToken::Identifier))
2972	return false;
2973
2974	// A single register like s0 or a range of registers like s[0:1]
2975
2976	StringRef Str = Token.getString();
2977	const RegInfo *Reg = getRegularRegInfo(Str);
2978	if (Reg) {
2979	StringRef RegName = Reg->Name;
2980	StringRef RegSuffix = Str.substr(Start: RegName.size());
2981	if (!RegSuffix.empty()) {
2982	RegSuffix.consume_back(Suffix: ".l");
2983	RegSuffix.consume_back(Suffix: ".h");
2984	unsigned Num;
2985	// A single register with an index: rXX
2986	if (getRegNum(Str: RegSuffix, Num))
2987	return true;
2988	} else {
2989	// A range of registers: r[XX:YY].
2990	if (NextToken.is(K: AsmToken::LBrac))
2991	return true;
2992	}
2993	}
2994
2995	return getSpecialRegForName(RegName: Str).isValid();
2996	}
2997
2998	bool
2999	AMDGPUAsmParser::isRegister()
3000	{
3001	return isRegister(Token: getToken(), NextToken: peekToken());
3002	}
3003
3004	MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
3005	unsigned SubReg, unsigned RegWidth,
3006	SMLoc Loc) {
3007	assert(isRegularReg(RegKind));
3008
3009	unsigned AlignSize = `1`;
3010	if (RegKind == IS_SGPR \|\| RegKind == IS_TTMP) {
3011	// SGPR and TTMP registers must be aligned.
3012	// Max required alignment is 4 dwords.
3013	AlignSize = std::min(a: llvm::bit_ceil(Value: RegWidth / `32`), b: `4u`);
3014	}
3015
3016	if (RegNum % AlignSize != `0`) {
3017	Error(L: Loc, Msg: "invalid register alignment");
3018	return MCRegister ();
3019	}
3020
3021	unsigned RegIdx = RegNum / AlignSize;
3022	int RCID = getRegClass(Is: RegKind, RegWidth);
3023	if (RCID == -`1`) {
3024	Error(L: Loc, Msg: "invalid or unsupported register size");
3025	return MCRegister ();
3026	}
3027
3028	const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3029	const MCRegisterClass &RC = TRI->getRegClass(i: RCID);
3030	if (RegIdx >= RC.getNumRegs() \|\| (RegKind == IS_VGPR && RegIdx > `255`)) {
3031	Error(L: Loc, Msg: "register index is out of range");
3032	return AMDGPU::NoRegister;
3033	}
3034
3035	if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / `32` > `256`) {
3036	Error(L: Loc, Msg: "register index is out of range");
3037	return MCRegister ();
3038	}
3039
3040	MCRegister Reg = RC.getRegister(i: RegIdx);
3041
3042	if (SubReg) {
3043	Reg = TRI->getSubReg(Reg, Idx: SubReg);
3044
3045	// Currently all regular registers have their .l and .h subregisters, so
3046	// we should never need to generate an error here.
3047	assert(Reg && "Invalid subregister!");
3048	}
3049
3050	return Reg;
3051	}
3052
3053	bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
3054	unsigned &SubReg) {
3055	int64_t RegLo, RegHi;
3056	if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "missing register index"))
3057	return false;
3058
3059	SMLoc FirstIdxLoc = getLoc();
3060	SMLoc SecondIdxLoc;
3061
3062	if (!parseExpr(Imm&: RegLo))
3063	return false;
3064
3065	if (trySkipToken(Kind: AsmToken::Colon)) {
3066	SecondIdxLoc = getLoc();
3067	if (!parseExpr(Imm&: RegHi))
3068	return false;
3069	} else {
3070	RegHi = RegLo;
3071	}
3072
3073	if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
3074	return false;
3075
3076	if (!isUInt<`32`>(x: RegLo)) {
3077	Error(L: FirstIdxLoc, Msg: "invalid register index");
3078	return false;
3079	}
3080
3081	if (!isUInt<`32`>(x: RegHi)) {
3082	Error(L: SecondIdxLoc, Msg: "invalid register index");
3083	return false;
3084	}
3085
3086	if (RegLo > RegHi) {
3087	Error(L: FirstIdxLoc, Msg: "first register index should not exceed second index");
3088	return false;
3089	}
3090
3091	if (RegHi == RegLo) {
3092	StringRef RegSuffix = getTokenStr();
3093	if (RegSuffix == ".l") {
3094	SubReg = AMDGPU::lo16;
3095	lex();
3096	} else if (RegSuffix == ".h") {
3097	SubReg = AMDGPU::hi16;
3098	lex();
3099	}
3100	}
3101
3102	Num = static_cast<unsigned>(RegLo);
3103	RegWidth = `32` * ((RegHi - RegLo) + `1`);
3104
3105	return true;
3106	}
3107
3108	MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3109	unsigned &RegNum,
3110	unsigned &RegWidth,
3111	SmallVectorImpl<AsmToken> &Tokens) {
3112	assert(isToken(AsmToken::Identifier));
3113	MCRegister Reg = getSpecialRegForName(RegName: getTokenStr());
3114	if (Reg) {
3115	RegNum = `0`;
3116	RegWidth = `32`;
3117	RegKind = IS_SPECIAL;
3118	Tokens.push_back(Elt: getToken());
3119	lex(); // skip register name
3120	}
3121	return Reg;
3122	}
3123
3124	MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3125	unsigned &RegNum,
3126	unsigned &RegWidth,
3127	SmallVectorImpl<AsmToken> &Tokens) {
3128	assert(isToken(AsmToken::Identifier));
3129	StringRef RegName = getTokenStr();
3130	auto Loc = getLoc();
3131
3132	const RegInfo *RI = getRegularRegInfo(Str: RegName);
3133	if (!RI) {
3134	Error(L: Loc, Msg: "invalid register name");
3135	return MCRegister ();
3136	}
3137
3138	Tokens.push_back(Elt: getToken());
3139	lex(); // skip register name
3140
3141	RegKind = RI->Kind;
3142	StringRef RegSuffix = RegName.substr(Start: RI->Name.size());
3143	unsigned SubReg = NoSubRegister;
3144	bool IsRange = false;
3145	if (!RegSuffix.empty()) {
3146	if (RegSuffix.consume_back(Suffix: ".l"))
3147	SubReg = AMDGPU::lo16;
3148	else if (RegSuffix.consume_back(Suffix: ".h"))
3149	SubReg = AMDGPU::hi16;
3150
3151	// Single 32-bit register: vXX.
3152	if (!getRegNum(Str: RegSuffix, Num&: RegNum)) {
3153	Error(L: Loc, Msg: "invalid register index");
3154	return MCRegister ();
3155	}
3156	RegWidth = `32`;
3157	} else {
3158	// Range of registers: v[XX:YY]. ":YY" is optional.
3159	IsRange = true;
3160	if (!ParseRegRange(Num&: RegNum, RegWidth, SubReg))
3161	return MCRegister ();
3162	}
3163
3164	// Do not allow vcc_lo/hi be referred as s106/107.
3165	MCRegister Reg = getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3166	const MCRegisterInfo &TRI = *getContext().getRegisterInfo();
3167	if (RegKind == IS_SGPR && IsRange
3168	? (TRI.isSubRegister(RegA: Reg, RegB: VCC_LO) \|\| TRI.isSubRegister(RegA: Reg, RegB: VCC_HI))
3169	: (Reg == VCC_LO \|\| Reg == VCC_HI)) {
3170	Error(L: Loc, Msg: "register index is out of range");
3171	return MCRegister ();
3172	}
3173
3174	return Reg;
3175	}
3176
3177	MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3178	unsigned &RegNum, unsigned &RegWidth,
3179	SmallVectorImpl<AsmToken> &Tokens) {
3180	MCRegister Reg;
3181	auto ListLoc = getLoc();
3182
3183	if (!skipToken(Kind: AsmToken::LBrac,
3184	ErrMsg: "expected a register or a list of registers")) {
3185	return MCRegister ();
3186	}
3187
3188	// List of consecutive registers, e.g.: [s0,s1,s2,s3]
3189
3190	auto Loc = getLoc();
3191	if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3192	return MCRegister ();
3193	if (RegWidth != `32`) {
3194	Error(L: Loc, Msg: "expected a single 32-bit register");
3195	return MCRegister ();
3196	}
3197
3198	for (; trySkipToken(Kind: AsmToken::Comma); ) {
3199	RegisterKind NextRegKind;
3200	MCRegister NextReg;
3201	unsigned NextRegNum, NextRegWidth;
3202	Loc = getLoc();
3203
3204	if (!ParseAMDGPURegister(RegKind&: NextRegKind, Reg&: NextReg,
3205	RegNum&: NextRegNum, RegWidth&: NextRegWidth,
3206	Tokens)) {
3207	return MCRegister ();
3208	}
3209	if (NextRegWidth != `32`) {
3210	Error(L: Loc, Msg: "expected a single 32-bit register");
3211	return MCRegister ();
3212	}
3213	if (!AddNextRegisterToList(Reg, RegWidth, RegKind, Reg1: NextReg, RegKind1: NextRegKind,
3214	Loc))
3215	return MCRegister ();
3216	}
3217
3218	if (!skipToken(Kind: AsmToken::RBrac,
3219	ErrMsg: "expected a comma or a closing square bracket")) {
3220	return MCRegister ();
3221	}
3222
3223	if (isRegularReg(Kind: RegKind))
3224	Reg = getRegularReg(RegKind, RegNum, SubReg: NoSubRegister, RegWidth, Loc: ListLoc);
3225
3226	return Reg;
3227	}
3228
3229	bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3230	MCRegister &Reg, unsigned &RegNum,
3231	unsigned &RegWidth,
3232	SmallVectorImpl<AsmToken> &Tokens) {
3233	auto Loc = getLoc();
3234	Reg = MCRegister ();
3235
3236	if (isToken(Kind: AsmToken::Identifier)) {
3237	Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
3238	if (!Reg)
3239	Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
3240	} else {
3241	Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
3242	}
3243
3244	const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3245	if (!Reg) {
3246	assert(Parser.hasPendingError());
3247	return false;
3248	}
3249
3250	if (!subtargetHasRegister(MRI: *TRI, Reg)) {
3251	if (Reg == AMDGPU::SGPR_NULL) {
3252	Error(L: Loc, Msg: "'null' operand is not supported on this GPU");
3253	} else {
3254	Error(L: Loc, Msg: Twine (AMDGPUInstPrinter::getRegisterName(Reg)) +
3255	" register not available on this GPU");
3256	}
3257	return false;
3258	}
3259
3260	return true;
3261	}
3262
3263	bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
3264	MCRegister &Reg, unsigned &RegNum,
3265	unsigned &RegWidth,
3266	bool RestoreOnFailure /=false/) {
3267	Reg = MCRegister ();
3268
3269	SmallVector<AsmToken, `1`> Tokens;
3270	if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
3271	if (RestoreOnFailure) {
3272	while (!Tokens.empty()) {
3273	getLexer().UnLex(Token: Tokens.pop_back_val());
3274	}
3275	}
3276	return true;
3277	}
3278	return false;
3279	}
3280
3281	std::optional<StringRef>
3282	AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3283	switch (RegKind) {
3284	case IS_VGPR:
3285	return StringRef (".amdgcn.next_free_vgpr");
3286	case IS_SGPR:
3287	return StringRef (".amdgcn.next_free_sgpr");
3288	default:
3289	return std::nullopt;
3290	}
3291	}
3292
3293	void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
3294	auto SymbolName = getGprCountSymbolName(RegKind);
3295	assert(SymbolName && "initializing invalid register kind");
3296	MCSymbol Sym = getContext().getOrCreateSymbol(Name: SymbolName);
3297	Sym->setVariableValue(MCConstantExpr::create(Value: `0`, Ctx&: getContext()));
3298	Sym->setRedefinable(true);
3299	}
3300
3301	bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
3302	unsigned DwordRegIndex,
3303	unsigned RegWidth) {
3304	// Symbols are only defined for GCN targets
3305	if (AMDGPU::getIsaVersion(GPU: getSTI().getCPU()).Major < `6`)
3306	return true;
3307
3308	auto SymbolName = getGprCountSymbolName(RegKind);
3309	if (!SymbolName)
3310	return true;
3311	MCSymbol Sym = getContext().getOrCreateSymbol(Name: SymbolName);
3312
3313	int64_t NewMax = DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: `32`) - `1`;
3314	int64_t OldCount;
3315
3316	if (!Sym->isVariable())
3317	return !Error(L: getLoc(),
3318	Msg: ".amdgcn.next_free_{v,s}gpr symbols must be variable");
3319	if (!Sym->getVariableValue()->evaluateAsAbsolute(Res&: OldCount))
3320	return !Error(
3321	L: getLoc(),
3322	Msg: ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
3323
3324	if (OldCount <= NewMax)
3325	Sym->setVariableValue(MCConstantExpr::create(Value: NewMax + `1`, Ctx&: getContext()));
3326
3327	return true;
3328	}
3329
3330	std::unique_ptr<AMDGPUOperand>
3331	AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3332	const auto &Tok = getToken();
3333	SMLoc StartLoc = Tok.getLoc();
3334	SMLoc EndLoc = Tok.getEndLoc();
3335	RegisterKind RegKind;
3336	MCRegister Reg;
3337	unsigned RegNum, RegWidth;
3338
3339	if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3340	return nullptr;
3341	}
3342	if (isHsaAbi(STI: getSTI())) {
3343	if (!updateGprCountSymbols(RegKind, DwordRegIndex: RegNum, RegWidth))
3344	return nullptr;
3345	} else
3346	KernelScope.usesRegister(RegKind, DwordRegIndex: RegNum, RegWidth);
3347	return AMDGPUOperand::CreateReg(AsmParser: this, Reg, S: StartLoc, E: EndLoc);
3348	}
3349
3350	ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
3351	bool HasSP3AbsModifier, LitModifier Lit) {
3352	// TODO: add syntactic sugar for 1/(2PI)*
3353
3354	if (isRegister() \|\| isModifier())
3355	return ParseStatus::NoMatch;
3356
3357	if (Lit == LitModifier::None) {
3358	if (trySkipId(Id: "lit"))
3359	Lit = LitModifier::Lit;
3360	else if (trySkipId(Id: "lit64"))
3361	Lit = LitModifier::Lit64;
3362
3363	if (Lit != LitModifier::None) {
3364	if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit"))
3365	return ParseStatus::Failure;
3366	ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
3367	if (S.isSuccess() &&
3368	!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3369	return ParseStatus::Failure;
3370	return S;
3371	}
3372	}
3373
3374	const auto& Tok = getToken();
3375	const auto& NextTok = peekToken();
3376	bool IsReal = Tok.is(K: AsmToken::Real);
3377	SMLoc S = getLoc();
3378	bool Negate = false;
3379
3380	if (!IsReal && Tok.is(K: AsmToken::Minus) && NextTok.is(K: AsmToken::Real)) {
3381	lex();
3382	IsReal = true;
3383	Negate = true;
3384	}
3385
3386	AMDGPUOperand::Modifiers Mods;
3387	Mods.Lit = Lit;
3388
3389	if (IsReal) {
3390	// Floating-point expressions are not supported.
3391	// Can only allow floating-point literals with an
3392	// optional sign.
3393
3394	StringRef Num = getTokenStr();
3395	lex();
3396
3397	APFloat RealVal(APFloat::IEEEdouble());
3398	auto roundMode = APFloat::rmNearestTiesToEven;
3399	if (errorToBool(Err: RealVal.convertFromString(Num, roundMode).takeError()))
3400	return ParseStatus::Failure;
3401	if (Negate)
3402	RealVal.changeSign();
3403
3404	Operands.push_back(
3405	Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: RealVal.bitcastToAPInt().getZExtValue(), Loc: S,
3406	Type: AMDGPUOperand::ImmTyNone, IsFPImm: true));
3407	AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3408	Op.setModifiers(Mods);
3409
3410	return ParseStatus::Success;
3411
3412	} else {
3413	int64_t IntVal;
3414	const MCExpr *Expr;
3415	SMLoc S = getLoc();
3416
3417	if (HasSP3AbsModifier) {
3418	// This is a workaround for handling expressions
3419	// as arguments of SP3 'abs' modifier, for example:
3420	// \|1.0\|
3421	// \|-1\|
3422	// \|1+x\|
3423	// This syntax is not compatible with syntax of standard
3424	// MC expressions (due to the trailing '\|').
3425	SMLoc EndLoc;
3426	if (getParser().parsePrimaryExpr(Res&: Expr, EndLoc, TypeInfo: nullptr))
3427	return ParseStatus::Failure;
3428	} else {
3429	if (Parser.parseExpression(Res&: Expr))
3430	return ParseStatus::Failure;
3431	}
3432
3433	if (Expr->evaluateAsAbsolute(Res&: IntVal)) {
3434	if (Lit == LitModifier::Lit && !isInt<`32`>(x: IntVal) && !isUInt<`32`>(x: IntVal))
3435	return Error(L: S, Msg: "literal value out of range");
3436	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S));
3437	AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3438	Op.setModifiers(Mods);
3439	} else {
3440	if (Lit != LitModifier::None)
3441	return ParseStatus::NoMatch;
3442	Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S));
3443	}
3444
3445	return ParseStatus::Success;
3446	}
3447
3448	return ParseStatus::NoMatch;
3449	}
3450
3451	ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3452	if (!isRegister())
3453	return ParseStatus::NoMatch;
3454
3455	if (auto R = parseRegister()) {
3456	assert(R->isReg());
3457	Operands.push_back(Elt: std::move(R));
3458	return ParseStatus::Success;
3459	}
3460	return ParseStatus::Failure;
3461	}
3462
3463	ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3464	bool HasSP3AbsMod, LitModifier Lit) {
3465	ParseStatus Res = parseReg(Operands);
3466	if (!Res.isNoMatch())
3467	return Res;
3468	if (isModifier())
3469	return ParseStatus::NoMatch;
3470	return parseImm(Operands, HasSP3AbsModifier: HasSP3AbsMod, Lit);
3471	}
3472
3473	bool
3474	AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3475	if (Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::LParen)) {
3476	const auto &str = Token.getString();
3477	return str == "abs" \|\| str == "neg" \|\| str == "sext";
3478	}
3479	return false;
3480	}
3481
3482	bool
3483	AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3484	return Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::Colon);
3485	}
3486
3487	bool
3488	AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3489	return isNamedOperandModifier(Token, NextToken) \|\| Token.is(K: AsmToken::Pipe);
3490	}
3491
3492	bool
3493	AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3494	return isRegister(Token, NextToken) \|\| isOperandModifier(Token, NextToken);
3495	}
3496
3497	// Check if this is an operand modifier or an opcode modifier
3498	// which may look like an expression but it is not. We should
3499	// avoid parsing these modifiers as expressions. Currently
3500	// recognized sequences are:
3501	// \|...\|
3502	// abs(...)
3503	// neg(...)
3504	// sext(...)
3505	// -reg
3506	// -\|...\|
3507	// -abs(...)
3508	// name:...
3509	//
3510	bool
3511	AMDGPUAsmParser::isModifier() {
3512
3513	AsmToken Tok = getToken();
3514	AsmToken NextToken[`2`];
3515	peekTokens(Tokens: NextToken);
3516
3517	return isOperandModifier(Token: Tok, NextToken: NextToken[`0`]) \|\|
3518	(Tok.is(K: AsmToken::Minus) && isRegOrOperandModifier(Token: NextToken[`0`], NextToken: NextToken[`1`])) \|\|
3519	isOpcodeModifierWithVal(Token: Tok, NextToken: NextToken[`0`]);
3520	}
3521
3522	// Check if the current token is an SP3 'neg' modifier.
3523	// Currently this modifier is allowed in the following context:
3524	//
3525	// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3526	// 2. Before an 'abs' modifier: -abs(...)
3527	// 3. Before an SP3 'abs' modifier: -\|...\|
3528	//
3529	// In all other cases "-" is handled as a part
3530	// of an expression that follows the sign.
3531	//
3532	// Note: When "-" is followed by an integer literal,
3533	// this is interpreted as integer negation rather
3534	// than a floating-point NEG modifier applied to N.
3535	// Beside being contr-intuitive, such use of floating-point
3536	// NEG modifier would have resulted in different meaning
3537	// of integer literals used with VOP1/2/C and VOP3,
3538	// for example:
3539	// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3540	// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3541	// Negative fp literals with preceding "-" are
3542	// handled likewise for uniformity
3543	//
3544	bool
3545	AMDGPUAsmParser::parseSP3NegModifier() {
3546
3547	AsmToken NextToken[`2`];
3548	peekTokens(Tokens: NextToken);
3549
3550	if (isToken(Kind: AsmToken::Minus) &&
3551	(isRegister(Token: NextToken[`0`], NextToken: NextToken[`1`]) \|\|
3552	NextToken[`0`].is(K: AsmToken::Pipe) \|\|
3553	isId(Token: NextToken[`0`], Id: "abs"))) {
3554	lex();
3555	return true;
3556	}
3557
3558	return false;
3559	}
3560
3561	ParseStatus
3562	AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3563	bool AllowImm) {
3564	bool Neg, SP3Neg;
3565	bool Abs, SP3Abs;
3566	SMLoc Loc;
3567
3568	// Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3569	if (isToken(Kind: AsmToken::Minus) && peekToken().is(K: AsmToken::Minus))
3570	return Error(L: getLoc(), Msg: "invalid syntax, expected 'neg' modifier");
3571
3572	SP3Neg = parseSP3NegModifier();
3573
3574	Loc = getLoc();
3575	Neg = trySkipId(Id: "neg");
3576	if (Neg && SP3Neg)
3577	return Error(L: Loc, Msg: "expected register or immediate");
3578	if (Neg && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after neg"))
3579	return ParseStatus::Failure;
3580
3581	Abs = trySkipId(Id: "abs");
3582	if (Abs && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after abs"))
3583	return ParseStatus::Failure;
3584
3585	LitModifier Lit = LitModifier::None;
3586	if (trySkipId(Id: "lit")) {
3587	Lit = LitModifier::Lit;
3588	if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit"))
3589	return ParseStatus::Failure;
3590	} else if (trySkipId(Id: "lit64")) {
3591	Lit = LitModifier::Lit64;
3592	if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit64"))
3593	return ParseStatus::Failure;
3594	if (!has64BitLiterals())
3595	return Error(L: Loc, Msg: "lit64 is not supported on this GPU");
3596	}
3597
3598	Loc = getLoc();
3599	SP3Abs = trySkipToken(Kind: AsmToken::Pipe);
3600	if (Abs && SP3Abs)
3601	return Error(L: Loc, Msg: "expected register or immediate");
3602
3603	ParseStatus Res;
3604	if (AllowImm) {
3605	Res = parseRegOrImm(Operands, HasSP3AbsMod: SP3Abs, Lit);
3606	} else {
3607	Res = parseReg(Operands);
3608	}
3609	if (!Res.isSuccess())
3610	return (SP3Neg \|\| Neg \|\| SP3Abs \|\| Abs \|\| Lit != LitModifier::None)
3611	? ParseStatus::Failure
3612	: Res;
3613
3614	if (Lit != LitModifier::None && !Operands.back()->isImm())
3615	Error(L: Loc, Msg: "expected immediate with lit modifier");
3616
3617	if (SP3Abs && !skipToken(Kind: AsmToken::Pipe, ErrMsg: "expected vertical bar"))
3618	return ParseStatus::Failure;
3619	if (Abs && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3620	return ParseStatus::Failure;
3621	if (Neg && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3622	return ParseStatus::Failure;
3623	if (Lit != LitModifier::None &&
3624	!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3625	return ParseStatus::Failure;
3626
3627	AMDGPUOperand::Modifiers Mods;
3628	Mods.Abs = Abs \|\| SP3Abs;
3629	Mods.Neg = Neg \|\| SP3Neg;
3630	Mods.Lit = Lit;
3631
3632	if (Mods.hasFPModifiers() \|\| Lit != LitModifier::None) {
3633	AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3634	if (Op.isExpr())
3635	return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression");
3636	Op.setModifiers(Mods);
3637	}
3638	return ParseStatus::Success;
3639	}
3640
3641	ParseStatus
3642	AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3643	bool AllowImm) {
3644	bool Sext = trySkipId(Id: "sext");
3645	if (Sext && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after sext"))
3646	return ParseStatus::Failure;
3647
3648	ParseStatus Res;
3649	if (AllowImm) {
3650	Res = parseRegOrImm(Operands);
3651	} else {
3652	Res = parseReg(Operands);
3653	}
3654	if (!Res.isSuccess())
3655	return Sext ? ParseStatus::Failure : Res;
3656
3657	if (Sext && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses"))
3658	return ParseStatus::Failure;
3659
3660	AMDGPUOperand::Modifiers Mods;
3661	Mods.Sext = Sext;
3662
3663	if (Mods.hasIntModifiers()) {
3664	AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3665	if (Op.isExpr())
3666	return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression");
3667	Op.setModifiers(Mods);
3668	}
3669
3670	return ParseStatus::Success;
3671	}
3672
3673	ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3674	return parseRegOrImmWithFPInputMods(Operands, AllowImm: false);
3675	}
3676
3677	ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3678	return parseRegOrImmWithIntInputMods(Operands, AllowImm: false);
3679	}
3680
3681	ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3682	auto Loc = getLoc();
3683	if (trySkipId(Id: "off")) {
3684	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: `0`, Loc,
3685	Type: AMDGPUOperand::ImmTyOff, IsFPImm: false));
3686	return ParseStatus::Success;
3687	}
3688
3689	if (!isRegister())
3690	return ParseStatus::NoMatch;
3691
3692	std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3693	if (Reg) {
3694	Operands.push_back(Elt: std::move(Reg));
3695	return ParseStatus::Success;
3696	}
3697
3698	return ParseStatus::Failure;
3699	}
3700
3701	unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3702	uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
3703
3704	if ((getForcedEncodingSize() == `32` && (TSFlags & SIInstrFlags::VOP3)) \|\|
3705	(getForcedEncodingSize() == `64` && !(TSFlags & SIInstrFlags::VOP3)) \|\|
3706	(isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) \|\|
3707	(isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3708	return Match_InvalidOperand;
3709
3710	if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi \|\|
3711	Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3712	// v_mac_f32/16 allow only dst_sel == DWORD;
3713	auto OpNum =
3714	AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::dst_sel);
3715	const auto &Op = Inst.getOperand(i: OpNum);
3716	if (!Op.isImm() \|\| Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3717	return Match_InvalidOperand;
3718	}
3719	}
3720
3721	// Asm can first try to match VOPD or VOPD3. By failing early here with
3722	// Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3723	// Checking later during validateInstruction does not give a chance to retry
3724	// parsing as a different encoding.
3725	if (tryAnotherVOPDEncoding(Inst))
3726	return Match_InvalidOperand;
3727
3728	return Match_Success;
3729	}
3730
3731	static ArrayRef<unsigned> getAllVariants() {
3732	static const unsigned Variants[] = {
3733	AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3734	AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3735	AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3736	};
3737
3738	return ArrayRef(Variants);
3739	}
3740
3741	// What asm variants we should check
3742	ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3743	if (isForcedDPP() && isForcedVOP3()) {
3744	static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3745	return ArrayRef(Variants);
3746	}
3747	if (getForcedEncodingSize() == `32`) {
3748	static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3749	return ArrayRef(Variants);
3750	}
3751
3752	if (isForcedVOP3()) {
3753	static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3754	return ArrayRef(Variants);
3755	}
3756
3757	if (isForcedSDWA()) {
3758	static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3759	AMDGPUAsmVariants::SDWA9};
3760	return ArrayRef(Variants);
3761	}
3762
3763	if (isForcedDPP()) {
3764	static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3765	return ArrayRef(Variants);
3766	}
3767
3768	return getAllVariants();
3769	}
3770
3771	StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3772	if (isForcedDPP() && isForcedVOP3())
3773	return "e64_dpp";
3774
3775	if (getForcedEncodingSize() == `32`)
3776	return "e32";
3777
3778	if (isForcedVOP3())
3779	return "e64";
3780
3781	if (isForcedSDWA())
3782	return "sdwa";
3783
3784	if (isForcedDPP())
3785	return "dpp";
3786
3787	return "";
3788	}
3789
3790	MCRegister
3791	AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3792	const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
3793	for (MCPhysReg Reg : Desc.implicit_uses()) {
3794	switch (Reg) {
3795	case AMDGPU::FLAT_SCR:
3796	case AMDGPU::VCC:
3797	case AMDGPU::VCC_LO:
3798	case AMDGPU::VCC_HI:
3799	case AMDGPU::M0:
3800	return Reg;
3801	default:
3802	break;
3803	}
3804	}
3805	return MCRegister ();
3806	}
3807
3808	// NB: This code is correct only when used to check constant
3809	// bus limitations because GFX7 support no f16 inline constants.
3810	// Note that there are no cases when a GFX7 opcode violates
3811	// constant bus limitations due to the use of an f16 constant.
3812	bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3813	unsigned OpIdx) const {
3814	const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
3815
3816	if (!AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx) \|\|
3817	AMDGPU::isKImmOperand(Desc, OpNo: OpIdx)) {
3818	return false;
3819	}
3820
3821	const MCOperand &MO = Inst.getOperand(i: OpIdx);
3822
3823	int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(Expr: MO.getExpr());
3824	auto OpSize = AMDGPU::getOperandSize(Desc, OpNo: OpIdx);
3825
3826	switch (OpSize) { // expected operand size
3827	case `8`:
3828	return AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
3829	case `4`:
3830	return AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
3831	case `2`: {
3832	const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3833	if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 \|\|
3834	OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16)
3835	return AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
3836
3837	if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 \|\|
3838	OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3839	return AMDGPU::isInlinableLiteralV2I16(Literal: Val);
3840
3841	if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 \|\|
3842	OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3843	return AMDGPU::isInlinableLiteralV2F16(Literal: Val);
3844
3845	if (OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT)
3846	return AMDGPU::isPKFMACF16InlineConstant(Literal: Val, IsGFX11Plus: isGFX11Plus());
3847
3848	if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 \|\|
3849	OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
3850	return AMDGPU::isInlinableLiteralV2BF16(Literal: Val);
3851
3852	if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 \|\|
3853	OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16)
3854	return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
3855
3856	if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 \|\|
3857	OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16)
3858	return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm());
3859
3860	if (OperandType == AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16)
3861	return false;
3862
3863	llvm_unreachable("invalid operand type");
3864	}
3865	default:
3866	llvm_unreachable("invalid operand size");
3867	}
3868	}
3869
3870	unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3871	if (!isGFX10Plus())
3872	return `1`;
3873
3874	switch (Opcode) {
3875	// 64-bit shift instructions can use only one scalar value input
3876	case AMDGPU::V_LSHLREV_B64_e64:
3877	case AMDGPU::V_LSHLREV_B64_gfx10:
3878	case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3879	case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3880	case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3881	case AMDGPU::V_LSHRREV_B64_e64:
3882	case AMDGPU::V_LSHRREV_B64_gfx10:
3883	case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3884	case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3885	case AMDGPU::V_ASHRREV_I64_e64:
3886	case AMDGPU::V_ASHRREV_I64_gfx10:
3887	case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3888	case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3889	case AMDGPU::V_LSHL_B64_e64:
3890	case AMDGPU::V_LSHR_B64_e64:
3891	case AMDGPU::V_ASHR_I64_e64:
3892	return `1`;
3893	default:
3894	return `2`;
3895	}
3896	}
3897
3898	constexpr unsigned MAX_SRC_OPERANDS_NUM = `6`;
3899	using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3900
3901	// Get regular operand indices in the same order as specified
3902	// in the instruction (but append mandatory literals to the end).
3903	static OperandIndices getSrcOperandIndices(unsigned Opcode,
3904	bool AddMandatoryLiterals = false) {
3905
3906	int16_t ImmIdx =
3907	AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::imm) : -`1`;
3908
3909	if (isVOPD(Opc: Opcode)) {
3910	int16_t ImmXIdx =
3911	AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::immX) : -`1`;
3912
3913	return {getNamedOperandIdx(Opcode, Name: OpName::src0X),
3914	getNamedOperandIdx(Opcode, Name: OpName::vsrc1X),
3915	getNamedOperandIdx(Opcode, Name: OpName::vsrc2X),
3916	getNamedOperandIdx(Opcode, Name: OpName::src0Y),
3917	getNamedOperandIdx(Opcode, Name: OpName::vsrc1Y),
3918	getNamedOperandIdx(Opcode, Name: OpName::vsrc2Y),
3919	ImmXIdx,
3920	ImmIdx};
3921	}
3922
3923	return {getNamedOperandIdx(Opcode, Name: OpName::src0),
3924	getNamedOperandIdx(Opcode, Name: OpName::src1),
3925	getNamedOperandIdx(Opcode, Name: OpName::src2), ImmIdx};
3926	}
3927
3928	bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3929	const MCOperand &MO = Inst.getOperand(i: OpIdx);
3930	if (MO.isImm())
3931	return !isInlineConstant(Inst, OpIdx);
3932	if (MO.isReg()) {
3933	auto Reg = MO.getReg();
3934	if (!Reg)
3935	return false;
3936	const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3937	auto PReg = mc2PseudoReg(Reg);
3938	return isSGPR(Reg: PReg, TRI) && PReg != SGPR_NULL;
3939	}
3940	return true;
3941	}
3942
3943	// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3944	// Writelane is special in that it can use SGPR and M0 (which would normally
3945	// count as using the constant bus twice - but in this case it is allowed since
3946	// the lane selector doesn't count as a use of the constant bus). However, it is
3947	// still required to abide by the 1 SGPR rule.
3948	static bool checkWriteLane(const MCInst &Inst) {
3949	const unsigned Opcode = Inst.getOpcode();
3950	if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3951	return false;
3952	const MCOperand &LaneSelOp = Inst.getOperand(i: `2`);
3953	if (!LaneSelOp.isReg())
3954	return false;
3955	auto LaneSelReg = mc2PseudoReg(Reg: LaneSelOp.getReg());
3956	return LaneSelReg == M0 \|\| LaneSelReg == M0_gfxpre11;
3957	}
3958
3959	bool AMDGPUAsmParser::validateConstantBusLimitations(
3960	const MCInst &Inst, const OperandVector &Operands) {
3961	const unsigned Opcode = Inst.getOpcode();
3962	const MCInstrDesc &Desc = MII.get(Opcode);
3963	MCRegister LastSGPR;
3964	unsigned ConstantBusUseCount = `0`;
3965	unsigned NumLiterals = `0`;
3966	unsigned LiteralSize;
3967
3968	if (!(Desc.TSFlags &
3969	(SIInstrFlags::VOPC \| SIInstrFlags::VOP1 \| SIInstrFlags::VOP2 \|
3970	SIInstrFlags::VOP3 \| SIInstrFlags::VOP3P \| SIInstrFlags::SDWA)) &&
3971	!isVOPD(Opc: Opcode))
3972	return true;
3973
3974	if (checkWriteLane(Inst))
3975	return true;
3976
3977	// Check special imm operands (used by madmk, etc)
3978	if (AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::imm)) {
3979	++NumLiterals;
3980	LiteralSize = `4`;
3981	}
3982
3983	SmallDenseSet<MCRegister> SGPRsUsed;
3984	MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3985	if (SGPRUsed) {
3986	SGPRsUsed.insert(V: SGPRUsed);
3987	++ConstantBusUseCount;
3988	}
3989
3990	OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3991
3992	unsigned ConstantBusLimit = getConstantBusLimit(Opcode);
3993
3994	for (int OpIdx : OpIndices) {
3995	if (OpIdx == -`1`)
3996	continue;
3997
3998	const MCOperand &MO = Inst.getOperand(i: OpIdx);
3999	if (usesConstantBus(Inst, OpIdx)) {
4000	if (MO.isReg()) {
4001	LastSGPR = mc2PseudoReg(Reg: MO.getReg());
4002	// Pairs of registers with a partial intersections like these
4003	// s0, s[0:1]
4004	// flat_scratch_lo, flat_scratch
4005	// flat_scratch_lo, flat_scratch_hi
4006	// are theoretically valid but they are disabled anyway.
4007	// Note that this code mimics SIInstrInfo::verifyInstruction
4008	if (SGPRsUsed.insert(V: LastSGPR).second) {
4009	++ConstantBusUseCount;
4010	}
4011	} else { // Expression or a literal
4012
4013	if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
4014	continue; // special operand like VINTERP attr_chan
4015
4016	// An instruction may use only one literal.
4017	// This has been validated on the previous step.
4018	// See validateVOPLiteral.
4019	// This literal may be used as more than one operand.
4020	// If all these operands are of the same size,
4021	// this literal counts as one scalar value.
4022	// Otherwise it counts as 2 scalar values.
4023	// See "GFX10 Shader Programming", section 3.6.2.3.
4024
4025	unsigned Size = AMDGPU::getOperandSize(Desc, OpNo: OpIdx);
4026	if (Size < `4`)
4027	Size = `4`;
4028
4029	if (NumLiterals == `0`) {
4030	NumLiterals = `1`;
4031	LiteralSize = Size;
4032	} else if (LiteralSize != Size) {
4033	NumLiterals = `2`;
4034	}
4035	}
4036	}
4037
4038	if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
4039	Error(L: getOperandLoc(Operands, MCOpIdx: OpIdx),
4040	Msg: "invalid operand (violates constant bus restrictions)");
4041	return false;
4042	}
4043	}
4044	return true;
4045	}
4046
4047	std::optional<unsigned>
4048	AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
4049
4050	const unsigned Opcode = Inst.getOpcode();
4051	if (!isVOPD(Opc: Opcode))
4052	return {};
4053
4054	const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4055
4056	auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
4057	const MCOperand &Opr = Inst.getOperand(i: OperandIdx);
4058	return (Opr.isReg() && !isSGPR(Reg: mc2PseudoReg(Reg: Opr.getReg()), TRI))
4059	? Opr.getReg()
4060	: MCRegister ();
4061	};
4062
4063	// On GFX1170+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
4064	// source-cache.
4065	bool SkipSrc =
4066	Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1170 \|\|
4067	Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 \|\|
4068	Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 \|\|
4069	Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 \|\|
4070	Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 \|\|
4071	Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
4072	bool AllowSameVGPR = isGFX12Plus();
4073
4074	if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
4075	for (auto OpName : {OpName::src0X, OpName::src0Y}) {
4076	int I = getNamedOperandIdx(Opcode, Name: OpName);
4077	const MCOperand &Op = Inst.getOperand(i: I);
4078	if (!Op.isImm())
4079	continue;
4080	int64_t Imm = Op.getImm();
4081	if (!AMDGPU::isInlinableLiteral32(Literal: Imm, HasInv2Pi: hasInv2PiInlineImm()) &&
4082	!AMDGPU::isInlinableLiteral64(Literal: Imm, HasInv2Pi: hasInv2PiInlineImm()))
4083	return (unsigned)I;
4084	}
4085
4086	for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
4087	OpName::vsrc2Y, OpName::imm}) {
4088	int I = getNamedOperandIdx(Opcode, Name: OpName);
4089	if (I == -`1`)
4090	continue;
4091	const MCOperand &Op = Inst.getOperand(i: I);
4092	if (Op.isImm())
4093	return (unsigned)I;
4094	}
4095	}
4096
4097	const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII);
4098	auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4099	GetRegIdx: getVRegIdx, MRI: *TRI, SkipSrc, AllowSameVGPR, VOPD3: AsVOPD3);
4100
4101	return InvalidCompOprIdx;
4102	}
4103
4104	bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4105	const OperandVector &Operands) {
4106
4107	unsigned Opcode = Inst.getOpcode();
4108	bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4109
4110	if (AsVOPD3) {
4111	for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4112	AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4113	if ((Op.isRegKind() \|\| Op.isImmTy(ImmT: AMDGPUOperand::ImmTyNone)) &&
4114	(Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4115	Error(L: Op.getStartLoc(), Msg: "ABS not allowed in VOPD3 instructions");
4116	}
4117	}
4118
4119	auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4120	if (!InvalidCompOprIdx.has_value())
4121	return true;
4122
4123	auto CompOprIdx = *InvalidCompOprIdx;
4124	const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII);
4125	auto ParsedIdx =
4126	std::max(a: InstInfo [VOPD::X].getIndexInParsedOperands(CompOprIdx),
4127	b: InstInfo [VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4128	assert(ParsedIdx > `0` && ParsedIdx < Operands.size());
4129
4130	auto Loc = ((AMDGPUOperand &)*Operands [ParsedIdx]).getStartLoc();
4131	if (CompOprIdx == VOPD::Component::DST) {
4132	if (AsVOPD3)
4133	Error(L: Loc, Msg: "dst registers must be distinct");
4134	else
4135	Error(L: Loc, Msg: "one dst register must be even and the other odd");
4136	} else {
4137	auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4138	Error(L: Loc, Msg: Twine ("src") + Twine (CompSrcIdx) +
4139	" operands must use different VGPR banks");
4140	}
4141
4142	return false;
4143	}
4144
4145	// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4146	// potentially used as VOPD3 with the same operands.
4147	bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4148	// First check if it fits VOPD
4149	auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3: false);
4150	if (!InvalidCompOprIdx.has_value())
4151	return false;
4152
4153	// Then if it fits VOPD3
4154	InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3: true);
4155	if (InvalidCompOprIdx.has_value()) {
4156	// If failed operand is dst it is better to show error about VOPD3
4157	// instruction as it has more capabilities and error message will be
4158	// more informative. If the dst is not legal for VOPD3, then it is not
4159	// legal for VOPD either.
4160	if (*InvalidCompOprIdx == VOPD::Component::DST)
4161	return true;
4162
4163	// Otherwise prefer VOPD as we may find ourselves in an awkward situation
4164	// with a conflict in tied implicit src2 of fmac and no asm operand to
4165	// to point to.
4166	return false;
4167	}
4168	return true;
4169	}
4170
4171	// \returns true is a VOPD3 instruction can be also represented as a shorter
4172	// VOPD encoding.
4173	bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4174	const unsigned Opcode = Inst.getOpcode();
4175	const auto &II = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII);
4176	unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST: getSTI());
4177	if (!getCanBeVOPD(Opc: II [VOPD::X].getOpcode(), EncodingFamily, VOPD3: false).X \|\|
4178	!getCanBeVOPD(Opc: II [VOPD::Y].getOpcode(), EncodingFamily, VOPD3: false).Y)
4179	return false;
4180
4181	// This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4182	// explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4183	// be parsed as VOPD which does not accept src2.
4184	if (II [VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 \|\|
4185	II [VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4186	return false;
4187
4188	// If any modifiers are set this cannot be VOPD.
4189	for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4190	OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4191	OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4192	int I = getNamedOperandIdx(Opcode, Name: OpName);
4193	if (I == -`1`)
4194	continue;
4195	if (Inst.getOperand(i: I).getImm())
4196	return false;
4197	}
4198
4199	return !tryVOPD3(Inst);
4200	}
4201
4202	// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4203	// form but switch to VOPD3 otherwise.
4204	bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4205	const unsigned Opcode = Inst.getOpcode();
4206	if (!isGFX1250Plus() \|\| !isVOPD(Opc: Opcode))
4207	return false;
4208
4209	if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4210	return tryVOPD(Inst);
4211	return tryVOPD3(Inst);
4212	}
4213
4214	bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4215
4216	const unsigned Opc = Inst.getOpcode();
4217	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4218
4219	if ((Desc.TSFlags & SIInstrFlags::IntClamp) != `0` && !hasIntClamp()) {
4220	int ClampIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::clamp);
4221	assert(ClampIdx != -`1`);
4222	return Inst.getOperand(i: ClampIdx).getImm() == `0`;
4223	}
4224
4225	return true;
4226	}
4227
4228	constexpr uint64_t MIMGFlags =
4229	SIInstrFlags::MIMG \| SIInstrFlags::VIMAGE \| SIInstrFlags::VSAMPLE;
4230
4231	bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {
4232
4233	const unsigned Opc = Inst.getOpcode();
4234	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4235
4236	if ((Desc.TSFlags & MIMGFlags) == `0`)
4237	return true;
4238
4239	int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdata);
4240	int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
4241	int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::tfe);
4242
4243	if (VDataIdx == -`1` && isGFX10Plus()) // no return image_sample
4244	return true;
4245
4246	if ((DMaskIdx == -`1` \|\| TFEIdx == -`1`) &&
4247	hasBVHRayTracingInsts()) // intersect_ray
4248	return true;
4249
4250	unsigned VDataSize = getRegOperandSize(Desc, OpNo: VDataIdx);
4251	unsigned TFESize = (TFEIdx != -`1` && Inst.getOperand(i: TFEIdx).getImm()) ? `1` : `0`;
4252	unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & `0xf`;
4253	if (DMask == `0`)
4254	DMask = `1`;
4255
4256	bool IsPackedD16 = false;
4257	unsigned DataSize =
4258	(Desc.TSFlags & SIInstrFlags::Gather4) ? `4` : llvm::popcount(Value: DMask);
4259	if (hasPackedD16()) {
4260	int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16);
4261	IsPackedD16 = D16Idx >= `0`;
4262	if (IsPackedD16 && Inst.getOperand(i: D16Idx).getImm())
4263	DataSize = (DataSize + `1`) / `2`;
4264	}
4265
4266	if ((VDataSize / `4`) == DataSize + TFESize)
4267	return true;
4268
4269	StringRef Modifiers;
4270	if (isGFX90A())
4271	Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
4272	else
4273	Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
4274
4275	Error(L: IDLoc, Msg: Twine ("image data size does not match ") + Modifiers);
4276	return false;
4277	}
4278
4279	bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
4280	const unsigned Opc = Inst.getOpcode();
4281	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4282
4283	if ((Desc.TSFlags & MIMGFlags) == `0` \|\| !isGFX10Plus())
4284	return true;
4285
4286	const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4287
4288	const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4289	AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
4290	int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vaddr0);
4291	AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
4292	? AMDGPU::OpName::srsrc
4293	: AMDGPU::OpName::rsrc;
4294	int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: RSrcOpName);
4295	int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim);
4296	int A16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::a16);
4297
4298	assert(VAddr0Idx != -`1`);
4299	assert(SrsrcIdx != -`1`);
4300	assert(SrsrcIdx > VAddr0Idx);
4301
4302	bool IsA16 = (A16Idx != -`1` && Inst.getOperand(i: A16Idx).getImm());
4303	if (BaseOpcode->BVH) {
4304	if (IsA16 == BaseOpcode->A16)
4305	return true;
4306	Error(L: IDLoc, Msg: "image address size does not match a16");
4307	return false;
4308	}
4309
4310	unsigned Dim = Inst.getOperand(i: DimIdx).getImm();
4311	const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim);
4312	bool IsNSA = SrsrcIdx - VAddr0Idx > `1`;
4313	unsigned ActualAddrSize =
4314	IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, OpNo: VAddr0Idx) / `4`;
4315
4316	unsigned ExpectedAddrSize =
4317	AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim: DimInfo, IsA16, IsG16Supported: hasG16());
4318
4319	if (IsNSA) {
4320	if (hasPartialNSAEncoding() &&
4321	ExpectedAddrSize >
4322	getNSAMaxSize(HasSampler: Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
4323	int VAddrLastIdx = SrsrcIdx - `1`;
4324	unsigned VAddrLastSize = getRegOperandSize(Desc, OpNo: VAddrLastIdx) / `4`;
4325
4326	ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
4327	}
4328	} else {
4329	if (ExpectedAddrSize > `12`)
4330	ExpectedAddrSize = `16`;
4331
4332	// Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
4333	// This provides backward compatibility for assembly created
4334	// before 160b/192b/224b types were directly supported.
4335	if (ActualAddrSize == `8` && (ExpectedAddrSize >= `5` && ExpectedAddrSize <= `7`))
4336	return true;
4337	}
4338
4339	if (ActualAddrSize == ExpectedAddrSize)
4340	return true;
4341
4342	Error(L: IDLoc, Msg: "image address size does not match dim and a16");
4343	return false;
4344	}
4345
4346	bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4347
4348	const unsigned Opc = Inst.getOpcode();
4349	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4350
4351	if ((Desc.TSFlags & MIMGFlags) == `0`)
4352	return true;
4353	if (!Desc.mayLoad() \|\| !Desc.mayStore())
4354	return true; // Not atomic
4355
4356	int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
4357	unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & `0xf`;
4358
4359	// This is an incomplete check because image_atomic_cmpswap
4360	// may only use 0x3 and 0xf while other atomic operations
4361	// may use 0x1 and 0x3. However these limitations are
4362	// verified when we check that dmask matches dst size.
4363	return DMask == `0x1` \|\| DMask == `0x3` \|\| DMask == `0xf`;
4364	}
4365
4366	bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4367
4368	const unsigned Opc = Inst.getOpcode();
4369	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4370
4371	if ((Desc.TSFlags & SIInstrFlags::Gather4) == `0`)
4372	return true;
4373
4374	int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask);
4375	unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & `0xf`;
4376
4377	// GATHER4 instructions use dmask in a different fashion compared to
4378	// other MIMG instructions. The only useful DMASK values are
4379	// 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4380	// (red,red,red,red) etc.) The ISA document doesn't mention
4381	// this.
4382	return DMask == `0x1` \|\| DMask == `0x2` \|\| DMask == `0x4` \|\| DMask == `0x8`;
4383	}
4384
4385	bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
4386	const OperandVector &Operands) {
4387	if (!isGFX10Plus())
4388	return true;
4389
4390	const unsigned Opc = Inst.getOpcode();
4391	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4392
4393	if ((Desc.TSFlags & MIMGFlags) == `0`)
4394	return true;
4395
4396	// image_bvh_intersect_ray instructions do not have dim
4397	if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
4398	return true;
4399
4400	for (unsigned i = `1`, e = Operands.size(); i != e; ++i) {
4401	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [i]);
4402	if (Op.isDim())
4403	return true;
4404	}
4405	return false;
4406	}
4407
4408	bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
4409	const unsigned Opc = Inst.getOpcode();
4410	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4411
4412	if ((Desc.TSFlags & MIMGFlags) == `0`)
4413	return true;
4414
4415	const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
4416	const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
4417	AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
4418
4419	if (!BaseOpcode->MSAA)
4420	return true;
4421
4422	int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim);
4423	assert(DimIdx != -`1`);
4424
4425	unsigned Dim = Inst.getOperand(i: DimIdx).getImm();
4426	const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim);
4427
4428	return DimInfo->MSAA;
4429	}
4430
4431	static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4432	{
4433	switch (Opcode) {
4434	case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4435	case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4436	case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4437	return true;
4438	default:
4439	return false;
4440	}
4441	}
4442
4443	// movrels opcodes should only allow VGPRS as src0.*
4444	// This is specified in .td description for vop1/vop3,
4445	// but sdwa is handled differently. See isSDWAOperand.
4446	bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4447	const OperandVector &Operands) {
4448
4449	const unsigned Opc = Inst.getOpcode();
4450	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4451
4452	if ((Desc.TSFlags & SIInstrFlags::SDWA) == `0` \|\| !IsMovrelsSDWAOpcode(Opcode: Opc))
4453	return true;
4454
4455	const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
4456	assert(Src0Idx != -`1`);
4457
4458	const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
4459	if (Src0.isReg()) {
4460	auto Reg = mc2PseudoReg(Reg: Src0.getReg());
4461	const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4462	if (!isSGPR(Reg, TRI))
4463	return true;
4464	}
4465
4466	Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx), Msg: "source operand must be a VGPR");
4467	return false;
4468	}
4469
4470	bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4471	const OperandVector &Operands) {
4472
4473	const unsigned Opc = Inst.getOpcode();
4474
4475	if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4476	return true;
4477
4478	const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
4479	assert(Src0Idx != -`1`);
4480
4481	const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
4482	if (!Src0.isReg())
4483	return true;
4484
4485	auto Reg = mc2PseudoReg(Reg: Src0.getReg());
4486	const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4487	if (!isGFX90A() && isSGPR(Reg, TRI)) {
4488	Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx),
4489	Msg: "source operand must be either a VGPR or an inline constant");
4490	return false;
4491	}
4492
4493	return true;
4494	}
4495
4496	bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4497	const OperandVector &Operands) {
4498	unsigned Opcode = Inst.getOpcode();
4499	const MCInstrDesc &Desc = MII.get(Opcode);
4500
4501	if (!(Desc.TSFlags & SIInstrFlags::IsMAI) \|\|
4502	!getFeatureBits()[FeatureMFMAInlineLiteralBug])
4503	return true;
4504
4505	const int Src2Idx = getNamedOperandIdx(Opcode, Name: OpName::src2);
4506	if (Src2Idx == -`1`)
4507	return true;
4508
4509	if (Inst.getOperand(i: Src2Idx).isImm() && isInlineConstant(Inst, OpIdx: Src2Idx)) {
4510	Error(L: getOperandLoc(Operands, MCOpIdx: Src2Idx),
4511	Msg: "inline constants are not allowed for this operand");
4512	return false;
4513	}
4514
4515	return true;
4516	}
4517
4518	bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
4519	const OperandVector &Operands) {
4520	const unsigned Opc = Inst.getOpcode();
4521	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4522
4523	if ((Desc.TSFlags & SIInstrFlags::IsMAI) == `0`)
4524	return true;
4525
4526	int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
4527	if (BlgpIdx != -`1`) {
4528	if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opcode: Opc)) {
4529	int CbszIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz);
4530
4531	unsigned CBSZ = Inst.getOperand(i: CbszIdx).getImm();
4532	unsigned BLGP = Inst.getOperand(i: BlgpIdx).getImm();
4533
4534	// Validate the correct register size was used for the floating point
4535	// format operands
4536
4537	bool Success = true;
4538	if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: CBSZ)) {
4539	int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
4540	Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx),
4541	Msg: "wrong register tuple size for cbsz value " + Twine (CBSZ));
4542	Success = false;
4543	}
4544
4545	if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: BLGP)) {
4546	int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
4547	Error(L: getOperandLoc(Operands, MCOpIdx: Src1Idx),
4548	Msg: "wrong register tuple size for blgp value " + Twine (BLGP));
4549	Success = false;
4550	}
4551
4552	return Success;
4553	}
4554	}
4555
4556	const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2);
4557	if (Src2Idx == -`1`)
4558	return true;
4559
4560	const MCOperand &Src2 = Inst.getOperand(i: Src2Idx);
4561	if (!Src2.isReg())
4562	return true;
4563
4564	MCRegister Src2Reg = Src2.getReg();
4565	MCRegister DstReg = Inst.getOperand(i: `0`).getReg();
4566	if (Src2Reg == DstReg)
4567	return true;
4568
4569	const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4570	if (TRI->getRegClass(i: MII.getOpRegClassID(OpInfo: Desc.operands()[`0`], HwModeId: HwMode))
4571	.getSizeInBits() <= `128`)
4572	return true;
4573
4574	if (TRI->regsOverlap(RegA: Src2Reg, RegB: DstReg)) {
4575	Error(L: getOperandLoc(Operands, MCOpIdx: Src2Idx),
4576	Msg: "source 2 operand must not partially overlap with dst");
4577	return false;
4578	}
4579
4580	return true;
4581	}
4582
4583	bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4584	switch (Inst.getOpcode()) {
4585	default:
4586	return true;
4587	case V_DIV_SCALE_F32_gfx6_gfx7:
4588	case V_DIV_SCALE_F32_vi:
4589	case V_DIV_SCALE_F32_gfx10:
4590	case V_DIV_SCALE_F64_gfx6_gfx7:
4591	case V_DIV_SCALE_F64_vi:
4592	case V_DIV_SCALE_F64_gfx10:
4593	break;
4594	}
4595
4596	// TODO: Check that src0 = src1 or src2.
4597
4598	for (auto Name : {AMDGPU::OpName::src0_modifiers,
4599	AMDGPU::OpName::src2_modifiers,
4600	AMDGPU::OpName::src2_modifiers}) {
4601	if (Inst.getOperand(i: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name))
4602	.getImm() &
4603	SISrcMods::ABS) {
4604	return false;
4605	}
4606	}
4607
4608	return true;
4609	}
4610
4611	bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4612
4613	const unsigned Opc = Inst.getOpcode();
4614	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4615
4616	if ((Desc.TSFlags & MIMGFlags) == `0`)
4617	return true;
4618
4619	int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16);
4620	if (D16Idx >= `0` && Inst.getOperand(i: D16Idx).getImm()) {
4621	if (isCI() \|\| isSI())
4622	return false;
4623	}
4624
4625	return true;
4626	}
4627
4628	bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4629	const unsigned Opc = Inst.getOpcode();
4630	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
4631
4632	if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == `0`)
4633	return true;
4634
4635	int R128Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::r128);
4636
4637	return R128Idx < `0` \|\| !Inst.getOperand(i: R128Idx).getImm();
4638	}
4639
4640	static bool IsRevOpcode(const unsigned Opcode)
4641	{
4642	switch (Opcode) {
4643	case AMDGPU::V_SUBREV_F32_e32:
4644	case AMDGPU::V_SUBREV_F32_e64:
4645	case AMDGPU::V_SUBREV_F32_e32_gfx10:
4646	case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4647	case AMDGPU::V_SUBREV_F32_e32_vi:
4648	case AMDGPU::V_SUBREV_F32_e64_gfx10:
4649	case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4650	case AMDGPU::V_SUBREV_F32_e64_vi:
4651
4652	case AMDGPU::V_SUBREV_CO_U32_e32:
4653	case AMDGPU::V_SUBREV_CO_U32_e64:
4654	case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4655	case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4656
4657	case AMDGPU::V_SUBBREV_U32_e32:
4658	case AMDGPU::V_SUBBREV_U32_e64:
4659	case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4660	case AMDGPU::V_SUBBREV_U32_e32_vi:
4661	case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4662	case AMDGPU::V_SUBBREV_U32_e64_vi:
4663
4664	case AMDGPU::V_SUBREV_U32_e32:
4665	case AMDGPU::V_SUBREV_U32_e64:
4666	case AMDGPU::V_SUBREV_U32_e32_gfx9:
4667	case AMDGPU::V_SUBREV_U32_e32_vi:
4668	case AMDGPU::V_SUBREV_U32_e64_gfx9:
4669	case AMDGPU::V_SUBREV_U32_e64_vi:
4670
4671	case AMDGPU::V_SUBREV_F16_e32:
4672	case AMDGPU::V_SUBREV_F16_e64:
4673	case AMDGPU::V_SUBREV_F16_e32_gfx10:
4674	case AMDGPU::V_SUBREV_F16_e32_vi:
4675	case AMDGPU::V_SUBREV_F16_e64_gfx10:
4676	case AMDGPU::V_SUBREV_F16_e64_vi:
4677
4678	case AMDGPU::V_SUBREV_U16_e32:
4679	case AMDGPU::V_SUBREV_U16_e64:
4680	case AMDGPU::V_SUBREV_U16_e32_vi:
4681	case AMDGPU::V_SUBREV_U16_e64_vi:
4682
4683	case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4684	case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4685	case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4686
4687	case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4688	case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4689
4690	case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4691	case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4692
4693	case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4694	case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4695
4696	case AMDGPU::V_LSHRREV_B32_e32:
4697	case AMDGPU::V_LSHRREV_B32_e64:
4698	case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4699	case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4700	case AMDGPU::V_LSHRREV_B32_e32_vi:
4701	case AMDGPU::V_LSHRREV_B32_e64_vi:
4702	case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4703	case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4704
4705	case AMDGPU::V_ASHRREV_I32_e32:
4706	case AMDGPU::V_ASHRREV_I32_e64:
4707	case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4708	case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4709	case AMDGPU::V_ASHRREV_I32_e32_vi:
4710	case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4711	case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4712	case AMDGPU::V_ASHRREV_I32_e64_vi:
4713
4714	case AMDGPU::V_LSHLREV_B32_e32:
4715	case AMDGPU::V_LSHLREV_B32_e64:
4716	case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4717	case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4718	case AMDGPU::V_LSHLREV_B32_e32_vi:
4719	case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4720	case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4721	case AMDGPU::V_LSHLREV_B32_e64_vi:
4722
4723	case AMDGPU::V_LSHLREV_B16_e32:
4724	case AMDGPU::V_LSHLREV_B16_e64:
4725	case AMDGPU::V_LSHLREV_B16_e32_vi:
4726	case AMDGPU::V_LSHLREV_B16_e64_vi:
4727	case AMDGPU::V_LSHLREV_B16_gfx10:
4728
4729	case AMDGPU::V_LSHRREV_B16_e32:
4730	case AMDGPU::V_LSHRREV_B16_e64:
4731	case AMDGPU::V_LSHRREV_B16_e32_vi:
4732	case AMDGPU::V_LSHRREV_B16_e64_vi:
4733	case AMDGPU::V_LSHRREV_B16_gfx10:
4734
4735	case AMDGPU::V_ASHRREV_I16_e32:
4736	case AMDGPU::V_ASHRREV_I16_e64:
4737	case AMDGPU::V_ASHRREV_I16_e32_vi:
4738	case AMDGPU::V_ASHRREV_I16_e64_vi:
4739	case AMDGPU::V_ASHRREV_I16_gfx10:
4740
4741	case AMDGPU::V_LSHLREV_B64_e64:
4742	case AMDGPU::V_LSHLREV_B64_gfx10:
4743	case AMDGPU::V_LSHLREV_B64_vi:
4744
4745	case AMDGPU::V_LSHRREV_B64_e64:
4746	case AMDGPU::V_LSHRREV_B64_gfx10:
4747	case AMDGPU::V_LSHRREV_B64_vi:
4748
4749	case AMDGPU::V_ASHRREV_I64_e64:
4750	case AMDGPU::V_ASHRREV_I64_gfx10:
4751	case AMDGPU::V_ASHRREV_I64_vi:
4752
4753	case AMDGPU::V_PK_LSHLREV_B16:
4754	case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4755	case AMDGPU::V_PK_LSHLREV_B16_vi:
4756
4757	case AMDGPU::V_PK_LSHRREV_B16:
4758	case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4759	case AMDGPU::V_PK_LSHRREV_B16_vi:
4760	case AMDGPU::V_PK_ASHRREV_I16:
4761	case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4762	case AMDGPU::V_PK_ASHRREV_I16_vi:
4763	return true;
4764	default:
4765	return false;
4766	}
4767	}
4768
4769	bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4770	const OperandVector &Operands) {
4771	using namespace SIInstrFlags;
4772	const unsigned Opcode = Inst.getOpcode();
4773	const MCInstrDesc &Desc = MII.get(Opcode);
4774
4775	// lds_direct register is defined so that it can be used
4776	// with 9-bit operands only. Ignore encodings which do not accept these.
4777	const auto Enc = VOP1 \| VOP2 \| VOP3 \| VOPC \| VOP3P \| SIInstrFlags::SDWA;
4778	if ((Desc.TSFlags & Enc) == `0`)
4779	return true;
4780
4781	for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4782	auto SrcIdx = getNamedOperandIdx(Opcode, Name: SrcName);
4783	if (SrcIdx == -`1`)
4784	break;
4785	const auto &Src = Inst.getOperand(i: SrcIdx);
4786	if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4787
4788	if (isGFX90A() \|\| isGFX11Plus()) {
4789	Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
4790	Msg: "lds_direct is not supported on this GPU");
4791	return false;
4792	}
4793
4794	if (IsRevOpcode(Opcode) \|\| (Desc.TSFlags & SIInstrFlags::SDWA)) {
4795	Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
4796	Msg: "lds_direct cannot be used with this instruction");
4797	return false;
4798	}
4799
4800	if (SrcName != OpName::src0) {
4801	Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
4802	Msg: "lds_direct may be used as src0 only");
4803	return false;
4804	}
4805	}
4806	}
4807
4808	return true;
4809	}
4810
4811	SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4812	for (unsigned i = `1`, e = Operands.size(); i != e; ++i) {
4813	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [i]);
4814	if (Op.isFlatOffset())
4815	return Op.getStartLoc();
4816	}
4817	return getLoc();
4818	}
4819
4820	bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4821	const OperandVector &Operands) {
4822	auto Opcode = Inst.getOpcode();
4823	auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
4824	if (OpNum == -`1`)
4825	return true;
4826
4827	uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4828	if ((TSFlags & SIInstrFlags::FLAT))
4829	return validateFlatOffset(Inst, Operands);
4830
4831	if ((TSFlags & SIInstrFlags::SMRD))
4832	return validateSMEMOffset(Inst, Operands);
4833
4834	const auto &Op = Inst.getOperand(i: OpNum);
4835	// GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4836	if (isGFX12Plus() &&
4837	(TSFlags & (SIInstrFlags::MUBUF \| SIInstrFlags::MTBUF))) {
4838	const unsigned OffsetSize = `24`;
4839	if (!isUIntN(N: OffsetSize - `1`, x: Op.getImm())) {
4840	Error(L: getFlatOffsetLoc(Operands),
4841	Msg: Twine ("expected a ") + Twine (OffsetSize - `1`) +
4842	"-bit unsigned offset for buffer ops");
4843	return false;
4844	}
4845	} else {
4846	const unsigned OffsetSize = `16`;
4847	if (!isUIntN(N: OffsetSize, x: Op.getImm())) {
4848	Error(L: getFlatOffsetLoc(Operands),
4849	Msg: Twine ("expected a ") + Twine (OffsetSize) + "-bit unsigned offset");
4850	return false;
4851	}
4852	}
4853	return true;
4854	}
4855
4856	bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4857	const OperandVector &Operands) {
4858	uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4859	if ((TSFlags & SIInstrFlags::FLAT) == `0`)
4860	return true;
4861
4862	auto Opcode = Inst.getOpcode();
4863	auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
4864	assert(OpNum != -`1`);
4865
4866	const auto &Op = Inst.getOperand(i: OpNum);
4867	if (!hasFlatOffsets() && Op.getImm() != `0`) {
4868	Error(L: getFlatOffsetLoc(Operands),
4869	Msg: "flat offset modifier is not supported on this GPU");
4870	return false;
4871	}
4872
4873	// For pre-GFX12 FLAT instructions the offset must be positive;
4874	// MSB is ignored and forced to zero.
4875	unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(ST: getSTI());
4876	bool AllowNegative =
4877	(TSFlags & (SIInstrFlags::FlatGlobal \| SIInstrFlags::FlatScratch)) \|\|
4878	isGFX12Plus();
4879	if (!isIntN(N: OffsetSize, x: Op.getImm()) \|\| (!AllowNegative && Op.getImm() < `0`)) {
4880	Error(L: getFlatOffsetLoc(Operands),
4881	Msg: Twine ("expected a ") +
4882	(AllowNegative ? Twine (OffsetSize) + "-bit signed offset"
4883	: Twine (OffsetSize - `1`) + "-bit unsigned offset"));
4884	return false;
4885	}
4886
4887	return true;
4888	}
4889
4890	SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4891	// Start with second operand because SMEM Offset cannot be dst or src0.
4892	for (unsigned i = `2`, e = Operands.size(); i != e; ++i) {
4893	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [i]);
4894	if (Op.isSMEMOffset() \|\| Op.isSMEMOffsetMod())
4895	return Op.getStartLoc();
4896	}
4897	return getLoc();
4898	}
4899
4900	bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4901	const OperandVector &Operands) {
4902	if (isCI() \|\| isSI())
4903	return true;
4904
4905	uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
4906	if ((TSFlags & SIInstrFlags::SMRD) == `0`)
4907	return true;
4908
4909	auto Opcode = Inst.getOpcode();
4910	auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset);
4911	if (OpNum == -`1`)
4912	return true;
4913
4914	const auto &Op = Inst.getOperand(i: OpNum);
4915	if (!Op.isImm())
4916	return true;
4917
4918	uint64_t Offset = Op.getImm();
4919	bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opc: Opcode);
4920	if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(ST: getSTI(), EncodedOffset: Offset) \|\|
4921	AMDGPU::isLegalSMRDEncodedSignedOffset(ST: getSTI(), EncodedOffset: Offset, IsBuffer))
4922	return true;
4923
4924	Error(L: getSMEMOffsetLoc(Operands),
4925	Msg: isGFX12Plus() && IsBuffer
4926	? "expected a 23-bit unsigned offset for buffer ops"
4927	: isGFX12Plus() ? "expected a 24-bit signed offset"
4928	: (isVI() \|\| IsBuffer) ? "expected a 20-bit unsigned offset"
4929	: "expected a 21-bit signed offset");
4930
4931	return false;
4932	}
4933
4934	bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
4935	const OperandVector &Operands) {
4936	unsigned Opcode = Inst.getOpcode();
4937	const MCInstrDesc &Desc = MII.get(Opcode);
4938	if (!(Desc.TSFlags & (SIInstrFlags::SOP2 \| SIInstrFlags::SOPC)))
4939	return true;
4940
4941	const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src0);
4942	const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src1);
4943
4944	const int OpIndices[] = { Src0Idx, Src1Idx };
4945
4946	unsigned NumExprs = `0`;
4947	unsigned NumLiterals = `0`;
4948	int64_t LiteralValue;
4949
4950	for (int OpIdx : OpIndices) {
4951	if (OpIdx == -`1`) break;
4952
4953	const MCOperand &MO = Inst.getOperand(i: OpIdx);
4954	// Exclude special imm operands (like that used by s_set_gpr_idx_on)
4955	if (AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx)) {
4956	bool IsLit = false;
4957	std::optional<int64_t> Imm;
4958	if (MO.isImm()) {
4959	Imm = MO.getImm();
4960	} else if (MO.isExpr()) {
4961	if (isLitExpr(Expr: MO.getExpr())) {
4962	IsLit = true;
4963	Imm = getLitValue(Expr: MO.getExpr());
4964	}
4965	} else {
4966	continue;
4967	}
4968
4969	if (!Imm.has_value()) {
4970	++NumExprs;
4971	} else if (!isInlineConstant(Inst, OpIdx)) {
4972	auto OpType = static_cast<AMDGPU::OperandType>(
4973	Desc.operands()[OpIdx].OperandType);
4974	int64_t Value = encode32BitLiteral(Imm: *Imm, Type: OpType, IsLit);
4975	if (NumLiterals == `0` \|\| LiteralValue != Value) {
4976	LiteralValue = Value;
4977	++NumLiterals;
4978	}
4979	}
4980	}
4981	}
4982
4983	if (NumLiterals + NumExprs <= `1`)
4984	return true;
4985
4986	Error(L: getOperandLoc(Operands, MCOpIdx: Src1Idx),
4987	Msg: "only one unique literal operand is allowed");
4988	return false;
4989	}
4990
4991	bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4992	const unsigned Opc = Inst.getOpcode();
4993	if (isPermlane16(Opc)) {
4994	int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
4995	unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
4996
4997	if (OpSel & ~`3`)
4998	return false;
4999	}
5000
5001	uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags;
5002
5003	if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
5004	int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
5005	if (OpSelIdx != -`1`) {
5006	if (Inst.getOperand(i: OpSelIdx).getImm() != `0`)
5007	return false;
5008	}
5009	int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);
5010	if (OpSelHiIdx != -`1`) {
5011	if (Inst.getOperand(i: OpSelHiIdx).getImm() != -`1`)
5012	return false;
5013	}
5014	}
5015
5016	// op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
5017	if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
5018	(TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
5019	int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
5020	unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
5021	if (OpSel & `3`)
5022	return false;
5023	}
5024
5025	// Packed math FP32 instructions typically accept SGPRs or VGPRs as source
5026	// operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
5027	// the first SGPR and use it for both the low and high operations.
5028	if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
5029	int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0);
5030	int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
5031	int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
5032	int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);
5033
5034	const MCOperand &Src0 = Inst.getOperand(i: Src0Idx);
5035	const MCOperand &Src1 = Inst.getOperand(i: Src1Idx);
5036	unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
5037	unsigned OpSelHi = Inst.getOperand(i: OpSelHiIdx).getImm();
5038
5039	const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5040
5041	auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
5042	unsigned Mask = `1U` << Index;
5043	return ((OpSel & Mask) == `0`) && ((OpSelHi & Mask) == `0`);
5044	};
5045
5046	if (Src0.isReg() && isSGPR(Reg: Src0.getReg(), TRI) &&
5047	!VerifyOneSGPR (/Index=/`0`))
5048	return false;
5049	if (Src1.isReg() && isSGPR(Reg: Src1.getReg(), TRI) &&
5050	!VerifyOneSGPR (/Index=/`1`))
5051	return false;
5052
5053	int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2);
5054	if (Src2Idx != -`1`) {
5055	const MCOperand &Src2 = Inst.getOperand(i: Src2Idx);
5056	if (Src2.isReg() && isSGPR(Reg: Src2.getReg(), TRI) &&
5057	!VerifyOneSGPR (/Index=/`2`))
5058	return false;
5059	}
5060	}
5061
5062	return true;
5063	}
5064
5065	bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
5066	if (!hasTrue16Insts())
5067	return true;
5068	const MCRegisterInfo *MRI = getMRI();
5069	const unsigned Opc = Inst.getOpcode();
5070	int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
5071	if (OpSelIdx == -`1`)
5072	return true;
5073	unsigned OpSelOpValue = Inst.getOperand(i: OpSelIdx).getImm();
5074	// If the value is 0 we could have a default OpSel Operand, so conservatively
5075	// allow it.
5076	if (OpSelOpValue == `0`)
5077	return true;
5078	unsigned OpCount = `0`;
5079	for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
5080	AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
5081	int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: OpName);
5082	if (OpIdx == -`1`)
5083	continue;
5084	const MCOperand &Op = Inst.getOperand(i: OpIdx);
5085	if (Op.isReg() &&
5086	MRI->getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: Op.getReg())) {
5087	bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Reg: Op.getReg(), MRI: *MRI);
5088	bool OpSelOpIsHi = ((OpSelOpValue & (`1` << OpCount)) != `0`);
5089	if (OpSelOpIsHi != VGPRSuffixIsHi)
5090	return false;
5091	}
5092	++OpCount;
5093	}
5094
5095	return true;
5096	}
5097
5098	bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5099	assert(OpName == AMDGPU::OpName::neg_lo \|\| OpName == AMDGPU::OpName::neg_hi);
5100
5101	const unsigned Opc = Inst.getOpcode();
5102	uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags;
5103
5104	// v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5105	// v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5106	// v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5107	// other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5108	if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5109	!(TSFlags & SIInstrFlags::IsSWMMAC))
5110	return true;
5111
5112	int NegIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName);
5113	if (NegIdx == -`1`)
5114	return true;
5115
5116	unsigned Neg = Inst.getOperand(i: NegIdx).getImm();
5117
5118	// Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
5119	// on some src operands but not allowed on other.
5120	// It is convenient that such instructions don't have src_modifiers operand
5121	// for src operands that don't allow neg because they also don't allow opsel.
5122
5123	const AMDGPU::OpName SrcMods[`3`] = {AMDGPU::OpName::src0_modifiers,
5124	AMDGPU::OpName::src1_modifiers,
5125	AMDGPU::OpName::src2_modifiers};
5126
5127	for (unsigned i = `0`; i < `3`; ++i) {
5128	if (!AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: SrcMods[i])) {
5129	if (Neg & (`1` << i))
5130	return false;
5131	}
5132	}
5133
5134	return true;
5135	}
5136
5137	bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5138	const OperandVector &Operands) {
5139	const unsigned Opc = Inst.getOpcode();
5140	int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp_ctrl);
5141	if (DppCtrlIdx >= `0`) {
5142	unsigned DppCtrl = Inst.getOperand(i: DppCtrlIdx).getImm();
5143
5144	if (!AMDGPU::isLegalDPALU_DPPControl(ST: getSTI(), DC: DppCtrl) &&
5145	AMDGPU::isDPALU_DPP(OpDesc: MII.get(Opcode: Opc), MII, ST: getSTI())) {
5146	// DP ALU DPP is supported for row_newbcast only on GFX9 and row_share*
5147	// only on GFX12.
5148	SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyDppCtrl, Operands);
5149	Error(L: S, Msg: isGFX12() ? "DP ALU dpp only supports row_share"
5150	: "DP ALU dpp only supports row_newbcast");
5151	return false;
5152	}
5153	}
5154
5155	int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp8);
5156	bool IsDPP = DppCtrlIdx >= `0` \|\| Dpp8Idx >= `0`;
5157
5158	if (IsDPP && !hasDPPSrc1SGPR(STI: getSTI())) {
5159	int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1);
5160	if (Src1Idx >= `0`) {
5161	const MCOperand &Src1 = Inst.getOperand(i: Src1Idx);
5162	const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5163	if (Src1.isReg() && isSGPR(Reg: mc2PseudoReg(Reg: Src1.getReg()), TRI)) {
5164	Error(L: getOperandLoc(Operands, MCOpIdx: Src1Idx),
5165	Msg: "invalid operand for instruction");
5166	return false;
5167	}
5168	if (Src1.isImm()) {
5169	Error(L: getInstLoc(Operands),
5170	Msg: "src1 immediate operand invalid for instruction");
5171	return false;
5172	}
5173	}
5174	}
5175
5176	return true;
5177	}
5178
5179	// Check if VCC register matches wavefront size
5180	bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5181	return (Reg == AMDGPU::VCC && isWave64()) \|\|
5182	(Reg == AMDGPU::VCC_LO && isWave32());
5183	}
5184
5185	// One unique literal can be used. VOP3 literal is only allowed in GFX10+
5186	bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5187	const OperandVector &Operands) {
5188	unsigned Opcode = Inst.getOpcode();
5189	const MCInstrDesc &Desc = MII.get(Opcode);
5190	bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, Name: OpName::imm) != -`1`;
5191	if (!(Desc.TSFlags & (SIInstrFlags::VOP3 \| SIInstrFlags::VOP3P)) &&
5192	!HasMandatoryLiteral && !isVOPD(Opc: Opcode))
5193	return true;
5194
5195	OperandIndices OpIndices = getSrcOperandIndices(Opcode, AddMandatoryLiterals: HasMandatoryLiteral);
5196
5197	std::optional<unsigned> LiteralOpIdx;
5198	std::optional<uint64_t> LiteralValue;
5199
5200	for (int OpIdx : OpIndices) {
5201	if (OpIdx == -`1`)
5202	continue;
5203
5204	const MCOperand &MO = Inst.getOperand(i: OpIdx);
5205	if (!MO.isImm() && !MO.isExpr())
5206	continue;
5207	if (!isSISrcOperand(Desc, OpNo: OpIdx))
5208	continue;
5209
5210	std::optional<int64_t> Imm;
5211	if (MO.isImm())
5212	Imm = MO.getImm();
5213	else if (MO.isExpr() && isLitExpr(Expr: MO.getExpr()))
5214	Imm = getLitValue(Expr: MO.getExpr());
5215
5216	bool IsAnotherLiteral = false;
5217	bool IsForcedLit = findMCOperand(Operands, MCOpIdx: OpIdx).isForcedLit();
5218	bool IsForcedLit64 = findMCOperand(Operands, MCOpIdx: OpIdx).isForcedLit64();
5219	if (!Imm.has_value()) {
5220	// Literal value not known, so we conservately assume it's different.
5221	IsAnotherLiteral = true;
5222	} else if (IsForcedLit \|\| IsForcedLit64 \|\| !isInlineConstant(Inst, OpIdx)) {
5223	uint64_t Value = *Imm;
5224	bool IsForcedFP64 =
5225	Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 \|\|
5226	(Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_REG_IMM_FP64 &&
5227	HasMandatoryLiteral);
5228	unsigned OpTy = Desc.operands()[OpIdx].OperandType;
5229	bool IsFP64 =
5230	(IsForcedFP64 \|\| (AMDGPU::isSISrcFPOperand(Desc, OpNo: OpIdx) &&
5231	OpTy != AMDGPU::OPERAND_REG_IMM_V2INT64)) &&
5232	AMDGPU::getOperandSize(OpInfo: Desc.operands()[OpIdx]) == `8`;
5233	bool IsValid32Op =
5234	IsForcedLit \|\| AMDGPU::isValid32BitLiteral(Val: Value, IsFP64);
5235
5236	if (((!IsValid32Op && !isInt<`32`>(x: Value) && !isUInt<`32`>(x: Value) &&
5237	!IsForcedFP64) \|\|
5238	(IsForcedLit64 && !HasMandatoryLiteral)) &&
5239	(!has64BitLiterals() \|\| Desc.getSize() != `4`)) {
5240	Error(L: getOperandLoc(Operands, MCOpIdx: OpIdx),
5241	Msg: "invalid operand for instruction");
5242	return false;
5243	}
5244
5245	// Only src0 can use lit64 in VOP encoding.*
5246	if (!IsForcedFP64 && (IsForcedLit64 \|\| !IsValid32Op) &&
5247	OpIdx != getNamedOperandIdx(Opcode, Name: OpName::src0)) {
5248	Error(L: getOperandLoc(Operands, MCOpIdx: OpIdx),
5249	Msg: "invalid operand for instruction");
5250	return false;
5251	}
5252
5253	if (IsFP64 && IsValid32Op && !IsForcedFP64)
5254	Value = Hi_32(Value);
5255
5256	IsAnotherLiteral = !LiteralValue \|\| *LiteralValue != Value;
5257	LiteralValue = Value;
5258	}
5259
5260	if (IsAnotherLiteral && !HasMandatoryLiteral &&
5261	!getFeatureBits()[FeatureVOP3Literal]) {
5262	Error(L: getOperandLoc(Operands, MCOpIdx: OpIdx),
5263	Msg: "literal operands are not supported");
5264	return false;
5265	}
5266
5267	if (LiteralOpIdx && IsAnotherLiteral) {
5268	Error(L: getLaterLoc(a: getOperandLoc(Operands, MCOpIdx: OpIdx),
5269	b: getOperandLoc(Operands, MCOpIdx: *LiteralOpIdx)),
5270	Msg: "only one unique literal operand is allowed");
5271	return false;
5272	}
5273
5274	if (IsAnotherLiteral)
5275	LiteralOpIdx = OpIdx;
5276	}
5277
5278	return true;
5279	}
5280
5281	// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5282	static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5283	const MCRegisterInfo *MRI) {
5284	int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name);
5285	if (OpIdx < `0`)
5286	return -`1`;
5287
5288	const MCOperand &Op = Inst.getOperand(i: OpIdx);
5289	if (!Op.isReg())
5290	return -`1`;
5291
5292	MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
5293	auto Reg = Sub ? Sub : Op.getReg();
5294	const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID);
5295	return AGPR32.contains(Reg) ? `1` : `0`;
5296	}
5297
5298	bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5299	uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
5300	if ((TSFlags & (SIInstrFlags::FLAT \| SIInstrFlags::MUBUF \|
5301	SIInstrFlags::MTBUF \| SIInstrFlags::MIMG \|
5302	SIInstrFlags::DS)) == `0`)
5303	return true;
5304
5305	AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5306	? AMDGPU::OpName::data0
5307	: AMDGPU::OpName::vdata;
5308
5309	const MCRegisterInfo *MRI = getMRI();
5310	int DstAreg = IsAGPROperand(Inst, Name: AMDGPU::OpName::vdst, MRI);
5311	int DataAreg = IsAGPROperand(Inst, Name: DataName, MRI);
5312
5313	if ((TSFlags & SIInstrFlags::DS) && DataAreg >= `0`) {
5314	int Data2Areg = IsAGPROperand(Inst, Name: AMDGPU::OpName::data1, MRI);
5315	if (Data2Areg >= `0` && Data2Areg != DataAreg)
5316	return false;
5317	}
5318
5319	auto FB = getFeatureBits();
5320	if (FB [AMDGPU::FeatureGFX90AInsts]) {
5321	if (DataAreg < `0` \|\| DstAreg < `0`)
5322	return true;
5323	return DstAreg == DataAreg;
5324	}
5325
5326	return DstAreg < `1` && DataAreg < `1`;
5327	}
5328
5329	bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
5330	auto FB = getFeatureBits();
5331	if (!FB [AMDGPU::FeatureRequiresAlignedVGPRs])
5332	return true;
5333
5334	unsigned Opc = Inst.getOpcode();
5335	const MCRegisterInfo *MRI = getMRI();
5336	// DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
5337	// unaligned VGPR. All others only allow even aligned VGPRs.
5338	if (FB [AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5339	return true;
5340
5341	if (FB [AMDGPU::FeatureGFX1250Insts]) {
5342	switch (Opc) {
5343	default:
5344	break;
5345	case AMDGPU::DS_LOAD_TR6_B96:
5346	case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5347	// DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
5348	// allows unaligned VGPR. All others only allow even aligned VGPRs.
5349	return true;
5350	case AMDGPU::GLOBAL_LOAD_TR6_B96:
5351	case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5352	// GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
5353	// allows unaligned VGPR for vdst, but other operands still only allow
5354	// even aligned VGPRs.
5355	int VAddrIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vaddr);
5356	if (VAddrIdx != -`1`) {
5357	const MCOperand &Op = Inst.getOperand(i: VAddrIdx);
5358	MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
5359	if ((Sub - AMDGPU::VGPR0) & `1`)
5360	return false;
5361	}
5362	return true;
5363	}
5364	case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5365	case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5366	return true;
5367	}
5368	}
5369
5370	const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID);
5371	const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID);
5372	for (unsigned I = `0`, E = Inst.getNumOperands(); I != E; ++I) {
5373	const MCOperand &Op = Inst.getOperand(i: I);
5374	if (!Op.isReg())
5375	continue;
5376
5377	MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
5378	if (!Sub)
5379	continue;
5380
5381	if (VGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::VGPR0) & `1`))
5382	return false;
5383	if (AGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::AGPR0) & `1`))
5384	return false;
5385	}
5386
5387	return true;
5388	}
5389
5390	SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5391	for (unsigned i = `1`, e = Operands.size(); i != e; ++i) {
5392	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [i]);
5393	if (Op.isBLGP())
5394	return Op.getStartLoc();
5395	}
5396	return SMLoc ();
5397	}
5398
5399	bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5400	const OperandVector &Operands) {
5401	unsigned Opc = Inst.getOpcode();
5402	int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
5403	if (BlgpIdx == -`1`)
5404	return true;
5405	SMLoc BLGPLoc = getBLGPLoc(Operands);
5406	if (!BLGPLoc.isValid())
5407	return true;
5408	bool IsNeg = StringRef (BLGPLoc.getPointer()).starts_with(Prefix: "neg:");
5409	auto FB = getFeatureBits();
5410	bool UsesNeg = false;
5411	if (FB [AMDGPU::FeatureGFX940Insts]) {
5412	switch (Opc) {
5413	case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5414	case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5415	case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5416	case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5417	UsesNeg = true;
5418	}
5419	}
5420
5421	if (IsNeg == UsesNeg)
5422	return true;
5423
5424	Error(L: BLGPLoc,
5425	Msg: UsesNeg ? "invalid modifier: blgp is not supported"
5426	: "invalid modifier: neg is not supported");
5427
5428	return false;
5429	}
5430
5431	bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5432	const OperandVector &Operands) {
5433	if (!isGFX11Plus())
5434	return true;
5435
5436	unsigned Opc = Inst.getOpcode();
5437	if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5438	Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5439	Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5440	Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5441	return true;
5442
5443	int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::sdst);
5444	assert(Src0Idx >= `0` && Inst.getOperand(Src0Idx).isReg());
5445	auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg());
5446	if (Reg == AMDGPU::SGPR_NULL)
5447	return true;
5448
5449	Error(L: getOperandLoc(Operands, MCOpIdx: Src0Idx), Msg: "src0 must be null");
5450	return false;
5451	}
5452
5453	bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5454	const OperandVector &Operands) {
5455	uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
5456	if ((TSFlags & SIInstrFlags::DS) == `0`)
5457	return true;
5458	if (TSFlags & SIInstrFlags::GWS)
5459	return validateGWS(Inst, Operands);
5460	// Only validate GDS for non-GWS instructions.
5461	if (hasGDS())
5462	return true;
5463	int GDSIdx =
5464	AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::gds);
5465	if (GDSIdx < `0`)
5466	return true;
5467	unsigned GDS = Inst.getOperand(i: GDSIdx).getImm();
5468	if (GDS) {
5469	SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyGDS, Operands);
5470	Error(L: S, Msg: "gds modifier is not supported on this GPU");
5471	return false;
5472	}
5473	return true;
5474	}
5475
5476	// gfx90a has an undocumented limitation:
5477	// DS_GWS opcodes must use even aligned registers.
5478	bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5479	const OperandVector &Operands) {
5480	if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5481	return true;
5482
5483	int Opc = Inst.getOpcode();
5484	if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5485	Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5486	return true;
5487
5488	const MCRegisterInfo *MRI = getMRI();
5489	const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID);
5490	int Data0Pos =
5491	AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::data0);
5492	assert(Data0Pos != -`1`);
5493	auto Reg = Inst.getOperand(i: Data0Pos).getReg();
5494	auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5495	if (RegIdx & `1`) {
5496	Error(L: getOperandLoc(Operands, MCOpIdx: Data0Pos), Msg: "vgpr must be even aligned");
5497	return false;
5498	}
5499
5500	return true;
5501	}
5502
5503	bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
5504	const OperandVector &Operands,
5505	SMLoc IDLoc) {
5506	int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(),
5507	Name: AMDGPU::OpName::cpol);
5508	if (CPolPos == -`1`)
5509	return true;
5510
5511	unsigned CPol = Inst.getOperand(i: CPolPos).getImm();
5512
5513	if (!isGFX1250Plus()) {
5514	if (CPol & CPol::SCAL) {
5515	SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5516	StringRef CStr(S.getPointer());
5517	S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scale_offset")]);
5518	Error(L: S, Msg: "scale_offset is not supported on this GPU");
5519	}
5520	if (CPol & CPol::NV) {
5521	SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5522	StringRef CStr(S.getPointer());
5523	S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "nv")]);
5524	Error(L: S, Msg: "nv is not supported on this GPU");
5525	}
5526	}
5527
5528	if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Opcode: Inst.getOpcode())) {
5529	SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5530	StringRef CStr(S.getPointer());
5531	S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scale_offset")]);
5532	Error(L: S, Msg: "scale_offset is not supported for this instruction");
5533	}
5534
5535	if (isGFX12Plus())
5536	return validateTHAndScopeBits(Inst, Operands, CPol);
5537
5538	uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags;
5539	if (TSFlags & SIInstrFlags::SMRD) {
5540	if (CPol && (isSI() \|\| isCI())) {
5541	SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5542	Error(L: S, Msg: "cache policy is not supported for SMRD instructions");
5543	return false;
5544	}
5545	if (CPol & ~(AMDGPU::CPol::GLC \| AMDGPU::CPol::DLC)) {
5546	Error(L: IDLoc, Msg: "invalid cache policy for SMEM instruction");
5547	return false;
5548	}
5549	}
5550
5551	if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
5552	const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF \|
5553	SIInstrFlags::MTBUF \| SIInstrFlags::MIMG \|
5554	SIInstrFlags::FLAT;
5555	if (!(TSFlags & AllowSCCModifier)) {
5556	SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5557	StringRef CStr(S.getPointer());
5558	S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scc")]);
5559	Error(L: S,
5560	Msg: "scc modifier is not supported for this instruction on this GPU");
5561	return false;
5562	}
5563	}
5564
5565	if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet \| SIInstrFlags::IsAtomicRet)))
5566	return true;
5567
5568	if (TSFlags & SIInstrFlags::IsAtomicRet) {
5569	if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
5570	Error(L: IDLoc, Msg: isGFX940() ? "instruction must use sc0"
5571	: "instruction must use glc");
5572	return false;
5573	}
5574	} else {
5575	if (CPol & CPol::GLC) {
5576	SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5577	StringRef CStr(S.getPointer());
5578	S = SMLoc::getFromPointer(
5579	Ptr: &CStr.data()[CStr.find(Str: isGFX940() ? "sc0" : "glc")]);
5580	Error(L: S, Msg: isGFX940() ? "instruction must not use sc0"
5581	: "instruction must not use glc");
5582	return false;
5583	}
5584	}
5585
5586	return true;
5587	}
5588
5589	bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
5590	const OperandVector &Operands,
5591	const unsigned CPol) {
5592	const unsigned TH = CPol & AMDGPU::CPol::TH;
5593	const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
5594
5595	const unsigned Opcode = Inst.getOpcode();
5596	const MCInstrDesc &TID = MII.get(Opcode);
5597
5598	auto PrintError = [&](StringRef Msg) {
5599	SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands);
5600	Error(L: S, Msg);
5601	return false;
5602	};
5603
5604	if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
5605	(TID.TSFlags & SIInstrFlags::IsAtomicNoRet))
5606	return PrintError ("th:TH_ATOMIC_RETURN requires a destination operand");
5607
5608	if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
5609	(TID.TSFlags & (SIInstrFlags::FLAT \| SIInstrFlags::MUBUF)) &&
5610	(!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
5611	return PrintError ("instruction must use th:TH_ATOMIC_RETURN");
5612
5613	if (TH == `0`)
5614	return true;
5615
5616	if ((TID.TSFlags & SIInstrFlags::SMRD) &&
5617	((TH == AMDGPU::CPol::TH_NT_RT) \|\| (TH == AMDGPU::CPol::TH_RT_NT) \|\|
5618	(TH == AMDGPU::CPol::TH_NT_HT)))
5619	return PrintError ("invalid th value for SMEM instruction");
5620
5621	if (TH == AMDGPU::CPol::TH_BYPASS) {
5622	if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
5623	CPol & AMDGPU::CPol::TH_REAL_BYPASS) \|\|
5624	(Scope == AMDGPU::CPol::SCOPE_SYS &&
5625	!(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
5626	return PrintError ("scope and th combination is not valid");
5627	}
5628
5629	unsigned THType = AMDGPU::getTemporalHintType(TID);
5630	if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
5631	if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
5632	return PrintError ("invalid th value for atomic instructions");
5633	} else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
5634	if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
5635	return PrintError ("invalid th value for store instructions");
5636	} else {
5637	if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
5638	return PrintError ("invalid th value for load instructions");
5639	}
5640
5641	return true;
5642	}
5643
5644	bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5645	const OperandVector &Operands) {
5646	const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
5647	if (Desc.mayStore() &&
5648	(Desc.TSFlags & (SIInstrFlags::MUBUF \| SIInstrFlags::MTBUF))) {
5649	SMLoc Loc = getImmLoc(Type: AMDGPUOperand::ImmTyTFE, Operands);
5650	if (Loc != getInstLoc(Operands)) {
5651	Error(L: Loc, Msg: "TFE modifier has no meaning for store instructions");
5652	return false;
5653	}
5654	}
5655
5656	return true;
5657	}
5658
5659	bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
5660	const OperandVector &Operands) {
5661	unsigned Opc = Inst.getOpcode();
5662	const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5663	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
5664
5665	int AFmtIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_fmt);
5666	if (AFmtIdx == -`1`)
5667	return true;
5668	unsigned AFmt = Inst.getOperand(i: AFmtIdx).getImm();
5669	int BFmtIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_fmt);
5670	unsigned BFmt = Inst.getOperand(i: BFmtIdx).getImm();
5671
5672	auto validateFmt = [&](unsigned Fmt, AMDGPU::OpName SrcOp) -> bool {
5673	int SrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: SrcOp);
5674	unsigned RegSize =
5675	TRI->getRegClass(i: MII.getOpRegClassID(OpInfo: Desc.operands()[SrcIdx], HwModeId: HwMode))
5676	.getSizeInBits();
5677
5678	if (RegSize == AMDGPU::wmmaScaleF8F6F4FormatToNumRegs(Fmt) * `32`)
5679	return true;
5680
5681	Error(L: getOperandLoc(Operands, MCOpIdx: SrcIdx),
5682	Msg: "wrong register tuple size for " +
5683	Twine (WMMAMods::ModMatrixFmt[Fmt]));
5684	return false;
5685	};
5686
5687	if (!validateFmt (AFmt, AMDGPU::OpName::src0) \|\|
5688	!validateFmt (BFmt, AMDGPU::OpName::src1))
5689	return false;
5690
5691	int AScaleIdx =
5692	AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_scale_fmt);
5693	if (AScaleIdx == -`1`)
5694	return true;
5695	unsigned AScale = Inst.getOperand(i: AScaleIdx).getImm();
5696	int BScaleIdx =
5697	AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_scale_fmt);
5698	unsigned BScale = Inst.getOperand(i: BScaleIdx).getImm();
5699	if (!isValidWMMAScaleFmtCombination(AFmt, AScale, BFmt, BScale)) {
5700	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyMatrixAFMT, Operands),
5701	Msg: "invalid matrix and scale format combination");
5702	return false;
5703	}
5704
5705	return true;
5706	}
5707
5708	bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
5709	const OperandVector &Operands) {
5710	if (!validateLdsDirect(Inst, Operands))
5711	return false;
5712	if (!validateTrue16OpSel(Inst)) {
5713	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands),
5714	Msg: "op_sel operand conflicts with 16-bit operand suffix");
5715	return false;
5716	}
5717	if (!validateSOPLiteral(Inst, Operands))
5718	return false;
5719	if (!validateVOPLiteral(Inst, Operands)) {
5720	return false;
5721	}
5722	if (!validateConstantBusLimitations(Inst, Operands)) {
5723	return false;
5724	}
5725	if (!validateVOPD(Inst, Operands)) {
5726	return false;
5727	}
5728	if (!validateIntClampSupported(Inst)) {
5729	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyClamp, Operands),
5730	Msg: "integer clamping is not supported on this GPU");
5731	return false;
5732	}
5733	if (!validateOpSel(Inst)) {
5734	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands),
5735	Msg: "invalid op_sel operand");
5736	return false;
5737	}
5738	if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_lo)) {
5739	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegLo, Operands),
5740	Msg: "invalid neg_lo operand");
5741	return false;
5742	}
5743	if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_hi)) {
5744	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegHi, Operands),
5745	Msg: "invalid neg_hi operand");
5746	return false;
5747	}
5748	if (!validateDPP(Inst, Operands)) {
5749	return false;
5750	}
5751	// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
5752	if (!validateMIMGD16(Inst)) {
5753	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands),
5754	Msg: "d16 modifier is not supported on this GPU");
5755	return false;
5756	}
5757	if (!validateMIMGDim(Inst, Operands)) {
5758	Error(L: IDLoc, Msg: "missing dim operand");
5759	return false;
5760	}
5761	if (!validateTensorR128(Inst)) {
5762	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands),
5763	Msg: "instruction must set modifier r128=0");
5764	return false;
5765	}
5766	if (!validateMIMGMSAA(Inst)) {
5767	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDim, Operands),
5768	Msg: "invalid dim; must be MSAA type");
5769	return false;
5770	}
5771	if (!validateMIMGDataSize(Inst, IDLoc)) {
5772	return false;
5773	}
5774	if (!validateMIMGAddrSize(Inst, IDLoc))
5775	return false;
5776	if (!validateMIMGAtomicDMask(Inst)) {
5777	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands),
5778	Msg: "invalid atomic image dmask");
5779	return false;
5780	}
5781	if (!validateMIMGGatherDMask(Inst)) {
5782	Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands),
5783	Msg: "invalid image_gather dmask: only one bit must be set");
5784	return false;
5785	}
5786	if (!validateMovrels(Inst, Operands)) {
5787	return false;
5788	}
5789	if (!validateOffset(Inst, Operands)) {
5790	return false;
5791	}
5792	if (!validateMAIAccWrite(Inst, Operands)) {
5793	return false;
5794	}
5795	if (!validateMAISrc2(Inst, Operands)) {
5796	return false;
5797	}
5798	if (!validateMFMA(Inst, Operands)) {
5799	return false;
5800	}
5801	if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
5802	return false;
5803	}
5804
5805	if (!validateAGPRLdSt(Inst)) {
5806	Error(L: IDLoc, Msg: getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
5807	? "invalid register class: data and dst should be all VGPR or AGPR"
5808	: "invalid register class: agpr loads and stores not supported on this GPU"
5809	);
5810	return false;
5811	}
5812	if (!validateVGPRAlign(Inst)) {
5813	Error(L: IDLoc,
5814	Msg: "invalid register class: vgpr tuples must be 64 bit aligned");
5815	return false;
5816	}
5817	if (!validateDS(Inst, Operands)) {
5818	return false;
5819	}
5820
5821	if (!validateBLGP(Inst, Operands)) {
5822	return false;
5823	}
5824
5825	if (!validateDivScale(Inst)) {
5826	Error(L: IDLoc, Msg: "ABS not allowed in VOP3B instructions");
5827	return false;
5828	}
5829	if (!validateWaitCnt(Inst, Operands)) {
5830	return false;
5831	}
5832	if (!validateTFE(Inst, Operands)) {
5833	return false;
5834	}
5835	if (!validateWMMA(Inst, Operands)) {
5836	return false;
5837	}
5838
5839	return true;
5840	}
5841
5842	static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5843	const FeatureBitset &FBS,
5844	unsigned VariantID = `0`);
5845
5846	static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5847	const FeatureBitset &AvailableFeatures,
5848	unsigned VariantID);
5849
5850	bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5851	const FeatureBitset &FBS) {
5852	return isSupportedMnemo(Mnemo, FBS, Variants: getAllVariants());
5853	}
5854
5855	bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5856	const FeatureBitset &FBS,
5857	ArrayRef<unsigned> Variants) {
5858	for (auto Variant : Variants) {
5859	if (AMDGPUCheckMnemonic(Mnemonic: Mnemo, AvailableFeatures: FBS, VariantID: Variant))
5860	return true;
5861	}
5862
5863	return false;
5864	}
5865
5866	bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5867	SMLoc IDLoc) {
5868	FeatureBitset FBS = ComputeAvailableFeatures(FB: getFeatureBits());
5869
5870	// Check if requested instruction variant is supported.
5871	if (isSupportedMnemo(Mnemo, FBS, Variants: getMatchedVariants()))
5872	return false;
5873
5874	// This instruction is not supported.
5875	// Clear any other pending errors because they are no longer relevant.
5876	getParser().clearPendingErrors();
5877
5878	// Requested instruction variant is not supported.
5879	// Check if any other variants are supported.
5880	StringRef VariantName = getMatchedVariantName();
5881	if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5882	return Error(L: IDLoc,
5883	Msg: Twine (VariantName,
5884	" variant of this instruction is not supported"));
5885	}
5886
5887	// Check if this instruction may be used with a different wavesize.
5888	if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5889	!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5890	// FIXME: Use getAvailableFeatures, and do not manually recompute
5891	FeatureBitset FeaturesWS32 = getFeatureBits();
5892	FeaturesWS32.flip(I: AMDGPU::FeatureWavefrontSize64)
5893	.flip(I: AMDGPU::FeatureWavefrontSize32);
5894	FeatureBitset AvailableFeaturesWS32 =
5895	ComputeAvailableFeatures(FB: FeaturesWS32);
5896
5897	if (isSupportedMnemo(Mnemo, FBS: AvailableFeaturesWS32, Variants: getMatchedVariants()))
5898	return Error(L: IDLoc, Msg: "instruction requires wavesize=32");
5899	}
5900
5901	// Finally check if this instruction is supported on any other GPU.
5902	if (isSupportedMnemo(Mnemo, FBS: FeatureBitset ().set())) {
5903	return Error(L: IDLoc, Msg: "instruction not supported on this GPU (" +
5904	getSTI().getCPU() + ")" + ": " + Mnemo);
5905	}
5906
5907	// Instruction not supported on any GPU. Probably a typo.
5908	std::string Suggestion = AMDGPUMnemonicSpellCheck(S: Mnemo, FBS);
5909	return Error(L: IDLoc, Msg: "invalid instruction" + Suggestion);
5910	}
5911
5912	static bool isInvalidVOPDY(const OperandVector &Operands,
5913	uint64_t InvalidOprIdx) {
5914	assert(InvalidOprIdx < Operands.size());
5915	const auto &Op = ((AMDGPUOperand &)*Operands [InvalidOprIdx]);
5916	if (Op.isToken() && InvalidOprIdx > `1`) {
5917	const auto &PrevOp = ((AMDGPUOperand &)*Operands [InvalidOprIdx - `1`]);
5918	return PrevOp.isToken() && PrevOp.getToken() == "::";
5919	}
5920	return false;
5921	}
5922
5923	bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5924	OperandVector &Operands,
5925	MCStreamer &Out,
5926	uint64_t &ErrorInfo,
5927	bool MatchingInlineAsm) {
5928	MCInst Inst;
5929	Inst.setLoc(IDLoc);
5930	unsigned Result = Match_Success;
5931	for (auto Variant : getMatchedVariants()) {
5932	uint64_t EI;
5933	auto R = MatchInstructionImpl(Operands, Inst, ErrorInfo&: EI, matchingInlineAsm: MatchingInlineAsm,
5934	VariantID: Variant);
5935	// We order match statuses from least to most specific. We use most specific
5936	// status as resulting
5937	// Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5938	if (R == Match_Success \|\| R == Match_MissingFeature \|\|
5939	(R == Match_InvalidOperand && Result != Match_MissingFeature) \|\|
5940	(R == Match_MnemonicFail && Result != Match_InvalidOperand &&
5941	Result != Match_MissingFeature)) {
5942	Result = R;
5943	ErrorInfo = EI;
5944	}
5945	if (R == Match_Success)
5946	break;
5947	}
5948
5949	if (Result == Match_Success) {
5950	if (!validateInstruction(Inst, IDLoc, Operands)) {
5951	return true;
5952	}
5953	emitTargetDirective();
5954	Out.emitInstruction(Inst, STI: getSTI());
5955	// Record for kernel prologue checking.
5956	OpcodeStream.push_back(Elt: Inst.getOpcode());
5957	return false;
5958	}
5959
5960	StringRef Mnemo = ((AMDGPUOperand &)*Operands [`0`]).getToken();
5961	if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5962	return true;
5963	}
5964
5965	switch (Result) {
5966	default: break;
5967	case Match_MissingFeature:
5968	// It has been verified that the specified instruction
5969	// mnemonic is valid. A match was found but it requires
5970	// features which are not supported on this GPU.
5971	return Error(L: IDLoc, Msg: "operands are not valid for this GPU or mode");
5972
5973	case Match_InvalidOperand: {
5974	SMLoc ErrorLoc = IDLoc;
5975	if (ErrorInfo != ~`0ULL`) {
5976	if (ErrorInfo >= Operands.size()) {
5977	return Error(L: IDLoc, Msg: "too few operands for instruction");
5978	}
5979	ErrorLoc = ((AMDGPUOperand &)*Operands [ErrorInfo]).getStartLoc();
5980	if (ErrorLoc == SMLoc ())
5981	ErrorLoc = IDLoc;
5982
5983	if (isInvalidVOPDY(Operands, InvalidOprIdx: ErrorInfo))
5984	return Error(L: ErrorLoc, Msg: "invalid VOPDY instruction");
5985	}
5986	return Error(L: ErrorLoc, Msg: "invalid operand for instruction");
5987	}
5988
5989	case Match_MnemonicFail:
5990	llvm_unreachable("Invalid instructions should have been handled already");
5991	}
5992	llvm_unreachable("Implement any new match types added!");
5993	}
5994
5995	bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5996	int64_t Tmp = -`1`;
5997	if (!isToken(Kind: AsmToken::Integer) && !isToken(Kind: AsmToken::Identifier)) {
5998	return true;
5999	}
6000	if (getParser().parseAbsoluteExpression(Res&: Tmp)) {
6001	return true;
6002	}
6003	Ret = static_cast<uint32_t>(Tmp);
6004	return false;
6005	}
6006
6007	bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
6008	if (!getSTI().getTargetTriple().isAMDGCN())
6009	return TokError(Msg: "directive only supported for amdgcn architecture");
6010
6011	std::string TargetIDDirective;
6012	SMLoc TargetStart = getTok().getLoc();
6013	if (getParser().parseEscapedString(Data&: TargetIDDirective))
6014	return true;
6015
6016	std::optional<AMDGPU::TargetID> MaybeParsed =
6017	AMDGPU::TargetID::parseTargetIDString(TargetIDDirective);
6018	if (!MaybeParsed)
6019	return getParser().Error(L: TargetStart,
6020	Msg: "malformed target id '" + TargetIDDirective + "'");
6021
6022	const AMDGPU::TargetID &ParsedTargetID = *MaybeParsed;
6023	const Triple &TT = getSTI().getTargetTriple();
6024
6025	// The processor named in the target id must be covered by the triple's
6026	// subarch.
6027	if (!AMDGPU::isCPUValidForSubArch(SubArch: TT.getSubArch(),
6028	AK: ParsedTargetID.getGPUKind())) {
6029	return getParser().Error(
6030	L: TargetStart, Msg: "target id '" + TargetIDDirective +
6031	"' specifies a processor that is not valid for "
6032	"subarch '" +
6033	TT.getArchName() + "'");
6034	}
6035
6036	const std::optional<AMDGPU::TargetID> &CurrentTargetID =
6037	getTargetStreamer().getTargetID();
6038
6039	Triple DirectiveTriple(ParsedTargetID.getTargetTripleString());
6040	const Triple &STITriple = getSTI().getTargetTriple();
6041	if (!DirectiveTriple.isCompatibleWith(Other: STITriple)) {
6042	return getParser().Error(
6043	L: TargetStart, Msg: ".amdgcn_target " + Twine (ParsedTargetID.toString()) +
6044	" is incompatible with " +
6045	Twine (CurrentTargetID ->toString()));
6046	}
6047
6048	// Error if the ISA version doesn't match
6049	StringRef DirectiveProcessor =
6050	AMDGPU::getArchNameAMDGCN(AK: ParsedTargetID.getGPUKind());
6051	AMDGPU::IsaVersion DirectiveISA = AMDGPU::getIsaVersion(GPU: DirectiveProcessor);
6052	AMDGPU::IsaVersion CurrentISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
6053	if (DirectiveISA != CurrentISA) {
6054	return getParser().Error(L: TargetStart,
6055	Msg: ".amdgcn_target directive processor " +
6056	Twine (DirectiveProcessor) +
6057	" does not match the specified processor " +
6058	Twine (getSTI().getCPU()));
6059	}
6060
6061	// Warn if sramecc or xnack mismatch. These do not change the encoding.
6062	if (AMDGPU::IsaInfo::targetIDSettingsConflict(
6063	Lhs: ParsedTargetID.getXnackSetting(),
6064	Rhs: CurrentTargetID ->getXnackSetting())) {
6065	Warning(L: TargetStart,
6066	Msg: ".amdgcn_target directive has conflicting xnack settings");
6067	}
6068	if (AMDGPU::IsaInfo::targetIDSettingsConflict(
6069	Lhs: ParsedTargetID.getSramEccSetting(),
6070	Rhs: CurrentTargetID ->getSramEccSetting())) {
6071	Warning(L: TargetStart,
6072	Msg: ".amdgcn_target directive has conflicting sramecc settings");
6073	}
6074
6075	// Update the target streamer's TargetID with settings from the directive.
6076	// We don't update the MCSubtargetInfo because we've already validated
6077	// that the directive matches the command-line CPU.
6078	getTargetStreamer().getTargetID()->setXnackSetting(
6079	ParsedTargetID.getXnackSetting());
6080	getTargetStreamer().getTargetID()->setSramEccSetting(
6081	ParsedTargetID.getSramEccSetting());
6082
6083	return false;
6084	}
6085
6086	bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
6087	return Error(L: Range.Start, Msg: "value out of range", Range);
6088	}
6089
6090	bool AMDGPUAsmParser::calculateGPRBlocks(
6091	const FeatureBitset &Features, const MCExpr *VCCUsed,
6092	const MCExpr FlatScrUsed, bool* XNACKUsed,
6093	std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
6094	SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
6095	const MCExpr &VGPRBlocks, const* MCExpr *&SGPRBlocks) {
6096	// TODO(scott.linder): These calculations are duplicated from
6097	// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
6098	IsaVersion Version = getIsaVersion(GPU: getSTI().getCPU());
6099	MCContext &Ctx = getContext();
6100
6101	const MCExpr *NumSGPRs = NextFreeSGPR;
6102	int64_t EvaluatedSGPRs;
6103
6104	if (Version.Major >= `10`)
6105	NumSGPRs = MCConstantExpr::create(Value: `0`, Ctx);
6106	else {
6107	unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(STI: getSTI());
6108
6109	if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && Version.Major >= `8` &&
6110	!Features.test(I: FeatureSGPRInitBug) &&
6111	static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6112	return OutOfRangeError(Range: SGPRRange);
6113
6114	const MCExpr *ExtraSGPRs =
6115	AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
6116	NumSGPRs = MCBinaryExpr::createAdd(LHS: NumSGPRs, RHS: ExtraSGPRs, Ctx);
6117
6118	if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) &&
6119	(Version.Major <= `7` \|\| Features.test(I: FeatureSGPRInitBug)) &&
6120	static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
6121	return OutOfRangeError(Range: SGPRRange);
6122
6123	if (Features.test(I: FeatureSGPRInitBug))
6124	NumSGPRs =
6125	MCConstantExpr::create(Value: IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
6126	}
6127
6128	// The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
6129	// (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
6130	auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
6131	unsigned Granule) -> const MCExpr * {
6132	const MCExpr *OneConst = MCConstantExpr::create(Value: `1ul`, Ctx);
6133	const MCExpr *GranuleConst = MCConstantExpr::create(Value: Granule, Ctx);
6134	const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax(Args: {NumGPR, OneConst}, Ctx);
6135	const MCExpr *AlignToGPR =
6136	AMDGPUMCExpr::createAlignTo(Value: MaxNumGPR, Align: GranuleConst, Ctx);
6137	const MCExpr *DivGPR =
6138	MCBinaryExpr::createDiv(LHS: AlignToGPR, RHS: GranuleConst, Ctx);
6139	const MCExpr *SubGPR = MCBinaryExpr::createSub(LHS: DivGPR, RHS: OneConst, Ctx);
6140	return SubGPR;
6141	};
6142
6143	VGPRBlocks = GetNumGPRBlocks (
6144	NextFreeVGPR,
6145	IsaInfo::getVGPREncodingGranule(STI: getSTI(), EnableWavefrontSize32));
6146	SGPRBlocks =
6147	GetNumGPRBlocks (NumSGPRs, IsaInfo::getSGPREncodingGranule(STI: getSTI()));
6148
6149	return false;
6150	}
6151
6152	bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
6153	if (!getSTI().getTargetTriple().isAMDGCN())
6154	return TokError(Msg: "directive only supported for amdgcn architecture");
6155
6156	if (!isHsaAbi(STI: getSTI()))
6157	return TokError(Msg: "directive only supported for amdhsa OS");
6158
6159	StringRef KernelName;
6160	if (getParser().parseIdentifier(Res&: KernelName))
6161	return true;
6162
6163	// Remember the kernel name so its prologue can be checked at end of file.
6164	// The matching label may have been parsed already or may follow later.
6165	AMDHSAKernelSymbols.insert(Ptr: getContext().getOrCreateSymbol(Name: KernelName));
6166
6167	AMDGPU::MCKernelDescriptor KD =
6168	AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(
6169	STI: &getSTI(), Ctx&: getContext());
6170
6171	StringSet<> Seen;
6172
6173	IsaVersion IVersion = getIsaVersion(GPU: getSTI().getCPU());
6174
6175	const MCExpr *ZeroExpr = MCConstantExpr::create(Value: `0`, Ctx&: getContext());
6176	const MCExpr *OneExpr = MCConstantExpr::create(Value: `1`, Ctx&: getContext());
6177
6178	SMRange VGPRRange;
6179	const MCExpr *NextFreeVGPR = ZeroExpr;
6180	const MCExpr *AccumOffset = MCConstantExpr::create(Value: `0`, Ctx&: getContext());
6181	const MCExpr *NamedBarCnt = ZeroExpr;
6182	uint64_t SharedVGPRCount = `0`;
6183	uint64_t PreloadLength = `0`;
6184	uint64_t PreloadOffset = `0`;
6185	SMRange SGPRRange;
6186	const MCExpr *NextFreeSGPR = ZeroExpr;
6187
6188	// Count the number of user SGPRs implied from the enabled feature bits.
6189	unsigned ImpliedUserSGPRCount = `0`;
6190
6191	// Track if the asm explicitly contains the directive for the user SGPR
6192	// count.
6193	std::optional<unsigned> ExplicitUserSGPRCount;
6194	const MCExpr *ReserveVCC = OneExpr;
6195	const MCExpr *ReserveFlatScr = OneExpr;
6196	std::optional<bool> EnableWavefrontSize32;
6197
6198	while (true) {
6199	while (trySkipToken(Kind: AsmToken::EndOfStatement));
6200
6201	StringRef ID;
6202	SMRange IDRange = getTok().getLocRange();
6203	if (!parseId(Val&: ID, ErrMsg: "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6204	return true;
6205
6206	if (ID == ".end_amdhsa_kernel")
6207	break;
6208
6209	if (!Seen.insert(key: ID).second)
6210	return TokError(Msg: ".amdhsa_ directives cannot be repeated");
6211
6212	SMLoc ValStart = getLoc();
6213	const MCExpr *ExprVal;
6214	if (getParser().parseExpression(Res&: ExprVal))
6215	return true;
6216	SMLoc ValEnd = getLoc();
6217	SMRange ValRange = SMRange (ValStart, ValEnd);
6218
6219	int64_t IVal = `0`;
6220	uint64_t Val = IVal;
6221	bool EvaluatableExpr;
6222	if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(Res&: IVal))) {
6223	if (IVal < `0`)
6224	return OutOfRangeError(Range: ValRange);
6225	Val = IVal;
6226	}
6227
6228	#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6229	if (!isUInt<ENTRY##_WIDTH>(Val)) \
6230	return OutOfRangeError(RANGE); \
6231	AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6232	getContext());
6233
6234	// Some fields use the parsed value immediately which requires the expression to
6235	// be solvable.
6236	#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6237	if (!(RESOLVED)) \
6238	return Error(IDRange.Start, "directive should have resolvable expression", \
6239	IDRange);
6240
6241	if (ID == ".amdhsa_group_segment_fixed_size") {
6242	if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) *
6243	CHAR_BIT>(x: Val))
6244	return OutOfRangeError(Range: ValRange);
6245	KD.group_segment_fixed_size = ExprVal;
6246	} else if (ID == ".amdhsa_private_segment_fixed_size") {
6247	if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) *
6248	CHAR_BIT>(x: Val))
6249	return OutOfRangeError(Range: ValRange);
6250	KD.private_segment_fixed_size = ExprVal;
6251	} else if (ID == ".amdhsa_kernarg_size") {
6252	if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(x: Val))
6253	return OutOfRangeError(Range: ValRange);
6254	KD.kernarg_size = ExprVal;
6255	} else if (ID == ".amdhsa_user_sgpr_count") {
6256	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6257	ExplicitUserSGPRCount = Val;
6258	} else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6259	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6260	if (hasArchitectedFlatScratch())
6261	return Error(L: IDRange.Start,
6262	Msg: "directive is not supported with architected flat scratch",
6263	Range: IDRange);
6264	PARSE_BITS_ENTRY(KD.kernel_code_properties,
6265	KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6266	ExprVal, ValRange);
6267	if (Val)
6268	ImpliedUserSGPRCount += `4`;
6269	} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6270	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6271	if (!hasKernargPreload())
6272	return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6273
6274	if (Val > getMaxNumUserSGPRs())
6275	return OutOfRangeError(Range: ValRange);
6276	PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6277	ValRange);
6278	if (Val) {
6279	ImpliedUserSGPRCount += Val;
6280	PreloadLength = Val;
6281	}
6282	} else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6283	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6284	if (!hasKernargPreload())
6285	return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6286
6287	if (Val >= `1024`)
6288	return OutOfRangeError(Range: ValRange);
6289	PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6290	ValRange);
6291	if (Val)
6292	PreloadOffset = Val;
6293	} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6294	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6295	PARSE_BITS_ENTRY(KD.kernel_code_properties,
6296	KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6297	ValRange);
6298	if (Val)
6299	ImpliedUserSGPRCount += `2`;
6300	} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6301	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6302	PARSE_BITS_ENTRY(KD.kernel_code_properties,
6303	KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6304	ValRange);
6305	if (Val)
6306	ImpliedUserSGPRCount += `2`;
6307	} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6308	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6309	PARSE_BITS_ENTRY(KD.kernel_code_properties,
6310	KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6311	ExprVal, ValRange);
6312	if (Val)
6313	ImpliedUserSGPRCount += `2`;
6314	} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6315	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6316	PARSE_BITS_ENTRY(KD.kernel_code_properties,
6317	KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6318	ValRange);
6319	if (Val)
6320	ImpliedUserSGPRCount += `2`;
6321	} else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6322	if (hasArchitectedFlatScratch())
6323	return Error(L: IDRange.Start,
6324	Msg: "directive is not supported with architected flat scratch",
6325	Range: IDRange);
6326	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6327	PARSE_BITS_ENTRY(KD.kernel_code_properties,
6328	KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6329	ExprVal, ValRange);
6330	if (Val)
6331	ImpliedUserSGPRCount += `2`;
6332	} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6333	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6334	PARSE_BITS_ENTRY(KD.kernel_code_properties,
6335	KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6336	ExprVal, ValRange);
6337	if (Val)
6338	ImpliedUserSGPRCount += `1`;
6339	} else if (ID == ".amdhsa_wavefront_size32") {
6340	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6341	if (IVersion.Major < `10`)
6342	return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
6343	EnableWavefrontSize32 = Val;
6344	PARSE_BITS_ENTRY(KD.kernel_code_properties,
6345	KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6346	ValRange);
6347	} else if (ID == ".amdhsa_uses_dynamic_stack") {
6348	PARSE_BITS_ENTRY(KD.kernel_code_properties,
6349	KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6350	ValRange);
6351	} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6352	if (hasArchitectedFlatScratch())
6353	return Error(L: IDRange.Start,
6354	Msg: "directive is not supported with architected flat scratch",
6355	Range: IDRange);
6356	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6357	COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6358	ValRange);
6359	} else if (ID == ".amdhsa_enable_private_segment") {
6360	if (!hasArchitectedFlatScratch())
6361	return Error(
6362	L: IDRange.Start,
6363	Msg: "directive is not supported without architected flat scratch",
6364	Range: IDRange);
6365	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6366	COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6367	ValRange);
6368	} else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6369	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6370	COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6371	ValRange);
6372	} else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6373	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6374	COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6375	ValRange);
6376	} else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6377	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6378	COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6379	ValRange);
6380	} else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6381	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6382	COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6383	ValRange);
6384	} else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6385	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6386	COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6387	ValRange);
6388	} else if (ID == ".amdhsa_next_free_vgpr") {
6389	VGPRRange = ValRange;
6390	NextFreeVGPR = ExprVal;
6391	} else if (ID == ".amdhsa_next_free_sgpr") {
6392	SGPRRange = ValRange;
6393	NextFreeSGPR = ExprVal;
6394	} else if (ID == ".amdhsa_accum_offset") {
6395	if (!isGFX90A())
6396	return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6397	AccumOffset = ExprVal;
6398	} else if (ID == ".amdhsa_named_barrier_count") {
6399	if (!isGFX1250Plus())
6400	return Error(L: IDRange.Start, Msg: "directive requires gfx1250+", Range: IDRange);
6401	NamedBarCnt = ExprVal;
6402	} else if (ID == ".amdhsa_reserve_vcc") {
6403	if (EvaluatableExpr && !isUInt<`1`>(x: Val))
6404	return OutOfRangeError(Range: ValRange);
6405	ReserveVCC = ExprVal;
6406	} else if (ID == ".amdhsa_reserve_flat_scratch") {
6407	if (IVersion.Major < `7`)
6408	return Error(L: IDRange.Start, Msg: "directive requires gfx7+", Range: IDRange);
6409	if (hasArchitectedFlatScratch())
6410	return Error(L: IDRange.Start,
6411	Msg: "directive is not supported with architected flat scratch",
6412	Range: IDRange);
6413	if (EvaluatableExpr && !isUInt<`1`>(x: Val))
6414	return OutOfRangeError(Range: ValRange);
6415	ReserveFlatScr = ExprVal;
6416	} else if (ID == ".amdhsa_reserve_xnack_mask") {
6417	if (IVersion.Major < `8`)
6418	return Error(L: IDRange.Start, Msg: "directive requires gfx8+", Range: IDRange);
6419	if (!isUInt<`1`>(x: Val))
6420	return OutOfRangeError(Range: ValRange);
6421	bool XnackOn = getTargetStreamer().getTargetID()->isXnackOnOrAny() \|\|
6422	getSTI().hasFeature(Feature: AMDGPU::FeatureXNACK);
6423	if (Val != XnackOn) {
6424	return getParser().Error(
6425	L: IDRange.Start,
6426	Msg: ".amdhsa_reserve_xnack_mask does not match target id", Range: IDRange);
6427	}
6428	} else if (ID == ".amdhsa_float_round_mode_32") {
6429	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6430	COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6431	ValRange);
6432	} else if (ID == ".amdhsa_float_round_mode_16_64") {
6433	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6434	COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6435	ValRange);
6436	} else if (ID == ".amdhsa_float_denorm_mode_32") {
6437	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6438	COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6439	ValRange);
6440	} else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6441	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6442	COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6443	ValRange);
6444	} else if (ID == ".amdhsa_dx10_clamp") {
6445	if (!getSTI().hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode))
6446	return Error(L: IDRange.Start, Msg: "directive unsupported on gfx1170+",
6447	Range: IDRange);
6448	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6449	COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6450	ValRange);
6451	} else if (ID == ".amdhsa_ieee_mode") {
6452	if (!getSTI().hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode))
6453	return Error(L: IDRange.Start, Msg: "directive unsupported on gfx1170+",
6454	Range: IDRange);
6455	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6456	COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6457	ValRange);
6458	} else if (ID == ".amdhsa_fp16_overflow") {
6459	if (IVersion.Major < `9`)
6460	return Error(L: IDRange.Start, Msg: "directive requires gfx9+", Range: IDRange);
6461	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6462	COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6463	ValRange);
6464	} else if (ID == ".amdhsa_tg_split") {
6465	if (!isGFX90A())
6466	return Error(L: IDRange.Start, Msg: "directive requires gfx90a+", Range: IDRange);
6467	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6468	ExprVal, ValRange);
6469	} else if (ID == ".amdhsa_workgroup_processor_mode") {
6470	if (!supportsWGP(STI: getSTI()))
6471	return Error(L: IDRange.Start,
6472	Msg: "directive unsupported on " + getSTI().getCPU(), Range: IDRange);
6473	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6474	COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6475	ValRange);
6476	} else if (ID == ".amdhsa_memory_ordered") {
6477	if (IVersion.Major < `10`)
6478	return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
6479	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6480	COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6481	ValRange);
6482	} else if (ID == ".amdhsa_forward_progress") {
6483	if (IVersion.Major < `10`)
6484	return Error(L: IDRange.Start, Msg: "directive requires gfx10+", Range: IDRange);
6485	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6486	COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6487	ValRange);
6488	} else if (ID == ".amdhsa_shared_vgpr_count") {
6489	EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6490	if (IVersion.Major < `10` \|\| IVersion.Major >= `12`)
6491	return Error(L: IDRange.Start, Msg: "directive requires gfx10 or gfx11",
6492	Range: IDRange);
6493	SharedVGPRCount = Val;
6494	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6495	COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6496	ValRange);
6497	} else if (ID == ".amdhsa_inst_pref_size") {
6498	if (IVersion.Major < `11`)
6499	return Error(L: IDRange.Start, Msg: "directive requires gfx11+", Range: IDRange);
6500	if (IVersion.Major == `11`) {
6501	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6502	COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6503	ValRange);
6504	} else {
6505	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
6506	COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6507	ValRange);
6508	}
6509	} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6510	PARSE_BITS_ENTRY(
6511	KD.compute_pgm_rsrc2,
6512	COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6513	ExprVal, ValRange);
6514	} else if (ID == ".amdhsa_exception_fp_denorm_src") {
6515	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6516	COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6517	ExprVal, ValRange);
6518	} else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6519	PARSE_BITS_ENTRY(
6520	KD.compute_pgm_rsrc2,
6521	COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6522	ExprVal, ValRange);
6523	} else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6524	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6525	COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6526	ExprVal, ValRange);
6527	} else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6528	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6529	COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6530	ExprVal, ValRange);
6531	} else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6532	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6533	COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6534	ExprVal, ValRange);
6535	} else if (ID == ".amdhsa_exception_int_div_zero") {
6536	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
6537	COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6538	ExprVal, ValRange);
6539	} else if (ID == ".amdhsa_round_robin_scheduling") {
6540	if (IVersion.Major < `12`)
6541	return Error(L: IDRange.Start, Msg: "directive requires gfx12+", Range: IDRange);
6542	PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
6543	COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6544	ValRange);
6545	} else {
6546	return Error(L: IDRange.Start, Msg: "unknown .amdhsa_kernel directive", Range: IDRange);
6547	}
6548
6549	#undef PARSE_BITS_ENTRY
6550	}
6551
6552	if (!Seen.contains(key: ".amdhsa_next_free_vgpr"))
6553	return TokError(Msg: ".amdhsa_next_free_vgpr directive is required");
6554
6555	if (!Seen.contains(key: ".amdhsa_next_free_sgpr"))
6556	return TokError(Msg: ".amdhsa_next_free_sgpr directive is required");
6557
6558	unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(u&: ImpliedUserSGPRCount);
6559	if (UserSGPRCount > getMaxNumUserSGPRs())
6560	return TokError(Msg: "too many user SGPRs enabled, found " +
6561	Twine (UserSGPRCount) + ", but only " +
6562	Twine (getMaxNumUserSGPRs()) + " are supported.");
6563
6564	// Consider the case where the total number of UserSGPRs with trailing
6565	// allocated preload SGPRs, is greater than the number of explicitly
6566	// referenced SGPRs.
6567	if (PreloadLength) {
6568	MCContext &Ctx = getContext();
6569	NextFreeSGPR = AMDGPUMCExpr::createMax(
6570	Args: {NextFreeSGPR, MCConstantExpr::create(Value: UserSGPRCount, Ctx)}, Ctx);
6571	}
6572
6573	const MCExpr *VGPRBlocks;
6574	const MCExpr *SGPRBlocks;
6575	if (calculateGPRBlocks(Features: getFeatureBits(), VCCUsed: ReserveVCC, FlatScrUsed: ReserveFlatScr,
6576	XNACKUsed: getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6577	EnableWavefrontSize32, NextFreeVGPR,
6578	VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6579	SGPRBlocks))
6580	return true;
6581
6582	int64_t EvaluatedVGPRBlocks;
6583	bool VGPRBlocksEvaluatable =
6584	VGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedVGPRBlocks);
6585	if (VGPRBlocksEvaluatable &&
6586	!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
6587	x: static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6588	return OutOfRangeError(Range: VGPRRange);
6589	}
6590	AMDGPU::MCKernelDescriptor::bits_set(
6591	Dst&: KD.compute_pgm_rsrc1, Value: VGPRBlocks,
6592	Shift: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6593	Mask: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, Ctx&: getContext());
6594
6595	int64_t EvaluatedSGPRBlocks;
6596	if (SGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedSGPRBlocks) &&
6597	!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
6598	x: static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6599	return OutOfRangeError(Range: SGPRRange);
6600	AMDGPU::MCKernelDescriptor::bits_set(
6601	Dst&: KD.compute_pgm_rsrc1, Value: SGPRBlocks,
6602	Shift: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6603	Mask: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, Ctx&: getContext());
6604
6605	if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6606	return TokError(Msg: "amdgpu_user_sgpr_count smaller than implied by "
6607	"enabled user SGPRs");
6608
6609	if (isGFX1250Plus()) {
6610	AMDGPU::MCKernelDescriptor::bits_set(
6611	Dst&: KD.compute_pgm_rsrc2,
6612	Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()),
6613	Shift: COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6614	Mask: COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, Ctx&: getContext());
6615	} else {
6616	AMDGPU::MCKernelDescriptor::bits_set(
6617	Dst&: KD.compute_pgm_rsrc2,
6618	Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()),
6619	Shift: COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6620	Mask: COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, Ctx&: getContext());
6621	}
6622
6623	int64_t IVal = `0`;
6624	if (!KD.kernarg_size->evaluateAsAbsolute(Res&: IVal))
6625	return TokError(Msg: "Kernarg size should be resolvable");
6626	uint64_t kernarg_size = IVal;
6627	if (PreloadLength && kernarg_size &&
6628	(PreloadLength * `4` + PreloadOffset * `4` > kernarg_size))
6629	return TokError(Msg: "Kernarg preload length + offset is larger than the "
6630	"kernarg segment size");
6631
6632	if (isGFX90A()) {
6633	if (!Seen.contains(key: ".amdhsa_accum_offset"))
6634	return TokError(Msg: ".amdhsa_accum_offset directive is required");
6635	int64_t EvaluatedAccum;
6636	bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(Res&: EvaluatedAccum);
6637	uint64_t UEvaluatedAccum = EvaluatedAccum;
6638	if (AccumEvaluatable &&
6639	(UEvaluatedAccum < `4` \|\| UEvaluatedAccum > `256` \|\| (UEvaluatedAccum & `3`)))
6640	return TokError(Msg: "accum_offset should be in range [4..256] in "
6641	"increments of 4");
6642
6643	int64_t EvaluatedNumVGPR;
6644	if (NextFreeVGPR->evaluateAsAbsolute(Res&: EvaluatedNumVGPR) &&
6645	AccumEvaluatable &&
6646	UEvaluatedAccum >
6647	alignTo(Value: std::max(a: (uint64_t)`1`, b: (uint64_t)EvaluatedNumVGPR), Align: `4`))
6648	return TokError(Msg: "accum_offset exceeds total VGPR allocation");
6649	const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6650	LHS: MCBinaryExpr::createDiv(
6651	LHS: AccumOffset, RHS: MCConstantExpr::create(Value: `4`, Ctx&: getContext()), Ctx&: getContext()),
6652	RHS: MCConstantExpr::create(Value: `1`, Ctx&: getContext()), Ctx&: getContext());
6653	MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: AdjustedAccum,
6654	Shift: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6655	Mask: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6656	Ctx&: getContext());
6657	}
6658
6659	if (isGFX1250Plus())
6660	MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: NamedBarCnt,
6661	Shift: COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6662	Mask: COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6663	Ctx&: getContext());
6664
6665	if (IVersion.Major >= `10` && IVersion.Major < `12`) {
6666	// SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6667	if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6668	return TokError(Msg: "shared_vgpr_count directive not valid on "
6669	"wavefront size 32");
6670	}
6671
6672	if (VGPRBlocksEvaluatable &&
6673	(SharedVGPRCount * `2` + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6674	`63`)) {
6675	return TokError(Msg: "shared_vgpr_count*2 + "
6676	"compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6677	"exceed 63\n");
6678	}
6679	}
6680
6681	emitTargetDirective();
6682	getTargetStreamer().EmitAmdhsaKernelDescriptor(STI: getSTI(), KernelName, KernelDescriptor: KD,
6683	NextVGPR: NextFreeVGPR, NextSGPR: NextFreeSGPR,
6684	ReserveVCC, ReserveFlatScr);
6685	return false;
6686	}
6687
6688	bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6689	uint32_t Version;
6690	if (ParseAsAbsoluteExpression(Ret&: Version))
6691	return true;
6692
6693	getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(COV: Version);
6694	emitTargetDirective();
6695	return false;
6696	}
6697
6698	bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6699	AMDGPUMCKernelCodeT &C) {
6700	// max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6701	// assembly for backwards compatibility.
6702	if (ID == "max_scratch_backing_memory_byte_size") {
6703	Parser.eatToEndOfStatement();
6704	return false;
6705	}
6706
6707	SmallString<`40`> ErrStr;
6708	raw_svector_ostream Err(ErrStr);
6709	if (!C.ParseKernelCodeT(ID, MCParser&: getParser(), Err)) {
6710	return TokError(Msg: Err.str());
6711	}
6712	Lex();
6713
6714	if (ID == "enable_wavefront_size32") {
6715	if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6716	if (!isGFX10Plus())
6717	return TokError(Msg: "enable_wavefront_size32=1 is only allowed on GFX10+");
6718	if (!isWave32())
6719	return TokError(Msg: "enable_wavefront_size32=1 requires +WavefrontSize32");
6720	} else {
6721	if (!isWave64())
6722	return TokError(Msg: "enable_wavefront_size32=0 requires +WavefrontSize64");
6723	}
6724	}
6725
6726	if (ID == "wavefront_size") {
6727	if (C.wavefront_size == `5`) {
6728	if (!isGFX10Plus())
6729	return TokError(Msg: "wavefront_size=5 is only allowed on GFX10+");
6730	if (!isWave32())
6731	return TokError(Msg: "wavefront_size=5 requires +WavefrontSize32");
6732	} else if (C.wavefront_size == `6`) {
6733	if (!isWave64())
6734	return TokError(Msg: "wavefront_size=6 requires +WavefrontSize64");
6735	}
6736	}
6737
6738	return false;
6739	}
6740
6741	bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6742	AMDGPUMCKernelCodeT KernelCode;
6743	KernelCode.initDefault(STI: getSTI(), Ctx&: getContext());
6744
6745	while (true) {
6746	// Lex EndOfStatement. This is in a while loop, because lexing a comment
6747	// will set the current token to EndOfStatement.
6748	while(trySkipToken(Kind: AsmToken::EndOfStatement));
6749
6750	StringRef ID;
6751	if (!parseId(Val&: ID, ErrMsg: "expected value identifier or .end_amd_kernel_code_t"))
6752	return true;
6753
6754	if (ID == ".end_amd_kernel_code_t")
6755	break;
6756
6757	if (ParseAMDKernelCodeTValue(ID, C&: KernelCode))
6758	return true;
6759	}
6760
6761	KernelCode.validate(STI: &getSTI(), Ctx&: getContext());
6762	getTargetStreamer().EmitAMDKernelCodeT(Header&: KernelCode);
6763
6764	return false;
6765	}
6766
6767	bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6768	StringRef KernelName;
6769	if (!parseId(Val&: KernelName, ErrMsg: "expected symbol name"))
6770	return true;
6771
6772	getTargetStreamer().EmitAMDGPUSymbolType(SymbolName: KernelName,
6773	Type: ELF::STT_AMDGPU_HSA_KERNEL);
6774
6775	KernelScope.initialize(Context&: getContext());
6776	return false;
6777	}
6778
6779	bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6780	if (!getSTI().getTargetTriple().isAMDGCN()) {
6781	return Error(L: getLoc(),
6782	Msg: ".amd_amdgpu_isa directive is not available on non-amdgcn "
6783	"architectures");
6784	}
6785
6786	StringRef TargetIDDirective = getLexer().getTok().getStringContents();
6787
6788	std::optional<AMDGPU::TargetID> MaybeParsed =
6789	AMDGPU::TargetID::parseTargetIDString(TargetIDDirective);
6790	if (!MaybeParsed)
6791	return Error(L: getParser().getTok().getLoc(),
6792	Msg: "malformed target id '" + TargetIDDirective + "'");
6793
6794	const AMDGPU::TargetID &ParsedTargetID = *MaybeParsed;
6795	const Triple &TT = getSTI().getTargetTriple();
6796
6797	// The processor named in the target id must be covered by the triple's
6798	// subarch.
6799	if (!AMDGPU::isCPUValidForSubArch(SubArch: TT.getSubArch(),
6800	AK: ParsedTargetID.getGPUKind())) {
6801	return Error(L: getParser().getTok().getLoc(),
6802	Msg: "target id '" + TargetIDDirective +
6803	"' specifies a processor that is not valid for subarch '" +
6804	TT.getArchName() + "'");
6805	}
6806
6807	const std::optional<AMDGPU::TargetID> &CurrentTargetID =
6808	getTargetStreamer().getTargetID();
6809
6810	Triple DirectiveTriple(ParsedTargetID.getTargetTripleString());
6811	const Triple &STITriple = getSTI().getTargetTriple();
6812	if (!DirectiveTriple.isCompatibleWith(Other: STITriple)) {
6813	return Error(L: getParser().getTok().getLoc(),
6814	Msg: ".amd_amdgpu_isa " + Twine (ParsedTargetID.toString()) +
6815	" is incompatible with " +
6816	Twine (CurrentTargetID ->toString()));
6817	}
6818
6819	// Error if the ISA version doesn't match
6820	StringRef DirectiveProcessor =
6821	AMDGPU::getArchNameAMDGCN(AK: ParsedTargetID.getGPUKind());
6822	AMDGPU::IsaVersion DirectiveISA = AMDGPU::getIsaVersion(GPU: DirectiveProcessor);
6823	AMDGPU::IsaVersion CurrentISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
6824	if (DirectiveISA != CurrentISA) {
6825	return Error(L: getParser().getTok().getLoc(),
6826	Msg: ".amd_amdgpu_isa directive processor " +
6827	Twine (DirectiveProcessor) +
6828	" does not match the specified processor " +
6829	Twine (getSTI().getCPU()));
6830	}
6831
6832	getTargetStreamer().EmitISAVersion();
6833	Lex();
6834
6835	return false;
6836	}
6837
6838	bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6839	assert(isHsaAbi(getSTI()));
6840
6841	std::string HSAMetadataString;
6842	if (ParseToEndDirective(AssemblerDirectiveBegin: HSAMD::V3::AssemblerDirectiveBegin,
6843	AssemblerDirectiveEnd: HSAMD::V3::AssemblerDirectiveEnd, CollectString&: HSAMetadataString))
6844	return true;
6845
6846	if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6847	return Error(L: getLoc(), Msg: "invalid HSA metadata");
6848
6849	return false;
6850	}
6851
6852	/// Common code to parse out a block of text (typically YAML) between start and
6853	/// end directives.
6854	bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
6855	const char *AssemblerDirectiveEnd,
6856	std::string &CollectString) {
6857
6858	raw_string_ostream CollectStream(CollectString);
6859
6860	getLexer().setSkipSpace(false);
6861
6862	bool FoundEnd = false;
6863	while (!isToken(Kind: AsmToken::Eof)) {
6864	while (isToken(Kind: AsmToken::Space)) {
6865	CollectStream << getTokenStr();
6866	Lex();
6867	}
6868
6869	if (trySkipId(Id: AssemblerDirectiveEnd)) {
6870	FoundEnd = true;
6871	break;
6872	}
6873
6874	CollectStream << Parser.parseStringToEndOfStatement()
6875	<< getContext().getAsmInfo().getSeparatorString();
6876
6877	Parser.eatToEndOfStatement();
6878	}
6879
6880	getLexer().setSkipSpace(true);
6881
6882	if (isToken(Kind: AsmToken::Eof) && !FoundEnd) {
6883	return TokError(Msg: Twine ("expected directive ") +
6884	Twine (AssemblerDirectiveEnd) + Twine (" not found"));
6885	}
6886
6887	return false;
6888	}
6889
6890	/// Parse the assembler directive for new MsgPack-format PAL metadata.
6891	bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6892	std::string String;
6893	if (ParseToEndDirective(AssemblerDirectiveBegin: AMDGPU::PALMD::AssemblerDirectiveBegin,
6894	AssemblerDirectiveEnd: AMDGPU::PALMD::AssemblerDirectiveEnd, CollectString&: String))
6895	return true;
6896
6897	auto *PALMetadata = getTargetStreamer().getPALMetadata();
6898	if (!PALMetadata->setFromString(String))
6899	return Error(L: getLoc(), Msg: "invalid PAL metadata");
6900	return false;
6901	}
6902
6903	/// Parse the assembler directive for old linear-format PAL metadata.
6904	bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6905	if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6906	return Error(L: getLoc(),
6907	Msg: (Twine (PALMD::AssemblerDirective) + Twine (" directive is "
6908	"not available on non-amdpal OSes")).str());
6909	}
6910
6911	auto *PALMetadata = getTargetStreamer().getPALMetadata();
6912	PALMetadata->setLegacy();
6913	for (;;) {
6914	uint32_t Key, Value;
6915	if (ParseAsAbsoluteExpression(Ret&: Key)) {
6916	return TokError(Msg: Twine ("invalid value in ") +
6917	Twine (PALMD::AssemblerDirective));
6918	}
6919	if (!trySkipToken(Kind: AsmToken::Comma)) {
6920	return TokError(Msg: Twine ("expected an even number of values in ") +
6921	Twine (PALMD::AssemblerDirective));
6922	}
6923	if (ParseAsAbsoluteExpression(Ret&: Value)) {
6924	return TokError(Msg: Twine ("invalid value in ") +
6925	Twine (PALMD::AssemblerDirective));
6926	}
6927	PALMetadata->setRegister(Reg: Key, Val: Value);
6928	if (!trySkipToken(Kind: AsmToken::Comma))
6929	break;
6930	}
6931	return false;
6932	}
6933
6934	/// ParseDirectiveAMDGPULDS
6935	/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6936	bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6937	if (getParser().checkForValidSection())
6938	return true;
6939
6940	StringRef Name;
6941	SMLoc NameLoc = getLoc();
6942	if (getParser().parseIdentifier(Res&: Name))
6943	return TokError(Msg: "expected identifier in directive");
6944
6945	MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6946	if (getParser().parseComma())
6947	return true;
6948
6949	unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(STI: getSTI());
6950
6951	int64_t Size;
6952	SMLoc SizeLoc = getLoc();
6953	if (getParser().parseAbsoluteExpression(Res&: Size))
6954	return true;
6955	if (Size < `0`)
6956	return Error(L: SizeLoc, Msg: "size must be non-negative");
6957	if (Size > LocalMemorySize)
6958	return Error(L: SizeLoc, Msg: "size is too large");
6959
6960	int64_t Alignment = `4`;
6961	if (trySkipToken(Kind: AsmToken::Comma)) {
6962	SMLoc AlignLoc = getLoc();
6963	if (getParser().parseAbsoluteExpression(Res&: Alignment))
6964	return true;
6965	if (Alignment < `0` \|\| !isPowerOf2_64(Value: Alignment))
6966	return Error(L: AlignLoc, Msg: "alignment must be a power of two");
6967
6968	// Alignment larger than the size of LDS is possible in theory, as long
6969	// as the linker manages to place to symbol at address 0, but we do want
6970	// to make sure the alignment fits nicely into a 32-bit integer.
6971	if (Alignment >= `1u` << `31`)
6972	return Error(L: AlignLoc, Msg: "alignment is too large");
6973	}
6974
6975	if (parseEOL())
6976	return true;
6977
6978	Symbol->redefineIfPossible();
6979	if (!Symbol->isUndefined())
6980	return Error(L: NameLoc, Msg: "invalid symbol redefinition");
6981
6982	getTargetStreamer().emitAMDGPULDS(Symbol, Size, Alignment: Align (Alignment));
6983	return false;
6984	}
6985
6986	bool AMDGPUAsmParser::ParseDirectiveAMDGPUInfo() {
6987	if (getParser().checkForValidSection())
6988	return true;
6989
6990	StringRef FuncName;
6991	if (getParser().parseIdentifier(Res&: FuncName))
6992	return TokError(Msg: "expected symbol name after .amdgpu_info");
6993
6994	MCSymbol *FuncSym = getContext().getOrCreateSymbol(Name: FuncName);
6995	AMDGPU::InfoSectionData ParsedInfoData;
6996	AMDGPU::FuncInfo FI;
6997	FI.Sym = FuncSym;
6998	bool HasScalarAttrs = false;
6999
7000	while (true) {
7001	while (trySkipToken(Kind: AsmToken::EndOfStatement))
7002	;
7003
7004	StringRef ID;
7005	SMLoc IDLoc = getLoc();
7006	if (!parseId(Val&: ID, ErrMsg: "expected directive or .end_amdgpu_info"))
7007	return true;
7008
7009	if (ID == ".end_amdgpu_info")
7010	break;
7011
7012	// Every per-entry directive shares the `.amdgpu_` namespace prefix; strip
7013	// it once and dispatch on the distinguishing suffix below. The unstripped
7014	// ID is preserved for diagnostics.
7015	StringRef Dir = ID;
7016	if (!Dir.consume_front(Prefix: ".amdgpu_"))
7017	return Error(L: IDLoc, Msg: "unknown .amdgpu_info directive '" + ID + "'");
7018
7019	if (Dir == "flags") {
7020	int64_t Val;
7021	if (getParser().parseAbsoluteExpression(Res&: Val))
7022	return true;
7023	auto Flags = static_cast<AMDGPU::FuncInfoFlags>(Val);
7024	FI.UsesVCC = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_VCC);
7025	FI.UsesFlatScratch =
7026	!!(Flags & AMDGPU::FuncInfoFlags::FUNC_USES_FLAT_SCRATCH);
7027	FI.HasDynStack = !!(Flags & AMDGPU::FuncInfoFlags::FUNC_HAS_DYN_STACK);
7028	HasScalarAttrs = true;
7029	} else if (Dir == "num_sgpr") {
7030	int64_t Val;
7031	if (getParser().parseAbsoluteExpression(Res&: Val))
7032	return true;
7033	FI.NumSGPR = static_cast<uint32_t>(Val);
7034	HasScalarAttrs = true;
7035	} else if (Dir == "num_vgpr") {
7036	int64_t Val;
7037	if (getParser().parseAbsoluteExpression(Res&: Val))
7038	return true;
7039	FI.NumArchVGPR = static_cast<uint32_t>(Val);
7040	HasScalarAttrs = true;
7041	} else if (Dir == "num_agpr") {
7042	int64_t Val;
7043	if (getParser().parseAbsoluteExpression(Res&: Val))
7044	return true;
7045	FI.NumAccVGPR = static_cast<uint32_t>(Val);
7046	HasScalarAttrs = true;
7047	} else if (Dir == "private_segment_size") {
7048	int64_t Val;
7049	if (getParser().parseAbsoluteExpression(Res&: Val))
7050	return true;
7051	FI.PrivateSegmentSize = static_cast<uint32_t>(Val);
7052	HasScalarAttrs = true;
7053	} else if (Dir == "use") {
7054	StringRef ResName;
7055	if (getParser().parseIdentifier(Res&: ResName))
7056	return TokError(Msg: "expected resource symbol for .amdgpu_use");
7057	ParsedInfoData.Uses.push_back(
7058	Elt: {FuncSym, getContext().getOrCreateSymbol(Name: ResName)});
7059	} else if (Dir == "call") {
7060	StringRef DstName;
7061	if (getParser().parseIdentifier(Res&: DstName))
7062	return TokError(Msg: "expected callee symbol for .amdgpu_call");
7063	ParsedInfoData.Calls.push_back(
7064	Elt: {FuncSym, getContext().getOrCreateSymbol(Name: DstName)});
7065	} else if (Dir == "indirect_call") {
7066	std::string TypeId;
7067	if (getParser().parseEscapedString(Data&: TypeId))
7068	return TokError(Msg: "expected type ID string for .amdgpu_indirect_call");
7069	ParsedInfoData.IndirectCalls.push_back(Elt: {FuncSym, std::move(TypeId)});
7070	} else if (Dir == "typeid") {
7071	std::string TypeId;
7072	if (getParser().parseEscapedString(Data&: TypeId))
7073	return TokError(Msg: "expected type ID string for .amdgpu_typeid");
7074	ParsedInfoData.TypeIds.push_back(Elt: {FuncSym, std::move(TypeId)});
7075	} else {
7076	return Error(L: IDLoc, Msg: "unknown .amdgpu_info directive '" + ID + "'");
7077	}
7078	}
7079
7080	if (HasScalarAttrs)
7081	ParsedInfoData.Funcs.push_back(Elt: std::move(FI));
7082
7083	AMDGPU::InfoSectionData &Data = InfoData ? *InfoData : InfoData.emplace();
7084	for (AMDGPU::FuncInfo &Func : ParsedInfoData.Funcs)
7085	Data.Funcs.push_back(Elt: std::move(Func));
7086	for (std::pair<MCSymbol , MCSymbol > &Use : ParsedInfoData.Uses)
7087	Data.Uses.push_back(Elt: Use);
7088	for (std::pair<MCSymbol , MCSymbol > &Call : ParsedInfoData.Calls)
7089	Data.Calls.push_back(Elt: Call);
7090	for (std::pair<MCSymbol *, std::string> &IndirectCall :
7091	ParsedInfoData.IndirectCalls)
7092	Data.IndirectCalls.push_back(Elt: std::move(IndirectCall));
7093	for (std::pair<MCSymbol *, std::string> &TypeId : ParsedInfoData.TypeIds)
7094	Data.TypeIds.push_back(Elt: std::move(TypeId));
7095
7096	return false;
7097	}
7098
7099	void AMDGPUAsmParser::doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) {
7100	// Record every parsed label in the timeline so that, at end of file, the
7101	// instructions following a kernel's label can be located regardless of
7102	// whether the .amdhsa_kernel directive came before or after the label.
7103	OpcodeStreamSymbols.emplace_back(Args&: Symbol, Args&: IDLoc, Args: OpcodeStream.size());
7104	}
7105
7106	void AMDGPUAsmParser::checkKernelPrologues() {
7107	if (getFeatureBits()[AMDGPU::FeatureRequiresInitialUnclausedVmem]) {
7108	static const unsigned Required[] = {GLOBAL_WB_gfx12, V_NOP_e32_gfx12};
7109	for (auto [Sym, Loc, Offset] : OpcodeStreamSymbols) {
7110	if (!AMDHSAKernelSymbols.contains(Ptr: Sym))
7111	continue;
7112	ArrayRef<unsigned> Prologue = ArrayRef(OpcodeStream).drop_front(N: Offset);
7113	if (Prologue.take_front(N: std::size(Required)) != ArrayRef(Required)) {
7114	Warning(L: Loc, Msg: "kernel '" + Sym->getName() +
7115	"' does not begin with the required prologue "
7116	"sequence: GLOBAL_WB followed by V_NOP");
7117	}
7118	}
7119	}
7120	OpcodeStream.clear();
7121	OpcodeStreamSymbols.clear();
7122	AMDHSAKernelSymbols.clear();
7123	}
7124
7125	void AMDGPUAsmParser::onEndOfFile() {
7126	emitTargetDirective();
7127	checkKernelPrologues();
7128	if (InfoData)
7129	getTargetStreamer().emitAMDGPUInfo(Data: *InfoData);
7130	}
7131
7132	bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
7133	StringRef IDVal = DirectiveID.getString();
7134
7135	if (isHsaAbi(STI: getSTI())) {
7136	if (IDVal == ".amdhsa_kernel")
7137	return ParseDirectiveAMDHSAKernel();
7138
7139	if (IDVal == ".amdhsa_code_object_version")
7140	return ParseDirectiveAMDHSACodeObjectVersion();
7141
7142	// TODO: Restructure/combine with PAL metadata directive.
7143	if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
7144	return ParseDirectiveHSAMetadata();
7145	} else {
7146	if (IDVal == ".amd_kernel_code_t")
7147	return ParseDirectiveAMDKernelCodeT();
7148
7149	if (IDVal == ".amdgpu_hsa_kernel")
7150	return ParseDirectiveAMDGPUHsaKernel();
7151
7152	if (IDVal == ".amd_amdgpu_isa")
7153	return ParseDirectiveISAVersion();
7154
7155	if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
7156	return Error(L: getLoc(), Msg: (Twine (HSAMD::AssemblerDirectiveBegin) +
7157	Twine (" directive is "
7158	"not available on non-amdhsa OSes"))
7159	.str());
7160	}
7161	}
7162
7163	if (IDVal == ".amdgcn_target")
7164	return ParseDirectiveAMDGCNTarget();
7165
7166	if (IDVal == ".amdgpu_lds")
7167	return ParseDirectiveAMDGPULDS();
7168
7169	if (IDVal == ".amdgpu_info")
7170	return ParseDirectiveAMDGPUInfo();
7171
7172	if (IDVal == PALMD::AssemblerDirectiveBegin)
7173	return ParseDirectivePALMetadataBegin();
7174
7175	if (IDVal == PALMD::AssemblerDirective)
7176	return ParseDirectivePALMetadata();
7177
7178	return true;
7179	}
7180
7181	bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
7182	MCRegister Reg) {
7183	if (MRI.regsOverlap(RegA: TTMP12_TTMP13_TTMP14_TTMP15, RegB: Reg))
7184	return isGFX9Plus();
7185
7186	// GFX10+ has 2 more SGPRs 104 and 105.
7187	if (MRI.regsOverlap(RegA: SGPR104_SGPR105, RegB: Reg))
7188	return hasSGPR104_SGPR105();
7189
7190	switch (Reg.id()) {
7191	case SRC_SHARED_BASE_LO:
7192	case SRC_SHARED_BASE:
7193	case SRC_SHARED_LIMIT_LO:
7194	case SRC_SHARED_LIMIT:
7195	case SRC_PRIVATE_BASE_LO:
7196	case SRC_PRIVATE_BASE:
7197	case SRC_PRIVATE_LIMIT_LO:
7198	case SRC_PRIVATE_LIMIT:
7199	return isGFX9Plus();
7200	case SRC_FLAT_SCRATCH_BASE_LO:
7201	case SRC_FLAT_SCRATCH_BASE_HI:
7202	return hasGloballyAddressableScratch();
7203	case SRC_POPS_EXITING_WAVE_ID:
7204	return isGFX9Plus() && !isGFX11Plus();
7205	case TBA:
7206	case TBA_LO:
7207	case TBA_HI:
7208	case TMA:
7209	case TMA_LO:
7210	case TMA_HI:
7211	return !isGFX9Plus();
7212	case XNACK_MASK:
7213	case XNACK_MASK_LO:
7214	case XNACK_MASK_HI:
7215	return (isVI() \|\| isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
7216	case SGPR_NULL:
7217	return isGFX10Plus();
7218	case SRC_EXECZ:
7219	case SRC_VCCZ:
7220	return !isGFX11Plus();
7221	default:
7222	break;
7223	}
7224
7225	if (isCI())
7226	return true;
7227
7228	if (isSI() \|\| isGFX10Plus()) {
7229	// No flat_scr on SI.
7230	// On GFX10Plus flat scratch is not a valid register operand and can only be
7231	// accessed with s_setreg/s_getreg.
7232	switch (Reg.id()) {
7233	case FLAT_SCR:
7234	case FLAT_SCR_LO:
7235	case FLAT_SCR_HI:
7236	return false;
7237	default:
7238	return true;
7239	}
7240	}
7241
7242	// VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
7243	// SI/CI have.
7244	if (MRI.regsOverlap(RegA: SGPR102_SGPR103, RegB: Reg))
7245	return hasSGPR102_SGPR103();
7246
7247	return true;
7248	}
7249
7250	ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
7251	StringRef Mnemonic,
7252	OperandMode Mode) {
7253	ParseStatus Res = parseVOPD(Operands);
7254	if (Res.isSuccess() \|\| Res.isFailure() \|\| isToken(Kind: AsmToken::EndOfStatement))
7255	return Res;
7256
7257	// Try to parse with a custom parser
7258	Res = MatchOperandParserImpl(Operands, Mnemonic);
7259
7260	// If we successfully parsed the operand or if there as an error parsing,
7261	// we are done.
7262	//
7263	// If we are parsing after we reach EndOfStatement then this means we
7264	// are appending default values to the Operands list. This is only done
7265	// by custom parser, so we shouldn't continue on to the generic parsing.
7266	if (Res.isSuccess() \|\| Res.isFailure() \|\| isToken(Kind: AsmToken::EndOfStatement))
7267	return Res;
7268
7269	SMLoc RBraceLoc;
7270	SMLoc LBraceLoc = getLoc();
7271	if (Mode == OperandMode_NSA && trySkipToken(Kind: AsmToken::LBrac)) {
7272	unsigned Prefix = Operands.size();
7273
7274	for (;;) {
7275	auto Loc = getLoc();
7276	Res = parseReg(Operands);
7277	if (Res.isNoMatch())
7278	Error(L: Loc, Msg: "expected a register");
7279	if (!Res.isSuccess())
7280	return ParseStatus::Failure;
7281
7282	RBraceLoc = getLoc();
7283	if (trySkipToken(Kind: AsmToken::RBrac))
7284	break;
7285
7286	if (!skipToken(Kind: AsmToken::Comma,
7287	ErrMsg: "expected a comma or a closing square bracket"))
7288	return ParseStatus::Failure;
7289	}
7290
7291	if (Operands.size() - Prefix > `1`) {
7292	Operands.insert(I: Operands.begin() + Prefix,
7293	Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "[", Loc: LBraceLoc));
7294	Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "]", Loc: RBraceLoc));
7295	}
7296
7297	return ParseStatus::Success;
7298	}
7299
7300	return parseRegOrImm(Operands);
7301	}
7302
7303	StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
7304	// Clear any forced encodings from the previous instruction.
7305	setForcedEncodingSize(`0`);
7306	setForcedDPP(false);
7307	setForcedSDWA(false);
7308
7309	if (Name.consume_back(Suffix: "_e64_dpp")) {
7310	setForcedDPP(true);
7311	setForcedEncodingSize(`64`);
7312	return Name;
7313	}
7314	if (Name.consume_back(Suffix: "_e64")) {
7315	setForcedEncodingSize(`64`);
7316	return Name;
7317	}
7318	if (Name.consume_back(Suffix: "_e32")) {
7319	setForcedEncodingSize(`32`);
7320	return Name;
7321	}
7322	if (Name.consume_back(Suffix: "_dpp")) {
7323	setForcedDPP(true);
7324	return Name;
7325	}
7326	if (Name.consume_back(Suffix: "_sdwa")) {
7327	setForcedSDWA(true);
7328	return Name;
7329	}
7330	return Name;
7331	}
7332
7333	static void applyMnemonicAliases(StringRef &Mnemonic,
7334	const FeatureBitset &Features,
7335	unsigned VariantID);
7336
7337	bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
7338	StringRef Name, SMLoc NameLoc,
7339	OperandVector &Operands) {
7340	// Add the instruction mnemonic
7341	Name = parseMnemonicSuffix(Name);
7342
7343	// If the target architecture uses MnemonicAlias, call it here to parse
7344	// operands correctly.
7345	applyMnemonicAliases(Mnemonic&: Name, Features: getAvailableFeatures(), VariantID: `0`);
7346
7347	Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: NameLoc));
7348
7349	bool IsMIMG = Name.starts_with(Prefix: "image_");
7350
7351	while (!trySkipToken(Kind: AsmToken::EndOfStatement)) {
7352	OperandMode Mode = OperandMode_Default;
7353	if (IsMIMG && isGFX10Plus() && Operands.size() == `2`)
7354	Mode = OperandMode_NSA;
7355	ParseStatus Res = parseOperand(Operands, Mnemonic: Name, Mode);
7356
7357	if (!Res.isSuccess()) {
7358	checkUnsupportedInstruction(Mnemo: Name, IDLoc: NameLoc);
7359	if (!Parser.hasPendingError()) {
7360	// FIXME: use real operand location rather than the current location.
7361	StringRef Msg = Res.isFailure() ? "failed parsing operand."
7362	: "not a valid operand.";
7363	Error(L: getLoc(), Msg);
7364	}
7365	while (!trySkipToken(Kind: AsmToken::EndOfStatement)) {
7366	lex();
7367	}
7368	return true;
7369	}
7370
7371	// Eat the comma or space if there is one.
7372	trySkipToken(Kind: AsmToken::Comma);
7373	}
7374
7375	return false;
7376	}
7377
7378	//===----------------------------------------------------------------------===//
7379	// Utility functions
7380	//===----------------------------------------------------------------------===//
7381
7382	ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
7383	OperandVector &Operands) {
7384	SMLoc S = getLoc();
7385	if (!trySkipId(Id: Name))
7386	return ParseStatus::NoMatch;
7387
7388	Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: S));
7389	return ParseStatus::Success;
7390	}
7391
7392	ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
7393	int64_t &IntVal) {
7394
7395	if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
7396	return ParseStatus::NoMatch;
7397
7398	return parseExpr(Imm&: IntVal) ? ParseStatus::Success : ParseStatus::Failure;
7399	}
7400
7401	ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7402	const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7403	std::function<bool(int64_t &)> ConvertResult) {
7404	SMLoc S = getLoc();
7405	int64_t Value = `0`;
7406
7407	ParseStatus Res = parseIntWithPrefix(Prefix, IntVal&: Value);
7408	if (!Res.isSuccess())
7409	return Res;
7410
7411	if (ConvertResult && !ConvertResult (Value)) {
7412	Error(L: S, Msg: "invalid " + StringRef (Prefix) + " value.");
7413	}
7414
7415	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Value, Loc: S, Type: ImmTy));
7416	return ParseStatus::Success;
7417	}
7418
7419	ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7420	const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7421	bool (*ConvertResult)(int64_t &)) {
7422	SMLoc S = getLoc();
7423	if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
7424	return ParseStatus::NoMatch;
7425
7426	if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected a left square bracket"))
7427	return ParseStatus::Failure;
7428
7429	unsigned Val = `0`;
7430	const unsigned MaxSize = `4`;
7431
7432	// FIXME: How to verify the number of elements matches the number of src
7433	// operands?
7434	for (int I = `0`; ; ++I) {
7435	int64_t Op;
7436	SMLoc Loc = getLoc();
7437	if (!parseExpr(Imm&: Op))
7438	return ParseStatus::Failure;
7439
7440	if (Op != `0` && Op != `1`)
7441	return Error(L: Loc, Msg: "invalid " + StringRef (Prefix) + " value.");
7442
7443	Val \|= (Op << I);
7444
7445	if (trySkipToken(Kind: AsmToken::RBrac))
7446	break;
7447
7448	if (I + `1` == MaxSize)
7449	return Error(L: getLoc(), Msg: "expected a closing square bracket");
7450
7451	if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
7452	return ParseStatus::Failure;
7453	}
7454
7455	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: ImmTy));
7456	return ParseStatus::Success;
7457	}
7458
7459	ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7460	OperandVector &Operands,
7461	AMDGPUOperand::ImmTy ImmTy,
7462	bool IgnoreNegative) {
7463	int64_t Bit;
7464	SMLoc S = getLoc();
7465
7466	if (trySkipId(Id: Name)) {
7467	Bit = `1`;
7468	} else if (trySkipId(Pref: "no", Id: Name)) {
7469	if (IgnoreNegative)
7470	return ParseStatus::Success;
7471	Bit = `0`;
7472	} else {
7473	return ParseStatus::NoMatch;
7474	}
7475
7476	if (Name == "r128" && !hasMIMG_R128())
7477	return Error(L: S, Msg: "r128 modifier is not supported on this GPU");
7478	if (Name == "a16" && !hasA16())
7479	return Error(L: S, Msg: "a16 modifier is not supported on this GPU");
7480
7481	if (Bit == `0` && Name == "gds") {
7482	StringRef Mnemo = ((AMDGPUOperand &)*Operands [`0`]).getToken();
7483	if (Mnemo.starts_with(Prefix: "ds_gws"))
7484	return Error(L: S, Msg: "nogds is not allowed");
7485	}
7486
7487	if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7488	ImmTy = AMDGPUOperand::ImmTyR128A16;
7489
7490	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Bit, Loc: S, Type: ImmTy));
7491	return ParseStatus::Success;
7492	}
7493
7494	unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7495	bool &Disabling) const {
7496	Disabling = Id.consume_front(Prefix: "no");
7497
7498	if (isGFX940() && !Mnemo.starts_with(Prefix: "s_")) {
7499	return StringSwitch<unsigned>(Id)
7500	.Case(S: "nt", Value: AMDGPU::CPol::NT)
7501	.Case(S: "sc0", Value: AMDGPU::CPol::SC0)
7502	.Case(S: "sc1", Value: AMDGPU::CPol::SC1)
7503	.Default(Value: `0`);
7504	}
7505
7506	return StringSwitch<unsigned>(Id)
7507	.Case(S: "dlc", Value: AMDGPU::CPol::DLC)
7508	.Case(S: "glc", Value: AMDGPU::CPol::GLC)
7509	.Case(S: "scc", Value: AMDGPU::CPol::SCC)
7510	.Case(S: "slc", Value: AMDGPU::CPol::SLC)
7511	.Default(Value: `0`);
7512	}
7513
7514	ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
7515	if (isGFX12Plus()) {
7516	SMLoc StringLoc = getLoc();
7517
7518	int64_t CPolVal = `0`;
7519	ParseStatus ResTH = ParseStatus::NoMatch;
7520	ParseStatus ResScope = ParseStatus::NoMatch;
7521	ParseStatus ResNV = ParseStatus::NoMatch;
7522	ParseStatus ResScal = ParseStatus::NoMatch;
7523
7524	for (;;) {
7525	if (ResTH.isNoMatch()) {
7526	int64_t TH;
7527	ResTH = parseTH(Operands, TH);
7528	if (ResTH.isFailure())
7529	return ResTH;
7530	if (ResTH.isSuccess()) {
7531	CPolVal \|= TH;
7532	continue;
7533	}
7534	}
7535
7536	if (ResScope.isNoMatch()) {
7537	int64_t Scope;
7538	ResScope = parseScope(Operands, Scope);
7539	if (ResScope.isFailure())
7540	return ResScope;
7541	if (ResScope.isSuccess()) {
7542	CPolVal \|= Scope;
7543	continue;
7544	}
7545	}
7546
7547	// NV bit exists on GFX12+, but does something starting from GFX1250.
7548	// Allow parsing on all GFX12 and fail on validation for better
7549	// diagnostics.
7550	if (ResNV.isNoMatch()) {
7551	if (trySkipId(Id: "nv")) {
7552	ResNV = ParseStatus::Success;
7553	CPolVal \|= CPol::NV;
7554	continue;
7555	} else if (trySkipId(Pref: "no", Id: "nv")) {
7556	ResNV = ParseStatus::Success;
7557	continue;
7558	}
7559	}
7560
7561	if (ResScal.isNoMatch()) {
7562	if (trySkipId(Id: "scale_offset")) {
7563	ResScal = ParseStatus::Success;
7564	CPolVal \|= CPol::SCAL;
7565	continue;
7566	} else if (trySkipId(Pref: "no", Id: "scale_offset")) {
7567	ResScal = ParseStatus::Success;
7568	continue;
7569	}
7570	}
7571
7572	break;
7573	}
7574
7575	if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
7576	ResScal.isNoMatch())
7577	return ParseStatus::NoMatch;
7578
7579	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: CPolVal, Loc: StringLoc,
7580	Type: AMDGPUOperand::ImmTyCPol));
7581	return ParseStatus::Success;
7582	}
7583
7584	StringRef Mnemo = ((AMDGPUOperand &)*Operands [`0`]).getToken();
7585	SMLoc OpLoc = getLoc();
7586	unsigned Enabled = `0`, Seen = `0`;
7587	for (;;) {
7588	SMLoc S = getLoc();
7589	bool Disabling;
7590	unsigned CPol = getCPolKind(Id: getId(), Mnemo, Disabling);
7591	if (!CPol)
7592	break;
7593
7594	lex();
7595
7596	if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
7597	return Error(L: S, Msg: "dlc modifier is not supported on this GPU");
7598
7599	if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
7600	return Error(L: S, Msg: "scc modifier is not supported on this GPU");
7601
7602	if (Seen & CPol)
7603	return Error(L: S, Msg: "duplicate cache policy modifier");
7604
7605	if (!Disabling)
7606	Enabled \|= CPol;
7607
7608	Seen \|= CPol;
7609	}
7610
7611	if (!Seen)
7612	return ParseStatus::NoMatch;
7613
7614	Operands.push_back(
7615	Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Enabled, Loc: OpLoc, Type: AMDGPUOperand::ImmTyCPol));
7616	return ParseStatus::Success;
7617	}
7618
7619	ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
7620	int64_t &Scope) {
7621	static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,
7622	CPol::SCOPE_DEV, CPol::SCOPE_SYS};
7623
7624	ParseStatus Res = parseStringOrIntWithPrefix(
7625	Operands, Name: "scope", Ids: {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
7626	IntVal&: Scope);
7627
7628	if (Res.isSuccess())
7629	Scope = Scopes[Scope];
7630
7631	return Res;
7632	}
7633
7634	ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
7635	TH = AMDGPU::CPol::TH_RT; // default
7636
7637	StringRef Value;
7638	SMLoc StringLoc;
7639	ParseStatus Res = parseStringWithPrefix(Prefix: "th", Value, StringLoc);
7640	if (!Res.isSuccess())
7641	return Res;
7642
7643	if (Value == "TH_DEFAULT")
7644	TH = AMDGPU::CPol::TH_RT;
7645	else if (Value == "TH_STORE_LU" \|\| Value == "TH_LOAD_WB" \|\|
7646	Value == "TH_LOAD_NT_WB") {
7647	return Error(L: StringLoc, Msg: "invalid th value");
7648	} else if (Value.consume_front(Prefix: "TH_ATOMIC_")) {
7649	TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
7650	} else if (Value.consume_front(Prefix: "TH_LOAD_")) {
7651	TH = AMDGPU::CPol::TH_TYPE_LOAD;
7652	} else if (Value.consume_front(Prefix: "TH_STORE_")) {
7653	TH = AMDGPU::CPol::TH_TYPE_STORE;
7654	} else {
7655	return Error(L: StringLoc, Msg: "invalid th value");
7656	}
7657
7658	if (Value == "BYPASS")
7659	TH \|= AMDGPU::CPol::TH_REAL_BYPASS;
7660
7661	if (TH != `0`) {
7662	if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
7663	TH \|= StringSwitch<int64_t>(Value)
7664	.Case(S: "RETURN", Value: AMDGPU::CPol::TH_ATOMIC_RETURN)
7665	.Case(S: "RT", Value: AMDGPU::CPol::TH_RT)
7666	.Case(S: "RT_RETURN", Value: AMDGPU::CPol::TH_ATOMIC_RETURN)
7667	.Case(S: "NT", Value: AMDGPU::CPol::TH_ATOMIC_NT)
7668	.Case(S: "NT_RETURN", Value: AMDGPU::CPol::TH_ATOMIC_NT \|
7669	AMDGPU::CPol::TH_ATOMIC_RETURN)
7670	.Case(S: "CASCADE_RT", Value: AMDGPU::CPol::TH_ATOMIC_CASCADE)
7671	.Case(S: "CASCADE_NT", Value: AMDGPU::CPol::TH_ATOMIC_CASCADE \|
7672	AMDGPU::CPol::TH_ATOMIC_NT)
7673	.Default(Value: `0xffffffff`);
7674	else
7675	TH \|= StringSwitch<int64_t>(Value)
7676	.Case(S: "RT", Value: AMDGPU::CPol::TH_RT)
7677	.Case(S: "NT", Value: AMDGPU::CPol::TH_NT)
7678	.Case(S: "HT", Value: AMDGPU::CPol::TH_HT)
7679	.Case(S: "LU", Value: AMDGPU::CPol::TH_LU)
7680	.Case(S: "WB", Value: AMDGPU::CPol::TH_WB)
7681	.Case(S: "NT_RT", Value: AMDGPU::CPol::TH_NT_RT)
7682	.Case(S: "RT_NT", Value: AMDGPU::CPol::TH_RT_NT)
7683	.Case(S: "NT_HT", Value: AMDGPU::CPol::TH_NT_HT)
7684	.Case(S: "NT_WB", Value: AMDGPU::CPol::TH_NT_WB)
7685	.Case(S: "BYPASS", Value: AMDGPU::CPol::TH_BYPASS)
7686	.Default(Value: `0xffffffff`);
7687	}
7688
7689	if (TH == `0xffffffff`)
7690	return Error(L: StringLoc, Msg: "invalid th value");
7691
7692	return ParseStatus::Success;
7693	}
7694
7695	static void
7696	addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands,
7697	AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
7698	AMDGPUOperand::ImmTy ImmT, int64_t Default = `0`,
7699	std::optional<unsigned> InsertAt = std::nullopt) {
7700	auto i = OptionalIdx.find(x: ImmT);
7701	if (i != OptionalIdx.end()) {
7702	unsigned Idx = i ->second;
7703	const AMDGPUOperand &Op =
7704	static_cast<const AMDGPUOperand &>(*Operands [Idx]);
7705	if (InsertAt)
7706	Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Op.getImm()));
7707	else
7708	Op.addImmOperands(Inst, N: `1`);
7709	} else {
7710	if (InsertAt.has_value())
7711	Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Default));
7712	else
7713	Inst.addOperand(Op: MCOperand::createImm(Val: Default));
7714	}
7715	}
7716
7717	ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
7718	StringRef &Value,
7719	SMLoc &StringLoc) {
7720	if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon))
7721	return ParseStatus::NoMatch;
7722
7723	StringLoc = getLoc();
7724	return parseId(Val&: Value, ErrMsg: "expected an identifier") ? ParseStatus::Success
7725	: ParseStatus::Failure;
7726	}
7727
7728	ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7729	OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7730	int64_t &IntVal) {
7731	if (!trySkipId(Id: Name, Kind: AsmToken::Colon))
7732	return ParseStatus::NoMatch;
7733
7734	SMLoc StringLoc = getLoc();
7735
7736	StringRef Value;
7737	if (isToken(Kind: AsmToken::Identifier)) {
7738	Value = getTokenStr();
7739	lex();
7740
7741	for (IntVal = `0`; IntVal < (int64_t)Ids.size(); ++IntVal)
7742	if (Value == Ids [IntVal])
7743	break;
7744	} else if (!parseExpr(Imm&: IntVal))
7745	return ParseStatus::Failure;
7746
7747	if (IntVal < `0` \|\| IntVal >= (int64_t)Ids.size())
7748	return Error(L: StringLoc, Msg: "invalid " + Twine (Name) + " value");
7749
7750	return ParseStatus::Success;
7751	}
7752
7753	ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7754	OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7755	AMDGPUOperand::ImmTy Type) {
7756	SMLoc S = getLoc();
7757	int64_t IntVal;
7758
7759	ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7760	if (Res.isSuccess())
7761	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S, Type));
7762
7763	return Res;
7764	}
7765
7766	//===----------------------------------------------------------------------===//
7767	// MTBUF format
7768	//===----------------------------------------------------------------------===//
7769
7770	bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7771	int64_t MaxVal,
7772	int64_t &Fmt) {
7773	int64_t Val;
7774	SMLoc Loc = getLoc();
7775
7776	auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: Val);
7777	if (Res.isFailure())
7778	return false;
7779	if (Res.isNoMatch())
7780	return true;
7781
7782	if (Val < `0` \|\| Val > MaxVal) {
7783	Error(L: Loc, Msg: Twine ("out of range ", StringRef (Pref)));
7784	return false;
7785	}
7786
7787	Fmt = Val;
7788	return true;
7789	}
7790
7791	ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7792	AMDGPUOperand::ImmTy ImmTy) {
7793	const char *Pref = "index_key";
7794	int64_t ImmVal = `0`;
7795	SMLoc Loc = getLoc();
7796	auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: ImmVal);
7797	if (!Res.isSuccess())
7798	return Res;
7799
7800	if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit \|\|
7801	ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7802	(ImmVal < `0` \|\| ImmVal > `1`))
7803	return Error(L: Loc, Msg: Twine ("out of range ", StringRef (Pref)));
7804
7805	if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < `0` \|\| ImmVal > `3`))
7806	return Error(L: Loc, Msg: Twine ("out of range ", StringRef (Pref)));
7807
7808	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: ImmTy));
7809	return ParseStatus::Success;
7810	}
7811
7812	ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
7813	return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey8bit);
7814	}
7815
7816	ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
7817	return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey16bit);
7818	}
7819
7820	ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
7821	return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey32bit);
7822	}
7823
7824	ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
7825	StringRef Name,
7826	AMDGPUOperand::ImmTy Type) {
7827	return parseStringOrIntWithPrefix(Operands, Name, Ids: WMMAMods::ModMatrixFmt,
7828	Type);
7829	}
7830
7831	ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
7832	return tryParseMatrixFMT(Operands, Name: "matrix_a_fmt",
7833	Type: AMDGPUOperand::ImmTyMatrixAFMT);
7834	}
7835
7836	ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
7837	return tryParseMatrixFMT(Operands, Name: "matrix_b_fmt",
7838	Type: AMDGPUOperand::ImmTyMatrixBFMT);
7839	}
7840
7841	ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
7842	StringRef Name,
7843	AMDGPUOperand::ImmTy Type) {
7844	return parseStringOrIntWithPrefix(Operands, Name, Ids: WMMAMods::ModMatrixScale,
7845	Type);
7846	}
7847
7848	ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
7849	return tryParseMatrixScale(Operands, Name: "matrix_a_scale",
7850	Type: AMDGPUOperand::ImmTyMatrixAScale);
7851	}
7852
7853	ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
7854	return tryParseMatrixScale(Operands, Name: "matrix_b_scale",
7855	Type: AMDGPUOperand::ImmTyMatrixBScale);
7856	}
7857
7858	ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
7859	StringRef Name,
7860	AMDGPUOperand::ImmTy Type) {
7861	return parseStringOrIntWithPrefix(Operands, Name, Ids: WMMAMods::ModMatrixScaleFmt,
7862	Type);
7863	}
7864
7865	ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
7866	return tryParseMatrixScaleFmt(Operands, Name: "matrix_a_scale_fmt",
7867	Type: AMDGPUOperand::ImmTyMatrixAScaleFmt);
7868	}
7869
7870	ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
7871	return tryParseMatrixScaleFmt(Operands, Name: "matrix_b_scale_fmt",
7872	Type: AMDGPUOperand::ImmTyMatrixBScaleFmt);
7873	}
7874
7875	// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7876	// values to live in a joint format operand in the MCInst encoding.
7877	ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7878	using namespace llvm::AMDGPU::MTBUFFormat;
7879
7880	int64_t Dfmt = DFMT_UNDEF;
7881	int64_t Nfmt = NFMT_UNDEF;
7882
7883	// dfmt and nfmt can appear in either order, and each is optional.
7884	for (int I = `0`; I < `2`; ++I) {
7885	if (Dfmt == DFMT_UNDEF && !tryParseFmt(Pref: "dfmt", MaxVal: DFMT_MAX, Fmt&: Dfmt))
7886	return ParseStatus::Failure;
7887
7888	if (Nfmt == NFMT_UNDEF && !tryParseFmt(Pref: "nfmt", MaxVal: NFMT_MAX, Fmt&: Nfmt))
7889	return ParseStatus::Failure;
7890
7891	// Skip optional comma between dfmt/nfmt
7892	// but guard against 2 commas following each other.
7893	if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7894	!peekToken().is(K: AsmToken::Comma)) {
7895	trySkipToken(Kind: AsmToken::Comma);
7896	}
7897	}
7898
7899	if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7900	return ParseStatus::NoMatch;
7901
7902	Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7903	Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7904
7905	Format = encodeDfmtNfmt(Dfmt, Nfmt);
7906	return ParseStatus::Success;
7907	}
7908
7909	ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7910	using namespace llvm::AMDGPU::MTBUFFormat;
7911
7912	int64_t Fmt = UFMT_UNDEF;
7913
7914	if (!tryParseFmt(Pref: "format", MaxVal: UFMT_MAX, Fmt))
7915	return ParseStatus::Failure;
7916
7917	if (Fmt == UFMT_UNDEF)
7918	return ParseStatus::NoMatch;
7919
7920	Format = Fmt;
7921	return ParseStatus::Success;
7922	}
7923
7924	bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7925	int64_t &Nfmt,
7926	StringRef FormatStr,
7927	SMLoc Loc) {
7928	using namespace llvm::AMDGPU::MTBUFFormat;
7929	int64_t Format;
7930
7931	Format = getDfmt(Name: FormatStr);
7932	if (Format != DFMT_UNDEF) {
7933	Dfmt = Format;
7934	return true;
7935	}
7936
7937	Format = getNfmt(Name: FormatStr, STI: getSTI());
7938	if (Format != NFMT_UNDEF) {
7939	Nfmt = Format;
7940	return true;
7941	}
7942
7943	Error(L: Loc, Msg: "unsupported format");
7944	return false;
7945	}
7946
7947	ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7948	SMLoc FormatLoc,
7949	int64_t &Format) {
7950	using namespace llvm::AMDGPU::MTBUFFormat;
7951
7952	int64_t Dfmt = DFMT_UNDEF;
7953	int64_t Nfmt = NFMT_UNDEF;
7954	if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, Loc: FormatLoc))
7955	return ParseStatus::Failure;
7956
7957	if (trySkipToken(Kind: AsmToken::Comma)) {
7958	StringRef Str;
7959	SMLoc Loc = getLoc();
7960	if (!parseId(Val&: Str, ErrMsg: "expected a format string") \|\|
7961	!matchDfmtNfmt(Dfmt, Nfmt, FormatStr: Str, Loc))
7962	return ParseStatus::Failure;
7963	if (Dfmt == DFMT_UNDEF)
7964	return Error(L: Loc, Msg: "duplicate numeric format");
7965	if (Nfmt == NFMT_UNDEF)
7966	return Error(L: Loc, Msg: "duplicate data format");
7967	}
7968
7969	Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7970	Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7971
7972	if (isGFX10Plus()) {
7973	auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, STI: getSTI());
7974	if (Ufmt == UFMT_UNDEF)
7975	return Error(L: FormatLoc, Msg: "unsupported format");
7976	Format = Ufmt;
7977	} else {
7978	Format = encodeDfmtNfmt(Dfmt, Nfmt);
7979	}
7980
7981	return ParseStatus::Success;
7982	}
7983
7984	ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7985	SMLoc Loc,
7986	int64_t &Format) {
7987	using namespace llvm::AMDGPU::MTBUFFormat;
7988
7989	auto Id = getUnifiedFormat(Name: FormatStr, STI: getSTI());
7990	if (Id == UFMT_UNDEF)
7991	return ParseStatus::NoMatch;
7992
7993	if (!isGFX10Plus())
7994	return Error(L: Loc, Msg: "unified format is not supported on this GPU");
7995
7996	Format = Id;
7997	return ParseStatus::Success;
7998	}
7999
8000	ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
8001	using namespace llvm::AMDGPU::MTBUFFormat;
8002	SMLoc Loc = getLoc();
8003
8004	if (!parseExpr(Imm&: Format))
8005	return ParseStatus::Failure;
8006	if (!isValidFormatEncoding(Val: Format, STI: getSTI()))
8007	return Error(L: Loc, Msg: "out of range format");
8008
8009	return ParseStatus::Success;
8010	}
8011
8012	ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
8013	using namespace llvm::AMDGPU::MTBUFFormat;
8014
8015	if (!trySkipId(Id: "format", Kind: AsmToken::Colon))
8016	return ParseStatus::NoMatch;
8017
8018	if (trySkipToken(Kind: AsmToken::LBrac)) {
8019	StringRef FormatStr;
8020	SMLoc Loc = getLoc();
8021	if (!parseId(Val&: FormatStr, ErrMsg: "expected a format string"))
8022	return ParseStatus::Failure;
8023
8024	auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
8025	if (Res.isNoMatch())
8026	Res = parseSymbolicSplitFormat(FormatStr, FormatLoc: Loc, Format);
8027	if (!Res.isSuccess())
8028	return Res;
8029
8030	if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
8031	return ParseStatus::Failure;
8032
8033	return ParseStatus::Success;
8034	}
8035
8036	return parseNumericFormat(Format);
8037	}
8038
8039	ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
8040	using namespace llvm::AMDGPU::MTBUFFormat;
8041
8042	int64_t Format = getDefaultFormatEncoding(STI: getSTI());
8043	ParseStatus Res;
8044	SMLoc Loc = getLoc();
8045
8046	// Parse legacy format syntax.
8047	Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
8048	if (Res.isFailure())
8049	return Res;
8050
8051	bool FormatFound = Res.isSuccess();
8052
8053	Operands.push_back(
8054	Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Format, Loc, Type: AMDGPUOperand::ImmTyFORMAT));
8055
8056	if (FormatFound)
8057	trySkipToken(Kind: AsmToken::Comma);
8058
8059	if (isToken(Kind: AsmToken::EndOfStatement)) {
8060	// We are expecting an soffset operand,
8061	// but let matcher handle the error.
8062	return ParseStatus::Success;
8063	}
8064
8065	// Parse soffset.
8066	Res = parseRegOrImm(Operands);
8067	if (!Res.isSuccess())
8068	return Res;
8069
8070	trySkipToken(Kind: AsmToken::Comma);
8071
8072	if (!FormatFound) {
8073	Res = parseSymbolicOrNumericFormat(Format);
8074	if (Res.isFailure())
8075	return Res;
8076	if (Res.isSuccess()) {
8077	auto Size = Operands.size();
8078	AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands [Size - `2`]);
8079	assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
8080	Op.setImm(Format);
8081	}
8082	return ParseStatus::Success;
8083	}
8084
8085	if (isId(Id: "format") && peekToken().is(K: AsmToken::Colon))
8086	return Error(L: getLoc(), Msg: "duplicate format");
8087	return ParseStatus::Success;
8088	}
8089
8090	ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
8091	ParseStatus Res =
8092	parseIntWithPrefix(Prefix: "offset", Operands, ImmTy: AMDGPUOperand::ImmTyOffset);
8093	if (Res.isNoMatch()) {
8094	Res = parseIntWithPrefix(Prefix: "inst_offset", Operands,
8095	ImmTy: AMDGPUOperand::ImmTyInstOffset);
8096	}
8097	return Res;
8098	}
8099
8100	ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
8101	ParseStatus Res =
8102	parseNamedBit(Name: "r128", Operands, ImmTy: AMDGPUOperand::ImmTyR128A16);
8103	if (Res.isNoMatch())
8104	Res = parseNamedBit(Name: "a16", Operands, ImmTy: AMDGPUOperand::ImmTyA16);
8105	return Res;
8106	}
8107
8108	ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
8109	ParseStatus Res =
8110	parseIntWithPrefix(Prefix: "blgp", Operands, ImmTy: AMDGPUOperand::ImmTyBLGP);
8111	if (Res.isNoMatch()) {
8112	Res =
8113	parseOperandArrayWithPrefix(Prefix: "neg", Operands, ImmTy: AMDGPUOperand::ImmTyBLGP);
8114	}
8115	return Res;
8116	}
8117
8118	//===----------------------------------------------------------------------===//
8119	// Exp
8120	//===----------------------------------------------------------------------===//
8121
8122	void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
8123	OptionalImmIndexMap OptionalIdx;
8124
8125	unsigned OperandIdx[`4`];
8126	unsigned EnMask = `0`;
8127	int SrcIdx = `0`;
8128
8129	for (unsigned i = `1`, e = Operands.size(); i != e; ++i) {
8130	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [i]);
8131
8132	// Add the register arguments
8133	if (Op.isReg()) {
8134	assert(SrcIdx < `4`);
8135	OperandIdx[SrcIdx] = Inst.size();
8136	Op.addRegOperands(Inst, N: `1`);
8137	++SrcIdx;
8138	continue;
8139	}
8140
8141	if (Op.isOff()) {
8142	assert(SrcIdx < `4`);
8143	OperandIdx[SrcIdx] = Inst.size();
8144	Inst.addOperand(Op: MCOperand::createReg(Reg: MCRegister ()));
8145	++SrcIdx;
8146	continue;
8147	}
8148
8149	if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
8150	Op.addImmOperands(Inst, N: `1`);
8151	continue;
8152	}
8153
8154	if (Op.isToken() && (Op.getToken() == "done" \|\| Op.getToken() == "row_en"))
8155	continue;
8156
8157	// Handle optional arguments
8158	OptionalIdx [Op.getImmTy()] = i;
8159	}
8160
8161	assert(SrcIdx == `4`);
8162
8163	bool Compr = false;
8164	if (OptionalIdx.find(x: AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
8165	Compr = true;
8166	Inst.getOperand(i: OperandIdx[`1`]) = Inst.getOperand(i: OperandIdx[`2`]);
8167	Inst.getOperand(i: OperandIdx[`2`]).setReg(MCRegister ());
8168	Inst.getOperand(i: OperandIdx[`3`]).setReg(MCRegister ());
8169	}
8170
8171	for (auto i = `0`; i < SrcIdx; ++i) {
8172	if (Inst.getOperand(i: OperandIdx[i]).getReg()) {
8173	EnMask \|= Compr? (`0x3` << i * `2`) : (`0x1` << i);
8174	}
8175	}
8176
8177	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpVM);
8178	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpCompr);
8179
8180	Inst.addOperand(Op: MCOperand::createImm(Val: EnMask));
8181	}
8182
8183	//===----------------------------------------------------------------------===//
8184	// s_waitcnt
8185	//===----------------------------------------------------------------------===//
8186
8187	static bool
8188	encodeCnt(
8189	const AMDGPU::IsaVersion ISA,
8190	int64_t &IntVal,
8191	int64_t CntVal,
8192	bool Saturate,
8193	unsigned (encode)(const* IsaVersion &Version, unsigned, unsigned),
8194	unsigned (decode)(const* IsaVersion &Version, unsigned))
8195	{
8196	bool Failed = false;
8197
8198	IntVal = encode(ISA, IntVal, CntVal);
8199	if (CntVal != decode(ISA, IntVal)) {
8200	if (Saturate) {
8201	IntVal = encode(ISA, IntVal, -`1`);
8202	} else {
8203	Failed = true;
8204	}
8205	}
8206	return Failed;
8207	}
8208
8209	bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
8210
8211	SMLoc CntLoc = getLoc();
8212	StringRef CntName = getTokenStr();
8213
8214	if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name") \|\|
8215	!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
8216	return false;
8217
8218	int64_t CntVal;
8219	SMLoc ValLoc = getLoc();
8220	if (!parseExpr(Imm&: CntVal))
8221	return false;
8222
8223	AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
8224
8225	bool Failed = true;
8226	bool Sat = CntName.ends_with(Suffix: "_sat");
8227
8228	if (CntName == "vmcnt" \|\| CntName == "vmcnt_sat") {
8229	Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeVmcnt, decode: decodeVmcnt);
8230	} else if (CntName == "expcnt" \|\| CntName == "expcnt_sat") {
8231	Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeExpcnt, decode: decodeExpcnt);
8232	} else if (CntName == "lgkmcnt" \|\| CntName == "lgkmcnt_sat") {
8233	Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeLgkmcnt, decode: decodeLgkmcnt);
8234	} else {
8235	Error(L: CntLoc, Msg: "invalid counter name " + CntName);
8236	return false;
8237	}
8238
8239	if (Failed) {
8240	Error(L: ValLoc, Msg: "too large value for " + CntName);
8241	return false;
8242	}
8243
8244	if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
8245	return false;
8246
8247	if (trySkipToken(Kind: AsmToken::Amp) \|\| trySkipToken(Kind: AsmToken::Comma)) {
8248	if (isToken(Kind: AsmToken::EndOfStatement)) {
8249	Error(L: getLoc(), Msg: "expected a counter name");
8250	return false;
8251	}
8252	}
8253
8254	return true;
8255	}
8256
8257	ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
8258	AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU());
8259	int64_t Waitcnt = getWaitcntBitMask(Version: ISA);
8260	SMLoc S = getLoc();
8261
8262	if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
8263	while (!isToken(Kind: AsmToken::EndOfStatement)) {
8264	if (!parseCnt(IntVal&: Waitcnt))
8265	return ParseStatus::Failure;
8266	}
8267	} else {
8268	if (!parseExpr(Imm&: Waitcnt))
8269	return ParseStatus::Failure;
8270	}
8271
8272	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Waitcnt, Loc: S));
8273	return ParseStatus::Success;
8274	}
8275
8276	bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
8277	SMLoc FieldLoc = getLoc();
8278	StringRef FieldName = getTokenStr();
8279	if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a field name") \|\|
8280	!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
8281	return false;
8282
8283	SMLoc ValueLoc = getLoc();
8284	StringRef ValueName = getTokenStr();
8285	if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a value name") \|\|
8286	!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a right parenthesis"))
8287	return false;
8288
8289	unsigned Shift;
8290	if (FieldName == "instid0") {
8291	Shift = `0`;
8292	} else if (FieldName == "instskip") {
8293	Shift = `4`;
8294	} else if (FieldName == "instid1") {
8295	Shift = `7`;
8296	} else {
8297	Error(L: FieldLoc, Msg: "invalid field name " + FieldName);
8298	return false;
8299	}
8300
8301	int Value;
8302	if (Shift == `4`) {
8303	// Parse values for instskip.
8304	Value = StringSwitch<int>(ValueName)
8305	.Case(S: "SAME", Value: `0`)
8306	.Case(S: "NEXT", Value: `1`)
8307	.Case(S: "SKIP_1", Value: `2`)
8308	.Case(S: "SKIP_2", Value: `3`)
8309	.Case(S: "SKIP_3", Value: `4`)
8310	.Case(S: "SKIP_4", Value: `5`)
8311	.Default(Value: -`1`);
8312	} else {
8313	// Parse values for instid0 and instid1.
8314	Value = StringSwitch<int>(ValueName)
8315	.Case(S: "NO_DEP", Value: `0`)
8316	.Case(S: "VALU_DEP_1", Value: `1`)
8317	.Case(S: "VALU_DEP_2", Value: `2`)
8318	.Case(S: "VALU_DEP_3", Value: `3`)
8319	.Case(S: "VALU_DEP_4", Value: `4`)
8320	.Case(S: "TRANS32_DEP_1", Value: `5`)
8321	.Case(S: "TRANS32_DEP_2", Value: `6`)
8322	.Case(S: "TRANS32_DEP_3", Value: `7`)
8323	.Case(S: "FMA_ACCUM_CYCLE_1", Value: `8`)
8324	.Case(S: "SALU_CYCLE_1", Value: `9`)
8325	.Case(S: "SALU_CYCLE_2", Value: `10`)
8326	.Case(S: "SALU_CYCLE_3", Value: `11`)
8327	.Default(Value: -`1`);
8328	}
8329	if (Value < `0`) {
8330	Error(L: ValueLoc, Msg: "invalid value name " + ValueName);
8331	return false;
8332	}
8333
8334	Delay \|= Value << Shift;
8335	return true;
8336	}
8337
8338	ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
8339	int64_t Delay = `0`;
8340	SMLoc S = getLoc();
8341
8342	if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
8343	do {
8344	if (!parseDelay(Delay))
8345	return ParseStatus::Failure;
8346	} while (trySkipToken(Kind: AsmToken::Pipe));
8347	} else {
8348	if (!parseExpr(Imm&: Delay))
8349	return ParseStatus::Failure;
8350	}
8351
8352	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Delay, Loc: S));
8353	return ParseStatus::Success;
8354	}
8355
8356	bool
8357	AMDGPUOperand::isSWaitCnt() const {
8358	return isImm();
8359	}
8360
8361	bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
8362
8363	//===----------------------------------------------------------------------===//
8364	// DepCtr
8365	//===----------------------------------------------------------------------===//
8366
8367	void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
8368	StringRef DepCtrName) {
8369	switch (ErrorId) {
8370	case OPR_ID_UNKNOWN:
8371	Error(L: Loc, Msg: Twine ("invalid counter name ", DepCtrName));
8372	return;
8373	case OPR_ID_UNSUPPORTED:
8374	Error(L: Loc, Msg: Twine (DepCtrName, " is not supported on this GPU"));
8375	return;
8376	case OPR_ID_DUPLICATE:
8377	Error(L: Loc, Msg: Twine ("duplicate counter name ", DepCtrName));
8378	return;
8379	case OPR_VAL_INVALID:
8380	Error(L: Loc, Msg: Twine ("invalid value for ", DepCtrName));
8381	return;
8382	default:
8383	assert(false);
8384	}
8385	}
8386
8387	bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
8388
8389	using namespace llvm::AMDGPU::DepCtr;
8390
8391	SMLoc DepCtrLoc = getLoc();
8392	StringRef DepCtrName = getTokenStr();
8393
8394	if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name") \|\|
8395	!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis"))
8396	return false;
8397
8398	int64_t ExprVal;
8399	if (!parseExpr(Imm&: ExprVal))
8400	return false;
8401
8402	unsigned PrevOprMask = UsedOprMask;
8403	int CntVal = encodeDepCtr(Name: DepCtrName, Val: ExprVal, UsedOprMask, STI: getSTI());
8404
8405	if (CntVal < `0`) {
8406	depCtrError(Loc: DepCtrLoc, ErrorId: CntVal, DepCtrName);
8407	return false;
8408	}
8409
8410	if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
8411	return false;
8412
8413	if (trySkipToken(Kind: AsmToken::Amp) \|\| trySkipToken(Kind: AsmToken::Comma)) {
8414	if (isToken(Kind: AsmToken::EndOfStatement)) {
8415	Error(L: getLoc(), Msg: "expected a counter name");
8416	return false;
8417	}
8418	}
8419
8420	int64_t CntValMask = PrevOprMask ^ UsedOprMask;
8421	DepCtr = (DepCtr & ~CntValMask) \| CntVal;
8422	return true;
8423	}
8424
8425	ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8426	using namespace llvm::AMDGPU::DepCtr;
8427
8428	int64_t DepCtr = getDefaultDepCtrEncoding(STI: getSTI());
8429	SMLoc Loc = getLoc();
8430
8431	if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) {
8432	unsigned UsedOprMask = `0`;
8433	while (!isToken(Kind: AsmToken::EndOfStatement)) {
8434	if (!parseDepCtr(DepCtr, UsedOprMask))
8435	return ParseStatus::Failure;
8436	}
8437	} else {
8438	if (!parseExpr(Imm&: DepCtr))
8439	return ParseStatus::Failure;
8440	}
8441
8442	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DepCtr, Loc));
8443	return ParseStatus::Success;
8444	}
8445
8446	bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8447
8448	//===----------------------------------------------------------------------===//
8449	// hwreg
8450	//===----------------------------------------------------------------------===//
8451
8452	ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
8453	OperandInfoTy &Offset,
8454	OperandInfoTy &Width) {
8455	using namespace llvm::AMDGPU::Hwreg;
8456
8457	if (!trySkipId(Id: "hwreg", Kind: AsmToken::LParen))
8458	return ParseStatus::NoMatch;
8459
8460	// The register may be specified by name or using a numeric code
8461	HwReg.Loc = getLoc();
8462	if (isToken(Kind: AsmToken::Identifier) &&
8463	(HwReg.Val = getHwregId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) {
8464	HwReg.IsSymbolic = true;
8465	lex(); // skip register name
8466	} else if (!parseExpr(Imm&: HwReg.Val, Expected: "a register name")) {
8467	return ParseStatus::Failure;
8468	}
8469
8470	if (trySkipToken(Kind: AsmToken::RParen))
8471	return ParseStatus::Success;
8472
8473	// parse optional params
8474	if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma or a closing parenthesis"))
8475	return ParseStatus::Failure;
8476
8477	Offset.Loc = getLoc();
8478	if (!parseExpr(Imm&: Offset.Val))
8479	return ParseStatus::Failure;
8480
8481	if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
8482	return ParseStatus::Failure;
8483
8484	Width.Loc = getLoc();
8485	if (!parseExpr(Imm&: Width.Val) \|\|
8486	!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis"))
8487	return ParseStatus::Failure;
8488
8489	return ParseStatus::Success;
8490	}
8491
8492	ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8493	using namespace llvm::AMDGPU::Hwreg;
8494
8495	int64_t ImmVal = `0`;
8496	SMLoc Loc = getLoc();
8497
8498	StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8499	HwregId::Default);
8500	StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8501	HwregOffset::Default);
8502	struct : StructuredOpField {
8503	using StructuredOpField::StructuredOpField;
8504	bool validate(AMDGPUAsmParser &Parser) const override {
8505	if (!isUIntN(N: Width, x: Val - `1`))
8506	return Error(Parser, Err: "only values from 1 to 32 are legal");
8507	return true;
8508	}
8509	} Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8510	ParseStatus Res = parseStructuredOpFields(Fields: {&HwReg, &Offset, &Width});
8511
8512	if (Res.isNoMatch())
8513	Res = parseHwregFunc(HwReg, Offset, Width);
8514
8515	if (Res.isSuccess()) {
8516	if (!validateStructuredOpFields(Fields: {&HwReg, &Offset, &Width}))
8517	return ParseStatus::Failure;
8518	ImmVal = HwregEncoding::encode(Values: HwReg.Val, Values: Offset.Val, Values: Width.Val);
8519	}
8520
8521	if (Res.isNoMatch() &&
8522	parseExpr(Imm&: ImmVal, Expected: "a hwreg macro, structured immediate"))
8523	Res = ParseStatus::Success;
8524
8525	if (!Res.isSuccess())
8526	return ParseStatus::Failure;
8527
8528	if (!isUInt<`16`>(x: ImmVal))
8529	return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal");
8530	Operands.push_back(
8531	Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTyHwreg));
8532	return ParseStatus::Success;
8533	}
8534
8535	bool AMDGPUOperand::isHwreg() const {
8536	return isImmTy(ImmT: ImmTyHwreg);
8537	}
8538
8539	//===----------------------------------------------------------------------===//
8540	// sendmsg
8541	//===----------------------------------------------------------------------===//
8542
8543	bool
8544	AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8545	OperandInfoTy &Op,
8546	OperandInfoTy &Stream) {
8547	using namespace llvm::AMDGPU::SendMsg;
8548
8549	Msg.Loc = getLoc();
8550	if (isToken(Kind: AsmToken::Identifier) &&
8551	(Msg.Val = getMsgId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) {
8552	Msg.IsSymbolic = true;
8553	lex(); // skip message name
8554	} else if (!parseExpr(Imm&: Msg.Val, Expected: "a message name")) {
8555	return false;
8556	}
8557
8558	if (trySkipToken(Kind: AsmToken::Comma)) {
8559	Op.IsDefined = true;
8560	Op.Loc = getLoc();
8561	if (isToken(Kind: AsmToken::Identifier) &&
8562	(Op.Val = getMsgOpId(MsgId: Msg.Val, Name: getTokenStr(), STI: getSTI())) !=
8563	OPR_ID_UNKNOWN) {
8564	lex(); // skip operation name
8565	} else if (!parseExpr(Imm&: Op.Val, Expected: "an operation name")) {
8566	return false;
8567	}
8568
8569	if (trySkipToken(Kind: AsmToken::Comma)) {
8570	Stream.IsDefined = true;
8571	Stream.Loc = getLoc();
8572	if (!parseExpr(Imm&: Stream.Val))
8573	return false;
8574	}
8575	}
8576
8577	return skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis");
8578	}
8579
8580	bool
8581	AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
8582	const OperandInfoTy &Op,
8583	const OperandInfoTy &Stream) {
8584	using namespace llvm::AMDGPU::SendMsg;
8585
8586	// Validation strictness depends on whether message is specified
8587	// in a symbolic or in a numeric form. In the latter case
8588	// only encoding possibility is checked.
8589	bool Strict = Msg.IsSymbolic;
8590
8591	if (Strict) {
8592	if (Msg.Val == OPR_ID_UNSUPPORTED) {
8593	Error(L: Msg.Loc, Msg: "specified message id is not supported on this GPU");
8594	return false;
8595	}
8596	} else {
8597	if (!isValidMsgId(MsgId: Msg.Val, STI: getSTI())) {
8598	Error(L: Msg.Loc, Msg: "invalid message id");
8599	return false;
8600	}
8601	}
8602	if (Strict && (msgRequiresOp(MsgId: Msg.Val, STI: getSTI()) != Op.IsDefined)) {
8603	if (Op.IsDefined) {
8604	Error(L: Op.Loc, Msg: "message does not support operations");
8605	} else {
8606	Error(L: Msg.Loc, Msg: "missing message operation");
8607	}
8608	return false;
8609	}
8610	if (!isValidMsgOp(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI(), Strict)) {
8611	if (Op.Val == OPR_ID_UNSUPPORTED)
8612	Error(L: Op.Loc, Msg: "specified operation id is not supported on this GPU");
8613	else
8614	Error(L: Op.Loc, Msg: "invalid operation id");
8615	return false;
8616	}
8617	if (Strict && !msgSupportsStream(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI()) &&
8618	Stream.IsDefined) {
8619	Error(L: Stream.Loc, Msg: "message operation does not support streams");
8620	return false;
8621	}
8622	if (!isValidMsgStream(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val, STI: getSTI(), Strict)) {
8623	Error(L: Stream.Loc, Msg: "invalid message stream id");
8624	return false;
8625	}
8626	return true;
8627	}
8628
8629	ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8630	using namespace llvm::AMDGPU::SendMsg;
8631
8632	int64_t ImmVal = `0`;
8633	SMLoc Loc = getLoc();
8634
8635	if (trySkipId(Id: "sendmsg", Kind: AsmToken::LParen)) {
8636	OperandInfoTy Msg(OPR_ID_UNKNOWN);
8637	OperandInfoTy Op(OP_NONE_);
8638	OperandInfoTy Stream(STREAM_ID_NONE_);
8639	if (parseSendMsgBody(Msg, Op, Stream) &&
8640	validateSendMsg(Msg, Op, Stream)) {
8641	ImmVal = encodeMsg(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val);
8642	} else {
8643	return ParseStatus::Failure;
8644	}
8645	} else if (parseExpr(Imm&: ImmVal, Expected: "a sendmsg macro")) {
8646	if (ImmVal < `0` \|\| !isUInt<`16`>(x: ImmVal))
8647	return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal");
8648	} else {
8649	return ParseStatus::Failure;
8650	}
8651
8652	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTySendMsg));
8653	return ParseStatus::Success;
8654	}
8655
8656	bool AMDGPUOperand::isSendMsg() const {
8657	return isImmTy(ImmT: ImmTySendMsg);
8658	}
8659
8660	ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8661	using namespace llvm::AMDGPU::WaitEvent;
8662
8663	SMLoc Loc = getLoc();
8664	int64_t ImmVal = `0`;
8665
8666	StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8667	`1`, `0`);
8668	StructuredOpField ExportReady("export_ready", "bit value", `1`, `0`);
8669
8670	StructuredOpField *TargetBitfield =
8671	isGFX11() ? &DontWaitExportReady : &ExportReady;
8672
8673	ParseStatus Res = parseStructuredOpFields(Fields: {TargetBitfield});
8674	if (Res.isNoMatch() && parseExpr(Imm&: ImmVal, Expected: "structured immediate"))
8675	Res = ParseStatus::Success;
8676	else if (Res.isSuccess()) {
8677	if (!validateStructuredOpFields(Fields: {TargetBitfield}))
8678	return ParseStatus::Failure;
8679	ImmVal = TargetBitfield->Val;
8680	}
8681
8682	if (!Res.isSuccess())
8683	return ParseStatus::Failure;
8684
8685	if (!isUInt<`16`>(x: ImmVal))
8686	return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal");
8687
8688	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc,
8689	Type: AMDGPUOperand::ImmTyWaitEvent));
8690	return ParseStatus::Success;
8691	}
8692
8693	bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmT: ImmTyWaitEvent); }
8694
8695	//===----------------------------------------------------------------------===//
8696	// v_interp
8697	//===----------------------------------------------------------------------===//
8698
8699	ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8700	StringRef Str;
8701	SMLoc S = getLoc();
8702
8703	if (!parseId(Val&: Str))
8704	return ParseStatus::NoMatch;
8705
8706	int Slot = StringSwitch<int>(Str)
8707	.Case(S: "p10", Value: `0`)
8708	.Case(S: "p20", Value: `1`)
8709	.Case(S: "p0", Value: `2`)
8710	.Default(Value: -`1`);
8711
8712	if (Slot == -`1`)
8713	return Error(L: S, Msg: "invalid interpolation slot");
8714
8715	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Slot, Loc: S,
8716	Type: AMDGPUOperand::ImmTyInterpSlot));
8717	return ParseStatus::Success;
8718	}
8719
8720	ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8721	StringRef Str;
8722	SMLoc S = getLoc();
8723
8724	if (!parseId(Val&: Str))
8725	return ParseStatus::NoMatch;
8726
8727	if (!Str.starts_with(Prefix: "attr"))
8728	return Error(L: S, Msg: "invalid interpolation attribute");
8729
8730	StringRef Chan = Str.take_back(N: `2`);
8731	int AttrChan = StringSwitch<int>(Chan)
8732	.Case(S: ".x", Value: `0`)
8733	.Case(S: ".y", Value: `1`)
8734	.Case(S: ".z", Value: `2`)
8735	.Case(S: ".w", Value: `3`)
8736	.Default(Value: -`1`);
8737	if (AttrChan == -`1`)
8738	return Error(L: S, Msg: "invalid or missing interpolation attribute channel");
8739
8740	Str = Str.drop_back(N: `2`).drop_front(N: `4`);
8741
8742	uint8_t Attr;
8743	if (Str.getAsInteger(Radix: `10`, Result&: Attr))
8744	return Error(L: S, Msg: "invalid or missing interpolation attribute number");
8745
8746	if (Attr > `32`)
8747	return Error(L: S, Msg: "out of bounds interpolation attribute number");
8748
8749	SMLoc SChan = SMLoc::getFromPointer(Ptr: Chan.data());
8750
8751	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Attr, Loc: S,
8752	Type: AMDGPUOperand::ImmTyInterpAttr));
8753	Operands.push_back(Elt: AMDGPUOperand::CreateImm(
8754	AsmParser: this, Val: AttrChan, Loc: SChan, Type: AMDGPUOperand::ImmTyInterpAttrChan));
8755	return ParseStatus::Success;
8756	}
8757
8758	//===----------------------------------------------------------------------===//
8759	// exp
8760	//===----------------------------------------------------------------------===//
8761
8762	ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8763	using namespace llvm::AMDGPU::Exp;
8764
8765	StringRef Str;
8766	SMLoc S = getLoc();
8767
8768	if (!parseId(Val&: Str))
8769	return ParseStatus::NoMatch;
8770
8771	unsigned Id = getTgtId(Name: Str);
8772	if (Id == ET_INVALID \|\| !isSupportedTgtId(Id, STI: getSTI()))
8773	return Error(L: S, Msg: (Id == ET_INVALID)
8774	? "invalid exp target"
8775	: "exp target is not supported on this GPU");
8776
8777	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Id, Loc: S,
8778	Type: AMDGPUOperand::ImmTyExpTgt));
8779	return ParseStatus::Success;
8780	}
8781
8782	//===----------------------------------------------------------------------===//
8783	// parser helpers
8784	//===----------------------------------------------------------------------===//
8785
8786	bool
8787	AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8788	return Token.is(K: AsmToken::Identifier) && Token.getString() == Id;
8789	}
8790
8791	bool
8792	AMDGPUAsmParser::isId(const StringRef Id) const {
8793	return isId(Token: getToken(), Id);
8794	}
8795
8796	bool
8797	AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8798	return getTokenKind() == Kind;
8799	}
8800
8801	StringRef AMDGPUAsmParser::getId() const {
8802	return isToken(Kind: AsmToken::Identifier) ? getTokenStr() : StringRef ();
8803	}
8804
8805	bool
8806	AMDGPUAsmParser::trySkipId(const StringRef Id) {
8807	if (isId(Id)) {
8808	lex();
8809	return true;
8810	}
8811	return false;
8812	}
8813
8814	bool
8815	AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8816	if (isToken(Kind: AsmToken::Identifier)) {
8817	StringRef Tok = getTokenStr();
8818	if (Tok.starts_with(Prefix: Pref) && Tok.drop_front(N: Pref.size()) == Id) {
8819	lex();
8820	return true;
8821	}
8822	}
8823	return false;
8824	}
8825
8826	bool
8827	AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8828	if (isId(Id) && peekToken().is(K: Kind)) {
8829	lex();
8830	lex();
8831	return true;
8832	}
8833	return false;
8834	}
8835
8836	bool
8837	AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8838	if (isToken(Kind)) {
8839	lex();
8840	return true;
8841	}
8842	return false;
8843	}
8844
8845	bool
8846	AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8847	const StringRef ErrMsg) {
8848	if (!trySkipToken(Kind)) {
8849	Error(L: getLoc(), Msg: ErrMsg);
8850	return false;
8851	}
8852	return true;
8853	}
8854
8855	bool
8856	AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8857	SMLoc S = getLoc();
8858
8859	const MCExpr *Expr;
8860	if (Parser.parseExpression(Res&: Expr))
8861	return false;
8862
8863	if (Expr->evaluateAsAbsolute(Res&: Imm))
8864	return true;
8865
8866	if (Expected.empty()) {
8867	Error(L: S, Msg: "expected absolute expression");
8868	} else {
8869	Error(L: S, Msg: Twine ("expected ", Expected) +
8870	Twine (" or an absolute expression"));
8871	}
8872	return false;
8873	}
8874
8875	bool
8876	AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8877	SMLoc S = getLoc();
8878
8879	const MCExpr *Expr;
8880	if (Parser.parseExpression(Res&: Expr))
8881	return false;
8882
8883	int64_t IntVal;
8884	if (Expr->evaluateAsAbsolute(Res&: IntVal)) {
8885	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S));
8886	} else {
8887	Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S));
8888	}
8889	return true;
8890	}
8891
8892	bool
8893	AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8894	if (isToken(Kind: AsmToken::String)) {
8895	Val = getToken().getStringContents();
8896	lex();
8897	return true;
8898	}
8899	Error(L: getLoc(), Msg: ErrMsg);
8900	return false;
8901	}
8902
8903	bool
8904	AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8905	if (isToken(Kind: AsmToken::Identifier)) {
8906	Val = getTokenStr();
8907	lex();
8908	return true;
8909	}
8910	if (!ErrMsg.empty())
8911	Error(L: getLoc(), Msg: ErrMsg);
8912	return false;
8913	}
8914
8915	AsmToken
8916	AMDGPUAsmParser::getToken() const {
8917	return Parser.getTok();
8918	}
8919
8920	AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8921	return isToken(Kind: AsmToken::EndOfStatement)
8922	? getToken()
8923	: getLexer().peekTok(ShouldSkipSpace);
8924	}
8925
8926	void
8927	AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8928	auto TokCount = getLexer().peekTokens(Buf: Tokens);
8929
8930	for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8931	Tokens [Idx] = AsmToken (AsmToken::Error, "");
8932	}
8933
8934	AsmToken::TokenKind
8935	AMDGPUAsmParser::getTokenKind() const {
8936	return getLexer().getKind();
8937	}
8938
8939	SMLoc
8940	AMDGPUAsmParser::getLoc() const {
8941	return getToken().getLoc();
8942	}
8943
8944	StringRef
8945	AMDGPUAsmParser::getTokenStr() const {
8946	return getToken().getString();
8947	}
8948
8949	void
8950	AMDGPUAsmParser::lex() {
8951	Parser.Lex();
8952	}
8953
8954	const AMDGPUOperand &
8955	AMDGPUAsmParser::findMCOperand(const OperandVector &Operands,
8956	int MCOpIdx) const {
8957	for (const auto &Op : Operands) {
8958	const AMDGPUOperand &TargetOp = static_cast<AMDGPUOperand &>(*Op);
8959	if (TargetOp.getMCOpIdx() == MCOpIdx)
8960	return TargetOp;
8961	}
8962	llvm_unreachable("no such MC operand!");
8963	}
8964
8965	SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8966	return ((AMDGPUOperand &)*Operands [`0`]).getStartLoc();
8967	}
8968
8969	// Returns one of the given locations that comes later in the source.
8970	SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8971	return a.getPointer() < b.getPointer() ? b : a;
8972	}
8973
8974	SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8975	int MCOpIdx) const {
8976	return findMCOperand(Operands, MCOpIdx).getStartLoc();
8977	}
8978
8979	SMLoc
8980	AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8981	const OperandVector &Operands) const {
8982	for (unsigned i = Operands.size() - `1`; i > `0`; --i) {
8983	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [i]);
8984	if (Test (Op))
8985	return Op.getStartLoc();
8986	}
8987	return getInstLoc(Operands);
8988	}
8989
8990	SMLoc
8991	AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8992	const OperandVector &Operands) const {
8993	auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(ImmT: Type); };
8994	return getOperandLoc(Test, Operands);
8995	}
8996
8997	ParseStatus
8998	AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
8999	if (!trySkipToken(Kind: AsmToken::LCurly))
9000	return ParseStatus::NoMatch;
9001
9002	bool First = true;
9003	while (!trySkipToken(Kind: AsmToken::RCurly)) {
9004	if (!First &&
9005	!skipToken(Kind: AsmToken::Comma, ErrMsg: "comma or closing brace expected"))
9006	return ParseStatus::Failure;
9007
9008	StringRef Id = getTokenStr();
9009	SMLoc IdLoc = getLoc();
9010	if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "field name expected") \|\|
9011	!skipToken(Kind: AsmToken::Colon, ErrMsg: "colon expected"))
9012	return ParseStatus::Failure;
9013
9014	const auto *I =
9015	find_if(Range&: Fields, P: [Id](StructuredOpField F) { return* F->Id == Id; });
9016	if (I == Fields.end())
9017	return Error(L: IdLoc, Msg: "unknown field");
9018	if ((*I)->IsDefined)
9019	return Error(L: IdLoc, Msg: "duplicate field");
9020
9021	// TODO: Support symbolic values.
9022	(*I)->Loc = getLoc();
9023	if (!parseExpr(Imm&: (*I)->Val))
9024	return ParseStatus::Failure;
9025	(I)->IsDefined = true*;
9026
9027	First = false;
9028	}
9029	return ParseStatus::Success;
9030	}
9031
9032	bool AMDGPUAsmParser::validateStructuredOpFields(
9033	ArrayRef<const StructuredOpField *> Fields) {
9034	return all_of(Range&: Fields, P: [this](const StructuredOpField *F) {
9035	return F->validate(Parser&: *this);
9036	});
9037	}
9038
9039	//===----------------------------------------------------------------------===//
9040	// swizzle
9041	//===----------------------------------------------------------------------===//
9042
9043	LLVM_READNONE
9044	static unsigned
9045	encodeBitmaskPerm(const unsigned AndMask,
9046	const unsigned OrMask,
9047	const unsigned XorMask) {
9048	using namespace llvm::AMDGPU::Swizzle;
9049
9050	return BITMASK_PERM_ENC \|
9051	(AndMask << BITMASK_AND_SHIFT) \|
9052	(OrMask << BITMASK_OR_SHIFT) \|
9053	(XorMask << BITMASK_XOR_SHIFT);
9054	}
9055
9056	bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
9057	const unsigned MaxVal,
9058	const Twine &ErrMsg, SMLoc &Loc) {
9059	if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma")) {
9060	return false;
9061	}
9062	Loc = getLoc();
9063	if (!parseExpr(Imm&: Op)) {
9064	return false;
9065	}
9066	if (Op < MinVal \|\| Op > MaxVal) {
9067	Error(L: Loc, Msg: ErrMsg);
9068	return false;
9069	}
9070
9071	return true;
9072	}
9073
9074	bool
9075	AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
9076	const unsigned MinVal,
9077	const unsigned MaxVal,
9078	const StringRef ErrMsg) {
9079	SMLoc Loc;
9080	for (unsigned i = `0`; i < OpNum; ++i) {
9081	if (!parseSwizzleOperand(Op&: Op[i], MinVal, MaxVal, ErrMsg, Loc))
9082	return false;
9083	}
9084
9085	return true;
9086	}
9087
9088	bool
9089	AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
9090	using namespace llvm::AMDGPU::Swizzle;
9091
9092	int64_t Lane[LANE_NUM];
9093	if (parseSwizzleOperands(OpNum: LANE_NUM, Op: Lane, MinVal: `0`, MaxVal: LANE_MAX,
9094	ErrMsg: "expected a 2-bit lane id")) {
9095	Imm = QUAD_PERM_ENC;
9096	for (unsigned I = `0`; I < LANE_NUM; ++I) {
9097	Imm \|= Lane[I] << (LANE_SHIFT * I);
9098	}
9099	return true;
9100	}
9101	return false;
9102	}
9103
9104	bool
9105	AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
9106	using namespace llvm::AMDGPU::Swizzle;
9107
9108	SMLoc Loc;
9109	int64_t GroupSize;
9110	int64_t LaneIdx;
9111
9112	if (!parseSwizzleOperand(Op&: GroupSize,
9113	MinVal: `2`, MaxVal: `32`,
9114	ErrMsg: "group size must be in the interval [2,32]",
9115	Loc)) {
9116	return false;
9117	}
9118	if (!isPowerOf2_64(Value: GroupSize)) {
9119	Error(L: Loc, Msg: "group size must be a power of two");
9120	return false;
9121	}
9122	if (parseSwizzleOperand(Op&: LaneIdx,
9123	MinVal: `0`, MaxVal: GroupSize - `1`,
9124	ErrMsg: "lane id must be in the interval [0,group size - 1]",
9125	Loc)) {
9126	Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX - GroupSize + `1`, OrMask: LaneIdx, XorMask: `0`);
9127	return true;
9128	}
9129	return false;
9130	}
9131
9132	bool
9133	AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
9134	using namespace llvm::AMDGPU::Swizzle;
9135
9136	SMLoc Loc;
9137	int64_t GroupSize;
9138
9139	if (!parseSwizzleOperand(Op&: GroupSize,
9140	MinVal: `2`, MaxVal: `32`,
9141	ErrMsg: "group size must be in the interval [2,32]",
9142	Loc)) {
9143	return false;
9144	}
9145	if (!isPowerOf2_64(Value: GroupSize)) {
9146	Error(L: Loc, Msg: "group size must be a power of two");
9147	return false;
9148	}
9149
9150	Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: `0`, XorMask: GroupSize - `1`);
9151	return true;
9152	}
9153
9154	bool
9155	AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
9156	using namespace llvm::AMDGPU::Swizzle;
9157
9158	SMLoc Loc;
9159	int64_t GroupSize;
9160
9161	if (!parseSwizzleOperand(Op&: GroupSize,
9162	MinVal: `1`, MaxVal: `16`,
9163	ErrMsg: "group size must be in the interval [1,16]",
9164	Loc)) {
9165	return false;
9166	}
9167	if (!isPowerOf2_64(Value: GroupSize)) {
9168	Error(L: Loc, Msg: "group size must be a power of two");
9169	return false;
9170	}
9171
9172	Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: `0`, XorMask: GroupSize);
9173	return true;
9174	}
9175
9176	bool
9177	AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
9178	using namespace llvm::AMDGPU::Swizzle;
9179
9180	if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma")) {
9181	return false;
9182	}
9183
9184	StringRef Ctl;
9185	SMLoc StrLoc = getLoc();
9186	if (!parseString(Val&: Ctl)) {
9187	return false;
9188	}
9189	if (Ctl.size() != BITMASK_WIDTH) {
9190	Error(L: StrLoc, Msg: "expected a 5-character mask");
9191	return false;
9192	}
9193
9194	unsigned AndMask = `0`;
9195	unsigned OrMask = `0`;
9196	unsigned XorMask = `0`;
9197
9198	for (size_t i = `0`; i < Ctl.size(); ++i) {
9199	unsigned Mask = `1` << (BITMASK_WIDTH - `1` - i);
9200	switch(Ctl [i]) {
9201	default:
9202	Error(L: StrLoc, Msg: "invalid mask");
9203	return false;
9204	case `'0'`:
9205	break;
9206	case `'1'`:
9207	OrMask \|= Mask;
9208	break;
9209	case `'p'`:
9210	AndMask \|= Mask;
9211	break;
9212	case `'i'`:
9213	AndMask \|= Mask;
9214	XorMask \|= Mask;
9215	break;
9216	}
9217	}
9218
9219	Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
9220	return true;
9221	}
9222
9223	bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
9224	using namespace llvm::AMDGPU::Swizzle;
9225
9226	if (!AMDGPU::isGFX9Plus(STI: getSTI())) {
9227	Error(L: getLoc(), Msg: "FFT mode swizzle not supported on this GPU");
9228	return false;
9229	}
9230
9231	int64_t Swizzle;
9232	SMLoc Loc;
9233	if (!parseSwizzleOperand(Op&: Swizzle, MinVal: `0`, MaxVal: FFT_SWIZZLE_MAX,
9234	ErrMsg: "FFT swizzle must be in the interval [0," +
9235	Twine (FFT_SWIZZLE_MAX) + Twine (`']'`),
9236	Loc))
9237	return false;
9238
9239	Imm = FFT_MODE_ENC \| Swizzle;
9240	return true;
9241	}
9242
9243	bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
9244	using namespace llvm::AMDGPU::Swizzle;
9245
9246	if (!AMDGPU::isGFX9Plus(STI: getSTI())) {
9247	Error(L: getLoc(), Msg: "Rotate mode swizzle not supported on this GPU");
9248	return false;
9249	}
9250
9251	SMLoc Loc;
9252	int64_t Direction;
9253
9254	if (!parseSwizzleOperand(Op&: Direction, MinVal: `0`, MaxVal: `1`,
9255	ErrMsg: "direction must be 0 (left) or 1 (right)", Loc))
9256	return false;
9257
9258	int64_t RotateSize;
9259	if (!parseSwizzleOperand(
9260	Op&: RotateSize, MinVal: `0`, MaxVal: ROTATE_MAX_SIZE,
9261	ErrMsg: "number of threads to rotate must be in the interval [0," +
9262	Twine (ROTATE_MAX_SIZE) + Twine (`']'`),
9263	Loc))
9264	return false;
9265
9266	Imm = ROTATE_MODE_ENC \| (Direction << ROTATE_DIR_SHIFT) \|
9267	(RotateSize << ROTATE_SIZE_SHIFT);
9268	return true;
9269	}
9270
9271	bool
9272	AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
9273
9274	SMLoc OffsetLoc = getLoc();
9275
9276	if (!parseExpr(Imm, Expected: "a swizzle macro")) {
9277	return false;
9278	}
9279	if (!isUInt<`16`>(x: Imm)) {
9280	Error(L: OffsetLoc, Msg: "expected a 16-bit offset");
9281	return false;
9282	}
9283	return true;
9284	}
9285
9286	bool
9287	AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
9288	using namespace llvm::AMDGPU::Swizzle;
9289
9290	if (skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parentheses")) {
9291
9292	SMLoc ModeLoc = getLoc();
9293	bool Ok = false;
9294
9295	if (trySkipId(Id: IdSymbolic[ID_QUAD_PERM])) {
9296	Ok = parseSwizzleQuadPerm(Imm);
9297	} else if (trySkipId(Id: IdSymbolic[ID_BITMASK_PERM])) {
9298	Ok = parseSwizzleBitmaskPerm(Imm);
9299	} else if (trySkipId(Id: IdSymbolic[ID_BROADCAST])) {
9300	Ok = parseSwizzleBroadcast(Imm);
9301	} else if (trySkipId(Id: IdSymbolic[ID_SWAP])) {
9302	Ok = parseSwizzleSwap(Imm);
9303	} else if (trySkipId(Id: IdSymbolic[ID_REVERSE])) {
9304	Ok = parseSwizzleReverse(Imm);
9305	} else if (trySkipId(Id: IdSymbolic[ID_FFT])) {
9306	Ok = parseSwizzleFFT(Imm);
9307	} else if (trySkipId(Id: IdSymbolic[ID_ROTATE])) {
9308	Ok = parseSwizzleRotate(Imm);
9309	} else {
9310	Error(L: ModeLoc, Msg: "expected a swizzle mode");
9311	}
9312
9313	return Ok && skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parentheses");
9314	}
9315
9316	return false;
9317	}
9318
9319	ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
9320	SMLoc S = getLoc();
9321	int64_t Imm = `0`;
9322
9323	if (trySkipId(Id: "offset")) {
9324
9325	bool Ok = false;
9326	if (skipToken(Kind: AsmToken::Colon, ErrMsg: "expected a colon")) {
9327	if (trySkipId(Id: "swizzle")) {
9328	Ok = parseSwizzleMacro(Imm);
9329	} else {
9330	Ok = parseSwizzleOffset(Imm);
9331	}
9332	}
9333
9334	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTySwizzle));
9335
9336	return Ok ? ParseStatus::Success : ParseStatus::Failure;
9337	}
9338	return ParseStatus::NoMatch;
9339	}
9340
9341	bool
9342	AMDGPUOperand::isSwizzle() const {
9343	return isImmTy(ImmT: ImmTySwizzle);
9344	}
9345
9346	//===----------------------------------------------------------------------===//
9347	// VGPR Index Mode
9348	//===----------------------------------------------------------------------===//
9349
9350	int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
9351
9352	using namespace llvm::AMDGPU::VGPRIndexMode;
9353
9354	if (trySkipToken(Kind: AsmToken::RParen)) {
9355	return OFF;
9356	}
9357
9358	int64_t Imm = `0`;
9359
9360	while (true) {
9361	unsigned Mode = `0`;
9362	SMLoc S = getLoc();
9363
9364	for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
9365	if (trySkipId(Id: IdSymbolic[ModeId])) {
9366	Mode = `1` << ModeId;
9367	break;
9368	}
9369	}
9370
9371	if (Mode == `0`) {
9372	Error(L: S, Msg: (Imm == `0`)?
9373	"expected a VGPR index mode or a closing parenthesis" :
9374	"expected a VGPR index mode");
9375	return UNDEF;
9376	}
9377
9378	if (Imm & Mode) {
9379	Error(L: S, Msg: "duplicate VGPR index mode");
9380	return UNDEF;
9381	}
9382	Imm \|= Mode;
9383
9384	if (trySkipToken(Kind: AsmToken::RParen))
9385	break;
9386	if (!skipToken(Kind: AsmToken::Comma,
9387	ErrMsg: "expected a comma or a closing parenthesis"))
9388	return UNDEF;
9389	}
9390
9391	return Imm;
9392	}
9393
9394	ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
9395
9396	using namespace llvm::AMDGPU::VGPRIndexMode;
9397
9398	int64_t Imm = `0`;
9399	SMLoc S = getLoc();
9400
9401	if (trySkipId(Id: "gpr_idx", Kind: AsmToken::LParen)) {
9402	Imm = parseGPRIdxMacro();
9403	if (Imm == UNDEF)
9404	return ParseStatus::Failure;
9405	} else {
9406	if (getParser().parseAbsoluteExpression(Res&: Imm))
9407	return ParseStatus::Failure;
9408	if (Imm < `0` \|\| !isUInt<`4`>(x: Imm))
9409	return Error(L: S, Msg: "invalid immediate: only 4-bit values are legal");
9410	}
9411
9412	Operands.push_back(
9413	Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTyGprIdxMode));
9414	return ParseStatus::Success;
9415	}
9416
9417	bool AMDGPUOperand::isGPRIdxMode() const {
9418	return isImmTy(ImmT: ImmTyGprIdxMode);
9419	}
9420
9421	//===----------------------------------------------------------------------===//
9422	// sopp branch targets
9423	//===----------------------------------------------------------------------===//
9424
9425	ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9426
9427	// Make sure we are not parsing something
9428	// that looks like a label or an expression but is not.
9429	// This will improve error messages.
9430	if (isRegister() \|\| isModifier())
9431	return ParseStatus::NoMatch;
9432
9433	if (!parseExpr(Operands))
9434	return ParseStatus::Failure;
9435
9436	AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands [Operands.size() - `1`]);
9437	assert(Opr.isImm() \|\| Opr.isExpr());
9438	SMLoc Loc = Opr.getStartLoc();
9439
9440	// Currently we do not support arbitrary expressions as branch targets.
9441	// Only labels and absolute expressions are accepted.
9442	if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9443	Error(L: Loc, Msg: "expected an absolute expression or a label");
9444	} else if (Opr.isImm() && !Opr.isS16Imm()) {
9445	Error(L: Loc, Msg: "expected a 16-bit signed jump offset");
9446	}
9447
9448	return ParseStatus::Success;
9449	}
9450
9451	//===----------------------------------------------------------------------===//
9452	// Boolean holding registers
9453	//===----------------------------------------------------------------------===//
9454
9455	ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9456	return parseReg(Operands);
9457	}
9458
9459	//===----------------------------------------------------------------------===//
9460	// mubuf
9461	//===----------------------------------------------------------------------===//
9462
9463	void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9464	const OperandVector &Operands,
9465	bool IsAtomic) {
9466	OptionalImmIndexMap OptionalIdx;
9467	unsigned FirstOperandIdx = `1`;
9468	bool IsAtomicReturn = false;
9469
9470	if (IsAtomic) {
9471	IsAtomicReturn = MII.get(Opcode: Inst.getOpcode()).TSFlags &
9472	SIInstrFlags::IsAtomicRet;
9473	}
9474
9475	for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9476	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [i]);
9477
9478	// Add the register arguments
9479	if (Op.isReg()) {
9480	Op.addRegOperands(Inst, N: `1`);
9481	// Insert a tied src for atomic return dst.
9482	// This cannot be postponed as subsequent calls to
9483	// addImmOperands rely on correct number of MC operands.
9484	if (IsAtomicReturn && i == FirstOperandIdx)
9485	Op.addRegOperands(Inst, N: `1`);
9486	continue;
9487	}
9488
9489	// Handle the case where soffset is an immediate
9490	if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9491	Op.addImmOperands(Inst, N: `1`);
9492	continue;
9493	}
9494
9495	// Handle tokens like 'offen' which are sometimes hard-coded into the
9496	// asm string. There are no MCInst operands for these.
9497	if (Op.isToken()) {
9498	continue;
9499	}
9500	assert(Op.isImm());
9501
9502	// Handle optional arguments
9503	OptionalIdx [Op.getImmTy()] = i;
9504	}
9505
9506	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOffset);
9507	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyCPol, Default: `0`);
9508	// Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9509	// agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9510	Inst.addOperand(Op: MCOperand::createImm(Val: `0`));
9511	}
9512
9513	//===----------------------------------------------------------------------===//
9514	// smrd
9515	//===----------------------------------------------------------------------===//
9516
9517	bool AMDGPUOperand::isSMRDOffset8() const {
9518	return isImmLiteral() && isUInt<`8`>(x: getImm());
9519	}
9520
9521	bool AMDGPUOperand::isSMEMOffset() const {
9522	// Offset range is checked later by validator.
9523	return isImmLiteral();
9524	}
9525
9526	bool AMDGPUOperand::isSMRDLiteralOffset() const {
9527	// 32-bit literals are only supported on CI and we only want to use them
9528	// when the offset is > 8-bits.
9529	return isImmLiteral() && !isUInt<`8`>(x: getImm()) && isUInt<`32`>(x: getImm());
9530	}
9531
9532	//===----------------------------------------------------------------------===//
9533	// vop3
9534	//===----------------------------------------------------------------------===//
9535
9536	static bool ConvertOmodMul(int64_t &Mul) {
9537	if (Mul != `1` && Mul != `2` && Mul != `4`)
9538	return false;
9539
9540	Mul >>= `1`;
9541	return true;
9542	}
9543
9544	static bool ConvertOmodDiv(int64_t &Div) {
9545	if (Div == `1`) {
9546	Div = `0`;
9547	return true;
9548	}
9549
9550	if (Div == `2`) {
9551	Div = `3`;
9552	return true;
9553	}
9554
9555	return false;
9556	}
9557
9558	// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9559	// This is intentional and ensures compatibility with sp3.
9560	// See bug 35397 for details.
9561	bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9562	if (BoundCtrl == `0` \|\| BoundCtrl == `1`) {
9563	if (!isGFX11Plus())
9564	BoundCtrl = `1`;
9565	return true;
9566	}
9567	return false;
9568	}
9569
9570	void AMDGPUAsmParser::onBeginOfFile() {
9571	if (!getParser().getStreamer().getTargetStreamer())
9572	return;
9573
9574	if (!getTargetStreamer().getTargetID())
9575	getTargetStreamer().initializeTargetID(STI: getSTI(),
9576	/ApplyFeatureString=/true);
9577	}
9578
9579	void AMDGPUAsmParser::emitTargetDirective() {
9580	if (TargetDirectiveEmitted)
9581	return;
9582	TargetDirectiveEmitted = true;
9583
9584	if (!getParser().getStreamer().getTargetStreamer() \|\|
9585	getSTI().getTargetTriple().getArch() == Triple::r600)
9586	return;
9587
9588	if (isHsaAbi(STI: getSTI()))
9589	getTargetStreamer().EmitDirectiveAMDGCNTarget();
9590	}
9591
9592	/// Parse AMDGPU specific expressions.
9593	///
9594	/// expr ::= or(expr, ...) \|
9595	/// max(expr, ...) \|
9596	/// min(expr, ...)
9597	///
9598	bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9599	using AGVK = AMDGPUMCExpr::VariantKind;
9600
9601	if (isToken(Kind: AsmToken::Identifier)) {
9602	StringRef TokenId = getTokenStr();
9603	AGVK VK = StringSwitch<AGVK>(TokenId)
9604	.Case(S: "max", Value: AGVK::AGVK_Max)
9605	.Case(S: "min", Value: AGVK::AGVK_Min)
9606	.Case(S: "or", Value: AGVK::AGVK_Or)
9607	.Case(S: "extrasgprs", Value: AGVK::AGVK_ExtraSGPRs)
9608	.Case(S: "totalnumvgprs", Value: AGVK::AGVK_TotalNumVGPRs)
9609	.Case(S: "alignto", Value: AGVK::AGVK_AlignTo)
9610	.Case(S: "occupancy", Value: AGVK::AGVK_Occupancy)
9611	.Case(S: "instprefsize", Value: AGVK::AGVK_InstPrefSize)
9612	.Default(Value: AGVK::AGVK_None);
9613
9614	if (VK != AGVK::AGVK_None && peekToken().is(K: AsmToken::LParen)) {
9615	SmallVector<const MCExpr *, `4`> Exprs;
9616	uint64_t CommaCount = `0`;
9617	lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9618	lex(); // Eat '('
9619	while (true) {
9620	if (trySkipToken(Kind: AsmToken::RParen)) {
9621	if (Exprs.empty()) {
9622	Error(L: getToken().getLoc(),
9623	Msg: "empty " + Twine (TokenId) + " expression");
9624	return true;
9625	}
9626	if (CommaCount + `1` != Exprs.size()) {
9627	Error(L: getToken().getLoc(),
9628	Msg: "mismatch of commas in " + Twine (TokenId) + " expression");
9629	return true;
9630	}
9631	if (unsigned Expected = AMDGPUMCExpr::getNumExpectedArgs(Kind: VK);
9632	Expected && Exprs.size() != Expected) {
9633	Error(L: getToken().getLoc(), Msg: Twine (TokenId) + " expression expects " +
9634	Twine (Expected) + " operands");
9635	return true;
9636	}
9637	Res = AMDGPUMCExpr::create(Kind: VK, Args: Exprs, Ctx&: getContext());
9638	return false;
9639	}
9640	const MCExpr *Expr;
9641	if (getParser().parseExpression(Res&: Expr, EndLoc))
9642	return true;
9643	Exprs.push_back(Elt: Expr);
9644	bool LastTokenWasComma = trySkipToken(Kind: AsmToken::Comma);
9645	if (LastTokenWasComma)
9646	CommaCount++;
9647	if (!LastTokenWasComma && !isToken(Kind: AsmToken::RParen)) {
9648	Error(L: getToken().getLoc(),
9649	Msg: "unexpected token in " + Twine (TokenId) + " expression");
9650	return true;
9651	}
9652	}
9653	}
9654	}
9655	return getParser().parsePrimaryExpr(Res, EndLoc, TypeInfo: nullptr);
9656	}
9657
9658	ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9659	StringRef Name = getTokenStr();
9660	if (Name == "mul") {
9661	return parseIntWithPrefix(Prefix: "mul", Operands,
9662	ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodMul);
9663	}
9664
9665	if (Name == "div") {
9666	return parseIntWithPrefix(Prefix: "div", Operands,
9667	ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodDiv);
9668	}
9669
9670	return ParseStatus::NoMatch;
9671	}
9672
9673	// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
9674	// the number of src operands present, then copies that bit into src0_modifiers.
9675	static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
9676	int Opc = Inst.getOpcode();
9677	int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
9678	if (OpSelIdx == -`1`)
9679	return;
9680
9681	int SrcNum;
9682	const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9683	AMDGPU::OpName::src2};
9684	for (SrcNum = `0`; SrcNum < `3` && AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: Ops[SrcNum]);
9685	++SrcNum)
9686	;
9687	assert(SrcNum > `0`);
9688
9689	unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
9690
9691	int DstIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst);
9692	if (DstIdx == -`1`)
9693	return;
9694
9695	const MCOperand &DstOp = Inst.getOperand(i: DstIdx);
9696	int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0_modifiers);
9697	uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm();
9698	if (DstOp.isReg() &&
9699	MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: DstOp.getReg())) {
9700	if (AMDGPU::isHi16Reg(Reg: DstOp.getReg(), MRI))
9701	ModVal \|= SISrcMods::DST_OP_SEL;
9702	} else {
9703	if ((OpSel & (`1` << SrcNum)) != `0`)
9704	ModVal \|= SISrcMods::DST_OP_SEL;
9705	}
9706	Inst.getOperand(i: ModIdx).setImm(ModVal);
9707	}
9708
9709	void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
9710	const OperandVector &Operands) {
9711	cvtVOP3P(Inst, Operands);
9712	cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI());
9713	}
9714
9715	void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
9716	OptionalImmIndexMap &OptionalIdx) {
9717	cvtVOP3P(Inst, Operands, OptionalIdx);
9718	cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI());
9719	}
9720
9721	static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9722	return
9723	// 1. This operand is input modifiers
9724	Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9725	// 2. This is not last operand
9726	&& Desc.NumOperands > (OpNum + `1`)
9727	// 3. Next operand is register class
9728	&& Desc.operands()[OpNum + `1`].RegClass != -`1`
9729	// 4. Next register is not tied to any other operand
9730	&& Desc.getOperandConstraint(OpNum: OpNum + `1`,
9731	Constraint: MCOI::OperandConstraint::TIED_TO) == -`1`;
9732	}
9733
9734	void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9735	unsigned Opc = Inst.getOpcode();
9736	constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9737	AMDGPU::OpName::src2};
9738	constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9739	AMDGPU::OpName::src1_modifiers,
9740	AMDGPU::OpName::src2_modifiers};
9741	for (int J = `0`; J < `3`; ++J) {
9742	int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: Ops[J]);
9743	if (OpIdx == -`1`)
9744	// Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9745	// no src1. So continue instead of break.
9746	continue;
9747
9748	int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
9749	uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm();
9750
9751	if ((OpSel & (`1` << J)) != `0`)
9752	ModVal \|= SISrcMods::OP_SEL_0;
9753	// op_sel[3] is encoded in src0_modifiers.
9754	if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (`1` << `3`)) != `0`)
9755	ModVal \|= SISrcMods::DST_OP_SEL;
9756
9757	Inst.getOperand(i: ModIdx).setImm(ModVal);
9758	}
9759	}
9760
9761	void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9762	{
9763	OptionalImmIndexMap OptionalIdx;
9764	unsigned Opc = Inst.getOpcode();
9765
9766	unsigned I = `1`;
9767	const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
9768	for (unsigned J = `0`; J < Desc.getNumDefs(); ++J) {
9769	((AMDGPUOperand &)*Operands [I++]).addRegOperands(Inst, N: `1`);
9770	}
9771
9772	for (unsigned E = Operands.size(); I != E; ++I) {
9773	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [I]);
9774	if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
9775	Op.addRegOrImmWithFPInputModsOperands(Inst, N: `2`);
9776	} else if (Op.isInterpSlot() \|\| Op.isInterpAttr() \|\|
9777	Op.isInterpAttrChan()) {
9778	Inst.addOperand(Op: MCOperand::createImm(Val: Op.getImm()));
9779	} else if (Op.isImmModifier()) {
9780	OptionalIdx [Op.getImmTy()] = I;
9781	} else {
9782	llvm_unreachable("unhandled operand type");
9783	}
9784	}
9785
9786	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::high))
9787	addOptionalImmOperand(Inst, Operands, OptionalIdx,
9788	ImmT: AMDGPUOperand::ImmTyHigh);
9789
9790	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
9791	addOptionalImmOperand(Inst, Operands, OptionalIdx,
9792	ImmT: AMDGPUOperand::ImmTyClamp);
9793
9794	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
9795	addOptionalImmOperand(Inst, Operands, OptionalIdx,
9796	ImmT: AMDGPUOperand::ImmTyOModSI);
9797
9798	// Some v_interp instructions use op_sel[3] for dst.
9799	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
9800	addOptionalImmOperand(Inst, Operands, OptionalIdx,
9801	ImmT: AMDGPUOperand::ImmTyOpSel);
9802	int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
9803	unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
9804
9805	cvtOpSelHelper(Inst, OpSel);
9806	}
9807	}
9808
9809	void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9810	{
9811	OptionalImmIndexMap OptionalIdx;
9812	unsigned Opc = Inst.getOpcode();
9813
9814	unsigned I = `1`;
9815	const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
9816	for (unsigned J = `0`; J < Desc.getNumDefs(); ++J) {
9817	((AMDGPUOperand &)*Operands [I++]).addRegOperands(Inst, N: `1`);
9818	}
9819
9820	for (unsigned E = Operands.size(); I != E; ++I) {
9821	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [I]);
9822	if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
9823	Op.addRegOrImmWithFPInputModsOperands(Inst, N: `2`);
9824	} else if (Op.isImmModifier()) {
9825	OptionalIdx [Op.getImmTy()] = I;
9826	} else {
9827	llvm_unreachable("unhandled operand type");
9828	}
9829	}
9830
9831	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyClamp);
9832
9833	int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
9834	if (OpSelIdx != -`1`)
9835	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel);
9836
9837	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyWaitEXP);
9838
9839	if (OpSelIdx == -`1`)
9840	return;
9841
9842	unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm();
9843	cvtOpSelHelper(Inst, OpSel);
9844	}
9845
9846	void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9847	const OperandVector &Operands) {
9848	OptionalImmIndexMap OptionalIdx;
9849	unsigned Opc = Inst.getOpcode();
9850	unsigned I = `1`;
9851	int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz);
9852
9853	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
9854
9855	for (unsigned J = `0`; J < Desc.getNumDefs(); ++J)
9856	static_cast<AMDGPUOperand &>(*Operands [I++]).addRegOperands(Inst, N: `1`);
9857
9858	for (unsigned E = Operands.size(); I != E; ++I) {
9859	AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands [I]);
9860	int NumOperands = Inst.getNumOperands();
9861	// The order of operands in MCInst and parsed operands are different.
9862	// Adding dummy cbsz and blgp operands at corresponding MCInst operand
9863	// indices for parsing scale values correctly.
9864	if (NumOperands == CbszOpIdx) {
9865	Inst.addOperand(Op: MCOperand::createImm(Val: `0`));
9866	Inst.addOperand(Op: MCOperand::createImm(Val: `0`));
9867	}
9868	if (isRegOrImmWithInputMods(Desc, OpNum: NumOperands)) {
9869	Op.addRegOrImmWithFPInputModsOperands(Inst, N: `2`);
9870	} else if (Op.isImmModifier()) {
9871	OptionalIdx [Op.getImmTy()] = I;
9872	} else {
9873	Op.addRegOrImmOperands(Inst, N: `1`);
9874	}
9875	}
9876
9877	// Insert CBSZ and BLGP operands for F8F6F4 variants
9878	auto CbszIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyCBSZ);
9879	if (CbszIdx != OptionalIdx.end()) {
9880	int CbszVal = ((AMDGPUOperand &)*Operands [CbszIdx ->second]).getImm();
9881	Inst.getOperand(i: CbszOpIdx).setImm(CbszVal);
9882	}
9883
9884	int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp);
9885	auto BlgpIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyBLGP);
9886	if (BlgpIdx != OptionalIdx.end()) {
9887	int BlgpVal = ((AMDGPUOperand &)*Operands [BlgpIdx ->second]).getImm();
9888	Inst.getOperand(i: BlgpOpIdx).setImm(BlgpVal);
9889	}
9890
9891	// Add dummy src_modifiers
9892	Inst.addOperand(Op: MCOperand::createImm(Val: `0`));
9893	Inst.addOperand(Op: MCOperand::createImm(Val: `0`));
9894
9895	// Handle op_sel fields
9896
9897	unsigned OpSel = `0`;
9898	auto OpselIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyOpSel);
9899	if (OpselIdx != OptionalIdx.end()) {
9900	OpSel = static_cast<const AMDGPUOperand &>(*Operands [OpselIdx ->second])
9901	.getImm();
9902	}
9903
9904	unsigned OpSelHi = `0`;
9905	auto OpselHiIdx = OptionalIdx.find(x: AMDGPUOperand::ImmTyOpSelHi);
9906	if (OpselHiIdx != OptionalIdx.end()) {
9907	OpSelHi = static_cast<const AMDGPUOperand &>(*Operands [OpselHiIdx ->second])
9908	.getImm();
9909	}
9910	const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9911	AMDGPU::OpName::src1_modifiers};
9912
9913	for (unsigned J = `0`; J < `2`; ++J) {
9914	unsigned ModVal = `0`;
9915	if (OpSel & (`1` << J))
9916	ModVal \|= SISrcMods::OP_SEL_0;
9917	if (OpSelHi & (`1` << J))
9918	ModVal \|= SISrcMods::OP_SEL_1;
9919
9920	const int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
9921	Inst.getOperand(i: ModIdx).setImm(ModVal);
9922	}
9923	}
9924
9925	void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9926	OptionalImmIndexMap &OptionalIdx) {
9927	unsigned Opc = Inst.getOpcode();
9928
9929	unsigned I = `1`;
9930	const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
9931	for (unsigned J = `0`; J < Desc.getNumDefs(); ++J) {
9932	((AMDGPUOperand &)*Operands [I++]).addRegOperands(Inst, N: `1`);
9933	}
9934
9935	for (unsigned E = Operands.size(); I != E; ++I) {
9936	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [I]);
9937	if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
9938	Op.addRegOrImmWithFPInputModsOperands(Inst, N: `2`);
9939	} else if (Op.isImmModifier()) {
9940	OptionalIdx [Op.getImmTy()] = I;
9941	} else {
9942	Op.addRegOrImmOperands(Inst, N: `1`);
9943	}
9944	}
9945
9946	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::scale_sel))
9947	addOptionalImmOperand(Inst, Operands, OptionalIdx,
9948	ImmT: AMDGPUOperand::ImmTyScaleSel);
9949
9950	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
9951	addOptionalImmOperand(Inst, Operands, OptionalIdx,
9952	ImmT: AMDGPUOperand::ImmTyClamp);
9953
9954	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::byte_sel)) {
9955	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in))
9956	Inst.addOperand(Op: Inst.getOperand(i: `0`));
9957	addOptionalImmOperand(Inst, Operands, OptionalIdx,
9958	ImmT: AMDGPUOperand::ImmTyByteSel);
9959	}
9960
9961	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
9962	addOptionalImmOperand(Inst, Operands, OptionalIdx,
9963	ImmT: AMDGPUOperand::ImmTyOModSI);
9964
9965	// Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9966	// it has src2 register operand that is tied to dst operand
9967	// we don't allow modifiers for this operand in assembler so src2_modifiers
9968	// should be 0.
9969	if (isMAC(Opc)) {
9970	auto *it = Inst.begin();
9971	std::advance(i&: it, n: AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2_modifiers));
9972	it = Inst.insert(I: it, Op: MCOperand::createImm(Val: `0`)); // no modifiers for src2
9973	++it;
9974	// Copy the operand to ensure it's not invalidated when Inst grows.
9975	Inst.insert(I: it, Op: MCOperand (Inst.getOperand(i: `0`))); // src2 = dst
9976	}
9977	}
9978
9979	void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
9980	OptionalImmIndexMap OptionalIdx;
9981	cvtVOP3(Inst, Operands, OptionalIdx);
9982	}
9983
9984	void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
9985	OptionalImmIndexMap &OptIdx) {
9986	const int Opc = Inst.getOpcode();
9987	const MCInstrDesc &Desc = MII.get(Opcode: Opc);
9988
9989	const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != `0`;
9990
9991	if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi \|\|
9992	Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi \|\|
9993	Opc == AMDGPU::V_CVT_SR_BF8_F32_vi \|\|
9994	Opc == AMDGPU::V_CVT_SR_FP8_F32_vi \|\|
9995	Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 \|\|
9996	Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 \|\|
9997	Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 \|\|
9998	Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12 \|\|
9999	Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx13 \|\|
10000	Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx13) {
10001	Inst.addOperand(Op: MCOperand::createImm(Val: `0`)); // Placeholder for src2_mods
10002	Inst.addOperand(Op: Inst.getOperand(i: `0`));
10003	}
10004
10005	// Append vdst_in only if a previous converter (cvtVOP3DPP for DPP variants,
10006	// cvtVOP3 for byte_sel variants) hasn't already placed it. Use the position
10007	// of the named operand to detect that, the same way cvtVOP3DPP does
10008	// internally.
10009	int VdstInIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst_in);
10010	if (VdstInIdx != -`1` && VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10011	Inst.addOperand(Op: Inst.getOperand(i: `0`));
10012
10013	int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::bitop3);
10014	if (BitOp3Idx != -`1`) {
10015	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyBitOp3);
10016	}
10017
10018	// FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
10019	// instruction, and then figure out where to actually put the modifiers
10020
10021	int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel);
10022	if (OpSelIdx != -`1`) {
10023	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSel);
10024	}
10025
10026	int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi);
10027	if (OpSelHiIdx != -`1`) {
10028	int DefaultVal = IsPacked ? -`1` : `0`;
10029	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSelHi,
10030	Default: DefaultVal);
10031	}
10032
10033	int MatrixAFMTIdx =
10034	AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_fmt);
10035	if (MatrixAFMTIdx != -`1`) {
10036	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10037	ImmT: AMDGPUOperand::ImmTyMatrixAFMT, Default: `0`);
10038	}
10039
10040	int MatrixBFMTIdx =
10041	AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_fmt);
10042	if (MatrixBFMTIdx != -`1`) {
10043	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10044	ImmT: AMDGPUOperand::ImmTyMatrixBFMT, Default: `0`);
10045	}
10046
10047	int MatrixAScaleIdx =
10048	AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_scale);
10049	if (MatrixAScaleIdx != -`1`) {
10050	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10051	ImmT: AMDGPUOperand::ImmTyMatrixAScale, Default: `0`);
10052	}
10053
10054	int MatrixBScaleIdx =
10055	AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_scale);
10056	if (MatrixBScaleIdx != -`1`) {
10057	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10058	ImmT: AMDGPUOperand::ImmTyMatrixBScale, Default: `0`);
10059	}
10060
10061	int MatrixAScaleFmtIdx =
10062	AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_a_scale_fmt);
10063	if (MatrixAScaleFmtIdx != -`1`) {
10064	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10065	ImmT: AMDGPUOperand::ImmTyMatrixAScaleFmt, Default: `0`);
10066	}
10067
10068	int MatrixBScaleFmtIdx =
10069	AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::matrix_b_scale_fmt);
10070	if (MatrixBScaleFmtIdx != -`1`) {
10071	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10072	ImmT: AMDGPUOperand::ImmTyMatrixBScaleFmt, Default: `0`);
10073	}
10074
10075	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::matrix_a_reuse))
10076	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10077	ImmT: AMDGPUOperand::ImmTyMatrixAReuse, Default: `0`);
10078
10079	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::matrix_b_reuse))
10080	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10081	ImmT: AMDGPUOperand::ImmTyMatrixBReuse, Default: `0`);
10082
10083	int NegLoIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::neg_lo);
10084	if (NegLoIdx != -`1`)
10085	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegLo);
10086
10087	int NegHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::neg_hi);
10088	if (NegHiIdx != -`1`)
10089	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegHi);
10090
10091	const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
10092	AMDGPU::OpName::src2};
10093	const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
10094	AMDGPU::OpName::src1_modifiers,
10095	AMDGPU::OpName::src2_modifiers};
10096
10097	unsigned OpSel = `0`;
10098	unsigned OpSelHi = `0`;
10099	unsigned NegLo = `0`;
10100	unsigned NegHi = `0`;
10101
10102	if (OpSelIdx != -`1`)
10103	OpSel = Inst.getOperand(i: OpSelIdx).getImm();
10104
10105	if (OpSelHiIdx != -`1`)
10106	OpSelHi = Inst.getOperand(i: OpSelHiIdx).getImm();
10107
10108	if (NegLoIdx != -`1`)
10109	NegLo = Inst.getOperand(i: NegLoIdx).getImm();
10110
10111	if (NegHiIdx != -`1`)
10112	NegHi = Inst.getOperand(i: NegHiIdx).getImm();
10113
10114	for (int J = `0`; J < `3`; ++J) {
10115	int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: Ops[J]);
10116	if (OpIdx == -`1`)
10117	break;
10118
10119	int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
10120
10121	if (ModIdx == -`1`)
10122	continue;
10123
10124	// For MAC instructions, src2 is tied to vdst and its op_sel bit
10125	// is not encoded.
10126	if (AMDGPU::isMAC(Opc) && ModOps[J] == AMDGPU::OpName::src2_modifiers)
10127	continue;
10128
10129	uint32_t ModVal = `0`;
10130
10131	const MCOperand &SrcOp = Inst.getOperand(i: OpIdx);
10132	if (SrcOp.isReg() && getMRI()
10133	->getRegClass(i: AMDGPU::VGPR_16RegClassID)
10134	.contains(Reg: SrcOp.getReg())) {
10135	bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Reg: SrcOp.getReg(), MRI: *getMRI());
10136	if (VGPRSuffixIsHi)
10137	ModVal \|= SISrcMods::OP_SEL_0;
10138	} else {
10139	if ((OpSel & (`1` << J)) != `0`)
10140	ModVal \|= SISrcMods::OP_SEL_0;
10141	}
10142
10143	if ((OpSelHi & (`1` << J)) != `0`)
10144	ModVal \|= SISrcMods::OP_SEL_1;
10145
10146	if ((NegLo & (`1` << J)) != `0`)
10147	ModVal \|= SISrcMods::NEG;
10148
10149	if ((NegHi & (`1` << J)) != `0`)
10150	ModVal \|= SISrcMods::NEG_HI;
10151
10152	Inst.getOperand(i: ModIdx).setImm(Inst.getOperand(i: ModIdx).getImm() \| ModVal);
10153	}
10154	}
10155
10156	void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
10157	OptionalImmIndexMap OptIdx;
10158	cvtVOP3(Inst, Operands, OptionalIdx&: OptIdx);
10159	cvtVOP3P(Inst, Operands, OptIdx);
10160	}
10161
10162	static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
10163	unsigned i, unsigned Opc,
10164	AMDGPU::OpName OpName) {
10165	if (AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName) != -`1`)
10166	((AMDGPUOperand &)*Operands [i]).addRegOrImmWithFPInputModsOperands(Inst, N: `2`);
10167	else
10168	((AMDGPUOperand &)*Operands [i]).addRegOperands(Inst, N: `1`);
10169	}
10170
10171	void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
10172	unsigned Opc = Inst.getOpcode();
10173
10174	((AMDGPUOperand &)*Operands [`1`]).addRegOperands(Inst, N: `1`);
10175	addSrcModifiersAndSrc(Inst, Operands, i: `2`, Opc, OpName: AMDGPU::OpName::src0_modifiers);
10176	addSrcModifiersAndSrc(Inst, Operands, i: `3`, Opc, OpName: AMDGPU::OpName::src1_modifiers);
10177	((AMDGPUOperand &)Operands [`1`]).addRegOperands(Inst, N: `1`); // srcTiedDef*
10178	((AMDGPUOperand &)Operands [`4`]).addRegOperands(Inst, N: `1`); // src2*
10179
10180	OptionalImmIndexMap OptIdx;
10181	for (unsigned i = `5`; i < Operands.size(); ++i) {
10182	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [i]);
10183	OptIdx [Op.getImmTy()] = i;
10184	}
10185
10186	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_8bit))
10187	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10188	ImmT: AMDGPUOperand::ImmTyIndexKey8bit);
10189
10190	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_16bit))
10191	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10192	ImmT: AMDGPUOperand::ImmTyIndexKey16bit);
10193
10194	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::index_key_32bit))
10195	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx,
10196	ImmT: AMDGPUOperand::ImmTyIndexKey32bit);
10197
10198	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
10199	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyClamp);
10200
10201	cvtVOP3P(Inst, Operands, OptIdx);
10202	}
10203
10204	//===----------------------------------------------------------------------===//
10205	// VOPD
10206	//===----------------------------------------------------------------------===//
10207
10208	ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
10209	if (!hasVOPD(STI: getSTI()))
10210	return ParseStatus::NoMatch;
10211
10212	if (isToken(Kind: AsmToken::Colon) && peekToken(ShouldSkipSpace: false).is(K: AsmToken::Colon)) {
10213	SMLoc S = getLoc();
10214	lex();
10215	lex();
10216	Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "::", Loc: S));
10217	SMLoc OpYLoc = getLoc();
10218	StringRef OpYName;
10219	if (isToken(Kind: AsmToken::Identifier) && !Parser.parseIdentifier(Res&: OpYName)) {
10220	Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: OpYName, Loc: OpYLoc));
10221	return ParseStatus::Success;
10222	}
10223	return Error(L: OpYLoc, Msg: "expected a VOPDY instruction after ::");
10224	}
10225	return ParseStatus::NoMatch;
10226	}
10227
10228	// Create VOPD MCInst operands using parsed assembler operands.
10229	void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
10230	const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
10231
10232	auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
10233	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [ParsedOprIdx]);
10234	if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
10235	Op.addRegOrImmWithFPInputModsOperands(Inst, N: `2`);
10236	return;
10237	}
10238	if (Op.isReg()) {
10239	Op.addRegOperands(Inst, N: `1`);
10240	return;
10241	}
10242	if (Op.isImm()) {
10243	Op.addImmOperands(Inst, N: `1`);
10244	return;
10245	}
10246	llvm_unreachable("Unhandled operand type in cvtVOPD");
10247	};
10248
10249	const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Inst.getOpcode(), InstrInfo: &MII);
10250
10251	// MCInst operands are ordered as follows:
10252	// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
10253
10254	for (auto CompIdx : VOPD::COMPONENTS) {
10255	addOp (InstInfo [CompIdx].getIndexOfDstInParsedOperands());
10256	}
10257
10258	for (auto CompIdx : VOPD::COMPONENTS) {
10259	const auto &CInfo = InstInfo [CompIdx];
10260	auto CompSrcOperandsNum = InstInfo [CompIdx].getCompParsedSrcOperandsNum();
10261	for (unsigned CompSrcIdx = `0`; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
10262	addOp (CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
10263	if (CInfo.hasSrc2Acc())
10264	addOp (CInfo.getIndexOfDstInParsedOperands());
10265	}
10266
10267	int BitOp3Idx =
10268	AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::bitop3);
10269	if (BitOp3Idx != -`1`) {
10270	OptionalImmIndexMap OptIdx;
10271	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
10272	if (Op.isImm())
10273	OptIdx [Op.getImmTy()] = Operands.size() - `1`;
10274
10275	addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyBitOp3);
10276	}
10277	}
10278
10279	//===----------------------------------------------------------------------===//
10280	// dpp
10281	//===----------------------------------------------------------------------===//
10282
10283	bool AMDGPUOperand::isDPP8() const {
10284	return isImmTy(ImmT: ImmTyDPP8);
10285	}
10286
10287	bool AMDGPUOperand::isDPPCtrl() const {
10288	using namespace AMDGPU::DPP;
10289
10290	bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<`9`>(x: getImm());
10291	if (result) {
10292	int64_t Imm = getImm();
10293	return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) \|\|
10294	(Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) \|\|
10295	(Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) \|\|
10296	(Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) \|\|
10297	(Imm == DppCtrl::WAVE_SHL1) \|\|
10298	(Imm == DppCtrl::WAVE_ROL1) \|\|
10299	(Imm == DppCtrl::WAVE_SHR1) \|\|
10300	(Imm == DppCtrl::WAVE_ROR1) \|\|
10301	(Imm == DppCtrl::ROW_MIRROR) \|\|
10302	(Imm == DppCtrl::ROW_HALF_MIRROR) \|\|
10303	(Imm == DppCtrl::BCAST15) \|\|
10304	(Imm == DppCtrl::BCAST31) \|\|
10305	(Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) \|\|
10306	(Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
10307	}
10308	return false;
10309	}
10310
10311	//===----------------------------------------------------------------------===//
10312	// mAI
10313	//===----------------------------------------------------------------------===//
10314
10315	bool AMDGPUOperand::isBLGP() const {
10316	return isImm() && getImmTy() == ImmTyBLGP && isUInt<`3`>(x: getImm());
10317	}
10318
10319	bool AMDGPUOperand::isS16Imm() const {
10320	return isImmLiteral() && (isInt<`16`>(x: getImm()) \|\| isUInt<`16`>(x: getImm()));
10321	}
10322
10323	bool AMDGPUOperand::isU16Imm() const {
10324	return isImmLiteral() && isUInt<`16`>(x: getImm());
10325	}
10326
10327	//===----------------------------------------------------------------------===//
10328	// dim
10329	//===----------------------------------------------------------------------===//
10330
10331	bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
10332	// We want to allow "dim:1D" etc.,
10333	// but the initial 1 is tokenized as an integer.
10334	std::string Token;
10335	if (isToken(Kind: AsmToken::Integer)) {
10336	SMLoc Loc = getToken().getEndLoc();
10337	Token = std::string (getTokenStr());
10338	lex();
10339	if (getLoc() != Loc)
10340	return false;
10341	}
10342
10343	StringRef Suffix;
10344	if (!parseId(Val&: Suffix))
10345	return false;
10346	Token += Suffix;
10347
10348	StringRef DimId = Token;
10349	DimId.consume_front(Prefix: "SQ_RSRC_IMG_");
10350
10351	const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(AsmSuffix: DimId);
10352	if (!DimInfo)
10353	return false;
10354
10355	Encoding = DimInfo->Encoding;
10356	return true;
10357	}
10358
10359	ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
10360	if (!isGFX10Plus())
10361	return ParseStatus::NoMatch;
10362
10363	SMLoc S = getLoc();
10364
10365	if (!trySkipId(Id: "dim", Kind: AsmToken::Colon))
10366	return ParseStatus::NoMatch;
10367
10368	unsigned Encoding;
10369	SMLoc Loc = getLoc();
10370	if (!parseDimId(Encoding))
10371	return Error(L: Loc, Msg: "invalid dim value");
10372
10373	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Encoding, Loc: S,
10374	Type: AMDGPUOperand::ImmTyDim));
10375	return ParseStatus::Success;
10376	}
10377
10378	//===----------------------------------------------------------------------===//
10379	// dpp
10380	//===----------------------------------------------------------------------===//
10381
10382	ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
10383	SMLoc S = getLoc();
10384
10385	if (!isGFX10Plus() \|\| !trySkipId(Id: "dpp8", Kind: AsmToken::Colon))
10386	return ParseStatus::NoMatch;
10387
10388	// dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10389
10390	int64_t Sels[`8`];
10391
10392	if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected an opening square bracket"))
10393	return ParseStatus::Failure;
10394
10395	for (size_t i = `0`; i < `8`; ++i) {
10396	if (i > `0` && !skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
10397	return ParseStatus::Failure;
10398
10399	SMLoc Loc = getLoc();
10400	if (getParser().parseAbsoluteExpression(Res&: Sels[i]))
10401	return ParseStatus::Failure;
10402	if (`0` > Sels[i] \|\| `7` < Sels[i])
10403	return Error(L: Loc, Msg: "expected a 3-bit value");
10404	}
10405
10406	if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
10407	return ParseStatus::Failure;
10408
10409	unsigned DPP8 = `0`;
10410	for (size_t i = `0`; i < `8`; ++i)
10411	DPP8 \|= (Sels[i] << (i * `3`));
10412
10413	Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DPP8, Loc: S, Type: AMDGPUOperand::ImmTyDPP8));
10414	return ParseStatus::Success;
10415	}
10416
10417	bool
10418	AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10419	const OperandVector &Operands) {
10420	if (Ctrl == "row_newbcast")
10421	return isGFX90A();
10422
10423	if (Ctrl == "row_share" \|\|
10424	Ctrl == "row_xmask")
10425	return isGFX10Plus();
10426
10427	if (Ctrl == "wave_shl" \|\|
10428	Ctrl == "wave_shr" \|\|
10429	Ctrl == "wave_rol" \|\|
10430	Ctrl == "wave_ror" \|\|
10431	Ctrl == "row_bcast")
10432	return isVI() \|\| isGFX9();
10433
10434	return Ctrl == "row_mirror" \|\|
10435	Ctrl == "row_half_mirror" \|\|
10436	Ctrl == "quad_perm" \|\|
10437	Ctrl == "row_shl" \|\|
10438	Ctrl == "row_shr" \|\|
10439	Ctrl == "row_ror";
10440	}
10441
10442	int64_t
10443	AMDGPUAsmParser::parseDPPCtrlPerm() {
10444	// quad_perm:[%d,%d,%d,%d]
10445
10446	if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected an opening square bracket"))
10447	return -`1`;
10448
10449	int64_t Val = `0`;
10450	for (int i = `0`; i < `4`; ++i) {
10451	if (i > `0` && !skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma"))
10452	return -`1`;
10453
10454	int64_t Temp;
10455	SMLoc Loc = getLoc();
10456	if (getParser().parseAbsoluteExpression(Res&: Temp))
10457	return -`1`;
10458	if (Temp < `0` \|\| Temp > `3`) {
10459	Error(L: Loc, Msg: "expected a 2-bit value");
10460	return -`1`;
10461	}
10462
10463	Val += (Temp << i * `2`);
10464	}
10465
10466	if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket"))
10467	return -`1`;
10468
10469	return Val;
10470	}
10471
10472	int64_t
10473	AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
10474	using namespace AMDGPU::DPP;
10475
10476	// sel:%d
10477
10478	int64_t Val;
10479	SMLoc Loc = getLoc();
10480
10481	if (getParser().parseAbsoluteExpression(Res&: Val))
10482	return -`1`;
10483
10484	struct DppCtrlCheck {
10485	int64_t Ctrl;
10486	int Lo;
10487	int Hi;
10488	};
10489
10490	DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
10491	.Case(S: "wave_shl", Value: {.Ctrl: DppCtrl::WAVE_SHL1, .Lo: `1`, .Hi: `1`})
10492	.Case(S: "wave_rol", Value: {.Ctrl: DppCtrl::WAVE_ROL1, .Lo: `1`, .Hi: `1`})
10493	.Case(S: "wave_shr", Value: {.Ctrl: DppCtrl::WAVE_SHR1, .Lo: `1`, .Hi: `1`})
10494	.Case(S: "wave_ror", Value: {.Ctrl: DppCtrl::WAVE_ROR1, .Lo: `1`, .Hi: `1`})
10495	.Case(S: "row_shl", Value: {.Ctrl: DppCtrl::ROW_SHL0, .Lo: `1`, .Hi: `15`})
10496	.Case(S: "row_shr", Value: {.Ctrl: DppCtrl::ROW_SHR0, .Lo: `1`, .Hi: `15`})
10497	.Case(S: "row_ror", Value: {.Ctrl: DppCtrl::ROW_ROR0, .Lo: `1`, .Hi: `15`})
10498	.Case(S: "row_share", Value: {.Ctrl: DppCtrl::ROW_SHARE_FIRST, .Lo: `0`, .Hi: `15`})
10499	.Case(S: "row_xmask", Value: {.Ctrl: DppCtrl::ROW_XMASK_FIRST, .Lo: `0`, .Hi: `15`})
10500	.Case(S: "row_newbcast", Value: {.Ctrl: DppCtrl::ROW_NEWBCAST_FIRST, .Lo: `0`, .Hi: `15`})
10501	.Default(Value: {.Ctrl: -`1`, .Lo: `0`, .Hi: `0`});
10502
10503	bool Valid;
10504	if (Check.Ctrl == -`1`) {
10505	Valid = (Ctrl == "row_bcast" && (Val == `15` \|\| Val == `31`));
10506	Val = (Val == `15`)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
10507	} else {
10508	Valid = Check.Lo <= Val && Val <= Check.Hi;
10509	Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl \| Val);
10510	}
10511
10512	if (!Valid) {
10513	Error(L: Loc, Msg: Twine ("invalid ", Ctrl) + Twine (" value"));
10514	return -`1`;
10515	}
10516
10517	return Val;
10518	}
10519
10520	ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10521	using namespace AMDGPU::DPP;
10522
10523	if (!isToken(Kind: AsmToken::Identifier) \|\|
10524	!isSupportedDPPCtrl(Ctrl: getTokenStr(), Operands))
10525	return ParseStatus::NoMatch;
10526
10527	SMLoc S = getLoc();
10528	int64_t Val = -`1`;
10529	StringRef Ctrl;
10530
10531	parseId(Val&: Ctrl);
10532
10533	if (Ctrl == "row_mirror") {
10534	Val = DppCtrl::ROW_MIRROR;
10535	} else if (Ctrl == "row_half_mirror") {
10536	Val = DppCtrl::ROW_HALF_MIRROR;
10537	} else {
10538	if (skipToken(Kind: AsmToken::Colon, ErrMsg: "expected a colon")) {
10539	if (Ctrl == "quad_perm") {
10540	Val = parseDPPCtrlPerm();
10541	} else {
10542	Val = parseDPPCtrlSel(Ctrl);
10543	}
10544	}
10545	}
10546
10547	if (Val == -`1`)
10548	return ParseStatus::Failure;
10549
10550	Operands.push_back(
10551	Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: AMDGPUOperand::ImmTyDppCtrl));
10552	return ParseStatus::Success;
10553	}
10554
10555	void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
10556	bool IsDPP8) {
10557	OptionalImmIndexMap OptionalIdx;
10558	unsigned Opc = Inst.getOpcode();
10559	const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
10560
10561	// MAC instructions are special because they have 'old'
10562	// operand which is not tied to dst (but assumed to be).
10563	// They also have dummy unused src2_modifiers.
10564	int OldIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::old);
10565	int Src2ModIdx =
10566	AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2_modifiers);
10567	bool IsMAC = OldIdx != -`1` && Src2ModIdx != -`1` &&
10568	Desc.getOperandConstraint(OpNum: OldIdx, Constraint: MCOI::TIED_TO) == -`1`;
10569
10570	unsigned I = `1`;
10571	for (unsigned J = `0`; J < Desc.getNumDefs(); ++J) {
10572	((AMDGPUOperand &)*Operands [I++]).addRegOperands(Inst, N: `1`);
10573	}
10574
10575	int Fi = `0`;
10576	int VdstInIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst_in);
10577	bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 \|\|
10578	Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx13 \|\|
10579	Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 \|\|
10580	Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx13 \|\|
10581	Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 \|\|
10582	Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx13 \|\|
10583	Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 \|\|
10584	Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx13;
10585
10586	for (unsigned E = Operands.size(); I != E; ++I) {
10587
10588	if (IsMAC) {
10589	int NumOperands = Inst.getNumOperands();
10590	if (OldIdx == NumOperands) {
10591	// Handle old operand
10592	constexpr int DST_IDX = `0`;
10593	Inst.addOperand(Op: Inst.getOperand(i: DST_IDX));
10594	} else if (Src2ModIdx == NumOperands) {
10595	// Add unused dummy src2_modifiers
10596	Inst.addOperand(Op: MCOperand::createImm(Val: `0`));
10597	}
10598	}
10599
10600	if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
10601	Inst.addOperand(Op: Inst.getOperand(i: `0`));
10602	}
10603
10604	if (IsVOP3CvtSrDpp) {
10605	if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
10606	Inst.addOperand(Op: MCOperand::createImm(Val: `0`));
10607	Inst.addOperand(Op: MCOperand::createReg(Reg: MCRegister ()));
10608	}
10609	}
10610
10611	auto TiedTo = Desc.getOperandConstraint(OpNum: Inst.getNumOperands(),
10612	Constraint: MCOI::TIED_TO);
10613	if (TiedTo != -`1`) {
10614	assert((unsigned)TiedTo < Inst.getNumOperands());
10615	// handle tied old or src2 for MAC instructions
10616	Inst.addOperand(Op: Inst.getOperand(i: TiedTo));
10617	}
10618	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [I]);
10619	// Add the register arguments
10620	if (IsDPP8 && Op.isDppFI()) {
10621	Fi = Op.getImm();
10622	} else if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
10623	Op.addRegOrImmWithFPInputModsOperands(Inst, N: `2`);
10624	} else if (Op.isReg()) {
10625	Op.addRegOperands(Inst, N: `1`);
10626	} else if (Op.isImm() &&
10627	Desc.operands()[Inst.getNumOperands()].RegClass != -`1`) {
10628	Op.addImmOperands(Inst, N: `1`);
10629	} else if (Op.isImm()) {
10630	OptionalIdx [Op.getImmTy()] = I;
10631	} else {
10632	llvm_unreachable("unhandled operand type");
10633	}
10634	}
10635
10636	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
10637	addOptionalImmOperand(Inst, Operands, OptionalIdx,
10638	ImmT: AMDGPUOperand::ImmTyClamp);
10639
10640	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::byte_sel)) {
10641	if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
10642	Inst.addOperand(Op: Inst.getOperand(i: `0`));
10643	addOptionalImmOperand(Inst, Operands, OptionalIdx,
10644	ImmT: AMDGPUOperand::ImmTyByteSel);
10645	}
10646
10647	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
10648	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOModSI);
10649
10650	if (Desc.TSFlags & SIInstrFlags::VOP3P)
10651	cvtVOP3P(Inst, Operands, OptIdx&: OptionalIdx);
10652	else if (Desc.TSFlags & SIInstrFlags::VOP3)
10653	cvtVOP3OpSel(Inst, Operands, OptionalIdx);
10654	else if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
10655	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel);
10656	}
10657
10658	if (IsDPP8) {
10659	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDPP8);
10660	using namespace llvm::AMDGPU::DPP;
10661	Inst.addOperand(Op: MCOperand::createImm(Val: Fi? DPP8_FI_1 : DPP8_FI_0));
10662	} else {
10663	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppCtrl, Default: `0xe4`);
10664	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppRowMask, Default: `0xf`);
10665	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBankMask, Default: `0xf`);
10666	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBoundCtrl);
10667
10668	if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::fi))
10669	addOptionalImmOperand(Inst, Operands, OptionalIdx,
10670	ImmT: AMDGPUOperand::ImmTyDppFI);
10671	}
10672	}
10673
10674	void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
10675	OptionalImmIndexMap OptionalIdx;
10676
10677	unsigned I = `1`;
10678	const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
10679	for (unsigned J = `0`; J < Desc.getNumDefs(); ++J) {
10680	((AMDGPUOperand &)*Operands [I++]).addRegOperands(Inst, N: `1`);
10681	}
10682
10683	int Fi = `0`;
10684	for (unsigned E = Operands.size(); I != E; ++I) {
10685	auto TiedTo = Desc.getOperandConstraint(OpNum: Inst.getNumOperands(),
10686	Constraint: MCOI::TIED_TO);
10687	if (TiedTo != -`1`) {
10688	assert((unsigned)TiedTo < Inst.getNumOperands());
10689	// handle tied old or src2 for MAC instructions
10690	Inst.addOperand(Op: Inst.getOperand(i: TiedTo));
10691	}
10692	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [I]);
10693	// Add the register arguments
10694	if (Op.isReg() && validateVccOperand(Reg: Op.getReg())) {
10695	// VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
10696	// Skip it.
10697	continue;
10698	}
10699
10700	if (IsDPP8) {
10701	if (Op.isDPP8()) {
10702	Op.addImmOperands(Inst, N: `1`);
10703	} else if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
10704	Op.addRegWithFPInputModsOperands(Inst, N: `2`);
10705	} else if (Op.isDppFI()) {
10706	Fi = Op.getImm();
10707	} else if (Op.isReg()) {
10708	Op.addRegOperands(Inst, N: `1`);
10709	} else {
10710	llvm_unreachable("Invalid operand type");
10711	}
10712	} else {
10713	if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
10714	Op.addRegWithFPInputModsOperands(Inst, N: `2`);
10715	} else if (Op.isReg()) {
10716	Op.addRegOperands(Inst, N: `1`);
10717	} else if (Op.isDPPCtrl()) {
10718	Op.addImmOperands(Inst, N: `1`);
10719	} else if (Op.isImm()) {
10720	// Handle optional arguments
10721	OptionalIdx [Op.getImmTy()] = I;
10722	} else {
10723	llvm_unreachable("Invalid operand type");
10724	}
10725	}
10726	}
10727
10728	if (IsDPP8) {
10729	using namespace llvm::AMDGPU::DPP;
10730	Inst.addOperand(Op: MCOperand::createImm(Val: Fi? DPP8_FI_1 : DPP8_FI_0));
10731	} else {
10732	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppRowMask, Default: `0xf`);
10733	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBankMask, Default: `0xf`);
10734	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyDppBoundCtrl);
10735	if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::fi)) {
10736	addOptionalImmOperand(Inst, Operands, OptionalIdx,
10737	ImmT: AMDGPUOperand::ImmTyDppFI);
10738	}
10739	}
10740	}
10741
10742	//===----------------------------------------------------------------------===//
10743	// sdwa
10744	//===----------------------------------------------------------------------===//
10745
10746	ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
10747	StringRef Prefix,
10748	AMDGPUOperand::ImmTy Type) {
10749	return parseStringOrIntWithPrefix(
10750	Operands, Name: Prefix,
10751	Ids: {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
10752	Type);
10753	}
10754
10755	ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
10756	return parseStringOrIntWithPrefix(
10757	Operands, Name: "dst_unused", Ids: {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
10758	Type: AMDGPUOperand::ImmTySDWADstUnused);
10759	}
10760
10761	void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
10762	cvtSDWA(Inst, Operands, BasicInstType: SDWAInstType::VOP1);
10763	}
10764
10765	void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
10766	cvtSDWA(Inst, Operands, BasicInstType: SDWAInstType::VOP2);
10767	}
10768
10769	void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
10770	cvtSDWA(Inst, Operands, BasicInstType: SDWAInstType::VOP2, SkipDstVcc: true, SkipSrcVcc: true);
10771	}
10772
10773	void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
10774	cvtSDWA(Inst, Operands, BasicInstType: SDWAInstType::VOP2, SkipDstVcc: false, SkipSrcVcc: true);
10775	}
10776
10777	void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
10778	cvtSDWA(Inst, Operands, BasicInstType: SDWAInstType::VOPC, SkipDstVcc: isVI());
10779	}
10780
10781	void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
10782	SDWAInstType BasicInstType, bool SkipDstVcc,
10783	bool SkipSrcVcc) {
10784	using namespace llvm::AMDGPU::SDWA;
10785
10786	OptionalImmIndexMap OptionalIdx;
10787	bool SkipVcc = SkipDstVcc \|\| SkipSrcVcc;
10788	bool SkippedVcc = false;
10789
10790	unsigned I = `1`;
10791	const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode());
10792	for (unsigned J = `0`; J < Desc.getNumDefs(); ++J) {
10793	((AMDGPUOperand &)*Operands [I++]).addRegOperands(Inst, N: `1`);
10794	}
10795
10796	for (unsigned E = Operands.size(); I != E; ++I) {
10797	AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands [I]);
10798	if (SkipVcc && !SkippedVcc && Op.isReg() &&
10799	(Op.getReg() == AMDGPU::VCC \|\| Op.getReg() == AMDGPU::VCC_LO)) {
10800	// VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
10801	// Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
10802	// or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
10803	// Skip VCC only if we didn't skip it on previous iteration.
10804	// Note that src0 and src1 occupy 2 slots each because of modifiers.
10805	if (BasicInstType == SDWAInstType::VOP2 &&
10806	((SkipDstVcc && Inst.getNumOperands() == `1`) \|\|
10807	(SkipSrcVcc && Inst.getNumOperands() == `5`))) {
10808	SkippedVcc = true;
10809	continue;
10810	}
10811	if (BasicInstType == SDWAInstType::VOPC && Inst.getNumOperands() == `0`) {
10812	SkippedVcc = true;
10813	continue;
10814	}
10815	}
10816	if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) {
10817	Op.addRegOrImmWithInputModsOperands(Inst, N: `2`);
10818	} else if (Op.isImm()) {
10819	// Handle optional arguments
10820	OptionalIdx [Op.getImmTy()] = I;
10821	} else {
10822	llvm_unreachable("Invalid operand type");
10823	}
10824	SkippedVcc = false;
10825	}
10826
10827	const unsigned Opc = Inst.getOpcode();
10828	if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
10829	Opc != AMDGPU::V_NOP_sdwa_vi) {
10830	// v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
10831	switch (BasicInstType) {
10832	case SDWAInstType::VOP1:
10833	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp))
10834	addOptionalImmOperand(Inst, Operands, OptionalIdx,
10835	ImmT: AMDGPUOperand::ImmTyClamp, Default: `0`);
10836
10837	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod))
10838	addOptionalImmOperand(Inst, Operands, OptionalIdx,
10839	ImmT: AMDGPUOperand::ImmTyOModSI, Default: `0`);
10840
10841	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::dst_sel))
10842	addOptionalImmOperand(Inst, Operands, OptionalIdx,
10843	ImmT: AMDGPUOperand::ImmTySDWADstSel, Default: SdwaSel::DWORD);
10844
10845	if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::dst_unused))
10846	addOptionalImmOperand(Inst, Operands, OptionalIdx,
10847	ImmT: AMDGPUOperand::ImmTySDWADstUnused,
10848	Default: DstUnused::UNUSED_PRESERVE);
10849
10850	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
10851	break;
10852
10853	case SDWAInstType::VOP2:
10854	addOptionalImmOperand(Inst, Operands, OptionalIdx,
10855	ImmT: AMDGPUOperand::ImmTyClamp, Default: `0`);
10856
10857	if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::omod))
10858	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOModSI, Default: `0`);
10859
10860	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWADstSel, Default: SdwaSel::DWORD);
10861	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWADstUnused, Default: DstUnused::UNUSED_PRESERVE);
10862	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
10863	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc1Sel, Default: SdwaSel::DWORD);
10864	break;
10865
10866	case SDWAInstType::VOPC:
10867	if (AMDGPU::hasNamedOperand(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::clamp))
10868	addOptionalImmOperand(Inst, Operands, OptionalIdx,
10869	ImmT: AMDGPUOperand::ImmTyClamp, Default: `0`);
10870	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc0Sel, Default: SdwaSel::DWORD);
10871	addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTySDWASrc1Sel, Default: SdwaSel::DWORD);
10872	break;
10873	}
10874	}
10875
10876	// special case v_mac_{f16, f32}:
10877	// it has src2 register operand that is tied to dst operand
10878	if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi \|\|
10879	Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
10880	auto *it = Inst.begin();
10881	std::advance(
10882	i&: it, n: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::src2));
10883	Inst.insert(I: it, Op: Inst.getOperand(i: `0`)); // src2 = dst
10884	}
10885	}
10886
10887	/// Force static initialization.
10888	extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10889	LLVMInitializeAMDGPUAsmParser() {
10890	RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
10891	RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
10892	RegisterMCAsmParser<AMDGPUAsmParser> C(getTheGCNLegacyTarget());
10893	}
10894
10895	#define GET_MATCHER_IMPLEMENTATION
10896	#define GET_MNEMONIC_SPELL_CHECKER
10897	#define GET_MNEMONIC_CHECKER
10898	#include "AMDGPUGenAsmMatcher.inc"
10899
10900	ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10901	unsigned MCK) {
10902	switch (MCK) {
10903	case MCK_addr64:
10904	return parseTokenOp(Name: "addr64", Operands);
10905	case MCK_done:
10906	return parseNamedBit(Name: "done", Operands, ImmTy: AMDGPUOperand::ImmTyDone, IgnoreNegative: true);
10907	case MCK_idxen:
10908	return parseTokenOp(Name: "idxen", Operands);
10909	case MCK_lds:
10910	return parseNamedBit(Name: "lds", Operands, ImmTy: AMDGPUOperand::ImmTyLDS,
10911	/IgnoreNegative=/true);
10912	case MCK_offen:
10913	return parseTokenOp(Name: "offen", Operands);
10914	case MCK_off:
10915	return parseTokenOp(Name: "off", Operands);
10916	case MCK_row_95_en:
10917	return parseNamedBit(Name: "row_en", Operands, ImmTy: AMDGPUOperand::ImmTyRowEn, IgnoreNegative: true);
10918	case MCK_gds:
10919	return parseNamedBit(Name: "gds", Operands, ImmTy: AMDGPUOperand::ImmTyGDS);
10920	case MCK_tfe:
10921	return parseNamedBit(Name: "tfe", Operands, ImmTy: AMDGPUOperand::ImmTyTFE);
10922	}
10923	return tryCustomParseOperand(Operands, MCK);
10924	}
10925
10926	// This function should be defined after auto-generated include so that we have
10927	// MatchClassKind enum defined
10928	unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
10929	unsigned Kind) {
10930	// Tokens like "glc" would be parsed as immediate operands in ParseOperand().
10931	// But MatchInstructionImpl() expects to meet token and fails to validate
10932	// operand. This method checks if we are given immediate operand but expect to
10933	// get corresponding token.
10934	AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
10935	switch (Kind) {
10936	case MCK_addr64:
10937	return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
10938	case MCK_gds:
10939	return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
10940	case MCK_lds:
10941	return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
10942	case MCK_idxen:
10943	return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
10944	case MCK_offen:
10945	return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
10946	case MCK_tfe:
10947	return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
10948	case MCK_done:
10949	return Operand.isDone() ? Match_Success : Match_InvalidOperand;
10950	case MCK_row_95_en:
10951	return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
10952	case MCK_SSrc_b32:
10953	// When operands have expression values, they will return true for isToken,
10954	// because it is not possible to distinguish between a token and an
10955	// expression at parse time. MatchInstructionImpl() will always try to
10956	// match an operand as a token, when isToken returns true, and when the
10957	// name of the expression is not a valid token, the match will fail,
10958	// so we need to handle it here.
10959	return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
10960	case MCK_SSrc_f32:
10961	return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
10962	case MCK_SOPPBrTarget:
10963	return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
10964	case MCK_VReg32OrOff:
10965	return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
10966	case MCK_InterpSlot:
10967	return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
10968	case MCK_InterpAttr:
10969	return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
10970	case MCK_InterpAttrChan:
10971	return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
10972	case MCK_SReg_64:
10973	case MCK_SReg_64_XEXEC:
10974	// Null is defined as a 32-bit register but
10975	// it should also be enabled with 64-bit operands or larger.
10976	// The following code enables it for SReg_64 and larger operands
10977	// used as source and destination. Remaining source
10978	// operands are handled in isInlinableImm.
10979	case MCK_SReg_96:
10980	case MCK_SReg_128:
10981	case MCK_SReg_256:
10982	case MCK_SReg_512:
10983	return Operand.isNull() ? Match_Success : Match_InvalidOperand;
10984	default:
10985	return Match_InvalidOperand;
10986	}
10987	}
10988
10989	//===----------------------------------------------------------------------===//
10990	// endpgm
10991	//===----------------------------------------------------------------------===//
10992
10993	ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10994	SMLoc S = getLoc();
10995	int64_t Imm = `0`;
10996
10997	if (!parseExpr(Imm)) {
10998	// The operand is optional, if not present default to 0
10999	Imm = `0`;
11000	}
11001
11002	if (!isUInt<`16`>(x: Imm))
11003	return Error(L: S, Msg: "expected a 16-bit value");
11004
11005	Operands.push_back(
11006	Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTyEndpgm));
11007	return ParseStatus::Success;
11008	}
11009
11010	bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmT: ImmTyEndpgm); }
11011
11012	//===----------------------------------------------------------------------===//
11013	// Split Barrier
11014	//===----------------------------------------------------------------------===//
11015
11016	bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(type: MVT::i32); }
11017

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp