//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCAsmInfo.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

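  // Source-operand modifiers parsed from the assembly text: Abs/Neg are the
  // floating-point modifiers (|x|, -x), Sext is the integer sext() modifier,
  // and Lit records a request to force a literal encoding.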
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    bool Lit = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers())
        return getFPModifiersOperand();
      if (hasIntModifiers())
        return getIntModifiersOperand();
      return 0;
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

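  // Type of an immediate operand. ImmTyNone is a plain literal value; the
  // remaining entries identify named instruction fields and modifiers
  // (offsets, cache policy, DPP/SDWA controls, etc.) parsed as immediates.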
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyByteSel,
    ImmTyBitOp3,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyMandatoryLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    MCRegister RegNo;
    Modifiers Mods;
  };

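  // Operand payload; the member that is valid is selected by Kind.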
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

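  // Whether this immediate fits an inline constant of the given type, or can
  // only be encoded as a literal of that type.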
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedVGPRFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcT_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrcB64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }

  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }

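  // KImm operands are mandatory literals embedded in the instruction word
  // (e.g. the v_madmk/v_madak constants), as opposed to optional inline values.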
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [=](){ return P(*this); };
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

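  // Emit the modifier bitmask operand first, then the register or immediate it
  // applies to, matching the src_modifiers/src operand order in the MCInst.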
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyByteSel: OS << "ByteSel"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      MCRegister Reg, SMLoc S, SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = Reg;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

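  // Record that the register with the given index is used and publish the
  // updated count through the corresponding .kernel.*_count symbol.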
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

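  // Mark every 32-bit dword covered by the given register range as used.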
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

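  // Encoding variant forced by a mnemonic suffix (e.g. _e64, _dpp, _sdwa),
  // if any; a ForcedEncodingSize of 64 means the VOP3 encoding is required.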
  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  void createConstantSymbol(StringRef Id, int64_t Val);

  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
                          const MCExpr *FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          const MCExpr *NextFreeVGPR, SMRange VGPRRange,
                          const MCExpr *NextFreeSGPR, SMRange SGPRRange,
                          const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                             unsigned &RegWidth,
                             SmallVectorImpl<AsmToken> &Tokens);
  MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                          unsigned &RegWidth,
                          SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
  MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
                           unsigned SubReg, unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // If there is no default wave size it must be a generation before gfx10,
      // these have FeatureWavefrontSize64 in their definition already. For
      // gfx10+ set wave32 as a default.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasTrue16Insts() const {
    return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
1622 | StringRef parseMnemonicSuffix(StringRef Name); |
1623 | bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, |
1624 | SMLoc NameLoc, OperandVector &Operands) override; |
1625 | //bool ProcessInstruction(MCInst &Inst); |
1626 | |
1627 | ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands); |
1628 | |
1629 | ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int); |
1630 | |
1631 | ParseStatus |
1632 | parseIntWithPrefix(const char *Prefix, OperandVector &Operands, |
1633 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, |
1634 | std::function<bool(int64_t &)> ConvertResult = nullptr); |
1635 | |
1636 | ParseStatus parseOperandArrayWithPrefix( |
1637 | const char *Prefix, OperandVector &Operands, |
1638 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, |
1639 | bool (*ConvertResult)(int64_t &) = nullptr); |
1640 | |
1641 | ParseStatus |
1642 | parseNamedBit(StringRef Name, OperandVector &Operands, |
1643 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); |
1644 | unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const; |
1645 | ParseStatus parseCPol(OperandVector &Operands); |
1646 | ParseStatus parseScope(OperandVector &Operands, int64_t &Scope); |
1647 | ParseStatus parseTH(OperandVector &Operands, int64_t &TH); |
1648 | ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value, |
1649 | SMLoc &StringLoc); |
1650 | ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands, |
1651 | StringRef Name, |
1652 | ArrayRef<const char *> Ids, |
1653 | int64_t &IntVal); |
1654 | ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands, |
1655 | StringRef Name, |
1656 | ArrayRef<const char *> Ids, |
1657 | AMDGPUOperand::ImmTy Type); |
1658 | |
1659 | bool isModifier(); |
1660 | bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1661 | bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1662 | bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1663 | bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; |
1664 | bool parseSP3NegModifier(); |
1665 | ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false, |
1666 | bool HasLit = false); |
1667 | ParseStatus parseReg(OperandVector &Operands); |
1668 | ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false, |
1669 | bool HasLit = false); |
1670 | ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands, |
1671 | bool AllowImm = true); |
1672 | ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands, |
1673 | bool AllowImm = true); |
1674 | ParseStatus parseRegWithFPInputMods(OperandVector &Operands); |
1675 | ParseStatus parseRegWithIntInputMods(OperandVector &Operands); |
1676 | ParseStatus parseVReg32OrOff(OperandVector &Operands); |
1677 | ParseStatus tryParseIndexKey(OperandVector &Operands, |
1678 | AMDGPUOperand::ImmTy ImmTy); |
1679 | ParseStatus parseIndexKey8bit(OperandVector &Operands); |
1680 | ParseStatus parseIndexKey16bit(OperandVector &Operands); |
1681 | |
1682 | ParseStatus parseDfmtNfmt(int64_t &Format); |
1683 | ParseStatus parseUfmt(int64_t &Format); |
1684 | ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, |
1685 | int64_t &Format); |
1686 | ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, |
1687 | int64_t &Format); |
1688 | ParseStatus parseFORMAT(OperandVector &Operands); |
1689 | ParseStatus parseSymbolicOrNumericFormat(int64_t &Format); |
1690 | ParseStatus parseNumericFormat(int64_t &Format); |
1691 | ParseStatus parseFlatOffset(OperandVector &Operands); |
1692 | ParseStatus parseR128A16(OperandVector &Operands); |
1693 | ParseStatus parseBLGP(OperandVector &Operands); |
1694 | bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); |
1695 | bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); |
1696 | |
1697 | void cvtExp(MCInst &Inst, const OperandVector &Operands); |
1698 | |
1699 | bool parseCnt(int64_t &IntVal); |
1700 | ParseStatus parseSWaitCnt(OperandVector &Operands); |
1701 | |
1702 | bool parseDepCtr(int64_t &IntVal, unsigned &Mask); |
1703 | void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); |
1704 | ParseStatus parseDepCtr(OperandVector &Operands); |
1705 | |
1706 | bool parseDelay(int64_t &Delay); |
1707 | ParseStatus parseSDelayALU(OperandVector &Operands); |
1708 | |
1709 | ParseStatus parseHwreg(OperandVector &Operands); |
1710 | |
1711 | private: |
1712 | struct OperandInfoTy { |
1713 | SMLoc Loc; |
1714 | int64_t Val; |
1715 | bool IsSymbolic = false; |
1716 | bool IsDefined = false; |
1717 | |
1718 | OperandInfoTy(int64_t Val) : Val(Val) {} |
1719 | }; |
1720 | |
1721 | struct StructuredOpField : OperandInfoTy { |
1722 | StringLiteral Id; |
1723 | StringLiteral Desc; |
1724 | unsigned Width; |
1725 | bool IsDefined = false; |
1726 | |
1727 | StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width, |
1728 | int64_t Default) |
1729 | : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {} |
1730 | virtual ~StructuredOpField() = default; |
1731 | |
1732 | bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const { |
1733 | Parser.Error(L: Loc, Msg: "invalid " + Desc + ": " + Err); |
1734 | return false; |
1735 | } |
1736 | |
1737 | virtual bool validate(AMDGPUAsmParser &Parser) const { |
1738 | if (IsSymbolic && Val == OPR_ID_UNSUPPORTED) |
1739 | return Error(Parser, Err: "not supported on this GPU" ); |
1740 | if (!isUIntN(N: Width, x: Val)) |
1741 | return Error(Parser, Err: "only " + Twine(Width) + "-bit values are legal" ); |
1742 | return true; |
1743 | } |
1744 | }; |
1745 | |
1746 | ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields); |
1747 | bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields); |
1748 | |
1749 | bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); |
1750 | bool validateSendMsg(const OperandInfoTy &Msg, |
1751 | const OperandInfoTy &Op, |
1752 | const OperandInfoTy &Stream); |
1753 | |
1754 | ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset, |
1755 | OperandInfoTy &Width); |
1756 | |
1757 | SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; |
1758 | SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; |
1759 | SMLoc getBLGPLoc(const OperandVector &Operands) const; |
1760 | |
1761 | SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, |
1762 | const OperandVector &Operands) const; |
1763 | SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; |
1764 | SMLoc getRegLoc(MCRegister Reg, const OperandVector &Operands) const; |
1765 | SMLoc getLitLoc(const OperandVector &Operands, |
1766 | bool SearchMandatoryLiterals = false) const; |
1767 | SMLoc getMandatoryLitLoc(const OperandVector &Operands) const; |
1768 | SMLoc getConstLoc(const OperandVector &Operands) const; |
1769 | SMLoc getInstLoc(const OperandVector &Operands) const; |
1770 | |
1771 | bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); |
1772 | bool validateOffset(const MCInst &Inst, const OperandVector &Operands); |
1773 | bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); |
1774 | bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); |
1775 | bool validateSOPLiteral(const MCInst &Inst) const; |
1776 | bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); |
1777 | bool validateVOPDRegBankConstraints(const MCInst &Inst, |
1778 | const OperandVector &Operands); |
1779 | bool validateIntClampSupported(const MCInst &Inst); |
1780 | bool validateMIMGAtomicDMask(const MCInst &Inst); |
1781 | bool validateMIMGGatherDMask(const MCInst &Inst); |
1782 | bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); |
1783 | bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); |
1784 | bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc); |
1785 | bool validateMIMGD16(const MCInst &Inst); |
1786 | bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands); |
1787 | bool validateTensorR128(const MCInst &Inst); |
1788 | bool validateMIMGMSAA(const MCInst &Inst); |
1789 | bool validateOpSel(const MCInst &Inst); |
1790 | bool validateTrue16OpSel(const MCInst &Inst); |
1791 | bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName); |
1792 | bool validateDPP(const MCInst &Inst, const OperandVector &Operands); |
1793 | bool validateVccOperand(MCRegister Reg) const; |
1794 | bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); |
1795 | bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); |
1796 | bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands); |
1797 | bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); |
1798 | bool validateAGPRLdSt(const MCInst &Inst) const; |
1799 | bool validateVGPRAlign(const MCInst &Inst) const; |
1800 | bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); |
1801 | bool validateDS(const MCInst &Inst, const OperandVector &Operands); |
1802 | bool validateGWS(const MCInst &Inst, const OperandVector &Operands); |
1803 | bool validateDivScale(const MCInst &Inst); |
1804 | bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands); |
1805 | bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, |
1806 | const SMLoc &IDLoc); |
1807 | bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, |
1808 | const unsigned CPol); |
1809 | bool validateTFE(const MCInst &Inst, const OperandVector &Operands); |
1810 | std::optional<StringRef> validateLdsDirect(const MCInst &Inst); |
1811 | unsigned getConstantBusLimit(unsigned Opcode) const; |
1812 | bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); |
1813 | bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; |
1814 | unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; |
1815 | |
1816 | bool isSupportedMnemo(StringRef Mnemo, |
1817 | const FeatureBitset &FBS); |
1818 | bool isSupportedMnemo(StringRef Mnemo, |
1819 | const FeatureBitset &FBS, |
1820 | ArrayRef<unsigned> Variants); |
1821 | bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); |
1822 | |
1823 | bool isId(const StringRef Id) const; |
1824 | bool isId(const AsmToken &Token, const StringRef Id) const; |
1825 | bool isToken(const AsmToken::TokenKind Kind) const; |
1826 | StringRef getId() const; |
1827 | bool trySkipId(const StringRef Id); |
1828 | bool trySkipId(const StringRef Pref, const StringRef Id); |
1829 | bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); |
1830 | bool trySkipToken(const AsmToken::TokenKind Kind); |
1831 | bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); |
1832 | bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string" ); |
1833 | bool parseId(StringRef &Val, const StringRef ErrMsg = "" ); |
1834 | |
1835 | void peekTokens(MutableArrayRef<AsmToken> Tokens); |
1836 | AsmToken::TokenKind getTokenKind() const; |
1837 | bool parseExpr(int64_t &Imm, StringRef Expected = "" ); |
1838 | bool parseExpr(OperandVector &Operands); |
1839 | StringRef getTokenStr() const; |
1840 | AsmToken peekToken(bool ShouldSkipSpace = true); |
1841 | AsmToken getToken() const; |
1842 | SMLoc getLoc() const; |
1843 | void lex(); |
1844 | |
1845 | public: |
1846 | void onBeginOfFile() override; |
1847 | bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; |
1848 | |
1849 | ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); |
1850 | |
1851 | ParseStatus parseExpTgt(OperandVector &Operands); |
1852 | ParseStatus parseSendMsg(OperandVector &Operands); |
1853 | ParseStatus parseInterpSlot(OperandVector &Operands); |
1854 | ParseStatus parseInterpAttr(OperandVector &Operands); |
1855 | ParseStatus parseSOPPBrTarget(OperandVector &Operands); |
1856 | ParseStatus parseBoolReg(OperandVector &Operands); |
1857 | |
1858 | bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal, |
1859 | const unsigned MaxVal, const Twine &ErrMsg, |
1860 | SMLoc &Loc); |
1861 | bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, |
1862 | const unsigned MinVal, |
1863 | const unsigned MaxVal, |
1864 | const StringRef ErrMsg); |
1865 | ParseStatus parseSwizzle(OperandVector &Operands); |
1866 | bool parseSwizzleOffset(int64_t &Imm); |
1867 | bool parseSwizzleMacro(int64_t &Imm); |
1868 | bool parseSwizzleQuadPerm(int64_t &Imm); |
1869 | bool parseSwizzleBitmaskPerm(int64_t &Imm); |
1870 | bool parseSwizzleBroadcast(int64_t &Imm); |
1871 | bool parseSwizzleSwap(int64_t &Imm); |
1872 | bool parseSwizzleReverse(int64_t &Imm); |
1873 | bool parseSwizzleFFT(int64_t &Imm); |
1874 | bool parseSwizzleRotate(int64_t &Imm); |
1875 | |
1876 | ParseStatus parseGPRIdxMode(OperandVector &Operands); |
1877 | int64_t parseGPRIdxMacro(); |
1878 | |
1879 | void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: false); } |
1880 | void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: true); } |
1881 | |
1882 | ParseStatus parseOModSI(OperandVector &Operands); |
1883 | |
1884 | void cvtVOP3(MCInst &Inst, const OperandVector &Operands, |
1885 | OptionalImmIndexMap &OptionalIdx); |
1886 | void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands); |
1887 | void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); |
1888 | void cvtVOP3(MCInst &Inst, const OperandVector &Operands); |
1889 | void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); |
1890 | void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands); |
1891 | |
1892 | void cvtVOPD(MCInst &Inst, const OperandVector &Operands); |
1893 | void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, |
1894 | OptionalImmIndexMap &OptionalIdx); |
1895 | void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, |
1896 | OptionalImmIndexMap &OptionalIdx); |
1897 | |
1898 | void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); |
1899 | void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); |
1900 | |
1901 | bool parseDimId(unsigned &Encoding); |
1902 | ParseStatus parseDim(OperandVector &Operands); |
1903 | bool convertDppBoundCtrl(int64_t &BoundCtrl); |
1904 | ParseStatus parseDPP8(OperandVector &Operands); |
1905 | ParseStatus parseDPPCtrl(OperandVector &Operands); |
1906 | bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); |
1907 | int64_t parseDPPCtrlSel(StringRef Ctrl); |
1908 | int64_t parseDPPCtrlPerm(); |
1909 | void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); |
1910 | void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { |
1911 | cvtDPP(Inst, Operands, IsDPP8: true); |
1912 | } |
1913 | void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, |
1914 | bool IsDPP8 = false); |
1915 | void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { |
1916 | cvtVOP3DPP(Inst, Operands, IsDPP8: true); |
1917 | } |
1918 | |
1919 | ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, |
1920 | AMDGPUOperand::ImmTy Type); |
1921 | ParseStatus parseSDWADstUnused(OperandVector &Operands); |
1922 | void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); |
1923 | void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); |
1924 | void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); |
1925 | void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); |
1926 | void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); |
1927 | void cvtSDWA(MCInst &Inst, const OperandVector &Operands, |
1928 | uint64_t BasicInstType, |
1929 | bool SkipDstVcc = false, |
1930 | bool SkipSrcVcc = false); |
1931 | |
1932 | ParseStatus parseEndpgm(OperandVector &Operands); |
1933 | |
1934 | ParseStatus parseVOPD(OperandVector &Operands); |
1935 | }; |
1936 | |
1937 | } // end anonymous namespace |
1938 | |
// May be called with an integer type of equivalent bit width.
1940 | static const fltSemantics *getFltSemantics(unsigned Size) { |
1941 | switch (Size) { |
1942 | case 4: |
1943 | return &APFloat::IEEEsingle(); |
1944 | case 8: |
1945 | return &APFloat::IEEEdouble(); |
1946 | case 2: |
1947 | return &APFloat::IEEEhalf(); |
1948 | default: |
1949 | llvm_unreachable("unsupported fp type" ); |
1950 | } |
1951 | } |
1952 | |
1953 | static const fltSemantics *getFltSemantics(MVT VT) { |
1954 | return getFltSemantics(Size: VT.getSizeInBits() / 8); |
1955 | } |
1956 | |
1957 | static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { |
1958 | switch (OperandType) { |
  // When a floating-point immediate is used as an operand of type i16, the
  // 32-bit representation of the constant, truncated to the 16 LSBs, should
  // be used.
1961 | case AMDGPU::OPERAND_REG_IMM_INT16: |
1962 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
1963 | case AMDGPU::OPERAND_REG_IMM_INT32: |
1964 | case AMDGPU::OPERAND_REG_IMM_FP32: |
1965 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
1966 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
1967 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
1968 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
1969 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
1970 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
1971 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
1972 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
1973 | case AMDGPU::OPERAND_KIMM32: |
1974 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: |
1975 | return &APFloat::IEEEsingle(); |
1976 | case AMDGPU::OPERAND_REG_IMM_INT64: |
1977 | case AMDGPU::OPERAND_REG_IMM_FP64: |
1978 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
1979 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
1980 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
1981 | return &APFloat::IEEEdouble(); |
1982 | case AMDGPU::OPERAND_REG_IMM_FP16: |
1983 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
1984 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
1985 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
1986 | case AMDGPU::OPERAND_KIMM16: |
1987 | return &APFloat::IEEEhalf(); |
1988 | case AMDGPU::OPERAND_REG_IMM_BF16: |
1989 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
1990 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
1991 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
1992 | return &APFloat::BFloat(); |
1993 | default: |
1994 | llvm_unreachable("unsupported fp type" ); |
1995 | } |
1996 | } |
1997 | |
1998 | //===----------------------------------------------------------------------===// |
1999 | // Operand |
2000 | //===----------------------------------------------------------------------===// |
2001 | |
2002 | static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { |
2003 | bool Lost; |
2004 | |
  // Convert the literal to the operand's floating-point semantics.
2006 | APFloat::opStatus Status = FPLiteral.convert(ToSemantics: *getFltSemantics(VT), |
2007 | RM: APFloat::rmNearestTiesToEven, |
2008 | losesInfo: &Lost); |
  // We allow precision loss but not overflow or underflow.
2010 | if (Status != APFloat::opOK && |
2011 | Lost && |
2012 | ((Status & APFloat::opOverflow) != 0 || |
2013 | (Status & APFloat::opUnderflow) != 0)) { |
2014 | return false; |
2015 | } |
2016 | |
2017 | return true; |
2018 | } |
2019 | |
2020 | static bool isSafeTruncation(int64_t Val, unsigned Size) { |
2021 | return isUIntN(N: Size, x: Val) || isIntN(N: Size, x: Val); |
2022 | } |
2023 | |
2024 | static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { |
2025 | if (VT.getScalarType() == MVT::i16) |
2026 | return isInlinableLiteral32(Literal: Val, HasInv2Pi); |
2027 | |
2028 | if (VT.getScalarType() == MVT::f16) |
2029 | return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi); |
2030 | |
2031 | assert(VT.getScalarType() == MVT::bf16); |
2032 | |
2033 | return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi); |
2034 | } |
2035 | |
2036 | bool AMDGPUOperand::isInlinableImm(MVT type) const { |
2037 | |
  // This is a hack to allow named inline values like shared_base to be used
  // with both 32-bit and 64-bit operands, even though these values are
  // defined as 32-bit operands only.
2042 | if (isInlineValue()) { |
2043 | return true; |
2044 | } |
2045 | |
2046 | if (!isImmTy(ImmT: ImmTyNone)) { |
2047 | // Only plain immediates are inlinable (e.g. "clamp" attribute is not) |
2048 | return false; |
2049 | } |
  // TODO: We should avoid using host floats here. It would be better to check
  // the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on MIPS hosts.
2053 | |
2054 | APInt Literal(64, Imm.Val); |
2055 | |
2056 | if (Imm.IsFPImm) { // We got fp literal token |
2057 | if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand |
2058 | return AMDGPU::isInlinableLiteral64(Literal: Imm.Val, |
2059 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2060 | } |
2061 | |
2062 | APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); |
2063 | if (!canLosslesslyConvertToFPType(FPLiteral, VT: type)) |
2064 | return false; |
2065 | |
2066 | if (type.getScalarSizeInBits() == 16) { |
2067 | bool Lost = false; |
2068 | switch (type.getScalarType().SimpleTy) { |
2069 | default: |
2070 | llvm_unreachable("unknown 16-bit type" ); |
2071 | case MVT::bf16: |
2072 | FPLiteral.convert(ToSemantics: APFloatBase::BFloat(), RM: APFloat::rmNearestTiesToEven, |
2073 | losesInfo: &Lost); |
2074 | break; |
2075 | case MVT::f16: |
2076 | FPLiteral.convert(ToSemantics: APFloatBase::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, |
2077 | losesInfo: &Lost); |
2078 | break; |
2079 | case MVT::i16: |
2080 | FPLiteral.convert(ToSemantics: APFloatBase::IEEEsingle(), |
2081 | RM: APFloat::rmNearestTiesToEven, losesInfo: &Lost); |
2082 | break; |
2083 | } |
      // We need to use the 32-bit representation here because when a
      // floating-point inline constant is used as an i16 operand, its 32-bit
      // representation will be used. We need the 32-bit value to check whether
      // it is an FP inline constant.
2088 | uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); |
2089 | return isInlineableLiteralOp16(Val: ImmVal, VT: type, |
2090 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2091 | } |
2092 | |
2093 | // Check if single precision literal is inlinable |
2094 | return AMDGPU::isInlinableLiteral32( |
2095 | Literal: static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), |
2096 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2097 | } |
2098 | |
2099 | // We got int literal token. |
2100 | if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand |
2101 | return AMDGPU::isInlinableLiteral64(Literal: Imm.Val, |
2102 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2103 | } |
2104 | |
2105 | if (!isSafeTruncation(Val: Imm.Val, Size: type.getScalarSizeInBits())) { |
2106 | return false; |
2107 | } |
2108 | |
2109 | if (type.getScalarSizeInBits() == 16) { |
2110 | return isInlineableLiteralOp16( |
2111 | Val: static_cast<int16_t>(Literal.getLoBits(numBits: 16).getSExtValue()), |
2112 | VT: type, HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2113 | } |
2114 | |
2115 | return AMDGPU::isInlinableLiteral32( |
2116 | Literal: static_cast<int32_t>(Literal.getLoBits(numBits: 32).getZExtValue()), |
2117 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2118 | } |
2119 | |
2120 | bool AMDGPUOperand::isLiteralImm(MVT type) const { |
2121 | // Check that this immediate can be added as literal |
2122 | if (!isImmTy(ImmT: ImmTyNone)) { |
2123 | return false; |
2124 | } |
2125 | |
2126 | if (!Imm.IsFPImm) { |
2127 | // We got int literal token. |
2128 | |
2129 | if (type == MVT::f64 && hasFPModifiers()) { |
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, disable these cases.
2133 | return false; |
2134 | } |
2135 | |
2136 | unsigned Size = type.getSizeInBits(); |
2137 | if (Size == 64) |
2138 | Size = 32; |
2139 | |
2140 | // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP |
2141 | // types. |
2142 | return isSafeTruncation(Val: Imm.Val, Size); |
2143 | } |
2144 | |
2145 | // We got fp literal token |
2146 | if (type == MVT::f64) { // Expected 64-bit fp operand |
    // The low 32 bits of the literal would be set to zero, but we accept such
    // literals.
2148 | return true; |
2149 | } |
2150 | |
2151 | if (type == MVT::i64) { // Expected 64-bit int operand |
2152 | // We don't allow fp literals in 64-bit integer instructions. It is |
2153 | // unclear how we should encode them. |
2154 | return false; |
2155 | } |
2156 | |
2157 | // We allow fp literals with f16x2 operands assuming that the specified |
2158 | // literal goes into the lower half and the upper half is zero. We also |
2159 | // require that the literal may be losslessly converted to f16. |
2160 | // |
2161 | // For i16x2 operands, we assume that the specified literal is encoded as a |
2162 | // single-precision float. This is pretty odd, but it matches SP3 and what |
2163 | // happens in hardware. |
2164 | MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 |
2165 | : (type == MVT::v2i16) ? MVT::f32 |
2166 | : (type == MVT::v2f32) ? MVT::f32 |
2167 | : type; |
2168 | |
2169 | APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); |
2170 | return canLosslesslyConvertToFPType(FPLiteral, VT: ExpectedType); |
2171 | } |
2172 | |
2173 | bool AMDGPUOperand::isRegClass(unsigned RCID) const { |
2174 | return isRegKind() && AsmParser->getMRI()->getRegClass(i: RCID).contains(Reg: getReg()); |
2175 | } |
2176 | |
2177 | bool AMDGPUOperand::isVRegWithInputMods() const { |
2178 | return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) || |
2179 | // GFX90A allows DPP on 64-bit operands. |
2180 | (isRegClass(RCID: AMDGPU::VReg_64RegClassID) && |
2181 | AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]); |
2182 | } |
2183 | |
2184 | template <bool IsFake16> |
2185 | bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const { |
2186 | return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID |
2187 | : AMDGPU::VGPR_16_Lo128RegClassID); |
2188 | } |
2189 | |
2190 | template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const { |
2191 | return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32RegClassID |
2192 | : AMDGPU::VGPR_16RegClassID); |
2193 | } |
2194 | |
2195 | bool AMDGPUOperand::isSDWAOperand(MVT type) const { |
2196 | if (AsmParser->isVI()) |
2197 | return isVReg32(); |
2198 | if (AsmParser->isGFX9Plus()) |
2199 | return isRegClass(RCID: AMDGPU::VS_32RegClassID) || isInlinableImm(type); |
2200 | return false; |
2201 | } |
2202 | |
2203 | bool AMDGPUOperand::isSDWAFP16Operand() const { |
2204 | return isSDWAOperand(type: MVT::f16); |
2205 | } |
2206 | |
2207 | bool AMDGPUOperand::isSDWAFP32Operand() const { |
2208 | return isSDWAOperand(type: MVT::f32); |
2209 | } |
2210 | |
2211 | bool AMDGPUOperand::isSDWAInt16Operand() const { |
2212 | return isSDWAOperand(type: MVT::i16); |
2213 | } |
2214 | |
2215 | bool AMDGPUOperand::isSDWAInt32Operand() const { |
2216 | return isSDWAOperand(type: MVT::i32); |
2217 | } |
2218 | |
2219 | bool AMDGPUOperand::isBoolReg() const { |
2220 | auto FB = AsmParser->getFeatureBits(); |
2221 | return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) || |
2222 | (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32())); |
2223 | } |
2224 | |
2225 | uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const |
2226 | { |
2227 | assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); |
2228 | assert(Size == 2 || Size == 4 || Size == 8); |
2229 | |
2230 | const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); |
2231 | |
2232 | if (Imm.Mods.Abs) { |
2233 | Val &= ~FpSignMask; |
2234 | } |
2235 | if (Imm.Mods.Neg) { |
2236 | Val ^= FpSignMask; |
2237 | } |
2238 | |
2239 | return Val; |
2240 | } |
2241 | |
2242 | void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { |
2243 | if (isExpr()) { |
2244 | Inst.addOperand(Op: MCOperand::createExpr(Val: Expr)); |
2245 | return; |
2246 | } |
2247 | |
2248 | if (AMDGPU::isSISrcOperand(Desc: AsmParser->getMII()->get(Opcode: Inst.getOpcode()), |
2249 | OpNo: Inst.getNumOperands())) { |
2250 | addLiteralImmOperand(Inst, Val: Imm.Val, |
2251 | ApplyModifiers: ApplyModifiers & |
2252 | isImmTy(ImmT: ImmTyNone) && Imm.Mods.hasFPModifiers()); |
2253 | } else { |
2254 | assert(!isImmTy(ImmTyNone) || !hasModifiers()); |
2255 | Inst.addOperand(Op: MCOperand::createImm(Val: Imm.Val)); |
2256 | setImmKindNone(); |
2257 | } |
2258 | } |
2259 | |
2260 | void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { |
2261 | const auto& InstDesc = AsmParser->getMII()->get(Opcode: Inst.getOpcode()); |
2262 | auto OpNum = Inst.getNumOperands(); |
2263 | // Check that this operand accepts literals |
2264 | assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); |
2265 | |
2266 | if (ApplyModifiers) { |
2267 | assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); |
2268 | const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(Desc: InstDesc, OpNo: OpNum); |
2269 | Val = applyInputFPModifiers(Val, Size); |
2270 | } |
2271 | |
2272 | APInt Literal(64, Val); |
2273 | uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; |
2274 | |
2275 | if (Imm.IsFPImm) { // We got fp literal token |
2276 | switch (OpTy) { |
2277 | case AMDGPU::OPERAND_REG_IMM_INT64: |
2278 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2279 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
2280 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2281 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2282 | if (AMDGPU::isInlinableLiteral64(Literal: Literal.getZExtValue(), |
2283 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2284 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getZExtValue())); |
2285 | setImmKindConst(); |
2286 | return; |
2287 | } |
2288 | |
2289 | // Non-inlineable |
2290 | if (AMDGPU::isSISrcFPOperand(Desc: InstDesc, OpNo: OpNum)) { // Expected 64-bit fp operand |
2291 | // For fp operands we check if low 32 bits are zeros |
2292 | if (Literal.getLoBits(numBits: 32) != 0) { |
2293 | const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(L: Inst.getLoc(), |
2294 | Msg: "Can't encode literal as exact 64-bit floating-point operand. " |
2295 | "Low 32-bits will be set to zero" ); |
2296 | Val &= 0xffffffff00000000u; |
2297 | } |
2298 | |
2299 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2300 | setImmKindLiteral(); |
2301 | return; |
2302 | } |
2303 | |
2304 | // We don't allow fp literals in 64-bit integer instructions. It is |
2305 | // unclear how we should encode them. This case should be checked earlier |
2306 | // in predicate methods (isLiteralImm()) |
2307 | llvm_unreachable("fp literal in 64-bit integer instruction." ); |
2308 | |
2309 | case AMDGPU::OPERAND_REG_IMM_BF16: |
2310 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
2311 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
2312 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2313 | if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) { |
      // This is 1/(2*pi), which is going to be truncated to bf16 with a loss
      // of precision. The constant represents the idiomatic fp32 value of
      // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16
      // bits cleared. Prevent rounding below.
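      // For reference: the fp32 pattern of 1/(2*pi) is 0x3e22f983; clearing
      // its low 16 bits gives the bf16 value 0x3e22 used here, whereas
      // round-to-nearest conversion would produce 0x3e23.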
2318 | Inst.addOperand(Op: MCOperand::createImm(Val: 0x3e22)); |
2319 | setImmKindLiteral(); |
2320 | return; |
2321 | } |
2322 | [[fallthrough]]; |
2323 | |
2324 | case AMDGPU::OPERAND_REG_IMM_INT32: |
2325 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2326 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
2327 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2328 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
2329 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2330 | case AMDGPU::OPERAND_REG_IMM_INT16: |
2331 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2332 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
2333 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2334 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
2335 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2336 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2337 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2338 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2339 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
2340 | case AMDGPU::OPERAND_KIMM32: |
2341 | case AMDGPU::OPERAND_KIMM16: |
2342 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { |
2343 | bool lost; |
2344 | APFloat FPLiteral(APFloat::IEEEdouble(), Literal); |
    // Convert the literal to the operand's floating-point semantics.
2346 | FPLiteral.convert(ToSemantics: *getOpFltSemantics(OperandType: OpTy), |
2347 | RM: APFloat::rmNearestTiesToEven, losesInfo: &lost); |
    // We allow precision loss but not overflow or underflow; this should have
    // been checked earlier in isLiteralImm().
2350 | |
2351 | uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); |
2352 | Inst.addOperand(Op: MCOperand::createImm(Val: ImmVal)); |
2353 | if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { |
2354 | setImmKindMandatoryLiteral(); |
2355 | } else { |
2356 | setImmKindLiteral(); |
2357 | } |
2358 | return; |
2359 | } |
2360 | default: |
2361 | llvm_unreachable("invalid operand size" ); |
2362 | } |
2363 | |
2364 | return; |
2365 | } |
2366 | |
2367 | // We got int literal token. |
2368 | // Only sign extend inline immediates. |
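  // E.g. the int token -1 falls in the inline range [-16, 64] for a 32-bit
  // operand and is added as an inline constant, while 0x12345678 is not
  // inlinable and is added through the literal path below.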
2369 | switch (OpTy) { |
2370 | case AMDGPU::OPERAND_REG_IMM_INT32: |
2371 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2372 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
2373 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2374 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
2375 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2376 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2377 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2378 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2379 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2380 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
2381 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: |
2382 | if (isSafeTruncation(Val, Size: 32) && |
2383 | AMDGPU::isInlinableLiteral32(Literal: static_cast<int32_t>(Val), |
2384 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2385 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2386 | setImmKindConst(); |
2387 | return; |
2388 | } |
2389 | |
2390 | Inst.addOperand(Op: MCOperand::createImm(Val: Lo_32(Value: Val))); |
2391 | setImmKindLiteral(); |
2392 | return; |
2393 | |
2394 | case AMDGPU::OPERAND_REG_IMM_INT64: |
2395 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2396 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
2397 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2398 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2399 | if (AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2400 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2401 | setImmKindConst(); |
2402 | return; |
2403 | } |
2404 | |
2405 | Val = AMDGPU::isSISrcFPOperand(Desc: InstDesc, OpNo: OpNum) ? (uint64_t)Val << 32 |
2406 | : Lo_32(Value: Val); |
2407 | |
2408 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2409 | setImmKindLiteral(); |
2410 | return; |
2411 | |
2412 | case AMDGPU::OPERAND_REG_IMM_INT16: |
2413 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
2414 | if (isSafeTruncation(Val, Size: 16) && |
2415 | AMDGPU::isInlinableIntLiteral(Literal: static_cast<int16_t>(Val))) { |
2416 | Inst.addOperand(Op: MCOperand::createImm(Val: Lo_32(Value: Val))); |
2417 | setImmKindConst(); |
2418 | return; |
2419 | } |
2420 | |
2421 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2422 | setImmKindLiteral(); |
2423 | return; |
2424 | |
2425 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2426 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2427 | if (isSafeTruncation(Val, Size: 16) && |
2428 | AMDGPU::isInlinableLiteralFP16(Literal: static_cast<int16_t>(Val), |
2429 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2430 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2431 | setImmKindConst(); |
2432 | return; |
2433 | } |
2434 | |
2435 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2436 | setImmKindLiteral(); |
2437 | return; |
2438 | |
2439 | case AMDGPU::OPERAND_REG_IMM_BF16: |
2440 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
2441 | if (isSafeTruncation(Val, Size: 16) && |
2442 | AMDGPU::isInlinableLiteralBF16(Literal: static_cast<int16_t>(Val), |
2443 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2444 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2445 | setImmKindConst(); |
2446 | return; |
2447 | } |
2448 | |
2449 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2450 | setImmKindLiteral(); |
2451 | return; |
2452 | |
2453 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: { |
2454 | assert(isSafeTruncation(Val, 16)); |
2455 | assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))); |
2456 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2457 | return; |
2458 | } |
2459 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { |
2460 | assert(isSafeTruncation(Val, 16)); |
2461 | assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val), |
2462 | AsmParser->hasInv2PiInlineImm())); |
2463 | |
2464 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2465 | return; |
2466 | } |
2467 | |
2468 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: { |
2469 | assert(isSafeTruncation(Val, 16)); |
2470 | assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val), |
2471 | AsmParser->hasInv2PiInlineImm())); |
2472 | |
2473 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2474 | return; |
2475 | } |
2476 | |
2477 | case AMDGPU::OPERAND_KIMM32: |
2478 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getLoBits(numBits: 32).getZExtValue())); |
2479 | setImmKindMandatoryLiteral(); |
2480 | return; |
2481 | case AMDGPU::OPERAND_KIMM16: |
2482 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getLoBits(numBits: 16).getZExtValue())); |
2483 | setImmKindMandatoryLiteral(); |
2484 | return; |
2485 | default: |
2486 | llvm_unreachable("invalid operand size" ); |
2487 | } |
2488 | } |
2489 | |
2490 | void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { |
2491 | Inst.addOperand(Op: MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: getReg(), STI: AsmParser->getSTI()))); |
2492 | } |
2493 | |
2494 | bool AMDGPUOperand::isInlineValue() const { |
2495 | return isRegKind() && ::isInlineValue(Reg: getReg()); |
2496 | } |
2497 | |
2498 | //===----------------------------------------------------------------------===// |
2499 | // AsmParser |
2500 | //===----------------------------------------------------------------------===// |
2501 | |
2502 | void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) { |
  // TODO: make these pre-defined variables read-only.
  // Currently there is no suitable machinery in core llvm-mc for this.
  // MCSymbol::isRedefinable is intended for another purpose, and
  // AsmParser::parseDirectiveSet() cannot be specialized for a specific
  // target.
2507 | MCContext &Ctx = getContext(); |
2508 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id); |
2509 | Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx)); |
2510 | } |
2511 | |
2512 | static int getRegClass(RegisterKind Is, unsigned RegWidth) { |
2513 | if (Is == IS_VGPR) { |
2514 | switch (RegWidth) { |
2515 | default: return -1; |
2516 | case 32: |
2517 | return AMDGPU::VGPR_32RegClassID; |
2518 | case 64: |
2519 | return AMDGPU::VReg_64RegClassID; |
2520 | case 96: |
2521 | return AMDGPU::VReg_96RegClassID; |
2522 | case 128: |
2523 | return AMDGPU::VReg_128RegClassID; |
2524 | case 160: |
2525 | return AMDGPU::VReg_160RegClassID; |
2526 | case 192: |
2527 | return AMDGPU::VReg_192RegClassID; |
2528 | case 224: |
2529 | return AMDGPU::VReg_224RegClassID; |
2530 | case 256: |
2531 | return AMDGPU::VReg_256RegClassID; |
2532 | case 288: |
2533 | return AMDGPU::VReg_288RegClassID; |
2534 | case 320: |
2535 | return AMDGPU::VReg_320RegClassID; |
2536 | case 352: |
2537 | return AMDGPU::VReg_352RegClassID; |
2538 | case 384: |
2539 | return AMDGPU::VReg_384RegClassID; |
2540 | case 512: |
2541 | return AMDGPU::VReg_512RegClassID; |
2542 | case 1024: |
2543 | return AMDGPU::VReg_1024RegClassID; |
2544 | } |
2545 | } else if (Is == IS_TTMP) { |
2546 | switch (RegWidth) { |
2547 | default: return -1; |
2548 | case 32: |
2549 | return AMDGPU::TTMP_32RegClassID; |
2550 | case 64: |
2551 | return AMDGPU::TTMP_64RegClassID; |
2552 | case 128: |
2553 | return AMDGPU::TTMP_128RegClassID; |
2554 | case 256: |
2555 | return AMDGPU::TTMP_256RegClassID; |
2556 | case 512: |
2557 | return AMDGPU::TTMP_512RegClassID; |
2558 | } |
2559 | } else if (Is == IS_SGPR) { |
2560 | switch (RegWidth) { |
2561 | default: return -1; |
2562 | case 32: |
2563 | return AMDGPU::SGPR_32RegClassID; |
2564 | case 64: |
2565 | return AMDGPU::SGPR_64RegClassID; |
2566 | case 96: |
2567 | return AMDGPU::SGPR_96RegClassID; |
2568 | case 128: |
2569 | return AMDGPU::SGPR_128RegClassID; |
2570 | case 160: |
2571 | return AMDGPU::SGPR_160RegClassID; |
2572 | case 192: |
2573 | return AMDGPU::SGPR_192RegClassID; |
2574 | case 224: |
2575 | return AMDGPU::SGPR_224RegClassID; |
2576 | case 256: |
2577 | return AMDGPU::SGPR_256RegClassID; |
2578 | case 288: |
2579 | return AMDGPU::SGPR_288RegClassID; |
2580 | case 320: |
2581 | return AMDGPU::SGPR_320RegClassID; |
2582 | case 352: |
2583 | return AMDGPU::SGPR_352RegClassID; |
2584 | case 384: |
2585 | return AMDGPU::SGPR_384RegClassID; |
2586 | case 512: |
2587 | return AMDGPU::SGPR_512RegClassID; |
2588 | } |
2589 | } else if (Is == IS_AGPR) { |
2590 | switch (RegWidth) { |
2591 | default: return -1; |
2592 | case 32: |
2593 | return AMDGPU::AGPR_32RegClassID; |
2594 | case 64: |
2595 | return AMDGPU::AReg_64RegClassID; |
2596 | case 96: |
2597 | return AMDGPU::AReg_96RegClassID; |
2598 | case 128: |
2599 | return AMDGPU::AReg_128RegClassID; |
2600 | case 160: |
2601 | return AMDGPU::AReg_160RegClassID; |
2602 | case 192: |
2603 | return AMDGPU::AReg_192RegClassID; |
2604 | case 224: |
2605 | return AMDGPU::AReg_224RegClassID; |
2606 | case 256: |
2607 | return AMDGPU::AReg_256RegClassID; |
2608 | case 288: |
2609 | return AMDGPU::AReg_288RegClassID; |
2610 | case 320: |
2611 | return AMDGPU::AReg_320RegClassID; |
2612 | case 352: |
2613 | return AMDGPU::AReg_352RegClassID; |
2614 | case 384: |
2615 | return AMDGPU::AReg_384RegClassID; |
2616 | case 512: |
2617 | return AMDGPU::AReg_512RegClassID; |
2618 | case 1024: |
2619 | return AMDGPU::AReg_1024RegClassID; |
2620 | } |
2621 | } |
2622 | return -1; |
2623 | } |
2624 | |
2625 | static MCRegister getSpecialRegForName(StringRef RegName) { |
2626 | return StringSwitch<unsigned>(RegName) |
2627 | .Case(S: "exec" , Value: AMDGPU::EXEC) |
2628 | .Case(S: "vcc" , Value: AMDGPU::VCC) |
2629 | .Case(S: "flat_scratch" , Value: AMDGPU::FLAT_SCR) |
2630 | .Case(S: "xnack_mask" , Value: AMDGPU::XNACK_MASK) |
2631 | .Case(S: "shared_base" , Value: AMDGPU::SRC_SHARED_BASE) |
2632 | .Case(S: "src_shared_base" , Value: AMDGPU::SRC_SHARED_BASE) |
2633 | .Case(S: "shared_limit" , Value: AMDGPU::SRC_SHARED_LIMIT) |
2634 | .Case(S: "src_shared_limit" , Value: AMDGPU::SRC_SHARED_LIMIT) |
2635 | .Case(S: "private_base" , Value: AMDGPU::SRC_PRIVATE_BASE) |
2636 | .Case(S: "src_private_base" , Value: AMDGPU::SRC_PRIVATE_BASE) |
2637 | .Case(S: "private_limit" , Value: AMDGPU::SRC_PRIVATE_LIMIT) |
2638 | .Case(S: "src_private_limit" , Value: AMDGPU::SRC_PRIVATE_LIMIT) |
2639 | .Case(S: "pops_exiting_wave_id" , Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID) |
2640 | .Case(S: "src_pops_exiting_wave_id" , Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID) |
2641 | .Case(S: "lds_direct" , Value: AMDGPU::LDS_DIRECT) |
2642 | .Case(S: "src_lds_direct" , Value: AMDGPU::LDS_DIRECT) |
2643 | .Case(S: "m0" , Value: AMDGPU::M0) |
2644 | .Case(S: "vccz" , Value: AMDGPU::SRC_VCCZ) |
2645 | .Case(S: "src_vccz" , Value: AMDGPU::SRC_VCCZ) |
2646 | .Case(S: "execz" , Value: AMDGPU::SRC_EXECZ) |
2647 | .Case(S: "src_execz" , Value: AMDGPU::SRC_EXECZ) |
2648 | .Case(S: "scc" , Value: AMDGPU::SRC_SCC) |
2649 | .Case(S: "src_scc" , Value: AMDGPU::SRC_SCC) |
2650 | .Case(S: "tba" , Value: AMDGPU::TBA) |
2651 | .Case(S: "tma" , Value: AMDGPU::TMA) |
2652 | .Case(S: "flat_scratch_lo" , Value: AMDGPU::FLAT_SCR_LO) |
2653 | .Case(S: "flat_scratch_hi" , Value: AMDGPU::FLAT_SCR_HI) |
2654 | .Case(S: "xnack_mask_lo" , Value: AMDGPU::XNACK_MASK_LO) |
2655 | .Case(S: "xnack_mask_hi" , Value: AMDGPU::XNACK_MASK_HI) |
2656 | .Case(S: "vcc_lo" , Value: AMDGPU::VCC_LO) |
2657 | .Case(S: "vcc_hi" , Value: AMDGPU::VCC_HI) |
2658 | .Case(S: "exec_lo" , Value: AMDGPU::EXEC_LO) |
2659 | .Case(S: "exec_hi" , Value: AMDGPU::EXEC_HI) |
2660 | .Case(S: "tma_lo" , Value: AMDGPU::TMA_LO) |
2661 | .Case(S: "tma_hi" , Value: AMDGPU::TMA_HI) |
2662 | .Case(S: "tba_lo" , Value: AMDGPU::TBA_LO) |
2663 | .Case(S: "tba_hi" , Value: AMDGPU::TBA_HI) |
2664 | .Case(S: "pc" , Value: AMDGPU::PC_REG) |
2665 | .Case(S: "null" , Value: AMDGPU::SGPR_NULL) |
2666 | .Default(Value: AMDGPU::NoRegister); |
2667 | } |
2668 | |
2669 | bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, |
2670 | SMLoc &EndLoc, bool RestoreOnFailure) { |
2671 | auto R = parseRegister(); |
2672 | if (!R) return true; |
2673 | assert(R->isReg()); |
2674 | RegNo = R->getReg(); |
2675 | StartLoc = R->getStartLoc(); |
2676 | EndLoc = R->getEndLoc(); |
2677 | return false; |
2678 | } |
2679 | |
2680 | bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, |
2681 | SMLoc &EndLoc) { |
2682 | return ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); |
2683 | } |
2684 | |
2685 | ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, |
2686 | SMLoc &EndLoc) { |
2687 | bool Result = ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); |
2688 | bool PendingErrors = getParser().hasPendingError(); |
2689 | getParser().clearPendingErrors(); |
2690 | if (PendingErrors) |
2691 | return ParseStatus::Failure; |
2692 | if (Result) |
2693 | return ParseStatus::NoMatch; |
2694 | return ParseStatus::Success; |
2695 | } |
2696 | |
2697 | bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth, |
2698 | RegisterKind RegKind, |
2699 | MCRegister Reg1, SMLoc Loc) { |
2700 | switch (RegKind) { |
2701 | case IS_SPECIAL: |
2702 | if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { |
2703 | Reg = AMDGPU::EXEC; |
2704 | RegWidth = 64; |
2705 | return true; |
2706 | } |
2707 | if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { |
2708 | Reg = AMDGPU::FLAT_SCR; |
2709 | RegWidth = 64; |
2710 | return true; |
2711 | } |
2712 | if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { |
2713 | Reg = AMDGPU::XNACK_MASK; |
2714 | RegWidth = 64; |
2715 | return true; |
2716 | } |
2717 | if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { |
2718 | Reg = AMDGPU::VCC; |
2719 | RegWidth = 64; |
2720 | return true; |
2721 | } |
2722 | if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { |
2723 | Reg = AMDGPU::TBA; |
2724 | RegWidth = 64; |
2725 | return true; |
2726 | } |
2727 | if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { |
2728 | Reg = AMDGPU::TMA; |
2729 | RegWidth = 64; |
2730 | return true; |
2731 | } |
2732 | Error(L: Loc, Msg: "register does not fit in the list" ); |
2733 | return false; |
2734 | case IS_VGPR: |
2735 | case IS_SGPR: |
2736 | case IS_AGPR: |
2737 | case IS_TTMP: |
2738 | if (Reg1 != Reg + RegWidth / 32) { |
2739 | Error(L: Loc, Msg: "registers in a list must have consecutive indices" ); |
2740 | return false; |
2741 | } |
2742 | RegWidth += 32; |
2743 | return true; |
2744 | default: |
2745 | llvm_unreachable("unexpected register kind" ); |
2746 | } |
2747 | } |
2748 | |
2749 | struct RegInfo { |
2750 | StringLiteral Name; |
2751 | RegisterKind Kind; |
2752 | }; |
2753 | |
2754 | static constexpr RegInfo RegularRegisters[] = { |
2755 | {.Name: {"v" }, .Kind: IS_VGPR}, |
2756 | {.Name: {"s" }, .Kind: IS_SGPR}, |
2757 | {.Name: {"ttmp" }, .Kind: IS_TTMP}, |
2758 | {.Name: {"acc" }, .Kind: IS_AGPR}, |
2759 | {.Name: {"a" }, .Kind: IS_AGPR}, |
2760 | }; |
2761 | |
2762 | static bool isRegularReg(RegisterKind Kind) { |
2763 | return Kind == IS_VGPR || |
2764 | Kind == IS_SGPR || |
2765 | Kind == IS_TTMP || |
2766 | Kind == IS_AGPR; |
2767 | } |
2768 | |
2769 | static const RegInfo* getRegularRegInfo(StringRef Str) { |
2770 | for (const RegInfo &Reg : RegularRegisters) |
2771 | if (Str.starts_with(Prefix: Reg.Name)) |
2772 | return &Reg; |
2773 | return nullptr; |
2774 | } |
2775 | |
2776 | static bool getRegNum(StringRef Str, unsigned& Num) { |
2777 | return !Str.getAsInteger(Radix: 10, Result&: Num); |
2778 | } |
2779 | |
2780 | bool |
2781 | AMDGPUAsmParser::isRegister(const AsmToken &Token, |
2782 | const AsmToken &NextToken) const { |
2783 | |
2784 | // A list of consecutive registers: [s0,s1,s2,s3] |
2785 | if (Token.is(K: AsmToken::LBrac)) |
2786 | return true; |
2787 | |
2788 | if (!Token.is(K: AsmToken::Identifier)) |
2789 | return false; |
2790 | |
2791 | // A single register like s0 or a range of registers like s[0:1] |
2792 | |
2793 | StringRef Str = Token.getString(); |
2794 | const RegInfo *Reg = getRegularRegInfo(Str); |
2795 | if (Reg) { |
2796 | StringRef RegName = Reg->Name; |
2797 | StringRef RegSuffix = Str.substr(Start: RegName.size()); |
2798 | if (!RegSuffix.empty()) { |
2799 | RegSuffix.consume_back(Suffix: ".l" ); |
2800 | RegSuffix.consume_back(Suffix: ".h" ); |
2801 | unsigned Num; |
2802 | // A single register with an index: rXX |
2803 | if (getRegNum(Str: RegSuffix, Num)) |
2804 | return true; |
2805 | } else { |
2806 | // A range of registers: r[XX:YY]. |
2807 | if (NextToken.is(K: AsmToken::LBrac)) |
2808 | return true; |
2809 | } |
2810 | } |
2811 | |
2812 | return getSpecialRegForName(RegName: Str).isValid(); |
2813 | } |
2814 | |
2815 | bool |
2816 | AMDGPUAsmParser::isRegister() |
2817 | { |
2818 | return isRegister(Token: getToken(), NextToken: peekToken()); |
2819 | } |
2820 | |
2821 | MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, |
2822 | unsigned SubReg, unsigned RegWidth, |
2823 | SMLoc Loc) { |
2824 | assert(isRegularReg(RegKind)); |
2825 | |
2826 | unsigned AlignSize = 1; |
2827 | if (RegKind == IS_SGPR || RegKind == IS_TTMP) { |
2828 | // SGPR and TTMP registers must be aligned. |
2829 | // Max required alignment is 4 dwords. |
2830 | AlignSize = std::min(a: llvm::bit_ceil(Value: RegWidth / 32), b: 4u); |
2831 | } |
2832 | |
2833 | if (RegNum % AlignSize != 0) { |
2834 | Error(L: Loc, Msg: "invalid register alignment" ); |
2835 | return MCRegister(); |
2836 | } |
2837 | |
2838 | unsigned RegIdx = RegNum / AlignSize; |
2839 | int RCID = getRegClass(Is: RegKind, RegWidth); |
2840 | if (RCID == -1) { |
2841 | Error(L: Loc, Msg: "invalid or unsupported register size" ); |
2842 | return MCRegister(); |
2843 | } |
2844 | |
2845 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
2846 | const MCRegisterClass RC = TRI->getRegClass(i: RCID); |
2847 | if (RegIdx >= RC.getNumRegs()) { |
2848 | Error(L: Loc, Msg: "register index is out of range" ); |
2849 | return MCRegister(); |
2850 | } |
2851 | |
2852 | MCRegister Reg = RC.getRegister(i: RegIdx); |
2853 | |
2854 | if (SubReg) { |
2855 | Reg = TRI->getSubReg(Reg, Idx: SubReg); |
2856 | |
2857 | // Currently all regular registers have their .l and .h subregisters, so |
2858 | // we should never need to generate an error here. |
2859 | assert(Reg && "Invalid subregister!" ); |
2860 | } |
2861 | |
2862 | return Reg; |
2863 | } |
2864 | |
2865 | bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth, |
2866 | unsigned &SubReg) { |
2867 | int64_t RegLo, RegHi; |
2868 | if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "missing register index" )) |
2869 | return false; |
2870 | |
2871 | SMLoc FirstIdxLoc = getLoc(); |
2872 | SMLoc SecondIdxLoc; |
2873 | |
2874 | if (!parseExpr(Imm&: RegLo)) |
2875 | return false; |
2876 | |
2877 | if (trySkipToken(Kind: AsmToken::Colon)) { |
2878 | SecondIdxLoc = getLoc(); |
2879 | if (!parseExpr(Imm&: RegHi)) |
2880 | return false; |
2881 | } else { |
2882 | RegHi = RegLo; |
2883 | } |
2884 | |
2885 | if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket" )) |
2886 | return false; |
2887 | |
2888 | if (!isUInt<32>(x: RegLo)) { |
2889 | Error(L: FirstIdxLoc, Msg: "invalid register index" ); |
2890 | return false; |
2891 | } |
2892 | |
2893 | if (!isUInt<32>(x: RegHi)) { |
2894 | Error(L: SecondIdxLoc, Msg: "invalid register index" ); |
2895 | return false; |
2896 | } |
2897 | |
2898 | if (RegLo > RegHi) { |
2899 | Error(L: FirstIdxLoc, Msg: "first register index should not exceed second index" ); |
2900 | return false; |
2901 | } |
2902 | |
2903 | if (RegHi == RegLo) { |
2904 | StringRef RegSuffix = getTokenStr(); |
2905 | if (RegSuffix == ".l" ) { |
2906 | SubReg = AMDGPU::lo16; |
2907 | lex(); |
2908 | } else if (RegSuffix == ".h" ) { |
2909 | SubReg = AMDGPU::hi16; |
2910 | lex(); |
2911 | } |
2912 | } |
2913 | |
2914 | Num = static_cast<unsigned>(RegLo); |
2915 | RegWidth = 32 * ((RegHi - RegLo) + 1); |
2916 | |
2917 | return true; |
2918 | } |
2919 | |
2920 | MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, |
2921 | unsigned &RegNum, |
2922 | unsigned &RegWidth, |
2923 | SmallVectorImpl<AsmToken> &Tokens) { |
2924 | assert(isToken(AsmToken::Identifier)); |
2925 | MCRegister Reg = getSpecialRegForName(RegName: getTokenStr()); |
2926 | if (Reg) { |
2927 | RegNum = 0; |
2928 | RegWidth = 32; |
2929 | RegKind = IS_SPECIAL; |
2930 | Tokens.push_back(Elt: getToken()); |
2931 | lex(); // skip register name |
2932 | } |
2933 | return Reg; |
2934 | } |
2935 | |
2936 | MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, |
2937 | unsigned &RegNum, |
2938 | unsigned &RegWidth, |
2939 | SmallVectorImpl<AsmToken> &Tokens) { |
2940 | assert(isToken(AsmToken::Identifier)); |
2941 | StringRef RegName = getTokenStr(); |
2942 | auto Loc = getLoc(); |
2943 | |
2944 | const RegInfo *RI = getRegularRegInfo(Str: RegName); |
2945 | if (!RI) { |
2946 | Error(L: Loc, Msg: "invalid register name" ); |
2947 | return MCRegister(); |
2948 | } |
2949 | |
2950 | Tokens.push_back(Elt: getToken()); |
2951 | lex(); // skip register name |
2952 | |
2953 | RegKind = RI->Kind; |
2954 | StringRef RegSuffix = RegName.substr(Start: RI->Name.size()); |
2955 | unsigned SubReg = NoSubRegister; |
2956 | if (!RegSuffix.empty()) { |
2957 | if (RegSuffix.consume_back(Suffix: ".l" )) |
2958 | SubReg = AMDGPU::lo16; |
2959 | else if (RegSuffix.consume_back(Suffix: ".h" )) |
2960 | SubReg = AMDGPU::hi16; |
2961 | |
2962 | // Single 32-bit register: vXX. |
2963 | if (!getRegNum(Str: RegSuffix, Num&: RegNum)) { |
2964 | Error(L: Loc, Msg: "invalid register index" ); |
2965 | return MCRegister(); |
2966 | } |
2967 | RegWidth = 32; |
2968 | } else { |
2969 | // Range of registers: v[XX:YY]. ":YY" is optional. |
2970 | if (!ParseRegRange(Num&: RegNum, RegWidth, SubReg)) |
2971 | return MCRegister(); |
2972 | } |
2973 | |
2974 | return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); |
2975 | } |
2976 | |
2977 | MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, |
2978 | unsigned &RegNum, unsigned &RegWidth, |
2979 | SmallVectorImpl<AsmToken> &Tokens) { |
2980 | MCRegister Reg; |
2981 | auto ListLoc = getLoc(); |
2982 | |
2983 | if (!skipToken(Kind: AsmToken::LBrac, |
2984 | ErrMsg: "expected a register or a list of registers" )) { |
2985 | return MCRegister(); |
2986 | } |
2987 | |
2988 | // List of consecutive registers, e.g.: [s0,s1,s2,s3] |
2989 | |
2990 | auto Loc = getLoc(); |
2991 | if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) |
2992 | return MCRegister(); |
2993 | if (RegWidth != 32) { |
2994 | Error(L: Loc, Msg: "expected a single 32-bit register" ); |
2995 | return MCRegister(); |
2996 | } |
2997 | |
2998 | for (; trySkipToken(Kind: AsmToken::Comma); ) { |
2999 | RegisterKind NextRegKind; |
3000 | MCRegister NextReg; |
3001 | unsigned NextRegNum, NextRegWidth; |
3002 | Loc = getLoc(); |
3003 | |
3004 | if (!ParseAMDGPURegister(RegKind&: NextRegKind, Reg&: NextReg, |
3005 | RegNum&: NextRegNum, RegWidth&: NextRegWidth, |
3006 | Tokens)) { |
3007 | return MCRegister(); |
3008 | } |
3009 | if (NextRegWidth != 32) { |
3010 | Error(L: Loc, Msg: "expected a single 32-bit register" ); |
3011 | return MCRegister(); |
3012 | } |
3013 | if (NextRegKind != RegKind) { |
3014 | Error(L: Loc, Msg: "registers in a list must be of the same kind" ); |
3015 | return MCRegister(); |
3016 | } |
3017 | if (!AddNextRegisterToList(Reg, RegWidth, RegKind, Reg1: NextReg, Loc)) |
3018 | return MCRegister(); |
3019 | } |
3020 | |
3021 | if (!skipToken(Kind: AsmToken::RBrac, |
3022 | ErrMsg: "expected a comma or a closing square bracket" )) { |
3023 | return MCRegister(); |
3024 | } |
3025 | |
3026 | if (isRegularReg(Kind: RegKind)) |
3027 | Reg = getRegularReg(RegKind, RegNum, SubReg: NoSubRegister, RegWidth, Loc: ListLoc); |
3028 | |
3029 | return Reg; |
3030 | } |
3031 | |
3032 | bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, |
3033 | MCRegister &Reg, unsigned &RegNum, |
3034 | unsigned &RegWidth, |
3035 | SmallVectorImpl<AsmToken> &Tokens) { |
3036 | auto Loc = getLoc(); |
3037 | Reg = MCRegister(); |
3038 | |
3039 | if (isToken(Kind: AsmToken::Identifier)) { |
3040 | Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); |
3041 | if (!Reg) |
3042 | Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); |
3043 | } else { |
3044 | Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); |
3045 | } |
3046 | |
3047 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3048 | if (!Reg) { |
3049 | assert(Parser.hasPendingError()); |
3050 | return false; |
3051 | } |
3052 | |
3053 | if (!subtargetHasRegister(MRI: *TRI, Reg)) { |
3054 | if (Reg == AMDGPU::SGPR_NULL) { |
3055 | Error(L: Loc, Msg: "'null' operand is not supported on this GPU" ); |
3056 | } else { |
3057 | Error(L: Loc, Msg: Twine(AMDGPUInstPrinter::getRegisterName(Reg)) + |
3058 | " register not available on this GPU" ); |
3059 | } |
3060 | return false; |
3061 | } |
3062 | |
3063 | return true; |
3064 | } |
3065 | |
3066 | bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, |
3067 | MCRegister &Reg, unsigned &RegNum, |
3068 | unsigned &RegWidth, |
3069 | bool RestoreOnFailure /*=false*/) { |
3070 | Reg = MCRegister(); |
3071 | |
3072 | SmallVector<AsmToken, 1> Tokens; |
3073 | if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { |
3074 | if (RestoreOnFailure) { |
3075 | while (!Tokens.empty()) { |
3076 | getLexer().UnLex(Token: Tokens.pop_back_val()); |
3077 | } |
3078 | } |
3079 | return true; |
3080 | } |
3081 | return false; |
3082 | } |
3083 | |
3084 | std::optional<StringRef> |
3085 | AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { |
3086 | switch (RegKind) { |
3087 | case IS_VGPR: |
3088 | return StringRef(".amdgcn.next_free_vgpr" ); |
3089 | case IS_SGPR: |
3090 | return StringRef(".amdgcn.next_free_sgpr" ); |
3091 | default: |
3092 | return std::nullopt; |
3093 | } |
3094 | } |
3095 | |
3096 | void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { |
3097 | auto SymbolName = getGprCountSymbolName(RegKind); |
3098 | assert(SymbolName && "initializing invalid register kind" ); |
3099 | MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName); |
3100 | Sym->setVariableValue(MCConstantExpr::create(Value: 0, Ctx&: getContext())); |
3101 | Sym->setRedefinable(true); |
3102 | } |
3103 | |
3104 | bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, |
3105 | unsigned DwordRegIndex, |
3106 | unsigned RegWidth) { |
3107 | // Symbols are only defined for GCN targets |
3108 | if (AMDGPU::getIsaVersion(GPU: getSTI().getCPU()).Major < 6) |
3109 | return true; |
3110 | |
3111 | auto SymbolName = getGprCountSymbolName(RegKind); |
3112 | if (!SymbolName) |
3113 | return true; |
3114 | MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName); |
3115 | |
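     |   // NewMax is the highest dword register index touched by this use; e.g.
     |   // v[2:3] (DwordRegIndex = 2, RegWidth = 64) gives NewMax = 3, so the
     |   // next-free symbol below becomes 4.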
3116 | int64_t NewMax = DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1; |
3117 | int64_t OldCount; |
3118 | |
3119 | if (!Sym->isVariable()) |
3120 | return !Error(L: getLoc(), |
3121 | Msg: ".amdgcn.next_free_{v,s}gpr symbols must be variable" ); |
3122 | if (!Sym->getVariableValue()->evaluateAsAbsolute(Res&: OldCount)) |
3123 | return !Error( |
3124 | L: getLoc(), |
3125 | Msg: ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions" ); |
3126 | |
3127 | if (OldCount <= NewMax) |
3128 | Sym->setVariableValue(MCConstantExpr::create(Value: NewMax + 1, Ctx&: getContext())); |
3129 | |
3130 | return true; |
3131 | } |
3132 | |
3133 | std::unique_ptr<AMDGPUOperand> |
3134 | AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { |
3135 | const auto &Tok = getToken(); |
3136 | SMLoc StartLoc = Tok.getLoc(); |
3137 | SMLoc EndLoc = Tok.getEndLoc(); |
3138 | RegisterKind RegKind; |
3139 | MCRegister Reg; |
3140 | unsigned RegNum, RegWidth; |
3141 | |
3142 | if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { |
3143 | return nullptr; |
3144 | } |
3145 | if (isHsaAbi(STI: getSTI())) { |
3146 | if (!updateGprCountSymbols(RegKind, DwordRegIndex: RegNum, RegWidth)) |
3147 | return nullptr; |
3148 | } else |
3149 | KernelScope.usesRegister(RegKind, DwordRegIndex: RegNum, RegWidth); |
3150 | return AMDGPUOperand::CreateReg(AsmParser: this, Reg, S: StartLoc, E: EndLoc); |
3151 | } |
3152 | |
3153 | ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, |
3154 | bool HasSP3AbsModifier, bool HasLit) { |
3155 | // TODO: add syntactic sugar for 1/(2*PI) |
3156 | |
3157 | if (isRegister()) |
3158 | return ParseStatus::NoMatch; |
3159 | assert(!isModifier()); |
3160 | |
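     |   // An explicit lit(...) wrapper forces the operand to be encoded as a
     |   // literal; parse the wrapped immediate recursively and require the
     |   // closing parenthesis.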
3161 | if (!HasLit) { |
3162 | HasLit = trySkipId(Id: "lit" ); |
3163 | if (HasLit) { |
3164 | if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit" )) |
3165 | return ParseStatus::Failure; |
3166 | ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit); |
3167 | if (S.isSuccess() && |
3168 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3169 | return ParseStatus::Failure; |
3170 | return S; |
3171 | } |
3172 | } |
3173 | |
3174 | const auto& Tok = getToken(); |
3175 | const auto& NextTok = peekToken(); |
3176 | bool IsReal = Tok.is(K: AsmToken::Real); |
3177 | SMLoc S = getLoc(); |
3178 | bool Negate = false; |
3179 | |
3180 | if (!IsReal && Tok.is(K: AsmToken::Minus) && NextTok.is(K: AsmToken::Real)) { |
3181 | lex(); |
3182 | IsReal = true; |
3183 | Negate = true; |
3184 | } |
3185 | |
3186 | AMDGPUOperand::Modifiers Mods; |
3187 | Mods.Lit = HasLit; |
3188 | |
3189 | if (IsReal) { |
3190 |     // Floating-point expressions are not supported; only a
3191 |     // floating-point literal with an optional leading sign
3192 |     // can be accepted here.
3193 | |
3194 | StringRef Num = getTokenStr(); |
3195 | lex(); |
3196 | |
3197 | APFloat RealVal(APFloat::IEEEdouble()); |
3198 | auto roundMode = APFloat::rmNearestTiesToEven; |
3199 | if (errorToBool(Err: RealVal.convertFromString(Num, roundMode).takeError())) |
3200 | return ParseStatus::Failure; |
3201 | if (Negate) |
3202 | RealVal.changeSign(); |
3203 | |
3204 | Operands.push_back( |
3205 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: RealVal.bitcastToAPInt().getZExtValue(), Loc: S, |
3206 | Type: AMDGPUOperand::ImmTyNone, IsFPImm: true)); |
3207 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3208 | Op.setModifiers(Mods); |
3209 | |
3210 | return ParseStatus::Success; |
3211 | |
3212 | } else { |
3213 | int64_t IntVal; |
3214 | const MCExpr *Expr; |
3215 | SMLoc S = getLoc(); |
3216 | |
3217 | if (HasSP3AbsModifier) { |
3218 | // This is a workaround for handling expressions |
3219 | // as arguments of SP3 'abs' modifier, for example: |
3220 | // |1.0| |
3221 | // |-1| |
3222 | // |1+x| |
3223 |       // This syntax is not compatible with the syntax of standard
3224 |       // MC expressions (due to the trailing '|').
3225 | SMLoc EndLoc; |
3226 | if (getParser().parsePrimaryExpr(Res&: Expr, EndLoc, TypeInfo: nullptr)) |
3227 | return ParseStatus::Failure; |
3228 | } else { |
3229 | if (Parser.parseExpression(Res&: Expr)) |
3230 | return ParseStatus::Failure; |
3231 | } |
3232 | |
3233 | if (Expr->evaluateAsAbsolute(Res&: IntVal)) { |
3234 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S)); |
3235 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3236 | Op.setModifiers(Mods); |
3237 | } else { |
3238 | if (HasLit) |
3239 | return ParseStatus::NoMatch; |
3240 | Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S)); |
3241 | } |
3242 | |
3243 | return ParseStatus::Success; |
3244 | } |
3245 | |
3246 | return ParseStatus::NoMatch; |
3247 | } |
3248 | |
3249 | ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { |
3250 | if (!isRegister()) |
3251 | return ParseStatus::NoMatch; |
3252 | |
3253 | if (auto R = parseRegister()) { |
3254 | assert(R->isReg()); |
3255 | Operands.push_back(Elt: std::move(R)); |
3256 | return ParseStatus::Success; |
3257 | } |
3258 | return ParseStatus::Failure; |
3259 | } |
3260 | |
3261 | ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, |
3262 | bool HasSP3AbsMod, bool HasLit) { |
3263 | ParseStatus Res = parseReg(Operands); |
3264 | if (!Res.isNoMatch()) |
3265 | return Res; |
3266 | if (isModifier()) |
3267 | return ParseStatus::NoMatch; |
3268 | return parseImm(Operands, HasSP3AbsModifier: HasSP3AbsMod, HasLit); |
3269 | } |
3270 | |
3271 | bool |
3272 | AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3273 | if (Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::LParen)) { |
3274 | const auto &str = Token.getString(); |
3275 | return str == "abs" || str == "neg" || str == "sext" ; |
3276 | } |
3277 | return false; |
3278 | } |
3279 | |
3280 | bool |
3281 | AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { |
3282 | return Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::Colon); |
3283 | } |
3284 | |
3285 | bool |
3286 | AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3287 | return isNamedOperandModifier(Token, NextToken) || Token.is(K: AsmToken::Pipe); |
3288 | } |
3289 | |
3290 | bool |
3291 | AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3292 | return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); |
3293 | } |
3294 | |
3295 | // Check if this is an operand modifier or an opcode modifier |
3296 | // which may look like an expression but is not. We should
3297 | // avoid parsing these modifiers as expressions. Currently |
3298 | // recognized sequences are: |
3299 | // |...| |
3300 | // abs(...) |
3301 | // neg(...) |
3302 | // sext(...) |
3303 | // -reg |
3304 | // -|...| |
3305 | // -abs(...) |
3306 | // name:... |
3307 | // |
3308 | bool |
3309 | AMDGPUAsmParser::isModifier() { |
3310 | |
3311 | AsmToken Tok = getToken(); |
3312 | AsmToken NextToken[2]; |
3313 | peekTokens(Tokens: NextToken); |
3314 | |
3315 | return isOperandModifier(Token: Tok, NextToken: NextToken[0]) || |
3316 | (Tok.is(K: AsmToken::Minus) && isRegOrOperandModifier(Token: NextToken[0], NextToken: NextToken[1])) || |
3317 | isOpcodeModifierWithVal(Token: Tok, NextToken: NextToken[0]); |
3318 | } |
3319 | |
3320 | // Check if the current token is an SP3 'neg' modifier. |
3321 | // Currently this modifier is allowed in the following contexts:
3322 | // |
3323 | // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". |
3324 | // 2. Before an 'abs' modifier: -abs(...) |
3325 | // 3. Before an SP3 'abs' modifier: -|...| |
3326 | // |
3327 | // In all other cases "-" is handled as a part |
3328 | // of an expression that follows the sign. |
3329 | // |
3330 | // Note: When "-" is followed by an integer literal N,
3331 | // this is interpreted as integer negation rather
3332 | // than a floating-point NEG modifier applied to N.
3333 | // Besides being counter-intuitive, such use of a floating-point
3334 | // NEG modifier would have resulted in different meanings
3335 | // of integer literals used with VOP1/2/C and VOP3,
3336 | // for example:
3337 | //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3338 | //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3339 | // Negative fp literals with a preceding "-" are
3340 | // handled likewise for uniformity.
3341 | // |
3342 | bool |
3343 | AMDGPUAsmParser::parseSP3NegModifier() { |
3344 | |
3345 | AsmToken NextToken[2]; |
3346 | peekTokens(Tokens: NextToken); |
3347 | |
3348 | if (isToken(Kind: AsmToken::Minus) && |
3349 | (isRegister(Token: NextToken[0], NextToken: NextToken[1]) || |
3350 | NextToken[0].is(K: AsmToken::Pipe) || |
3351 | isId(Token: NextToken[0], Id: "abs" ))) { |
3352 | lex(); |
3353 | return true; |
3354 | } |
3355 | |
3356 | return false; |
3357 | } |
3358 | |
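     | // Parse an operand with optional floating-point input modifiers.
     | // Accepted forms include, for example, abs(v0), neg(v1), lit(1.0) and
     | // the SP3 spellings |v2| and -v3. Combining the named and SP3 spellings
     | // of the same modifier (e.g. abs(|v2|)) is rejected below.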
3359 | ParseStatus |
3360 | AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, |
3361 | bool AllowImm) { |
3362 | bool Neg, SP3Neg; |
3363 | bool Abs, SP3Abs; |
3364 | bool Lit; |
3365 | SMLoc Loc; |
3366 | |
3367 | // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. |
3368 | if (isToken(Kind: AsmToken::Minus) && peekToken().is(K: AsmToken::Minus)) |
3369 | return Error(L: getLoc(), Msg: "invalid syntax, expected 'neg' modifier" ); |
3370 | |
3371 | SP3Neg = parseSP3NegModifier(); |
3372 | |
3373 | Loc = getLoc(); |
3374 | Neg = trySkipId(Id: "neg" ); |
3375 | if (Neg && SP3Neg) |
3376 | return Error(L: Loc, Msg: "expected register or immediate" ); |
3377 | if (Neg && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after neg" )) |
3378 | return ParseStatus::Failure; |
3379 | |
3380 | Abs = trySkipId(Id: "abs" ); |
3381 | if (Abs && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after abs" )) |
3382 | return ParseStatus::Failure; |
3383 | |
3384 | Lit = trySkipId(Id: "lit" ); |
3385 | if (Lit && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit" )) |
3386 | return ParseStatus::Failure; |
3387 | |
3388 | Loc = getLoc(); |
3389 | SP3Abs = trySkipToken(Kind: AsmToken::Pipe); |
3390 | if (Abs && SP3Abs) |
3391 | return Error(L: Loc, Msg: "expected register or immediate" ); |
3392 | |
3393 | ParseStatus Res; |
3394 | if (AllowImm) { |
3395 | Res = parseRegOrImm(Operands, HasSP3AbsMod: SP3Abs, HasLit: Lit); |
3396 | } else { |
3397 | Res = parseReg(Operands); |
3398 | } |
3399 | if (!Res.isSuccess()) |
3400 | return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res; |
3401 | |
3402 | if (Lit && !Operands.back()->isImm()) |
3403 | Error(L: Loc, Msg: "expected immediate with lit modifier" ); |
3404 | |
3405 | if (SP3Abs && !skipToken(Kind: AsmToken::Pipe, ErrMsg: "expected vertical bar" )) |
3406 | return ParseStatus::Failure; |
3407 | if (Abs && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3408 | return ParseStatus::Failure; |
3409 | if (Neg && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3410 | return ParseStatus::Failure; |
3411 | if (Lit && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3412 | return ParseStatus::Failure; |
3413 | |
3414 | AMDGPUOperand::Modifiers Mods; |
3415 | Mods.Abs = Abs || SP3Abs; |
3416 | Mods.Neg = Neg || SP3Neg; |
3417 | Mods.Lit = Lit; |
3418 | |
3419 | if (Mods.hasFPModifiers() || Lit) { |
3420 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3421 | if (Op.isExpr()) |
3422 | return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression" ); |
3423 | Op.setModifiers(Mods); |
3424 | } |
3425 | return ParseStatus::Success; |
3426 | } |
3427 | |
3428 | ParseStatus |
3429 | AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, |
3430 | bool AllowImm) { |
3431 | bool Sext = trySkipId(Id: "sext" ); |
3432 | if (Sext && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after sext" )) |
3433 | return ParseStatus::Failure; |
3434 | |
3435 | ParseStatus Res; |
3436 | if (AllowImm) { |
3437 | Res = parseRegOrImm(Operands); |
3438 | } else { |
3439 | Res = parseReg(Operands); |
3440 | } |
3441 | if (!Res.isSuccess()) |
3442 | return Sext ? ParseStatus::Failure : Res; |
3443 | |
3444 | if (Sext && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3445 | return ParseStatus::Failure; |
3446 | |
3447 | AMDGPUOperand::Modifiers Mods; |
3448 | Mods.Sext = Sext; |
3449 | |
3450 | if (Mods.hasIntModifiers()) { |
3451 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3452 | if (Op.isExpr()) |
3453 | return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression" ); |
3454 | Op.setModifiers(Mods); |
3455 | } |
3456 | |
3457 | return ParseStatus::Success; |
3458 | } |
3459 | |
3460 | ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { |
3461 | return parseRegOrImmWithFPInputMods(Operands, AllowImm: false); |
3462 | } |
3463 | |
3464 | ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { |
3465 | return parseRegOrImmWithIntInputMods(Operands, AllowImm: false); |
3466 | } |
3467 | |
3468 | ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { |
3469 | auto Loc = getLoc(); |
3470 | if (trySkipId(Id: "off" )) { |
3471 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: 0, Loc, |
3472 | Type: AMDGPUOperand::ImmTyOff, IsFPImm: false)); |
3473 | return ParseStatus::Success; |
3474 | } |
3475 | |
3476 | if (!isRegister()) |
3477 | return ParseStatus::NoMatch; |
3478 | |
3479 | std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); |
3480 | if (Reg) { |
3481 | Operands.push_back(Elt: std::move(Reg)); |
3482 | return ParseStatus::Success; |
3483 | } |
3484 | |
3485 | return ParseStatus::Failure; |
3486 | } |
3487 | |
3488 | unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { |
3489 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
3490 | |
3491 | if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || |
3492 | (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || |
3493 | (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || |
3494 | (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) |
3495 | return Match_InvalidOperand; |
3496 | |
3497 | if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || |
3498 | Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { |
3499 | // v_mac_f32/16 allow only dst_sel == DWORD; |
3500 | auto OpNum = |
3501 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::dst_sel); |
3502 | const auto &Op = Inst.getOperand(i: OpNum); |
3503 | if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { |
3504 | return Match_InvalidOperand; |
3505 | } |
3506 | } |
3507 | |
3508 | return Match_Success; |
3509 | } |
3510 | |
3511 | static ArrayRef<unsigned> getAllVariants() { |
3512 | static const unsigned Variants[] = { |
3513 | AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, |
3514 | AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, |
3515 | AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP |
3516 | }; |
3517 | |
3518 | return ArrayRef(Variants); |
3519 | } |
3520 | |
3521 | // What asm variants we should check |
3522 | ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { |
3523 | if (isForcedDPP() && isForcedVOP3()) { |
3524 | static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; |
3525 | return ArrayRef(Variants); |
3526 | } |
3527 | if (getForcedEncodingSize() == 32) { |
3528 | static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; |
3529 | return ArrayRef(Variants); |
3530 | } |
3531 | |
3532 | if (isForcedVOP3()) { |
3533 | static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; |
3534 | return ArrayRef(Variants); |
3535 | } |
3536 | |
3537 | if (isForcedSDWA()) { |
3538 | static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, |
3539 | AMDGPUAsmVariants::SDWA9}; |
3540 | return ArrayRef(Variants); |
3541 | } |
3542 | |
3543 | if (isForcedDPP()) { |
3544 | static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; |
3545 | return ArrayRef(Variants); |
3546 | } |
3547 | |
3548 | return getAllVariants(); |
3549 | } |
3550 | |
3551 | StringRef AMDGPUAsmParser::getMatchedVariantName() const { |
3552 | if (isForcedDPP() && isForcedVOP3()) |
3553 | return "e64_dpp" ; |
3554 | |
3555 | if (getForcedEncodingSize() == 32) |
3556 | return "e32" ; |
3557 | |
3558 | if (isForcedVOP3()) |
3559 | return "e64" ; |
3560 | |
3561 | if (isForcedSDWA()) |
3562 | return "sdwa" ; |
3563 | |
3564 | if (isForcedDPP()) |
3565 | return "dpp" ; |
3566 | |
3567 | return "" ; |
3568 | } |
3569 | |
3570 | unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { |
3571 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
3572 | for (MCPhysReg Reg : Desc.implicit_uses()) { |
3573 | switch (Reg) { |
3574 | case AMDGPU::FLAT_SCR: |
3575 | case AMDGPU::VCC: |
3576 | case AMDGPU::VCC_LO: |
3577 | case AMDGPU::VCC_HI: |
3578 | case AMDGPU::M0: |
3579 | return Reg; |
3580 | default: |
3581 | break; |
3582 | } |
3583 | } |
3584 | return AMDGPU::NoRegister; |
3585 | } |
3586 | |
3587 | // NB: This code is correct only when used to check constant |
3588 | // bus limitations because GFX7 has no f16 inline constants.
3589 | // Note that there are no cases when a GFX7 opcode violates |
3590 | // constant bus limitations due to the use of an f16 constant. |
3591 | bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, |
3592 | unsigned OpIdx) const { |
3593 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
3594 | |
3595 | if (!AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx) || |
3596 | AMDGPU::isKImmOperand(Desc, OpNo: OpIdx)) { |
3597 | return false; |
3598 | } |
3599 | |
3600 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3601 | |
3602 | int64_t Val = MO.getImm(); |
3603 | auto OpSize = AMDGPU::getOperandSize(Desc, OpNo: OpIdx); |
3604 | |
3605 | switch (OpSize) { // expected operand size |
3606 | case 8: |
3607 | return AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3608 | case 4: |
3609 | return AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3610 | case 2: { |
3611 | const unsigned OperandType = Desc.operands()[OpIdx].OperandType; |
3612 | if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || |
3613 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16) |
3614 | return AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3615 | |
3616 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || |
3617 | OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) |
3618 | return AMDGPU::isInlinableLiteralV2I16(Literal: Val); |
3619 | |
3620 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || |
3621 | OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) |
3622 | return AMDGPU::isInlinableLiteralV2F16(Literal: Val); |
3623 | |
3624 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 || |
3625 | OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16) |
3626 | return AMDGPU::isInlinableLiteralV2BF16(Literal: Val); |
3627 | |
3628 | if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 || |
3629 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16) |
3630 | return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3631 | |
3632 | if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 || |
3633 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16) |
3634 | return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3635 | |
3636 | llvm_unreachable("invalid operand type" ); |
3637 | } |
3638 | default: |
3639 | llvm_unreachable("invalid operand size" ); |
3640 | } |
3641 | } |
3642 | |
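     | // Maximum number of distinct scalar values (SGPRs and literals) that the
     | // given opcode may read via the constant bus.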
3643 | unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { |
3644 | if (!isGFX10Plus()) |
3645 | return 1; |
3646 | |
3647 | switch (Opcode) { |
3648 | // 64-bit shift instructions can use only one scalar value input |
3649 | case AMDGPU::V_LSHLREV_B64_e64: |
3650 | case AMDGPU::V_LSHLREV_B64_gfx10: |
3651 | case AMDGPU::V_LSHLREV_B64_e64_gfx11: |
3652 | case AMDGPU::V_LSHLREV_B64_e32_gfx12: |
3653 | case AMDGPU::V_LSHLREV_B64_e64_gfx12: |
3654 | case AMDGPU::V_LSHRREV_B64_e64: |
3655 | case AMDGPU::V_LSHRREV_B64_gfx10: |
3656 | case AMDGPU::V_LSHRREV_B64_e64_gfx11: |
3657 | case AMDGPU::V_LSHRREV_B64_e64_gfx12: |
3658 | case AMDGPU::V_ASHRREV_I64_e64: |
3659 | case AMDGPU::V_ASHRREV_I64_gfx10: |
3660 | case AMDGPU::V_ASHRREV_I64_e64_gfx11: |
3661 | case AMDGPU::V_ASHRREV_I64_e64_gfx12: |
3662 | case AMDGPU::V_LSHL_B64_e64: |
3663 | case AMDGPU::V_LSHR_B64_e64: |
3664 | case AMDGPU::V_ASHR_I64_e64: |
3665 | return 1; |
3666 | default: |
3667 | return 2; |
3668 | } |
3669 | } |
3670 | |
3671 | constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; |
3672 | using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; |
3673 | |
3674 | // Get regular operand indices in the same order as specified |
3675 | // in the instruction (but append mandatory literals to the end). |
3676 | static OperandIndices getSrcOperandIndices(unsigned Opcode, |
3677 | bool AddMandatoryLiterals = false) { |
3678 | |
3679 | int16_t ImmIdx = |
3680 | AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::imm) : -1; |
3681 | |
3682 | if (isVOPD(Opc: Opcode)) { |
3683 | int16_t ImmXIdx = |
3684 | AddMandatoryLiterals ? getNamedOperandIdx(Opcode, Name: OpName::immX) : -1; |
3685 | |
3686 | return {getNamedOperandIdx(Opcode, Name: OpName::src0X), |
3687 | getNamedOperandIdx(Opcode, Name: OpName::vsrc1X), |
3688 | getNamedOperandIdx(Opcode, Name: OpName::src0Y), |
3689 | getNamedOperandIdx(Opcode, Name: OpName::vsrc1Y), |
3690 | ImmXIdx, |
3691 | ImmIdx}; |
3692 | } |
3693 | |
3694 | return {getNamedOperandIdx(Opcode, Name: OpName::src0), |
3695 | getNamedOperandIdx(Opcode, Name: OpName::src1), |
3696 | getNamedOperandIdx(Opcode, Name: OpName::src2), ImmIdx}; |
3697 | } |
3698 | |
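     | // An operand occupies the constant bus if it is a non-null SGPR, an
     | // expression, or a literal that cannot be encoded as an inline constant.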
3699 | bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { |
3700 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3701 | if (MO.isImm()) |
3702 | return !isInlineConstant(Inst, OpIdx); |
3703 | if (MO.isReg()) { |
3704 | auto Reg = MO.getReg(); |
3705 | if (!Reg) |
3706 | return false; |
3707 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3708 | auto PReg = mc2PseudoReg(Reg); |
3709 | return isSGPR(Reg: PReg, TRI) && PReg != SGPR_NULL; |
3710 | } |
3711 | return true; |
3712 | } |
3713 | |
3714 | // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`: |
3715 | // Writelane is special in that it can use SGPR and M0 (which would normally |
3716 | // count as using the constant bus twice - but in this case it is allowed since |
3717 | // the lane selector doesn't count as a use of the constant bus). However, it is |
3718 | // still required to abide by the 1 SGPR rule. |
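     | // For example, "v_writelane_b32 v1, s2, m0" reads both s2 and m0 but is
     | // accepted because m0 is only used as the lane selector.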
3719 | static bool checkWriteLane(const MCInst &Inst) { |
3720 | const unsigned Opcode = Inst.getOpcode(); |
3721 | if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi) |
3722 | return false; |
3723 | const MCOperand &LaneSelOp = Inst.getOperand(i: 2); |
3724 | if (!LaneSelOp.isReg()) |
3725 | return false; |
3726 | auto LaneSelReg = mc2PseudoReg(Reg: LaneSelOp.getReg()); |
3727 | return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11; |
3728 | } |
3729 | |
3730 | bool AMDGPUAsmParser::validateConstantBusLimitations( |
3731 | const MCInst &Inst, const OperandVector &Operands) { |
3732 | const unsigned Opcode = Inst.getOpcode(); |
3733 | const MCInstrDesc &Desc = MII.get(Opcode); |
3734 | MCRegister LastSGPR; |
3735 | unsigned ConstantBusUseCount = 0; |
3736 | unsigned NumLiterals = 0; |
3737 | unsigned LiteralSize; |
3738 | |
3739 | if (!(Desc.TSFlags & |
3740 | (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | |
3741 | SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) && |
3742 | !isVOPD(Opc: Opcode)) |
3743 | return true; |
3744 | |
3745 | if (checkWriteLane(Inst)) |
3746 | return true; |
3747 | |
3748 | // Check special imm operands (used by madmk, etc) |
3749 | if (AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::imm)) { |
3750 | ++NumLiterals; |
3751 | LiteralSize = 4; |
3752 | } |
3753 | |
3754 | SmallDenseSet<unsigned> SGPRsUsed; |
3755 | unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); |
3756 | if (SGPRUsed != AMDGPU::NoRegister) { |
3757 | SGPRsUsed.insert(V: SGPRUsed); |
3758 | ++ConstantBusUseCount; |
3759 | } |
3760 | |
3761 | OperandIndices OpIndices = getSrcOperandIndices(Opcode); |
3762 | |
3763 | for (int OpIdx : OpIndices) { |
3764 | if (OpIdx == -1) |
3765 | continue; |
3766 | |
3767 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3768 | if (usesConstantBus(Inst, OpIdx)) { |
3769 | if (MO.isReg()) { |
3770 | LastSGPR = mc2PseudoReg(Reg: MO.getReg()); |
3771 |         // Pairs of registers with partial intersections like these
3772 | // s0, s[0:1] |
3773 | // flat_scratch_lo, flat_scratch |
3774 | // flat_scratch_lo, flat_scratch_hi |
3775 | // are theoretically valid but they are disabled anyway. |
3776 | // Note that this code mimics SIInstrInfo::verifyInstruction |
3777 | if (SGPRsUsed.insert(V: LastSGPR).second) { |
3778 | ++ConstantBusUseCount; |
3779 | } |
3780 | } else { // Expression or a literal |
3781 | |
3782 | if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) |
3783 | continue; // special operand like VINTERP attr_chan |
3784 | |
3785 | // An instruction may use only one literal. |
3786 |         // This has been validated in a previous step.
3787 | // See validateVOPLiteral. |
3788 | // This literal may be used as more than one operand. |
3789 | // If all these operands are of the same size, |
3790 | // this literal counts as one scalar value. |
3791 | // Otherwise it counts as 2 scalar values. |
3792 | // See "GFX10 Shader Programming", section 3.6.2.3. |
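     |         // For example, a 32-bit literal reused as both a 32-bit and a
     |         // 64-bit source has mismatched operand sizes and therefore
     |         // counts as 2 scalar values here.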
3793 | |
3794 | unsigned Size = AMDGPU::getOperandSize(Desc, OpNo: OpIdx); |
3795 | if (Size < 4) |
3796 | Size = 4; |
3797 | |
3798 | if (NumLiterals == 0) { |
3799 | NumLiterals = 1; |
3800 | LiteralSize = Size; |
3801 | } else if (LiteralSize != Size) { |
3802 | NumLiterals = 2; |
3803 | } |
3804 | } |
3805 | } |
3806 | } |
3807 | ConstantBusUseCount += NumLiterals; |
3808 | |
3809 | if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) |
3810 | return true; |
3811 | |
3812 | SMLoc LitLoc = getLitLoc(Operands); |
3813 | SMLoc RegLoc = getRegLoc(Reg: LastSGPR, Operands); |
3814 | SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; |
3815 | Error(L: Loc, Msg: "invalid operand (violates constant bus restrictions)" ); |
3816 | return false; |
3817 | } |
3818 | |
3819 | bool AMDGPUAsmParser::validateVOPDRegBankConstraints( |
3820 | const MCInst &Inst, const OperandVector &Operands) { |
3821 | |
3822 | const unsigned Opcode = Inst.getOpcode(); |
3823 | if (!isVOPD(Opc: Opcode)) |
3824 | return true; |
3825 | |
3826 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3827 | |
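     |   // Map each component operand to its VGPR (or a null register for SGPRs
     |   // and immediates) so that getInvalidCompOperandIndex below can verify
     |   // the even/odd dst constraint and the per-bank src constraints.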
3828 | auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { |
3829 | const MCOperand &Opr = Inst.getOperand(i: OperandIdx); |
3830 | return (Opr.isReg() && !isSGPR(Reg: mc2PseudoReg(Reg: Opr.getReg()), TRI)) |
3831 | ? Opr.getReg() |
3832 | : MCRegister(); |
3833 | }; |
3834 | |
3835 |   // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source cache.
3836 | bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; |
3837 | |
3838 | const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII); |
3839 | auto InvalidCompOprIdx = |
3840 | InstInfo.getInvalidCompOperandIndex(GetRegIdx: getVRegIdx, SkipSrc); |
3841 | if (!InvalidCompOprIdx) |
3842 | return true; |
3843 | |
3844 | auto CompOprIdx = *InvalidCompOprIdx; |
3845 | auto ParsedIdx = |
3846 | std::max(a: InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), |
3847 | b: InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); |
3848 | assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); |
3849 | |
3850 | auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); |
3851 | if (CompOprIdx == VOPD::Component::DST) { |
3852 | Error(L: Loc, Msg: "one dst register must be even and the other odd" ); |
3853 | } else { |
3854 | auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; |
3855 | Error(L: Loc, Msg: Twine("src" ) + Twine(CompSrcIdx) + |
3856 | " operands must use different VGPR banks" ); |
3857 | } |
3858 | |
3859 | return false; |
3860 | } |
3861 | |
3862 | bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { |
3863 | |
3864 | const unsigned Opc = Inst.getOpcode(); |
3865 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3866 | |
3867 | if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { |
3868 | int ClampIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::clamp); |
3869 | assert(ClampIdx != -1); |
3870 | return Inst.getOperand(i: ClampIdx).getImm() == 0; |
3871 | } |
3872 | |
3873 | return true; |
3874 | } |
3875 | |
3876 | constexpr uint64_t MIMGFlags = |
3877 | SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; |
3878 | |
3879 | bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, |
3880 | const SMLoc &IDLoc) { |
3881 | |
3882 | const unsigned Opc = Inst.getOpcode(); |
3883 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3884 | |
3885 | if ((Desc.TSFlags & MIMGFlags) == 0) |
3886 | return true; |
3887 | |
3888 | int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdata); |
3889 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask); |
3890 | int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::tfe); |
3891 | |
3892 |   if (VDataIdx == -1 && isGFX10Plus()) // no-return image_sample
3893 | return true; |
3894 | |
3895 | if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray |
3896 | return true; |
3897 | |
3898 | unsigned VDataSize = AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VDataIdx); |
3899 | unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(i: TFEIdx).getImm()) ? 1 : 0; |
3900 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
3901 | if (DMask == 0) |
3902 | DMask = 1; |
3903 | |
3904 | bool IsPackedD16 = false; |
3905 | unsigned DataSize = |
3906 | (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(Value: DMask); |
3907 | if (hasPackedD16()) { |
3908 | int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16); |
3909 | IsPackedD16 = D16Idx >= 0; |
3910 | if (IsPackedD16 && Inst.getOperand(i: D16Idx).getImm()) |
3911 | DataSize = (DataSize + 1) / 2; |
3912 | } |
3913 | |
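     |   // E.g. dmask = 0xe selects three components, so three data registers
     |   // are expected (plus one more when tfe is set).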
3914 | if ((VDataSize / 4) == DataSize + TFESize) |
3915 | return true; |
3916 | |
3917 | StringRef Modifiers; |
3918 | if (isGFX90A()) |
3919 | Modifiers = IsPackedD16 ? "dmask and d16" : "dmask" ; |
3920 | else |
3921 | Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe" ; |
3922 | |
3923 | Error(L: IDLoc, Msg: Twine("image data size does not match " ) + Modifiers); |
3924 | return false; |
3925 | } |
3926 | |
3927 | bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, |
3928 | const SMLoc &IDLoc) { |
3929 | const unsigned Opc = Inst.getOpcode(); |
3930 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3931 | |
3932 | if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) |
3933 | return true; |
3934 | |
3935 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); |
3936 | |
3937 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = |
3938 | AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode); |
3939 | int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vaddr0); |
3940 | AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) |
3941 | ? AMDGPU::OpName::srsrc |
3942 | : AMDGPU::OpName::rsrc; |
3943 | int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: RSrcOpName); |
3944 | int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim); |
3945 | int A16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::a16); |
3946 | |
3947 | assert(VAddr0Idx != -1); |
3948 | assert(SrsrcIdx != -1); |
3949 | assert(SrsrcIdx > VAddr0Idx); |
3950 | |
3951 | bool IsA16 = (A16Idx != -1 && Inst.getOperand(i: A16Idx).getImm()); |
3952 | if (BaseOpcode->BVH) { |
3953 | if (IsA16 == BaseOpcode->A16) |
3954 | return true; |
3955 | Error(L: IDLoc, Msg: "image address size does not match a16" ); |
3956 | return false; |
3957 | } |
3958 | |
3959 | unsigned Dim = Inst.getOperand(i: DimIdx).getImm(); |
3960 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim); |
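     |   // In NSA form each address component occupies its own VGPR operand, so
     |   // the address size is the number of vaddr operands between vaddr0 and
     |   // srsrc; otherwise it is the dword size of the single vaddr tuple.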
3961 | bool IsNSA = SrsrcIdx - VAddr0Idx > 1; |
3962 | unsigned ActualAddrSize = |
3963 | IsNSA ? SrsrcIdx - VAddr0Idx |
3964 | : AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VAddr0Idx) / 4; |
3965 | |
3966 | unsigned ExpectedAddrSize = |
3967 | AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim: DimInfo, IsA16, IsG16Supported: hasG16()); |
3968 | |
3969 | if (IsNSA) { |
3970 | if (hasPartialNSAEncoding() && |
3971 | ExpectedAddrSize > |
3972 | getNSAMaxSize(HasSampler: Desc.TSFlags & SIInstrFlags::VSAMPLE)) { |
3973 | int VAddrLastIdx = SrsrcIdx - 1; |
3974 | unsigned VAddrLastSize = |
3975 | AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VAddrLastIdx) / 4; |
3976 | |
3977 | ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; |
3978 | } |
3979 | } else { |
3980 | if (ExpectedAddrSize > 12) |
3981 | ExpectedAddrSize = 16; |
3982 | |
3983 | // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. |
3984 | // This provides backward compatibility for assembly created |
3985 | // before 160b/192b/224b types were directly supported. |
3986 | if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) |
3987 | return true; |
3988 | } |
3989 | |
3990 | if (ActualAddrSize == ExpectedAddrSize) |
3991 | return true; |
3992 | |
3993 | Error(L: IDLoc, Msg: "image address size does not match dim and a16" ); |
3994 | return false; |
3995 | } |
3996 | |
3997 | bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { |
3998 | |
3999 | const unsigned Opc = Inst.getOpcode(); |
4000 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4001 | |
4002 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4003 | return true; |
4004 | if (!Desc.mayLoad() || !Desc.mayStore()) |
4005 | return true; // Not atomic |
4006 | |
4007 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask); |
4008 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
4009 | |
4010 | // This is an incomplete check because image_atomic_cmpswap |
4011 | // may only use 0x3 and 0xf while other atomic operations |
4012 | // may use 0x1 and 0x3. However these limitations are |
4013 | // verified when we check that dmask matches dst size. |
4014 | return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; |
4015 | } |
4016 | |
4017 | bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { |
4018 | |
4019 | const unsigned Opc = Inst.getOpcode(); |
4020 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4021 | |
4022 | if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) |
4023 | return true; |
4024 | |
4025 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dmask); |
4026 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
4027 | |
4028 | // GATHER4 instructions use dmask in a different fashion compared to |
4029 | // other MIMG instructions. The only useful DMASK values are |
4030 | // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns |
4031 | // (red,red,red,red) etc.) The ISA document doesn't mention |
4032 | // this. |
4033 | return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; |
4034 | } |
4035 | |
4036 | bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst, |
4037 | const OperandVector &Operands) { |
4038 | if (!isGFX10Plus()) |
4039 | return true; |
4040 | |
4041 | const unsigned Opc = Inst.getOpcode(); |
4042 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4043 | |
4044 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4045 | return true; |
4046 | |
4047 | // image_bvh_intersect_ray instructions do not have dim |
4048 | if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH) |
4049 | return true; |
4050 | |
4051 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
4052 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4053 | if (Op.isDim()) |
4054 | return true; |
4055 | } |
4056 | return false; |
4057 | } |
4058 | |
4059 | bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { |
4060 | const unsigned Opc = Inst.getOpcode(); |
4061 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4062 | |
4063 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4064 | return true; |
4065 | |
4066 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); |
4067 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = |
4068 | AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode); |
4069 | |
4070 | if (!BaseOpcode->MSAA) |
4071 | return true; |
4072 | |
4073 | int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dim); |
4074 | assert(DimIdx != -1); |
4075 | |
4076 | unsigned Dim = Inst.getOperand(i: DimIdx).getImm(); |
4077 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim); |
4078 | |
4079 | return DimInfo->MSAA; |
4080 | } |
4081 | |
4082 | static bool IsMovrelsSDWAOpcode(const unsigned Opcode) |
4083 | { |
4084 | switch (Opcode) { |
4085 | case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: |
4086 | case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: |
4087 | case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: |
4088 | return true; |
4089 | default: |
4090 | return false; |
4091 | } |
4092 | } |
4093 | |
4094 | // movrels* opcodes should only allow VGPRs as src0.
4095 | // This is specified in the .td description for vop1/vop3,
4096 | // but sdwa is handled differently. See isSDWAOperand.
4097 | bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, |
4098 | const OperandVector &Operands) { |
4099 | |
4100 | const unsigned Opc = Inst.getOpcode(); |
4101 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4102 | |
4103 | if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opcode: Opc)) |
4104 | return true; |
4105 | |
4106 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0); |
4107 | assert(Src0Idx != -1); |
4108 | |
4109 | SMLoc ErrLoc; |
4110 | const MCOperand &Src0 = Inst.getOperand(i: Src0Idx); |
4111 | if (Src0.isReg()) { |
4112 | auto Reg = mc2PseudoReg(Reg: Src0.getReg()); |
4113 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4114 | if (!isSGPR(Reg, TRI)) |
4115 | return true; |
4116 | ErrLoc = getRegLoc(Reg, Operands); |
4117 | } else { |
4118 | ErrLoc = getConstLoc(Operands); |
4119 | } |
4120 | |
4121 | Error(L: ErrLoc, Msg: "source operand must be a VGPR" ); |
4122 | return false; |
4123 | } |
4124 | |
4125 | bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, |
4126 | const OperandVector &Operands) { |
4127 | |
4128 | const unsigned Opc = Inst.getOpcode(); |
4129 | |
4130 | if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) |
4131 | return true; |
4132 | |
4133 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0); |
4134 | assert(Src0Idx != -1); |
4135 | |
4136 | const MCOperand &Src0 = Inst.getOperand(i: Src0Idx); |
4137 | if (!Src0.isReg()) |
4138 | return true; |
4139 | |
4140 | auto Reg = mc2PseudoReg(Reg: Src0.getReg()); |
4141 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4142 | if (!isGFX90A() && isSGPR(Reg, TRI)) { |
4143 | Error(L: getRegLoc(Reg, Operands), |
4144 | Msg: "source operand must be either a VGPR or an inline constant" ); |
4145 | return false; |
4146 | } |
4147 | |
4148 | return true; |
4149 | } |
4150 | |
4151 | bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, |
4152 | const OperandVector &Operands) { |
4153 | unsigned Opcode = Inst.getOpcode(); |
4154 | const MCInstrDesc &Desc = MII.get(Opcode); |
4155 | |
4156 | if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || |
4157 | !getFeatureBits()[FeatureMFMAInlineLiteralBug]) |
4158 | return true; |
4159 | |
4160 | const int Src2Idx = getNamedOperandIdx(Opcode, Name: OpName::src2); |
4161 | if (Src2Idx == -1) |
4162 | return true; |
4163 | |
4164 | if (Inst.getOperand(i: Src2Idx).isImm() && isInlineConstant(Inst, OpIdx: Src2Idx)) { |
4165 | Error(L: getConstLoc(Operands), |
4166 | Msg: "inline constants are not allowed for this operand" ); |
4167 | return false; |
4168 | } |
4169 | |
4170 | return true; |
4171 | } |
4172 | |
4173 | bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, |
4174 | const OperandVector &Operands) { |
4175 | const unsigned Opc = Inst.getOpcode(); |
4176 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4177 | |
4178 | if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) |
4179 | return true; |
4180 | |
4181 | int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp); |
4182 | if (BlgpIdx != -1) { |
4183 | if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opcode: Opc)) { |
4184 | int CbszIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz); |
4185 | |
4186 | unsigned CBSZ = Inst.getOperand(i: CbszIdx).getImm(); |
4187 | unsigned BLGP = Inst.getOperand(i: BlgpIdx).getImm(); |
4188 | |
4189 |       // Validate that the correct register size was used for the
4190 |       // floating-point format operands.
4191 | |
4192 | bool Success = true; |
4193 | if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: CBSZ)) { |
4194 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0); |
4195 | Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg()), |
4196 | Operands), |
4197 | Msg: "wrong register tuple size for cbsz value " + Twine(CBSZ)); |
4198 | Success = false; |
4199 | } |
4200 | |
4201 | if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: BLGP)) { |
4202 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1); |
4203 | Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Inst.getOperand(i: Src1Idx).getReg()), |
4204 | Operands), |
4205 | Msg: "wrong register tuple size for blgp value " + Twine(BLGP)); |
4206 | Success = false; |
4207 | } |
4208 | |
4209 | return Success; |
4210 | } |
4211 | } |
4212 | |
4213 | const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src2); |
4214 | if (Src2Idx == -1) |
4215 | return true; |
4216 | |
4217 | const MCOperand &Src2 = Inst.getOperand(i: Src2Idx); |
4218 | if (!Src2.isReg()) |
4219 | return true; |
4220 | |
4221 | MCRegister Src2Reg = Src2.getReg(); |
4222 | MCRegister DstReg = Inst.getOperand(i: 0).getReg(); |
4223 | if (Src2Reg == DstReg) |
4224 | return true; |
4225 | |
4226 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4227 | if (TRI->getRegClass(i: Desc.operands()[0].RegClass).getSizeInBits() <= 128) |
4228 | return true; |
4229 | |
4230 | if (TRI->regsOverlap(RegA: Src2Reg, RegB: DstReg)) { |
4231 | Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Src2Reg), Operands), |
4232 | Msg: "source 2 operand must not partially overlap with dst" ); |
4233 | return false; |
4234 | } |
4235 | |
4236 | return true; |
4237 | } |
4238 | |
4239 | bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { |
4240 | switch (Inst.getOpcode()) { |
4241 | default: |
4242 | return true; |
4243 | case V_DIV_SCALE_F32_gfx6_gfx7: |
4244 | case V_DIV_SCALE_F32_vi: |
4245 | case V_DIV_SCALE_F32_gfx10: |
4246 | case V_DIV_SCALE_F64_gfx6_gfx7: |
4247 | case V_DIV_SCALE_F64_vi: |
4248 | case V_DIV_SCALE_F64_gfx10: |
4249 | break; |
4250 | } |
4251 | |
4252 | // TODO: Check that src0 = src1 or src2. |
4253 | |
4254 |   for (auto Name : {AMDGPU::OpName::src0_modifiers,
4255 |                     AMDGPU::OpName::src1_modifiers,
4256 |                     AMDGPU::OpName::src2_modifiers}) {
4257 | if (Inst.getOperand(i: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name)) |
4258 | .getImm() & |
4259 | SISrcMods::ABS) { |
4260 | return false; |
4261 | } |
4262 | } |
4263 | |
4264 | return true; |
4265 | } |
4266 | |
4267 | bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { |
4268 | |
4269 | const unsigned Opc = Inst.getOpcode(); |
4270 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4271 | |
4272 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4273 | return true; |
4274 | |
4275 | int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::d16); |
4276 | if (D16Idx >= 0 && Inst.getOperand(i: D16Idx).getImm()) { |
4277 | if (isCI() || isSI()) |
4278 | return false; |
4279 | } |
4280 | |
4281 | return true; |
4282 | } |
4283 | |
4284 | bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) { |
4285 | const unsigned Opc = Inst.getOpcode(); |
4286 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4287 | |
4288 | if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0) |
4289 | return true; |
4290 | |
4291 | int R128Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::r128); |
4292 | |
4293 | return R128Idx < 0 || !Inst.getOperand(i: R128Idx).getImm(); |
4294 | } |
4295 | |
4296 | static bool IsRevOpcode(const unsigned Opcode) |
4297 | { |
4298 | switch (Opcode) { |
4299 | case AMDGPU::V_SUBREV_F32_e32: |
4300 | case AMDGPU::V_SUBREV_F32_e64: |
4301 | case AMDGPU::V_SUBREV_F32_e32_gfx10: |
4302 | case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: |
4303 | case AMDGPU::V_SUBREV_F32_e32_vi: |
4304 | case AMDGPU::V_SUBREV_F32_e64_gfx10: |
4305 | case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: |
4306 | case AMDGPU::V_SUBREV_F32_e64_vi: |
4307 | |
4308 | case AMDGPU::V_SUBREV_CO_U32_e32: |
4309 | case AMDGPU::V_SUBREV_CO_U32_e64: |
4310 | case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: |
4311 | case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: |
4312 | |
4313 | case AMDGPU::V_SUBBREV_U32_e32: |
4314 | case AMDGPU::V_SUBBREV_U32_e64: |
4315 | case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: |
4316 | case AMDGPU::V_SUBBREV_U32_e32_vi: |
4317 | case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: |
4318 | case AMDGPU::V_SUBBREV_U32_e64_vi: |
4319 | |
4320 | case AMDGPU::V_SUBREV_U32_e32: |
4321 | case AMDGPU::V_SUBREV_U32_e64: |
4322 | case AMDGPU::V_SUBREV_U32_e32_gfx9: |
4323 | case AMDGPU::V_SUBREV_U32_e32_vi: |
4324 | case AMDGPU::V_SUBREV_U32_e64_gfx9: |
4325 | case AMDGPU::V_SUBREV_U32_e64_vi: |
4326 | |
4327 | case AMDGPU::V_SUBREV_F16_e32: |
4328 | case AMDGPU::V_SUBREV_F16_e64: |
4329 | case AMDGPU::V_SUBREV_F16_e32_gfx10: |
4330 | case AMDGPU::V_SUBREV_F16_e32_vi: |
4331 | case AMDGPU::V_SUBREV_F16_e64_gfx10: |
4332 | case AMDGPU::V_SUBREV_F16_e64_vi: |
4333 | |
4334 | case AMDGPU::V_SUBREV_U16_e32: |
4335 | case AMDGPU::V_SUBREV_U16_e64: |
4336 | case AMDGPU::V_SUBREV_U16_e32_vi: |
4337 | case AMDGPU::V_SUBREV_U16_e64_vi: |
4338 | |
4339 | case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: |
4340 | case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: |
4341 | case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: |
4342 | |
4343 | case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: |
4344 | case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: |
4345 | |
4346 | case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: |
4347 | case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: |
4348 | |
4349 | case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: |
4350 | case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: |
4351 | |
4352 | case AMDGPU::V_LSHRREV_B32_e32: |
4353 | case AMDGPU::V_LSHRREV_B32_e64: |
4354 | case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: |
4355 | case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: |
4356 | case AMDGPU::V_LSHRREV_B32_e32_vi: |
4357 | case AMDGPU::V_LSHRREV_B32_e64_vi: |
4358 | case AMDGPU::V_LSHRREV_B32_e32_gfx10: |
4359 | case AMDGPU::V_LSHRREV_B32_e64_gfx10: |
4360 | |
4361 | case AMDGPU::V_ASHRREV_I32_e32: |
4362 | case AMDGPU::V_ASHRREV_I32_e64: |
4363 | case AMDGPU::V_ASHRREV_I32_e32_gfx10: |
4364 | case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: |
4365 | case AMDGPU::V_ASHRREV_I32_e32_vi: |
4366 | case AMDGPU::V_ASHRREV_I32_e64_gfx10: |
4367 | case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: |
4368 | case AMDGPU::V_ASHRREV_I32_e64_vi: |
4369 | |
4370 | case AMDGPU::V_LSHLREV_B32_e32: |
4371 | case AMDGPU::V_LSHLREV_B32_e64: |
4372 | case AMDGPU::V_LSHLREV_B32_e32_gfx10: |
4373 | case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: |
4374 | case AMDGPU::V_LSHLREV_B32_e32_vi: |
4375 | case AMDGPU::V_LSHLREV_B32_e64_gfx10: |
4376 | case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: |
4377 | case AMDGPU::V_LSHLREV_B32_e64_vi: |
4378 | |
4379 | case AMDGPU::V_LSHLREV_B16_e32: |
4380 | case AMDGPU::V_LSHLREV_B16_e64: |
4381 | case AMDGPU::V_LSHLREV_B16_e32_vi: |
4382 | case AMDGPU::V_LSHLREV_B16_e64_vi: |
4383 | case AMDGPU::V_LSHLREV_B16_gfx10: |
4384 | |
4385 | case AMDGPU::V_LSHRREV_B16_e32: |
4386 | case AMDGPU::V_LSHRREV_B16_e64: |
4387 | case AMDGPU::V_LSHRREV_B16_e32_vi: |
4388 | case AMDGPU::V_LSHRREV_B16_e64_vi: |
4389 | case AMDGPU::V_LSHRREV_B16_gfx10: |
4390 | |
4391 | case AMDGPU::V_ASHRREV_I16_e32: |
4392 | case AMDGPU::V_ASHRREV_I16_e64: |
4393 | case AMDGPU::V_ASHRREV_I16_e32_vi: |
4394 | case AMDGPU::V_ASHRREV_I16_e64_vi: |
4395 | case AMDGPU::V_ASHRREV_I16_gfx10: |
4396 | |
4397 | case AMDGPU::V_LSHLREV_B64_e64: |
4398 | case AMDGPU::V_LSHLREV_B64_gfx10: |
4399 | case AMDGPU::V_LSHLREV_B64_vi: |
4400 | |
4401 | case AMDGPU::V_LSHRREV_B64_e64: |
4402 | case AMDGPU::V_LSHRREV_B64_gfx10: |
4403 | case AMDGPU::V_LSHRREV_B64_vi: |
4404 | |
4405 | case AMDGPU::V_ASHRREV_I64_e64: |
4406 | case AMDGPU::V_ASHRREV_I64_gfx10: |
4407 | case AMDGPU::V_ASHRREV_I64_vi: |
4408 | |
4409 | case AMDGPU::V_PK_LSHLREV_B16: |
4410 | case AMDGPU::V_PK_LSHLREV_B16_gfx10: |
4411 | case AMDGPU::V_PK_LSHLREV_B16_vi: |
4412 | |
4413 | case AMDGPU::V_PK_LSHRREV_B16: |
4414 | case AMDGPU::V_PK_LSHRREV_B16_gfx10: |
4415 | case AMDGPU::V_PK_LSHRREV_B16_vi: |
4416 | case AMDGPU::V_PK_ASHRREV_I16: |
4417 | case AMDGPU::V_PK_ASHRREV_I16_gfx10: |
4418 | case AMDGPU::V_PK_ASHRREV_I16_vi: |
4419 | return true; |
4420 | default: |
4421 | return false; |
4422 | } |
4423 | } |
4424 | |
4425 | std::optional<StringRef> |
4426 | AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { |
4427 | |
4428 | using namespace SIInstrFlags; |
4429 | const unsigned Opcode = Inst.getOpcode(); |
4430 | const MCInstrDesc &Desc = MII.get(Opcode); |
4431 | |
4432 |   // The lds_direct register is defined so that it can be used
4433 |   // with 9-bit source operands only. Ignore encodings which do not accept these.
4434 | const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; |
4435 | if ((Desc.TSFlags & Enc) == 0) |
4436 | return std::nullopt; |
4437 | |
4438 | for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { |
4439 | auto SrcIdx = getNamedOperandIdx(Opcode, Name: SrcName); |
4440 | if (SrcIdx == -1) |
4441 | break; |
4442 | const auto &Src = Inst.getOperand(i: SrcIdx); |
4443 | if (Src.isReg() && Src.getReg() == LDS_DIRECT) { |
4444 | |
4445 | if (isGFX90A() || isGFX11Plus()) |
4446 | return StringRef("lds_direct is not supported on this GPU" ); |
4447 | |
4448 | if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) |
4449 | return StringRef("lds_direct cannot be used with this instruction" ); |
4450 | |
4451 | if (SrcName != OpName::src0) |
4452 | return StringRef("lds_direct may be used as src0 only" ); |
4453 | } |
4454 | } |
4455 | |
4456 | return std::nullopt; |
4457 | } |
4458 | |
4459 | SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { |
4460 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
4461 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4462 | if (Op.isFlatOffset()) |
4463 | return Op.getStartLoc(); |
4464 | } |
4465 | return getLoc(); |
4466 | } |
4467 | |
4468 | bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, |
4469 | const OperandVector &Operands) { |
4470 | auto Opcode = Inst.getOpcode(); |
4471 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset); |
4472 | if (OpNum == -1) |
4473 | return true; |
4474 | |
4475 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4476 | if ((TSFlags & SIInstrFlags::FLAT)) |
4477 | return validateFlatOffset(Inst, Operands); |
4478 | |
4479 | if ((TSFlags & SIInstrFlags::SMRD)) |
4480 | return validateSMEMOffset(Inst, Operands); |
4481 | |
4482 | const auto &Op = Inst.getOperand(i: OpNum); |
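  // For example, on GFX12+ a MUBUF/MTBUF offset must fit in 24 signed bits
  // (-8388608..8388607); the remaining cases handled here take a 16-bit
  // unsigned offset (0..65535).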
4483 | if (isGFX12Plus() && |
4484 | (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { |
4485 | const unsigned OffsetSize = 24; |
4486 | if (!isIntN(N: OffsetSize, x: Op.getImm())) { |
4487 | Error(L: getFlatOffsetLoc(Operands), |
4488 | Msg: Twine("expected a " ) + Twine(OffsetSize) + "-bit signed offset" ); |
4489 | return false; |
4490 | } |
4491 | } else { |
4492 | const unsigned OffsetSize = 16; |
4493 | if (!isUIntN(N: OffsetSize, x: Op.getImm())) { |
4494 | Error(L: getFlatOffsetLoc(Operands), |
4495 | Msg: Twine("expected a " ) + Twine(OffsetSize) + "-bit unsigned offset" ); |
4496 | return false; |
4497 | } |
4498 | } |
4499 | return true; |
4500 | } |
4501 | |
4502 | bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, |
4503 | const OperandVector &Operands) { |
4504 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4505 | if ((TSFlags & SIInstrFlags::FLAT) == 0) |
4506 | return true; |
4507 | |
4508 | auto Opcode = Inst.getOpcode(); |
4509 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset); |
4510 | assert(OpNum != -1); |
4511 | |
4512 | const auto &Op = Inst.getOperand(i: OpNum); |
4513 | if (!hasFlatOffsets() && Op.getImm() != 0) { |
4514 | Error(L: getFlatOffsetLoc(Operands), |
4515 | Msg: "flat offset modifier is not supported on this GPU" ); |
4516 | return false; |
4517 | } |
4518 | |
  // For pre-GFX12 FLAT instructions the offset must be positive; the MSB is
  // ignored and forced to zero.
4521 | unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(ST: getSTI()); |
4522 | bool AllowNegative = |
4523 | (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || |
4524 | isGFX12Plus(); |
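  // With an N-bit offset field this accepts -2^(N-1)..2^(N-1)-1 when negative
  // offsets are allowed, and only 0..2^(N-1)-1 otherwise (hence the
  // "(N-1)-bit unsigned" wording in the error below).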
4525 | if (!isIntN(N: OffsetSize, x: Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { |
4526 | Error(L: getFlatOffsetLoc(Operands), |
4527 | Msg: Twine("expected a " ) + |
4528 | (AllowNegative ? Twine(OffsetSize) + "-bit signed offset" |
4529 | : Twine(OffsetSize - 1) + "-bit unsigned offset" )); |
4530 | return false; |
4531 | } |
4532 | |
4533 | return true; |
4534 | } |
4535 | |
4536 | SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { |
4537 | // Start with second operand because SMEM Offset cannot be dst or src0. |
4538 | for (unsigned i = 2, e = Operands.size(); i != e; ++i) { |
4539 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4540 | if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) |
4541 | return Op.getStartLoc(); |
4542 | } |
4543 | return getLoc(); |
4544 | } |
4545 | |
4546 | bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, |
4547 | const OperandVector &Operands) { |
4548 | if (isCI() || isSI()) |
4549 | return true; |
4550 | |
4551 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4552 | if ((TSFlags & SIInstrFlags::SMRD) == 0) |
4553 | return true; |
4554 | |
4555 | auto Opcode = Inst.getOpcode(); |
4556 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::offset); |
4557 | if (OpNum == -1) |
4558 | return true; |
4559 | |
4560 | const auto &Op = Inst.getOperand(i: OpNum); |
4561 | if (!Op.isImm()) |
4562 | return true; |
4563 | |
4564 | uint64_t Offset = Op.getImm(); |
4565 | bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opc: Opcode); |
4566 | if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(ST: getSTI(), EncodedOffset: Offset) || |
4567 | AMDGPU::isLegalSMRDEncodedSignedOffset(ST: getSTI(), EncodedOffset: Offset, IsBuffer)) |
4568 | return true; |
4569 | |
4570 | Error(L: getSMEMOffsetLoc(Operands), |
4571 | Msg: isGFX12Plus() ? "expected a 24-bit signed offset" |
4572 | : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" |
4573 | : "expected a 21-bit signed offset" ); |
4574 | |
4575 | return false; |
4576 | } |
4577 | |
4578 | bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { |
4579 | unsigned Opcode = Inst.getOpcode(); |
4580 | const MCInstrDesc &Desc = MII.get(Opcode); |
4581 | if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) |
4582 | return true; |
4583 | |
4584 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src0); |
4585 | const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::src1); |
4586 | |
4587 | const int OpIndices[] = { Src0Idx, Src1Idx }; |
4588 | |
4589 | unsigned NumExprs = 0; |
4590 | unsigned NumLiterals = 0; |
4591 | uint32_t LiteralValue; |
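  // Count distinct non-inline literal values and relocatable expressions among
  // the sources below; a repeated identical literal (the same 32-bit constant
  // used for both src0 and src1) counts only once and is accepted.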
4592 | |
4593 | for (int OpIdx : OpIndices) { |
4594 | if (OpIdx == -1) break; |
4595 | |
4596 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
    // Exclude special imm operands (like the one used by s_set_gpr_idx_on).
4598 | if (AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx)) { |
4599 | if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { |
4600 | uint32_t Value = static_cast<uint32_t>(MO.getImm()); |
4601 | if (NumLiterals == 0 || LiteralValue != Value) { |
4602 | LiteralValue = Value; |
4603 | ++NumLiterals; |
4604 | } |
4605 | } else if (MO.isExpr()) { |
4606 | ++NumExprs; |
4607 | } |
4608 | } |
4609 | } |
4610 | |
4611 | return NumLiterals + NumExprs <= 1; |
4612 | } |
4613 | |
4614 | bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { |
4615 | const unsigned Opc = Inst.getOpcode(); |
4616 | if (isPermlane16(Opc)) { |
4617 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
4618 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
4619 | |
4620 | if (OpSel & ~3) |
4621 | return false; |
4622 | } |
4623 | |
4624 | uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags; |
4625 | |
4626 | if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { |
4627 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
4628 | if (OpSelIdx != -1) { |
4629 | if (Inst.getOperand(i: OpSelIdx).getImm() != 0) |
4630 | return false; |
4631 | } |
4632 | int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel_hi); |
4633 | if (OpSelHiIdx != -1) { |
4634 | if (Inst.getOperand(i: OpSelHiIdx).getImm() != -1) |
4635 | return false; |
4636 | } |
4637 | } |
4638 | |
4639 | // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). |
4640 | if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && |
4641 | (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { |
4642 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
4643 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
4644 | if (OpSel & 3) |
4645 | return false; |
4646 | } |
4647 | |
4648 | return true; |
4649 | } |
4650 | |
4651 | bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) { |
4652 | if (!hasTrue16Insts()) |
4653 | return true; |
4654 | const MCRegisterInfo *MRI = getMRI(); |
4655 | const unsigned Opc = Inst.getOpcode(); |
4656 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
4657 | if (OpSelIdx == -1) |
4658 | return true; |
4659 | unsigned OpSelOpValue = Inst.getOperand(i: OpSelIdx).getImm(); |
  // If the value is 0, this may be a default op_sel operand, so conservatively
  // allow it.
4662 | if (OpSelOpValue == 0) |
4663 | return true; |
4664 | unsigned OpCount = 0; |
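  // Each op_sel bit is matched against the corresponding present operand,
  // scanning src0, src1, src2, vdst in order: for a 16-bit VGPR the bit must
  // agree with whether the high (.h) or low (.l) half register was written.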
4665 | for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1, |
4666 | AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) { |
4667 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: OpName); |
4668 | if (OpIdx == -1) |
4669 | continue; |
4670 | const MCOperand &Op = Inst.getOperand(i: OpIdx); |
4671 | if (Op.isReg() && |
4672 | MRI->getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: Op.getReg())) { |
4673 | bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Reg: Op.getReg(), MRI: *MRI); |
4674 | bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0); |
4675 | if (OpSelOpIsHi != VGPRSuffixIsHi) |
4676 | return false; |
4677 | } |
4678 | ++OpCount; |
4679 | } |
4680 | |
4681 | return true; |
4682 | } |
4683 | |
4684 | bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) { |
4685 | assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi); |
4686 | |
4687 | const unsigned Opc = Inst.getOpcode(); |
4688 | uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags; |
4689 | |
4690 | // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2) |
4691 | // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1) |
4692 | // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1) |
  // Other wmma/swmmac instructions don't have neg_lo/neg_hi operands.
4694 | if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) && |
4695 | !(TSFlags & SIInstrFlags::IsSWMMAC)) |
4696 | return true; |
4697 | |
4698 | int NegIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName); |
4699 | if (NegIdx == -1) |
4700 | return true; |
4701 | |
4702 | unsigned Neg = Inst.getOperand(i: NegIdx).getImm(); |
4703 | |
  // Some instructions have a neg_lo or neg_hi operand, yet allow the neg
  // modifier only on certain src operands. Conveniently, such instructions
  // have no src_modifiers operand for the src operands that disallow neg,
  // because those operands also disallow opsel.
4708 | |
4709 | const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers, |
4710 | AMDGPU::OpName::src1_modifiers, |
4711 | AMDGPU::OpName::src2_modifiers}; |
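  // Bit i of neg_lo/neg_hi corresponds to src{i}; if src{i} has no
  // src{i}_modifiers operand, negation is not encodable for that source and
  // the corresponding bit must be clear.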
4712 | |
4713 | for (unsigned i = 0; i < 3; ++i) { |
4714 | if (!AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: SrcMods[i])) { |
4715 | if (Neg & (1 << i)) |
4716 | return false; |
4717 | } |
4718 | } |
4719 | |
4720 | return true; |
4721 | } |
4722 | |
4723 | bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, |
4724 | const OperandVector &Operands) { |
4725 | const unsigned Opc = Inst.getOpcode(); |
4726 | int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp_ctrl); |
4727 | if (DppCtrlIdx >= 0) { |
4728 | unsigned DppCtrl = Inst.getOperand(i: DppCtrlIdx).getImm(); |
4729 | |
4730 | if (!AMDGPU::isLegalDPALU_DPPControl(DC: DppCtrl) && |
4731 | AMDGPU::isDPALU_DPP(OpDesc: MII.get(Opcode: Opc))) { |
4732 | // DP ALU DPP is supported for row_newbcast only on GFX9* |
4733 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyDppCtrl, Operands); |
4734 | Error(L: S, Msg: "DP ALU dpp only supports row_newbcast" ); |
4735 | return false; |
4736 | } |
4737 | } |
4738 | |
4739 | int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::dpp8); |
4740 | bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; |
4741 | |
4742 | if (IsDPP && !hasDPPSrc1SGPR(STI: getSTI())) { |
4743 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src1); |
4744 | if (Src1Idx >= 0) { |
4745 | const MCOperand &Src1 = Inst.getOperand(i: Src1Idx); |
4746 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4747 | if (Src1.isReg() && isSGPR(Reg: mc2PseudoReg(Reg: Src1.getReg()), TRI)) { |
4748 | auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src1Idx).getReg()); |
4749 | SMLoc S = getRegLoc(Reg, Operands); |
4750 | Error(L: S, Msg: "invalid operand for instruction" ); |
4751 | return false; |
4752 | } |
4753 | if (Src1.isImm()) { |
4754 | Error(L: getInstLoc(Operands), |
4755 | Msg: "src1 immediate operand invalid for instruction" ); |
4756 | return false; |
4757 | } |
4758 | } |
4759 | } |
4760 | |
4761 | return true; |
4762 | } |
4763 | |
4764 | // Check if VCC register matches wavefront size |
4765 | bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const { |
4766 | auto FB = getFeatureBits(); |
4767 | return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || |
4768 | (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); |
4769 | } |
4770 | |
// Only one unique literal may be used. A VOP3 literal is allowed only on
// GFX10+.
4772 | bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, |
4773 | const OperandVector &Operands) { |
4774 | unsigned Opcode = Inst.getOpcode(); |
4775 | const MCInstrDesc &Desc = MII.get(Opcode); |
4776 | bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, Name: OpName::imm) != -1; |
4777 | if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && |
4778 | !HasMandatoryLiteral && !isVOPD(Opc: Opcode)) |
4779 | return true; |
4780 | |
4781 | OperandIndices OpIndices = getSrcOperandIndices(Opcode, AddMandatoryLiterals: HasMandatoryLiteral); |
4782 | |
4783 | unsigned NumExprs = 0; |
4784 | unsigned NumLiterals = 0; |
4785 | uint32_t LiteralValue; |
4786 | |
4787 | for (int OpIdx : OpIndices) { |
4788 | if (OpIdx == -1) |
4789 | continue; |
4790 | |
4791 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
4792 | if (!MO.isImm() && !MO.isExpr()) |
4793 | continue; |
4794 | if (!isSISrcOperand(Desc, OpNo: OpIdx)) |
4795 | continue; |
4796 | |
4797 | if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { |
4798 | uint64_t Value = static_cast<uint64_t>(MO.getImm()); |
4799 | bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpNo: OpIdx) && |
4800 | AMDGPU::getOperandSize(OpInfo: Desc.operands()[OpIdx]) == 8; |
4801 | bool IsValid32Op = AMDGPU::isValid32BitLiteral(Val: Value, IsFP64); |
4802 | |
4803 | if (!IsValid32Op && !isInt<32>(x: Value) && !isUInt<32>(x: Value)) { |
4804 | Error(L: getLitLoc(Operands), Msg: "invalid operand for instruction" ); |
4805 | return false; |
4806 | } |
4807 | |
4808 | if (IsFP64 && IsValid32Op) |
4809 | Value = Hi_32(Value); |
4810 | |
4811 | if (NumLiterals == 0 || LiteralValue != Value) { |
4812 | LiteralValue = Value; |
4813 | ++NumLiterals; |
4814 | } |
4815 | } else if (MO.isExpr()) { |
4816 | ++NumExprs; |
4817 | } |
4818 | } |
4819 | NumLiterals += NumExprs; |
4820 | |
4821 | if (!NumLiterals) |
4822 | return true; |
4823 | |
4824 | if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { |
4825 | Error(L: getLitLoc(Operands), Msg: "literal operands are not supported" ); |
4826 | return false; |
4827 | } |
4828 | |
4829 | if (NumLiterals > 1) { |
4830 | Error(L: getLitLoc(Operands, SearchMandatoryLiterals: true), Msg: "only one unique literal operand is allowed" ); |
4831 | return false; |
4832 | } |
4833 | |
4834 | return true; |
4835 | } |
4836 | |
4837 | // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. |
4838 | static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, |
4839 | const MCRegisterInfo *MRI) { |
4840 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name); |
4841 | if (OpIdx < 0) |
4842 | return -1; |
4843 | |
4844 | const MCOperand &Op = Inst.getOperand(i: OpIdx); |
4845 | if (!Op.isReg()) |
4846 | return -1; |
4847 | |
4848 | MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0); |
4849 | auto Reg = Sub ? Sub : Op.getReg(); |
4850 | const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID); |
4851 | return AGPR32.contains(Reg) ? 1 : 0; |
4852 | } |
4853 | |
4854 | bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { |
4855 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4856 | if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | |
4857 | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | |
4858 | SIInstrFlags::DS)) == 0) |
4859 | return true; |
4860 | |
4861 | AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS) |
4862 | ? AMDGPU::OpName::data0 |
4863 | : AMDGPU::OpName::vdata; |
4864 | |
4865 | const MCRegisterInfo *MRI = getMRI(); |
4866 | int DstAreg = IsAGPROperand(Inst, Name: AMDGPU::OpName::vdst, MRI); |
4867 | int DataAreg = IsAGPROperand(Inst, Name: DataName, MRI); |
4868 | |
4869 | if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { |
4870 | int Data2Areg = IsAGPROperand(Inst, Name: AMDGPU::OpName::data1, MRI); |
4871 | if (Data2Areg >= 0 && Data2Areg != DataAreg) |
4872 | return false; |
4873 | } |
4874 | |
4875 | auto FB = getFeatureBits(); |
4876 | if (FB[AMDGPU::FeatureGFX90AInsts]) { |
4877 | if (DataAreg < 0 || DstAreg < 0) |
4878 | return true; |
4879 | return DstAreg == DataAreg; |
4880 | } |
4881 | |
4882 | return DstAreg < 1 && DataAreg < 1; |
4883 | } |
4884 | |
4885 | bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { |
4886 | auto FB = getFeatureBits(); |
4887 | unsigned Opc = Inst.getOpcode(); |
  // DS_READ_B96_TR_B6 is the only GFX950 DS instruction that allows an
  // unaligned VGPR. All other instructions allow only even-aligned VGPRs.
4890 | if (!(FB[AMDGPU::FeatureGFX90AInsts]) || Opc == AMDGPU::DS_READ_B96_TR_B6_vi) |
4891 | return true; |
4892 | |
4893 | const MCRegisterInfo *MRI = getMRI(); |
4894 | const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID); |
4895 | const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID); |
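  // E.g. a register pair starting at an odd VGPR such as v[1:2] is rejected,
  // while an even-aligned pair such as v[2:3] is accepted.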
4896 | for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { |
4897 | const MCOperand &Op = Inst.getOperand(i: I); |
4898 | if (!Op.isReg()) |
4899 | continue; |
4900 | |
4901 | MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0); |
4902 | if (!Sub) |
4903 | continue; |
4904 | |
4905 | if (VGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::VGPR0) & 1)) |
4906 | return false; |
4907 | if (AGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::AGPR0) & 1)) |
4908 | return false; |
4909 | } |
4910 | |
4911 | return true; |
4912 | } |
4913 | |
4914 | SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { |
4915 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
4916 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4917 | if (Op.isBLGP()) |
4918 | return Op.getStartLoc(); |
4919 | } |
4920 | return SMLoc(); |
4921 | } |
4922 | |
4923 | bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, |
4924 | const OperandVector &Operands) { |
4925 | unsigned Opc = Inst.getOpcode(); |
4926 | int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::blgp); |
4927 | if (BlgpIdx == -1) |
4928 | return true; |
4929 | SMLoc BLGPLoc = getBLGPLoc(Operands); |
4930 | if (!BLGPLoc.isValid()) |
4931 | return true; |
4932 | bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with(Prefix: "neg:" ); |
4933 | auto FB = getFeatureBits(); |
4934 | bool UsesNeg = false; |
4935 | if (FB[AMDGPU::FeatureGFX940Insts]) { |
4936 | switch (Opc) { |
4937 | case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: |
4938 | case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: |
4939 | case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: |
4940 | case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: |
4941 | UsesNeg = true; |
4942 | } |
4943 | } |
4944 | |
4945 | if (IsNeg == UsesNeg) |
4946 | return true; |
4947 | |
4948 | Error(L: BLGPLoc, |
4949 | Msg: UsesNeg ? "invalid modifier: blgp is not supported" |
4950 | : "invalid modifier: neg is not supported" ); |
4951 | |
4952 | return false; |
4953 | } |
4954 | |
4955 | bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, |
4956 | const OperandVector &Operands) { |
4957 | if (!isGFX11Plus()) |
4958 | return true; |
4959 | |
4960 | unsigned Opc = Inst.getOpcode(); |
4961 | if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && |
4962 | Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && |
4963 | Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && |
4964 | Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) |
4965 | return true; |
4966 | |
4967 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::sdst); |
4968 | assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); |
4969 | auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg()); |
4970 | if (Reg == AMDGPU::SGPR_NULL) |
4971 | return true; |
4972 | |
4973 | SMLoc RegLoc = getRegLoc(Reg, Operands); |
4974 | Error(L: RegLoc, Msg: "src0 must be null" ); |
4975 | return false; |
4976 | } |
4977 | |
4978 | bool AMDGPUAsmParser::validateDS(const MCInst &Inst, |
4979 | const OperandVector &Operands) { |
4980 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4981 | if ((TSFlags & SIInstrFlags::DS) == 0) |
4982 | return true; |
4983 | if (TSFlags & SIInstrFlags::GWS) |
4984 | return validateGWS(Inst, Operands); |
4985 | // Only validate GDS for non-GWS instructions. |
4986 | if (hasGDS()) |
4987 | return true; |
4988 | int GDSIdx = |
4989 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::gds); |
4990 | if (GDSIdx < 0) |
4991 | return true; |
4992 | unsigned GDS = Inst.getOperand(i: GDSIdx).getImm(); |
4993 | if (GDS) { |
4994 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyGDS, Operands); |
4995 | Error(L: S, Msg: "gds modifier is not supported on this GPU" ); |
4996 | return false; |
4997 | } |
4998 | return true; |
4999 | } |
5000 | |
5001 | // gfx90a has an undocumented limitation: |
5002 | // DS_GWS opcodes must use even aligned registers. |
5003 | bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, |
5004 | const OperandVector &Operands) { |
5005 | if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) |
5006 | return true; |
5007 | |
5008 | int Opc = Inst.getOpcode(); |
5009 | if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && |
5010 | Opc != AMDGPU::DS_GWS_SEMA_BR_vi) |
5011 | return true; |
5012 | |
5013 | const MCRegisterInfo *MRI = getMRI(); |
5014 | const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID); |
5015 | int Data0Pos = |
5016 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::data0); |
5017 | assert(Data0Pos != -1); |
5018 | auto Reg = Inst.getOperand(i: Data0Pos).getReg(); |
5019 | auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); |
5020 | if (RegIdx & 1) { |
5021 | SMLoc RegLoc = getRegLoc(Reg, Operands); |
5022 | Error(L: RegLoc, Msg: "vgpr must be even aligned" ); |
5023 | return false; |
5024 | } |
5025 | |
5026 | return true; |
5027 | } |
5028 | |
5029 | bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, |
5030 | const OperandVector &Operands, |
5031 | const SMLoc &IDLoc) { |
5032 | int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), |
5033 | Name: AMDGPU::OpName::cpol); |
5034 | if (CPolPos == -1) |
5035 | return true; |
5036 | |
5037 | unsigned CPol = Inst.getOperand(i: CPolPos).getImm(); |
5038 | |
5039 | if (isGFX12Plus()) |
5040 | return validateTHAndScopeBits(Inst, Operands, CPol); |
5041 | |
5042 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
5043 | if (TSFlags & SIInstrFlags::SMRD) { |
5044 | if (CPol && (isSI() || isCI())) { |
5045 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
5046 | Error(L: S, Msg: "cache policy is not supported for SMRD instructions" ); |
5047 | return false; |
5048 | } |
5049 | if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { |
5050 | Error(L: IDLoc, Msg: "invalid cache policy for SMEM instruction" ); |
5051 | return false; |
5052 | } |
5053 | } |
5054 | |
5055 | if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { |
5056 | const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | |
5057 | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | |
5058 | SIInstrFlags::FLAT; |
5059 | if (!(TSFlags & AllowSCCModifier)) { |
5060 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
5061 | StringRef CStr(S.getPointer()); |
5062 | S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scc" )]); |
5063 | Error(L: S, |
5064 | Msg: "scc modifier is not supported for this instruction on this GPU" ); |
5065 | return false; |
5066 | } |
5067 | } |
5068 | |
5069 | if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) |
5070 | return true; |
5071 | |
5072 | if (TSFlags & SIInstrFlags::IsAtomicRet) { |
5073 | if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { |
5074 | Error(L: IDLoc, Msg: isGFX940() ? "instruction must use sc0" |
5075 | : "instruction must use glc" ); |
5076 | return false; |
5077 | } |
5078 | } else { |
5079 | if (CPol & CPol::GLC) { |
5080 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
5081 | StringRef CStr(S.getPointer()); |
5082 | S = SMLoc::getFromPointer( |
5083 | Ptr: &CStr.data()[CStr.find(Str: isGFX940() ? "sc0" : "glc" )]); |
5084 | Error(L: S, Msg: isGFX940() ? "instruction must not use sc0" |
5085 | : "instruction must not use glc" ); |
5086 | return false; |
5087 | } |
5088 | } |
5089 | |
5090 | return true; |
5091 | } |
5092 | |
5093 | bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, |
5094 | const OperandVector &Operands, |
5095 | const unsigned CPol) { |
5096 | const unsigned TH = CPol & AMDGPU::CPol::TH; |
5097 | const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; |
5098 | |
5099 | const unsigned Opcode = Inst.getOpcode(); |
5100 | const MCInstrDesc &TID = MII.get(Opcode); |
5101 | |
5102 | auto PrintError = [&](StringRef Msg) { |
5103 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
5104 | Error(L: S, Msg); |
5105 | return false; |
5106 | }; |
5107 | |
5108 | if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && |
5109 | (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && |
5110 | (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) |
5111 | return PrintError("instruction must use th:TH_ATOMIC_RETURN" ); |
5112 | |
5113 | if (TH == 0) |
5114 | return true; |
5115 | |
5116 | if ((TID.TSFlags & SIInstrFlags::SMRD) && |
5117 | ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || |
5118 | (TH == AMDGPU::CPol::TH_NT_HT))) |
5119 | return PrintError("invalid th value for SMEM instruction" ); |
5120 | |
5121 | if (TH == AMDGPU::CPol::TH_BYPASS) { |
5122 | if ((Scope != AMDGPU::CPol::SCOPE_SYS && |
5123 | CPol & AMDGPU::CPol::TH_REAL_BYPASS) || |
5124 | (Scope == AMDGPU::CPol::SCOPE_SYS && |
5125 | !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) |
5126 | return PrintError("scope and th combination is not valid" ); |
5127 | } |
5128 | |
5129 | unsigned THType = AMDGPU::getTemporalHintType(TID); |
5130 | if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) { |
5131 | if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) |
5132 | return PrintError("invalid th value for atomic instructions" ); |
5133 | } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) { |
5134 | if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) |
5135 | return PrintError("invalid th value for store instructions" ); |
5136 | } else { |
5137 | if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) |
5138 | return PrintError("invalid th value for load instructions" ); |
5139 | } |
5140 | |
5141 | return true; |
5142 | } |
5143 | |
5144 | bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, |
5145 | const OperandVector &Operands) { |
5146 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
5147 | if (Desc.mayStore() && |
5148 | (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { |
5149 | SMLoc Loc = getImmLoc(Type: AMDGPUOperand::ImmTyTFE, Operands); |
5150 | if (Loc != getInstLoc(Operands)) { |
5151 | Error(L: Loc, Msg: "TFE modifier has no meaning for store instructions" ); |
5152 | return false; |
5153 | } |
5154 | } |
5155 | |
5156 | return true; |
5157 | } |
5158 | |
5159 | bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, |
5160 | const SMLoc &IDLoc, |
5161 | const OperandVector &Operands) { |
5162 | if (auto ErrMsg = validateLdsDirect(Inst)) { |
5163 | Error(L: getRegLoc(Reg: LDS_DIRECT, Operands), Msg: *ErrMsg); |
5164 | return false; |
5165 | } |
5166 | if (!validateTrue16OpSel(Inst)) { |
5167 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands), |
5168 | Msg: "op_sel operand conflicts with 16-bit operand suffix" ); |
5169 | return false; |
5170 | } |
5171 | if (!validateSOPLiteral(Inst)) { |
5172 | Error(L: getLitLoc(Operands), |
5173 | Msg: "only one unique literal operand is allowed" ); |
5174 | return false; |
5175 | } |
5176 | if (!validateVOPLiteral(Inst, Operands)) { |
5177 | return false; |
5178 | } |
5179 | if (!validateConstantBusLimitations(Inst, Operands)) { |
5180 | return false; |
5181 | } |
5182 | if (!validateVOPDRegBankConstraints(Inst, Operands)) { |
5183 | return false; |
5184 | } |
5185 | if (!validateIntClampSupported(Inst)) { |
5186 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyClamp, Operands), |
5187 | Msg: "integer clamping is not supported on this GPU" ); |
5188 | return false; |
5189 | } |
5190 | if (!validateOpSel(Inst)) { |
5191 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands), |
5192 | Msg: "invalid op_sel operand" ); |
5193 | return false; |
5194 | } |
5195 | if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_lo)) { |
5196 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegLo, Operands), |
5197 | Msg: "invalid neg_lo operand" ); |
5198 | return false; |
5199 | } |
5200 | if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_hi)) { |
5201 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegHi, Operands), |
5202 | Msg: "invalid neg_hi operand" ); |
5203 | return false; |
5204 | } |
5205 | if (!validateDPP(Inst, Operands)) { |
5206 | return false; |
5207 | } |
  // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to
  // validate.
5209 | if (!validateMIMGD16(Inst)) { |
5210 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands), |
5211 | Msg: "d16 modifier is not supported on this GPU" ); |
5212 | return false; |
5213 | } |
5214 | if (!validateMIMGDim(Inst, Operands)) { |
5215 | Error(L: IDLoc, Msg: "missing dim operand" ); |
5216 | return false; |
5217 | } |
5218 | if (!validateTensorR128(Inst)) { |
5219 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands), |
5220 | Msg: "instruction must set modifier r128=0" ); |
5221 | return false; |
5222 | } |
5223 | if (!validateMIMGMSAA(Inst)) { |
5224 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDim, Operands), |
5225 | Msg: "invalid dim; must be MSAA type" ); |
5226 | return false; |
5227 | } |
5228 | if (!validateMIMGDataSize(Inst, IDLoc)) { |
5229 | return false; |
5230 | } |
5231 | if (!validateMIMGAddrSize(Inst, IDLoc)) |
5232 | return false; |
5233 | if (!validateMIMGAtomicDMask(Inst)) { |
5234 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands), |
5235 | Msg: "invalid atomic image dmask" ); |
5236 | return false; |
5237 | } |
5238 | if (!validateMIMGGatherDMask(Inst)) { |
5239 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands), |
5240 | Msg: "invalid image_gather dmask: only one bit must be set" ); |
5241 | return false; |
5242 | } |
5243 | if (!validateMovrels(Inst, Operands)) { |
5244 | return false; |
5245 | } |
5246 | if (!validateOffset(Inst, Operands)) { |
5247 | return false; |
5248 | } |
5249 | if (!validateMAIAccWrite(Inst, Operands)) { |
5250 | return false; |
5251 | } |
5252 | if (!validateMAISrc2(Inst, Operands)) { |
5253 | return false; |
5254 | } |
5255 | if (!validateMFMA(Inst, Operands)) { |
5256 | return false; |
5257 | } |
5258 | if (!validateCoherencyBits(Inst, Operands, IDLoc)) { |
5259 | return false; |
5260 | } |
5261 | |
5262 | if (!validateAGPRLdSt(Inst)) { |
5263 | Error(L: IDLoc, Msg: getFeatureBits()[AMDGPU::FeatureGFX90AInsts] |
5264 | ? "invalid register class: data and dst should be all VGPR or AGPR" |
5265 | : "invalid register class: agpr loads and stores not supported on this GPU" |
5266 | ); |
5267 | return false; |
5268 | } |
5269 | if (!validateVGPRAlign(Inst)) { |
5270 | Error(L: IDLoc, |
5271 | Msg: "invalid register class: vgpr tuples must be 64 bit aligned" ); |
5272 | return false; |
5273 | } |
5274 | if (!validateDS(Inst, Operands)) { |
5275 | return false; |
5276 | } |
5277 | |
5278 | if (!validateBLGP(Inst, Operands)) { |
5279 | return false; |
5280 | } |
5281 | |
5282 | if (!validateDivScale(Inst)) { |
5283 | Error(L: IDLoc, Msg: "ABS not allowed in VOP3B instructions" ); |
5284 | return false; |
5285 | } |
5286 | if (!validateWaitCnt(Inst, Operands)) { |
5287 | return false; |
5288 | } |
5289 | if (!validateTFE(Inst, Operands)) { |
5290 | return false; |
5291 | } |
5292 | |
5293 | return true; |
5294 | } |
5295 | |
5296 | static std::string AMDGPUMnemonicSpellCheck(StringRef S, |
5297 | const FeatureBitset &FBS, |
5298 | unsigned VariantID = 0); |
5299 | |
5300 | static bool AMDGPUCheckMnemonic(StringRef Mnemonic, |
5301 | const FeatureBitset &AvailableFeatures, |
5302 | unsigned VariantID); |
5303 | |
5304 | bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, |
5305 | const FeatureBitset &FBS) { |
5306 | return isSupportedMnemo(Mnemo, FBS, Variants: getAllVariants()); |
5307 | } |
5308 | |
5309 | bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, |
5310 | const FeatureBitset &FBS, |
5311 | ArrayRef<unsigned> Variants) { |
5312 | for (auto Variant : Variants) { |
5313 | if (AMDGPUCheckMnemonic(Mnemonic: Mnemo, AvailableFeatures: FBS, VariantID: Variant)) |
5314 | return true; |
5315 | } |
5316 | |
5317 | return false; |
5318 | } |
5319 | |
5320 | bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, |
5321 | const SMLoc &IDLoc) { |
5322 | FeatureBitset FBS = ComputeAvailableFeatures(FB: getFeatureBits()); |
5323 | |
5324 | // Check if requested instruction variant is supported. |
5325 | if (isSupportedMnemo(Mnemo, FBS, Variants: getMatchedVariants())) |
5326 | return false; |
5327 | |
5328 | // This instruction is not supported. |
5329 | // Clear any other pending errors because they are no longer relevant. |
5330 | getParser().clearPendingErrors(); |
5331 | |
5332 | // Requested instruction variant is not supported. |
5333 | // Check if any other variants are supported. |
5334 | StringRef VariantName = getMatchedVariantName(); |
5335 | if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { |
5336 | return Error(L: IDLoc, |
5337 | Msg: Twine(VariantName, |
5338 | " variant of this instruction is not supported" )); |
5339 | } |
5340 | |
5341 | // Check if this instruction may be used with a different wavesize. |
5342 | if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && |
5343 | !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { |
5344 | |
5345 | FeatureBitset FeaturesWS32 = getFeatureBits(); |
5346 | FeaturesWS32.flip(I: AMDGPU::FeatureWavefrontSize64) |
5347 | .flip(I: AMDGPU::FeatureWavefrontSize32); |
5348 | FeatureBitset AvailableFeaturesWS32 = |
5349 | ComputeAvailableFeatures(FB: FeaturesWS32); |
5350 | |
5351 | if (isSupportedMnemo(Mnemo, FBS: AvailableFeaturesWS32, Variants: getMatchedVariants())) |
5352 | return Error(L: IDLoc, Msg: "instruction requires wavesize=32" ); |
5353 | } |
5354 | |
5355 | // Finally check if this instruction is supported on any other GPU. |
5356 | if (isSupportedMnemo(Mnemo, FBS: FeatureBitset().set())) { |
5357 | return Error(L: IDLoc, Msg: "instruction not supported on this GPU" ); |
5358 | } |
5359 | |
5360 | // Instruction not supported on any GPU. Probably a typo. |
5361 | std::string Suggestion = AMDGPUMnemonicSpellCheck(S: Mnemo, FBS); |
5362 | return Error(L: IDLoc, Msg: "invalid instruction" + Suggestion); |
5363 | } |
5364 | |
5365 | static bool isInvalidVOPDY(const OperandVector &Operands, |
5366 | uint64_t InvalidOprIdx) { |
5367 | assert(InvalidOprIdx < Operands.size()); |
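  // In VOPD syntax the "::" token separates the X and Y component
  // instructions, so an operand error immediately following "::" is reported
  // against the VOPDY half.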
5368 | const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); |
5369 | if (Op.isToken() && InvalidOprIdx > 1) { |
5370 | const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); |
5371 | return PrevOp.isToken() && PrevOp.getToken() == "::" ; |
5372 | } |
5373 | return false; |
5374 | } |
5375 | |
5376 | bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
5377 | OperandVector &Operands, |
5378 | MCStreamer &Out, |
5379 | uint64_t &ErrorInfo, |
5380 | bool MatchingInlineAsm) { |
5381 | MCInst Inst; |
5382 | unsigned Result = Match_Success; |
5383 | for (auto Variant : getMatchedVariants()) { |
5384 | uint64_t EI; |
5385 | auto R = MatchInstructionImpl(Operands, Inst, ErrorInfo&: EI, matchingInlineAsm: MatchingInlineAsm, |
5386 | VariantID: Variant); |
    // Match statuses are ordered from least to most specific; keep the most
    // specific status seen so far as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5390 | if (R == Match_Success || R == Match_MissingFeature || |
5391 | (R == Match_InvalidOperand && Result != Match_MissingFeature) || |
5392 | (R == Match_MnemonicFail && Result != Match_InvalidOperand && |
5393 | Result != Match_MissingFeature)) { |
5394 | Result = R; |
5395 | ErrorInfo = EI; |
5396 | } |
5397 | if (R == Match_Success) |
5398 | break; |
5399 | } |
5400 | |
5401 | if (Result == Match_Success) { |
5402 | if (!validateInstruction(Inst, IDLoc, Operands)) { |
5403 | return true; |
5404 | } |
5405 | Inst.setLoc(IDLoc); |
5406 | Out.emitInstruction(Inst, STI: getSTI()); |
5407 | return false; |
5408 | } |
5409 | |
5410 | StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); |
5411 | if (checkUnsupportedInstruction(Mnemo, IDLoc)) { |
5412 | return true; |
5413 | } |
5414 | |
5415 | switch (Result) { |
5416 | default: break; |
5417 | case Match_MissingFeature: |
5418 | // It has been verified that the specified instruction |
5419 | // mnemonic is valid. A match was found but it requires |
5420 | // features which are not supported on this GPU. |
5421 | return Error(L: IDLoc, Msg: "operands are not valid for this GPU or mode" ); |
5422 | |
5423 | case Match_InvalidOperand: { |
5424 | SMLoc ErrorLoc = IDLoc; |
5425 | if (ErrorInfo != ~0ULL) { |
5426 | if (ErrorInfo >= Operands.size()) { |
5427 | return Error(L: IDLoc, Msg: "too few operands for instruction" ); |
5428 | } |
5429 | ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); |
5430 | if (ErrorLoc == SMLoc()) |
5431 | ErrorLoc = IDLoc; |
5432 | |
5433 | if (isInvalidVOPDY(Operands, InvalidOprIdx: ErrorInfo)) |
5434 | return Error(L: ErrorLoc, Msg: "invalid VOPDY instruction" ); |
5435 | } |
5436 | return Error(L: ErrorLoc, Msg: "invalid operand for instruction" ); |
5437 | } |
5438 | |
5439 | case Match_MnemonicFail: |
5440 | llvm_unreachable("Invalid instructions should have been handled already" ); |
5441 | } |
5442 | llvm_unreachable("Implement any new match types added!" ); |
5443 | } |
5444 | |
5445 | bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { |
5446 | int64_t Tmp = -1; |
5447 | if (!isToken(Kind: AsmToken::Integer) && !isToken(Kind: AsmToken::Identifier)) { |
5448 | return true; |
5449 | } |
5450 | if (getParser().parseAbsoluteExpression(Res&: Tmp)) { |
5451 | return true; |
5452 | } |
5453 | Ret = static_cast<uint32_t>(Tmp); |
5454 | return false; |
5455 | } |
5456 | |
5457 | bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { |
5458 | if (!getSTI().getTargetTriple().isAMDGCN()) |
5459 | return TokError(Msg: "directive only supported for amdgcn architecture" ); |
5460 | |
5461 | std::string TargetIDDirective; |
5462 | SMLoc TargetStart = getTok().getLoc(); |
5463 | if (getParser().parseEscapedString(Data&: TargetIDDirective)) |
5464 | return true; |
5465 | |
5466 | SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); |
5467 | if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) |
5468 | return getParser().Error(L: TargetRange.Start, |
5469 | Msg: (Twine(".amdgcn_target directive's target id " ) + |
5470 | Twine(TargetIDDirective) + |
5471 | Twine(" does not match the specified target id " ) + |
5472 | Twine(getTargetStreamer().getTargetID()->toString())).str()); |
5473 | |
5474 | return false; |
5475 | } |
5476 | |
5477 | bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { |
5478 | return Error(L: Range.Start, Msg: "value out of range" , Range); |
5479 | } |
5480 | |
5481 | bool AMDGPUAsmParser::calculateGPRBlocks( |
5482 | const FeatureBitset &Features, const MCExpr *VCCUsed, |
5483 | const MCExpr *FlatScrUsed, bool XNACKUsed, |
5484 | std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR, |
5485 | SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange, |
5486 | const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) { |
5487 | // TODO(scott.linder): These calculations are duplicated from |
5488 | // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. |
5489 | IsaVersion Version = getIsaVersion(GPU: getSTI().getCPU()); |
5490 | MCContext &Ctx = getContext(); |
5491 | |
5492 | const MCExpr *NumSGPRs = NextFreeSGPR; |
5493 | int64_t EvaluatedSGPRs; |
5494 | |
5495 | if (Version.Major >= 10) |
5496 | NumSGPRs = MCConstantExpr::create(Value: 0, Ctx); |
5497 | else { |
5498 | unsigned MaxAddressableNumSGPRs = |
5499 | IsaInfo::getAddressableNumSGPRs(STI: &getSTI()); |
5500 | |
5501 | if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && Version.Major >= 8 && |
5502 | !Features.test(I: FeatureSGPRInitBug) && |
5503 | static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) |
5504 | return OutOfRangeError(Range: SGPRRange); |
5505 | |
    const MCExpr *ExtraSGPRs =
5507 | AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx); |
5508 | NumSGPRs = MCBinaryExpr::createAdd(LHS: NumSGPRs, RHS: ExtraSGPRs, Ctx); |
5509 | |
5510 | if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && |
5511 | (Version.Major <= 7 || Features.test(I: FeatureSGPRInitBug)) && |
5512 | static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) |
5513 | return OutOfRangeError(Range: SGPRRange); |
5514 | |
5515 | if (Features.test(I: FeatureSGPRInitBug)) |
5516 | NumSGPRs = |
5517 | MCConstantExpr::create(Value: IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx); |
5518 | } |
5519 | |
5520 | // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks: |
5521 | // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1 |
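  // Worked example: NumGPR = 10 with an encoding granule of 4 gives
  // alignTo(10, 4) = 12, 12 / 4 = 3, and 3 - 1 = 2 encoded blocks.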
5522 | auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR, |
5523 | unsigned Granule) -> const MCExpr * { |
5524 | const MCExpr *OneConst = MCConstantExpr::create(Value: 1ul, Ctx); |
5525 | const MCExpr *GranuleConst = MCConstantExpr::create(Value: Granule, Ctx); |
5526 | const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax(Args: {NumGPR, OneConst}, Ctx); |
5527 | const MCExpr *AlignToGPR = |
5528 | AMDGPUMCExpr::createAlignTo(Value: MaxNumGPR, Align: GranuleConst, Ctx); |
5529 | const MCExpr *DivGPR = |
5530 | MCBinaryExpr::createDiv(LHS: AlignToGPR, RHS: GranuleConst, Ctx); |
5531 | const MCExpr *SubGPR = MCBinaryExpr::createSub(LHS: DivGPR, RHS: OneConst, Ctx); |
5532 | return SubGPR; |
5533 | }; |
5534 | |
5535 | VGPRBlocks = GetNumGPRBlocks( |
5536 | NextFreeVGPR, |
5537 | IsaInfo::getVGPREncodingGranule(STI: &getSTI(), EnableWavefrontSize32)); |
5538 | SGPRBlocks = |
5539 | GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(STI: &getSTI())); |
5540 | |
5541 | return false; |
5542 | } |
5543 | |
5544 | bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { |
5545 | if (!getSTI().getTargetTriple().isAMDGCN()) |
5546 | return TokError(Msg: "directive only supported for amdgcn architecture" ); |
5547 | |
5548 | if (!isHsaAbi(STI: getSTI())) |
5549 | return TokError(Msg: "directive only supported for amdhsa OS" ); |
5550 | |
5551 | StringRef KernelName; |
5552 | if (getParser().parseIdentifier(Res&: KernelName)) |
5553 | return true; |
5554 | |
5555 | AMDGPU::MCKernelDescriptor KD = |
5556 | AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor( |
5557 | STI: &getSTI(), Ctx&: getContext()); |
5558 | |
5559 | StringSet<> Seen; |
5560 | |
5561 | IsaVersion IVersion = getIsaVersion(GPU: getSTI().getCPU()); |
5562 | |
5563 | const MCExpr *ZeroExpr = MCConstantExpr::create(Value: 0, Ctx&: getContext()); |
5564 | const MCExpr *OneExpr = MCConstantExpr::create(Value: 1, Ctx&: getContext()); |
5565 | |
5566 | SMRange VGPRRange; |
5567 | const MCExpr *NextFreeVGPR = ZeroExpr; |
5568 | const MCExpr *AccumOffset = MCConstantExpr::create(Value: 0, Ctx&: getContext()); |
5569 | uint64_t SharedVGPRCount = 0; |
5570 | uint64_t PreloadLength = 0; |
5571 | uint64_t PreloadOffset = 0; |
5572 | SMRange SGPRRange; |
5573 | const MCExpr *NextFreeSGPR = ZeroExpr; |
5574 | |
5575 | // Count the number of user SGPRs implied from the enabled feature bits. |
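  // (The private segment buffer occupies 4 user SGPRs, each enabled 64-bit
  // pointer and the flat scratch init occupy 2, the private segment size
  // occupies 1, and kernarg preload adds its preload length.)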
5576 | unsigned ImpliedUserSGPRCount = 0; |
5577 | |
5578 | // Track if the asm explicitly contains the directive for the user SGPR |
5579 | // count. |
5580 | std::optional<unsigned> ExplicitUserSGPRCount; |
5581 | const MCExpr *ReserveVCC = OneExpr; |
5582 | const MCExpr *ReserveFlatScr = OneExpr; |
5583 | std::optional<bool> EnableWavefrontSize32; |
5584 | |
5585 | while (true) { |
5586 | while (trySkipToken(Kind: AsmToken::EndOfStatement)); |
5587 | |
5588 | StringRef ID; |
5589 | SMRange IDRange = getTok().getLocRange(); |
5590 | if (!parseId(Val&: ID, ErrMsg: "expected .amdhsa_ directive or .end_amdhsa_kernel" )) |
5591 | return true; |
5592 | |
5593 | if (ID == ".end_amdhsa_kernel" ) |
5594 | break; |
5595 | |
5596 | if (!Seen.insert(key: ID).second) |
5597 | return TokError(Msg: ".amdhsa_ directives cannot be repeated" ); |
5598 | |
5599 | SMLoc ValStart = getLoc(); |
5600 | const MCExpr *ExprVal; |
5601 | if (getParser().parseExpression(Res&: ExprVal)) |
5602 | return true; |
5603 | SMLoc ValEnd = getLoc(); |
5604 | SMRange ValRange = SMRange(ValStart, ValEnd); |
5605 | |
5606 | int64_t IVal = 0; |
5607 | uint64_t Val = IVal; |
5608 | bool EvaluatableExpr; |
5609 | if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(Res&: IVal))) { |
5610 | if (IVal < 0) |
5611 | return OutOfRangeError(Range: ValRange); |
5612 | Val = IVal; |
5613 | } |
5614 | |
5615 | #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ |
5616 | if (!isUInt<ENTRY##_WIDTH>(Val)) \ |
5617 | return OutOfRangeError(RANGE); \ |
5618 | AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \ |
5619 | getContext()); |
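// For example, PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
// COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, ValRange) first
// range-checks the evaluated value against the field's bit width and then
// splices ExprVal into compute_pgm_rsrc1 at the field's shift and width.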
5620 | |
// Some fields use the parsed value immediately, which requires the expression
// to be resolvable.
5623 | #define EXPR_RESOLVE_OR_ERROR(RESOLVED) \ |
5624 | if (!(RESOLVED)) \ |
5625 | return Error(IDRange.Start, "directive should have resolvable expression", \ |
5626 | IDRange); |
5627 | |
5628 | if (ID == ".amdhsa_group_segment_fixed_size" ) { |
5629 | if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) * |
5630 | CHAR_BIT>(x: Val)) |
5631 | return OutOfRangeError(Range: ValRange); |
5632 | KD.group_segment_fixed_size = ExprVal; |
5633 | } else if (ID == ".amdhsa_private_segment_fixed_size" ) { |
5634 | if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) * |
5635 | CHAR_BIT>(x: Val)) |
5636 | return OutOfRangeError(Range: ValRange); |
5637 | KD.private_segment_fixed_size = ExprVal; |
5638 | } else if (ID == ".amdhsa_kernarg_size" ) { |
5639 | if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(x: Val)) |
5640 | return OutOfRangeError(Range: ValRange); |
5641 | KD.kernarg_size = ExprVal; |
5642 | } else if (ID == ".amdhsa_user_sgpr_count" ) { |
5643 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5644 | ExplicitUserSGPRCount = Val; |
5645 | } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer" ) { |
5646 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5647 | if (hasArchitectedFlatScratch()) |
5648 | return Error(L: IDRange.Start, |
5649 | Msg: "directive is not supported with architected flat scratch" , |
5650 | Range: IDRange); |
5651 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5652 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, |
5653 | ExprVal, ValRange); |
5654 | if (Val) |
5655 | ImpliedUserSGPRCount += 4; |
5656 | } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length" ) { |
5657 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5658 | if (!hasKernargPreload()) |
5659 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5660 | |
5661 | if (Val > getMaxNumUserSGPRs()) |
5662 | return OutOfRangeError(Range: ValRange); |
5663 | PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal, |
5664 | ValRange); |
5665 | if (Val) { |
5666 | ImpliedUserSGPRCount += Val; |
5667 | PreloadLength = Val; |
5668 | } |
5669 | } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset" ) { |
5670 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5671 | if (!hasKernargPreload()) |
5672 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5673 | |
5674 | if (Val >= 1024) |
5675 | return OutOfRangeError(Range: ValRange); |
5676 | PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal, |
5677 | ValRange); |
5678 | if (Val) |
5679 | PreloadOffset = Val; |
5680 | } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr" ) { |
5681 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5682 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5683 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal, |
5684 | ValRange); |
5685 | if (Val) |
5686 | ImpliedUserSGPRCount += 2; |
5687 | } else if (ID == ".amdhsa_user_sgpr_queue_ptr" ) { |
5688 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5689 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5690 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal, |
5691 | ValRange); |
5692 | if (Val) |
5693 | ImpliedUserSGPRCount += 2; |
5694 | } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr" ) { |
5695 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5696 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5697 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, |
5698 | ExprVal, ValRange); |
5699 | if (Val) |
5700 | ImpliedUserSGPRCount += 2; |
5701 | } else if (ID == ".amdhsa_user_sgpr_dispatch_id" ) { |
5702 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5703 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5704 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal, |
5705 | ValRange); |
5706 | if (Val) |
5707 | ImpliedUserSGPRCount += 2; |
5708 | } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init" ) { |
5709 | if (hasArchitectedFlatScratch()) |
5710 | return Error(L: IDRange.Start, |
5711 | Msg: "directive is not supported with architected flat scratch" , |
5712 | Range: IDRange); |
5713 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5714 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5715 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, |
5716 | ExprVal, ValRange); |
5717 | if (Val) |
5718 | ImpliedUserSGPRCount += 2; |
5719 | } else if (ID == ".amdhsa_user_sgpr_private_segment_size" ) { |
5720 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5721 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5722 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, |
5723 | ExprVal, ValRange); |
5724 | if (Val) |
5725 | ImpliedUserSGPRCount += 1; |
5726 | } else if (ID == ".amdhsa_wavefront_size32" ) { |
5727 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5728 | if (IVersion.Major < 10) |
5729 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5730 | EnableWavefrontSize32 = Val; |
5731 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5732 | KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal, |
5733 | ValRange); |
5734 | } else if (ID == ".amdhsa_uses_dynamic_stack" ) { |
5735 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5736 | KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal, |
5737 | ValRange); |
5738 | } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset" ) { |
5739 | if (hasArchitectedFlatScratch()) |
5740 | return Error(L: IDRange.Start, |
5741 | Msg: "directive is not supported with architected flat scratch" , |
5742 | Range: IDRange); |
5743 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5744 | COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, |
5745 | ValRange); |
5746 | } else if (ID == ".amdhsa_enable_private_segment" ) { |
5747 | if (!hasArchitectedFlatScratch()) |
5748 | return Error( |
5749 | L: IDRange.Start, |
5750 | Msg: "directive is not supported without architected flat scratch" , |
5751 | Range: IDRange); |
5752 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5753 | COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, |
5754 | ValRange); |
5755 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x" ) { |
5756 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5757 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal, |
5758 | ValRange); |
5759 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y" ) { |
5760 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5761 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal, |
5762 | ValRange); |
5763 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z" ) { |
5764 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5765 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal, |
5766 | ValRange); |
5767 | } else if (ID == ".amdhsa_system_sgpr_workgroup_info" ) { |
5768 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5769 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal, |
5770 | ValRange); |
5771 | } else if (ID == ".amdhsa_system_vgpr_workitem_id" ) { |
5772 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5773 | COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal, |
5774 | ValRange); |
5775 | } else if (ID == ".amdhsa_next_free_vgpr" ) { |
5776 | VGPRRange = ValRange; |
5777 | NextFreeVGPR = ExprVal; |
5778 | } else if (ID == ".amdhsa_next_free_sgpr" ) { |
5779 | SGPRRange = ValRange; |
5780 | NextFreeSGPR = ExprVal; |
5781 | } else if (ID == ".amdhsa_accum_offset" ) { |
5782 | if (!isGFX90A()) |
5783 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5784 | AccumOffset = ExprVal; |
5785 | } else if (ID == ".amdhsa_reserve_vcc" ) { |
5786 | if (EvaluatableExpr && !isUInt<1>(x: Val)) |
5787 | return OutOfRangeError(Range: ValRange); |
5788 | ReserveVCC = ExprVal; |
5789 | } else if (ID == ".amdhsa_reserve_flat_scratch" ) { |
5790 | if (IVersion.Major < 7) |
5791 | return Error(L: IDRange.Start, Msg: "directive requires gfx7+" , Range: IDRange); |
5792 | if (hasArchitectedFlatScratch()) |
5793 | return Error(L: IDRange.Start, |
5794 | Msg: "directive is not supported with architected flat scratch" , |
5795 | Range: IDRange); |
5796 | if (EvaluatableExpr && !isUInt<1>(x: Val)) |
5797 | return OutOfRangeError(Range: ValRange); |
5798 | ReserveFlatScr = ExprVal; |
5799 | } else if (ID == ".amdhsa_reserve_xnack_mask" ) { |
5800 | if (IVersion.Major < 8) |
5801 | return Error(L: IDRange.Start, Msg: "directive requires gfx8+" , Range: IDRange); |
5802 | if (!isUInt<1>(x: Val)) |
5803 | return OutOfRangeError(Range: ValRange); |
5804 | if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) |
5805 | return getParser().Error(L: IDRange.Start, Msg: ".amdhsa_reserve_xnack_mask does not match target id" , |
5806 | Range: IDRange); |
5807 | } else if (ID == ".amdhsa_float_round_mode_32" ) { |
5808 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5809 | COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, |
5810 | ValRange); |
5811 | } else if (ID == ".amdhsa_float_round_mode_16_64" ) { |
5812 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5813 | COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal, |
5814 | ValRange); |
5815 | } else if (ID == ".amdhsa_float_denorm_mode_32" ) { |
5816 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5817 | COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal, |
5818 | ValRange); |
5819 | } else if (ID == ".amdhsa_float_denorm_mode_16_64" ) { |
5820 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5821 | COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, |
5822 | ValRange); |
5823 | } else if (ID == ".amdhsa_dx10_clamp" ) { |
5824 | if (IVersion.Major >= 12) |
5825 | return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+" , Range: IDRange); |
5826 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5827 | COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, |
5828 | ValRange); |
5829 | } else if (ID == ".amdhsa_ieee_mode" ) { |
5830 | if (IVersion.Major >= 12) |
5831 | return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+" , Range: IDRange); |
5832 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5833 | COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal, |
5834 | ValRange); |
5835 | } else if (ID == ".amdhsa_fp16_overflow" ) { |
5836 | if (IVersion.Major < 9) |
5837 | return Error(L: IDRange.Start, Msg: "directive requires gfx9+" , Range: IDRange); |
5838 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5839 | COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal, |
5840 | ValRange); |
5841 | } else if (ID == ".amdhsa_tg_split" ) { |
5842 | if (!isGFX90A()) |
5843 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5844 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, |
5845 | ExprVal, ValRange); |
5846 | } else if (ID == ".amdhsa_workgroup_processor_mode" ) { |
5847 | if (IVersion.Major < 10) |
5848 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5849 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5850 | COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal, |
5851 | ValRange); |
5852 | } else if (ID == ".amdhsa_memory_ordered" ) { |
5853 | if (IVersion.Major < 10) |
5854 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5855 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5856 | COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal, |
5857 | ValRange); |
5858 | } else if (ID == ".amdhsa_forward_progress" ) { |
5859 | if (IVersion.Major < 10) |
5860 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5861 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5862 | COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal, |
5863 | ValRange); |
5864 | } else if (ID == ".amdhsa_shared_vgpr_count" ) { |
5865 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5866 | if (IVersion.Major < 10 || IVersion.Major >= 12) |
5867 | return Error(L: IDRange.Start, Msg: "directive requires gfx10 or gfx11" , |
5868 | Range: IDRange); |
5869 | SharedVGPRCount = Val; |
5870 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, |
5871 | COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal, |
5872 | ValRange); |
5873 | } else if (ID == ".amdhsa_inst_pref_size" ) { |
5874 | if (IVersion.Major < 11) |
5875 | return Error(L: IDRange.Start, Msg: "directive requires gfx11+" , Range: IDRange); |
5876 | if (IVersion.Major == 11) { |
5877 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, |
5878 | COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal, |
5879 | ValRange); |
5880 | } else { |
5881 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, |
5882 | COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal, |
5883 | ValRange); |
5884 | } |
5885 | } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op" ) { |
5886 | PARSE_BITS_ENTRY( |
5887 | KD.compute_pgm_rsrc2, |
5888 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, |
5889 | ExprVal, ValRange); |
5890 | } else if (ID == ".amdhsa_exception_fp_denorm_src" ) { |
5891 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5892 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, |
5893 | ExprVal, ValRange); |
5894 | } else if (ID == ".amdhsa_exception_fp_ieee_div_zero" ) { |
5895 | PARSE_BITS_ENTRY( |
5896 | KD.compute_pgm_rsrc2, |
5897 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, |
5898 | ExprVal, ValRange); |
5899 | } else if (ID == ".amdhsa_exception_fp_ieee_overflow" ) { |
5900 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5901 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, |
5902 | ExprVal, ValRange); |
5903 | } else if (ID == ".amdhsa_exception_fp_ieee_underflow" ) { |
5904 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5905 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, |
5906 | ExprVal, ValRange); |
5907 | } else if (ID == ".amdhsa_exception_fp_ieee_inexact" ) { |
5908 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5909 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, |
5910 | ExprVal, ValRange); |
5911 | } else if (ID == ".amdhsa_exception_int_div_zero" ) { |
5912 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5913 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, |
5914 | ExprVal, ValRange); |
5915 | } else if (ID == ".amdhsa_round_robin_scheduling" ) { |
5916 | if (IVersion.Major < 12) |
5917 | return Error(L: IDRange.Start, Msg: "directive requires gfx12+" , Range: IDRange); |
5918 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5919 | COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal, |
5920 | ValRange); |
5921 | } else { |
5922 | return Error(L: IDRange.Start, Msg: "unknown .amdhsa_kernel directive" , Range: IDRange); |
5923 | } |
5924 | |
5925 | #undef PARSE_BITS_ENTRY |
5926 | } |
5927 | |
5928 | if (!Seen.contains(key: ".amdhsa_next_free_vgpr" )) |
5929 | return TokError(Msg: ".amdhsa_next_free_vgpr directive is required" ); |
5930 | |
5931 | if (!Seen.contains(key: ".amdhsa_next_free_sgpr" )) |
5932 | return TokError(Msg: ".amdhsa_next_free_sgpr directive is required" ); |
5933 | |
5934 | unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(u&: ImpliedUserSGPRCount); |
5935 | |
5936 |   // Consider the case where the total number of UserSGPRs, including the |
5937 |   // trailing allocated preload SGPRs, is greater than the number of |
5938 |   // explicitly referenced SGPRs. |
5939 | if (PreloadLength) { |
5940 | MCContext &Ctx = getContext(); |
5941 | NextFreeSGPR = AMDGPUMCExpr::createMax( |
5942 | Args: {NextFreeSGPR, MCConstantExpr::create(Value: UserSGPRCount, Ctx)}, Ctx); |
5943 | } |
5944 | |
5945 | const MCExpr *VGPRBlocks; |
5946 | const MCExpr *SGPRBlocks; |
5947 | if (calculateGPRBlocks(Features: getFeatureBits(), VCCUsed: ReserveVCC, FlatScrUsed: ReserveFlatScr, |
5948 | XNACKUsed: getTargetStreamer().getTargetID()->isXnackOnOrAny(), |
5949 | EnableWavefrontSize32, NextFreeVGPR, |
5950 | VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, |
5951 | SGPRBlocks)) |
5952 | return true; |
5953 | |
5954 | int64_t EvaluatedVGPRBlocks; |
5955 | bool VGPRBlocksEvaluatable = |
5956 | VGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedVGPRBlocks); |
5957 | if (VGPRBlocksEvaluatable && |
5958 | !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( |
5959 | x: static_cast<uint64_t>(EvaluatedVGPRBlocks))) { |
5960 | return OutOfRangeError(Range: VGPRRange); |
5961 | } |
5962 | AMDGPU::MCKernelDescriptor::bits_set( |
5963 | Dst&: KD.compute_pgm_rsrc1, Value: VGPRBlocks, |
5964 | Shift: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT, |
5965 | Mask: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, Ctx&: getContext()); |
5966 | |
5967 | int64_t EvaluatedSGPRBlocks; |
5968 | if (SGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedSGPRBlocks) && |
5969 | !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( |
5970 | x: static_cast<uint64_t>(EvaluatedSGPRBlocks))) |
5971 | return OutOfRangeError(Range: SGPRRange); |
5972 | AMDGPU::MCKernelDescriptor::bits_set( |
5973 | Dst&: KD.compute_pgm_rsrc1, Value: SGPRBlocks, |
5974 | Shift: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT, |
5975 | Mask: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, Ctx&: getContext()); |
5976 | |
5977 | if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) |
5978 |     return TokError(Msg: "amdgpu_user_sgpr_count smaller than implied by " |
5979 | "enabled user SGPRs" ); |
5980 | |
5981 | if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(x: UserSGPRCount)) |
5982 | return TokError(Msg: "too many user SGPRs enabled" ); |
5983 | AMDGPU::MCKernelDescriptor::bits_set( |
5984 | Dst&: KD.compute_pgm_rsrc2, Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()), |
5985 | Shift: COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, |
5986 | Mask: COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, Ctx&: getContext()); |
5987 | |
5988 | int64_t IVal = 0; |
5989 | if (!KD.kernarg_size->evaluateAsAbsolute(Res&: IVal)) |
5990 | return TokError(Msg: "Kernarg size should be resolvable" ); |
5991 | uint64_t kernarg_size = IVal; |
5992 | if (PreloadLength && kernarg_size && |
5993 | (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size)) |
5994 | return TokError(Msg: "Kernarg preload length + offset is larger than the " |
5995 | "kernarg segment size" ); |
5996 | |
5997 | if (isGFX90A()) { |
5998 | if (!Seen.contains(key: ".amdhsa_accum_offset" )) |
5999 | return TokError(Msg: ".amdhsa_accum_offset directive is required" ); |
6000 | int64_t EvaluatedAccum; |
6001 | bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(Res&: EvaluatedAccum); |
6002 | uint64_t UEvaluatedAccum = EvaluatedAccum; |
6003 | if (AccumEvaluatable && |
6004 | (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3))) |
6005 | return TokError(Msg: "accum_offset should be in range [4..256] in " |
6006 | "increments of 4" ); |
6007 | |
6008 | int64_t EvaluatedNumVGPR; |
6009 | if (NextFreeVGPR->evaluateAsAbsolute(Res&: EvaluatedNumVGPR) && |
6010 | AccumEvaluatable && |
6011 | UEvaluatedAccum > |
6012 | alignTo(Value: std::max(a: (uint64_t)1, b: (uint64_t)EvaluatedNumVGPR), Align: 4)) |
6013 | return TokError(Msg: "accum_offset exceeds total VGPR allocation" ); |
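    // The ACCUM_OFFSET field holds (accum_offset / 4) - 1, so rescale the
    // parsed byte offset expression before writing it into compute_pgm_rsrc3.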
6014 | const MCExpr *AdjustedAccum = MCBinaryExpr::createSub( |
6015 | LHS: MCBinaryExpr::createDiv( |
6016 | LHS: AccumOffset, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext()), |
6017 | RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext()); |
6018 | MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: AdjustedAccum, |
6019 | Shift: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, |
6020 | Mask: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, |
6021 | Ctx&: getContext()); |
6022 | } |
6023 | |
6024 | if (IVersion.Major >= 10 && IVersion.Major < 12) { |
6025 |     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY |
6026 | if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { |
6027 | return TokError(Msg: "shared_vgpr_count directive not valid on " |
6028 | "wavefront size 32" ); |
6029 | } |
6030 | |
6031 | if (VGPRBlocksEvaluatable && |
6032 | (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) > |
6033 | 63)) { |
6034 | return TokError(Msg: "shared_vgpr_count*2 + " |
6035 | "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " |
6036 | "exceed 63\n" ); |
6037 | } |
6038 | } |
6039 | |
6040 | getTargetStreamer().EmitAmdhsaKernelDescriptor(STI: getSTI(), KernelName, KernelDescriptor: KD, |
6041 | NextVGPR: NextFreeVGPR, NextSGPR: NextFreeSGPR, |
6042 | ReserveVCC, ReserveFlatScr); |
6043 | return false; |
6044 | } |
6045 | |
6046 | bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() { |
6047 | uint32_t Version; |
6048 | if (ParseAsAbsoluteExpression(Ret&: Version)) |
6049 | return true; |
6050 | |
6051 | getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(COV: Version); |
6052 | return false; |
6053 | } |
6054 | |
6055 | bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, |
6056 | AMDGPUMCKernelCodeT &C) { |
6057 | // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing |
6058 | // assembly for backwards compatibility. |
6059 | if (ID == "max_scratch_backing_memory_byte_size" ) { |
6060 | Parser.eatToEndOfStatement(); |
6061 | return false; |
6062 | } |
6063 | |
6064 | SmallString<40> ErrStr; |
6065 | raw_svector_ostream Err(ErrStr); |
6066 | if (!C.ParseKernelCodeT(ID, MCParser&: getParser(), Err)) { |
6067 | return TokError(Msg: Err.str()); |
6068 | } |
6069 | Lex(); |
6070 | |
6071 | if (ID == "enable_wavefront_size32" ) { |
6072 | if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { |
6073 | if (!isGFX10Plus()) |
6074 | return TokError(Msg: "enable_wavefront_size32=1 is only allowed on GFX10+" ); |
6075 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) |
6076 | return TokError(Msg: "enable_wavefront_size32=1 requires +WavefrontSize32" ); |
6077 | } else { |
6078 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) |
6079 | return TokError(Msg: "enable_wavefront_size32=0 requires +WavefrontSize64" ); |
6080 | } |
6081 | } |
6082 | |
6083 | if (ID == "wavefront_size" ) { |
6084 | if (C.wavefront_size == 5) { |
6085 | if (!isGFX10Plus()) |
6086 | return TokError(Msg: "wavefront_size=5 is only allowed on GFX10+" ); |
6087 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) |
6088 | return TokError(Msg: "wavefront_size=5 requires +WavefrontSize32" ); |
6089 | } else if (C.wavefront_size == 6) { |
6090 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) |
6091 | return TokError(Msg: "wavefront_size=6 requires +WavefrontSize64" ); |
6092 | } |
6093 | } |
6094 | |
6095 | return false; |
6096 | } |
6097 | |
6098 | bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { |
6099 | AMDGPUMCKernelCodeT KernelCode; |
6100 | KernelCode.initDefault(STI: &getSTI(), Ctx&: getContext()); |
6101 | |
6102 | while (true) { |
6103 |     // Lex EndOfStatement. This is in a while loop because lexing a comment |
6104 | // will set the current token to EndOfStatement. |
6105 | while(trySkipToken(Kind: AsmToken::EndOfStatement)); |
6106 | |
6107 | StringRef ID; |
6108 | if (!parseId(Val&: ID, ErrMsg: "expected value identifier or .end_amd_kernel_code_t" )) |
6109 | return true; |
6110 | |
6111 | if (ID == ".end_amd_kernel_code_t" ) |
6112 | break; |
6113 | |
6114 | if (ParseAMDKernelCodeTValue(ID, C&: KernelCode)) |
6115 | return true; |
6116 | } |
6117 | |
6118 | KernelCode.validate(STI: &getSTI(), Ctx&: getContext()); |
6119 | getTargetStreamer().EmitAMDKernelCodeT(Header&: KernelCode); |
6120 | |
6121 | return false; |
6122 | } |
6123 | |
6124 | bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { |
6125 | StringRef KernelName; |
6126 | if (!parseId(Val&: KernelName, ErrMsg: "expected symbol name" )) |
6127 | return true; |
6128 | |
6129 | getTargetStreamer().EmitAMDGPUSymbolType(SymbolName: KernelName, |
6130 | Type: ELF::STT_AMDGPU_HSA_KERNEL); |
6131 | |
6132 | KernelScope.initialize(Context&: getContext()); |
6133 | return false; |
6134 | } |
6135 | |
6136 | bool AMDGPUAsmParser::ParseDirectiveISAVersion() { |
6137 | if (!getSTI().getTargetTriple().isAMDGCN()) { |
6138 | return Error(L: getLoc(), |
6139 | Msg: ".amd_amdgpu_isa directive is not available on non-amdgcn " |
6140 | "architectures" ); |
6141 | } |
6142 | |
6143 | auto TargetIDDirective = getLexer().getTok().getStringContents(); |
6144 | if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) |
6145 | return Error(L: getParser().getTok().getLoc(), Msg: "target id must match options" ); |
6146 | |
6147 | getTargetStreamer().EmitISAVersion(); |
6148 | Lex(); |
6149 | |
6150 | return false; |
6151 | } |
6152 | |
6153 | bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { |
6154 | assert(isHsaAbi(getSTI())); |
6155 | |
6156 | std::string HSAMetadataString; |
6157 | if (ParseToEndDirective(AssemblerDirectiveBegin: HSAMD::V3::AssemblerDirectiveBegin, |
6158 | AssemblerDirectiveEnd: HSAMD::V3::AssemblerDirectiveEnd, CollectString&: HSAMetadataString)) |
6159 | return true; |
6160 | |
6161 | if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) |
6162 | return Error(L: getLoc(), Msg: "invalid HSA metadata" ); |
6163 | |
6164 | return false; |
6165 | } |
6166 | |
6167 | /// Common code to parse out a block of text (typically YAML) between start and |
6168 | /// end directives. |
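/// Used by the HSA metadata and MsgPack PAL metadata directives: everything up
/// to the matching end directive is appended verbatim (with statement
/// separators) to CollectString.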
6169 | bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, |
6170 | const char *AssemblerDirectiveEnd, |
6171 | std::string &CollectString) { |
6172 | |
6173 | raw_string_ostream CollectStream(CollectString); |
6174 | |
6175 | getLexer().setSkipSpace(false); |
6176 | |
6177 | bool FoundEnd = false; |
6178 | while (!isToken(Kind: AsmToken::Eof)) { |
6179 | while (isToken(Kind: AsmToken::Space)) { |
6180 | CollectStream << getTokenStr(); |
6181 | Lex(); |
6182 | } |
6183 | |
6184 | if (trySkipId(Id: AssemblerDirectiveEnd)) { |
6185 | FoundEnd = true; |
6186 | break; |
6187 | } |
6188 | |
6189 | CollectStream << Parser.parseStringToEndOfStatement() |
6190 | << getContext().getAsmInfo()->getSeparatorString(); |
6191 | |
6192 | Parser.eatToEndOfStatement(); |
6193 | } |
6194 | |
6195 | getLexer().setSkipSpace(true); |
6196 | |
6197 | if (isToken(Kind: AsmToken::Eof) && !FoundEnd) { |
6198 | return TokError(Msg: Twine("expected directive " ) + |
6199 | Twine(AssemblerDirectiveEnd) + Twine(" not found" )); |
6200 | } |
6201 | |
6202 | return false; |
6203 | } |
6204 | |
6205 | /// Parse the assembler directive for new MsgPack-format PAL metadata. |
6206 | bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { |
6207 | std::string String; |
6208 | if (ParseToEndDirective(AssemblerDirectiveBegin: AMDGPU::PALMD::AssemblerDirectiveBegin, |
6209 | AssemblerDirectiveEnd: AMDGPU::PALMD::AssemblerDirectiveEnd, CollectString&: String)) |
6210 | return true; |
6211 | |
6212 | auto *PALMetadata = getTargetStreamer().getPALMetadata(); |
6213 | if (!PALMetadata->setFromString(String)) |
6214 | return Error(L: getLoc(), Msg: "invalid PAL metadata" ); |
6215 | return false; |
6216 | } |
6217 | |
6218 | /// Parse the assembler directive for old linear-format PAL metadata. |
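/// The legacy form is a comma-separated list of register/value pairs. A purely
/// illustrative example (directive name assumed to match
/// PALMD::AssemblerDirective, values hypothetical):
///   .amd_amdgpu_pal_metadata 0x2c0a, 0x0, 0x2c0b, 0x42000000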
6219 | bool AMDGPUAsmParser::ParseDirectivePALMetadata() { |
6220 | if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { |
6221 | return Error(L: getLoc(), |
6222 | Msg: (Twine(PALMD::AssemblerDirective) + Twine(" directive is " |
6223 | "not available on non-amdpal OSes" )).str()); |
6224 | } |
6225 | |
6226 | auto *PALMetadata = getTargetStreamer().getPALMetadata(); |
6227 | PALMetadata->setLegacy(); |
6228 | for (;;) { |
6229 | uint32_t Key, Value; |
6230 | if (ParseAsAbsoluteExpression(Ret&: Key)) { |
6231 | return TokError(Msg: Twine("invalid value in " ) + |
6232 | Twine(PALMD::AssemblerDirective)); |
6233 | } |
6234 | if (!trySkipToken(Kind: AsmToken::Comma)) { |
6235 | return TokError(Msg: Twine("expected an even number of values in " ) + |
6236 | Twine(PALMD::AssemblerDirective)); |
6237 | } |
6238 | if (ParseAsAbsoluteExpression(Ret&: Value)) { |
6239 | return TokError(Msg: Twine("invalid value in " ) + |
6240 | Twine(PALMD::AssemblerDirective)); |
6241 | } |
6242 | PALMetadata->setRegister(Reg: Key, Val: Value); |
6243 | if (!trySkipToken(Kind: AsmToken::Comma)) |
6244 | break; |
6245 | } |
6246 | return false; |
6247 | } |
6248 | |
6249 | /// ParseDirectiveAMDGPULDS |
6250 | /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] |
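/// An illustrative example: .amdgpu_lds my_lds_var, 4096, 16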
6251 | bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { |
6252 | if (getParser().checkForValidSection()) |
6253 | return true; |
6254 | |
6255 | StringRef Name; |
6256 | SMLoc NameLoc = getLoc(); |
6257 | if (getParser().parseIdentifier(Res&: Name)) |
6258 | return TokError(Msg: "expected identifier in directive" ); |
6259 | |
6260 | MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); |
6261 | if (getParser().parseComma()) |
6262 | return true; |
6263 | |
6264 | unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(STI: &getSTI()); |
6265 | |
6266 | int64_t Size; |
6267 | SMLoc SizeLoc = getLoc(); |
6268 | if (getParser().parseAbsoluteExpression(Res&: Size)) |
6269 | return true; |
6270 | if (Size < 0) |
6271 | return Error(L: SizeLoc, Msg: "size must be non-negative" ); |
6272 | if (Size > LocalMemorySize) |
6273 | return Error(L: SizeLoc, Msg: "size is too large" ); |
6274 | |
6275 | int64_t Alignment = 4; |
6276 | if (trySkipToken(Kind: AsmToken::Comma)) { |
6277 | SMLoc AlignLoc = getLoc(); |
6278 | if (getParser().parseAbsoluteExpression(Res&: Alignment)) |
6279 | return true; |
6280 | if (Alignment < 0 || !isPowerOf2_64(Value: Alignment)) |
6281 | return Error(L: AlignLoc, Msg: "alignment must be a power of two" ); |
6282 | |
6283 | // Alignment larger than the size of LDS is possible in theory, as long |
6284 |     // as the linker manages to place the symbol at address 0, but we do want |
6285 | // to make sure the alignment fits nicely into a 32-bit integer. |
6286 | if (Alignment >= 1u << 31) |
6287 | return Error(L: AlignLoc, Msg: "alignment is too large" ); |
6288 | } |
6289 | |
6290 | if (parseEOL()) |
6291 | return true; |
6292 | |
6293 | Symbol->redefineIfPossible(); |
6294 | if (!Symbol->isUndefined()) |
6295 | return Error(L: NameLoc, Msg: "invalid symbol redefinition" ); |
6296 | |
6297 | getTargetStreamer().emitAMDGPULDS(Symbol, Size, Alignment: Align(Alignment)); |
6298 | return false; |
6299 | } |
6300 | |
6301 | bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { |
6302 | StringRef IDVal = DirectiveID.getString(); |
6303 | |
6304 | if (isHsaAbi(STI: getSTI())) { |
6305 | if (IDVal == ".amdhsa_kernel" ) |
6306 | return ParseDirectiveAMDHSAKernel(); |
6307 | |
6308 | if (IDVal == ".amdhsa_code_object_version" ) |
6309 | return ParseDirectiveAMDHSACodeObjectVersion(); |
6310 | |
6311 | // TODO: Restructure/combine with PAL metadata directive. |
6312 | if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) |
6313 | return ParseDirectiveHSAMetadata(); |
6314 | } else { |
6315 | if (IDVal == ".amd_kernel_code_t" ) |
6316 | return ParseDirectiveAMDKernelCodeT(); |
6317 | |
6318 | if (IDVal == ".amdgpu_hsa_kernel" ) |
6319 | return ParseDirectiveAMDGPUHsaKernel(); |
6320 | |
6321 | if (IDVal == ".amd_amdgpu_isa" ) |
6322 | return ParseDirectiveISAVersion(); |
6323 | |
6324 | if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) { |
6325 | return Error(L: getLoc(), Msg: (Twine(HSAMD::AssemblerDirectiveBegin) + |
6326 | Twine(" directive is " |
6327 | "not available on non-amdhsa OSes" )) |
6328 | .str()); |
6329 | } |
6330 | } |
6331 | |
6332 | if (IDVal == ".amdgcn_target" ) |
6333 | return ParseDirectiveAMDGCNTarget(); |
6334 | |
6335 | if (IDVal == ".amdgpu_lds" ) |
6336 | return ParseDirectiveAMDGPULDS(); |
6337 | |
6338 | if (IDVal == PALMD::AssemblerDirectiveBegin) |
6339 | return ParseDirectivePALMetadataBegin(); |
6340 | |
6341 | if (IDVal == PALMD::AssemblerDirective) |
6342 | return ParseDirectivePALMetadata(); |
6343 | |
6344 | return true; |
6345 | } |
6346 | |
6347 | bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, |
6348 | MCRegister Reg) { |
6349 | if (MRI.regsOverlap(RegA: TTMP12_TTMP13_TTMP14_TTMP15, RegB: Reg)) |
6350 | return isGFX9Plus(); |
6351 | |
6352 |   // GFX10+ has two more SGPRs, 104 and 105. |
6353 | if (MRI.regsOverlap(RegA: SGPR104_SGPR105, RegB: Reg)) |
6354 | return hasSGPR104_SGPR105(); |
6355 | |
6356 | switch (Reg.id()) { |
6357 | case SRC_SHARED_BASE_LO: |
6358 | case SRC_SHARED_BASE: |
6359 | case SRC_SHARED_LIMIT_LO: |
6360 | case SRC_SHARED_LIMIT: |
6361 | case SRC_PRIVATE_BASE_LO: |
6362 | case SRC_PRIVATE_BASE: |
6363 | case SRC_PRIVATE_LIMIT_LO: |
6364 | case SRC_PRIVATE_LIMIT: |
6365 | return isGFX9Plus(); |
6366 | case SRC_POPS_EXITING_WAVE_ID: |
6367 | return isGFX9Plus() && !isGFX11Plus(); |
6368 | case TBA: |
6369 | case TBA_LO: |
6370 | case TBA_HI: |
6371 | case TMA: |
6372 | case TMA_LO: |
6373 | case TMA_HI: |
6374 | return !isGFX9Plus(); |
6375 | case XNACK_MASK: |
6376 | case XNACK_MASK_LO: |
6377 | case XNACK_MASK_HI: |
6378 | return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); |
6379 | case SGPR_NULL: |
6380 | return isGFX10Plus(); |
6381 | case SRC_EXECZ: |
6382 | case SRC_VCCZ: |
6383 | return !isGFX11Plus(); |
6384 | default: |
6385 | break; |
6386 | } |
6387 | |
6388 | if (isCI()) |
6389 | return true; |
6390 | |
6391 | if (isSI() || isGFX10Plus()) { |
6392 | // No flat_scr on SI. |
6393 | // On GFX10Plus flat scratch is not a valid register operand and can only be |
6394 | // accessed with s_setreg/s_getreg. |
6395 | switch (Reg.id()) { |
6396 | case FLAT_SCR: |
6397 | case FLAT_SCR_LO: |
6398 | case FLAT_SCR_HI: |
6399 | return false; |
6400 | default: |
6401 | return true; |
6402 | } |
6403 | } |
6404 | |
6405 | // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that |
6406 | // SI/CI have. |
6407 | if (MRI.regsOverlap(RegA: SGPR102_SGPR103, RegB: Reg)) |
6408 | return hasSGPR102_SGPR103(); |
6409 | |
6410 | return true; |
6411 | } |
6412 | |
6413 | ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands, |
6414 | StringRef Mnemonic, |
6415 | OperandMode Mode) { |
6416 | ParseStatus Res = parseVOPD(Operands); |
6417 | if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement)) |
6418 | return Res; |
6419 | |
6420 | // Try to parse with a custom parser |
6421 | Res = MatchOperandParserImpl(Operands, Mnemonic); |
6422 | |
6423 |   // If we successfully parsed the operand or if there was an error parsing, |
6424 | // we are done. |
6425 | // |
6426 | // If we are parsing after we reach EndOfStatement then this means we |
6427 | // are appending default values to the Operands list. This is only done |
6428 | // by custom parser, so we shouldn't continue on to the generic parsing. |
6429 | if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement)) |
6430 | return Res; |
6431 | |
6432 | SMLoc RBraceLoc; |
6433 | SMLoc LBraceLoc = getLoc(); |
6434 | if (Mode == OperandMode_NSA && trySkipToken(Kind: AsmToken::LBrac)) { |
6435 | unsigned Prefix = Operands.size(); |
6436 | |
6437 | for (;;) { |
6438 | auto Loc = getLoc(); |
6439 | Res = parseReg(Operands); |
6440 | if (Res.isNoMatch()) |
6441 | Error(L: Loc, Msg: "expected a register" ); |
6442 | if (!Res.isSuccess()) |
6443 | return ParseStatus::Failure; |
6444 | |
6445 | RBraceLoc = getLoc(); |
6446 | if (trySkipToken(Kind: AsmToken::RBrac)) |
6447 | break; |
6448 | |
6449 | if (!skipToken(Kind: AsmToken::Comma, |
6450 | ErrMsg: "expected a comma or a closing square bracket" )) |
6451 | return ParseStatus::Failure; |
6452 | } |
6453 | |
6454 | if (Operands.size() - Prefix > 1) { |
6455 | Operands.insert(I: Operands.begin() + Prefix, |
6456 | Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "[" , Loc: LBraceLoc)); |
6457 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "]" , Loc: RBraceLoc)); |
6458 | } |
6459 | |
6460 | return ParseStatus::Success; |
6461 | } |
6462 | |
6463 | return parseRegOrImm(Operands); |
6464 | } |
6465 | |
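// Strip an explicit encoding/DPP/SDWA suffix from the mnemonic and record it
// as a forced encoding for instruction matching, e.g. "v_add_f32_e64" is
// matched as "v_add_f32" with a forced 64-bit encoding (illustrative example).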
6466 | StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { |
6467 | // Clear any forced encodings from the previous instruction. |
6468 | setForcedEncodingSize(0); |
6469 | setForcedDPP(false); |
6470 | setForcedSDWA(false); |
6471 | |
6472 | if (Name.consume_back(Suffix: "_e64_dpp" )) { |
6473 | setForcedDPP(true); |
6474 | setForcedEncodingSize(64); |
6475 | return Name; |
6476 | } |
6477 | if (Name.consume_back(Suffix: "_e64" )) { |
6478 | setForcedEncodingSize(64); |
6479 | return Name; |
6480 | } |
6481 | if (Name.consume_back(Suffix: "_e32" )) { |
6482 | setForcedEncodingSize(32); |
6483 | return Name; |
6484 | } |
6485 | if (Name.consume_back(Suffix: "_dpp" )) { |
6486 | setForcedDPP(true); |
6487 | return Name; |
6488 | } |
6489 | if (Name.consume_back(Suffix: "_sdwa" )) { |
6490 | setForcedSDWA(true); |
6491 | return Name; |
6492 | } |
6493 | return Name; |
6494 | } |
6495 | |
6496 | static void applyMnemonicAliases(StringRef &Mnemonic, |
6497 | const FeatureBitset &Features, |
6498 | unsigned VariantID); |
6499 | |
6500 | bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info, |
6501 | StringRef Name, SMLoc NameLoc, |
6502 | OperandVector &Operands) { |
6503 | // Add the instruction mnemonic |
6504 | Name = parseMnemonicSuffix(Name); |
6505 | |
6506 | // If the target architecture uses MnemonicAlias, call it here to parse |
6507 | // operands correctly. |
6508 | applyMnemonicAliases(Mnemonic&: Name, Features: getAvailableFeatures(), VariantID: 0); |
6509 | |
6510 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: NameLoc)); |
6511 | |
6512 | bool IsMIMG = Name.starts_with(Prefix: "image_" ); |
6513 | |
6514 | while (!trySkipToken(Kind: AsmToken::EndOfStatement)) { |
6515 | OperandMode Mode = OperandMode_Default; |
6516 | if (IsMIMG && isGFX10Plus() && Operands.size() == 2) |
6517 | Mode = OperandMode_NSA; |
6518 | ParseStatus Res = parseOperand(Operands, Mnemonic: Name, Mode); |
6519 | |
6520 | if (!Res.isSuccess()) { |
6521 | checkUnsupportedInstruction(Mnemo: Name, IDLoc: NameLoc); |
6522 | if (!Parser.hasPendingError()) { |
6523 | // FIXME: use real operand location rather than the current location. |
6524 | StringRef Msg = Res.isFailure() ? "failed parsing operand." |
6525 | : "not a valid operand." ; |
6526 | Error(L: getLoc(), Msg); |
6527 | } |
6528 | while (!trySkipToken(Kind: AsmToken::EndOfStatement)) { |
6529 | lex(); |
6530 | } |
6531 | return true; |
6532 | } |
6533 | |
6534 | // Eat the comma or space if there is one. |
6535 | trySkipToken(Kind: AsmToken::Comma); |
6536 | } |
6537 | |
6538 | return false; |
6539 | } |
6540 | |
6541 | //===----------------------------------------------------------------------===// |
6542 | // Utility functions |
6543 | //===----------------------------------------------------------------------===// |
6544 | |
6545 | ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, |
6546 | OperandVector &Operands) { |
6547 | SMLoc S = getLoc(); |
6548 | if (!trySkipId(Id: Name)) |
6549 | return ParseStatus::NoMatch; |
6550 | |
6551 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: S)); |
6552 | return ParseStatus::Success; |
6553 | } |
6554 | |
6555 | ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, |
6556 | int64_t &IntVal) { |
6557 | |
6558 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6559 | return ParseStatus::NoMatch; |
6560 | |
6561 | return parseExpr(Imm&: IntVal) ? ParseStatus::Success : ParseStatus::Failure; |
6562 | } |
6563 | |
6564 | ParseStatus AMDGPUAsmParser::parseIntWithPrefix( |
6565 | const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, |
6566 | std::function<bool(int64_t &)> ConvertResult) { |
6567 | SMLoc S = getLoc(); |
6568 | int64_t Value = 0; |
6569 | |
6570 | ParseStatus Res = parseIntWithPrefix(Prefix, IntVal&: Value); |
6571 | if (!Res.isSuccess()) |
6572 | return Res; |
6573 | |
6574 | if (ConvertResult && !ConvertResult(Value)) { |
6575 | Error(L: S, Msg: "invalid " + StringRef(Prefix) + " value." ); |
6576 | } |
6577 | |
6578 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Value, Loc: S, Type: ImmTy)); |
6579 | return ParseStatus::Success; |
6580 | } |
6581 | |
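// Parse an operand of the form "<prefix>:[a,b,...]" where each element must be
// 0 or 1 and up to four elements are packed into a bitmask, e.g. "neg:[0,1,1]"
// (illustrative example).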
6582 | ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( |
6583 | const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, |
6584 | bool (*ConvertResult)(int64_t &)) { |
6585 | SMLoc S = getLoc(); |
6586 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6587 | return ParseStatus::NoMatch; |
6588 | |
6589 | if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected a left square bracket" )) |
6590 | return ParseStatus::Failure; |
6591 | |
6592 | unsigned Val = 0; |
6593 | const unsigned MaxSize = 4; |
6594 | |
6595 | // FIXME: How to verify the number of elements matches the number of src |
6596 | // operands? |
6597 | for (int I = 0; ; ++I) { |
6598 | int64_t Op; |
6599 | SMLoc Loc = getLoc(); |
6600 | if (!parseExpr(Imm&: Op)) |
6601 | return ParseStatus::Failure; |
6602 | |
6603 | if (Op != 0 && Op != 1) |
6604 | return Error(L: Loc, Msg: "invalid " + StringRef(Prefix) + " value." ); |
6605 | |
6606 | Val |= (Op << I); |
6607 | |
6608 | if (trySkipToken(Kind: AsmToken::RBrac)) |
6609 | break; |
6610 | |
6611 | if (I + 1 == MaxSize) |
6612 | return Error(L: getLoc(), Msg: "expected a closing square bracket" ); |
6613 | |
6614 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) |
6615 | return ParseStatus::Failure; |
6616 | } |
6617 | |
6618 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: ImmTy)); |
6619 | return ParseStatus::Success; |
6620 | } |
6621 | |
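// Parse a boolean modifier spelled either as "<name>" (sets the bit) or
// "no<name>" (clears it), e.g. "r128" / "nor128" (illustrative example).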
6622 | ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, |
6623 | OperandVector &Operands, |
6624 | AMDGPUOperand::ImmTy ImmTy) { |
6625 | int64_t Bit; |
6626 | SMLoc S = getLoc(); |
6627 | |
6628 | if (trySkipId(Id: Name)) { |
6629 | Bit = 1; |
6630 | } else if (trySkipId(Pref: "no" , Id: Name)) { |
6631 | Bit = 0; |
6632 | } else { |
6633 | return ParseStatus::NoMatch; |
6634 | } |
6635 | |
6636 | if (Name == "r128" && !hasMIMG_R128()) |
6637 | return Error(L: S, Msg: "r128 modifier is not supported on this GPU" ); |
6638 | if (Name == "a16" && !hasA16()) |
6639 | return Error(L: S, Msg: "a16 modifier is not supported on this GPU" ); |
6640 | |
6641 | if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) |
6642 | ImmTy = AMDGPUOperand::ImmTyR128A16; |
6643 | |
6644 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Bit, Loc: S, Type: ImmTy)); |
6645 | return ParseStatus::Success; |
6646 | } |
6647 | |
6648 | unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, |
6649 | bool &Disabling) const { |
6650 | Disabling = Id.consume_front(Prefix: "no" ); |
6651 | |
6652 | if (isGFX940() && !Mnemo.starts_with(Prefix: "s_" )) { |
6653 | return StringSwitch<unsigned>(Id) |
6654 | .Case(S: "nt" , Value: AMDGPU::CPol::NT) |
6655 | .Case(S: "sc0" , Value: AMDGPU::CPol::SC0) |
6656 | .Case(S: "sc1" , Value: AMDGPU::CPol::SC1) |
6657 | .Default(Value: 0); |
6658 | } |
6659 | |
6660 | return StringSwitch<unsigned>(Id) |
6661 | .Case(S: "dlc" , Value: AMDGPU::CPol::DLC) |
6662 | .Case(S: "glc" , Value: AMDGPU::CPol::GLC) |
6663 | .Case(S: "scc" , Value: AMDGPU::CPol::SCC) |
6664 | .Case(S: "slc" , Value: AMDGPU::CPol::SLC) |
6665 | .Default(Value: 0); |
6666 | } |
6667 | |
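// Parse cache-policy modifiers. On GFX12+ these are the th:... and scope:...
// forms, e.g. "th:TH_LOAD_NT scope:SCOPE_SYS"; on earlier targets they are
// named bits such as glc, slc and dlc, with a "no" prefix disabling the bit.
// The examples are illustrative only.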
6668 | ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { |
6669 | if (isGFX12Plus()) { |
6670 | SMLoc StringLoc = getLoc(); |
6671 | |
6672 | int64_t CPolVal = 0; |
6673 | ParseStatus ResTH = ParseStatus::NoMatch; |
6674 | ParseStatus ResScope = ParseStatus::NoMatch; |
6675 | |
6676 | for (;;) { |
6677 | if (ResTH.isNoMatch()) { |
6678 | int64_t TH; |
6679 | ResTH = parseTH(Operands, TH); |
6680 | if (ResTH.isFailure()) |
6681 | return ResTH; |
6682 | if (ResTH.isSuccess()) { |
6683 | CPolVal |= TH; |
6684 | continue; |
6685 | } |
6686 | } |
6687 | |
6688 | if (ResScope.isNoMatch()) { |
6689 | int64_t Scope; |
6690 | ResScope = parseScope(Operands, Scope); |
6691 | if (ResScope.isFailure()) |
6692 | return ResScope; |
6693 | if (ResScope.isSuccess()) { |
6694 | CPolVal |= Scope; |
6695 | continue; |
6696 | } |
6697 | } |
6698 | |
6699 | break; |
6700 | } |
6701 | |
6702 | if (ResTH.isNoMatch() && ResScope.isNoMatch()) |
6703 | return ParseStatus::NoMatch; |
6704 | |
6705 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: CPolVal, Loc: StringLoc, |
6706 | Type: AMDGPUOperand::ImmTyCPol)); |
6707 | return ParseStatus::Success; |
6708 | } |
6709 | |
6710 | StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); |
6711 | SMLoc OpLoc = getLoc(); |
6712 | unsigned Enabled = 0, Seen = 0; |
6713 | for (;;) { |
6714 | SMLoc S = getLoc(); |
6715 | bool Disabling; |
6716 | unsigned CPol = getCPolKind(Id: getId(), Mnemo, Disabling); |
6717 | if (!CPol) |
6718 | break; |
6719 | |
6720 | lex(); |
6721 | |
6722 | if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) |
6723 | return Error(L: S, Msg: "dlc modifier is not supported on this GPU" ); |
6724 | |
6725 | if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) |
6726 | return Error(L: S, Msg: "scc modifier is not supported on this GPU" ); |
6727 | |
6728 | if (Seen & CPol) |
6729 | return Error(L: S, Msg: "duplicate cache policy modifier" ); |
6730 | |
6731 | if (!Disabling) |
6732 | Enabled |= CPol; |
6733 | |
6734 | Seen |= CPol; |
6735 | } |
6736 | |
6737 | if (!Seen) |
6738 | return ParseStatus::NoMatch; |
6739 | |
6740 | Operands.push_back( |
6741 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Enabled, Loc: OpLoc, Type: AMDGPUOperand::ImmTyCPol)); |
6742 | return ParseStatus::Success; |
6743 | } |
6744 | |
6745 | ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, |
6746 | int64_t &Scope) { |
6747 | static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE, |
6748 | CPol::SCOPE_DEV, CPol::SCOPE_SYS}; |
6749 | |
6750 | ParseStatus Res = parseStringOrIntWithPrefix( |
6751 | Operands, Name: "scope" , Ids: {"SCOPE_CU" , "SCOPE_SE" , "SCOPE_DEV" , "SCOPE_SYS" }, |
6752 | IntVal&: Scope); |
6753 | |
6754 | if (Res.isSuccess()) |
6755 | Scope = Scopes[Scope]; |
6756 | |
6757 | return Res; |
6758 | } |
6759 | |
6760 | ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { |
6761 | TH = AMDGPU::CPol::TH_RT; // default |
6762 | |
6763 | StringRef Value; |
6764 | SMLoc StringLoc; |
6765 | ParseStatus Res = parseStringWithPrefix(Prefix: "th" , Value, StringLoc); |
6766 | if (!Res.isSuccess()) |
6767 | return Res; |
6768 | |
6769 | if (Value == "TH_DEFAULT" ) |
6770 | TH = AMDGPU::CPol::TH_RT; |
6771 | else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" || |
6772 | Value == "TH_LOAD_NT_WB" ) { |
6773 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6774 | } else if (Value.consume_front(Prefix: "TH_ATOMIC_" )) { |
6775 | TH = AMDGPU::CPol::TH_TYPE_ATOMIC; |
6776 | } else if (Value.consume_front(Prefix: "TH_LOAD_" )) { |
6777 | TH = AMDGPU::CPol::TH_TYPE_LOAD; |
6778 | } else if (Value.consume_front(Prefix: "TH_STORE_" )) { |
6779 | TH = AMDGPU::CPol::TH_TYPE_STORE; |
6780 | } else { |
6781 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6782 | } |
6783 | |
6784 | if (Value == "BYPASS" ) |
6785 | TH |= AMDGPU::CPol::TH_REAL_BYPASS; |
6786 | |
6787 | if (TH != 0) { |
6788 | if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) |
6789 | TH |= StringSwitch<int64_t>(Value) |
6790 | .Case(S: "RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_RETURN) |
6791 | .Case(S: "RT" , Value: AMDGPU::CPol::TH_RT) |
6792 | .Case(S: "RT_RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_RETURN) |
6793 | .Case(S: "NT" , Value: AMDGPU::CPol::TH_ATOMIC_NT) |
6794 | .Case(S: "NT_RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_NT | |
6795 | AMDGPU::CPol::TH_ATOMIC_RETURN) |
6796 | .Case(S: "CASCADE_RT" , Value: AMDGPU::CPol::TH_ATOMIC_CASCADE) |
6797 | .Case(S: "CASCADE_NT" , Value: AMDGPU::CPol::TH_ATOMIC_CASCADE | |
6798 | AMDGPU::CPol::TH_ATOMIC_NT) |
6799 | .Default(Value: 0xffffffff); |
6800 | else |
6801 | TH |= StringSwitch<int64_t>(Value) |
6802 | .Case(S: "RT" , Value: AMDGPU::CPol::TH_RT) |
6803 | .Case(S: "NT" , Value: AMDGPU::CPol::TH_NT) |
6804 | .Case(S: "HT" , Value: AMDGPU::CPol::TH_HT) |
6805 | .Case(S: "LU" , Value: AMDGPU::CPol::TH_LU) |
6806 | .Case(S: "WB" , Value: AMDGPU::CPol::TH_WB) |
6807 | .Case(S: "NT_RT" , Value: AMDGPU::CPol::TH_NT_RT) |
6808 | .Case(S: "RT_NT" , Value: AMDGPU::CPol::TH_RT_NT) |
6809 | .Case(S: "NT_HT" , Value: AMDGPU::CPol::TH_NT_HT) |
6810 | .Case(S: "NT_WB" , Value: AMDGPU::CPol::TH_NT_WB) |
6811 | .Case(S: "BYPASS" , Value: AMDGPU::CPol::TH_BYPASS) |
6812 | .Default(Value: 0xffffffff); |
6813 | } |
6814 | |
6815 | if (TH == 0xffffffff) |
6816 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6817 | |
6818 | return ParseStatus::Success; |
6819 | } |
6820 | |
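// Append an optional immediate operand to Inst: use the parsed value if the
// operand was seen (recorded in OptionalIdx), otherwise use Default. InsertAt
// places the operand at a specific position instead of appending it.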
6821 | static void |
6822 | addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, |
6823 | AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, |
6824 | AMDGPUOperand::ImmTy ImmT, int64_t Default = 0, |
6825 | std::optional<unsigned> InsertAt = std::nullopt) { |
6826 | auto i = OptionalIdx.find(x: ImmT); |
6827 | if (i != OptionalIdx.end()) { |
6828 | unsigned Idx = i->second; |
6829 | const AMDGPUOperand &Op = |
6830 | static_cast<const AMDGPUOperand &>(*Operands[Idx]); |
6831 | if (InsertAt) |
6832 | Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Op.getImm())); |
6833 | else |
6834 | Op.addImmOperands(Inst, N: 1); |
6835 | } else { |
6836 | if (InsertAt.has_value()) |
6837 | Inst.insert(I: Inst.begin() + *InsertAt, Op: MCOperand::createImm(Val: Default)); |
6838 | else |
6839 | Inst.addOperand(Op: MCOperand::createImm(Val: Default)); |
6840 | } |
6841 | } |
6842 | |
6843 | ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, |
6844 | StringRef &Value, |
6845 | SMLoc &StringLoc) { |
6846 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6847 | return ParseStatus::NoMatch; |
6848 | |
6849 | StringLoc = getLoc(); |
6850 | return parseId(Val&: Value, ErrMsg: "expected an identifier" ) ? ParseStatus::Success |
6851 | : ParseStatus::Failure; |
6852 | } |
6853 | |
6854 | ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix( |
6855 | OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids, |
6856 | int64_t &IntVal) { |
6857 | if (!trySkipId(Id: Name, Kind: AsmToken::Colon)) |
6858 | return ParseStatus::NoMatch; |
6859 | |
6860 | SMLoc StringLoc = getLoc(); |
6861 | |
6862 | StringRef Value; |
6863 | if (isToken(Kind: AsmToken::Identifier)) { |
6864 | Value = getTokenStr(); |
6865 | lex(); |
6866 | |
6867 | for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal) |
6868 | if (Value == Ids[IntVal]) |
6869 | break; |
6870 | } else if (!parseExpr(Imm&: IntVal)) |
6871 | return ParseStatus::Failure; |
6872 | |
6873 | if (IntVal < 0 || IntVal >= (int64_t)Ids.size()) |
6874 | return Error(L: StringLoc, Msg: "invalid " + Twine(Name) + " value" ); |
6875 | |
6876 | return ParseStatus::Success; |
6877 | } |
6878 | |
6879 | ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix( |
6880 | OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids, |
6881 | AMDGPUOperand::ImmTy Type) { |
6882 | SMLoc S = getLoc(); |
6883 | int64_t IntVal; |
6884 | |
6885 | ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal); |
6886 | if (Res.isSuccess()) |
6887 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S, Type)); |
6888 | |
6889 | return Res; |
6890 | } |
6891 | |
6892 | //===----------------------------------------------------------------------===// |
6893 | // MTBUF format |
6894 | //===----------------------------------------------------------------------===// |
6895 | |
6896 | bool AMDGPUAsmParser::tryParseFmt(const char *Pref, |
6897 | int64_t MaxVal, |
6898 | int64_t &Fmt) { |
6899 | int64_t Val; |
6900 | SMLoc Loc = getLoc(); |
6901 | |
6902 | auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: Val); |
6903 | if (Res.isFailure()) |
6904 | return false; |
6905 | if (Res.isNoMatch()) |
6906 | return true; |
6907 | |
6908 | if (Val < 0 || Val > MaxVal) { |
6909 | Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6910 | return false; |
6911 | } |
6912 | |
6913 | Fmt = Val; |
6914 | return true; |
6915 | } |
6916 | |
6917 | ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands, |
6918 | AMDGPUOperand::ImmTy ImmTy) { |
6919 | const char *Pref = "index_key" ; |
6920 | int64_t ImmVal = 0; |
6921 | SMLoc Loc = getLoc(); |
6922 | auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: ImmVal); |
6923 | if (!Res.isSuccess()) |
6924 | return Res; |
6925 | |
6926 | if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1)) |
6927 | return Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6928 | |
6929 | if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3)) |
6930 | return Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6931 | |
6932 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: ImmTy)); |
6933 | return ParseStatus::Success; |
6934 | } |
6935 | |
6936 | ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) { |
6937 | return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey8bit); |
6938 | } |
6939 | |
6940 | ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) { |
6941 | return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey16bit); |
6942 | } |
6943 | |
6944 | // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their |
6945 | // values to live in a joint format operand in the MCInst encoding. |
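// The legacy split-format syntax looks like "dfmt:1, nfmt:7" (illustrative
// values); either field may be omitted and they may appear in either order.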
6946 | ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { |
6947 | using namespace llvm::AMDGPU::MTBUFFormat; |
6948 | |
6949 | int64_t Dfmt = DFMT_UNDEF; |
6950 | int64_t Nfmt = NFMT_UNDEF; |
6951 | |
6952 | // dfmt and nfmt can appear in either order, and each is optional. |
6953 | for (int I = 0; I < 2; ++I) { |
6954 | if (Dfmt == DFMT_UNDEF && !tryParseFmt(Pref: "dfmt" , MaxVal: DFMT_MAX, Fmt&: Dfmt)) |
6955 | return ParseStatus::Failure; |
6956 | |
6957 | if (Nfmt == NFMT_UNDEF && !tryParseFmt(Pref: "nfmt" , MaxVal: NFMT_MAX, Fmt&: Nfmt)) |
6958 | return ParseStatus::Failure; |
6959 | |
6960 | // Skip optional comma between dfmt/nfmt |
6961 | // but guard against 2 commas following each other. |
6962 | if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && |
6963 | !peekToken().is(K: AsmToken::Comma)) { |
6964 | trySkipToken(Kind: AsmToken::Comma); |
6965 | } |
6966 | } |
6967 | |
6968 | if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) |
6969 | return ParseStatus::NoMatch; |
6970 | |
6971 | Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; |
6972 | Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; |
6973 | |
6974 | Format = encodeDfmtNfmt(Dfmt, Nfmt); |
6975 | return ParseStatus::Success; |
6976 | } |
6977 | |
6978 | ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { |
6979 | using namespace llvm::AMDGPU::MTBUFFormat; |
6980 | |
6981 | int64_t Fmt = UFMT_UNDEF; |
6982 | |
6983 | if (!tryParseFmt(Pref: "format" , MaxVal: UFMT_MAX, Fmt)) |
6984 | return ParseStatus::Failure; |
6985 | |
6986 | if (Fmt == UFMT_UNDEF) |
6987 | return ParseStatus::NoMatch; |
6988 | |
6989 | Format = Fmt; |
6990 | return ParseStatus::Success; |
6991 | } |
6992 | |
6993 | bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, |
6994 | int64_t &Nfmt, |
6995 | StringRef FormatStr, |
6996 | SMLoc Loc) { |
6997 | using namespace llvm::AMDGPU::MTBUFFormat; |
6998 | int64_t Format; |
6999 | |
7000 | Format = getDfmt(Name: FormatStr); |
7001 | if (Format != DFMT_UNDEF) { |
7002 | Dfmt = Format; |
7003 | return true; |
7004 | } |
7005 | |
7006 | Format = getNfmt(Name: FormatStr, STI: getSTI()); |
7007 | if (Format != NFMT_UNDEF) { |
7008 | Nfmt = Format; |
7009 | return true; |
7010 | } |
7011 | |
7012 | Error(L: Loc, Msg: "unsupported format" ); |
7013 | return false; |
7014 | } |
7015 | |
7016 | ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, |
7017 | SMLoc FormatLoc, |
7018 | int64_t &Format) { |
7019 | using namespace llvm::AMDGPU::MTBUFFormat; |
7020 | |
7021 | int64_t Dfmt = DFMT_UNDEF; |
7022 | int64_t Nfmt = NFMT_UNDEF; |
7023 | if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, Loc: FormatLoc)) |
7024 | return ParseStatus::Failure; |
7025 | |
7026 | if (trySkipToken(Kind: AsmToken::Comma)) { |
7027 | StringRef Str; |
7028 | SMLoc Loc = getLoc(); |
7029 | if (!parseId(Val&: Str, ErrMsg: "expected a format string" ) || |
7030 | !matchDfmtNfmt(Dfmt, Nfmt, FormatStr: Str, Loc)) |
7031 | return ParseStatus::Failure; |
7032 | if (Dfmt == DFMT_UNDEF) |
7033 | return Error(L: Loc, Msg: "duplicate numeric format" ); |
7034 | if (Nfmt == NFMT_UNDEF) |
7035 | return Error(L: Loc, Msg: "duplicate data format" ); |
7036 | } |
7037 | |
7038 | Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; |
7039 | Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; |
7040 | |
7041 | if (isGFX10Plus()) { |
7042 | auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, STI: getSTI()); |
7043 | if (Ufmt == UFMT_UNDEF) |
7044 | return Error(L: FormatLoc, Msg: "unsupported format" ); |
7045 | Format = Ufmt; |
7046 | } else { |
7047 | Format = encodeDfmtNfmt(Dfmt, Nfmt); |
7048 | } |
7049 | |
7050 | return ParseStatus::Success; |
7051 | } |
7052 | |
7053 | ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, |
7054 | SMLoc Loc, |
7055 | int64_t &Format) { |
7056 | using namespace llvm::AMDGPU::MTBUFFormat; |
7057 | |
7058 | auto Id = getUnifiedFormat(Name: FormatStr, STI: getSTI()); |
7059 | if (Id == UFMT_UNDEF) |
7060 | return ParseStatus::NoMatch; |
7061 | |
7062 | if (!isGFX10Plus()) |
7063 | return Error(L: Loc, Msg: "unified format is not supported on this GPU" ); |
7064 | |
7065 | Format = Id; |
7066 | return ParseStatus::Success; |
7067 | } |
7068 | |
7069 | ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { |
7070 | using namespace llvm::AMDGPU::MTBUFFormat; |
7071 | SMLoc Loc = getLoc(); |
7072 | |
7073 | if (!parseExpr(Imm&: Format)) |
7074 | return ParseStatus::Failure; |
7075 | if (!isValidFormatEncoding(Val: Format, STI: getSTI())) |
7076 | return Error(L: Loc, Msg: "out of range format" ); |
7077 | |
7078 | return ParseStatus::Success; |
7079 | } |
7080 | |
7081 | ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { |
7082 | using namespace llvm::AMDGPU::MTBUFFormat; |
7083 | |
7084 | if (!trySkipId(Id: "format" , Kind: AsmToken::Colon)) |
7085 | return ParseStatus::NoMatch; |
7086 | |
7087 | if (trySkipToken(Kind: AsmToken::LBrac)) { |
7088 | StringRef FormatStr; |
7089 | SMLoc Loc = getLoc(); |
7090 | if (!parseId(Val&: FormatStr, ErrMsg: "expected a format string" )) |
7091 | return ParseStatus::Failure; |
7092 | |
7093 | auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); |
7094 | if (Res.isNoMatch()) |
7095 | Res = parseSymbolicSplitFormat(FormatStr, FormatLoc: Loc, Format); |
7096 | if (!Res.isSuccess()) |
7097 | return Res; |
7098 | |
7099 | if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket" )) |
7100 | return ParseStatus::Failure; |
7101 | |
7102 | return ParseStatus::Success; |
7103 | } |
7104 | |
7105 | return parseNumericFormat(Format); |
7106 | } |
7107 | |
7108 | ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { |
7109 | using namespace llvm::AMDGPU::MTBUFFormat; |
7110 | |
7111 | int64_t Format = getDefaultFormatEncoding(STI: getSTI()); |
7112 | ParseStatus Res; |
7113 | SMLoc Loc = getLoc(); |
7114 | |
7115 | // Parse legacy format syntax. |
7116 | Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); |
7117 | if (Res.isFailure()) |
7118 | return Res; |
7119 | |
7120 | bool FormatFound = Res.isSuccess(); |
7121 | |
7122 | Operands.push_back( |
7123 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Format, Loc, Type: AMDGPUOperand::ImmTyFORMAT)); |
7124 | |
7125 | if (FormatFound) |
7126 | trySkipToken(Kind: AsmToken::Comma); |
7127 | |
7128 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
7129 | // We are expecting an soffset operand, |
7130 |     // but let the matcher handle the error. |
7131 | return ParseStatus::Success; |
7132 | } |
7133 | |
7134 | // Parse soffset. |
7135 | Res = parseRegOrImm(Operands); |
7136 | if (!Res.isSuccess()) |
7137 | return Res; |
7138 | |
7139 | trySkipToken(Kind: AsmToken::Comma); |
7140 | |
7141 | if (!FormatFound) { |
7142 | Res = parseSymbolicOrNumericFormat(Format); |
7143 | if (Res.isFailure()) |
7144 | return Res; |
7145 | if (Res.isSuccess()) { |
7146 | auto Size = Operands.size(); |
7147 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); |
7148 | assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); |
7149 | Op.setImm(Format); |
7150 | } |
7151 | return ParseStatus::Success; |
7152 | } |
7153 | |
7154 | if (isId(Id: "format" ) && peekToken().is(K: AsmToken::Colon)) |
7155 | return Error(L: getLoc(), Msg: "duplicate format" ); |
7156 | return ParseStatus::Success; |
7157 | } |
7158 | |
7159 | ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { |
7160 | ParseStatus Res = |
7161 | parseIntWithPrefix(Prefix: "offset" , Operands, ImmTy: AMDGPUOperand::ImmTyOffset); |
7162 | if (Res.isNoMatch()) { |
7163 | Res = parseIntWithPrefix(Prefix: "inst_offset" , Operands, |
7164 | ImmTy: AMDGPUOperand::ImmTyInstOffset); |
7165 | } |
7166 | return Res; |
7167 | } |
7168 | |
7169 | ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { |
7170 | ParseStatus Res = |
7171 | parseNamedBit(Name: "r128" , Operands, ImmTy: AMDGPUOperand::ImmTyR128A16); |
7172 | if (Res.isNoMatch()) |
7173 | Res = parseNamedBit(Name: "a16" , Operands, ImmTy: AMDGPUOperand::ImmTyA16); |
7174 | return Res; |
7175 | } |
7176 | |
7177 | ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { |
7178 | ParseStatus Res = |
7179 | parseIntWithPrefix(Prefix: "blgp" , Operands, ImmTy: AMDGPUOperand::ImmTyBLGP); |
7180 | if (Res.isNoMatch()) { |
7181 | Res = |
7182 | parseOperandArrayWithPrefix(Prefix: "neg" , Operands, ImmTy: AMDGPUOperand::ImmTyBLGP); |
7183 | } |
7184 | return Res; |
7185 | } |
7186 | |
7187 | //===----------------------------------------------------------------------===// |
7188 | // Exp |
7189 | //===----------------------------------------------------------------------===// |
7190 | |
7191 | void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { |
7192 | OptionalImmIndexMap OptionalIdx; |
7193 | |
7194 | unsigned OperandIdx[4]; |
7195 | unsigned EnMask = 0; |
7196 | int SrcIdx = 0; |
7197 | |
7198 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
7199 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
7200 | |
7201 | // Add the register arguments |
7202 | if (Op.isReg()) { |
7203 | assert(SrcIdx < 4); |
7204 | OperandIdx[SrcIdx] = Inst.size(); |
7205 | Op.addRegOperands(Inst, N: 1); |
7206 | ++SrcIdx; |
7207 | continue; |
7208 | } |
7209 | |
7210 | if (Op.isOff()) { |
7211 | assert(SrcIdx < 4); |
7212 | OperandIdx[SrcIdx] = Inst.size(); |
7213 | Inst.addOperand(Op: MCOperand::createReg(Reg: MCRegister())); |
7214 | ++SrcIdx; |
7215 | continue; |
7216 | } |
7217 | |
7218 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { |
7219 | Op.addImmOperands(Inst, N: 1); |
7220 | continue; |
7221 | } |
7222 | |
7223 | if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en" )) |
7224 | continue; |
7225 | |
7226 | // Handle optional arguments |
7227 | OptionalIdx[Op.getImmTy()] = i; |
7228 | } |
7229 | |
7230 | assert(SrcIdx == 4); |
7231 | |
7232 | bool Compr = false; |
7233 | if (OptionalIdx.find(x: AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { |
7234 | Compr = true; |
7235 | Inst.getOperand(i: OperandIdx[1]) = Inst.getOperand(i: OperandIdx[2]); |
7236 | Inst.getOperand(i: OperandIdx[2]).setReg(MCRegister()); |
7237 | Inst.getOperand(i: OperandIdx[3]).setReg(MCRegister()); |
7238 | } |
7239 | |
7240 | for (auto i = 0; i < SrcIdx; ++i) { |
7241 | if (Inst.getOperand(i: OperandIdx[i]).getReg()) { |
7242 | EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); |
7243 | } |
7244 | } |
7245 | |
7246 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpVM); |
7247 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpCompr); |
7248 | |
7249 | Inst.addOperand(Op: MCOperand::createImm(Val: EnMask)); |
7250 | } |
7251 | |
7252 | //===----------------------------------------------------------------------===// |
7253 | // s_waitcnt |
7254 | //===----------------------------------------------------------------------===// |
7255 | |
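// Encode one counter value into the packed s_waitcnt immediate. Returns true
// on failure, i.e. when the requested value does not round-trip through
// encode/decode and saturation was not requested.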
7256 | static bool |
7257 | encodeCnt( |
7258 | const AMDGPU::IsaVersion ISA, |
7259 | int64_t &IntVal, |
7260 | int64_t CntVal, |
7261 | bool Saturate, |
7262 | unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), |
7263 | unsigned (*decode)(const IsaVersion &Version, unsigned)) |
7264 | { |
7265 | bool Failed = false; |
7266 | |
7267 | IntVal = encode(ISA, IntVal, CntVal); |
7268 | if (CntVal != decode(ISA, IntVal)) { |
7269 | if (Saturate) { |
7270 | IntVal = encode(ISA, IntVal, -1); |
7271 | } else { |
7272 | Failed = true; |
7273 | } |
7274 | } |
7275 | return Failed; |
7276 | } |
7277 | |
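// Parse one "<counter>(<value>)" field of an s_waitcnt operand, e.g.
// "s_waitcnt vmcnt(0) & lgkmcnt(0)" (illustrative). Fields may be separated by
// '&' or ',', and a "_sat" counter-name suffix clamps an out-of-range value.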
7278 | bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { |
7279 | |
7280 | SMLoc CntLoc = getLoc(); |
7281 | StringRef CntName = getTokenStr(); |
7282 | |
7283 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name" ) || |
7284 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7285 | return false; |
7286 | |
7287 | int64_t CntVal; |
7288 | SMLoc ValLoc = getLoc(); |
7289 | if (!parseExpr(Imm&: CntVal)) |
7290 | return false; |
7291 | |
7292 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU()); |
7293 | |
7294 | bool Failed = true; |
7295 | bool Sat = CntName.ends_with(Suffix: "_sat" ); |
7296 | |
7297 | if (CntName == "vmcnt" || CntName == "vmcnt_sat" ) { |
7298 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeVmcnt, decode: decodeVmcnt); |
7299 | } else if (CntName == "expcnt" || CntName == "expcnt_sat" ) { |
7300 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeExpcnt, decode: decodeExpcnt); |
7301 | } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat" ) { |
7302 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeLgkmcnt, decode: decodeLgkmcnt); |
7303 | } else { |
7304 | Error(L: CntLoc, Msg: "invalid counter name " + CntName); |
7305 | return false; |
7306 | } |
7307 | |
7308 | if (Failed) { |
7309 | Error(L: ValLoc, Msg: "too large value for " + CntName); |
7310 | return false; |
7311 | } |
7312 | |
7313 | if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7314 | return false; |
7315 | |
7316 | if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) { |
7317 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
7318 | Error(L: getLoc(), Msg: "expected a counter name" ); |
7319 | return false; |
7320 | } |
7321 | } |
7322 | |
7323 | return true; |
7324 | } |
7325 | |
7326 | ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { |
7327 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU()); |
7328 | int64_t Waitcnt = getWaitcntBitMask(Version: ISA); |
7329 | SMLoc S = getLoc(); |
7330 | |
7331 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7332 | while (!isToken(Kind: AsmToken::EndOfStatement)) { |
7333 | if (!parseCnt(IntVal&: Waitcnt)) |
7334 | return ParseStatus::Failure; |
7335 | } |
7336 | } else { |
7337 | if (!parseExpr(Imm&: Waitcnt)) |
7338 | return ParseStatus::Failure; |
7339 | } |
7340 | |
7341 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Waitcnt, Loc: S)); |
7342 | return ParseStatus::Success; |
7343 | } |
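// Illustrative examples of the forms accepted above (a sketch, not an
// exhaustive list; counter names follow the vmcnt/expcnt/lgkmcnt spellings
// handled by parseCnt):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt vmcnt(1) & lgkmcnt(2)
//   s_waitcnt 0                       ; a plain integer expression also works
// A "_sat" suffix (e.g. vmcnt_sat) clamps an out-of-range value instead of
// reporting an error.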
7344 | |
7345 | bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { |
7346 | SMLoc FieldLoc = getLoc(); |
7347 | StringRef FieldName = getTokenStr(); |
7348 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a field name" ) || |
7349 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7350 | return false; |
7351 | |
7352 | SMLoc ValueLoc = getLoc(); |
7353 | StringRef ValueName = getTokenStr(); |
7354 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a value name" ) || |
7355 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a right parenthesis" )) |
7356 | return false; |
7357 | |
7358 | unsigned Shift; |
7359 | if (FieldName == "instid0" ) { |
7360 | Shift = 0; |
7361 | } else if (FieldName == "instskip" ) { |
7362 | Shift = 4; |
7363 | } else if (FieldName == "instid1" ) { |
7364 | Shift = 7; |
7365 | } else { |
7366 | Error(L: FieldLoc, Msg: "invalid field name " + FieldName); |
7367 | return false; |
7368 | } |
7369 | |
7370 | int Value; |
7371 | if (Shift == 4) { |
7372 | // Parse values for instskip. |
7373 | Value = StringSwitch<int>(ValueName) |
7374 | .Case(S: "SAME" , Value: 0) |
7375 | .Case(S: "NEXT" , Value: 1) |
7376 | .Case(S: "SKIP_1" , Value: 2) |
7377 | .Case(S: "SKIP_2" , Value: 3) |
7378 | .Case(S: "SKIP_3" , Value: 4) |
7379 | .Case(S: "SKIP_4" , Value: 5) |
7380 | .Default(Value: -1); |
7381 | } else { |
7382 | // Parse values for instid0 and instid1. |
7383 | Value = StringSwitch<int>(ValueName) |
7384 | .Case(S: "NO_DEP" , Value: 0) |
7385 | .Case(S: "VALU_DEP_1" , Value: 1) |
7386 | .Case(S: "VALU_DEP_2" , Value: 2) |
7387 | .Case(S: "VALU_DEP_3" , Value: 3) |
7388 | .Case(S: "VALU_DEP_4" , Value: 4) |
7389 | .Case(S: "TRANS32_DEP_1" , Value: 5) |
7390 | .Case(S: "TRANS32_DEP_2" , Value: 6) |
7391 | .Case(S: "TRANS32_DEP_3" , Value: 7) |
7392 | .Case(S: "FMA_ACCUM_CYCLE_1" , Value: 8) |
7393 | .Case(S: "SALU_CYCLE_1" , Value: 9) |
7394 | .Case(S: "SALU_CYCLE_2" , Value: 10) |
7395 | .Case(S: "SALU_CYCLE_3" , Value: 11) |
7396 | .Default(Value: -1); |
7397 | } |
7398 | if (Value < 0) { |
7399 | Error(L: ValueLoc, Msg: "invalid value name " + ValueName); |
7400 | return false; |
7401 | } |
7402 | |
7403 | Delay |= Value << Shift; |
7404 | return true; |
7405 | } |
7406 | |
7407 | ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { |
7408 | int64_t Delay = 0; |
7409 | SMLoc S = getLoc(); |
7410 | |
7411 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7412 | do { |
7413 | if (!parseDelay(Delay)) |
7414 | return ParseStatus::Failure; |
7415 | } while (trySkipToken(Kind: AsmToken::Pipe)); |
7416 | } else { |
7417 | if (!parseExpr(Imm&: Delay)) |
7418 | return ParseStatus::Failure; |
7419 | } |
7420 | |
7421 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Delay, Loc: S)); |
7422 | return ParseStatus::Success; |
7423 | } |
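// Illustrative example of the field syntax handled by parseDelay (a sketch;
// the field and value names are the ones listed above):
//   s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
// A plain integer expression may be used instead of the field form.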
7424 | |
7425 | bool |
7426 | AMDGPUOperand::isSWaitCnt() const { |
7427 | return isImm(); |
7428 | } |
7429 | |
7430 | bool AMDGPUOperand::isSDelayALU() const { return isImm(); } |
7431 | |
7432 | //===----------------------------------------------------------------------===// |
7433 | // DepCtr |
7434 | //===----------------------------------------------------------------------===// |
7435 | |
7436 | void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, |
7437 | StringRef DepCtrName) { |
7438 | switch (ErrorId) { |
7439 | case OPR_ID_UNKNOWN: |
7440 | Error(L: Loc, Msg: Twine("invalid counter name " , DepCtrName)); |
7441 | return; |
7442 | case OPR_ID_UNSUPPORTED: |
7443 | Error(L: Loc, Msg: Twine(DepCtrName, " is not supported on this GPU" )); |
7444 | return; |
7445 | case OPR_ID_DUPLICATE: |
7446 | Error(L: Loc, Msg: Twine("duplicate counter name " , DepCtrName)); |
7447 | return; |
7448 | case OPR_VAL_INVALID: |
7449 | Error(L: Loc, Msg: Twine("invalid value for " , DepCtrName)); |
7450 | return; |
7451 | default: |
7452 | assert(false); |
7453 | } |
7454 | } |
7455 | |
7456 | bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { |
7457 | |
7458 | using namespace llvm::AMDGPU::DepCtr; |
7459 | |
7460 | SMLoc DepCtrLoc = getLoc(); |
7461 | StringRef DepCtrName = getTokenStr(); |
7462 | |
7463 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name" ) || |
7464 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7465 | return false; |
7466 | |
7467 | int64_t ExprVal; |
7468 | if (!parseExpr(Imm&: ExprVal)) |
7469 | return false; |
7470 | |
7471 | unsigned PrevOprMask = UsedOprMask; |
7472 | int CntVal = encodeDepCtr(Name: DepCtrName, Val: ExprVal, UsedOprMask, STI: getSTI()); |
7473 | |
7474 | if (CntVal < 0) { |
7475 | depCtrError(Loc: DepCtrLoc, ErrorId: CntVal, DepCtrName); |
7476 | return false; |
7477 | } |
7478 | |
7479 | if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7480 | return false; |
7481 | |
7482 | if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) { |
7483 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
7484 | Error(L: getLoc(), Msg: "expected a counter name" ); |
7485 | return false; |
7486 | } |
7487 | } |
7488 | |
7489 | unsigned CntValMask = PrevOprMask ^ UsedOprMask; |
7490 | DepCtr = (DepCtr & ~CntValMask) | CntVal; |
7491 | return true; |
7492 | } |
7493 | |
7494 | ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { |
7495 | using namespace llvm::AMDGPU::DepCtr; |
7496 | |
7497 | int64_t DepCtr = getDefaultDepCtrEncoding(STI: getSTI()); |
7498 | SMLoc Loc = getLoc(); |
7499 | |
7500 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7501 | unsigned UsedOprMask = 0; |
7502 | while (!isToken(Kind: AsmToken::EndOfStatement)) { |
7503 | if (!parseDepCtr(DepCtr, UsedOprMask)) |
7504 | return ParseStatus::Failure; |
7505 | } |
7506 | } else { |
7507 | if (!parseExpr(Imm&: DepCtr)) |
7508 | return ParseStatus::Failure; |
7509 | } |
7510 | |
7511 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DepCtr, Loc)); |
7512 | return ParseStatus::Success; |
7513 | } |
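// Illustrative sketch, assuming counter names such as depctr_va_vdst and
// depctr_sa_sdst from the DepCtr tables (availability depends on the
// subtarget):
//   s_waitcnt_depctr depctr_va_vdst(0) & depctr_sa_sdst(0)
// A plain integer expression may be used instead of the counter form.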
7514 | |
7515 | bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } |
7516 | |
7517 | //===----------------------------------------------------------------------===// |
7518 | // hwreg |
7519 | //===----------------------------------------------------------------------===// |
7520 | |
7521 | ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg, |
7522 | OperandInfoTy &Offset, |
7523 | OperandInfoTy &Width) { |
7524 | using namespace llvm::AMDGPU::Hwreg; |
7525 | |
7526 | if (!trySkipId(Id: "hwreg" , Kind: AsmToken::LParen)) |
7527 | return ParseStatus::NoMatch; |
7528 | |
7529 | // The register may be specified by name or using a numeric code |
7530 | HwReg.Loc = getLoc(); |
7531 | if (isToken(Kind: AsmToken::Identifier) && |
7532 | (HwReg.Val = getHwregId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) { |
7533 | HwReg.IsSymbolic = true; |
7534 | lex(); // skip register name |
7535 | } else if (!parseExpr(Imm&: HwReg.Val, Expected: "a register name" )) { |
7536 | return ParseStatus::Failure; |
7537 | } |
7538 | |
7539 | if (trySkipToken(Kind: AsmToken::RParen)) |
7540 | return ParseStatus::Success; |
7541 | |
7542 | // parse optional params |
7543 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma or a closing parenthesis" )) |
7544 | return ParseStatus::Failure; |
7545 | |
7546 | Offset.Loc = getLoc(); |
7547 | if (!parseExpr(Imm&: Offset.Val)) |
7548 | return ParseStatus::Failure; |
7549 | |
7550 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) |
7551 | return ParseStatus::Failure; |
7552 | |
7553 | Width.Loc = getLoc(); |
7554 | if (!parseExpr(Imm&: Width.Val) || |
7555 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7556 | return ParseStatus::Failure; |
7557 | |
7558 | return ParseStatus::Success; |
7559 | } |
7560 | |
7561 | ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { |
7562 | using namespace llvm::AMDGPU::Hwreg; |
7563 | |
7564 | int64_t ImmVal = 0; |
7565 | SMLoc Loc = getLoc(); |
7566 | |
7567 | StructuredOpField HwReg("id" , "hardware register" , HwregId::Width, |
7568 | HwregId::Default); |
7569 | StructuredOpField Offset("offset" , "bit offset" , HwregOffset::Width, |
7570 | HwregOffset::Default); |
7571 | struct : StructuredOpField { |
7572 | using StructuredOpField::StructuredOpField; |
7573 | bool validate(AMDGPUAsmParser &Parser) const override { |
7574 | if (!isUIntN(N: Width, x: Val - 1)) |
7575 | return Error(Parser, Err: "only values from 1 to 32 are legal" ); |
7576 | return true; |
7577 | } |
7578 | } Width("size" , "bitfield width" , HwregSize::Width, HwregSize::Default); |
7579 | ParseStatus Res = parseStructuredOpFields(Fields: {&HwReg, &Offset, &Width}); |
7580 | |
7581 | if (Res.isNoMatch()) |
7582 | Res = parseHwregFunc(HwReg, Offset, Width); |
7583 | |
7584 | if (Res.isSuccess()) { |
7585 | if (!validateStructuredOpFields(Fields: {&HwReg, &Offset, &Width})) |
7586 | return ParseStatus::Failure; |
7587 | ImmVal = HwregEncoding::encode(Values: HwReg.Val, Values: Offset.Val, Values: Width.Val); |
7588 | } |
7589 | |
7590 | if (Res.isNoMatch() && |
7591 | parseExpr(Imm&: ImmVal, Expected: "a hwreg macro, structured immediate" )) |
7592 | Res = ParseStatus::Success; |
7593 | |
7594 | if (!Res.isSuccess()) |
7595 | return ParseStatus::Failure; |
7596 | |
7597 | if (!isUInt<16>(x: ImmVal)) |
7598 | return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal" ); |
7599 | Operands.push_back( |
7600 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTyHwreg)); |
7601 | return ParseStatus::Success; |
7602 | } |
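// Illustrative examples of hwreg operand forms (a sketch; symbolic names such
// as HW_REG_MODE come from the Hwreg tables and vary by subtarget):
//   s_getreg_b32 s0, hwreg(HW_REG_MODE)            ; register only
//   s_setreg_b32 hwreg(HW_REG_MODE, 0, 32), s0     ; register, offset, width
//   s_getreg_b32 s0, {id: 1, offset: 0, size: 32}  ; structured form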
7603 | |
7604 | bool AMDGPUOperand::isHwreg() const { |
7605 | return isImmTy(ImmT: ImmTyHwreg); |
7606 | } |
7607 | |
7608 | //===----------------------------------------------------------------------===// |
7609 | // sendmsg |
7610 | //===----------------------------------------------------------------------===// |
7611 | |
7612 | bool |
7613 | AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, |
7614 | OperandInfoTy &Op, |
7615 | OperandInfoTy &Stream) { |
7616 | using namespace llvm::AMDGPU::SendMsg; |
7617 | |
7618 | Msg.Loc = getLoc(); |
7619 | if (isToken(Kind: AsmToken::Identifier) && |
7620 | (Msg.Val = getMsgId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) { |
7621 | Msg.IsSymbolic = true; |
7622 | lex(); // skip message name |
7623 | } else if (!parseExpr(Imm&: Msg.Val, Expected: "a message name" )) { |
7624 | return false; |
7625 | } |
7626 | |
7627 | if (trySkipToken(Kind: AsmToken::Comma)) { |
7628 | Op.IsDefined = true; |
7629 | Op.Loc = getLoc(); |
7630 | if (isToken(Kind: AsmToken::Identifier) && |
7631 | (Op.Val = getMsgOpId(MsgId: Msg.Val, Name: getTokenStr(), STI: getSTI())) != |
7632 | OPR_ID_UNKNOWN) { |
7633 | lex(); // skip operation name |
7634 | } else if (!parseExpr(Imm&: Op.Val, Expected: "an operation name" )) { |
7635 | return false; |
7636 | } |
7637 | |
7638 | if (trySkipToken(Kind: AsmToken::Comma)) { |
7639 | Stream.IsDefined = true; |
7640 | Stream.Loc = getLoc(); |
7641 | if (!parseExpr(Imm&: Stream.Val)) |
7642 | return false; |
7643 | } |
7644 | } |
7645 | |
7646 | return skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" ); |
7647 | } |
7648 | |
7649 | bool |
7650 | AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, |
7651 | const OperandInfoTy &Op, |
7652 | const OperandInfoTy &Stream) { |
7653 | using namespace llvm::AMDGPU::SendMsg; |
7654 | |
7655 | // Validation strictness depends on whether message is specified |
7656 | // in a symbolic or in a numeric form. In the latter case |
7657 | // only the encoding possibility is checked. |

7658 | bool Strict = Msg.IsSymbolic; |
7659 | |
7660 | if (Strict) { |
7661 | if (Msg.Val == OPR_ID_UNSUPPORTED) { |
7662 | Error(L: Msg.Loc, Msg: "specified message id is not supported on this GPU" ); |
7663 | return false; |
7664 | } |
7665 | } else { |
7666 | if (!isValidMsgId(MsgId: Msg.Val, STI: getSTI())) { |
7667 | Error(L: Msg.Loc, Msg: "invalid message id" ); |
7668 | return false; |
7669 | } |
7670 | } |
7671 | if (Strict && (msgRequiresOp(MsgId: Msg.Val, STI: getSTI()) != Op.IsDefined)) { |
7672 | if (Op.IsDefined) { |
7673 | Error(L: Op.Loc, Msg: "message does not support operations" ); |
7674 | } else { |
7675 | Error(L: Msg.Loc, Msg: "missing message operation" ); |
7676 | } |
7677 | return false; |
7678 | } |
7679 | if (!isValidMsgOp(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI(), Strict)) { |
7680 | if (Op.Val == OPR_ID_UNSUPPORTED) |
7681 | Error(L: Op.Loc, Msg: "specified operation id is not supported on this GPU" ); |
7682 | else |
7683 | Error(L: Op.Loc, Msg: "invalid operation id" ); |
7684 | return false; |
7685 | } |
7686 | if (Strict && !msgSupportsStream(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI()) && |
7687 | Stream.IsDefined) { |
7688 | Error(L: Stream.Loc, Msg: "message operation does not support streams" ); |
7689 | return false; |
7690 | } |
7691 | if (!isValidMsgStream(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val, STI: getSTI(), Strict)) { |
7692 | Error(L: Stream.Loc, Msg: "invalid message stream id" ); |
7693 | return false; |
7694 | } |
7695 | return true; |
7696 | } |
7697 | |
7698 | ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { |
7699 | using namespace llvm::AMDGPU::SendMsg; |
7700 | |
7701 | int64_t ImmVal = 0; |
7702 | SMLoc Loc = getLoc(); |
7703 | |
7704 | if (trySkipId(Id: "sendmsg" , Kind: AsmToken::LParen)) { |
7705 | OperandInfoTy Msg(OPR_ID_UNKNOWN); |
7706 | OperandInfoTy Op(OP_NONE_); |
7707 | OperandInfoTy Stream(STREAM_ID_NONE_); |
7708 | if (parseSendMsgBody(Msg, Op, Stream) && |
7709 | validateSendMsg(Msg, Op, Stream)) { |
7710 | ImmVal = encodeMsg(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val); |
7711 | } else { |
7712 | return ParseStatus::Failure; |
7713 | } |
7714 | } else if (parseExpr(Imm&: ImmVal, Expected: "a sendmsg macro" )) { |
7715 | if (ImmVal < 0 || !isUInt<16>(x: ImmVal)) |
7716 | return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal" ); |
7717 | } else { |
7718 | return ParseStatus::Failure; |
7719 | } |
7720 | |
7721 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTySendMsg)); |
7722 | return ParseStatus::Success; |
7723 | } |
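// Illustrative examples of sendmsg operand forms (a sketch; message and
// operation names such as MSG_GS and GS_OP_EMIT come from the SendMsg tables
// and depend on the subtarget):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// A plain 16-bit immediate may be used instead of the sendmsg macro.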
7724 | |
7725 | bool AMDGPUOperand::isSendMsg() const { |
7726 | return isImmTy(ImmT: ImmTySendMsg); |
7727 | } |
7728 | |
7729 | //===----------------------------------------------------------------------===// |
7730 | // v_interp |
7731 | //===----------------------------------------------------------------------===// |
7732 | |
7733 | ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { |
7734 | StringRef Str; |
7735 | SMLoc S = getLoc(); |
7736 | |
7737 | if (!parseId(Val&: Str)) |
7738 | return ParseStatus::NoMatch; |
7739 | |
7740 | int Slot = StringSwitch<int>(Str) |
7741 | .Case(S: "p10" , Value: 0) |
7742 | .Case(S: "p20" , Value: 1) |
7743 | .Case(S: "p0" , Value: 2) |
7744 | .Default(Value: -1); |
7745 | |
7746 | if (Slot == -1) |
7747 | return Error(L: S, Msg: "invalid interpolation slot" ); |
7748 | |
7749 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Slot, Loc: S, |
7750 | Type: AMDGPUOperand::ImmTyInterpSlot)); |
7751 | return ParseStatus::Success; |
7752 | } |
7753 | |
7754 | ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { |
7755 | StringRef Str; |
7756 | SMLoc S = getLoc(); |
7757 | |
7758 | if (!parseId(Val&: Str)) |
7759 | return ParseStatus::NoMatch; |
7760 | |
7761 | if (!Str.starts_with(Prefix: "attr" )) |
7762 | return Error(L: S, Msg: "invalid interpolation attribute" ); |
7763 | |
7764 | StringRef Chan = Str.take_back(N: 2); |
7765 | int AttrChan = StringSwitch<int>(Chan) |
7766 | .Case(S: ".x" , Value: 0) |
7767 | .Case(S: ".y" , Value: 1) |
7768 | .Case(S: ".z" , Value: 2) |
7769 | .Case(S: ".w" , Value: 3) |
7770 | .Default(Value: -1); |
7771 | if (AttrChan == -1) |
7772 | return Error(L: S, Msg: "invalid or missing interpolation attribute channel" ); |
7773 | |
7774 | Str = Str.drop_back(N: 2).drop_front(N: 4); |
7775 | |
7776 | uint8_t Attr; |
7777 | if (Str.getAsInteger(Radix: 10, Result&: Attr)) |
7778 | return Error(L: S, Msg: "invalid or missing interpolation attribute number" ); |
7779 | |
7780 | if (Attr > 32) |
7781 | return Error(L: S, Msg: "out of bounds interpolation attribute number" ); |
7782 | |
7783 | SMLoc SChan = SMLoc::getFromPointer(Ptr: Chan.data()); |
7784 | |
7785 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Attr, Loc: S, |
7786 | Type: AMDGPUOperand::ImmTyInterpAttr)); |
7787 | Operands.push_back(Elt: AMDGPUOperand::CreateImm( |
7788 | AsmParser: this, Val: AttrChan, Loc: SChan, Type: AMDGPUOperand::ImmTyInterpAttrChan)); |
7789 | return ParseStatus::Success; |
7790 | } |
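// Illustrative examples of the interpolation operands parsed above (a sketch):
//   v_interp_p1_f32 v0, v1, attr0.x
//   v_interp_mov_f32 v0, p10, attr3.y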
7791 | |
7792 | //===----------------------------------------------------------------------===// |
7793 | // exp |
7794 | //===----------------------------------------------------------------------===// |
7795 | |
7796 | ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { |
7797 | using namespace llvm::AMDGPU::Exp; |
7798 | |
7799 | StringRef Str; |
7800 | SMLoc S = getLoc(); |
7801 | |
7802 | if (!parseId(Val&: Str)) |
7803 | return ParseStatus::NoMatch; |
7804 | |
7805 | unsigned Id = getTgtId(Name: Str); |
7806 | if (Id == ET_INVALID || !isSupportedTgtId(Id, STI: getSTI())) |
7807 | return Error(L: S, Msg: (Id == ET_INVALID) |
7808 | ? "invalid exp target" |
7809 | : "exp target is not supported on this GPU" ); |
7810 | |
7811 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Id, Loc: S, |
7812 | Type: AMDGPUOperand::ImmTyExpTgt)); |
7813 | return ParseStatus::Success; |
7814 | } |
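// Illustrative example of an export instruction using these target names
// (a sketch; valid targets such as mrt0, mrtz, pos0 and param0 depend on the
// subtarget):
//   exp mrt0 v0, v1, v2, v3 done vm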
7815 | |
7816 | //===----------------------------------------------------------------------===// |
7817 | // parser helpers |
7818 | //===----------------------------------------------------------------------===// |
7819 | |
7820 | bool |
7821 | AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { |
7822 | return Token.is(K: AsmToken::Identifier) && Token.getString() == Id; |
7823 | } |
7824 | |
7825 | bool |
7826 | AMDGPUAsmParser::isId(const StringRef Id) const { |
7827 | return isId(Token: getToken(), Id); |
7828 | } |
7829 | |
7830 | bool |
7831 | AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { |
7832 | return getTokenKind() == Kind; |
7833 | } |
7834 | |
7835 | StringRef AMDGPUAsmParser::getId() const { |
7836 | return isToken(Kind: AsmToken::Identifier) ? getTokenStr() : StringRef(); |
7837 | } |
7838 | |
7839 | bool |
7840 | AMDGPUAsmParser::trySkipId(const StringRef Id) { |
7841 | if (isId(Id)) { |
7842 | lex(); |
7843 | return true; |
7844 | } |
7845 | return false; |
7846 | } |
7847 | |
7848 | bool |
7849 | AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { |
7850 | if (isToken(Kind: AsmToken::Identifier)) { |
7851 | StringRef Tok = getTokenStr(); |
7852 | if (Tok.starts_with(Prefix: Pref) && Tok.drop_front(N: Pref.size()) == Id) { |
7853 | lex(); |
7854 | return true; |
7855 | } |
7856 | } |
7857 | return false; |
7858 | } |
7859 | |
7860 | bool |
7861 | AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { |
7862 | if (isId(Id) && peekToken().is(K: Kind)) { |
7863 | lex(); |
7864 | lex(); |
7865 | return true; |
7866 | } |
7867 | return false; |
7868 | } |
7869 | |
7870 | bool |
7871 | AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { |
7872 | if (isToken(Kind)) { |
7873 | lex(); |
7874 | return true; |
7875 | } |
7876 | return false; |
7877 | } |
7878 | |
7879 | bool |
7880 | AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, |
7881 | const StringRef ErrMsg) { |
7882 | if (!trySkipToken(Kind)) { |
7883 | Error(L: getLoc(), Msg: ErrMsg); |
7884 | return false; |
7885 | } |
7886 | return true; |
7887 | } |
7888 | |
7889 | bool |
7890 | AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { |
7891 | SMLoc S = getLoc(); |
7892 | |
7893 | const MCExpr *Expr; |
7894 | if (Parser.parseExpression(Res&: Expr)) |
7895 | return false; |
7896 | |
7897 | if (Expr->evaluateAsAbsolute(Res&: Imm)) |
7898 | return true; |
7899 | |
7900 | if (Expected.empty()) { |
7901 | Error(L: S, Msg: "expected absolute expression" ); |
7902 | } else { |
7903 | Error(L: S, Msg: Twine("expected " , Expected) + |
7904 | Twine(" or an absolute expression" )); |
7905 | } |
7906 | return false; |
7907 | } |
7908 | |
7909 | bool |
7910 | AMDGPUAsmParser::parseExpr(OperandVector &Operands) { |
7911 | SMLoc S = getLoc(); |
7912 | |
7913 | const MCExpr *Expr; |
7914 | if (Parser.parseExpression(Res&: Expr)) |
7915 | return false; |
7916 | |
7917 | int64_t IntVal; |
7918 | if (Expr->evaluateAsAbsolute(Res&: IntVal)) { |
7919 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S)); |
7920 | } else { |
7921 | Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S)); |
7922 | } |
7923 | return true; |
7924 | } |
7925 | |
7926 | bool |
7927 | AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { |
7928 | if (isToken(Kind: AsmToken::String)) { |
7929 | Val = getToken().getStringContents(); |
7930 | lex(); |
7931 | return true; |
7932 | } |
7933 | Error(L: getLoc(), Msg: ErrMsg); |
7934 | return false; |
7935 | } |
7936 | |
7937 | bool |
7938 | AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { |
7939 | if (isToken(Kind: AsmToken::Identifier)) { |
7940 | Val = getTokenStr(); |
7941 | lex(); |
7942 | return true; |
7943 | } |
7944 | if (!ErrMsg.empty()) |
7945 | Error(L: getLoc(), Msg: ErrMsg); |
7946 | return false; |
7947 | } |
7948 | |
7949 | AsmToken |
7950 | AMDGPUAsmParser::getToken() const { |
7951 | return Parser.getTok(); |
7952 | } |
7953 | |
7954 | AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { |
7955 | return isToken(Kind: AsmToken::EndOfStatement) |
7956 | ? getToken() |
7957 | : getLexer().peekTok(ShouldSkipSpace); |
7958 | } |
7959 | |
7960 | void |
7961 | AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { |
7962 | auto TokCount = getLexer().peekTokens(Buf: Tokens); |
7963 | |
7964 | for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) |
7965 | Tokens[Idx] = AsmToken(AsmToken::Error, "" ); |
7966 | } |
7967 | |
7968 | AsmToken::TokenKind |
7969 | AMDGPUAsmParser::getTokenKind() const { |
7970 | return getLexer().getKind(); |
7971 | } |
7972 | |
7973 | SMLoc |
7974 | AMDGPUAsmParser::getLoc() const { |
7975 | return getToken().getLoc(); |
7976 | } |
7977 | |
7978 | StringRef |
7979 | AMDGPUAsmParser::getTokenStr() const { |
7980 | return getToken().getString(); |
7981 | } |
7982 | |
7983 | void |
7984 | AMDGPUAsmParser::lex() { |
7985 | Parser.Lex(); |
7986 | } |
7987 | |
7988 | SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { |
7989 | return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); |
7990 | } |
7991 | |
7992 | SMLoc |
7993 | AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, |
7994 | const OperandVector &Operands) const { |
7995 | for (unsigned i = Operands.size() - 1; i > 0; --i) { |
7996 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
7997 | if (Test(Op)) |
7998 | return Op.getStartLoc(); |
7999 | } |
8000 | return getInstLoc(Operands); |
8001 | } |
8002 | |
8003 | SMLoc |
8004 | AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, |
8005 | const OperandVector &Operands) const { |
8006 | auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(ImmT: Type); }; |
8007 | return getOperandLoc(Test, Operands); |
8008 | } |
8009 | |
8010 | SMLoc AMDGPUAsmParser::getRegLoc(MCRegister Reg, |
8011 | const OperandVector &Operands) const { |
8012 | auto Test = [=](const AMDGPUOperand& Op) { |
8013 | return Op.isRegKind() && Op.getReg() == Reg; |
8014 | }; |
8015 | return getOperandLoc(Test, Operands); |
8016 | } |
8017 | |
8018 | SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, |
8019 | bool SearchMandatoryLiterals) const { |
8020 | auto Test = [](const AMDGPUOperand& Op) { |
8021 | return Op.IsImmKindLiteral() || Op.isExpr(); |
8022 | }; |
8023 | SMLoc Loc = getOperandLoc(Test, Operands); |
8024 | if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) |
8025 | Loc = getMandatoryLitLoc(Operands); |
8026 | return Loc; |
8027 | } |
8028 | |
8029 | SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { |
8030 | auto Test = [](const AMDGPUOperand &Op) { |
8031 | return Op.IsImmKindMandatoryLiteral(); |
8032 | }; |
8033 | return getOperandLoc(Test, Operands); |
8034 | } |
8035 | |
8036 | SMLoc |
8037 | AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { |
8038 | auto Test = [](const AMDGPUOperand& Op) { |
8039 | return Op.isImmKindConst(); |
8040 | }; |
8041 | return getOperandLoc(Test, Operands); |
8042 | } |
8043 | |
8044 | ParseStatus |
8045 | AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) { |
8046 | if (!trySkipToken(Kind: AsmToken::LCurly)) |
8047 | return ParseStatus::NoMatch; |
8048 | |
8049 | bool First = true; |
8050 | while (!trySkipToken(Kind: AsmToken::RCurly)) { |
8051 | if (!First && |
8052 | !skipToken(Kind: AsmToken::Comma, ErrMsg: "comma or closing brace expected" )) |
8053 | return ParseStatus::Failure; |
8054 | |
8055 | StringRef Id = getTokenStr(); |
8056 | SMLoc IdLoc = getLoc(); |
8057 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "field name expected" ) || |
8058 | !skipToken(Kind: AsmToken::Colon, ErrMsg: "colon expected" )) |
8059 | return ParseStatus::Failure; |
8060 | |
8061 | const auto *I = |
8062 | find_if(Range&: Fields, P: [Id](StructuredOpField *F) { return F->Id == Id; }); |
8063 | if (I == Fields.end()) |
8064 | return Error(L: IdLoc, Msg: "unknown field" ); |
8065 | if ((*I)->IsDefined) |
8066 | return Error(L: IdLoc, Msg: "duplicate field" ); |
8067 | |
8068 | // TODO: Support symbolic values. |
8069 | (*I)->Loc = getLoc(); |
8070 | if (!parseExpr(Imm&: (*I)->Val)) |
8071 | return ParseStatus::Failure; |
8072 | (*I)->IsDefined = true; |
8073 | |
8074 | First = false; |
8075 | } |
8076 | return ParseStatus::Success; |
8077 | } |
8078 | |
8079 | bool AMDGPUAsmParser::validateStructuredOpFields( |
8080 | ArrayRef<const StructuredOpField *> Fields) { |
8081 | return all_of(Range&: Fields, P: [this](const StructuredOpField *F) { |
8082 | return F->validate(Parser&: *this); |
8083 | }); |
8084 | } |
8085 | |
8086 | //===----------------------------------------------------------------------===// |
8087 | // swizzle |
8088 | //===----------------------------------------------------------------------===// |
8089 | |
8090 | LLVM_READNONE |
8091 | static unsigned |
8092 | encodeBitmaskPerm(const unsigned AndMask, |
8093 | const unsigned OrMask, |
8094 | const unsigned XorMask) { |
8095 | using namespace llvm::AMDGPU::Swizzle; |
8096 | |
8097 | return BITMASK_PERM_ENC | |
8098 | (AndMask << BITMASK_AND_SHIFT) | |
8099 | (OrMask << BITMASK_OR_SHIFT) | |
8100 | (XorMask << BITMASK_XOR_SHIFT); |
8101 | } |
8102 | |
8103 | bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal, |
8104 | const unsigned MaxVal, |
8105 | const Twine &ErrMsg, SMLoc &Loc) { |
8106 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) { |
8107 | return false; |
8108 | } |
8109 | Loc = getLoc(); |
8110 | if (!parseExpr(Imm&: Op)) { |
8111 | return false; |
8112 | } |
8113 | if (Op < MinVal || Op > MaxVal) { |
8114 | Error(L: Loc, Msg: ErrMsg); |
8115 | return false; |
8116 | } |
8117 | |
8118 | return true; |
8119 | } |
8120 | |
8121 | bool |
8122 | AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, |
8123 | const unsigned MinVal, |
8124 | const unsigned MaxVal, |
8125 | const StringRef ErrMsg) { |
8126 | SMLoc Loc; |
8127 | for (unsigned i = 0; i < OpNum; ++i) { |
8128 | if (!parseSwizzleOperand(Op&: Op[i], MinVal, MaxVal, ErrMsg, Loc)) |
8129 | return false; |
8130 | } |
8131 | |
8132 | return true; |
8133 | } |
8134 | |
8135 | bool |
8136 | AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { |
8137 | using namespace llvm::AMDGPU::Swizzle; |
8138 | |
8139 | int64_t Lane[LANE_NUM]; |
8140 | if (parseSwizzleOperands(OpNum: LANE_NUM, Op: Lane, MinVal: 0, MaxVal: LANE_MAX, |
8141 | ErrMsg: "expected a 2-bit lane id" )) { |
8142 | Imm = QUAD_PERM_ENC; |
8143 | for (unsigned I = 0; I < LANE_NUM; ++I) { |
8144 | Imm |= Lane[I] << (LANE_SHIFT * I); |
8145 | } |
8146 | return true; |
8147 | } |
8148 | return false; |
8149 | } |
8150 | |
8151 | bool |
8152 | AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { |
8153 | using namespace llvm::AMDGPU::Swizzle; |
8154 | |
8155 | SMLoc Loc; |
8156 | int64_t GroupSize; |
8157 | int64_t LaneIdx; |
8158 | |
8159 | if (!parseSwizzleOperand(Op&: GroupSize, |
8160 | MinVal: 2, MaxVal: 32, |
8161 | ErrMsg: "group size must be in the interval [2,32]" , |
8162 | Loc)) { |
8163 | return false; |
8164 | } |
8165 | if (!isPowerOf2_64(Value: GroupSize)) { |
8166 | Error(L: Loc, Msg: "group size must be a power of two" ); |
8167 | return false; |
8168 | } |
8169 | if (parseSwizzleOperand(Op&: LaneIdx, |
8170 | MinVal: 0, MaxVal: GroupSize - 1, |
8171 | ErrMsg: "lane id must be in the interval [0,group size - 1]" , |
8172 | Loc)) { |
8173 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX - GroupSize + 1, OrMask: LaneIdx, XorMask: 0); |
8174 | return true; |
8175 | } |
8176 | return false; |
8177 | } |
8178 | |
8179 | bool |
8180 | AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { |
8181 | using namespace llvm::AMDGPU::Swizzle; |
8182 | |
8183 | SMLoc Loc; |
8184 | int64_t GroupSize; |
8185 | |
8186 | if (!parseSwizzleOperand(Op&: GroupSize, |
8187 | MinVal: 2, MaxVal: 32, |
8188 | ErrMsg: "group size must be in the interval [2,32]" , |
8189 | Loc)) { |
8190 | return false; |
8191 | } |
8192 | if (!isPowerOf2_64(Value: GroupSize)) { |
8193 | Error(L: Loc, Msg: "group size must be a power of two" ); |
8194 | return false; |
8195 | } |
8196 | |
8197 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: 0, XorMask: GroupSize - 1); |
8198 | return true; |
8199 | } |
8200 | |
8201 | bool |
8202 | AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { |
8203 | using namespace llvm::AMDGPU::Swizzle; |
8204 | |
8205 | SMLoc Loc; |
8206 | int64_t GroupSize; |
8207 | |
8208 | if (!parseSwizzleOperand(Op&: GroupSize, |
8209 | MinVal: 1, MaxVal: 16, |
8210 | ErrMsg: "group size must be in the interval [1,16]" , |
8211 | Loc)) { |
8212 | return false; |
8213 | } |
8214 | if (!isPowerOf2_64(Value: GroupSize)) { |
8215 | Error(L: Loc, Msg: "group size must be a power of two" ); |
8216 | return false; |
8217 | } |
8218 | |
8219 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: 0, XorMask: GroupSize); |
8220 | return true; |
8221 | } |
8222 | |
8223 | bool |
8224 | AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { |
8225 | using namespace llvm::AMDGPU::Swizzle; |
8226 | |
8227 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) { |
8228 | return false; |
8229 | } |
8230 | |
8231 | StringRef Ctl; |
8232 | SMLoc StrLoc = getLoc(); |
8233 | if (!parseString(Val&: Ctl)) { |
8234 | return false; |
8235 | } |
8236 | if (Ctl.size() != BITMASK_WIDTH) { |
8237 | Error(L: StrLoc, Msg: "expected a 5-character mask" ); |
8238 | return false; |
8239 | } |
8240 | |
8241 | unsigned AndMask = 0; |
8242 | unsigned OrMask = 0; |
8243 | unsigned XorMask = 0; |
8244 | |
8245 | for (size_t i = 0; i < Ctl.size(); ++i) { |
8246 | unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); |
8247 | switch(Ctl[i]) { |
8248 | default: |
8249 | Error(L: StrLoc, Msg: "invalid mask" ); |
8250 | return false; |
8251 | case '0': |
8252 | break; |
8253 | case '1': |
8254 | OrMask |= Mask; |
8255 | break; |
8256 | case 'p': |
8257 | AndMask |= Mask; |
8258 | break; |
8259 | case 'i': |
8260 | AndMask |= Mask; |
8261 | XorMask |= Mask; |
8262 | break; |
8263 | } |
8264 | } |
8265 | |
8266 | Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); |
8267 | return true; |
8268 | } |
8269 | |
8270 | bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) { |
8271 | using namespace llvm::AMDGPU::Swizzle; |
8272 | |
8273 | if (!AMDGPU::isGFX9Plus(STI: getSTI())) { |
8274 | Error(L: getLoc(), Msg: "FFT mode swizzle not supported on this GPU" ); |
8275 | return false; |
8276 | } |
8277 | |
8278 | int64_t Swizzle; |
8279 | SMLoc Loc; |
8280 | if (!parseSwizzleOperand(Op&: Swizzle, MinVal: 0, MaxVal: FFT_SWIZZLE_MAX, |
8281 | ErrMsg: "FFT swizzle must be in the interval [0," + |
8282 | Twine(FFT_SWIZZLE_MAX) + Twine(']'), |
8283 | Loc)) |
8284 | return false; |
8285 | |
8286 | Imm = FFT_MODE_ENC | Swizzle; |
8287 | return true; |
8288 | } |
8289 | |
8290 | bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) { |
8291 | using namespace llvm::AMDGPU::Swizzle; |
8292 | |
8293 | if (!AMDGPU::isGFX9Plus(STI: getSTI())) { |
8294 | Error(L: getLoc(), Msg: "Rotate mode swizzle not supported on this GPU" ); |
8295 | return false; |
8296 | } |
8297 | |
8298 | SMLoc Loc; |
8299 | int64_t Direction; |
8300 | |
8301 | if (!parseSwizzleOperand(Op&: Direction, MinVal: 0, MaxVal: 1, |
8302 | ErrMsg: "direction must be 0 (left) or 1 (right)" , Loc)) |
8303 | return false; |
8304 | |
8305 | int64_t RotateSize; |
8306 | if (!parseSwizzleOperand( |
8307 | Op&: RotateSize, MinVal: 0, MaxVal: ROTATE_MAX_SIZE, |
8308 | ErrMsg: "number of threads to rotate must be in the interval [0," + |
8309 | Twine(ROTATE_MAX_SIZE) + Twine(']'), |
8310 | Loc)) |
8311 | return false; |
8312 | |
8313 | Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) | |
8314 | (RotateSize << ROTATE_SIZE_SHIFT); |
8315 | return true; |
8316 | } |
8317 | |
8318 | bool |
8319 | AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { |
8320 | |
8321 | SMLoc OffsetLoc = getLoc(); |
8322 | |
8323 | if (!parseExpr(Imm, Expected: "a swizzle macro" )) { |
8324 | return false; |
8325 | } |
8326 | if (!isUInt<16>(x: Imm)) { |
8327 | Error(L: OffsetLoc, Msg: "expected a 16-bit offset" ); |
8328 | return false; |
8329 | } |
8330 | return true; |
8331 | } |
8332 | |
8333 | bool |
8334 | AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { |
8335 | using namespace llvm::AMDGPU::Swizzle; |
8336 | |
8337 | if (skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) { |

8338 | |
8339 | SMLoc ModeLoc = getLoc(); |
8340 | bool Ok = false; |
8341 | |
8342 | if (trySkipId(Id: IdSymbolic[ID_QUAD_PERM])) { |
8343 | Ok = parseSwizzleQuadPerm(Imm); |
8344 | } else if (trySkipId(Id: IdSymbolic[ID_BITMASK_PERM])) { |
8345 | Ok = parseSwizzleBitmaskPerm(Imm); |
8346 | } else if (trySkipId(Id: IdSymbolic[ID_BROADCAST])) { |
8347 | Ok = parseSwizzleBroadcast(Imm); |
8348 | } else if (trySkipId(Id: IdSymbolic[ID_SWAP])) { |
8349 | Ok = parseSwizzleSwap(Imm); |
8350 | } else if (trySkipId(Id: IdSymbolic[ID_REVERSE])) { |
8351 | Ok = parseSwizzleReverse(Imm); |
8352 | } else if (trySkipId(Id: IdSymbolic[ID_FFT])) { |
8353 | Ok = parseSwizzleFFT(Imm); |
8354 | } else if (trySkipId(Id: IdSymbolic[ID_ROTATE])) { |
8355 | Ok = parseSwizzleRotate(Imm); |
8356 | } else { |
8357 | Error(L: ModeLoc, Msg: "expected a swizzle mode" ); |
8358 | } |
8359 | |
8360 | return Ok && skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" ); |
8361 | } |
8362 | |
8363 | return false; |
8364 | } |
8365 | |
8366 | ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { |
8367 | SMLoc S = getLoc(); |
8368 | int64_t Imm = 0; |
8369 | |
8370 | if (trySkipId(Id: "offset" )) { |
8371 | |
8372 | bool Ok = false; |
8373 | if (skipToken(Kind: AsmToken::Colon, ErrMsg: "expected a colon" )) { |
8374 | if (trySkipId(Id: "swizzle" )) { |
8375 | Ok = parseSwizzleMacro(Imm); |
8376 | } else { |
8377 | Ok = parseSwizzleOffset(Imm); |
8378 | } |
8379 | } |
8380 | |
8381 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTySwizzle)); |
8382 | |
8383 | return Ok ? ParseStatus::Success : ParseStatus::Failure; |
8384 | } |
8385 | return ParseStatus::NoMatch; |
8386 | } |
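// Illustrative examples of the swizzle forms parsed above (a sketch; FFT and
// ROTATE modes additionally require gfx9+):
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "00p11")
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 2)
//   ds_swizzle_b32 v0, v1 offset:0x1234            ; raw 16-bit offset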
8387 | |
8388 | bool |
8389 | AMDGPUOperand::isSwizzle() const { |
8390 | return isImmTy(ImmT: ImmTySwizzle); |
8391 | } |
8392 | |
8393 | //===----------------------------------------------------------------------===// |
8394 | // VGPR Index Mode |
8395 | //===----------------------------------------------------------------------===// |
8396 | |
8397 | int64_t AMDGPUAsmParser::parseGPRIdxMacro() { |
8398 | |
8399 | using namespace llvm::AMDGPU::VGPRIndexMode; |
8400 | |
8401 | if (trySkipToken(Kind: AsmToken::RParen)) { |
8402 | return OFF; |
8403 | } |
8404 | |
8405 | int64_t Imm = 0; |
8406 | |
8407 | while (true) { |
8408 | unsigned Mode = 0; |
8409 | SMLoc S = getLoc(); |
8410 | |
8411 | for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { |
8412 | if (trySkipId(Id: IdSymbolic[ModeId])) { |
8413 | Mode = 1 << ModeId; |
8414 | break; |
8415 | } |
8416 | } |
8417 | |
8418 | if (Mode == 0) { |
8419 | Error(L: S, Msg: (Imm == 0)? |
8420 | "expected a VGPR index mode or a closing parenthesis" : |
8421 | "expected a VGPR index mode" ); |
8422 | return UNDEF; |
8423 | } |
8424 | |
8425 | if (Imm & Mode) { |
8426 | Error(L: S, Msg: "duplicate VGPR index mode" ); |
8427 | return UNDEF; |
8428 | } |
8429 | Imm |= Mode; |
8430 | |
8431 | if (trySkipToken(Kind: AsmToken::RParen)) |
8432 | break; |
8433 | if (!skipToken(Kind: AsmToken::Comma, |
8434 | ErrMsg: "expected a comma or a closing parenthesis" )) |
8435 | return UNDEF; |
8436 | } |
8437 | |
8438 | return Imm; |
8439 | } |
8440 | |
8441 | ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { |
8442 | |
8443 | using namespace llvm::AMDGPU::VGPRIndexMode; |
8444 | |
8445 | int64_t Imm = 0; |
8446 | SMLoc S = getLoc(); |
8447 | |
8448 | if (trySkipId(Id: "gpr_idx" , Kind: AsmToken::LParen)) { |
8449 | Imm = parseGPRIdxMacro(); |
8450 | if (Imm == UNDEF) |
8451 | return ParseStatus::Failure; |
8452 | } else { |
8453 | if (getParser().parseAbsoluteExpression(Res&: Imm)) |
8454 | return ParseStatus::Failure; |
8455 | if (Imm < 0 || !isUInt<4>(x: Imm)) |
8456 | return Error(L: S, Msg: "invalid immediate: only 4-bit values are legal" ); |
8457 | } |
8458 | |
8459 | Operands.push_back( |
8460 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTyGprIdxMode)); |
8461 | return ParseStatus::Success; |
8462 | } |
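// Illustrative example (a sketch; mode names such as SRC0 and DST come from
// the VGPRIndexMode IdSymbolic table):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// A plain 4-bit immediate may be used instead of the gpr_idx macro.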
8463 | |
8464 | bool AMDGPUOperand::isGPRIdxMode() const { |
8465 | return isImmTy(ImmT: ImmTyGprIdxMode); |
8466 | } |
8467 | |
8468 | //===----------------------------------------------------------------------===// |
8469 | // sopp branch targets |
8470 | //===----------------------------------------------------------------------===// |
8471 | |
8472 | ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { |
8473 | |
8474 | // Make sure we are not parsing something |
8475 | // that looks like a label or an expression but is not. |
8476 | // This will improve error messages. |
8477 | if (isRegister() || isModifier()) |
8478 | return ParseStatus::NoMatch; |
8479 | |
8480 | if (!parseExpr(Operands)) |
8481 | return ParseStatus::Failure; |
8482 | |
8483 | AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); |
8484 | assert(Opr.isImm() || Opr.isExpr()); |
8485 | SMLoc Loc = Opr.getStartLoc(); |
8486 | |
8487 | // Currently we do not support arbitrary expressions as branch targets. |
8488 | // Only labels and absolute expressions are accepted. |
8489 | if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { |
8490 | Error(L: Loc, Msg: "expected an absolute expression or a label" ); |
8491 | } else if (Opr.isImm() && !Opr.isS16Imm()) { |
8492 | Error(L: Loc, Msg: "expected a 16-bit signed jump offset" ); |
8493 | } |
8494 | |
8495 | return ParseStatus::Success; |
8496 | } |
8497 | |
8498 | //===----------------------------------------------------------------------===// |
8499 | // Boolean holding registers |
8500 | //===----------------------------------------------------------------------===// |
8501 | |
8502 | ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { |
8503 | return parseReg(Operands); |
8504 | } |
8505 | |
8506 | //===----------------------------------------------------------------------===// |
8507 | // mubuf |
8508 | //===----------------------------------------------------------------------===// |
8509 | |
8510 | void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, |
8511 | const OperandVector &Operands, |
8512 | bool IsAtomic) { |
8513 | OptionalImmIndexMap OptionalIdx; |
8514 | unsigned FirstOperandIdx = 1; |
8515 | bool IsAtomicReturn = false; |
8516 | |
8517 | if (IsAtomic) { |
8518 | IsAtomicReturn = MII.get(Opcode: Inst.getOpcode()).TSFlags & |
8519 | SIInstrFlags::IsAtomicRet; |
8520 | } |
8521 | |
8522 | for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { |
8523 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
8524 | |
8525 | // Add the register arguments |
8526 | if (Op.isReg()) { |
8527 | Op.addRegOperands(Inst, N: 1); |
8528 | // Insert a tied src for atomic return dst. |
8529 | // This cannot be postponed as subsequent calls to |
8530 | // addImmOperands rely on the correct number of MC operands. |
8531 | if (IsAtomicReturn && i == FirstOperandIdx) |
8532 | Op.addRegOperands(Inst, N: 1); |
8533 | continue; |
8534 | } |
8535 | |
8536 | // Handle the case where soffset is an immediate |
8537 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { |
8538 | Op.addImmOperands(Inst, N: 1); |
8539 | continue; |
8540 | } |
8541 | |
8542 | // Handle tokens like 'offen' which are sometimes hard-coded into the |
8543 | // asm string. There are no MCInst operands for these. |
8544 | if (Op.isToken()) { |
8545 | continue; |
8546 | } |
8547 | assert(Op.isImm()); |
8548 | |
8549 | // Handle optional arguments |
8550 | OptionalIdx[Op.getImmTy()] = i; |
8551 | } |
8552 | |
8553 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOffset); |
8554 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyCPol, Default: 0); |
8555 | } |
8556 | |
8557 | //===----------------------------------------------------------------------===// |
8558 | // smrd |
8559 | //===----------------------------------------------------------------------===// |
8560 | |
8561 | bool AMDGPUOperand::isSMRDOffset8() const { |
8562 | return isImmLiteral() && isUInt<8>(x: getImm()); |
8563 | } |
8564 | |
8565 | bool AMDGPUOperand::isSMEMOffset() const { |
8566 | // Offset range is checked later by validator. |
8567 | return isImmLiteral(); |
8568 | } |
8569 | |
8570 | bool AMDGPUOperand::isSMRDLiteralOffset() const { |
8571 | // 32-bit literals are only supported on CI and we only want to use them |
8572 | // when the offset is > 8 bits. |
8573 | return isImmLiteral() && !isUInt<8>(x: getImm()) && isUInt<32>(x: getImm()); |
8574 | } |
8575 | |
8576 | //===----------------------------------------------------------------------===// |
8577 | // vop3 |
8578 | //===----------------------------------------------------------------------===// |
8579 | |
8580 | static bool ConvertOmodMul(int64_t &Mul) { |
8581 | if (Mul != 1 && Mul != 2 && Mul != 4) |
8582 | return false; |
8583 | |
8584 | Mul >>= 1; |
8585 | return true; |
8586 | } |
8587 | |
8588 | static bool ConvertOmodDiv(int64_t &Div) { |
8589 | if (Div == 1) { |
8590 | Div = 0; |
8591 | return true; |
8592 | } |
8593 | |
8594 | if (Div == 2) { |
8595 | Div = 3; |
8596 | return true; |
8597 | } |
8598 | |
8599 | return false; |
8600 | } |
8601 | |
8602 | // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. |
8603 | // This is intentional and ensures compatibility with sp3. |
8604 | // See bug 35397 for details. |
8605 | bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { |
8606 | if (BoundCtrl == 0 || BoundCtrl == 1) { |
8607 | if (!isGFX11Plus()) |
8608 | BoundCtrl = 1; |
8609 | return true; |
8610 | } |
8611 | return false; |
8612 | } |
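// Illustrative sketch of the behaviour described above: on pre-gfx11 targets
// both of these DPP operands select the same encoding.
//   v_mov_b32 v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:0
//   v_mov_b32 v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1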
8613 | |
8614 | void AMDGPUAsmParser::onBeginOfFile() { |
8615 | if (!getParser().getStreamer().getTargetStreamer() || |
8616 | getSTI().getTargetTriple().getArch() == Triple::r600) |
8617 | return; |
8618 | |
8619 | if (!getTargetStreamer().getTargetID()) |
8620 | getTargetStreamer().initializeTargetID(STI: getSTI(), |
8621 | FeatureString: getSTI().getFeatureString()); |
8622 | |
8623 | if (isHsaAbi(STI: getSTI())) |
8624 | getTargetStreamer().EmitDirectiveAMDGCNTarget(); |
8625 | } |
8626 | |
8627 | /// Parse AMDGPU-specific expressions. |
8628 | /// |
8629 | /// expr ::= or(expr, ...) | |
8630 | /// max(expr, ...) |
8631 | /// |
8632 | bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { |
8633 | using AGVK = AMDGPUMCExpr::VariantKind; |
8634 | |
8635 | if (isToken(Kind: AsmToken::Identifier)) { |
8636 | StringRef TokenId = getTokenStr(); |
8637 | AGVK VK = StringSwitch<AGVK>(TokenId) |
8638 | .Case(S: "max" , Value: AGVK::AGVK_Max) |
8639 | .Case(S: "or" , Value: AGVK::AGVK_Or) |
8640 | .Case(S: "extrasgprs" , Value: AGVK::AGVK_ExtraSGPRs) |
8641 | .Case(S: "totalnumvgprs" , Value: AGVK::AGVK_TotalNumVGPRs) |
8642 | .Case(S: "alignto" , Value: AGVK::AGVK_AlignTo) |
8643 | .Case(S: "occupancy" , Value: AGVK::AGVK_Occupancy) |
8644 | .Default(Value: AGVK::AGVK_None); |
8645 | |
8646 | if (VK != AGVK::AGVK_None && peekToken().is(K: AsmToken::LParen)) { |
8647 | SmallVector<const MCExpr *, 4> Exprs; |
8648 | uint64_t CommaCount = 0; |
8649 | lex(); // Eat Arg ('or', 'max', 'occupancy', etc.) |
8650 | lex(); // Eat '(' |
8651 | while (true) { |
8652 | if (trySkipToken(Kind: AsmToken::RParen)) { |
8653 | if (Exprs.empty()) { |
8654 | Error(L: getToken().getLoc(), |
8655 | Msg: "empty " + Twine(TokenId) + " expression" ); |
8656 | return true; |
8657 | } |
8658 | if (CommaCount + 1 != Exprs.size()) { |
8659 | Error(L: getToken().getLoc(), |
8660 | Msg: "mismatch of commas in " + Twine(TokenId) + " expression" ); |
8661 | return true; |
8662 | } |
8663 | Res = AMDGPUMCExpr::create(Kind: VK, Args: Exprs, Ctx&: getContext()); |
8664 | return false; |
8665 | } |
8666 | const MCExpr *Expr; |
8667 | if (getParser().parseExpression(Res&: Expr, EndLoc)) |
8668 | return true; |
8669 | Exprs.push_back(Elt: Expr); |
8670 | bool LastTokenWasComma = trySkipToken(Kind: AsmToken::Comma); |
8671 | if (LastTokenWasComma) |
8672 | CommaCount++; |
8673 | if (!LastTokenWasComma && !isToken(Kind: AsmToken::RParen)) { |
8674 | Error(L: getToken().getLoc(), |
8675 | Msg: "unexpected token in " + Twine(TokenId) + " expression" ); |
8676 | return true; |
8677 | } |
8678 | } |
8679 | } |
8680 | } |
8681 | return getParser().parsePrimaryExpr(Res, EndLoc, TypeInfo: nullptr); |
8682 | } |
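// Illustrative sketch of the extended expressions (the symbol names here are
// hypothetical):
//   .set agg_vgprs, max(kernel_a.num_vgpr, kernel_b.num_vgpr)
//   .set any_trap,  or(uses_trap_a, uses_trap_b)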
8683 | |
8684 | ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { |
8685 | StringRef Name = getTokenStr(); |
8686 | if (Name == "mul" ) { |
8687 | return parseIntWithPrefix(Prefix: "mul" , Operands, |
8688 | ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodMul); |
8689 | } |
8690 | |
8691 | if (Name == "div" ) { |
8692 | return parseIntWithPrefix(Prefix: "div" , Operands, |
8693 | ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodDiv); |
8694 | } |
8695 | |
8696 | return ParseStatus::NoMatch; |
8697 | } |
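// Illustrative examples of output-modifier syntax (a sketch): ConvertOmodMul
// maps mul:1/2/4 to OMOD 0/1/2 and ConvertOmodDiv maps div:1/2 to OMOD 0/3.
//   v_add_f32 v0, v1, v2 mul:2
//   v_add_f32 v0, v1, v2 div:2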
8698 | |
8699 | // Determines which bit DST_OP_SEL occupies in the op_sel operand according to |
8700 | // the number of src operands present, then copies that bit into src0_modifiers. |
8701 | static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) { |
8702 | int Opc = Inst.getOpcode(); |
8703 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
8704 | if (OpSelIdx == -1) |
8705 | return; |
8706 | |
8707 | int SrcNum; |
8708 | const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1, |
8709 | AMDGPU::OpName::src2}; |
8710 | for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: Ops[SrcNum]); |
8711 | ++SrcNum) |
8712 | ; |
8713 | assert(SrcNum > 0); |
8714 | |
8715 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
8716 | |
8717 | int DstIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst); |
8718 | if (DstIdx == -1) |
8719 | return; |
8720 | |
8721 | const MCOperand &DstOp = Inst.getOperand(i: DstIdx); |
8722 | int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::src0_modifiers); |
8723 | uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm(); |
8724 | if (DstOp.isReg() && |
8725 | MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: DstOp.getReg())) { |
8726 | if (AMDGPU::isHi16Reg(Reg: DstOp.getReg(), MRI)) |
8727 | ModVal |= SISrcMods::DST_OP_SEL; |
8728 | } else { |
8729 | if ((OpSel & (1 << SrcNum)) != 0) |
8730 | ModVal |= SISrcMods::DST_OP_SEL; |
8731 | } |
8732 | Inst.getOperand(i: ModIdx).setImm(ModVal); |
8733 | } |
8734 | |
8735 | void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, |
8736 | const OperandVector &Operands) { |
8737 | cvtVOP3P(Inst, Operands); |
8738 | cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI()); |
8739 | } |
8740 | |
8741 | void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, |
8742 | OptionalImmIndexMap &OptionalIdx) { |
8743 | cvtVOP3P(Inst, Operands, OptionalIdx); |
8744 | cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI()); |
8745 | } |
8746 | |
8747 | static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { |
8748 | return |
8749 | // 1. This operand is input modifiers |
8750 | Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS |
8751 | // 2. This is not last operand |
8752 | && Desc.NumOperands > (OpNum + 1) |
8753 | // 3. Next operand is register class |
8754 | && Desc.operands()[OpNum + 1].RegClass != -1 |
8755 | // 4. Next register is not tied to any other operand |
8756 | && Desc.getOperandConstraint(OpNum: OpNum + 1, |
8757 | Constraint: MCOI::OperandConstraint::TIED_TO) == -1; |
8758 | } |
8759 | |
8760 | void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) |
8761 | { |
8762 | OptionalImmIndexMap OptionalIdx; |
8763 | unsigned Opc = Inst.getOpcode(); |
8764 | |
8765 | unsigned I = 1; |
8766 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
8767 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8768 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1); |
8769 | } |
8770 | |
8771 | for (unsigned E = Operands.size(); I != E; ++I) { |
8772 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8773 | if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) { |
8774 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8775 | } else if (Op.isInterpSlot() || Op.isInterpAttr() || |
8776 | Op.isInterpAttrChan()) { |
8777 | Inst.addOperand(Op: MCOperand::createImm(Val: Op.getImm())); |
8778 | } else if (Op.isImmModifier()) { |
8779 | OptionalIdx[Op.getImmTy()] = I; |
8780 | } else { |
8781 | llvm_unreachable("unhandled operand type" ); |
8782 | } |
8783 | } |
8784 | |
8785 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::high)) |
8786 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8787 | ImmT: AMDGPUOperand::ImmTyHigh); |
8788 | |
8789 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp)) |
8790 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8791 | ImmT: AMDGPUOperand::ImmTyClamp); |
8792 | |
8793 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod)) |
8794 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8795 | ImmT: AMDGPUOperand::ImmTyOModSI); |
8796 | } |
8797 | |
8798 | void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) |
8799 | { |
8800 | OptionalImmIndexMap OptionalIdx; |
8801 | unsigned Opc = Inst.getOpcode(); |
8802 | |
8803 | unsigned I = 1; |
8804 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
8805 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8806 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1); |
8807 | } |
8808 | |
8809 | for (unsigned E = Operands.size(); I != E; ++I) { |
8810 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8811 | if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) { |
8812 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8813 | } else if (Op.isImmModifier()) { |
8814 | OptionalIdx[Op.getImmTy()] = I; |
8815 | } else { |
8816 | llvm_unreachable("unhandled operand type" ); |
8817 | } |
8818 | } |
8819 | |
8820 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyClamp); |
8821 | |
8822 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::op_sel); |
8823 | if (OpSelIdx != -1) |
8824 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel); |
8825 | |
8826 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyWaitEXP); |
8827 | |
8828 | if (OpSelIdx == -1) |
8829 | return; |
8830 | |
8831 | const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1, |
8832 | AMDGPU::OpName::src2}; |
8833 | const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers, |
8834 | AMDGPU::OpName::src1_modifiers, |
8835 | AMDGPU::OpName::src2_modifiers}; |
8836 | |
8837 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
8838 | |
8839 | for (int J = 0; J < 3; ++J) { |
8840 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: Ops[J]); |
8841 | if (OpIdx == -1) |
8842 | break; |
8843 | |
8844 | int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]); |
8845 | uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm(); |
8846 | |
8847 | if ((OpSel & (1 << J)) != 0) |
8848 | ModVal |= SISrcMods::OP_SEL_0; |
8849 | if (ModOps[J] == AMDGPU::OpName::src0_modifiers && |
8850 | (OpSel & (1 << 3)) != 0) |
8851 | ModVal |= SISrcMods::DST_OP_SEL; |
8852 | |
8853 | Inst.getOperand(i: ModIdx).setImm(ModVal); |
8854 | } |
8855 | } |
8856 | void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst, |
8857 | const OperandVector &Operands) { |
8858 | OptionalImmIndexMap OptionalIdx; |
8859 | unsigned Opc = Inst.getOpcode(); |
8860 | unsigned I = 1; |
8861 | int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::cbsz); |
8862 | |
8863 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
8864 | |
8865 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) |
8866 | static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, N: 1); |
8867 | |
8868 | for (unsigned E = Operands.size(); I != E; ++I) { |
8869 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]); |
8870 | int NumOperands = Inst.getNumOperands(); |
8871 | // The order of operands in the MCInst differs from the order of the |
8872 | // parsed operands. Add dummy cbsz and blgp operands at the corresponding |
8873 | // MCInst operand indices so that the scale values are parsed correctly. |
8874 | if (NumOperands == CbszOpIdx) { |
8875 | Inst.addOperand(Op: MCOperand::createImm(Val: 0)); |
8876 | Inst.addOperand(Op: MCOperand::createImm(Val: 0)); |
8877 | } |
8878 | if (isRegOrImmWithInputMods(Desc, OpNum: NumOperands)) { |
8879 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8880 | } else if (Op.isImmModifier()) { |
8881 | OptionalIdx[Op.getImmTy()] = I; |
8882 | } else { |
8883 | Op.addRegOrImmOperands(Inst, N: 1); |
8884 | } |
8885 | } |
8886 | |
8887 | // Insert CBSZ and BLGP operands for F8F6F4 variants |
8888 | auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ); |
8889 | if (CbszIdx != OptionalIdx.end()) { |
8890 | int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm(); |
8891 | Inst.getOperand(CbszOpIdx).setImm(CbszVal); |
8892 | } |
8893 | |
8894 | int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); |
8895 | auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP); |
8896 | if (BlgpIdx != OptionalIdx.end()) { |
8897 | int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm(); |
8898 | Inst.getOperand(BlgpOpIdx).setImm(BlgpVal); |
8899 | } |
8900 | |
8901 | // Add dummy src_modifiers |
8902 | Inst.addOperand(MCOperand::createImm(0)); |
8903 | Inst.addOperand(MCOperand::createImm(0)); |
8904 | |
8905 | // Handle op_sel fields |
8906 | |
8907 | unsigned OpSel = 0; |
8908 | auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel); |
8909 | if (OpselIdx != OptionalIdx.end()) { |
8910 | OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second]) |
8911 | .getImm(); |
8912 | } |
8913 | |
8914 | unsigned OpSelHi = 0; |
8915 | auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi); |
8916 | if (OpselHiIdx != OptionalIdx.end()) { |
8917 | OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second]) |
8918 | .getImm(); |
8919 | } |
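// Fold the op_sel and op_sel_hi bits for src0 and src1 into their
// modifier operands.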
8920 | const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers, |
8921 | AMDGPU::OpName::src1_modifiers}; |
8922 | |
8923 | for (unsigned J = 0; J < 2; ++J) { |
8924 | unsigned ModVal = 0; |
8925 | if (OpSel & (1 << J)) |
8926 | ModVal |= SISrcMods::OP_SEL_0; |
8927 | if (OpSelHi & (1 << J)) |
8928 | ModVal |= SISrcMods::OP_SEL_1; |
8929 | |
8930 | const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); |
8931 | Inst.getOperand(ModIdx).setImm(ModVal); |
8932 | } |
8933 | } |
8934 | |
8935 | void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, |
8936 | OptionalImmIndexMap &OptionalIdx) { |
8937 | unsigned Opc = Inst.getOpcode(); |
8938 | |
8939 | unsigned I = 1; |
8940 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); |
8941 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8942 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); |
8943 | } |
8944 | |
8945 | for (unsigned E = Operands.size(); I != E; ++I) { |
8946 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8947 | if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { |
8948 | Op.addRegOrImmWithFPInputModsOperands(Inst, 2); |
8949 | } else if (Op.isImmModifier()) { |
8950 | OptionalIdx[Op.getImmTy()] = I; |
8951 | } else { |
8952 | Op.addRegOrImmOperands(Inst, 1); |
8953 | } |
8954 | } |
8955 | |
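// Append the optional modifier operands in the order the MCInst expects:
// byte_sel (preceded by a tied vdst_in copy when present), clamp, then omod.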
8956 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) { |
8957 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) |
8958 | Inst.addOperand(Inst.getOperand(0)); |
8959 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8960 | AMDGPUOperand::ImmTyByteSel); |
8961 | } |
8962 | |
8963 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) |
8964 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8965 | AMDGPUOperand::ImmTyClamp); |
8966 | |
8967 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) |
8968 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8969 | AMDGPUOperand::ImmTyOModSI); |
8970 | |
8971 | // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): |
8972 | // they have a src2 register operand that is tied to the dst operand. |
8973 | // The assembler does not allow modifiers on this operand, so |
8974 | // src2_modifiers must be 0. |
8975 | if (isMAC(Opc)) { |
8976 | auto *it = Inst.begin(); |
8977 | std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); |
8978 | it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 |
8979 | ++it; |
8980 | // Copy the operand to ensure it's not invalidated when Inst grows. |
8981 | Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst |
8982 | } |
8983 | } |
8984 | |
8985 | void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { |
8986 | OptionalImmIndexMap OptionalIdx; |
8987 | cvtVOP3(Inst, Operands, OptionalIdx); |
8988 | } |
8989 | |
8990 | void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, |
8991 | OptionalImmIndexMap &OptIdx) { |
8992 | const int Opc = Inst.getOpcode(); |
8993 | const MCInstrDesc &Desc = MII.get(Opc); |
8994 | |
8995 | const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; |
8996 | |
8997 | if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi || |
8998 | Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi || |
8999 | Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || |
9000 | Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || |
9001 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 || |
9002 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) { |
9003 | Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods |
9004 | Inst.addOperand(Inst.getOperand(0)); |
9005 | } |
9006 | |
9007 | // Adding vdst_in operand is already covered for these DPP instructions in |
9008 | // cvtVOP3DPP. |
9009 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) && |
9010 | !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 || |
9011 | Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 || |
9012 | Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 || |
9013 | Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 || |
9014 | Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 || |
9015 | Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 || |
9016 | Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 || |
9017 | Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 || |
9018 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 || |
9019 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || |
9020 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || |
9021 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) { |
9022 | Inst.addOperand(Inst.getOperand(0)); |
9023 | } |
9024 | |
9025 | int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3); |
9026 | if (BitOp3Idx != -1) { |
9027 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3); |
9028 | } |
9029 | |
9030 | // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 |
9031 | // instruction, and then figure out where to actually put the modifiers |
9032 | |
9033 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); |
9034 | if (OpSelIdx != -1) { |
9035 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); |
9036 | } |
9037 | |
9038 | int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); |
9039 | if (OpSelHiIdx != -1) { |
9040 | int DefaultVal = IsPacked ? -1 : 0; |
9041 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, |
9042 | DefaultVal); |
9043 | } |
9044 | |
9045 | int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); |
9046 | if (NegLoIdx != -1) |
9047 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); |
9048 | |
9049 | int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); |
9050 | if (NegHiIdx != -1) |
9051 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); |
9052 | |
9053 | const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1, |
9054 | AMDGPU::OpName::src2}; |
9055 | const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers, |
9056 | AMDGPU::OpName::src1_modifiers, |
9057 | AMDGPU::OpName::src2_modifiers}; |
9058 | |
9059 | unsigned OpSel = 0; |
9060 | unsigned OpSelHi = 0; |
9061 | unsigned NegLo = 0; |
9062 | unsigned NegHi = 0; |
9063 | |
9064 | if (OpSelIdx != -1) |
9065 | OpSel = Inst.getOperand(OpSelIdx).getImm(); |
9066 | |
9067 | if (OpSelHiIdx != -1) |
9068 | OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); |
9069 | |
9070 | if (NegLoIdx != -1) |
9071 | NegLo = Inst.getOperand(NegLoIdx).getImm(); |
9072 | |
9073 | if (NegHiIdx != -1) |
9074 | NegHi = Inst.getOperand(NegHiIdx).getImm(); |
9075 | |
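// Merge op_sel, op_sel_hi, neg_lo and neg_hi into the per-source modifier
// operands. True 16-bit VGPR sources take their hi/lo selection from the
// register itself rather than from op_sel.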
9076 | for (int J = 0; J < 3; ++J) { |
9077 | int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); |
9078 | if (OpIdx == -1) |
9079 | break; |
9080 | |
9081 | int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); |
9082 | |
9083 | if (ModIdx == -1) |
9084 | continue; |
9085 | |
9086 | uint32_t ModVal = 0; |
9087 | |
9088 | const MCOperand &SrcOp = Inst.getOperand(OpIdx); |
9089 | if (SrcOp.isReg() && getMRI() |
9090 | ->getRegClass(AMDGPU::VGPR_16RegClassID) |
9091 | .contains(SrcOp.getReg())) { |
9092 | bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI()); |
9093 | if (VGPRSuffixIsHi) |
9094 | ModVal |= SISrcMods::OP_SEL_0; |
9095 | } else { |
9096 | if ((OpSel & (1 << J)) != 0) |
9097 | ModVal |= SISrcMods::OP_SEL_0; |
9098 | } |
9099 | |
9100 | if ((OpSelHi & (1 << J)) != 0) |
9101 | ModVal |= SISrcMods::OP_SEL_1; |
9102 | |
9103 | if ((NegLo & (1 << J)) != 0) |
9104 | ModVal |= SISrcMods::NEG; |
9105 | |
9106 | if ((NegHi & (1 << J)) != 0) |
9107 | ModVal |= SISrcMods::NEG_HI; |
9108 | |
9109 | Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); |
9110 | } |
9111 | } |
9112 | |
9113 | void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { |
9114 | OptionalImmIndexMap OptIdx; |
9115 | cvtVOP3(Inst, Operands, OptIdx); |
9116 | cvtVOP3P(Inst, Operands, OptIdx); |
9117 | } |
9118 | |
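// Helper: add a source operand, including its FP modifiers when the opcode
// has a matching *_modifiers operand.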
9119 | static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, |
9120 | unsigned i, unsigned Opc, |
9121 | AMDGPU::OpName OpName) { |
9122 | if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1) |
9123 | ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2); |
9124 | else |
9125 | ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1); |
9126 | } |
9127 | |
9128 | void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) { |
9129 | unsigned Opc = Inst.getOpcode(); |
9130 | |
9131 | ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); |
9132 | addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers); |
9133 | addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers); |
9134 | ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef |
9135 | ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2 |
9136 | |
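// The remaining parsed operands are optional immediate modifiers; record
// their indices for addOptionalImmOperand below.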
9137 | OptionalImmIndexMap OptIdx; |
9138 | for (unsigned i = 5; i < Operands.size(); ++i) { |
9139 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
9140 | OptIdx[Op.getImmTy()] = i; |
9141 | } |
9142 | |
9143 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit)) |
9144 | addOptionalImmOperand(Inst, Operands, OptIdx, |
9145 | AMDGPUOperand::ImmTyIndexKey8bit); |
9146 | |
9147 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit)) |
9148 | addOptionalImmOperand(Inst, Operands, OptIdx, |
9149 | AMDGPUOperand::ImmTyIndexKey16bit); |
9150 | |
9151 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) |
9152 | addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp); |
9153 | |
9154 | cvtVOP3P(Inst, Operands, OptIdx); |
9155 | } |
9156 | |
9157 | //===----------------------------------------------------------------------===// |
9158 | // VOPD |
9159 | //===----------------------------------------------------------------------===// |
9160 | |
9161 | ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { |
9162 | if (!hasVOPD(getSTI())) |
9163 | return ParseStatus::NoMatch; |
9164 | |
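// A VOPD instruction is written as <OpX mnemonic> <OpX operands> ::
// <OpY mnemonic> <OpY operands>; consume the '::' separator and the OpY
// mnemonic here.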
9165 | if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { |
9166 | SMLoc S = getLoc(); |
9167 | lex(); |
9168 | lex(); |
9169 | Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); |
9170 | SMLoc OpYLoc = getLoc(); |
9171 | StringRef OpYName; |
9172 | if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) { |
9173 | Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc)); |
9174 | return ParseStatus::Success; |
9175 | } |
9176 | return Error(OpYLoc, "expected a VOPDY instruction after ::"); |
9177 | } |
9178 | return ParseStatus::NoMatch; |
9179 | } |
9180 | |
9181 | // Create VOPD MCInst operands using parsed assembler operands. |
9182 | void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { |
9183 | auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer |
9184 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); |
9185 | if (Op.isReg()) { |
9186 | Op.addRegOperands(Inst, 1); |
9187 | return; |
9188 | } |
9189 | if (Op.isImm()) { |
9190 | Op.addImmOperands(Inst, 1); |
9191 | return; |
9192 | } |
9193 | llvm_unreachable("Unhandled operand type in cvtVOPD"); |
9194 | }; |
9195 | |
9196 | const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); |
9197 | |
9198 | // MCInst operands are ordered as follows: |
9199 | // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] |
9200 | |
9201 | for (auto CompIdx : VOPD::COMPONENTS) { |
9202 | addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); |
9203 | } |
9204 | |
9205 | for (auto CompIdx : VOPD::COMPONENTS) { |
9206 | const auto &CInfo = InstInfo[CompIdx]; |
9207 | auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); |
9208 | for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) |
9209 | addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); |
9210 | if (CInfo.hasSrc2Acc()) |
9211 | addOp(CInfo.getIndexOfDstInParsedOperands()); |
9212 | } |
9213 | } |
9214 | |
9215 | //===----------------------------------------------------------------------===// |
9216 | // dpp |
9217 | //===----------------------------------------------------------------------===// |
9218 | |
9219 | bool AMDGPUOperand::isDPP8() const { |
9220 | return isImmTy(ImmTyDPP8); |
9221 | } |
9222 | |
9223 | bool AMDGPUOperand::isDPPCtrl() const { |
9224 | using namespace AMDGPU::DPP; |
9225 | |
9226 | bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); |
9227 | if (result) { |
9228 | int64_t Imm = getImm(); |
9229 | return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || |
9230 | (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || |
9231 | (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || |
9232 | (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || |
9233 | (Imm == DppCtrl::WAVE_SHL1) || |
9234 | (Imm == DppCtrl::WAVE_ROL1) || |
9235 | (Imm == DppCtrl::WAVE_SHR1) || |
9236 | (Imm == DppCtrl::WAVE_ROR1) || |
9237 | (Imm == DppCtrl::ROW_MIRROR) || |
9238 | (Imm == DppCtrl::ROW_HALF_MIRROR) || |
9239 | (Imm == DppCtrl::BCAST15) || |
9240 | (Imm == DppCtrl::BCAST31) || |
9241 | (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || |
9242 | (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); |
9243 | } |
9244 | return false; |
9245 | } |
9246 | |
9247 | //===----------------------------------------------------------------------===// |
9248 | // mAI |
9249 | //===----------------------------------------------------------------------===// |
9250 | |
9251 | bool AMDGPUOperand::isBLGP() const { |
9252 | return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); |
9253 | } |
9254 | |
9255 | bool AMDGPUOperand::isS16Imm() const { |
9256 | return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm())); |
9257 | } |
9258 | |
9259 | bool AMDGPUOperand::isU16Imm() const { |
9260 | return isImmLiteral() && isUInt<16>(getImm()); |
9261 | } |
9262 | |
9263 | //===----------------------------------------------------------------------===// |
9264 | // dim |
9265 | //===----------------------------------------------------------------------===// |
9266 | |
9267 | bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { |
9268 | // We want to allow "dim:1D" etc., |
9269 | // but the initial 1 is tokenized as an integer. |
9270 | std::string Token; |
9271 | if (isToken(AsmToken::Integer)) { |
9272 | SMLoc Loc = getToken().getEndLoc(); |
9273 | Token = std::string(getTokenStr()); |
9274 | lex(); |
9275 | if (getLoc() != Loc) |
9276 | return false; |
9277 | } |
9278 | |
9279 | StringRef Suffix; |
9280 | if (!parseId(Suffix)) |
9281 | return false; |
9282 | Token += Suffix; |
9283 | |
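// Also accept the SQ_RSRC_IMG_* spelling by stripping the prefix before the
// table lookup.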
9284 | StringRef DimId = Token; |
9285 | DimId.consume_front("SQ_RSRC_IMG_"); |
9286 | |
9287 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); |
9288 | if (!DimInfo) |
9289 | return false; |
9290 | |
9291 | Encoding = DimInfo->Encoding; |
9292 | return true; |
9293 | } |
9294 | |
9295 | ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { |
9296 | if (!isGFX10Plus()) |
9297 | return ParseStatus::NoMatch; |
9298 | |
9299 | SMLoc S = getLoc(); |
9300 | |
9301 | if (!trySkipId("dim", AsmToken::Colon)) |
9302 | return ParseStatus::NoMatch; |
9303 | |
9304 | unsigned Encoding; |
9305 | SMLoc Loc = getLoc(); |
9306 | if (!parseDimId(Encoding)) |
9307 | return Error(Loc, "invalid dim value"); |
9308 | |
9309 | Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, |
9310 | AMDGPUOperand::ImmTyDim)); |
9311 | return ParseStatus::Success; |
9312 | } |
9313 | |
9314 | //===----------------------------------------------------------------------===// |
9315 | // dpp |
9316 | //===----------------------------------------------------------------------===// |
9317 | |
9318 | ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { |
9319 | SMLoc S = getLoc(); |
9320 | |
9321 | if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) |
9322 | return ParseStatus::NoMatch; |
9323 | |
9324 | // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] |
9325 | |
9326 | int64_t Sels[8]; |
9327 | |
9328 | if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) |
9329 | return ParseStatus::Failure; |
9330 | |
9331 | for (size_t i = 0; i < 8; ++i) { |
9332 | if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) |
9333 | return ParseStatus::Failure; |
9334 | |
9335 | SMLoc Loc = getLoc(); |
9336 | if (getParser().parseAbsoluteExpression(Sels[i])) |
9337 | return ParseStatus::Failure; |
9338 | if (0 > Sels[i] || 7 < Sels[i]) |
9339 | return Error(Loc, "expected a 3-bit value"); |
9340 | } |
9341 | |
9342 | if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) |
9343 | return ParseStatus::Failure; |
9344 | |
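// Pack the eight 3-bit lane selectors into a single immediate; selector i
// occupies bits [3*i+2 : 3*i].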
9345 | unsigned DPP8 = 0; |
9346 | for (size_t i = 0; i < 8; ++i) |
9347 | DPP8 |= (Sels[i] << (i * 3)); |
9348 | |
9349 | Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); |
9350 | return ParseStatus::Success; |
9351 | } |
9352 | |
9353 | bool |
9354 | AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, |
9355 | const OperandVector &Operands) { |
9356 | if (Ctrl == "row_newbcast") |
9357 | return isGFX90A(); |
9358 | |
9359 | if (Ctrl == "row_share" || |
9360 | Ctrl == "row_xmask") |
9361 | return isGFX10Plus(); |
9362 | |
9363 | if (Ctrl == "wave_shl" || |
9364 | Ctrl == "wave_shr" || |
9365 | Ctrl == "wave_rol" || |
9366 | Ctrl == "wave_ror" || |
9367 | Ctrl == "row_bcast") |
9368 | return isVI() || isGFX9(); |
9369 | |
9370 | return Ctrl == "row_mirror" || |
9371 | Ctrl == "row_half_mirror" || |
9372 | Ctrl == "quad_perm" || |
9373 | Ctrl == "row_shl" || |
9374 | Ctrl == "row_shr" || |
9375 | Ctrl == "row_ror"; |
9376 | } |
9377 | |
9378 | int64_t |
9379 | AMDGPUAsmParser::parseDPPCtrlPerm() { |
9380 | // quad_perm:[%d,%d,%d,%d] |
9381 | |
9382 | if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) |
9383 | return -1; |
9384 | |
9385 | int64_t Val = 0; |
9386 | for (int i = 0; i < 4; ++i) { |
9387 | if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) |
9388 | return -1; |
9389 | |
9390 | int64_t Temp; |
9391 | SMLoc Loc = getLoc(); |
9392 | if (getParser().parseAbsoluteExpression(Temp)) |
9393 | return -1; |
9394 | if (Temp < 0 || Temp > 3) { |
9395 | Error(Loc, "expected a 2-bit value"); |
9396 | return -1; |
9397 | } |
9398 | |
9399 | Val += (Temp << i * 2); |
9400 | } |
9401 | |
9402 | if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) |
9403 | return -1; |
9404 | |
9405 | return Val; |
9406 | } |
9407 | |
9408 | int64_t |
9409 | AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { |
9410 | using namespace AMDGPU::DPP; |
9411 | |
9412 | // sel:%d |
9413 | |
9414 | int64_t Val; |
9415 | SMLoc Loc = getLoc(); |
9416 | |
9417 | if (getParser().parseAbsoluteExpression(Val)) |
9418 | return -1; |
9419 | |
9420 | struct DppCtrlCheck { |
9421 | int64_t Ctrl; |
9422 | int Lo; |
9423 | int Hi; |
9424 | }; |
9425 | |
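// Map the dpp ctrl keyword to its base encoding and the range of values it
// accepts; row_bcast is validated separately below.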
9426 | DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) |
9427 | .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) |
9428 | .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) |
9429 | .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) |
9430 | .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) |
9431 | .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) |
9432 | .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) |
9433 | .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) |
9434 | .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) |
9435 | .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) |
9436 | .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) |
9437 | .Default({-1, 0, 0}); |
9438 | |
9439 | bool Valid; |
9440 | if (Check.Ctrl == -1) { |
9441 | Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); |
9442 | Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; |
9443 | } else { |
9444 | Valid = Check.Lo <= Val && Val <= Check.Hi; |
9445 | Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); |
9446 | } |
9447 | |
9448 | if (!Valid) { |
9449 | Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); |
9450 | return -1; |
9451 | } |
9452 | |
9453 | return Val; |
9454 | } |
9455 | |
9456 | ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { |
9457 | using namespace AMDGPU::DPP; |
9458 | |
9459 | if (!isToken(AsmToken::Identifier) || |
9460 | !isSupportedDPPCtrl(getTokenStr(), Operands)) |
9461 | return ParseStatus::NoMatch; |
9462 | |
9463 | SMLoc S = getLoc(); |
9464 | int64_t Val = -1; |
9465 | StringRef Ctrl; |
9466 | |
9467 | parseId(Ctrl); |
9468 | |
9469 | if (Ctrl == "row_mirror") { |
9470 | Val = DppCtrl::ROW_MIRROR; |
9471 | } else if (Ctrl == "row_half_mirror") { |
9472 | Val = DppCtrl::ROW_HALF_MIRROR; |
9473 | } else { |
9474 | if (skipToken(AsmToken::Colon, "expected a colon")) { |
9475 | if (Ctrl == "quad_perm") { |
9476 | Val = parseDPPCtrlPerm(); |
9477 | } else { |
9478 | Val = parseDPPCtrlSel(Ctrl); |
9479 | } |
9480 | } |
9481 | } |
9482 | |
9483 | if (Val == -1) |
9484 | return ParseStatus::Failure; |
9485 | |
9486 | Operands.push_back( |
9487 | AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); |
9488 | return ParseStatus::Success; |
9489 | } |
9490 | |
9491 | void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, |
9492 | bool IsDPP8) { |
9493 | OptionalImmIndexMap OptionalIdx; |
9494 | unsigned Opc = Inst.getOpcode(); |
9495 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); |
9496 | |
9497 | // MAC instructions are special because they have an 'old' |
9498 | // operand which is not tied to dst (but assumed to be). |
9499 | // They also have a dummy, unused src2_modifiers operand. |
9500 | int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); |
9501 | int Src2ModIdx = |
9502 | AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); |
9503 | bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && |
9504 | Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; |
9505 | |
9506 | unsigned I = 1; |
9507 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
9508 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); |
9509 | } |
9510 | |
9511 | int Fi = 0; |
9512 | int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in); |
9513 | bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 || |
9514 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || |
9515 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || |
9516 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12; |
9517 | |
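// Walk the parsed operands, inserting the implicit MCInst operands (tied dst
// copies and dummy modifiers) that have no parsed counterpart.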
9518 | for (unsigned E = Operands.size(); I != E; ++I) { |
9519 | |
9520 | if (IsMAC) { |
9521 | int NumOperands = Inst.getNumOperands(); |
9522 | if (OldIdx == NumOperands) { |
9523 | // Handle old operand |
9524 | constexpr int DST_IDX = 0; |
9525 | Inst.addOperand(Inst.getOperand(DST_IDX)); |
9526 | } else if (Src2ModIdx == NumOperands) { |
9527 | // Add unused dummy src2_modifiers |
9528 | Inst.addOperand(MCOperand::createImm(0)); |
9529 | } |
9530 | } |
9531 | |
9532 | if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) { |
9533 | Inst.addOperand(Inst.getOperand(0)); |
9534 | } |
9535 | |
9536 | if (IsVOP3CvtSrDpp) { |
9537 | if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) { |
9538 | Inst.addOperand(MCOperand::createImm(0)); |
9539 | Inst.addOperand(MCOperand::createReg(MCRegister())); |
9540 | } |
9541 | } |
9542 | |
9543 | auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), |
9544 | MCOI::TIED_TO); |
9545 | if (TiedTo != -1) { |
9546 | assert((unsigned)TiedTo < Inst.getNumOperands()); |
9547 | // handle tied old or src2 for MAC instructions |
9548 | Inst.addOperand(Inst.getOperand(TiedTo)); |
9549 | } |
9550 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
9551 | // Add the register arguments |
9552 | if (IsDPP8 && Op.isDppFI()) { |
9553 | Fi = Op.getImm(); |
9554 | } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { |
9555 | Op.addRegOrImmWithFPInputModsOperands(Inst, 2); |
9556 | } else if (Op.isReg()) { |
9557 | Op.addRegOperands(Inst, 1); |
9558 | } else if (Op.isImm() && |
9559 | Desc.operands()[Inst.getNumOperands()].RegClass != -1) { |
9560 | assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); |
9561 | Op.addImmOperands(Inst, 1); |
9562 | } else if (Op.isImm()) { |
9563 | OptionalIdx[Op.getImmTy()] = I; |
9564 | } else { |
9565 | llvm_unreachable("unhandled operand type"); |
9566 | } |
9567 | } |
9568 | |
9569 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) |
9570 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9571 | AMDGPUOperand::ImmTyByteSel); |
9572 | |
9573 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) |
9574 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9575 | AMDGPUOperand::ImmTyClamp); |
9576 | |
9577 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) |
9578 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); |
9579 | |
9580 | if (Desc.TSFlags & SIInstrFlags::VOP3P) |
9581 | cvtVOP3P(Inst, Operands, OptionalIdx); |
9582 | else if (Desc.TSFlags & SIInstrFlags::VOP3) |
9583 | cvtVOP3OpSel(Inst, Operands, OptionalIdx); |
9584 | else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { |
9585 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); |
9586 | } |
9587 | |
9588 | if (IsDPP8) { |
9589 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); |
9590 | using namespace llvm::AMDGPU::DPP; |
9591 | Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0)); |
9592 | } else { |
9593 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); |
9594 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); |
9595 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); |
9596 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); |
9597 | |
9598 | if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) |
9599 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9600 | AMDGPUOperand::ImmTyDppFI); |
9601 | } |
9602 | } |
9603 | |
9604 | void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { |
9605 | OptionalImmIndexMap OptionalIdx; |
9606 | |
9607 | unsigned I = 1; |
9608 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); |
9609 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
9610 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); |
9611 | } |
9612 | |
9613 | int Fi = 0; |
9614 | for (unsigned E = Operands.size(); I != E; ++I) { |
9615 | auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), |
9616 | MCOI::TIED_TO); |
9617 | if (TiedTo != -1) { |
9618 | assert((unsigned)TiedTo < Inst.getNumOperands()); |
9619 | // handle tied old or src2 for MAC instructions |
9620 | Inst.addOperand(Inst.getOperand(TiedTo)); |
9621 | } |
9622 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
9623 | // Add the register arguments |
9624 | if (Op.isReg() && validateVccOperand(Op.getReg())) { |
9625 | // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. |
9626 | // Skip it. |
9627 | continue; |
9628 | } |
9629 | |
9630 | if (IsDPP8) { |
9631 | if (Op.isDPP8()) { |
9632 | Op.addImmOperands(Inst, 1); |
9633 | } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { |
9634 | Op.addRegWithFPInputModsOperands(Inst, 2); |
9635 | } else if (Op.isDppFI()) { |
9636 | Fi = Op.getImm(); |
9637 | } else if (Op.isReg()) { |
9638 | Op.addRegOperands(Inst, 1); |
9639 | } else { |
9640 | llvm_unreachable("Invalid operand type"); |
9641 | } |
9642 | } else { |
9643 | if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { |
9644 | Op.addRegWithFPInputModsOperands(Inst, 2); |
9645 | } else if (Op.isReg()) { |
9646 | Op.addRegOperands(Inst, 1); |
9647 | } else if (Op.isDPPCtrl()) { |
9648 | Op.addImmOperands(Inst, 1); |
9649 | } else if (Op.isImm()) { |
9650 | // Handle optional arguments |
9651 | OptionalIdx[Op.getImmTy()] = I; |
9652 | } else { |
9653 | llvm_unreachable("Invalid operand type"); |
9654 | } |
9655 | } |
9656 | } |
9657 | |
9658 | if (IsDPP8) { |
9659 | using namespace llvm::AMDGPU::DPP; |
9660 | Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0)); |
9661 | } else { |
9662 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); |
9663 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); |
9664 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); |
9665 | if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { |
9666 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9667 | AMDGPUOperand::ImmTyDppFI); |
9668 | } |
9669 | } |
9670 | } |
9671 | |
9672 | //===----------------------------------------------------------------------===// |
9673 | // sdwa |
9674 | //===----------------------------------------------------------------------===// |
9675 | |
9676 | ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, |
9677 | StringRef Prefix, |
9678 | AMDGPUOperand::ImmTy Type) { |
9679 | return parseStringOrIntWithPrefix( |
9680 | Operands, Prefix, |
9681 | {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"}, |
9682 | Type); |
9683 | } |
9684 | |
9685 | ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { |
9686 | return parseStringOrIntWithPrefix( |
9687 | Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"}, |
9688 | AMDGPUOperand::ImmTySDWADstUnused); |
9689 | } |
9690 | |
9691 | void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { |
9692 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); |
9693 | } |
9694 | |
9695 | void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { |
9696 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); |
9697 | } |
9698 | |
9699 | void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { |
9700 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); |
9701 | } |
9702 | |
9703 | void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { |
9704 | cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); |
9705 | } |
9706 | |
9707 | void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { |
9708 | cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); |
9709 | } |
9710 | |
9711 | void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, |
9712 | uint64_t BasicInstType, |
9713 | bool SkipDstVcc, |
9714 | bool SkipSrcVcc) { |
9715 | using namespace llvm::AMDGPU::SDWA; |
9716 | |
9717 | OptionalImmIndexMap OptionalIdx; |
9718 | bool SkipVcc = SkipDstVcc || SkipSrcVcc; |
9719 | bool SkippedVcc = false; |
9720 | |
9721 | unsigned I = 1; |
9722 | const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); |
9723 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
9724 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); |
9725 | } |
9726 | |
9727 | for (unsigned E = Operands.size(); I != E; ++I) { |
9728 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
9729 | if (SkipVcc && !SkippedVcc && Op.isReg() && |
9730 | (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { |
9731 | // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. |
9732 | // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) |
9733 | // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. |
9734 | // Skip VCC only if we didn't skip it on previous iteration. |
9735 | // Note that src0 and src1 occupy 2 slots each because of modifiers. |
9736 | if (BasicInstType == SIInstrFlags::VOP2 && |
9737 | ((SkipDstVcc && Inst.getNumOperands() == 1) || |
9738 | (SkipSrcVcc && Inst.getNumOperands() == 5))) { |
9739 | SkippedVcc = true; |
9740 | continue; |
9741 | } |
9742 | if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) { |
9743 | SkippedVcc = true; |
9744 | continue; |
9745 | } |
9746 | } |
9747 | if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { |
9748 | Op.addRegOrImmWithInputModsOperands(Inst, 2); |
9749 | } else if (Op.isImm()) { |
9750 | // Handle optional arguments |
9751 | OptionalIdx[Op.getImmTy()] = I; |
9752 | } else { |
9753 | llvm_unreachable("Invalid operand type"); |
9754 | } |
9755 | SkippedVcc = false; |
9756 | } |
9757 | |
9758 | const unsigned Opc = Inst.getOpcode(); |
9759 | if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && |
9760 | Opc != AMDGPU::V_NOP_sdwa_vi) { |
9761 | // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments |
9762 | switch (BasicInstType) { |
9763 | case SIInstrFlags::VOP1: |
9764 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) |
9765 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9766 | AMDGPUOperand::ImmTyClamp, 0); |
9767 | |
9768 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) |
9769 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9770 | AMDGPUOperand::ImmTyOModSI, 0); |
9771 | |
9772 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel)) |
9773 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9774 | AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); |
9775 | |
9776 | if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused)) |
9777 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9778 | AMDGPUOperand::ImmTySDWADstUnused, |
9779 | DstUnused::UNUSED_PRESERVE); |
9780 | |
9781 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); |
9782 | break; |
9783 | |
9784 | case SIInstrFlags::VOP2: |
9785 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9786 | AMDGPUOperand::ImmTyClamp, 0); |
9787 | |
9788 | if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod)) |
9789 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); |
9790 | |
9791 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); |
9792 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE); |
9793 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); |
9794 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); |
9795 | break; |
9796 | |
9797 | case SIInstrFlags::VOPC: |
9798 | if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) |
9799 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
9800 | AMDGPUOperand::ImmTyClamp, 0); |
9801 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); |
9802 | addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); |
9803 | break; |
9804 | |
9805 | default: |
9806 | llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); |
9807 | } |
9808 | } |
9809 | |
9810 | // Special case v_mac_{f16, f32}: |
9811 | // they have a src2 register operand that is tied to the dst operand. |
9812 | if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || |
9813 | Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { |
9814 | auto *it = Inst.begin(); |
9815 | std::advance( |
9816 | it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); |
9817 | Inst.insert(it, Inst.getOperand(0)); // src2 = dst |
9818 | } |
9819 | } |
9820 | |
9821 | /// Force static initialization. |
9822 | extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void |
9823 | LLVMInitializeAMDGPUAsmParser() { |
9824 | RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target()); |
9825 | RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); |
9826 | } |
9827 | |
9828 | #define GET_REGISTER_MATCHER |
9829 | #define GET_MATCHER_IMPLEMENTATION |
9830 | #define GET_MNEMONIC_SPELL_CHECKER |
9831 | #define GET_MNEMONIC_CHECKER |
9832 | #include "AMDGPUGenAsmMatcher.inc" |
9833 | |
9834 | ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, |
9835 | unsigned MCK) { |
9836 | switch (MCK) { |
9837 | case MCK_addr64: |
9838 | return parseTokenOp("addr64", Operands); |
9839 | case MCK_done: |
9840 | return parseTokenOp("done", Operands); |
9841 | case MCK_idxen: |
9842 | return parseTokenOp("idxen", Operands); |
9843 | case MCK_lds: |
9844 | return parseTokenOp("lds", Operands); |
9845 | case MCK_offen: |
9846 | return parseTokenOp("offen", Operands); |
9847 | case MCK_off: |
9848 | return parseTokenOp("off", Operands); |
9849 | case MCK_row_95_en: |
9850 | return parseTokenOp("row_en", Operands); |
9851 | case MCK_gds: |
9852 | return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS); |
9853 | case MCK_tfe: |
9854 | return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE); |
9855 | } |
9856 | return tryCustomParseOperand(Operands, MCK); |
9857 | } |
9858 | |
9859 | // This function should be defined after the auto-generated include so that |
9860 | // the MatchClassKind enum is defined. |
9861 | unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, |
9862 | unsigned Kind) { |
9863 | // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). |
9864 | // But MatchInstructionImpl() expects a token and fails to validate the |
9865 | // operand. This method checks whether we were given an immediate operand |
9866 | // where the matcher expects the corresponding token. |
9867 | AMDGPUOperand &Operand = (AMDGPUOperand&)Op; |
9868 | switch (Kind) { |
9869 | case MCK_addr64: |
9870 | return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; |
9871 | case MCK_gds: |
9872 | return Operand.isGDS() ? Match_Success : Match_InvalidOperand; |
9873 | case MCK_lds: |
9874 | return Operand.isLDS() ? Match_Success : Match_InvalidOperand; |
9875 | case MCK_idxen: |
9876 | return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; |
9877 | case MCK_offen: |
9878 | return Operand.isOffen() ? Match_Success : Match_InvalidOperand; |
9879 | case MCK_tfe: |
9880 | return Operand.isTFE() ? Match_Success : Match_InvalidOperand; |
9881 | case MCK_SSrc_b32: |
9882 | // When operands have expression values, they will return true for isToken, |
9883 | // because it is not possible to distinguish between a token and an |
9884 | // expression at parse time. MatchInstructionImpl() will always try to |
9885 | // match an operand as a token, when isToken returns true, and when the |
9886 | // name of the expression is not a valid token, the match will fail, |
9887 | // so we need to handle it here. |
9888 | return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand; |
9889 | case MCK_SSrc_f32: |
9890 | return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand; |
9891 | case MCK_SOPPBrTarget: |
9892 | return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand; |
9893 | case MCK_VReg32OrOff: |
9894 | return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; |
9895 | case MCK_InterpSlot: |
9896 | return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; |
9897 | case MCK_InterpAttr: |
9898 | return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; |
9899 | case MCK_InterpAttrChan: |
9900 | return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand; |
9901 | case MCK_SReg_64: |
9902 | case MCK_SReg_64_XEXEC: |
9903 | // Null is defined as a 32-bit register but |
9904 | // it should also be enabled with 64-bit operands or larger. |
9905 | // The following code enables it for SReg_64 and larger operands |
9906 | // used as source and destination. Remaining source |
9907 | // operands are handled in isInlinableImm. |
9908 | case MCK_SReg_96: |
9909 | case MCK_SReg_128: |
9910 | case MCK_SReg_256: |
9911 | case MCK_SReg_512: |
9912 | return Operand.isNull() ? Match_Success : Match_InvalidOperand; |
9913 | default: |
9914 | return Match_InvalidOperand; |
9915 | } |
9916 | } |
9917 | |
9918 | //===----------------------------------------------------------------------===// |
9919 | // endpgm |
9920 | //===----------------------------------------------------------------------===// |
9921 | |
9922 | ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) { |
9923 | SMLoc S = getLoc(); |
9924 | int64_t Imm = 0; |
9925 | |
9926 | if (!parseExpr(Imm)) { |
9927 | // The operand is optional, if not present default to 0 |
9928 | Imm = 0; |
9929 | } |
9930 | |
9931 | if (!isUInt<16>(Imm)) |
9932 | return Error(S, "expected a 16-bit value"); |
9933 | |
9934 | Operands.push_back( |
9935 | AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); |
9936 | return ParseStatus::Success; |
9937 | } |
9938 | |
9939 | bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } |
9940 | |
9941 | //===----------------------------------------------------------------------===// |
9942 | // Split Barrier |
9943 | //===----------------------------------------------------------------------===// |
9944 | |
9945 | bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); } |
9946 | |