//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 | |
9 | #include "AMDKernelCodeT.h" |
10 | #include "MCTargetDesc/AMDGPUMCExpr.h" |
11 | #include "MCTargetDesc/AMDGPUMCKernelDescriptor.h" |
12 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
13 | #include "MCTargetDesc/AMDGPUTargetStreamer.h" |
14 | #include "SIDefines.h" |
15 | #include "SIInstrInfo.h" |
16 | #include "SIRegisterInfo.h" |
17 | #include "TargetInfo/AMDGPUTargetInfo.h" |
18 | #include "Utils/AMDGPUAsmUtils.h" |
19 | #include "Utils/AMDGPUBaseInfo.h" |
20 | #include "Utils/AMDKernelCodeTUtils.h" |
21 | #include "llvm/ADT/APFloat.h" |
22 | #include "llvm/ADT/SmallBitVector.h" |
23 | #include "llvm/ADT/StringSet.h" |
24 | #include "llvm/ADT/Twine.h" |
25 | #include "llvm/BinaryFormat/ELF.h" |
26 | #include "llvm/CodeGenTypes/MachineValueType.h" |
27 | #include "llvm/MC/MCAsmInfo.h" |
28 | #include "llvm/MC/MCContext.h" |
29 | #include "llvm/MC/MCExpr.h" |
30 | #include "llvm/MC/MCInst.h" |
31 | #include "llvm/MC/MCInstrDesc.h" |
32 | #include "llvm/MC/MCParser/MCAsmLexer.h" |
33 | #include "llvm/MC/MCParser/MCAsmParser.h" |
34 | #include "llvm/MC/MCParser/MCParsedAsmOperand.h" |
35 | #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
36 | #include "llvm/MC/MCSymbol.h" |
37 | #include "llvm/MC/TargetRegistry.h" |
38 | #include "llvm/Support/AMDGPUMetadata.h" |
39 | #include "llvm/Support/AMDHSAKernelDescriptor.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include "llvm/Support/MathExtras.h" |
42 | #include "llvm/TargetParser/TargetParser.h" |
43 | #include <optional> |
44 | |
45 | using namespace llvm; |
46 | using namespace llvm::AMDGPU; |
47 | using namespace llvm::amdhsa; |
48 | |
49 | namespace { |
50 | |
51 | class AMDGPUAsmParser; |
52 | |
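// Coarse register categories produced by the register parser: vector, scalar,
// and accumulator GPRs, trap temporaries (TTMP), and other special registers.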
53 | enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; |
54 | |
55 | //===----------------------------------------------------------------------===// |
56 | // Operand |
57 | //===----------------------------------------------------------------------===// |
58 | |
59 | class AMDGPUOperand : public MCParsedAsmOperand { |
60 | enum KindTy { |
61 | Token, |
62 | Immediate, |
63 | Register, |
64 | Expression |
65 | } Kind; |
66 | |
67 | SMLoc StartLoc, EndLoc; |
68 | const AMDGPUAsmParser *AsmParser; |
69 | |
70 | public: |
71 | AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) |
72 | : Kind(Kind_), AsmParser(AsmParser_) {} |
73 | |
74 | using Ptr = std::unique_ptr<AMDGPUOperand>; |
75 | |
76 | struct Modifiers { |
77 | bool Abs = false; |
78 | bool Neg = false; |
79 | bool Sext = false; |
80 | bool Lit = false; |
81 | |
82 | bool hasFPModifiers() const { return Abs || Neg; } |
83 | bool hasIntModifiers() const { return Sext; } |
84 | bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } |
85 | |
86 | int64_t getFPModifiersOperand() const { |
87 | int64_t Operand = 0; |
88 | Operand |= Abs ? SISrcMods::ABS : 0u; |
89 | Operand |= Neg ? SISrcMods::NEG : 0u; |
90 | return Operand; |
91 | } |
92 | |
93 | int64_t getIntModifiersOperand() const { |
94 | int64_t Operand = 0; |
95 | Operand |= Sext ? SISrcMods::SEXT : 0u; |
96 | return Operand; |
97 | } |
98 | |
99 | int64_t getModifiersOperand() const { |
      assert(!(hasFPModifiers() && hasIntModifiers()) &&
             "fp and int modifiers should not be used simultaneously");
102 | if (hasFPModifiers()) |
103 | return getFPModifiersOperand(); |
104 | if (hasIntModifiers()) |
105 | return getIntModifiersOperand(); |
106 | return 0; |
107 | } |
108 | |
109 | friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); |
110 | }; |
111 | |
112 | enum ImmTy { |
113 | ImmTyNone, |
114 | ImmTyGDS, |
115 | ImmTyLDS, |
116 | ImmTyOffen, |
117 | ImmTyIdxen, |
118 | ImmTyAddr64, |
119 | ImmTyOffset, |
120 | ImmTyInstOffset, |
121 | ImmTyOffset0, |
122 | ImmTyOffset1, |
123 | ImmTySMEMOffsetMod, |
124 | ImmTyCPol, |
125 | ImmTyTFE, |
126 | ImmTyD16, |
127 | ImmTyClamp, |
128 | ImmTyOModSI, |
129 | ImmTySDWADstSel, |
130 | ImmTySDWASrc0Sel, |
131 | ImmTySDWASrc1Sel, |
132 | ImmTySDWADstUnused, |
133 | ImmTyDMask, |
134 | ImmTyDim, |
135 | ImmTyUNorm, |
136 | ImmTyDA, |
137 | ImmTyR128A16, |
138 | ImmTyA16, |
139 | ImmTyLWE, |
140 | ImmTyExpTgt, |
141 | ImmTyExpCompr, |
142 | ImmTyExpVM, |
143 | ImmTyFORMAT, |
144 | ImmTyHwreg, |
145 | ImmTyOff, |
146 | ImmTySendMsg, |
147 | ImmTyInterpSlot, |
148 | ImmTyInterpAttr, |
149 | ImmTyInterpAttrChan, |
150 | ImmTyOpSel, |
151 | ImmTyOpSelHi, |
152 | ImmTyNegLo, |
153 | ImmTyNegHi, |
154 | ImmTyIndexKey8bit, |
155 | ImmTyIndexKey16bit, |
156 | ImmTyDPP8, |
157 | ImmTyDppCtrl, |
158 | ImmTyDppRowMask, |
159 | ImmTyDppBankMask, |
160 | ImmTyDppBoundCtrl, |
161 | ImmTyDppFI, |
162 | ImmTySwizzle, |
163 | ImmTyGprIdxMode, |
164 | ImmTyHigh, |
165 | ImmTyBLGP, |
166 | ImmTyCBSZ, |
167 | ImmTyABID, |
168 | ImmTyEndpgm, |
169 | ImmTyWaitVDST, |
170 | ImmTyWaitEXP, |
171 | ImmTyWaitVAVDst, |
172 | ImmTyWaitVMVSrc, |
173 | ImmTyByteSel, |
174 | }; |
175 | |
176 | // Immediate operand kind. |
177 | // It helps to identify the location of an offending operand after an error. |
178 | // Note that regular literals and mandatory literals (KImm) must be handled |
179 | // differently. When looking for an offending operand, we should usually |
180 | // ignore mandatory literals because they are part of the instruction and |
181 | // cannot be changed. Report location of mandatory operands only for VOPD, |
182 | // when both OpX and OpY have a KImm and there are no other literals. |
183 | enum ImmKindTy { |
184 | ImmKindTyNone, |
185 | ImmKindTyLiteral, |
186 | ImmKindTyMandatoryLiteral, |
187 | ImmKindTyConst, |
188 | }; |
189 | |
190 | private: |
191 | struct TokOp { |
192 | const char *Data; |
193 | unsigned Length; |
194 | }; |
195 | |
196 | struct ImmOp { |
197 | int64_t Val; |
198 | ImmTy Type; |
199 | bool IsFPImm; |
200 | mutable ImmKindTy Kind; |
201 | Modifiers Mods; |
202 | }; |
203 | |
204 | struct RegOp { |
205 | unsigned RegNo; |
206 | Modifiers Mods; |
207 | }; |
208 | |
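  // Exactly one of the following union members is in use at any time, as
  // selected by Kind above.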
209 | union { |
210 | TokOp Tok; |
211 | ImmOp Imm; |
212 | RegOp Reg; |
213 | const MCExpr *Expr; |
214 | }; |
215 | |
216 | public: |
217 | bool isToken() const override { return Kind == Token; } |
218 | |
219 | bool isSymbolRefExpr() const { |
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
221 | } |
222 | |
223 | bool isImm() const override { |
224 | return Kind == Immediate; |
225 | } |
226 | |
227 | void setImmKindNone() const { |
228 | assert(isImm()); |
229 | Imm.Kind = ImmKindTyNone; |
230 | } |
231 | |
232 | void setImmKindLiteral() const { |
233 | assert(isImm()); |
234 | Imm.Kind = ImmKindTyLiteral; |
235 | } |
236 | |
237 | void setImmKindMandatoryLiteral() const { |
238 | assert(isImm()); |
239 | Imm.Kind = ImmKindTyMandatoryLiteral; |
240 | } |
241 | |
242 | void setImmKindConst() const { |
243 | assert(isImm()); |
244 | Imm.Kind = ImmKindTyConst; |
245 | } |
246 | |
247 | bool IsImmKindLiteral() const { |
248 | return isImm() && Imm.Kind == ImmKindTyLiteral; |
249 | } |
250 | |
251 | bool IsImmKindMandatoryLiteral() const { |
252 | return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral; |
253 | } |
254 | |
255 | bool isImmKindConst() const { |
256 | return isImm() && Imm.Kind == ImmKindTyConst; |
257 | } |
258 | |
259 | bool isInlinableImm(MVT type) const; |
260 | bool isLiteralImm(MVT type) const; |
261 | |
262 | bool isRegKind() const { |
263 | return Kind == Register; |
264 | } |
265 | |
266 | bool isReg() const override { |
267 | return isRegKind() && !hasModifiers(); |
268 | } |
269 | |
270 | bool isRegOrInline(unsigned RCID, MVT type) const { |
271 | return isRegClass(RCID) || isInlinableImm(type); |
272 | } |
273 | |
274 | bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { |
275 | return isRegOrInline(RCID, type) || isLiteralImm(type); |
276 | } |
277 | |
  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }
346 | |
347 | bool isVReg32OrOff() const { |
348 | return isOff() || isVReg32(); |
349 | } |
350 | |
351 | bool isNull() const { |
352 | return isRegKind() && getReg() == AMDGPU::SGPR_NULL; |
353 | } |
354 | |
355 | bool isVRegWithInputMods() const; |
356 | template <bool IsFake16> bool isT16VRegWithInputMods() const; |
357 | |
358 | bool isSDWAOperand(MVT type) const; |
359 | bool isSDWAFP16Operand() const; |
360 | bool isSDWAFP32Operand() const; |
361 | bool isSDWAInt16Operand() const; |
362 | bool isSDWAInt32Operand() const; |
363 | |
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
405 | |
406 | bool isRegOrImm() const { |
407 | return isReg() || isImm(); |
408 | } |
409 | |
410 | bool isRegClass(unsigned RCID) const; |
411 | |
412 | bool isInlineValue() const; |
413 | |
414 | bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { |
415 | return isRegOrInline(RCID, type) && !hasModifiers(); |
416 | } |
417 | |
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrc_b64() || isLiteralImm(MVT::i64);
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
508 | |
509 | bool isVCSrc_b32() const { |
510 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i32); |
511 | } |
512 | |
513 | bool isVCSrcB64() const { |
514 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::i64); |
515 | } |
516 | |
517 | bool isVCSrcTB16() const { |
518 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::i16); |
519 | } |
520 | |
521 | bool isVCSrcTB16_Lo128() const { |
522 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::i16); |
523 | } |
524 | |
525 | bool isVCSrcFake16B16_Lo128() const { |
526 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::i16); |
527 | } |
528 | |
529 | bool isVCSrc_b16() const { |
530 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::i16); |
531 | } |
532 | |
533 | bool isVCSrc_v2b16() const { return isVCSrc_b16(); } |
534 | |
535 | bool isVCSrc_f32() const { |
536 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f32); |
537 | } |
538 | |
539 | bool isVCSrcF64() const { |
540 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_64RegClassID, type: MVT::f64); |
541 | } |
542 | |
543 | bool isVCSrcTBF16() const { |
544 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::bf16); |
545 | } |
546 | |
547 | bool isVCSrcTF16() const { |
548 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16RegClassID, type: MVT::f16); |
549 | } |
550 | |
551 | bool isVCSrcTBF16_Lo128() const { |
552 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::bf16); |
553 | } |
554 | |
555 | bool isVCSrcTF16_Lo128() const { |
556 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_16_Lo128RegClassID, type: MVT::f16); |
557 | } |
558 | |
559 | bool isVCSrcFake16BF16_Lo128() const { |
560 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::bf16); |
561 | } |
562 | |
563 | bool isVCSrcFake16F16_Lo128() const { |
564 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32_Lo128RegClassID, type: MVT::f16); |
565 | } |
566 | |
567 | bool isVCSrc_bf16() const { |
568 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::bf16); |
569 | } |
570 | |
571 | bool isVCSrc_f16() const { |
572 | return isRegOrInlineNoMods(RCID: AMDGPU::VS_32RegClassID, type: MVT::f16); |
573 | } |
574 | |
575 | bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); } |
576 | |
577 | bool isVCSrc_v2f16() const { return isVCSrc_f16(); } |
578 | |
579 | bool isVSrc_b32() const { |
580 | return isVCSrc_f32() || isLiteralImm(type: MVT::i32) || isExpr(); |
581 | } |
582 | |
583 | bool isVSrc_b64() const { return isVCSrcF64() || isLiteralImm(type: MVT::i64); } |
584 | |
585 | bool isVSrcT_b16() const { return isVCSrcTB16() || isLiteralImm(type: MVT::i16); } |
586 | |
587 | bool isVSrcT_b16_Lo128() const { |
588 | return isVCSrcTB16_Lo128() || isLiteralImm(type: MVT::i16); |
589 | } |
590 | |
591 | bool isVSrcFake16_b16_Lo128() const { |
592 | return isVCSrcFake16B16_Lo128() || isLiteralImm(type: MVT::i16); |
593 | } |
594 | |
595 | bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(type: MVT::i16); } |
596 | |
597 | bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(type: MVT::v2i16); } |
598 | |
599 | bool isVCSrcV2FP32() const { |
600 | return isVCSrcF64(); |
601 | } |
602 | |
603 | bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(type: MVT::v2f32); } |
604 | |
605 | bool isVCSrcV2INT32() const { |
606 | return isVCSrcB64(); |
607 | } |
608 | |
609 | bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(type: MVT::v2i32); } |
610 | |
611 | bool isVSrc_f32() const { |
612 | return isVCSrc_f32() || isLiteralImm(type: MVT::f32) || isExpr(); |
613 | } |
614 | |
615 | bool isVSrc_f64() const { return isVCSrcF64() || isLiteralImm(type: MVT::f64); } |
616 | |
617 | bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(type: MVT::bf16); } |
618 | |
619 | bool isVSrcT_f16() const { return isVCSrcTF16() || isLiteralImm(type: MVT::f16); } |
620 | |
621 | bool isVSrcT_bf16_Lo128() const { |
622 | return isVCSrcTBF16_Lo128() || isLiteralImm(type: MVT::bf16); |
623 | } |
624 | |
625 | bool isVSrcT_f16_Lo128() const { |
626 | return isVCSrcTF16_Lo128() || isLiteralImm(type: MVT::f16); |
627 | } |
628 | |
629 | bool isVSrcFake16_bf16_Lo128() const { |
630 | return isVCSrcFake16BF16_Lo128() || isLiteralImm(type: MVT::bf16); |
631 | } |
632 | |
633 | bool isVSrcFake16_f16_Lo128() const { |
634 | return isVCSrcFake16F16_Lo128() || isLiteralImm(type: MVT::f16); |
635 | } |
636 | |
637 | bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(type: MVT::bf16); } |
638 | |
639 | bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(type: MVT::f16); } |
640 | |
641 | bool isVSrc_v2bf16() const { |
642 | return isVSrc_bf16() || isLiteralImm(type: MVT::v2bf16); |
643 | } |
644 | |
645 | bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(type: MVT::v2f16); } |
646 | |
647 | bool isVISrcB32() const { |
648 | return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i32); |
649 | } |
650 | |
651 | bool isVISrcB16() const { |
652 | return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::i16); |
653 | } |
654 | |
655 | bool isVISrcV2B16() const { |
656 | return isVISrcB16(); |
657 | } |
658 | |
659 | bool isVISrcF32() const { |
660 | return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f32); |
661 | } |
662 | |
663 | bool isVISrcF16() const { |
664 | return isRegOrInlineNoMods(RCID: AMDGPU::VGPR_32RegClassID, type: MVT::f16); |
665 | } |
666 | |
667 | bool isVISrcV2F16() const { |
668 | return isVISrcF16() || isVISrcB32(); |
669 | } |
670 | |
671 | bool isVISrc_64_bf16() const { |
672 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::bf16); |
673 | } |
674 | |
675 | bool isVISrc_64_f16() const { |
676 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f16); |
677 | } |
678 | |
679 | bool isVISrc_64_b32() const { |
680 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32); |
681 | } |
682 | |
683 | bool isVISrc_64B64() const { |
684 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i64); |
685 | } |
686 | |
687 | bool isVISrc_64_f64() const { |
688 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f64); |
689 | } |
690 | |
691 | bool isVISrc_64V2FP32() const { |
692 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::f32); |
693 | } |
694 | |
695 | bool isVISrc_64V2INT32() const { |
696 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_64RegClassID, type: MVT::i32); |
697 | } |
698 | |
699 | bool isVISrc_256_b32() const { |
700 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32); |
701 | } |
702 | |
703 | bool isVISrc_256_f32() const { |
704 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32); |
705 | } |
706 | |
707 | bool isVISrc_256B64() const { |
708 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i64); |
709 | } |
710 | |
711 | bool isVISrc_256_f64() const { |
712 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f64); |
713 | } |
714 | |
715 | bool isVISrc_128B16() const { |
716 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i16); |
717 | } |
718 | |
719 | bool isVISrc_128V2B16() const { |
720 | return isVISrc_128B16(); |
721 | } |
722 | |
723 | bool isVISrc_128_b32() const { |
724 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::i32); |
725 | } |
726 | |
727 | bool isVISrc_128_f32() const { |
728 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f32); |
729 | } |
730 | |
731 | bool isVISrc_256V2FP32() const { |
732 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::f32); |
733 | } |
734 | |
735 | bool isVISrc_256V2INT32() const { |
736 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_256RegClassID, type: MVT::i32); |
737 | } |
738 | |
739 | bool isVISrc_512_b32() const { |
740 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i32); |
741 | } |
742 | |
743 | bool isVISrc_512B16() const { |
744 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::i16); |
745 | } |
746 | |
747 | bool isVISrc_512V2B16() const { |
748 | return isVISrc_512B16(); |
749 | } |
750 | |
751 | bool isVISrc_512_f32() const { |
752 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f32); |
753 | } |
754 | |
755 | bool isVISrc_512F16() const { |
756 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_512RegClassID, type: MVT::f16); |
757 | } |
758 | |
759 | bool isVISrc_512V2F16() const { |
760 | return isVISrc_512F16() || isVISrc_512_b32(); |
761 | } |
762 | |
763 | bool isVISrc_1024_b32() const { |
764 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i32); |
765 | } |
766 | |
767 | bool isVISrc_1024B16() const { |
768 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::i16); |
769 | } |
770 | |
771 | bool isVISrc_1024V2B16() const { |
772 | return isVISrc_1024B16(); |
773 | } |
774 | |
775 | bool isVISrc_1024_f32() const { |
776 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f32); |
777 | } |
778 | |
779 | bool isVISrc_1024F16() const { |
780 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_1024RegClassID, type: MVT::f16); |
781 | } |
782 | |
783 | bool isVISrc_1024V2F16() const { |
784 | return isVISrc_1024F16() || isVISrc_1024_b32(); |
785 | } |
786 | |
787 | bool isAISrcB32() const { |
788 | return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i32); |
789 | } |
790 | |
791 | bool isAISrcB16() const { |
792 | return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::i16); |
793 | } |
794 | |
795 | bool isAISrcV2B16() const { |
796 | return isAISrcB16(); |
797 | } |
798 | |
799 | bool isAISrcF32() const { |
800 | return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f32); |
801 | } |
802 | |
803 | bool isAISrcF16() const { |
804 | return isRegOrInlineNoMods(RCID: AMDGPU::AGPR_32RegClassID, type: MVT::f16); |
805 | } |
806 | |
807 | bool isAISrcV2F16() const { |
808 | return isAISrcF16() || isAISrcB32(); |
809 | } |
810 | |
811 | bool isAISrc_64B64() const { |
812 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::i64); |
813 | } |
814 | |
815 | bool isAISrc_64_f64() const { |
816 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_64RegClassID, type: MVT::f64); |
817 | } |
818 | |
819 | bool isAISrc_128_b32() const { |
820 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i32); |
821 | } |
822 | |
823 | bool isAISrc_128B16() const { |
824 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::i16); |
825 | } |
826 | |
827 | bool isAISrc_128V2B16() const { |
828 | return isAISrc_128B16(); |
829 | } |
830 | |
831 | bool isAISrc_128_f32() const { |
832 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f32); |
833 | } |
834 | |
835 | bool isAISrc_128F16() const { |
836 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_128RegClassID, type: MVT::f16); |
837 | } |
838 | |
839 | bool isAISrc_128V2F16() const { |
840 | return isAISrc_128F16() || isAISrc_128_b32(); |
841 | } |
842 | |
843 | bool isVISrc_128_bf16() const { |
844 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::bf16); |
845 | } |
846 | |
847 | bool isVISrc_128_f16() const { |
848 | return isRegOrInlineNoMods(RCID: AMDGPU::VReg_128RegClassID, type: MVT::f16); |
849 | } |
850 | |
851 | bool isVISrc_128V2F16() const { |
852 | return isVISrc_128_f16() || isVISrc_128_b32(); |
853 | } |
854 | |
855 | bool isAISrc_256B64() const { |
856 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::i64); |
857 | } |
858 | |
859 | bool isAISrc_256_f64() const { |
860 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_256RegClassID, type: MVT::f64); |
861 | } |
862 | |
863 | bool isAISrc_512_b32() const { |
864 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i32); |
865 | } |
866 | |
867 | bool isAISrc_512B16() const { |
868 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::i16); |
869 | } |
870 | |
871 | bool isAISrc_512V2B16() const { |
872 | return isAISrc_512B16(); |
873 | } |
874 | |
875 | bool isAISrc_512_f32() const { |
876 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f32); |
877 | } |
878 | |
879 | bool isAISrc_512F16() const { |
880 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_512RegClassID, type: MVT::f16); |
881 | } |
882 | |
883 | bool isAISrc_512V2F16() const { |
884 | return isAISrc_512F16() || isAISrc_512_b32(); |
885 | } |
886 | |
887 | bool isAISrc_1024_b32() const { |
888 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i32); |
889 | } |
890 | |
891 | bool isAISrc_1024B16() const { |
892 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::i16); |
893 | } |
894 | |
895 | bool isAISrc_1024V2B16() const { |
896 | return isAISrc_1024B16(); |
897 | } |
898 | |
899 | bool isAISrc_1024_f32() const { |
900 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f32); |
901 | } |
902 | |
903 | bool isAISrc_1024F16() const { |
904 | return isRegOrInlineNoMods(RCID: AMDGPU::AReg_1024RegClassID, type: MVT::f16); |
905 | } |
906 | |
907 | bool isAISrc_1024V2F16() const { |
908 | return isAISrc_1024F16() || isAISrc_1024_b32(); |
909 | } |
910 | |
911 | bool isKImmFP32() const { |
912 | return isLiteralImm(type: MVT::f32); |
913 | } |
914 | |
915 | bool isKImmFP16() const { |
916 | return isLiteralImm(type: MVT::f16); |
917 | } |
918 | |
919 | bool isMem() const override { |
920 | return false; |
921 | } |
922 | |
923 | bool isExpr() const { |
924 | return Kind == Expression; |
925 | } |
926 | |
927 | bool isSOPPBrTarget() const { return isExpr() || isImm(); } |
928 | |
929 | bool isSWaitCnt() const; |
930 | bool isDepCtr() const; |
931 | bool isSDelayALU() const; |
932 | bool isHwreg() const; |
933 | bool isSendMsg() const; |
934 | bool isSplitBarrier() const; |
935 | bool isSwizzle() const; |
936 | bool isSMRDOffset8() const; |
937 | bool isSMEMOffset() const; |
938 | bool isSMRDLiteralOffset() const; |
939 | bool isDPP8() const; |
940 | bool isDPPCtrl() const; |
941 | bool isBLGP() const; |
942 | bool isGPRIdxMode() const; |
943 | bool isS16Imm() const; |
944 | bool isU16Imm() const; |
945 | bool isEndpgm() const; |
946 | |
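  // Bind a predicate to this operand, yielding a nullary callable that
  // evaluates the predicate on *this when invoked.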
947 | auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const { |
948 | return [=](){ return P(*this); }; |
949 | } |
950 | |
951 | StringRef getToken() const { |
952 | assert(isToken()); |
953 | return StringRef(Tok.Data, Tok.Length); |
954 | } |
955 | |
956 | int64_t getImm() const { |
957 | assert(isImm()); |
958 | return Imm.Val; |
959 | } |
960 | |
961 | void setImm(int64_t Val) { |
962 | assert(isImm()); |
963 | Imm.Val = Val; |
964 | } |
965 | |
966 | ImmTy getImmTy() const { |
967 | assert(isImm()); |
968 | return Imm.Type; |
969 | } |
970 | |
971 | MCRegister getReg() const override { |
972 | assert(isRegKind()); |
973 | return Reg.RegNo; |
974 | } |
975 | |
976 | SMLoc getStartLoc() const override { |
977 | return StartLoc; |
978 | } |
979 | |
980 | SMLoc getEndLoc() const override { |
981 | return EndLoc; |
982 | } |
983 | |
984 | SMRange getLocRange() const { |
985 | return SMRange(StartLoc, EndLoc); |
986 | } |
987 | |
988 | Modifiers getModifiers() const { |
989 | assert(isRegKind() || isImmTy(ImmTyNone)); |
990 | return isRegKind() ? Reg.Mods : Imm.Mods; |
991 | } |
992 | |
993 | void setModifiers(Modifiers Mods) { |
994 | assert(isRegKind() || isImmTy(ImmTyNone)); |
995 | if (isRegKind()) |
996 | Reg.Mods = Mods; |
997 | else |
998 | Imm.Mods = Mods; |
999 | } |
1000 | |
1001 | bool hasModifiers() const { |
1002 | return getModifiers().hasModifiers(); |
1003 | } |
1004 | |
1005 | bool hasFPModifiers() const { |
1006 | return getModifiers().hasFPModifiers(); |
1007 | } |
1008 | |
1009 | bool hasIntModifiers() const { |
1010 | return getModifiers().hasIntModifiers(); |
1011 | } |
1012 | |
1013 | uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; |
1014 | |
1015 | void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; |
1016 | |
1017 | void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; |
1018 | |
1019 | void addRegOperands(MCInst &Inst, unsigned N) const; |
1020 | |
1021 | void addRegOrImmOperands(MCInst &Inst, unsigned N) const { |
1022 | if (isRegKind()) |
1023 | addRegOperands(Inst, N); |
1024 | else |
1025 | addImmOperands(Inst, N); |
1026 | } |
1027 | |
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }
1054 | |
1055 | void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { |
1056 | assert(!hasIntModifiers()); |
1057 | addRegWithInputModsOperands(Inst, N); |
1058 | } |
1059 | |
1060 | void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { |
1061 | assert(!hasFPModifiers()); |
1062 | addRegWithInputModsOperands(Inst, N); |
1063 | } |
1064 | |
1065 | static void printImmTy(raw_ostream& OS, ImmTy Type) { |
1066 | // clang-format off |
1067 | switch (Type) { |
1068 | case ImmTyNone: OS << "None" ; break; |
1069 | case ImmTyGDS: OS << "GDS" ; break; |
1070 | case ImmTyLDS: OS << "LDS" ; break; |
1071 | case ImmTyOffen: OS << "Offen" ; break; |
1072 | case ImmTyIdxen: OS << "Idxen" ; break; |
1073 | case ImmTyAddr64: OS << "Addr64" ; break; |
1074 | case ImmTyOffset: OS << "Offset" ; break; |
1075 | case ImmTyInstOffset: OS << "InstOffset" ; break; |
1076 | case ImmTyOffset0: OS << "Offset0" ; break; |
1077 | case ImmTyOffset1: OS << "Offset1" ; break; |
1078 | case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod" ; break; |
1079 | case ImmTyCPol: OS << "CPol" ; break; |
1080 | case ImmTyIndexKey8bit: OS << "index_key" ; break; |
1081 | case ImmTyIndexKey16bit: OS << "index_key" ; break; |
1082 | case ImmTyTFE: OS << "TFE" ; break; |
1083 | case ImmTyD16: OS << "D16" ; break; |
1084 | case ImmTyFORMAT: OS << "FORMAT" ; break; |
1085 | case ImmTyClamp: OS << "Clamp" ; break; |
1086 | case ImmTyOModSI: OS << "OModSI" ; break; |
1087 | case ImmTyDPP8: OS << "DPP8" ; break; |
1088 | case ImmTyDppCtrl: OS << "DppCtrl" ; break; |
1089 | case ImmTyDppRowMask: OS << "DppRowMask" ; break; |
1090 | case ImmTyDppBankMask: OS << "DppBankMask" ; break; |
1091 | case ImmTyDppBoundCtrl: OS << "DppBoundCtrl" ; break; |
1092 | case ImmTyDppFI: OS << "DppFI" ; break; |
1093 | case ImmTySDWADstSel: OS << "SDWADstSel" ; break; |
1094 | case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel" ; break; |
1095 | case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel" ; break; |
1096 | case ImmTySDWADstUnused: OS << "SDWADstUnused" ; break; |
1097 | case ImmTyDMask: OS << "DMask" ; break; |
1098 | case ImmTyDim: OS << "Dim" ; break; |
1099 | case ImmTyUNorm: OS << "UNorm" ; break; |
1100 | case ImmTyDA: OS << "DA" ; break; |
1101 | case ImmTyR128A16: OS << "R128A16" ; break; |
1102 | case ImmTyA16: OS << "A16" ; break; |
1103 | case ImmTyLWE: OS << "LWE" ; break; |
1104 | case ImmTyOff: OS << "Off" ; break; |
1105 | case ImmTyExpTgt: OS << "ExpTgt" ; break; |
1106 | case ImmTyExpCompr: OS << "ExpCompr" ; break; |
1107 | case ImmTyExpVM: OS << "ExpVM" ; break; |
1108 | case ImmTyHwreg: OS << "Hwreg" ; break; |
1109 | case ImmTySendMsg: OS << "SendMsg" ; break; |
1110 | case ImmTyInterpSlot: OS << "InterpSlot" ; break; |
1111 | case ImmTyInterpAttr: OS << "InterpAttr" ; break; |
1112 | case ImmTyInterpAttrChan: OS << "InterpAttrChan" ; break; |
1113 | case ImmTyOpSel: OS << "OpSel" ; break; |
1114 | case ImmTyOpSelHi: OS << "OpSelHi" ; break; |
1115 | case ImmTyNegLo: OS << "NegLo" ; break; |
1116 | case ImmTyNegHi: OS << "NegHi" ; break; |
1117 | case ImmTySwizzle: OS << "Swizzle" ; break; |
1118 | case ImmTyGprIdxMode: OS << "GprIdxMode" ; break; |
1119 | case ImmTyHigh: OS << "High" ; break; |
1120 | case ImmTyBLGP: OS << "BLGP" ; break; |
1121 | case ImmTyCBSZ: OS << "CBSZ" ; break; |
1122 | case ImmTyABID: OS << "ABID" ; break; |
1123 | case ImmTyEndpgm: OS << "Endpgm" ; break; |
1124 | case ImmTyWaitVDST: OS << "WaitVDST" ; break; |
1125 | case ImmTyWaitEXP: OS << "WaitEXP" ; break; |
1126 | case ImmTyWaitVAVDst: OS << "WaitVAVDst" ; break; |
1127 | case ImmTyWaitVMVSrc: OS << "WaitVMVSrc" ; break; |
1128 | case ImmTyByteSel: OS << "ByteSel" ; break; |
1129 | } |
1130 | // clang-format on |
1131 | } |
1132 | |
1133 | void print(raw_ostream &OS) const override { |
1134 | switch (Kind) { |
1135 | case Register: |
1136 | OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; |
1137 | break; |
1138 | case Immediate: |
1139 | OS << '<' << getImm(); |
1140 | if (getImmTy() != ImmTyNone) { |
        OS << " type: "; printImmTy(OS, getImmTy());
1142 | } |
1143 | OS << " mods: " << Imm.Mods << '>'; |
1144 | break; |
1145 | case Token: |
1146 | OS << '\'' << getToken() << '\''; |
1147 | break; |
1148 | case Expression: |
1149 | OS << "<expr " << *Expr << '>'; |
1150 | break; |
1151 | } |
1152 | } |
1153 | |
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
1199 | }; |
1200 | |
1201 | raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { |
1202 | OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; |
1203 | return OS; |
1204 | } |
1205 | |
1206 | //===----------------------------------------------------------------------===// |
1207 | // AsmParser |
1208 | //===----------------------------------------------------------------------===// |
1209 | |
1210 | // Holds info related to the current kernel, e.g. count of SGPRs used. |
1211 | // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next |
1212 | // .amdgpu_hsa_kernel or at EOF. |
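// For example (hypothetical kernel), after
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     v_mov_b32 v3, 0
// the current scope records that VGPRs up to v3 are in use and updates the
// .kernel.vgpr_count symbol accordingly.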
1213 | class KernelScopeInfo { |
1214 | int SgprIndexUnusedMin = -1; |
1215 | int VgprIndexUnusedMin = -1; |
1216 | int AgprIndexUnusedMin = -1; |
1217 | MCContext *Ctx = nullptr; |
1218 | MCSubtargetInfo const *MSTI = nullptr; |
1219 | |
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
1296 | }; |
1297 | |
1298 | class AMDGPUAsmParser : public MCTargetAsmParser { |
1299 | MCAsmParser &Parser; |
1300 | |
1301 | unsigned ForcedEncodingSize = 0; |
1302 | bool ForcedDPP = false; |
1303 | bool ForcedSDWA = false; |
1304 | KernelScopeInfo KernelScope; |
1305 | |
1306 | /// @name Auto-generated Match Functions |
1307 | /// { |
1308 | |
#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"
1311 | |
1312 | /// } |
1313 | |
1314 | private: |
1315 | void createConstantSymbol(StringRef Id, int64_t Val); |
1316 | |
1317 | bool ParseAsAbsoluteExpression(uint32_t &Ret); |
1318 | bool OutOfRangeError(SMRange Range); |
1319 | /// Calculate VGPR/SGPR blocks required for given target, reserved |
1320 | /// registers, and user-specified NextFreeXGPR values. |
1321 | /// |
1322 | /// \param Features [in] Target features, used for bug corrections. |
1323 | /// \param VCCUsed [in] Whether VCC special SGPR is reserved. |
1324 | /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. |
1325 | /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. |
1326 | /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel |
1327 | /// descriptor field, if valid. |
1328 | /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. |
1329 | /// \param VGPRRange [in] Token range, used for VGPR diagnostics. |
1330 | /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. |
1331 | /// \param SGPRRange [in] Token range, used for SGPR diagnostics. |
1332 | /// \param VGPRBlocks [out] Result VGPR block count. |
1333 | /// \param SGPRBlocks [out] Result SGPR block count. |
1334 | bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed, |
1335 | const MCExpr *FlatScrUsed, bool XNACKUsed, |
1336 | std::optional<bool> EnableWavefrontSize32, |
1337 | const MCExpr *NextFreeVGPR, SMRange VGPRRange, |
1338 | const MCExpr *NextFreeSGPR, SMRange SGPRRange, |
1339 | const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks); |
1340 | bool ParseDirectiveAMDGCNTarget(); |
1341 | bool ParseDirectiveAMDHSACodeObjectVersion(); |
1342 | bool ParseDirectiveAMDHSAKernel(); |
1343 | bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &); |
1344 | bool ParseDirectiveAMDKernelCodeT(); |
1345 | // TODO: Possibly make subtargetHasRegister const. |
1346 | bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); |
1347 | bool ParseDirectiveAMDGPUHsaKernel(); |
1348 | |
1349 | bool ParseDirectiveISAVersion(); |
1350 | bool ParseDirectiveHSAMetadata(); |
1351 | bool ParseDirectivePALMetadataBegin(); |
1352 | bool ParseDirectivePALMetadata(); |
1353 | bool ParseDirectiveAMDGPULDS(); |
1354 | |
1355 | /// Common code to parse out a block of text (typically YAML) between start and |
1356 | /// end directives. |
1357 | bool ParseToEndDirective(const char *AssemblerDirectiveBegin, |
1358 | const char *AssemblerDirectiveEnd, |
1359 | std::string &CollectString); |
1360 | |
1361 | bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, |
1362 | RegisterKind RegKind, unsigned Reg1, SMLoc Loc); |
1363 | bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, |
1364 | unsigned &RegNum, unsigned &RegWidth, |
1365 | bool RestoreOnFailure = false); |
1366 | bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, |
1367 | unsigned &RegNum, unsigned &RegWidth, |
1368 | SmallVectorImpl<AsmToken> &Tokens); |
1369 | unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, |
1370 | unsigned &RegWidth, |
1371 | SmallVectorImpl<AsmToken> &Tokens); |
1372 | unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, |
1373 | unsigned &RegWidth, |
1374 | SmallVectorImpl<AsmToken> &Tokens); |
1375 | unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, |
1376 | unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); |
1377 | bool ParseRegRange(unsigned& Num, unsigned& Width); |
1378 | unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg, |
1379 | unsigned RegWidth, SMLoc Loc); |
1380 | |
1381 | bool isRegister(); |
1382 | bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; |
1383 | std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); |
1384 | void initializeGprCountSymbol(RegisterKind RegKind); |
1385 | bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, |
1386 | unsigned RegWidth); |
1387 | void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, |
1388 | bool IsAtomic); |
1389 | |
1390 | public: |
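  // Operand parsing mode; NSA refers to the non-sequential address encoding
  // used by some MIMG instructions.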
1391 | enum OperandMode { |
1392 | OperandMode_Default, |
1393 | OperandMode_NSA, |
1394 | }; |
1395 | |
1396 | using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; |
1397 | |
  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    FeatureBitset FB = getFeatureBits();
    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
        !FB[AMDGPU::FeatureWavefrontSize32]) {
      // If there is no default wave size it must be a generation before gfx10,
      // these have FeatureWavefrontSize64 in their definition already. For
      // gfx10+ set wave32 as a default.
      copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
      createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
      createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
    } else {
      createConstantSymbol(".option.machine_version_major", ISA.Major);
      createConstantSymbol(".option.machine_version_minor", ISA.Minor);
      createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
    }
    if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
      initializeGprCountSymbol(IS_VGPR);
      initializeGprCountSymbol(IS_SGPR);
    } else
      KernelScope.initialize(getContext());

    for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
      createConstantSymbol(Symbol, Code);

    createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
    createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
    createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
  }
1443 | |
1444 | bool hasMIMG_R128() const { |
1445 | return AMDGPU::hasMIMG_R128(STI: getSTI()); |
1446 | } |
1447 | |
1448 | bool hasPackedD16() const { |
1449 | return AMDGPU::hasPackedD16(STI: getSTI()); |
1450 | } |
1451 | |
1452 | bool hasA16() const { return AMDGPU::hasA16(STI: getSTI()); } |
1453 | |
1454 | bool hasG16() const { return AMDGPU::hasG16(STI: getSTI()); } |
1455 | |
1456 | bool hasGDS() const { return AMDGPU::hasGDS(STI: getSTI()); } |
1457 | |
1458 | bool isSI() const { |
1459 | return AMDGPU::isSI(STI: getSTI()); |
1460 | } |
1461 | |
1462 | bool isCI() const { |
1463 | return AMDGPU::isCI(STI: getSTI()); |
1464 | } |
1465 | |
1466 | bool isVI() const { |
1467 | return AMDGPU::isVI(STI: getSTI()); |
1468 | } |
1469 | |
1470 | bool isGFX9() const { |
1471 | return AMDGPU::isGFX9(STI: getSTI()); |
1472 | } |
1473 | |
1474 | // TODO: isGFX90A is also true for GFX940. We need to clean it. |
1475 | bool isGFX90A() const { |
1476 | return AMDGPU::isGFX90A(STI: getSTI()); |
1477 | } |
1478 | |
1479 | bool isGFX940() const { |
1480 | return AMDGPU::isGFX940(STI: getSTI()); |
1481 | } |
1482 | |
1483 | bool isGFX9Plus() const { |
1484 | return AMDGPU::isGFX9Plus(STI: getSTI()); |
1485 | } |
1486 | |
1487 | bool isGFX10() const { |
1488 | return AMDGPU::isGFX10(STI: getSTI()); |
1489 | } |
1490 | |
1491 | bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(STI: getSTI()); } |
1492 | |
1493 | bool isGFX11() const { |
1494 | return AMDGPU::isGFX11(STI: getSTI()); |
1495 | } |
1496 | |
1497 | bool isGFX11Plus() const { |
1498 | return AMDGPU::isGFX11Plus(STI: getSTI()); |
1499 | } |
1500 | |
1501 | bool isGFX12() const { return AMDGPU::isGFX12(STI: getSTI()); } |
1502 | |
1503 | bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(STI: getSTI()); } |
1504 | |
1505 | bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(STI: getSTI()); } |
1506 | |
1507 | bool isGFX10_BEncoding() const { |
1508 | return AMDGPU::isGFX10_BEncoding(STI: getSTI()); |
1509 | } |
1510 | |
1511 | bool hasInv2PiInlineImm() const { |
1512 | return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; |
1513 | } |
1514 | |
1515 | bool hasFlatOffsets() const { |
1516 | return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; |
1517 | } |
1518 | |
1519 | bool hasArchitectedFlatScratch() const { |
1520 | return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; |
1521 | } |
1522 | |
1523 | bool hasSGPR102_SGPR103() const { |
1524 | return !isVI() && !isGFX9(); |
1525 | } |
1526 | |
1527 | bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } |
1528 | |
1529 | bool hasIntClamp() const { |
1530 | return getFeatureBits()[AMDGPU::FeatureIntClamp]; |
1531 | } |
1532 | |
1533 | bool hasPartialNSAEncoding() const { |
1534 | return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding]; |
1535 | } |
1536 | |
1537 | unsigned getNSAMaxSize(bool HasSampler = false) const { |
1538 | return AMDGPU::getNSAMaxSize(STI: getSTI(), HasSampler); |
1539 | } |
1540 | |
1541 | unsigned getMaxNumUserSGPRs() const { |
1542 | return AMDGPU::getMaxNumUserSGPRs(STI: getSTI()); |
1543 | } |
1544 | |
1545 | bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(STI: getSTI()); } |
1546 | |
1547 | AMDGPUTargetStreamer &getTargetStreamer() { |
1548 | MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); |
1549 | return static_cast<AMDGPUTargetStreamer &>(TS); |
1550 | } |
1551 | |
1552 | const MCRegisterInfo *getMRI() const { |
1553 | // We need this const_cast because for some reason getContext() is not const |
1554 | // in MCAsmParser. |
1555 | return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); |
1556 | } |
1557 | |
1558 | const MCInstrInfo *getMII() const { |
1559 | return &MII; |
1560 | } |
1561 | |
1562 | const FeatureBitset &getFeatureBits() const { |
1563 | return getSTI().getFeatureBits(); |
1564 | } |
1565 | |
1566 | void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } |
1567 | void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } |
1568 | void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } |
1569 | |
1570 | unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } |
1571 | bool isForcedVOP3() const { return ForcedEncodingSize == 64; } |
1572 | bool isForcedDPP() const { return ForcedDPP; } |
1573 | bool isForcedSDWA() const { return ForcedSDWA; } |
1574 | ArrayRef<unsigned> getMatchedVariants() const; |
1575 | StringRef getMatchedVariantName() const; |
1576 | |
1577 | std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); |
1578 | bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, |
1579 | bool RestoreOnFailure); |
1580 | bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; |
1581 | ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, |
1582 | SMLoc &EndLoc) override; |
1583 | unsigned checkTargetMatchPredicate(MCInst &Inst) override; |
1584 | unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, |
1585 | unsigned Kind) override; |
1586 | bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
1587 | OperandVector &Operands, MCStreamer &Out, |
1588 | uint64_t &ErrorInfo, |
1589 | bool MatchingInlineAsm) override; |
1590 | bool ParseDirective(AsmToken DirectiveID) override; |
1591 | ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic, |
1592 | OperandMode Mode = OperandMode_Default); |
1593 | StringRef parseMnemonicSuffix(StringRef Name); |
1594 | bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, |
1595 | SMLoc NameLoc, OperandVector &Operands) override; |
1596 | //bool ProcessInstruction(MCInst &Inst); |
1597 | |
1598 | ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands); |
1599 | |
1600 | ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int); |
1601 | |
1602 | ParseStatus |
1603 | parseIntWithPrefix(const char *Prefix, OperandVector &Operands, |
1604 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, |
1605 | std::function<bool(int64_t &)> ConvertResult = nullptr); |
1606 | |
1607 | ParseStatus parseOperandArrayWithPrefix( |
1608 | const char *Prefix, OperandVector &Operands, |
1609 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, |
1610 | bool (*ConvertResult)(int64_t &) = nullptr); |
1611 | |
1612 | ParseStatus |
1613 | parseNamedBit(StringRef Name, OperandVector &Operands, |
1614 | AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); |
1615 | unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const; |
1616 | ParseStatus parseCPol(OperandVector &Operands); |
1617 | ParseStatus parseScope(OperandVector &Operands, int64_t &Scope); |
1618 | ParseStatus parseTH(OperandVector &Operands, int64_t &TH); |
1619 | ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value, |
1620 | SMLoc &StringLoc); |
1621 | |
1622 | bool isModifier(); |
1623 | bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1624 | bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1625 | bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; |
1626 | bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; |
1627 | bool parseSP3NegModifier(); |
1628 | ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false, |
1629 | bool HasLit = false); |
1630 | ParseStatus parseReg(OperandVector &Operands); |
1631 | ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false, |
1632 | bool HasLit = false); |
1633 | ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands, |
1634 | bool AllowImm = true); |
1635 | ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands, |
1636 | bool AllowImm = true); |
1637 | ParseStatus parseRegWithFPInputMods(OperandVector &Operands); |
1638 | ParseStatus parseRegWithIntInputMods(OperandVector &Operands); |
1639 | ParseStatus parseVReg32OrOff(OperandVector &Operands); |
1640 | ParseStatus tryParseIndexKey(OperandVector &Operands, |
1641 | AMDGPUOperand::ImmTy ImmTy); |
1642 | ParseStatus parseIndexKey8bit(OperandVector &Operands); |
1643 | ParseStatus parseIndexKey16bit(OperandVector &Operands); |
1644 | |
1645 | ParseStatus parseDfmtNfmt(int64_t &Format); |
1646 | ParseStatus parseUfmt(int64_t &Format); |
1647 | ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, |
1648 | int64_t &Format); |
1649 | ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, |
1650 | int64_t &Format); |
1651 | ParseStatus parseFORMAT(OperandVector &Operands); |
1652 | ParseStatus parseSymbolicOrNumericFormat(int64_t &Format); |
1653 | ParseStatus parseNumericFormat(int64_t &Format); |
1654 | ParseStatus parseFlatOffset(OperandVector &Operands); |
1655 | ParseStatus parseR128A16(OperandVector &Operands); |
1656 | ParseStatus parseBLGP(OperandVector &Operands); |
1657 | bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); |
1658 | bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); |
1659 | |
1660 | void cvtExp(MCInst &Inst, const OperandVector &Operands); |
1661 | |
1662 | bool parseCnt(int64_t &IntVal); |
1663 | ParseStatus parseSWaitCnt(OperandVector &Operands); |
1664 | |
1665 | bool parseDepCtr(int64_t &IntVal, unsigned &Mask); |
1666 | void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); |
1667 | ParseStatus parseDepCtr(OperandVector &Operands); |
1668 | |
1669 | bool parseDelay(int64_t &Delay); |
1670 | ParseStatus parseSDelayALU(OperandVector &Operands); |
1671 | |
1672 | ParseStatus parseHwreg(OperandVector &Operands); |
1673 | |
1674 | private: |
1675 | struct OperandInfoTy { |
1676 | SMLoc Loc; |
1677 | int64_t Val; |
1678 | bool IsSymbolic = false; |
1679 | bool IsDefined = false; |
1680 | |
1681 | OperandInfoTy(int64_t Val) : Val(Val) {} |
1682 | }; |
1683 | |
1684 | struct StructuredOpField : OperandInfoTy { |
1685 | StringLiteral Id; |
1686 | StringLiteral Desc; |
1687 | unsigned Width; |
1688 | bool IsDefined = false; |
1689 | |
1690 | StructuredOpField(StringLiteral Id, StringLiteral Desc, unsigned Width, |
1691 | int64_t Default) |
1692 | : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {} |
1693 | virtual ~StructuredOpField() = default; |
1694 | |
1695 | bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const { |
1696 | Parser.Error(L: Loc, Msg: "invalid " + Desc + ": " + Err); |
1697 | return false; |
1698 | } |
1699 | |
1700 | virtual bool validate(AMDGPUAsmParser &Parser) const { |
1701 | if (IsSymbolic && Val == OPR_ID_UNSUPPORTED) |
1702 | return Error(Parser, Err: "not supported on this GPU" ); |
1703 | if (!isUIntN(N: Width, x: Val)) |
1704 | return Error(Parser, Err: "only " + Twine(Width) + "-bit values are legal" ); |
1705 | return true; |
1706 | } |
1707 | }; |
1708 | |
1709 | ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields); |
1710 | bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields); |
1711 | |
1712 | bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); |
1713 | bool validateSendMsg(const OperandInfoTy &Msg, |
1714 | const OperandInfoTy &Op, |
1715 | const OperandInfoTy &Stream); |
1716 | |
1717 | ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset, |
1718 | OperandInfoTy &Width); |
1719 | |
1720 | SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; |
1721 | SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; |
1722 | SMLoc getBLGPLoc(const OperandVector &Operands) const; |
1723 | |
1724 | SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, |
1725 | const OperandVector &Operands) const; |
1726 | SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; |
1727 | SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; |
1728 | SMLoc getLitLoc(const OperandVector &Operands, |
1729 | bool SearchMandatoryLiterals = false) const; |
1730 | SMLoc getMandatoryLitLoc(const OperandVector &Operands) const; |
1731 | SMLoc getConstLoc(const OperandVector &Operands) const; |
1732 | SMLoc getInstLoc(const OperandVector &Operands) const; |
1733 | |
1734 | bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); |
1735 | bool validateOffset(const MCInst &Inst, const OperandVector &Operands); |
1736 | bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); |
1737 | bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); |
1738 | bool validateSOPLiteral(const MCInst &Inst) const; |
1739 | bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); |
1740 | bool validateVOPDRegBankConstraints(const MCInst &Inst, |
1741 | const OperandVector &Operands); |
1742 | bool validateIntClampSupported(const MCInst &Inst); |
1743 | bool validateMIMGAtomicDMask(const MCInst &Inst); |
1744 | bool validateMIMGGatherDMask(const MCInst &Inst); |
1745 | bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); |
1746 | bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); |
1747 | bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc); |
1748 | bool validateMIMGD16(const MCInst &Inst); |
1749 | bool validateMIMGMSAA(const MCInst &Inst); |
1750 | bool validateOpSel(const MCInst &Inst); |
1751 | bool validateNeg(const MCInst &Inst, int OpName); |
1752 | bool validateDPP(const MCInst &Inst, const OperandVector &Operands); |
1753 | bool validateVccOperand(unsigned Reg) const; |
1754 | bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); |
1755 | bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); |
1756 | bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands); |
1757 | bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); |
1758 | bool validateAGPRLdSt(const MCInst &Inst) const; |
1759 | bool validateVGPRAlign(const MCInst &Inst) const; |
1760 | bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); |
1761 | bool validateDS(const MCInst &Inst, const OperandVector &Operands); |
1762 | bool validateGWS(const MCInst &Inst, const OperandVector &Operands); |
1763 | bool validateDivScale(const MCInst &Inst); |
1764 | bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands); |
1765 | bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, |
1766 | const SMLoc &IDLoc); |
1767 | bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, |
1768 | const unsigned CPol); |
1769 | bool validateExeczVcczOperands(const OperandVector &Operands); |
1770 | bool validateTFE(const MCInst &Inst, const OperandVector &Operands); |
1771 | std::optional<StringRef> validateLdsDirect(const MCInst &Inst); |
1772 | unsigned getConstantBusLimit(unsigned Opcode) const; |
1773 | bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); |
1774 | bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; |
1775 | unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; |
1776 | |
1777 | bool isSupportedMnemo(StringRef Mnemo, |
1778 | const FeatureBitset &FBS); |
1779 | bool isSupportedMnemo(StringRef Mnemo, |
1780 | const FeatureBitset &FBS, |
1781 | ArrayRef<unsigned> Variants); |
1782 | bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); |
1783 | |
1784 | bool isId(const StringRef Id) const; |
1785 | bool isId(const AsmToken &Token, const StringRef Id) const; |
1786 | bool isToken(const AsmToken::TokenKind Kind) const; |
1787 | StringRef getId() const; |
1788 | bool trySkipId(const StringRef Id); |
1789 | bool trySkipId(const StringRef Pref, const StringRef Id); |
1790 | bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); |
1791 | bool trySkipToken(const AsmToken::TokenKind Kind); |
1792 | bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); |
1793 | bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string" ); |
1794 | bool parseId(StringRef &Val, const StringRef ErrMsg = "" ); |
1795 | |
1796 | void peekTokens(MutableArrayRef<AsmToken> Tokens); |
1797 | AsmToken::TokenKind getTokenKind() const; |
1798 | bool parseExpr(int64_t &Imm, StringRef Expected = "" ); |
1799 | bool parseExpr(OperandVector &Operands); |
1800 | StringRef getTokenStr() const; |
1801 | AsmToken peekToken(bool ShouldSkipSpace = true); |
1802 | AsmToken getToken() const; |
1803 | SMLoc getLoc() const; |
1804 | void lex(); |
1805 | |
1806 | public: |
1807 | void onBeginOfFile() override; |
1808 | bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; |
1809 | |
1810 | ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); |
1811 | |
1812 | ParseStatus parseExpTgt(OperandVector &Operands); |
1813 | ParseStatus parseSendMsg(OperandVector &Operands); |
1814 | ParseStatus parseInterpSlot(OperandVector &Operands); |
1815 | ParseStatus parseInterpAttr(OperandVector &Operands); |
1816 | ParseStatus parseSOPPBrTarget(OperandVector &Operands); |
1817 | ParseStatus parseBoolReg(OperandVector &Operands); |
1818 | |
1819 | bool parseSwizzleOperand(int64_t &Op, |
1820 | const unsigned MinVal, |
1821 | const unsigned MaxVal, |
1822 | const StringRef ErrMsg, |
1823 | SMLoc &Loc); |
1824 | bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, |
1825 | const unsigned MinVal, |
1826 | const unsigned MaxVal, |
1827 | const StringRef ErrMsg); |
1828 | ParseStatus parseSwizzle(OperandVector &Operands); |
1829 | bool parseSwizzleOffset(int64_t &Imm); |
1830 | bool parseSwizzleMacro(int64_t &Imm); |
1831 | bool parseSwizzleQuadPerm(int64_t &Imm); |
1832 | bool parseSwizzleBitmaskPerm(int64_t &Imm); |
1833 | bool parseSwizzleBroadcast(int64_t &Imm); |
1834 | bool parseSwizzleSwap(int64_t &Imm); |
1835 | bool parseSwizzleReverse(int64_t &Imm); |
1836 | |
1837 | ParseStatus parseGPRIdxMode(OperandVector &Operands); |
1838 | int64_t parseGPRIdxMacro(); |
1839 | |
1840 | void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: false); } |
1841 | void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, IsAtomic: true); } |
1842 | |
1843 | ParseStatus parseOModSI(OperandVector &Operands); |
1844 | |
1845 | void cvtVOP3(MCInst &Inst, const OperandVector &Operands, |
1846 | OptionalImmIndexMap &OptionalIdx); |
1847 | void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); |
1848 | void cvtVOP3(MCInst &Inst, const OperandVector &Operands); |
1849 | void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); |
1850 | void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands); |
1851 | |
1852 | void cvtVOPD(MCInst &Inst, const OperandVector &Operands); |
1853 | void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, |
1854 | OptionalImmIndexMap &OptionalIdx); |
1855 | void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, |
1856 | OptionalImmIndexMap &OptionalIdx); |
1857 | |
1858 | void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); |
1859 | void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); |
1860 | |
1861 | bool parseDimId(unsigned &Encoding); |
1862 | ParseStatus parseDim(OperandVector &Operands); |
1863 | bool convertDppBoundCtrl(int64_t &BoundCtrl); |
1864 | ParseStatus parseDPP8(OperandVector &Operands); |
1865 | ParseStatus parseDPPCtrl(OperandVector &Operands); |
1866 | bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); |
1867 | int64_t parseDPPCtrlSel(StringRef Ctrl); |
1868 | int64_t parseDPPCtrlPerm(); |
1869 | void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); |
1870 | void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { |
1871 | cvtDPP(Inst, Operands, IsDPP8: true); |
1872 | } |
1873 | void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, |
1874 | bool IsDPP8 = false); |
1875 | void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { |
1876 | cvtVOP3DPP(Inst, Operands, IsDPP8: true); |
1877 | } |
1878 | |
1879 | ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, |
1880 | AMDGPUOperand::ImmTy Type); |
1881 | ParseStatus parseSDWADstUnused(OperandVector &Operands); |
1882 | void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); |
1883 | void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); |
1884 | void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); |
1885 | void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); |
1886 | void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); |
1887 | void cvtSDWA(MCInst &Inst, const OperandVector &Operands, |
1888 | uint64_t BasicInstType, |
1889 | bool SkipDstVcc = false, |
1890 | bool SkipSrcVcc = false); |
1891 | |
1892 | ParseStatus parseEndpgm(OperandVector &Operands); |
1893 | |
1894 | ParseStatus parseVOPD(OperandVector &Operands); |
1895 | }; |
1896 | |
1897 | } // end anonymous namespace |
1898 | |
1899 | // May be called with an integer type of equivalent bitwidth. |
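 | // For example, a 4-byte operand is given IEEEsingle() semantics whether it |
 | // was declared as i32 or f32; only the bit width matters here. |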
1900 | static const fltSemantics *getFltSemantics(unsigned Size) { |
1901 | switch (Size) { |
1902 | case 4: |
1903 | return &APFloat::IEEEsingle(); |
1904 | case 8: |
1905 | return &APFloat::IEEEdouble(); |
1906 | case 2: |
1907 | return &APFloat::IEEEhalf(); |
1908 | default: |
1909 | llvm_unreachable("unsupported fp type" ); |
1910 | } |
1911 | } |
1912 | |
1913 | static const fltSemantics *getFltSemantics(MVT VT) { |
1914 | return getFltSemantics(Size: VT.getSizeInBits() / 8); |
1915 | } |
1916 | |
1917 | static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { |
1918 | switch (OperandType) { |
1919 | // When a floating-point immediate is used as an operand of type i16, the |
1920 | // 32-bit representation of the constant, truncated to the 16 LSBs, is used. |
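 | // For example, a literal for an OPERAND_REG_IMM_INT16 operand is converted |
 | // with IEEEsingle (32-bit) semantics below, not IEEEhalf. |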
1921 | case AMDGPU::OPERAND_REG_IMM_INT16: |
1922 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
1923 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
1924 | case AMDGPU::OPERAND_REG_IMM_INT32: |
1925 | case AMDGPU::OPERAND_REG_IMM_FP32: |
1926 | case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: |
1927 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
1928 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
1929 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
1930 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
1931 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
1932 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
1933 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: |
1934 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
1935 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
1936 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
1937 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: |
1938 | case AMDGPU::OPERAND_KIMM32: |
1939 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: |
1940 | return &APFloat::IEEEsingle(); |
1941 | case AMDGPU::OPERAND_REG_IMM_INT64: |
1942 | case AMDGPU::OPERAND_REG_IMM_FP64: |
1943 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
1944 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
1945 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
1946 | return &APFloat::IEEEdouble(); |
1947 | case AMDGPU::OPERAND_REG_IMM_FP16: |
1948 | case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: |
1949 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
1950 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
1951 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
1952 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: |
1953 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
1954 | case AMDGPU::OPERAND_KIMM16: |
1955 | return &APFloat::IEEEhalf(); |
1956 | case AMDGPU::OPERAND_REG_IMM_BF16: |
1957 | case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: |
1958 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
1959 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
1960 | case AMDGPU::OPERAND_REG_INLINE_AC_BF16: |
1961 | case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: |
1962 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
1963 | return &APFloat::BFloat(); |
1964 | default: |
1965 | llvm_unreachable("unsupported fp type" ); |
1966 | } |
1967 | } |
1968 | |
1969 | //===----------------------------------------------------------------------===// |
1970 | // Operand |
1971 | //===----------------------------------------------------------------------===// |
1972 | |
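 | // Rejects conversions that overflow or underflow (e.g. 1.0e10 does not fit |
 | // into f16), while plain precision loss, as for 0.1, is tolerated. |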
1973 | static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { |
1974 | bool Lost; |
1975 | |
1976 | // Convert the literal to the FP semantics of the operand type. |
1977 | APFloat::opStatus Status = FPLiteral.convert(ToSemantics: *getFltSemantics(VT), |
1978 | RM: APFloat::rmNearestTiesToEven, |
1979 | losesInfo: &Lost); |
1980 | // We allow precision loss but not overflow or underflow. |
1981 | if (Status != APFloat::opOK && |
1982 | Lost && |
1983 | ((Status & APFloat::opOverflow) != 0 || |
1984 | (Status & APFloat::opUnderflow) != 0)) { |
1985 | return false; |
1986 | } |
1987 | |
1988 | return true; |
1989 | } |
1990 | |
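 | // e.g. isSafeTruncation(-1, 16) is true (fits as a signed 16-bit value), but |
 | // isSafeTruncation(0x18000, 16) is false: it fits neither form. |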
1991 | static bool isSafeTruncation(int64_t Val, unsigned Size) { |
1992 | return isUIntN(N: Size, x: Val) || isIntN(N: Size, x: Val); |
1993 | } |
1994 | |
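 | // e.g. for an f16 operand the bit pattern 0x3C00 (1.0) is an inline |
 | // constant, while an arbitrary value such as 0x3B00 (0.875) must be |
 | // encoded as a literal. |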
1995 | static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { |
1996 | if (VT.getScalarType() == MVT::i16) |
1997 | return isInlinableLiteral32(Literal: Val, HasInv2Pi); |
1998 | |
1999 | if (VT.getScalarType() == MVT::f16) |
2000 | return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi); |
2001 | |
2002 | assert(VT.getScalarType() == MVT::bf16); |
2003 | |
2004 | return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi); |
2005 | } |
2006 | |
2007 | bool AMDGPUOperand::isInlinableImm(MVT type) const { |
2008 | |
2009 | // This is a hack to enable named inline values like |
2010 | // shared_base with both 32-bit and 64-bit operands. |
2011 | // Note that these values are defined as |
2012 | // 32-bit operands only. |
2013 | if (isInlineValue()) { |
2014 | return true; |
2015 | } |
2016 | |
2017 | if (!isImmTy(ImmT: ImmTyNone)) { |
2018 | // Only plain immediates are inlinable (e.g. "clamp" attribute is not) |
2019 | return false; |
2020 | } |
2021 | // TODO: We should avoid using host floats here. It would be better to |
2022 | // check the float bit values, which is what a few other places do. |
2023 | // We've had bot failures before due to weird NaN support on MIPS hosts. |
2024 | |
2025 | APInt Literal(64, Imm.Val); |
2026 | |
2027 | if (Imm.IsFPImm) { // We got fp literal token |
2028 | if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand |
2029 | return AMDGPU::isInlinableLiteral64(Literal: Imm.Val, |
2030 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2031 | } |
2032 | |
2033 | APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); |
2034 | if (!canLosslesslyConvertToFPType(FPLiteral, VT: type)) |
2035 | return false; |
2036 | |
2037 | if (type.getScalarSizeInBits() == 16) { |
2038 | bool Lost = false; |
2039 | switch (type.getScalarType().SimpleTy) { |
2040 | default: |
2041 | llvm_unreachable("unknown 16-bit type" ); |
2042 | case MVT::bf16: |
2043 | FPLiteral.convert(ToSemantics: APFloatBase::BFloat(), RM: APFloat::rmNearestTiesToEven, |
2044 | losesInfo: &Lost); |
2045 | break; |
2046 | case MVT::f16: |
2047 | FPLiteral.convert(ToSemantics: APFloatBase::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, |
2048 | losesInfo: &Lost); |
2049 | break; |
2050 | case MVT::i16: |
2051 | FPLiteral.convert(ToSemantics: APFloatBase::IEEEsingle(), |
2052 | RM: APFloat::rmNearestTiesToEven, losesInfo: &Lost); |
2053 | break; |
2054 | } |
2055 | // We need to use the 32-bit representation here because when a |
2056 | // floating-point inline constant is used as an i16 operand, its 32-bit |
2057 | // representation is what gets used. We need the 32-bit value to check |
2058 | // whether it is an FP inline constant. |
2059 | uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); |
2060 | return isInlineableLiteralOp16(Val: ImmVal, VT: type, |
2061 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2062 | } |
2063 | |
2064 | // Check if single precision literal is inlinable |
2065 | return AMDGPU::isInlinableLiteral32( |
2066 | Literal: static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), |
2067 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2068 | } |
2069 | |
2070 | // We got int literal token. |
2071 | if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand |
2072 | return AMDGPU::isInlinableLiteral64(Literal: Imm.Val, |
2073 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2074 | } |
2075 | |
2076 | if (!isSafeTruncation(Val: Imm.Val, Size: type.getScalarSizeInBits())) { |
2077 | return false; |
2078 | } |
2079 | |
2080 | if (type.getScalarSizeInBits() == 16) { |
2081 | return isInlineableLiteralOp16( |
2082 | Val: static_cast<int16_t>(Literal.getLoBits(numBits: 16).getSExtValue()), |
2083 | VT: type, HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2084 | } |
2085 | |
2086 | return AMDGPU::isInlinableLiteral32( |
2087 | Literal: static_cast<int32_t>(Literal.getLoBits(numBits: 32).getZExtValue()), |
2088 | HasInv2Pi: AsmParser->hasInv2PiInlineImm()); |
2089 | } |
2090 | |
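 | // Roughly: int tokens are accepted when they fit the (possibly truncated) |
 | // operand width; fp tokens are accepted for fp operands when the conversion |
 | // neither overflows nor underflows, and never for 64-bit integer operands. |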
2091 | bool AMDGPUOperand::isLiteralImm(MVT type) const { |
2092 | // Check that this immediate can be added as literal |
2093 | if (!isImmTy(ImmT: ImmTyNone)) { |
2094 | return false; |
2095 | } |
2096 | |
2097 | if (!Imm.IsFPImm) { |
2098 | // We got int literal token. |
2099 | |
2100 | if (type == MVT::f64 && hasFPModifiers()) { |
2101 | // FP modifiers cannot be applied to int literals while preserving the same |
2102 | // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid |
2103 | // ambiguity, disable these cases. |
2104 | return false; |
2105 | } |
2106 | |
2107 | unsigned Size = type.getSizeInBits(); |
2108 | if (Size == 64) |
2109 | Size = 32; |
2110 | |
2111 | // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP |
2112 | // types. |
2113 | return isSafeTruncation(Val: Imm.Val, Size); |
2114 | } |
2115 | |
2116 | // We got fp literal token |
2117 | if (type == MVT::f64) { // Expected 64-bit fp operand |
2118 | // The low 32 bits of the literal would be zeroed, but we accept such literals. |
2119 | return true; |
2120 | } |
2121 | |
2122 | if (type == MVT::i64) { // Expected 64-bit int operand |
2123 | // We don't allow fp literals in 64-bit integer instructions. It is |
2124 | // unclear how we should encode them. |
2125 | return false; |
2126 | } |
2127 | |
2128 | // We allow fp literals with f16x2 operands assuming that the specified |
2129 | // literal goes into the lower half and the upper half is zero. We also |
2130 | // require that the literal may be losslessly converted to f16. |
2131 | // |
2132 | // For i16x2 operands, we assume that the specified literal is encoded as a |
2133 | // single-precision float. This is pretty odd, but it matches SP3 and what |
2134 | // happens in hardware. |
2135 | MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 |
2136 | : (type == MVT::v2i16) ? MVT::f32 |
2137 | : (type == MVT::v2f32) ? MVT::f32 |
2138 | : type; |
2139 | |
2140 | APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); |
2141 | return canLosslesslyConvertToFPType(FPLiteral, VT: ExpectedType); |
2142 | } |
2143 | |
2144 | bool AMDGPUOperand::isRegClass(unsigned RCID) const { |
2145 | return isRegKind() && AsmParser->getMRI()->getRegClass(i: RCID).contains(Reg: getReg()); |
2146 | } |
2147 | |
2148 | bool AMDGPUOperand::isVRegWithInputMods() const { |
2149 | return isRegClass(RCID: AMDGPU::VGPR_32RegClassID) || |
2150 | // GFX90A allows DPP on 64-bit operands. |
2151 | (isRegClass(RCID: AMDGPU::VReg_64RegClassID) && |
2152 | AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]); |
2153 | } |
2154 | |
2155 | template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const { |
2156 | return isRegClass(RCID: IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID |
2157 | : AMDGPU::VGPR_16_Lo128RegClassID); |
2158 | } |
2159 | |
2160 | bool AMDGPUOperand::isSDWAOperand(MVT type) const { |
2161 | if (AsmParser->isVI()) |
2162 | return isVReg32(); |
2163 | if (AsmParser->isGFX9Plus()) |
2164 | return isRegClass(RCID: AMDGPU::VS_32RegClassID) || isInlinableImm(type); |
2165 | return false; |
2166 | } |
2167 | |
2168 | bool AMDGPUOperand::isSDWAFP16Operand() const { |
2169 | return isSDWAOperand(type: MVT::f16); |
2170 | } |
2171 | |
2172 | bool AMDGPUOperand::isSDWAFP32Operand() const { |
2173 | return isSDWAOperand(type: MVT::f32); |
2174 | } |
2175 | |
2176 | bool AMDGPUOperand::isSDWAInt16Operand() const { |
2177 | return isSDWAOperand(type: MVT::i16); |
2178 | } |
2179 | |
2180 | bool AMDGPUOperand::isSDWAInt32Operand() const { |
2181 | return isSDWAOperand(type: MVT::i32); |
2182 | } |
2183 | |
2184 | bool AMDGPUOperand::isBoolReg() const { |
2185 | auto FB = AsmParser->getFeatureBits(); |
2186 | return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) || |
2187 | (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32())); |
2188 | } |
2189 | |
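 | // Example (Size == 4): 'abs' clears bit 31 and 'neg' flips it, so applying |
 | // 'neg' to 1.0 (0x3f800000) yields -1.0 (0xbf800000). |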
2190 | uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const |
2191 | { |
2192 | assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); |
2193 | assert(Size == 2 || Size == 4 || Size == 8); |
2194 | |
2195 | const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); |
2196 | |
2197 | if (Imm.Mods.Abs) { |
2198 | Val &= ~FpSignMask; |
2199 | } |
2200 | if (Imm.Mods.Neg) { |
2201 | Val ^= FpSignMask; |
2202 | } |
2203 | |
2204 | return Val; |
2205 | } |
2206 | |
2207 | void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { |
2208 | if (isExpr()) { |
2209 | Inst.addOperand(Op: MCOperand::createExpr(Val: Expr)); |
2210 | return; |
2211 | } |
2212 | |
2213 | if (AMDGPU::isSISrcOperand(Desc: AsmParser->getMII()->get(Opcode: Inst.getOpcode()), |
2214 | OpNo: Inst.getNumOperands())) { |
2215 | addLiteralImmOperand(Inst, Val: Imm.Val, |
2216 | ApplyModifiers: ApplyModifiers & |
2217 | isImmTy(ImmT: ImmTyNone) && Imm.Mods.hasFPModifiers()); |
2218 | } else { |
2219 | assert(!isImmTy(ImmTyNone) || !hasModifiers()); |
2220 | Inst.addOperand(Op: MCOperand::createImm(Val: Imm.Val)); |
2221 | setImmKindNone(); |
2222 | } |
2223 | } |
2224 | |
2225 | void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { |
2226 | const auto& InstDesc = AsmParser->getMII()->get(Opcode: Inst.getOpcode()); |
2227 | auto OpNum = Inst.getNumOperands(); |
2228 | // Check that this operand accepts literals |
2229 | assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); |
2230 | |
2231 | if (ApplyModifiers) { |
2232 | assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); |
2233 | const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(Desc: InstDesc, OpNo: OpNum); |
2234 | Val = applyInputFPModifiers(Val, Size); |
2235 | } |
2236 | |
2237 | APInt Literal(64, Val); |
2238 | uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; |
2239 | |
2240 | if (Imm.IsFPImm) { // We got fp literal token |
2241 | switch (OpTy) { |
2242 | case AMDGPU::OPERAND_REG_IMM_INT64: |
2243 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2244 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
2245 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2246 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2247 | if (AMDGPU::isInlinableLiteral64(Literal: Literal.getZExtValue(), |
2248 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2249 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getZExtValue())); |
2250 | setImmKindConst(); |
2251 | return; |
2252 | } |
2253 | |
2254 | // Non-inlineable |
2255 | if (AMDGPU::isSISrcFPOperand(Desc: InstDesc, OpNo: OpNum)) { // Expected 64-bit fp operand |
2256 | // For fp operands we check if low 32 bits are zeros |
2257 | if (Literal.getLoBits(numBits: 32) != 0) { |
2258 | const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(L: Inst.getLoc(), |
2259 | Msg: "Can't encode literal as exact 64-bit floating-point operand. " |
2260 | "Low 32-bits will be set to zero" ); |
2261 | Val &= 0xffffffff00000000u; |
2262 | } |
2263 | |
2264 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2265 | setImmKindLiteral(); |
2266 | return; |
2267 | } |
2268 | |
2269 | // We don't allow fp literals in 64-bit integer instructions. It is |
2270 | // unclear how we should encode them. This case should be checked earlier |
2271 | // in predicate methods (isLiteralImm()) |
2272 | llvm_unreachable("fp literal in 64-bit integer instruction." ); |
2273 | |
2274 | case AMDGPU::OPERAND_REG_IMM_BF16: |
2275 | case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: |
2276 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
2277 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
2278 | case AMDGPU::OPERAND_REG_INLINE_AC_BF16: |
2279 | case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: |
2280 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2281 | if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) { |
2282 | // This is 1/(2*pi), which would be truncated to bf16 with a loss of |
2283 | // precision. The constant represents the idiomatic fp32 value of |
2284 | // 1/(2*pi) = 0.15915494, since bf16 is in fact fp32 with the low 16 bits |
2285 | // cleared. Prevent the rounding below. |
2286 | Inst.addOperand(Op: MCOperand::createImm(Val: 0x3e22)); |
2287 | setImmKindLiteral(); |
2288 | return; |
2289 | } |
2290 | [[fallthrough]]; |
2291 | |
2292 | case AMDGPU::OPERAND_REG_IMM_INT32: |
2293 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2294 | case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: |
2295 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
2296 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2297 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
2298 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2299 | case AMDGPU::OPERAND_REG_IMM_INT16: |
2300 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2301 | case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: |
2302 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
2303 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2304 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
2305 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2306 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
2307 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
2308 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: |
2309 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: |
2310 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2311 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2312 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
2313 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2314 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: |
2315 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
2316 | case AMDGPU::OPERAND_KIMM32: |
2317 | case AMDGPU::OPERAND_KIMM16: |
2318 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { |
2319 | bool lost; |
2320 | APFloat FPLiteral(APFloat::IEEEdouble(), Literal); |
2321 | // Convert the literal to the FP semantics of the operand type. |
2322 | FPLiteral.convert(ToSemantics: *getOpFltSemantics(OperandType: OpTy), |
2323 | RM: APFloat::rmNearestTiesToEven, losesInfo: &lost); |
2324 | // We allow precision loss but not overflow or underflow. This should have |
2325 | // been checked earlier in isLiteralImm(). |
2326 | |
2327 | uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); |
2328 | Inst.addOperand(Op: MCOperand::createImm(Val: ImmVal)); |
2329 | if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { |
2330 | setImmKindMandatoryLiteral(); |
2331 | } else { |
2332 | setImmKindLiteral(); |
2333 | } |
2334 | return; |
2335 | } |
2336 | default: |
2337 | llvm_unreachable("invalid operand size" ); |
2338 | } |
2339 | |
2340 | return; |
2341 | } |
2342 | |
2343 | // We got int literal token. |
2344 | // Only sign extend inline immediates. |
2345 | switch (OpTy) { |
2346 | case AMDGPU::OPERAND_REG_IMM_INT32: |
2347 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2348 | case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: |
2349 | case AMDGPU::OPERAND_REG_INLINE_C_INT32: |
2350 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2351 | case AMDGPU::OPERAND_REG_INLINE_AC_INT32: |
2352 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2353 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2354 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2355 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2356 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2357 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
2358 | case AMDGPU::OPERAND_REG_IMM_V2INT32: |
2359 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: |
2360 | case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: |
2361 | if (isSafeTruncation(Val, Size: 32) && |
2362 | AMDGPU::isInlinableLiteral32(Literal: static_cast<int32_t>(Val), |
2363 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2364 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2365 | setImmKindConst(); |
2366 | return; |
2367 | } |
2368 | |
2369 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffffffff)); |
2370 | setImmKindLiteral(); |
2371 | return; |
2372 | |
2373 | case AMDGPU::OPERAND_REG_IMM_INT64: |
2374 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2375 | case AMDGPU::OPERAND_REG_INLINE_C_INT64: |
2376 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2377 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2378 | if (AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2379 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2380 | setImmKindConst(); |
2381 | return; |
2382 | } |
2383 | |
2384 | Val = AMDGPU::isSISrcFPOperand(Desc: InstDesc, OpNo: OpNum) ? (uint64_t)Val << 32 |
2385 | : Lo_32(Value: Val); |
2386 | |
2387 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2388 | setImmKindLiteral(); |
2389 | return; |
2390 | |
2391 | case AMDGPU::OPERAND_REG_IMM_INT16: |
2392 | case AMDGPU::OPERAND_REG_INLINE_C_INT16: |
2393 | case AMDGPU::OPERAND_REG_INLINE_AC_INT16: |
2394 | if (isSafeTruncation(Val, Size: 16) && |
2395 | AMDGPU::isInlinableIntLiteral(Literal: static_cast<int16_t>(Val))) { |
2396 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffffffff)); |
2397 | setImmKindConst(); |
2398 | return; |
2399 | } |
2400 | |
2401 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2402 | setImmKindLiteral(); |
2403 | return; |
2404 | |
2405 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2406 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2407 | case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: |
2408 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
2409 | if (isSafeTruncation(Val, Size: 16) && |
2410 | AMDGPU::isInlinableLiteralFP16(Literal: static_cast<int16_t>(Val), |
2411 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2412 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2413 | setImmKindConst(); |
2414 | return; |
2415 | } |
2416 | |
2417 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2418 | setImmKindLiteral(); |
2419 | return; |
2420 | |
2421 | case AMDGPU::OPERAND_REG_IMM_BF16: |
2422 | case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: |
2423 | case AMDGPU::OPERAND_REG_INLINE_C_BF16: |
2424 | case AMDGPU::OPERAND_REG_INLINE_AC_BF16: |
2425 | if (isSafeTruncation(Val, Size: 16) && |
2426 | AMDGPU::isInlinableLiteralBF16(Literal: static_cast<int16_t>(Val), |
2427 | HasInv2Pi: AsmParser->hasInv2PiInlineImm())) { |
2428 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2429 | setImmKindConst(); |
2430 | return; |
2431 | } |
2432 | |
2433 | Inst.addOperand(Op: MCOperand::createImm(Val: Val & 0xffff)); |
2434 | setImmKindLiteral(); |
2435 | return; |
2436 | |
2437 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
2438 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: { |
2439 | assert(isSafeTruncation(Val, 16)); |
2440 | assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))); |
2441 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2442 | return; |
2443 | } |
2444 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2445 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { |
2446 | assert(isSafeTruncation(Val, 16)); |
2447 | assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val), |
2448 | AsmParser->hasInv2PiInlineImm())); |
2449 | |
2450 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2451 | return; |
2452 | } |
2453 | |
2454 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
2455 | case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: { |
2456 | assert(isSafeTruncation(Val, 16)); |
2457 | assert(AMDGPU::isInlinableLiteralBF16(static_cast<int16_t>(Val), |
2458 | AsmParser->hasInv2PiInlineImm())); |
2459 | |
2460 | Inst.addOperand(Op: MCOperand::createImm(Val)); |
2461 | return; |
2462 | } |
2463 | |
2464 | case AMDGPU::OPERAND_KIMM32: |
2465 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getLoBits(numBits: 32).getZExtValue())); |
2466 | setImmKindMandatoryLiteral(); |
2467 | return; |
2468 | case AMDGPU::OPERAND_KIMM16: |
2469 | Inst.addOperand(Op: MCOperand::createImm(Val: Literal.getLoBits(numBits: 16).getZExtValue())); |
2470 | setImmKindMandatoryLiteral(); |
2471 | return; |
2472 | default: |
2473 | llvm_unreachable("invalid operand size" ); |
2474 | } |
2475 | } |
2476 | |
2477 | void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { |
2478 | Inst.addOperand(Op: MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: getReg(), STI: AsmParser->getSTI()))); |
2479 | } |
2480 | |
2481 | bool AMDGPUOperand::isInlineValue() const { |
2482 | return isRegKind() && ::isInlineValue(Reg: getReg()); |
2483 | } |
2484 | |
2485 | //===----------------------------------------------------------------------===// |
2486 | // AsmParser |
2487 | //===----------------------------------------------------------------------===// |
2488 | |
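 | // Equates a symbol to a constant value so that assembly source can use the |
 | // name in expressions like any other assembler constant. |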
2489 | void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) { |
2490 | // TODO: make these pre-defined variables read-only. |
2491 | // Currently there is no suitable machinery in core llvm-mc for this. |
2492 | // MCSymbol::isRedefinable is intended for another purpose, and |
2493 | // AsmParser::parseDirectiveSet() cannot be specialized for a specific target. |
2494 | MCContext &Ctx = getContext(); |
2495 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id); |
2496 | Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx)); |
2497 | } |
2498 | |
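 | // Maps a register kind and a width in bits to a register class ID, e.g. |
 | // (IS_VGPR, 64) -> VReg_64RegClassID; unsupported widths yield -1. |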
2499 | static int getRegClass(RegisterKind Is, unsigned RegWidth) { |
2500 | if (Is == IS_VGPR) { |
2501 | switch (RegWidth) { |
2502 | default: return -1; |
2503 | case 32: |
2504 | return AMDGPU::VGPR_32RegClassID; |
2505 | case 64: |
2506 | return AMDGPU::VReg_64RegClassID; |
2507 | case 96: |
2508 | return AMDGPU::VReg_96RegClassID; |
2509 | case 128: |
2510 | return AMDGPU::VReg_128RegClassID; |
2511 | case 160: |
2512 | return AMDGPU::VReg_160RegClassID; |
2513 | case 192: |
2514 | return AMDGPU::VReg_192RegClassID; |
2515 | case 224: |
2516 | return AMDGPU::VReg_224RegClassID; |
2517 | case 256: |
2518 | return AMDGPU::VReg_256RegClassID; |
2519 | case 288: |
2520 | return AMDGPU::VReg_288RegClassID; |
2521 | case 320: |
2522 | return AMDGPU::VReg_320RegClassID; |
2523 | case 352: |
2524 | return AMDGPU::VReg_352RegClassID; |
2525 | case 384: |
2526 | return AMDGPU::VReg_384RegClassID; |
2527 | case 512: |
2528 | return AMDGPU::VReg_512RegClassID; |
2529 | case 1024: |
2530 | return AMDGPU::VReg_1024RegClassID; |
2531 | } |
2532 | } else if (Is == IS_TTMP) { |
2533 | switch (RegWidth) { |
2534 | default: return -1; |
2535 | case 32: |
2536 | return AMDGPU::TTMP_32RegClassID; |
2537 | case 64: |
2538 | return AMDGPU::TTMP_64RegClassID; |
2539 | case 128: |
2540 | return AMDGPU::TTMP_128RegClassID; |
2541 | case 256: |
2542 | return AMDGPU::TTMP_256RegClassID; |
2543 | case 512: |
2544 | return AMDGPU::TTMP_512RegClassID; |
2545 | } |
2546 | } else if (Is == IS_SGPR) { |
2547 | switch (RegWidth) { |
2548 | default: return -1; |
2549 | case 32: |
2550 | return AMDGPU::SGPR_32RegClassID; |
2551 | case 64: |
2552 | return AMDGPU::SGPR_64RegClassID; |
2553 | case 96: |
2554 | return AMDGPU::SGPR_96RegClassID; |
2555 | case 128: |
2556 | return AMDGPU::SGPR_128RegClassID; |
2557 | case 160: |
2558 | return AMDGPU::SGPR_160RegClassID; |
2559 | case 192: |
2560 | return AMDGPU::SGPR_192RegClassID; |
2561 | case 224: |
2562 | return AMDGPU::SGPR_224RegClassID; |
2563 | case 256: |
2564 | return AMDGPU::SGPR_256RegClassID; |
2565 | case 288: |
2566 | return AMDGPU::SGPR_288RegClassID; |
2567 | case 320: |
2568 | return AMDGPU::SGPR_320RegClassID; |
2569 | case 352: |
2570 | return AMDGPU::SGPR_352RegClassID; |
2571 | case 384: |
2572 | return AMDGPU::SGPR_384RegClassID; |
2573 | case 512: |
2574 | return AMDGPU::SGPR_512RegClassID; |
2575 | } |
2576 | } else if (Is == IS_AGPR) { |
2577 | switch (RegWidth) { |
2578 | default: return -1; |
2579 | case 32: |
2580 | return AMDGPU::AGPR_32RegClassID; |
2581 | case 64: |
2582 | return AMDGPU::AReg_64RegClassID; |
2583 | case 96: |
2584 | return AMDGPU::AReg_96RegClassID; |
2585 | case 128: |
2586 | return AMDGPU::AReg_128RegClassID; |
2587 | case 160: |
2588 | return AMDGPU::AReg_160RegClassID; |
2589 | case 192: |
2590 | return AMDGPU::AReg_192RegClassID; |
2591 | case 224: |
2592 | return AMDGPU::AReg_224RegClassID; |
2593 | case 256: |
2594 | return AMDGPU::AReg_256RegClassID; |
2595 | case 288: |
2596 | return AMDGPU::AReg_288RegClassID; |
2597 | case 320: |
2598 | return AMDGPU::AReg_320RegClassID; |
2599 | case 352: |
2600 | return AMDGPU::AReg_352RegClassID; |
2601 | case 384: |
2602 | return AMDGPU::AReg_384RegClassID; |
2603 | case 512: |
2604 | return AMDGPU::AReg_512RegClassID; |
2605 | case 1024: |
2606 | return AMDGPU::AReg_1024RegClassID; |
2607 | } |
2608 | } |
2609 | return -1; |
2610 | } |
2611 | |
2612 | static unsigned getSpecialRegForName(StringRef RegName) { |
2613 | return StringSwitch<unsigned>(RegName) |
2614 | .Case(S: "exec" , Value: AMDGPU::EXEC) |
2615 | .Case(S: "vcc" , Value: AMDGPU::VCC) |
2616 | .Case(S: "flat_scratch" , Value: AMDGPU::FLAT_SCR) |
2617 | .Case(S: "xnack_mask" , Value: AMDGPU::XNACK_MASK) |
2618 | .Case(S: "shared_base" , Value: AMDGPU::SRC_SHARED_BASE) |
2619 | .Case(S: "src_shared_base" , Value: AMDGPU::SRC_SHARED_BASE) |
2620 | .Case(S: "shared_limit" , Value: AMDGPU::SRC_SHARED_LIMIT) |
2621 | .Case(S: "src_shared_limit" , Value: AMDGPU::SRC_SHARED_LIMIT) |
2622 | .Case(S: "private_base" , Value: AMDGPU::SRC_PRIVATE_BASE) |
2623 | .Case(S: "src_private_base" , Value: AMDGPU::SRC_PRIVATE_BASE) |
2624 | .Case(S: "private_limit" , Value: AMDGPU::SRC_PRIVATE_LIMIT) |
2625 | .Case(S: "src_private_limit" , Value: AMDGPU::SRC_PRIVATE_LIMIT) |
2626 | .Case(S: "pops_exiting_wave_id" , Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID) |
2627 | .Case(S: "src_pops_exiting_wave_id" , Value: AMDGPU::SRC_POPS_EXITING_WAVE_ID) |
2628 | .Case(S: "lds_direct" , Value: AMDGPU::LDS_DIRECT) |
2629 | .Case(S: "src_lds_direct" , Value: AMDGPU::LDS_DIRECT) |
2630 | .Case(S: "m0" , Value: AMDGPU::M0) |
2631 | .Case(S: "vccz" , Value: AMDGPU::SRC_VCCZ) |
2632 | .Case(S: "src_vccz" , Value: AMDGPU::SRC_VCCZ) |
2633 | .Case(S: "execz" , Value: AMDGPU::SRC_EXECZ) |
2634 | .Case(S: "src_execz" , Value: AMDGPU::SRC_EXECZ) |
2635 | .Case(S: "scc" , Value: AMDGPU::SRC_SCC) |
2636 | .Case(S: "src_scc" , Value: AMDGPU::SRC_SCC) |
2637 | .Case(S: "tba" , Value: AMDGPU::TBA) |
2638 | .Case(S: "tma" , Value: AMDGPU::TMA) |
2639 | .Case(S: "flat_scratch_lo" , Value: AMDGPU::FLAT_SCR_LO) |
2640 | .Case(S: "flat_scratch_hi" , Value: AMDGPU::FLAT_SCR_HI) |
2641 | .Case(S: "xnack_mask_lo" , Value: AMDGPU::XNACK_MASK_LO) |
2642 | .Case(S: "xnack_mask_hi" , Value: AMDGPU::XNACK_MASK_HI) |
2643 | .Case(S: "vcc_lo" , Value: AMDGPU::VCC_LO) |
2644 | .Case(S: "vcc_hi" , Value: AMDGPU::VCC_HI) |
2645 | .Case(S: "exec_lo" , Value: AMDGPU::EXEC_LO) |
2646 | .Case(S: "exec_hi" , Value: AMDGPU::EXEC_HI) |
2647 | .Case(S: "tma_lo" , Value: AMDGPU::TMA_LO) |
2648 | .Case(S: "tma_hi" , Value: AMDGPU::TMA_HI) |
2649 | .Case(S: "tba_lo" , Value: AMDGPU::TBA_LO) |
2650 | .Case(S: "tba_hi" , Value: AMDGPU::TBA_HI) |
2651 | .Case(S: "pc" , Value: AMDGPU::PC_REG) |
2652 | .Case(S: "null" , Value: AMDGPU::SGPR_NULL) |
2653 | .Default(Value: AMDGPU::NoRegister); |
2654 | } |
2655 | |
2656 | bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, |
2657 | SMLoc &EndLoc, bool RestoreOnFailure) { |
2658 | auto R = parseRegister(); |
2659 | if (!R) return true; |
2660 | assert(R->isReg()); |
2661 | RegNo = R->getReg(); |
2662 | StartLoc = R->getStartLoc(); |
2663 | EndLoc = R->getEndLoc(); |
2664 | return false; |
2665 | } |
2666 | |
2667 | bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, |
2668 | SMLoc &EndLoc) { |
2669 | return ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); |
2670 | } |
2671 | |
2672 | ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, |
2673 | SMLoc &EndLoc) { |
2674 | bool Result = ParseRegister(RegNo&: Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); |
2675 | bool PendingErrors = getParser().hasPendingError(); |
2676 | getParser().clearPendingErrors(); |
2677 | if (PendingErrors) |
2678 | return ParseStatus::Failure; |
2679 | if (Result) |
2680 | return ParseStatus::NoMatch; |
2681 | return ParseStatus::Success; |
2682 | } |
2683 | |
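 | // e.g. [s0,s1] grows the tuple to 64 bits, while [s0,s2] is rejected because |
 | // indices must be consecutive; special registers only combine into their |
 | // known pairs such as [exec_lo,exec_hi]. |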
2684 | bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, |
2685 | RegisterKind RegKind, unsigned Reg1, |
2686 | SMLoc Loc) { |
2687 | switch (RegKind) { |
2688 | case IS_SPECIAL: |
2689 | if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { |
2690 | Reg = AMDGPU::EXEC; |
2691 | RegWidth = 64; |
2692 | return true; |
2693 | } |
2694 | if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { |
2695 | Reg = AMDGPU::FLAT_SCR; |
2696 | RegWidth = 64; |
2697 | return true; |
2698 | } |
2699 | if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { |
2700 | Reg = AMDGPU::XNACK_MASK; |
2701 | RegWidth = 64; |
2702 | return true; |
2703 | } |
2704 | if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { |
2705 | Reg = AMDGPU::VCC; |
2706 | RegWidth = 64; |
2707 | return true; |
2708 | } |
2709 | if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { |
2710 | Reg = AMDGPU::TBA; |
2711 | RegWidth = 64; |
2712 | return true; |
2713 | } |
2714 | if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { |
2715 | Reg = AMDGPU::TMA; |
2716 | RegWidth = 64; |
2717 | return true; |
2718 | } |
2719 | Error(L: Loc, Msg: "register does not fit in the list" ); |
2720 | return false; |
2721 | case IS_VGPR: |
2722 | case IS_SGPR: |
2723 | case IS_AGPR: |
2724 | case IS_TTMP: |
2725 | if (Reg1 != Reg + RegWidth / 32) { |
2726 | Error(L: Loc, Msg: "registers in a list must have consecutive indices" ); |
2727 | return false; |
2728 | } |
2729 | RegWidth += 32; |
2730 | return true; |
2731 | default: |
2732 | llvm_unreachable("unexpected register kind" ); |
2733 | } |
2734 | } |
2735 | |
2736 | struct RegInfo { |
2737 | StringLiteral Name; |
2738 | RegisterKind Kind; |
2739 | }; |
2740 | |
2741 | static constexpr RegInfo RegularRegisters[] = { |
2742 | {.Name: {"v" }, .Kind: IS_VGPR}, |
2743 | {.Name: {"s" }, .Kind: IS_SGPR}, |
2744 | {.Name: {"ttmp" }, .Kind: IS_TTMP}, |
2745 | {.Name: {"acc" }, .Kind: IS_AGPR}, |
2746 | {.Name: {"a" }, .Kind: IS_AGPR}, |
2747 | }; |
2748 | |
2749 | static bool isRegularReg(RegisterKind Kind) { |
2750 | return Kind == IS_VGPR || |
2751 | Kind == IS_SGPR || |
2752 | Kind == IS_TTMP || |
2753 | Kind == IS_AGPR; |
2754 | } |
2755 | |
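 | // Matching is prefix-based: "v0", "v[0:1]" and even "vcc" all match the "v" |
 | // entry, so callers still validate the suffix; isRegister(), for example, |
 | // falls back to the special-register names when it is not a plain index. |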
2756 | static const RegInfo* getRegularRegInfo(StringRef Str) { |
2757 | for (const RegInfo &Reg : RegularRegisters) |
2758 | if (Str.starts_with(Prefix: Reg.Name)) |
2759 | return &Reg; |
2760 | return nullptr; |
2761 | } |
2762 | |
2763 | static bool getRegNum(StringRef Str, unsigned& Num) { |
2764 | return !Str.getAsInteger(Radix: 10, Result&: Num); |
2765 | } |
2766 | |
2767 | bool |
2768 | AMDGPUAsmParser::isRegister(const AsmToken &Token, |
2769 | const AsmToken &NextToken) const { |
2770 | |
2771 | // A list of consecutive registers: [s0,s1,s2,s3] |
2772 | if (Token.is(K: AsmToken::LBrac)) |
2773 | return true; |
2774 | |
2775 | if (!Token.is(K: AsmToken::Identifier)) |
2776 | return false; |
2777 | |
2778 | // A single register like s0 or a range of registers like s[0:1] |
2779 | |
2780 | StringRef Str = Token.getString(); |
2781 | const RegInfo *Reg = getRegularRegInfo(Str); |
2782 | if (Reg) { |
2783 | StringRef RegName = Reg->Name; |
2784 | StringRef RegSuffix = Str.substr(Start: RegName.size()); |
2785 | if (!RegSuffix.empty()) { |
2786 | RegSuffix.consume_back(Suffix: ".l" ); |
2787 | RegSuffix.consume_back(Suffix: ".h" ); |
2788 | unsigned Num; |
2789 | // A single register with an index: rXX |
2790 | if (getRegNum(Str: RegSuffix, Num)) |
2791 | return true; |
2792 | } else { |
2793 | // A range of registers: r[XX:YY]. |
2794 | if (NextToken.is(K: AsmToken::LBrac)) |
2795 | return true; |
2796 | } |
2797 | } |
2798 | |
2799 | return getSpecialRegForName(RegName: Str) != AMDGPU::NoRegister; |
2800 | } |
2801 | |
2802 | bool |
2803 | AMDGPUAsmParser::isRegister() |
2804 | { |
2805 | return isRegister(Token: getToken(), NextToken: peekToken()); |
2806 | } |
2807 | |
2808 | unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, |
2809 | unsigned SubReg, unsigned RegWidth, |
2810 | SMLoc Loc) { |
2811 | assert(isRegularReg(RegKind)); |
2812 | |
2813 | unsigned AlignSize = 1; |
2814 | if (RegKind == IS_SGPR || RegKind == IS_TTMP) { |
2815 | // SGPR and TTMP registers must be aligned. |
2816 | // Max required alignment is 4 dwords. |
2817 | AlignSize = std::min(a: llvm::bit_ceil(Value: RegWidth / 32), b: 4u); |
2818 | } |
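 | // For example, a 128-bit SGPR tuple (AlignSize == 4) must start at a |
 | // multiple of 4: s[4:7] passes the check below, s[2:5] does not. |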
2819 | |
2820 | if (RegNum % AlignSize != 0) { |
2821 | Error(L: Loc, Msg: "invalid register alignment" ); |
2822 | return AMDGPU::NoRegister; |
2823 | } |
2824 | |
2825 | unsigned RegIdx = RegNum / AlignSize; |
2826 | int RCID = getRegClass(Is: RegKind, RegWidth); |
2827 | if (RCID == -1) { |
2828 | Error(L: Loc, Msg: "invalid or unsupported register size" ); |
2829 | return AMDGPU::NoRegister; |
2830 | } |
2831 | |
2832 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
2833 | const MCRegisterClass RC = TRI->getRegClass(i: RCID); |
2834 | if (RegIdx >= RC.getNumRegs()) { |
2835 | Error(L: Loc, Msg: "register index is out of range" ); |
2836 | return AMDGPU::NoRegister; |
2837 | } |
2838 | |
2839 | unsigned Reg = RC.getRegister(i: RegIdx); |
2840 | |
2841 | if (SubReg) { |
2842 | Reg = TRI->getSubReg(Reg, Idx: SubReg); |
2843 | |
2844 | // Currently all regular registers have their .l and .h subregisters, so |
2845 | // we should never need to generate an error here. |
2846 | assert(Reg && "Invalid subregister!" ); |
2847 | } |
2848 | |
2849 | return Reg; |
2850 | } |
2851 | |
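 | // Parses the bracketed index part after the register prefix has been |
 | // consumed, e.g. "[2]" (RegWidth == 32) or "[0:3]" (RegWidth == 128). |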
2852 | bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { |
2853 | int64_t RegLo, RegHi; |
2854 | if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "missing register index" )) |
2855 | return false; |
2856 | |
2857 | SMLoc FirstIdxLoc = getLoc(); |
2858 | SMLoc SecondIdxLoc; |
2859 | |
2860 | if (!parseExpr(Imm&: RegLo)) |
2861 | return false; |
2862 | |
2863 | if (trySkipToken(Kind: AsmToken::Colon)) { |
2864 | SecondIdxLoc = getLoc(); |
2865 | if (!parseExpr(Imm&: RegHi)) |
2866 | return false; |
2867 | } else { |
2868 | RegHi = RegLo; |
2869 | } |
2870 | |
2871 | if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket" )) |
2872 | return false; |
2873 | |
2874 | if (!isUInt<32>(x: RegLo)) { |
2875 | Error(L: FirstIdxLoc, Msg: "invalid register index" ); |
2876 | return false; |
2877 | } |
2878 | |
2879 | if (!isUInt<32>(x: RegHi)) { |
2880 | Error(L: SecondIdxLoc, Msg: "invalid register index" ); |
2881 | return false; |
2882 | } |
2883 | |
2884 | if (RegLo > RegHi) { |
2885 | Error(L: FirstIdxLoc, Msg: "first register index should not exceed second index" ); |
2886 | return false; |
2887 | } |
2888 | |
2889 | Num = static_cast<unsigned>(RegLo); |
2890 | RegWidth = 32 * ((RegHi - RegLo) + 1); |
2891 | return true; |
2892 | } |
2893 | |
2894 | unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, |
2895 | unsigned &RegNum, unsigned &RegWidth, |
2896 | SmallVectorImpl<AsmToken> &Tokens) { |
2897 | assert(isToken(AsmToken::Identifier)); |
2898 | unsigned Reg = getSpecialRegForName(RegName: getTokenStr()); |
2899 | if (Reg) { |
2900 | RegNum = 0; |
2901 | RegWidth = 32; |
2902 | RegKind = IS_SPECIAL; |
2903 | Tokens.push_back(Elt: getToken()); |
2904 | lex(); // skip register name |
2905 | } |
2906 | return Reg; |
2907 | } |
2908 | |
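 | // Examples: "v0" -> VGPR_32, "s[0:1]" -> SGPR_64, and "v1.l" / "v1.h" |
 | // select the 16-bit lo16 / hi16 halves of v1. |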
2909 | unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, |
2910 | unsigned &RegNum, unsigned &RegWidth, |
2911 | SmallVectorImpl<AsmToken> &Tokens) { |
2912 | assert(isToken(AsmToken::Identifier)); |
2913 | StringRef RegName = getTokenStr(); |
2914 | auto Loc = getLoc(); |
2915 | |
2916 | const RegInfo *RI = getRegularRegInfo(Str: RegName); |
2917 | if (!RI) { |
2918 | Error(L: Loc, Msg: "invalid register name" ); |
2919 | return AMDGPU::NoRegister; |
2920 | } |
2921 | |
2922 | Tokens.push_back(Elt: getToken()); |
2923 | lex(); // skip register name |
2924 | |
2925 | RegKind = RI->Kind; |
2926 | StringRef RegSuffix = RegName.substr(Start: RI->Name.size()); |
2927 | unsigned SubReg = NoSubRegister; |
2928 | if (!RegSuffix.empty()) { |
2929 | // We don't know the opcode until we are done parsing, so we don't know |
2930 | // whether registers should be 16 or 32 bit. It is therefore mandatory to |
2931 | // use .l or .h to correctly specify 16-bit registers. We also can't tell |
2932 | // class VGPR_16_Lo128 from VGPR_16, so always parse them as VGPR_16. |
2933 | if (RegSuffix.consume_back(Suffix: ".l" )) |
2934 | SubReg = AMDGPU::lo16; |
2935 | else if (RegSuffix.consume_back(Suffix: ".h" )) |
2936 | SubReg = AMDGPU::hi16; |
2937 | |
2938 | // Single 32-bit register: vXX. |
2939 | if (!getRegNum(Str: RegSuffix, Num&: RegNum)) { |
2940 | Error(L: Loc, Msg: "invalid register index" ); |
2941 | return AMDGPU::NoRegister; |
2942 | } |
2943 | RegWidth = 32; |
2944 | } else { |
2945 | // Range of registers: v[XX:YY]. ":YY" is optional. |
2946 | if (!ParseRegRange(Num&: RegNum, RegWidth)) |
2947 | return AMDGPU::NoRegister; |
2948 | } |
2949 | |
2950 | return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); |
2951 | } |
2952 | |
2953 | unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, |
2954 | unsigned &RegWidth, |
2955 | SmallVectorImpl<AsmToken> &Tokens) { |
2956 | unsigned Reg = AMDGPU::NoRegister; |
2957 | auto ListLoc = getLoc(); |
2958 | |
2959 | if (!skipToken(Kind: AsmToken::LBrac, |
2960 | ErrMsg: "expected a register or a list of registers" )) { |
2961 | return AMDGPU::NoRegister; |
2962 | } |
2963 | |
2964 | // List of consecutive registers, e.g.: [s0,s1,s2,s3] |
2965 | |
2966 | auto Loc = getLoc(); |
2967 | if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) |
2968 | return AMDGPU::NoRegister; |
2969 | if (RegWidth != 32) { |
2970 | Error(L: Loc, Msg: "expected a single 32-bit register" ); |
2971 | return AMDGPU::NoRegister; |
2972 | } |
2973 | |
2974 | for (; trySkipToken(Kind: AsmToken::Comma); ) { |
2975 | RegisterKind NextRegKind; |
2976 | unsigned NextReg, NextRegNum, NextRegWidth; |
2977 | Loc = getLoc(); |
2978 | |
2979 | if (!ParseAMDGPURegister(RegKind&: NextRegKind, Reg&: NextReg, |
2980 | RegNum&: NextRegNum, RegWidth&: NextRegWidth, |
2981 | Tokens)) { |
2982 | return AMDGPU::NoRegister; |
2983 | } |
2984 | if (NextRegWidth != 32) { |
2985 | Error(L: Loc, Msg: "expected a single 32-bit register" ); |
2986 | return AMDGPU::NoRegister; |
2987 | } |
2988 | if (NextRegKind != RegKind) { |
2989 | Error(L: Loc, Msg: "registers in a list must be of the same kind" ); |
2990 | return AMDGPU::NoRegister; |
2991 | } |
2992 | if (!AddNextRegisterToList(Reg, RegWidth, RegKind, Reg1: NextReg, Loc)) |
2993 | return AMDGPU::NoRegister; |
2994 | } |
2995 | |
2996 | if (!skipToken(Kind: AsmToken::RBrac, |
2997 | ErrMsg: "expected a comma or a closing square bracket" )) { |
2998 | return AMDGPU::NoRegister; |
2999 | } |
3000 | |
3001 | if (isRegularReg(Kind: RegKind)) |
3002 | Reg = getRegularReg(RegKind, RegNum, SubReg: NoSubRegister, RegWidth, Loc: ListLoc); |
3003 | |
3004 | return Reg; |
3005 | } |
3006 | |
3007 | bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, |
3008 | unsigned &RegNum, unsigned &RegWidth, |
3009 | SmallVectorImpl<AsmToken> &Tokens) { |
3010 | auto Loc = getLoc(); |
3011 | Reg = AMDGPU::NoRegister; |
3012 | |
3013 | if (isToken(Kind: AsmToken::Identifier)) { |
3014 | Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); |
3015 | if (Reg == AMDGPU::NoRegister) |
3016 | Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); |
3017 | } else { |
3018 | Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); |
3019 | } |
3020 | |
3021 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3022 | if (Reg == AMDGPU::NoRegister) { |
3023 | assert(Parser.hasPendingError()); |
3024 | return false; |
3025 | } |
3026 | |
3027 | if (!subtargetHasRegister(MRI: *TRI, RegNo: Reg)) { |
3028 | if (Reg == AMDGPU::SGPR_NULL) { |
3029 | Error(L: Loc, Msg: "'null' operand is not supported on this GPU" ); |
3030 | } else { |
3031 | Error(L: Loc, Msg: "register not available on this GPU" ); |
3032 | } |
3033 | return false; |
3034 | } |
3035 | |
3036 | return true; |
3037 | } |
3038 | |
3039 | bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, |
3040 | unsigned &RegNum, unsigned &RegWidth, |
3041 | bool RestoreOnFailure /*=false*/) { |
3042 | Reg = AMDGPU::NoRegister; |
3043 | |
3044 | SmallVector<AsmToken, 1> Tokens; |
3045 | if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { |
3046 | if (RestoreOnFailure) { |
3047 | while (!Tokens.empty()) { |
3048 | getLexer().UnLex(Token: Tokens.pop_back_val()); |
3049 | } |
3050 | } |
3051 | return true; |
3052 | } |
3053 | return false; |
3054 | } |
3055 | |
3056 | std::optional<StringRef> |
3057 | AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { |
3058 | switch (RegKind) { |
3059 | case IS_VGPR: |
3060 | return StringRef(".amdgcn.next_free_vgpr" ); |
3061 | case IS_SGPR: |
3062 | return StringRef(".amdgcn.next_free_sgpr" ); |
3063 | default: |
3064 | return std::nullopt; |
3065 | } |
3066 | } |
3067 | |
3068 | void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { |
3069 | auto SymbolName = getGprCountSymbolName(RegKind); |
3070 | assert(SymbolName && "initializing invalid register kind" ); |
3071 | MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName); |
3072 | Sym->setVariableValue(MCConstantExpr::create(Value: 0, Ctx&: getContext())); |
3073 | } |
3074 | |
3075 | bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, |
3076 | unsigned DwordRegIndex, |
3077 | unsigned RegWidth) { |
3078 | // Symbols are only defined for GCN targets |
3079 | if (AMDGPU::getIsaVersion(GPU: getSTI().getCPU()).Major < 6) |
3080 | return true; |
3081 | |
3082 | auto SymbolName = getGprCountSymbolName(RegKind); |
3083 | if (!SymbolName) |
3084 | return true; |
3085 | MCSymbol *Sym = getContext().getOrCreateSymbol(Name: *SymbolName); |
3086 | |
3087 | int64_t NewMax = DwordRegIndex + divideCeil(Numerator: RegWidth, Denominator: 32) - 1; |
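// For example, a use of v[4:7] (DwordRegIndex = 4, RegWidth = 128) gives
// NewMax = 7, bumping .amdgcn.next_free_vgpr to 8 if it was below that.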
3088 | int64_t OldCount; |
3089 | |
3090 | if (!Sym->isVariable()) |
3091 | return !Error(L: getLoc(), |
3092 | Msg: ".amdgcn.next_free_{v,s}gpr symbols must be variable" ); |
3093 | if (!Sym->getVariableValue(SetUsed: false)->evaluateAsAbsolute(Res&: OldCount)) |
3094 | return !Error( |
3095 | L: getLoc(), |
3096 | Msg: ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions" ); |
3097 | |
3098 | if (OldCount <= NewMax) |
3099 | Sym->setVariableValue(MCConstantExpr::create(Value: NewMax + 1, Ctx&: getContext())); |
3100 | |
3101 | return true; |
3102 | } |
3103 | |
3104 | std::unique_ptr<AMDGPUOperand> |
3105 | AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { |
3106 | const auto &Tok = getToken(); |
3107 | SMLoc StartLoc = Tok.getLoc(); |
3108 | SMLoc EndLoc = Tok.getEndLoc(); |
3109 | RegisterKind RegKind; |
3110 | unsigned Reg, RegNum, RegWidth; |
3111 | |
3112 | if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { |
3113 | return nullptr; |
3114 | } |
3115 | if (isHsaAbi(STI: getSTI())) { |
3116 | if (!updateGprCountSymbols(RegKind, DwordRegIndex: RegNum, RegWidth)) |
3117 | return nullptr; |
3118 | } else |
3119 | KernelScope.usesRegister(RegKind, DwordRegIndex: RegNum, RegWidth); |
3120 | return AMDGPUOperand::CreateReg(AsmParser: this, RegNo: Reg, S: StartLoc, E: EndLoc); |
3121 | } |
3122 | |
3123 | ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, |
3124 | bool HasSP3AbsModifier, bool HasLit) { |
3125 | // TODO: add syntactic sugar for 1/(2*PI) |
3126 | |
3127 | if (isRegister()) |
3128 | return ParseStatus::NoMatch; |
3129 | assert(!isModifier()); |
3130 | |
3131 | if (!HasLit) { |
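// An explicit "lit(...)" wrapper, e.g. "lit(1.0)", requests that the value
// be encoded as a literal constant rather than an inline constant.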
3132 | HasLit = trySkipId(Id: "lit" ); |
3133 | if (HasLit) { |
3134 | if (!skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit" )) |
3135 | return ParseStatus::Failure; |
3136 | ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit); |
3137 | if (S.isSuccess() && |
3138 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3139 | return ParseStatus::Failure; |
3140 | return S; |
3141 | } |
3142 | } |
3143 | |
3144 | const auto& Tok = getToken(); |
3145 | const auto& NextTok = peekToken(); |
3146 | bool IsReal = Tok.is(K: AsmToken::Real); |
3147 | SMLoc S = getLoc(); |
3148 | bool Negate = false; |
3149 | |
3150 | if (!IsReal && Tok.is(K: AsmToken::Minus) && NextTok.is(K: AsmToken::Real)) { |
3151 | lex(); |
3152 | IsReal = true; |
3153 | Negate = true; |
3154 | } |
3155 | |
3156 | AMDGPUOperand::Modifiers Mods; |
3157 | Mods.Lit = HasLit; |
3158 | |
3159 | if (IsReal) { |
// Floating-point expressions are not supported; only floating-point
// literals with an optional sign are allowed.
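// For example, "1.5" and "-0.5" are accepted here, but "1.0+2.0" is not.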
3163 | |
3164 | StringRef Num = getTokenStr(); |
3165 | lex(); |
3166 | |
3167 | APFloat RealVal(APFloat::IEEEdouble()); |
3168 | auto roundMode = APFloat::rmNearestTiesToEven; |
3169 | if (errorToBool(Err: RealVal.convertFromString(Num, roundMode).takeError())) |
3170 | return ParseStatus::Failure; |
3171 | if (Negate) |
3172 | RealVal.changeSign(); |
3173 | |
3174 | Operands.push_back( |
3175 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: RealVal.bitcastToAPInt().getZExtValue(), Loc: S, |
3176 | Type: AMDGPUOperand::ImmTyNone, IsFPImm: true)); |
3177 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3178 | Op.setModifiers(Mods); |
3179 | |
3180 | return ParseStatus::Success; |
3181 | |
3182 | } else { |
3183 | int64_t IntVal; |
3184 | const MCExpr *Expr; |
3185 | SMLoc S = getLoc(); |
3186 | |
3187 | if (HasSP3AbsModifier) { |
// This is a workaround for handling expressions
// as arguments of the SP3 'abs' modifier, for example:
//     |1.0|
//     |-1|
//     |1+x|
// This syntax is not compatible with the syntax of standard
// MC expressions (due to the trailing '|').
3195 | SMLoc EndLoc; |
3196 | if (getParser().parsePrimaryExpr(Res&: Expr, EndLoc, TypeInfo: nullptr)) |
3197 | return ParseStatus::Failure; |
3198 | } else { |
3199 | if (Parser.parseExpression(Res&: Expr)) |
3200 | return ParseStatus::Failure; |
3201 | } |
3202 | |
3203 | if (Expr->evaluateAsAbsolute(Res&: IntVal)) { |
3204 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S)); |
3205 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3206 | Op.setModifiers(Mods); |
3207 | } else { |
3208 | if (HasLit) |
3209 | return ParseStatus::NoMatch; |
3210 | Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S)); |
3211 | } |
3212 | |
3213 | return ParseStatus::Success; |
3214 | } |
3215 | |
3216 | return ParseStatus::NoMatch; |
3217 | } |
3218 | |
3219 | ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { |
3220 | if (!isRegister()) |
3221 | return ParseStatus::NoMatch; |
3222 | |
3223 | if (auto R = parseRegister()) { |
3224 | assert(R->isReg()); |
3225 | Operands.push_back(Elt: std::move(R)); |
3226 | return ParseStatus::Success; |
3227 | } |
3228 | return ParseStatus::Failure; |
3229 | } |
3230 | |
3231 | ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, |
3232 | bool HasSP3AbsMod, bool HasLit) { |
3233 | ParseStatus Res = parseReg(Operands); |
3234 | if (!Res.isNoMatch()) |
3235 | return Res; |
3236 | if (isModifier()) |
3237 | return ParseStatus::NoMatch; |
3238 | return parseImm(Operands, HasSP3AbsModifier: HasSP3AbsMod, HasLit); |
3239 | } |
3240 | |
3241 | bool |
3242 | AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3243 | if (Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::LParen)) { |
3244 | const auto &str = Token.getString(); |
3245 | return str == "abs" || str == "neg" || str == "sext" ; |
3246 | } |
3247 | return false; |
3248 | } |
3249 | |
3250 | bool |
3251 | AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { |
3252 | return Token.is(K: AsmToken::Identifier) && NextToken.is(K: AsmToken::Colon); |
3253 | } |
3254 | |
3255 | bool |
3256 | AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3257 | return isNamedOperandModifier(Token, NextToken) || Token.is(K: AsmToken::Pipe); |
3258 | } |
3259 | |
3260 | bool |
3261 | AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { |
3262 | return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); |
3263 | } |
3264 | |
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. The currently
// recognized sequences are:
3269 | // |...| |
3270 | // abs(...) |
3271 | // neg(...) |
3272 | // sext(...) |
3273 | // -reg |
3274 | // -|...| |
3275 | // -abs(...) |
3276 | // name:... |
3277 | // |
3278 | bool |
3279 | AMDGPUAsmParser::isModifier() { |
3280 | |
3281 | AsmToken Tok = getToken(); |
3282 | AsmToken NextToken[2]; |
3283 | peekTokens(Tokens: NextToken); |
3284 | |
3285 | return isOperandModifier(Token: Tok, NextToken: NextToken[0]) || |
3286 | (Tok.is(K: AsmToken::Minus) && isRegOrOperandModifier(Token: NextToken[0], NextToken: NextToken[1])) || |
3287 | isOpcodeModifierWithVal(Token: Tok, NextToken: NextToken[0]); |
3288 | } |
3289 | |
3290 | // Check if the current token is an SP3 'neg' modifier. |
3291 | // Currently this modifier is allowed in the following context: |
3292 | // |
3293 | // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". |
3294 | // 2. Before an 'abs' modifier: -abs(...) |
3295 | // 3. Before an SP3 'abs' modifier: -|...| |
3296 | // |
3297 | // In all other cases "-" is handled as a part |
3298 | // of an expression that follows the sign. |
3299 | // |
// Note: When "-" is followed by an integer literal,
// it is interpreted as integer negation rather than
// a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, using the floating-point
// NEG modifier here would give integer literals different
// meanings with VOP1/2/C and VOP3, for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
3312 | bool |
3313 | AMDGPUAsmParser::parseSP3NegModifier() { |
3314 | |
3315 | AsmToken NextToken[2]; |
3316 | peekTokens(Tokens: NextToken); |
3317 | |
3318 | if (isToken(Kind: AsmToken::Minus) && |
3319 | (isRegister(Token: NextToken[0], NextToken: NextToken[1]) || |
3320 | NextToken[0].is(K: AsmToken::Pipe) || |
3321 | isId(Token: NextToken[0], Id: "abs" ))) { |
3322 | lex(); |
3323 | return true; |
3324 | } |
3325 | |
3326 | return false; |
3327 | } |
3328 | |
3329 | ParseStatus |
3330 | AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, |
3331 | bool AllowImm) { |
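// Accepted forms include "abs(v0)", "-v1", "-|v2|", "neg(1.0)" and
// "lit(...)"-wrapped values, in addition to plain registers and
// (when allowed) plain immediates.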
3332 | bool Neg, SP3Neg; |
3333 | bool Abs, SP3Abs; |
3334 | bool Lit; |
3335 | SMLoc Loc; |
3336 | |
3337 | // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. |
3338 | if (isToken(Kind: AsmToken::Minus) && peekToken().is(K: AsmToken::Minus)) |
3339 | return Error(L: getLoc(), Msg: "invalid syntax, expected 'neg' modifier" ); |
3340 | |
3341 | SP3Neg = parseSP3NegModifier(); |
3342 | |
3343 | Loc = getLoc(); |
3344 | Neg = trySkipId(Id: "neg" ); |
3345 | if (Neg && SP3Neg) |
3346 | return Error(L: Loc, Msg: "expected register or immediate" ); |
3347 | if (Neg && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after neg" )) |
3348 | return ParseStatus::Failure; |
3349 | |
3350 | Abs = trySkipId(Id: "abs" ); |
3351 | if (Abs && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after abs" )) |
3352 | return ParseStatus::Failure; |
3353 | |
3354 | Lit = trySkipId(Id: "lit" ); |
3355 | if (Lit && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after lit" )) |
3356 | return ParseStatus::Failure; |
3357 | |
3358 | Loc = getLoc(); |
3359 | SP3Abs = trySkipToken(Kind: AsmToken::Pipe); |
3360 | if (Abs && SP3Abs) |
3361 | return Error(L: Loc, Msg: "expected register or immediate" ); |
3362 | |
3363 | ParseStatus Res; |
3364 | if (AllowImm) { |
3365 | Res = parseRegOrImm(Operands, HasSP3AbsMod: SP3Abs, HasLit: Lit); |
3366 | } else { |
3367 | Res = parseReg(Operands); |
3368 | } |
3369 | if (!Res.isSuccess()) |
3370 | return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res; |
3371 | |
3372 | if (Lit && !Operands.back()->isImm()) |
3373 | Error(L: Loc, Msg: "expected immediate with lit modifier" ); |
3374 | |
3375 | if (SP3Abs && !skipToken(Kind: AsmToken::Pipe, ErrMsg: "expected vertical bar" )) |
3376 | return ParseStatus::Failure; |
3377 | if (Abs && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3378 | return ParseStatus::Failure; |
3379 | if (Neg && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3380 | return ParseStatus::Failure; |
3381 | if (Lit && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3382 | return ParseStatus::Failure; |
3383 | |
3384 | AMDGPUOperand::Modifiers Mods; |
3385 | Mods.Abs = Abs || SP3Abs; |
3386 | Mods.Neg = Neg || SP3Neg; |
3387 | Mods.Lit = Lit; |
3388 | |
3389 | if (Mods.hasFPModifiers() || Lit) { |
3390 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3391 | if (Op.isExpr()) |
3392 | return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression" ); |
3393 | Op.setModifiers(Mods); |
3394 | } |
3395 | return ParseStatus::Success; |
3396 | } |
3397 | |
3398 | ParseStatus |
3399 | AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, |
3400 | bool AllowImm) { |
3401 | bool Sext = trySkipId(Id: "sext" ); |
3402 | if (Sext && !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected left paren after sext" )) |
3403 | return ParseStatus::Failure; |
3404 | |
3405 | ParseStatus Res; |
3406 | if (AllowImm) { |
3407 | Res = parseRegOrImm(Operands); |
3408 | } else { |
3409 | Res = parseReg(Operands); |
3410 | } |
3411 | if (!Res.isSuccess()) |
3412 | return Sext ? ParseStatus::Failure : Res; |
3413 | |
3414 | if (Sext && !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected closing parentheses" )) |
3415 | return ParseStatus::Failure; |
3416 | |
3417 | AMDGPUOperand::Modifiers Mods; |
3418 | Mods.Sext = Sext; |
3419 | |
3420 | if (Mods.hasIntModifiers()) { |
3421 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); |
3422 | if (Op.isExpr()) |
3423 | return Error(L: Op.getStartLoc(), Msg: "expected an absolute expression" ); |
3424 | Op.setModifiers(Mods); |
3425 | } |
3426 | |
3427 | return ParseStatus::Success; |
3428 | } |
3429 | |
3430 | ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { |
3431 | return parseRegOrImmWithFPInputMods(Operands, AllowImm: false); |
3432 | } |
3433 | |
3434 | ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { |
3435 | return parseRegOrImmWithIntInputMods(Operands, AllowImm: false); |
3436 | } |
3437 | |
3438 | ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { |
3439 | auto Loc = getLoc(); |
3440 | if (trySkipId(Id: "off" )) { |
3441 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: 0, Loc, |
3442 | Type: AMDGPUOperand::ImmTyOff, IsFPImm: false)); |
3443 | return ParseStatus::Success; |
3444 | } |
3445 | |
3446 | if (!isRegister()) |
3447 | return ParseStatus::NoMatch; |
3448 | |
3449 | std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); |
3450 | if (Reg) { |
3451 | Operands.push_back(Elt: std::move(Reg)); |
3452 | return ParseStatus::Success; |
3453 | } |
3454 | |
3455 | return ParseStatus::Failure; |
3456 | } |
3457 | |
3458 | unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { |
3459 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
3460 | |
3461 | if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || |
3462 | (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || |
3463 | (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || |
3464 | (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) |
3465 | return Match_InvalidOperand; |
3466 | |
3467 | if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || |
3468 | Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { |
// v_mac_f32/16 allow only dst_sel == DWORD.
3470 | auto OpNum = |
3471 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::dst_sel); |
3472 | const auto &Op = Inst.getOperand(i: OpNum); |
3473 | if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { |
3474 | return Match_InvalidOperand; |
3475 | } |
3476 | } |
3477 | |
3478 | return Match_Success; |
3479 | } |
3480 | |
3481 | static ArrayRef<unsigned> getAllVariants() { |
3482 | static const unsigned Variants[] = { |
3483 | AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, |
3484 | AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, |
3485 | AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP |
3486 | }; |
3487 | |
3488 | return ArrayRef(Variants); |
3489 | } |
3490 | |
3491 | // What asm variants we should check |
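// For example, a mnemonic with an explicit _e64 suffix is matched only
// against the VOP3 variant, and _e64_dpp only against VOP3_DPP.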
3492 | ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { |
3493 | if (isForcedDPP() && isForcedVOP3()) { |
3494 | static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; |
3495 | return ArrayRef(Variants); |
3496 | } |
3497 | if (getForcedEncodingSize() == 32) { |
3498 | static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; |
3499 | return ArrayRef(Variants); |
3500 | } |
3501 | |
3502 | if (isForcedVOP3()) { |
3503 | static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; |
3504 | return ArrayRef(Variants); |
3505 | } |
3506 | |
3507 | if (isForcedSDWA()) { |
3508 | static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, |
3509 | AMDGPUAsmVariants::SDWA9}; |
3510 | return ArrayRef(Variants); |
3511 | } |
3512 | |
3513 | if (isForcedDPP()) { |
3514 | static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; |
3515 | return ArrayRef(Variants); |
3516 | } |
3517 | |
3518 | return getAllVariants(); |
3519 | } |
3520 | |
3521 | StringRef AMDGPUAsmParser::getMatchedVariantName() const { |
3522 | if (isForcedDPP() && isForcedVOP3()) |
3523 | return "e64_dpp" ; |
3524 | |
3525 | if (getForcedEncodingSize() == 32) |
3526 | return "e32" ; |
3527 | |
3528 | if (isForcedVOP3()) |
3529 | return "e64" ; |
3530 | |
3531 | if (isForcedSDWA()) |
3532 | return "sdwa" ; |
3533 | |
3534 | if (isForcedDPP()) |
3535 | return "dpp" ; |
3536 | |
3537 | return "" ; |
3538 | } |
3539 | |
3540 | unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { |
3541 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
3542 | for (MCPhysReg Reg : Desc.implicit_uses()) { |
3543 | switch (Reg) { |
3544 | case AMDGPU::FLAT_SCR: |
3545 | case AMDGPU::VCC: |
3546 | case AMDGPU::VCC_LO: |
3547 | case AMDGPU::VCC_HI: |
3548 | case AMDGPU::M0: |
3549 | return Reg; |
3550 | default: |
3551 | break; |
3552 | } |
3553 | } |
3554 | return AMDGPU::NoRegister; |
3555 | } |
3556 | |
3557 | // NB: This code is correct only when used to check constant |
// bus limitations because GFX7 supports no f16 inline constants.
3559 | // Note that there are no cases when a GFX7 opcode violates |
3560 | // constant bus limitations due to the use of an f16 constant. |
3561 | bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, |
3562 | unsigned OpIdx) const { |
3563 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
3564 | |
3565 | if (!AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx) || |
3566 | AMDGPU::isKImmOperand(Desc, OpNo: OpIdx)) { |
3567 | return false; |
3568 | } |
3569 | |
3570 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3571 | |
3572 | int64_t Val = MO.getImm(); |
3573 | auto OpSize = AMDGPU::getOperandSize(Desc, OpNo: OpIdx); |
3574 | |
3575 | switch (OpSize) { // expected operand size |
3576 | case 8: |
3577 | return AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3578 | case 4: |
3579 | return AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3580 | case 2: { |
3581 | const unsigned OperandType = Desc.operands()[OpIdx].OperandType; |
3582 | if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || |
3583 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || |
3584 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) |
3585 | return AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3586 | |
3587 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || |
3588 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || |
3589 | OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) |
3590 | return AMDGPU::isInlinableLiteralV2I16(Literal: Val); |
3591 | |
3592 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || |
3593 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || |
3594 | OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) |
3595 | return AMDGPU::isInlinableLiteralV2F16(Literal: Val); |
3596 | |
3597 | if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 || |
3598 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2BF16 || |
3599 | OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16) |
3600 | return AMDGPU::isInlinableLiteralV2BF16(Literal: Val); |
3601 | |
3602 | if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 || |
3603 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 || |
3604 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 || |
3605 | OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED) |
3606 | return AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3607 | |
3608 | if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 || |
3609 | OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 || |
3610 | OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 || |
3611 | OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED) |
3612 | return AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi: hasInv2PiInlineImm()); |
3613 | |
3614 | llvm_unreachable("invalid operand type" ); |
3615 | } |
3616 | default: |
3617 | llvm_unreachable("invalid operand size" ); |
3618 | } |
3619 | } |
3620 | |
3621 | unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { |
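// Returns how many scalar sources (SGPRs and literal constants) a single
// VALU instruction may read: one before GFX10, and generally two on
// GFX10+, except for 64-bit shifts which are limited to one.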
3622 | if (!isGFX10Plus()) |
3623 | return 1; |
3624 | |
3625 | switch (Opcode) { |
3626 | // 64-bit shift instructions can use only one scalar value input |
3627 | case AMDGPU::V_LSHLREV_B64_e64: |
3628 | case AMDGPU::V_LSHLREV_B64_gfx10: |
3629 | case AMDGPU::V_LSHLREV_B64_e64_gfx11: |
3630 | case AMDGPU::V_LSHLREV_B64_e32_gfx12: |
3631 | case AMDGPU::V_LSHLREV_B64_e64_gfx12: |
3632 | case AMDGPU::V_LSHRREV_B64_e64: |
3633 | case AMDGPU::V_LSHRREV_B64_gfx10: |
3634 | case AMDGPU::V_LSHRREV_B64_e64_gfx11: |
3635 | case AMDGPU::V_LSHRREV_B64_e64_gfx12: |
3636 | case AMDGPU::V_ASHRREV_I64_e64: |
3637 | case AMDGPU::V_ASHRREV_I64_gfx10: |
3638 | case AMDGPU::V_ASHRREV_I64_e64_gfx11: |
3639 | case AMDGPU::V_ASHRREV_I64_e64_gfx12: |
3640 | case AMDGPU::V_LSHL_B64_e64: |
3641 | case AMDGPU::V_LSHR_B64_e64: |
3642 | case AMDGPU::V_ASHR_I64_e64: |
3643 | return 1; |
3644 | default: |
3645 | return 2; |
3646 | } |
3647 | } |
3648 | |
3649 | constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; |
3650 | using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; |
3651 | |
3652 | // Get regular operand indices in the same order as specified |
3653 | // in the instruction (but append mandatory literals to the end). |
3654 | static OperandIndices getSrcOperandIndices(unsigned Opcode, |
3655 | bool AddMandatoryLiterals = false) { |
3656 | |
3657 | int16_t ImmIdx = |
3658 | AddMandatoryLiterals ? getNamedOperandIdx(Opcode, NamedIdx: OpName::imm) : -1; |
3659 | |
3660 | if (isVOPD(Opc: Opcode)) { |
3661 | int16_t ImmDeferredIdx = |
3662 | AddMandatoryLiterals ? getNamedOperandIdx(Opcode, NamedIdx: OpName::immDeferred) |
3663 | : -1; |
3664 | |
3665 | return {getNamedOperandIdx(Opcode, NamedIdx: OpName::src0X), |
3666 | getNamedOperandIdx(Opcode, NamedIdx: OpName::vsrc1X), |
3667 | getNamedOperandIdx(Opcode, NamedIdx: OpName::src0Y), |
3668 | getNamedOperandIdx(Opcode, NamedIdx: OpName::vsrc1Y), |
3669 | ImmDeferredIdx, |
3670 | ImmIdx}; |
3671 | } |
3672 | |
3673 | return {getNamedOperandIdx(Opcode, NamedIdx: OpName::src0), |
3674 | getNamedOperandIdx(Opcode, NamedIdx: OpName::src1), |
3675 | getNamedOperandIdx(Opcode, NamedIdx: OpName::src2), ImmIdx}; |
3676 | } |
3677 | |
3678 | bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { |
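// An operand occupies the constant bus if it is a non-inline immediate,
// an expression, or an SGPR other than null.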
3679 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3680 | if (MO.isImm()) |
3681 | return !isInlineConstant(Inst, OpIdx); |
3682 | if (MO.isReg()) { |
3683 | auto Reg = MO.getReg(); |
3684 | if (!Reg) |
3685 | return false; |
3686 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3687 | auto PReg = mc2PseudoReg(Reg); |
3688 | return isSGPR(Reg: PReg, TRI) && PReg != SGPR_NULL; |
3689 | } |
3690 | return true; |
3691 | } |
3692 | |
3693 | // Based on the comment for `AMDGPUInstructionSelector::selectWritelane`: |
3694 | // Writelane is special in that it can use SGPR and M0 (which would normally |
3695 | // count as using the constant bus twice - but in this case it is allowed since |
3696 | // the lane selector doesn't count as a use of the constant bus). However, it is |
3697 | // still required to abide by the 1 SGPR rule. |
3698 | static bool checkWriteLane(const MCInst &Inst) { |
3699 | const unsigned Opcode = Inst.getOpcode(); |
3700 | if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi) |
3701 | return false; |
3702 | const MCOperand &LaneSelOp = Inst.getOperand(i: 2); |
3703 | if (!LaneSelOp.isReg()) |
3704 | return false; |
3705 | auto LaneSelReg = mc2PseudoReg(Reg: LaneSelOp.getReg()); |
3706 | return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11; |
3707 | } |
3708 | |
3709 | bool AMDGPUAsmParser::validateConstantBusLimitations( |
3710 | const MCInst &Inst, const OperandVector &Operands) { |
3711 | const unsigned Opcode = Inst.getOpcode(); |
3712 | const MCInstrDesc &Desc = MII.get(Opcode); |
3713 | unsigned LastSGPR = AMDGPU::NoRegister; |
3714 | unsigned ConstantBusUseCount = 0; |
3715 | unsigned NumLiterals = 0; |
3716 | unsigned LiteralSize; |
3717 | |
3718 | if (!(Desc.TSFlags & |
3719 | (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | |
3720 | SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) && |
3721 | !isVOPD(Opc: Opcode)) |
3722 | return true; |
3723 | |
3724 | if (checkWriteLane(Inst)) |
3725 | return true; |
3726 | |
3727 | // Check special imm operands (used by madmk, etc) |
3728 | if (AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::imm)) { |
3729 | ++NumLiterals; |
3730 | LiteralSize = 4; |
3731 | } |
3732 | |
3733 | SmallDenseSet<unsigned> SGPRsUsed; |
3734 | unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); |
3735 | if (SGPRUsed != AMDGPU::NoRegister) { |
3736 | SGPRsUsed.insert(V: SGPRUsed); |
3737 | ++ConstantBusUseCount; |
3738 | } |
3739 | |
3740 | OperandIndices OpIndices = getSrcOperandIndices(Opcode); |
3741 | |
3742 | for (int OpIdx : OpIndices) { |
3743 | if (OpIdx == -1) |
3744 | continue; |
3745 | |
3746 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
3747 | if (usesConstantBus(Inst, OpIdx)) { |
3748 | if (MO.isReg()) { |
3749 | LastSGPR = mc2PseudoReg(Reg: MO.getReg()); |
// Pairs of registers with a partial intersection like these
3751 | // s0, s[0:1] |
3752 | // flat_scratch_lo, flat_scratch |
3753 | // flat_scratch_lo, flat_scratch_hi |
3754 | // are theoretically valid but they are disabled anyway. |
3755 | // Note that this code mimics SIInstrInfo::verifyInstruction |
3756 | if (SGPRsUsed.insert(V: LastSGPR).second) { |
3757 | ++ConstantBusUseCount; |
3758 | } |
3759 | } else { // Expression or a literal |
3760 | |
3761 | if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) |
3762 | continue; // special operand like VINTERP attr_chan |
3763 | |
// An instruction may use only one literal.
// This has been validated in a previous step.
// See validateVOPLiteral.
// This literal may be used by more than one operand.
// If all these operands have the same size,
// the literal counts as one scalar value.
// Otherwise it counts as 2 scalar values.
3771 | // See "GFX10 Shader Programming", section 3.6.2.3. |
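// For example, the same 32-bit literal used by two 32-bit operands adds
// one to the bus count; reusing it for operands of different sizes adds two.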
3772 | |
3773 | unsigned Size = AMDGPU::getOperandSize(Desc, OpNo: OpIdx); |
3774 | if (Size < 4) |
3775 | Size = 4; |
3776 | |
3777 | if (NumLiterals == 0) { |
3778 | NumLiterals = 1; |
3779 | LiteralSize = Size; |
3780 | } else if (LiteralSize != Size) { |
3781 | NumLiterals = 2; |
3782 | } |
3783 | } |
3784 | } |
3785 | } |
3786 | ConstantBusUseCount += NumLiterals; |
3787 | |
3788 | if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) |
3789 | return true; |
3790 | |
3791 | SMLoc LitLoc = getLitLoc(Operands); |
3792 | SMLoc RegLoc = getRegLoc(Reg: LastSGPR, Operands); |
3793 | SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; |
3794 | Error(L: Loc, Msg: "invalid operand (violates constant bus restrictions)" ); |
3795 | return false; |
3796 | } |
3797 | |
3798 | bool AMDGPUAsmParser::validateVOPDRegBankConstraints( |
3799 | const MCInst &Inst, const OperandVector &Operands) { |
3800 | |
3801 | const unsigned Opcode = Inst.getOpcode(); |
3802 | if (!isVOPD(Opc: Opcode)) |
3803 | return true; |
3804 | |
3805 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
3806 | |
3807 | auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { |
3808 | const MCOperand &Opr = Inst.getOperand(i: OperandIdx); |
3809 | return (Opr.isReg() && !isSGPR(Reg: mc2PseudoReg(Reg: Opr.getReg()), TRI)) |
3810 | ? Opr.getReg() |
3811 | : MCRegister::NoRegister; |
3812 | }; |
3813 | |
// On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2 source cache.
3815 | bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; |
3816 | |
3817 | const auto &InstInfo = getVOPDInstInfo(VOPDOpcode: Opcode, InstrInfo: &MII); |
3818 | auto InvalidCompOprIdx = |
3819 | InstInfo.getInvalidCompOperandIndex(GetRegIdx: getVRegIdx, SkipSrc); |
3820 | if (!InvalidCompOprIdx) |
3821 | return true; |
3822 | |
3823 | auto CompOprIdx = *InvalidCompOprIdx; |
3824 | auto ParsedIdx = |
3825 | std::max(a: InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), |
3826 | b: InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); |
3827 | assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); |
3828 | |
3829 | auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); |
3830 | if (CompOprIdx == VOPD::Component::DST) { |
3831 | Error(L: Loc, Msg: "one dst register must be even and the other odd" ); |
3832 | } else { |
3833 | auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; |
3834 | Error(L: Loc, Msg: Twine("src" ) + Twine(CompSrcIdx) + |
3835 | " operands must use different VGPR banks" ); |
3836 | } |
3837 | |
3838 | return false; |
3839 | } |
3840 | |
3841 | bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { |
3842 | |
3843 | const unsigned Opc = Inst.getOpcode(); |
3844 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3845 | |
3846 | if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { |
3847 | int ClampIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp); |
3848 | assert(ClampIdx != -1); |
3849 | return Inst.getOperand(i: ClampIdx).getImm() == 0; |
3850 | } |
3851 | |
3852 | return true; |
3853 | } |
3854 | |
3855 | constexpr uint64_t MIMGFlags = |
3856 | SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; |
3857 | |
3858 | bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, |
3859 | const SMLoc &IDLoc) { |
3860 | |
3861 | const unsigned Opc = Inst.getOpcode(); |
3862 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3863 | |
3864 | if ((Desc.TSFlags & MIMGFlags) == 0) |
3865 | return true; |
3866 | |
3867 | int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdata); |
3868 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dmask); |
3869 | int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::tfe); |
3870 | |
3871 | if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample |
3872 | return true; |
3873 | |
3874 | if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray |
3875 | return true; |
3876 | |
3877 | unsigned VDataSize = AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VDataIdx); |
3878 | unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(i: TFEIdx).getImm()) ? 1 : 0; |
3879 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
3880 | if (DMask == 0) |
3881 | DMask = 1; |
3882 | |
3883 | bool IsPackedD16 = false; |
3884 | unsigned DataSize = |
3885 | (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(Value: DMask); |
3886 | if (hasPackedD16()) { |
3887 | int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::d16); |
3888 | IsPackedD16 = D16Idx >= 0; |
3889 | if (IsPackedD16 && Inst.getOperand(i: D16Idx).getImm()) |
3890 | DataSize = (DataSize + 1) / 2; |
3891 | } |
3892 | |
3893 | if ((VDataSize / 4) == DataSize + TFESize) |
3894 | return true; |
3895 | |
3896 | StringRef Modifiers; |
3897 | if (isGFX90A()) |
3898 | Modifiers = IsPackedD16 ? "dmask and d16" : "dmask" ; |
3899 | else |
3900 | Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe" ; |
3901 | |
3902 | Error(L: IDLoc, Msg: Twine("image data size does not match " ) + Modifiers); |
3903 | return false; |
3904 | } |
3905 | |
3906 | bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, |
3907 | const SMLoc &IDLoc) { |
3908 | const unsigned Opc = Inst.getOpcode(); |
3909 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3910 | |
3911 | if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) |
3912 | return true; |
3913 | |
3914 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); |
3915 | |
3916 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = |
3917 | AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode); |
3918 | int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::vaddr0); |
3919 | int RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc |
3920 | : AMDGPU::OpName::rsrc; |
3921 | int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: RSrcOpName); |
3922 | int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dim); |
3923 | int A16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::a16); |
3924 | |
3925 | assert(VAddr0Idx != -1); |
3926 | assert(SrsrcIdx != -1); |
3927 | assert(SrsrcIdx > VAddr0Idx); |
3928 | |
3929 | bool IsA16 = (A16Idx != -1 && Inst.getOperand(i: A16Idx).getImm()); |
3930 | if (BaseOpcode->BVH) { |
3931 | if (IsA16 == BaseOpcode->A16) |
3932 | return true; |
3933 | Error(L: IDLoc, Msg: "image address size does not match a16" ); |
3934 | return false; |
3935 | } |
3936 | |
3937 | unsigned Dim = Inst.getOperand(i: DimIdx).getImm(); |
3938 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim); |
3939 | bool IsNSA = SrsrcIdx - VAddr0Idx > 1; |
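// In NSA (non-sequential address) form each address VGPR occupies its own
// operand, so the operand distance from vaddr0 to the resource operand
// gives the address size in dwords.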
3940 | unsigned ActualAddrSize = |
3941 | IsNSA ? SrsrcIdx - VAddr0Idx |
3942 | : AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VAddr0Idx) / 4; |
3943 | |
3944 | unsigned ExpectedAddrSize = |
3945 | AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim: DimInfo, IsA16, IsG16Supported: hasG16()); |
3946 | |
3947 | if (IsNSA) { |
3948 | if (hasPartialNSAEncoding() && |
3949 | ExpectedAddrSize > |
3950 | getNSAMaxSize(HasSampler: Desc.TSFlags & SIInstrFlags::VSAMPLE)) { |
3951 | int VAddrLastIdx = SrsrcIdx - 1; |
3952 | unsigned VAddrLastSize = |
3953 | AMDGPU::getRegOperandSize(MRI: getMRI(), Desc, OpNo: VAddrLastIdx) / 4; |
3954 | |
3955 | ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; |
3956 | } |
3957 | } else { |
3958 | if (ExpectedAddrSize > 12) |
3959 | ExpectedAddrSize = 16; |
3960 | |
3961 | // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. |
3962 | // This provides backward compatibility for assembly created |
3963 | // before 160b/192b/224b types were directly supported. |
3964 | if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) |
3965 | return true; |
3966 | } |
3967 | |
3968 | if (ActualAddrSize == ExpectedAddrSize) |
3969 | return true; |
3970 | |
3971 | Error(L: IDLoc, Msg: "image address size does not match dim and a16" ); |
3972 | return false; |
3973 | } |
3974 | |
3975 | bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { |
3976 | |
3977 | const unsigned Opc = Inst.getOpcode(); |
3978 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3979 | |
3980 | if ((Desc.TSFlags & MIMGFlags) == 0) |
3981 | return true; |
3982 | if (!Desc.mayLoad() || !Desc.mayStore()) |
3983 | return true; // Not atomic |
3984 | |
3985 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dmask); |
3986 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
3987 | |
// This is an incomplete check because image_atomic_cmpswap
// may only use 0x3 and 0xf while other atomic operations
// may only use 0x1 and 0x3. However, these limitations are
// verified when we check that dmask matches the dst size.
3992 | return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; |
3993 | } |
3994 | |
3995 | bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { |
3996 | |
3997 | const unsigned Opc = Inst.getOpcode(); |
3998 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
3999 | |
4000 | if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) |
4001 | return true; |
4002 | |
4003 | int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dmask); |
4004 | unsigned DMask = Inst.getOperand(i: DMaskIdx).getImm() & 0xf; |
4005 | |
4006 | // GATHER4 instructions use dmask in a different fashion compared to |
4007 | // other MIMG instructions. The only useful DMASK values are |
4008 | // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns |
4009 | // (red,red,red,red) etc.) The ISA document doesn't mention |
4010 | // this. |
4011 | return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; |
4012 | } |
4013 | |
4014 | bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { |
4015 | const unsigned Opc = Inst.getOpcode(); |
4016 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4017 | |
4018 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4019 | return true; |
4020 | |
4021 | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); |
4022 | const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = |
4023 | AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode); |
4024 | |
4025 | if (!BaseOpcode->MSAA) |
4026 | return true; |
4027 | |
4028 | int DimIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dim); |
4029 | assert(DimIdx != -1); |
4030 | |
4031 | unsigned Dim = Inst.getOperand(i: DimIdx).getImm(); |
4032 | const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(DimEnc: Dim); |
4033 | |
4034 | return DimInfo->MSAA; |
4035 | } |
4036 | |
4037 | static bool IsMovrelsSDWAOpcode(const unsigned Opcode) |
4038 | { |
4039 | switch (Opcode) { |
4040 | case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: |
4041 | case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: |
4042 | case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: |
4043 | return true; |
4044 | default: |
4045 | return false; |
4046 | } |
4047 | } |
4048 | |
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td descriptions for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
4052 | bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, |
4053 | const OperandVector &Operands) { |
4054 | |
4055 | const unsigned Opc = Inst.getOpcode(); |
4056 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4057 | |
4058 | if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opcode: Opc)) |
4059 | return true; |
4060 | |
4061 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0); |
4062 | assert(Src0Idx != -1); |
4063 | |
4064 | SMLoc ErrLoc; |
4065 | const MCOperand &Src0 = Inst.getOperand(i: Src0Idx); |
4066 | if (Src0.isReg()) { |
4067 | auto Reg = mc2PseudoReg(Reg: Src0.getReg()); |
4068 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4069 | if (!isSGPR(Reg, TRI)) |
4070 | return true; |
4071 | ErrLoc = getRegLoc(Reg, Operands); |
4072 | } else { |
4073 | ErrLoc = getConstLoc(Operands); |
4074 | } |
4075 | |
4076 | Error(L: ErrLoc, Msg: "source operand must be a VGPR" ); |
4077 | return false; |
4078 | } |
4079 | |
4080 | bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, |
4081 | const OperandVector &Operands) { |
4082 | |
4083 | const unsigned Opc = Inst.getOpcode(); |
4084 | |
4085 | if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) |
4086 | return true; |
4087 | |
4088 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0); |
4089 | assert(Src0Idx != -1); |
4090 | |
4091 | const MCOperand &Src0 = Inst.getOperand(i: Src0Idx); |
4092 | if (!Src0.isReg()) |
4093 | return true; |
4094 | |
4095 | auto Reg = mc2PseudoReg(Reg: Src0.getReg()); |
4096 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4097 | if (!isGFX90A() && isSGPR(Reg, TRI)) { |
4098 | Error(L: getRegLoc(Reg, Operands), |
4099 | Msg: "source operand must be either a VGPR or an inline constant" ); |
4100 | return false; |
4101 | } |
4102 | |
4103 | return true; |
4104 | } |
4105 | |
4106 | bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, |
4107 | const OperandVector &Operands) { |
4108 | unsigned Opcode = Inst.getOpcode(); |
4109 | const MCInstrDesc &Desc = MII.get(Opcode); |
4110 | |
4111 | if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || |
4112 | !getFeatureBits()[FeatureMFMAInlineLiteralBug]) |
4113 | return true; |
4114 | |
4115 | const int Src2Idx = getNamedOperandIdx(Opcode, NamedIdx: OpName::src2); |
4116 | if (Src2Idx == -1) |
4117 | return true; |
4118 | |
4119 | if (Inst.getOperand(i: Src2Idx).isImm() && isInlineConstant(Inst, OpIdx: Src2Idx)) { |
4120 | Error(L: getConstLoc(Operands), |
4121 | Msg: "inline constants are not allowed for this operand" ); |
4122 | return false; |
4123 | } |
4124 | |
4125 | return true; |
4126 | } |
4127 | |
4128 | bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, |
4129 | const OperandVector &Operands) { |
4130 | const unsigned Opc = Inst.getOpcode(); |
4131 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4132 | |
4133 | if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) |
4134 | return true; |
4135 | |
4136 | const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src2); |
4137 | if (Src2Idx == -1) |
4138 | return true; |
4139 | |
4140 | const MCOperand &Src2 = Inst.getOperand(i: Src2Idx); |
4141 | if (!Src2.isReg()) |
4142 | return true; |
4143 | |
4144 | MCRegister Src2Reg = Src2.getReg(); |
4145 | MCRegister DstReg = Inst.getOperand(i: 0).getReg(); |
4146 | if (Src2Reg == DstReg) |
4147 | return true; |
4148 | |
4149 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4150 | if (TRI->getRegClass(i: Desc.operands()[0].RegClass).getSizeInBits() <= 128) |
4151 | return true; |
4152 | |
4153 | if (TRI->regsOverlap(RegA: Src2Reg, RegB: DstReg)) { |
4154 | Error(L: getRegLoc(Reg: mc2PseudoReg(Reg: Src2Reg), Operands), |
4155 | Msg: "source 2 operand must not partially overlap with dst" ); |
4156 | return false; |
4157 | } |
4158 | |
4159 | return true; |
4160 | } |
4161 | |
4162 | bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { |
4163 | switch (Inst.getOpcode()) { |
4164 | default: |
4165 | return true; |
4166 | case V_DIV_SCALE_F32_gfx6_gfx7: |
4167 | case V_DIV_SCALE_F32_vi: |
4168 | case V_DIV_SCALE_F32_gfx10: |
4169 | case V_DIV_SCALE_F64_gfx6_gfx7: |
4170 | case V_DIV_SCALE_F64_vi: |
4171 | case V_DIV_SCALE_F64_gfx10: |
4172 | break; |
4173 | } |
4174 | |
4175 | // TODO: Check that src0 = src1 or src2. |
4176 | |
for (auto Name : {AMDGPU::OpName::src0_modifiers,
AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers}) {
4180 | if (Inst.getOperand(i: AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), NamedIdx: Name)) |
4181 | .getImm() & |
4182 | SISrcMods::ABS) { |
4183 | return false; |
4184 | } |
4185 | } |
4186 | |
4187 | return true; |
4188 | } |
4189 | |
4190 | bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { |
4191 | |
4192 | const unsigned Opc = Inst.getOpcode(); |
4193 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
4194 | |
4195 | if ((Desc.TSFlags & MIMGFlags) == 0) |
4196 | return true; |
4197 | |
4198 | int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::d16); |
4199 | if (D16Idx >= 0 && Inst.getOperand(i: D16Idx).getImm()) { |
4200 | if (isCI() || isSI()) |
4201 | return false; |
4202 | } |
4203 | |
4204 | return true; |
4205 | } |
4206 | |
4207 | static bool IsRevOpcode(const unsigned Opcode) |
4208 | { |
4209 | switch (Opcode) { |
4210 | case AMDGPU::V_SUBREV_F32_e32: |
4211 | case AMDGPU::V_SUBREV_F32_e64: |
4212 | case AMDGPU::V_SUBREV_F32_e32_gfx10: |
4213 | case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: |
4214 | case AMDGPU::V_SUBREV_F32_e32_vi: |
4215 | case AMDGPU::V_SUBREV_F32_e64_gfx10: |
4216 | case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: |
4217 | case AMDGPU::V_SUBREV_F32_e64_vi: |
4218 | |
4219 | case AMDGPU::V_SUBREV_CO_U32_e32: |
4220 | case AMDGPU::V_SUBREV_CO_U32_e64: |
4221 | case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: |
4222 | case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: |
4223 | |
4224 | case AMDGPU::V_SUBBREV_U32_e32: |
4225 | case AMDGPU::V_SUBBREV_U32_e64: |
4226 | case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: |
4227 | case AMDGPU::V_SUBBREV_U32_e32_vi: |
4228 | case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: |
4229 | case AMDGPU::V_SUBBREV_U32_e64_vi: |
4230 | |
4231 | case AMDGPU::V_SUBREV_U32_e32: |
4232 | case AMDGPU::V_SUBREV_U32_e64: |
4233 | case AMDGPU::V_SUBREV_U32_e32_gfx9: |
4234 | case AMDGPU::V_SUBREV_U32_e32_vi: |
4235 | case AMDGPU::V_SUBREV_U32_e64_gfx9: |
4236 | case AMDGPU::V_SUBREV_U32_e64_vi: |
4237 | |
4238 | case AMDGPU::V_SUBREV_F16_e32: |
4239 | case AMDGPU::V_SUBREV_F16_e64: |
4240 | case AMDGPU::V_SUBREV_F16_e32_gfx10: |
4241 | case AMDGPU::V_SUBREV_F16_e32_vi: |
4242 | case AMDGPU::V_SUBREV_F16_e64_gfx10: |
4243 | case AMDGPU::V_SUBREV_F16_e64_vi: |
4244 | |
4245 | case AMDGPU::V_SUBREV_U16_e32: |
4246 | case AMDGPU::V_SUBREV_U16_e64: |
4247 | case AMDGPU::V_SUBREV_U16_e32_vi: |
4248 | case AMDGPU::V_SUBREV_U16_e64_vi: |
4249 | |
4250 | case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: |
4251 | case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: |
4252 | case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: |
4253 | |
4254 | case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: |
4255 | case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: |
4256 | |
4257 | case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: |
4258 | case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: |
4259 | |
4260 | case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: |
4261 | case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: |
4262 | |
4263 | case AMDGPU::V_LSHRREV_B32_e32: |
4264 | case AMDGPU::V_LSHRREV_B32_e64: |
4265 | case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: |
4266 | case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: |
4267 | case AMDGPU::V_LSHRREV_B32_e32_vi: |
4268 | case AMDGPU::V_LSHRREV_B32_e64_vi: |
4269 | case AMDGPU::V_LSHRREV_B32_e32_gfx10: |
4270 | case AMDGPU::V_LSHRREV_B32_e64_gfx10: |
4271 | |
4272 | case AMDGPU::V_ASHRREV_I32_e32: |
4273 | case AMDGPU::V_ASHRREV_I32_e64: |
4274 | case AMDGPU::V_ASHRREV_I32_e32_gfx10: |
4275 | case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: |
4276 | case AMDGPU::V_ASHRREV_I32_e32_vi: |
4277 | case AMDGPU::V_ASHRREV_I32_e64_gfx10: |
4278 | case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: |
4279 | case AMDGPU::V_ASHRREV_I32_e64_vi: |
4280 | |
4281 | case AMDGPU::V_LSHLREV_B32_e32: |
4282 | case AMDGPU::V_LSHLREV_B32_e64: |
4283 | case AMDGPU::V_LSHLREV_B32_e32_gfx10: |
4284 | case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: |
4285 | case AMDGPU::V_LSHLREV_B32_e32_vi: |
4286 | case AMDGPU::V_LSHLREV_B32_e64_gfx10: |
4287 | case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: |
4288 | case AMDGPU::V_LSHLREV_B32_e64_vi: |
4289 | |
4290 | case AMDGPU::V_LSHLREV_B16_e32: |
4291 | case AMDGPU::V_LSHLREV_B16_e64: |
4292 | case AMDGPU::V_LSHLREV_B16_e32_vi: |
4293 | case AMDGPU::V_LSHLREV_B16_e64_vi: |
4294 | case AMDGPU::V_LSHLREV_B16_gfx10: |
4295 | |
4296 | case AMDGPU::V_LSHRREV_B16_e32: |
4297 | case AMDGPU::V_LSHRREV_B16_e64: |
4298 | case AMDGPU::V_LSHRREV_B16_e32_vi: |
4299 | case AMDGPU::V_LSHRREV_B16_e64_vi: |
4300 | case AMDGPU::V_LSHRREV_B16_gfx10: |
4301 | |
4302 | case AMDGPU::V_ASHRREV_I16_e32: |
4303 | case AMDGPU::V_ASHRREV_I16_e64: |
4304 | case AMDGPU::V_ASHRREV_I16_e32_vi: |
4305 | case AMDGPU::V_ASHRREV_I16_e64_vi: |
4306 | case AMDGPU::V_ASHRREV_I16_gfx10: |
4307 | |
4308 | case AMDGPU::V_LSHLREV_B64_e64: |
4309 | case AMDGPU::V_LSHLREV_B64_gfx10: |
4310 | case AMDGPU::V_LSHLREV_B64_vi: |
4311 | |
4312 | case AMDGPU::V_LSHRREV_B64_e64: |
4313 | case AMDGPU::V_LSHRREV_B64_gfx10: |
4314 | case AMDGPU::V_LSHRREV_B64_vi: |
4315 | |
4316 | case AMDGPU::V_ASHRREV_I64_e64: |
4317 | case AMDGPU::V_ASHRREV_I64_gfx10: |
4318 | case AMDGPU::V_ASHRREV_I64_vi: |
4319 | |
4320 | case AMDGPU::V_PK_LSHLREV_B16: |
4321 | case AMDGPU::V_PK_LSHLREV_B16_gfx10: |
4322 | case AMDGPU::V_PK_LSHLREV_B16_vi: |
4323 | |
4324 | case AMDGPU::V_PK_LSHRREV_B16: |
4325 | case AMDGPU::V_PK_LSHRREV_B16_gfx10: |
4326 | case AMDGPU::V_PK_LSHRREV_B16_vi: |
4327 | case AMDGPU::V_PK_ASHRREV_I16: |
4328 | case AMDGPU::V_PK_ASHRREV_I16_gfx10: |
4329 | case AMDGPU::V_PK_ASHRREV_I16_vi: |
4330 | return true; |
4331 | default: |
4332 | return false; |
4333 | } |
4334 | } |
4335 | |
4336 | std::optional<StringRef> |
4337 | AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { |
4338 | |
4339 | using namespace SIInstrFlags; |
4340 | const unsigned Opcode = Inst.getOpcode(); |
4341 | const MCInstrDesc &Desc = MII.get(Opcode); |
4342 | |
// The lds_direct register is defined so that it can be used
// with 9-bit operands only. Ignore encodings that do not accept these.
4345 | const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; |
4346 | if ((Desc.TSFlags & Enc) == 0) |
4347 | return std::nullopt; |
4348 | |
4349 | for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { |
4350 | auto SrcIdx = getNamedOperandIdx(Opcode, NamedIdx: SrcName); |
4351 | if (SrcIdx == -1) |
4352 | break; |
4353 | const auto &Src = Inst.getOperand(i: SrcIdx); |
4354 | if (Src.isReg() && Src.getReg() == LDS_DIRECT) { |
4355 | |
4356 | if (isGFX90A() || isGFX11Plus()) |
4357 | return StringRef("lds_direct is not supported on this GPU" ); |
4358 | |
4359 | if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) |
4360 | return StringRef("lds_direct cannot be used with this instruction" ); |
4361 | |
4362 | if (SrcName != OpName::src0) |
4363 | return StringRef("lds_direct may be used as src0 only" ); |
4364 | } |
4365 | } |
4366 | |
4367 | return std::nullopt; |
4368 | } |
4369 | |
4370 | SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { |
4371 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
4372 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4373 | if (Op.isFlatOffset()) |
4374 | return Op.getStartLoc(); |
4375 | } |
4376 | return getLoc(); |
4377 | } |
4378 | |
4379 | bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, |
4380 | const OperandVector &Operands) { |
4381 | auto Opcode = Inst.getOpcode(); |
4382 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, NamedIdx: AMDGPU::OpName::offset); |
4383 | if (OpNum == -1) |
4384 | return true; |
4385 | |
4386 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4387 | if ((TSFlags & SIInstrFlags::FLAT)) |
4388 | return validateFlatOffset(Inst, Operands); |
4389 | |
4390 | if ((TSFlags & SIInstrFlags::SMRD)) |
4391 | return validateSMEMOffset(Inst, Operands); |
4392 | |
4393 | const auto &Op = Inst.getOperand(i: OpNum); |
4394 | if (isGFX12Plus() && |
4395 | (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { |
4396 | const unsigned OffsetSize = 24; |
4397 | if (!isIntN(N: OffsetSize, x: Op.getImm())) { |
4398 | Error(L: getFlatOffsetLoc(Operands), |
4399 | Msg: Twine("expected a " ) + Twine(OffsetSize) + "-bit signed offset" ); |
4400 | return false; |
4401 | } |
4402 | } else { |
4403 | const unsigned OffsetSize = 16; |
4404 | if (!isUIntN(N: OffsetSize, x: Op.getImm())) { |
4405 | Error(L: getFlatOffsetLoc(Operands), |
4406 | Msg: Twine("expected a " ) + Twine(OffsetSize) + "-bit unsigned offset" ); |
4407 | return false; |
4408 | } |
4409 | } |
4410 | return true; |
4411 | } |
4412 | |
4413 | bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, |
4414 | const OperandVector &Operands) { |
4415 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4416 | if ((TSFlags & SIInstrFlags::FLAT) == 0) |
4417 | return true; |
4418 | |
4419 | auto Opcode = Inst.getOpcode(); |
4420 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, NamedIdx: AMDGPU::OpName::offset); |
4421 | assert(OpNum != -1); |
4422 | |
4423 | const auto &Op = Inst.getOperand(i: OpNum); |
4424 | if (!hasFlatOffsets() && Op.getImm() != 0) { |
4425 | Error(L: getFlatOffsetLoc(Operands), |
4426 | Msg: "flat offset modifier is not supported on this GPU" ); |
4427 | return false; |
4428 | } |
4429 | |
4430 | // For pre-GFX12 FLAT instructions the offset must be positive; |
4431 | // MSB is ignored and forced to zero. |
4432 | unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(ST: getSTI()); |
4433 | bool AllowNegative = |
4434 | (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || |
4435 | isGFX12Plus(); |
4436 | if (!isIntN(N: OffsetSize, x: Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { |
4437 | Error(L: getFlatOffsetLoc(Operands), |
4438 | Msg: Twine("expected a " ) + |
4439 | (AllowNegative ? Twine(OffsetSize) + "-bit signed offset" |
4440 | : Twine(OffsetSize - 1) + "-bit unsigned offset" )); |
4441 | return false; |
4442 | } |
4443 | |
4444 | return true; |
4445 | } |
4446 | |
4447 | SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { |
4448 | // Start with second operand because SMEM Offset cannot be dst or src0. |
4449 | for (unsigned i = 2, e = Operands.size(); i != e; ++i) { |
4450 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4451 | if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) |
4452 | return Op.getStartLoc(); |
4453 | } |
4454 | return getLoc(); |
4455 | } |
4456 | |
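// Check that an SMEM immediate offset is encodable on the current subtarget
// (20-bit unsigned for VI and buffers, 21-bit signed otherwise, 24-bit signed
// on GFX12+).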
4457 | bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, |
4458 | const OperandVector &Operands) { |
4459 | if (isCI() || isSI()) |
4460 | return true; |
4461 | |
4462 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4463 | if ((TSFlags & SIInstrFlags::SMRD) == 0) |
4464 | return true; |
4465 | |
4466 | auto Opcode = Inst.getOpcode(); |
4467 | auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, NamedIdx: AMDGPU::OpName::offset); |
4468 | if (OpNum == -1) |
4469 | return true; |
4470 | |
4471 | const auto &Op = Inst.getOperand(i: OpNum); |
4472 | if (!Op.isImm()) |
4473 | return true; |
4474 | |
4475 | uint64_t Offset = Op.getImm(); |
4476 | bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opc: Opcode); |
4477 | if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(ST: getSTI(), EncodedOffset: Offset) || |
4478 | AMDGPU::isLegalSMRDEncodedSignedOffset(ST: getSTI(), EncodedOffset: Offset, IsBuffer)) |
4479 | return true; |
4480 | |
4481 | Error(L: getSMEMOffsetLoc(Operands), |
4482 | Msg: isGFX12Plus() ? "expected a 24-bit signed offset" |
4483 | : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" |
4484 | : "expected a 21-bit signed offset" ); |
4485 | |
4486 | return false; |
4487 | } |
4488 | |
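// SOP2 and SOPC instructions may use at most one unique literal (or
// relocatable expression) across src0 and src1.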
4489 | bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { |
4490 | unsigned Opcode = Inst.getOpcode(); |
4491 | const MCInstrDesc &Desc = MII.get(Opcode); |
4492 | if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) |
4493 | return true; |
4494 | |
4495 | const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, NamedIdx: AMDGPU::OpName::src0); |
4496 | const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, NamedIdx: AMDGPU::OpName::src1); |
4497 | |
4498 | const int OpIndices[] = { Src0Idx, Src1Idx }; |
4499 | |
4500 | unsigned NumExprs = 0; |
4501 | unsigned NumLiterals = 0; |
4502 | uint32_t LiteralValue; |
4503 | |
4504 | for (int OpIdx : OpIndices) { |
4505 | if (OpIdx == -1) break; |
4506 | |
4507 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
    // Exclude special imm operands (like those used by s_set_gpr_idx_on).
4509 | if (AMDGPU::isSISrcOperand(Desc, OpNo: OpIdx)) { |
4510 | if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { |
4511 | uint32_t Value = static_cast<uint32_t>(MO.getImm()); |
4512 | if (NumLiterals == 0 || LiteralValue != Value) { |
4513 | LiteralValue = Value; |
4514 | ++NumLiterals; |
4515 | } |
4516 | } else if (MO.isExpr()) { |
4517 | ++NumExprs; |
4518 | } |
4519 | } |
4520 | } |
4521 | |
4522 | return NumLiterals + NumExprs <= 1; |
4523 | } |
4524 | |
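// Validate opcode- and subtarget-specific restrictions on the op_sel and
// op_sel_hi modifiers (permlane16, gfx940 dot instructions, and GFX11+ VOP3
// dot instructions).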
4525 | bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { |
4526 | const unsigned Opc = Inst.getOpcode(); |
4527 | if (isPermlane16(Opc)) { |
4528 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
4529 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
4530 | |
4531 | if (OpSel & ~3) |
4532 | return false; |
4533 | } |
4534 | |
4535 | uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags; |
4536 | |
4537 | if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { |
4538 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
4539 | if (OpSelIdx != -1) { |
4540 | if (Inst.getOperand(i: OpSelIdx).getImm() != 0) |
4541 | return false; |
4542 | } |
4543 | int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel_hi); |
4544 | if (OpSelHiIdx != -1) { |
4545 | if (Inst.getOperand(i: OpSelHiIdx).getImm() != -1) |
4546 | return false; |
4547 | } |
4548 | } |
4549 | |
4550 | // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). |
4551 | if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && |
4552 | (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { |
4553 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
4554 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
4555 | if (OpSel & 3) |
4556 | return false; |
4557 | } |
4558 | |
4559 | return true; |
4560 | } |
4561 | |
4562 | bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) { |
4563 | assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi); |
4564 | |
4565 | const unsigned Opc = Inst.getOpcode(); |
4566 | uint64_t TSFlags = MII.get(Opcode: Opc).TSFlags; |
4567 | |
4568 | // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2) |
4569 | // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1) |
4570 | // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1) |
4571 | // other wmma/swmmac instructions don't have neg_lo/neg_hi operand. |
4572 | if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) && |
4573 | !(TSFlags & SIInstrFlags::IsSWMMAC)) |
4574 | return true; |
4575 | |
4576 | int NegIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: OpName); |
4577 | if (NegIdx == -1) |
4578 | return true; |
4579 | |
4580 | unsigned Neg = Inst.getOperand(i: NegIdx).getImm(); |
4581 | |
  // Some instructions have a neg_lo or neg_hi operand, but the neg modifier is
  // allowed only on a subset of their src operands. Conveniently, such
  // instructions have no src_modifiers operand for the src operands that
  // disallow neg, because those operands do not allow opsel either.
4586 | |
4587 | int SrcMods[3] = {AMDGPU::OpName::src0_modifiers, |
4588 | AMDGPU::OpName::src1_modifiers, |
4589 | AMDGPU::OpName::src2_modifiers}; |
4590 | |
4591 | for (unsigned i = 0; i < 3; ++i) { |
4592 | if (!AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: SrcMods[i])) { |
4593 | if (Neg & (1 << i)) |
4594 | return false; |
4595 | } |
4596 | } |
4597 | |
4598 | return true; |
4599 | } |
4600 | |
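// Validate DPP restrictions: DP (64-bit) ALU DPP only supports row_newbcast
// controls, and on subtargets without SGPR src1 support for DPP, src1 may be
// neither an SGPR nor an immediate.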
4601 | bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, |
4602 | const OperandVector &Operands) { |
4603 | const unsigned Opc = Inst.getOpcode(); |
4604 | int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dpp_ctrl); |
4605 | if (DppCtrlIdx >= 0) { |
4606 | unsigned DppCtrl = Inst.getOperand(i: DppCtrlIdx).getImm(); |
4607 | |
4608 | if (!AMDGPU::isLegalDPALU_DPPControl(DC: DppCtrl) && |
4609 | AMDGPU::isDPALU_DPP(OpDesc: MII.get(Opcode: Opc))) { |
4610 | // DP ALU DPP is supported for row_newbcast only on GFX9* |
4611 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyDppCtrl, Operands); |
4612 | Error(L: S, Msg: "DP ALU dpp only supports row_newbcast" ); |
4613 | return false; |
4614 | } |
4615 | } |
4616 | |
4617 | int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::dpp8); |
4618 | bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; |
4619 | |
4620 | if (IsDPP && !hasDPPSrc1SGPR(STI: getSTI())) { |
4621 | int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1); |
4622 | if (Src1Idx >= 0) { |
4623 | const MCOperand &Src1 = Inst.getOperand(i: Src1Idx); |
4624 | const MCRegisterInfo *TRI = getContext().getRegisterInfo(); |
4625 | if (Src1.isReg() && isSGPR(Reg: mc2PseudoReg(Reg: Src1.getReg()), TRI)) { |
4626 | auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src1Idx).getReg()); |
4627 | SMLoc S = getRegLoc(Reg, Operands); |
4628 | Error(L: S, Msg: "invalid operand for instruction" ); |
4629 | return false; |
4630 | } |
4631 | if (Src1.isImm()) { |
4632 | Error(L: getInstLoc(Operands), |
4633 | Msg: "src1 immediate operand invalid for instruction" ); |
4634 | return false; |
4635 | } |
4636 | } |
4637 | } |
4638 | |
4639 | return true; |
4640 | } |
4641 | |
4642 | // Check if VCC register matches wavefront size |
4643 | bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { |
4644 | auto FB = getFeatureBits(); |
4645 | return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || |
4646 | (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); |
4647 | } |
4648 | |
// Only one unique literal may be used. VOP3 literals are allowed only on GFX10+.
4650 | bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, |
4651 | const OperandVector &Operands) { |
4652 | unsigned Opcode = Inst.getOpcode(); |
4653 | const MCInstrDesc &Desc = MII.get(Opcode); |
4654 | bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, NamedIdx: OpName::imm) != -1; |
4655 | if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && |
4656 | !HasMandatoryLiteral && !isVOPD(Opc: Opcode)) |
4657 | return true; |
4658 | |
4659 | OperandIndices OpIndices = getSrcOperandIndices(Opcode, AddMandatoryLiterals: HasMandatoryLiteral); |
4660 | |
4661 | unsigned NumExprs = 0; |
4662 | unsigned NumLiterals = 0; |
  uint64_t LiteralValue;
4664 | |
4665 | for (int OpIdx : OpIndices) { |
4666 | if (OpIdx == -1) |
4667 | continue; |
4668 | |
4669 | const MCOperand &MO = Inst.getOperand(i: OpIdx); |
4670 | if (!MO.isImm() && !MO.isExpr()) |
4671 | continue; |
4672 | if (!isSISrcOperand(Desc, OpNo: OpIdx)) |
4673 | continue; |
4674 | |
4675 | if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { |
4676 | uint64_t Value = static_cast<uint64_t>(MO.getImm()); |
4677 | bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpNo: OpIdx) && |
4678 | AMDGPU::getOperandSize(OpInfo: Desc.operands()[OpIdx]) == 8; |
4679 | bool IsValid32Op = AMDGPU::isValid32BitLiteral(Val: Value, IsFP64); |
4680 | |
4681 | if (!IsValid32Op && !isInt<32>(x: Value) && !isUInt<32>(x: Value)) { |
4682 | Error(L: getLitLoc(Operands), Msg: "invalid operand for instruction" ); |
4683 | return false; |
4684 | } |
4685 | |
4686 | if (IsFP64 && IsValid32Op) |
4687 | Value = Hi_32(Value); |
4688 | |
4689 | if (NumLiterals == 0 || LiteralValue != Value) { |
4690 | LiteralValue = Value; |
4691 | ++NumLiterals; |
4692 | } |
4693 | } else if (MO.isExpr()) { |
4694 | ++NumExprs; |
4695 | } |
4696 | } |
4697 | NumLiterals += NumExprs; |
4698 | |
4699 | if (!NumLiterals) |
4700 | return true; |
4701 | |
4702 | if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { |
4703 | Error(L: getLitLoc(Operands), Msg: "literal operands are not supported" ); |
4704 | return false; |
4705 | } |
4706 | |
4707 | if (NumLiterals > 1) { |
4708 | Error(L: getLitLoc(Operands, SearchMandatoryLiterals: true), Msg: "only one unique literal operand is allowed" ); |
4709 | return false; |
4710 | } |
4711 | |
4712 | return true; |
4713 | } |
4714 | |
4715 | // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. |
4716 | static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, |
4717 | const MCRegisterInfo *MRI) { |
4718 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), NamedIdx: NameIdx); |
4719 | if (OpIdx < 0) |
4720 | return -1; |
4721 | |
4722 | const MCOperand &Op = Inst.getOperand(i: OpIdx); |
4723 | if (!Op.isReg()) |
4724 | return -1; |
4725 | |
4726 | unsigned Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0); |
4727 | auto Reg = Sub ? Sub : Op.getReg(); |
4728 | const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID); |
4729 | return AGPR32.contains(Reg) ? 1 : 0; |
4730 | } |
4731 | |
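// For FLAT/MUBUF/MTBUF/MIMG/DS memory instructions, check that the data and
// destination operands are consistently VGPRs or consistently AGPRs; AGPR
// data is only accepted on subtargets with gfx90a instructions.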
4732 | bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { |
4733 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4734 | if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | |
4735 | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | |
4736 | SIInstrFlags::DS)) == 0) |
4737 | return true; |
4738 | |
4739 | uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 |
4740 | : AMDGPU::OpName::vdata; |
4741 | |
4742 | const MCRegisterInfo *MRI = getMRI(); |
4743 | int DstAreg = IsAGPROperand(Inst, NameIdx: AMDGPU::OpName::vdst, MRI); |
4744 | int DataAreg = IsAGPROperand(Inst, NameIdx: DataNameIdx, MRI); |
4745 | |
4746 | if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { |
4747 | int Data2Areg = IsAGPROperand(Inst, NameIdx: AMDGPU::OpName::data1, MRI); |
4748 | if (Data2Areg >= 0 && Data2Areg != DataAreg) |
4749 | return false; |
4750 | } |
4751 | |
4752 | auto FB = getFeatureBits(); |
4753 | if (FB[AMDGPU::FeatureGFX90AInsts]) { |
4754 | if (DataAreg < 0 || DstAreg < 0) |
4755 | return true; |
4756 | return DstAreg == DataAreg; |
4757 | } |
4758 | |
4759 | return DstAreg < 1 && DataAreg < 1; |
4760 | } |
4761 | |
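// gfx90a requires VGPR and AGPR tuples to start at an even-numbered register.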
4762 | bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { |
4763 | auto FB = getFeatureBits(); |
4764 | if (!FB[AMDGPU::FeatureGFX90AInsts]) |
4765 | return true; |
4766 | |
4767 | const MCRegisterInfo *MRI = getMRI(); |
4768 | const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID); |
4769 | const MCRegisterClass &AGPR32 = MRI->getRegClass(i: AMDGPU::AGPR_32RegClassID); |
4770 | for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { |
4771 | const MCOperand &Op = Inst.getOperand(i: I); |
4772 | if (!Op.isReg()) |
4773 | continue; |
4774 | |
4775 | unsigned Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0); |
4776 | if (!Sub) |
4777 | continue; |
4778 | |
4779 | if (VGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::VGPR0) & 1)) |
4780 | return false; |
4781 | if (AGPR32.contains(Reg: Sub) && ((Sub - AMDGPU::AGPR0) & 1)) |
4782 | return false; |
4783 | } |
4784 | |
4785 | return true; |
4786 | } |
4787 | |
4788 | SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { |
4789 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
4790 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
4791 | if (Op.isBLGP()) |
4792 | return Op.getStartLoc(); |
4793 | } |
4794 | return SMLoc(); |
4795 | } |
4796 | |
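// On gfx940 the F64 MFMA opcodes spell the blgp field as a 'neg:' modifier;
// reject whichever spelling the current opcode does not use.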
4797 | bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, |
4798 | const OperandVector &Operands) { |
4799 | unsigned Opc = Inst.getOpcode(); |
4800 | int BlgpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::blgp); |
4801 | if (BlgpIdx == -1) |
4802 | return true; |
4803 | SMLoc BLGPLoc = getBLGPLoc(Operands); |
4804 | if (!BLGPLoc.isValid()) |
4805 | return true; |
4806 | bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with(Prefix: "neg:" ); |
4807 | auto FB = getFeatureBits(); |
4808 | bool UsesNeg = false; |
4809 | if (FB[AMDGPU::FeatureGFX940Insts]) { |
4810 | switch (Opc) { |
4811 | case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: |
4812 | case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: |
4813 | case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: |
4814 | case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: |
4815 | UsesNeg = true; |
4816 | } |
4817 | } |
4818 | |
4819 | if (IsNeg == UsesNeg) |
4820 | return true; |
4821 | |
4822 | Error(L: BLGPLoc, |
4823 | Msg: UsesNeg ? "invalid modifier: blgp is not supported" |
4824 | : "invalid modifier: neg is not supported" ); |
4825 | |
4826 | return false; |
4827 | } |
4828 | |
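// The GFX11 split s_waitcnt_*cnt instructions accept only 'null' as their
// register operand.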
4829 | bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, |
4830 | const OperandVector &Operands) { |
4831 | if (!isGFX11Plus()) |
4832 | return true; |
4833 | |
4834 | unsigned Opc = Inst.getOpcode(); |
4835 | if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && |
4836 | Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && |
4837 | Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && |
4838 | Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) |
4839 | return true; |
4840 | |
4841 | int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::sdst); |
4842 | assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); |
4843 | auto Reg = mc2PseudoReg(Reg: Inst.getOperand(i: Src0Idx).getReg()); |
4844 | if (Reg == AMDGPU::SGPR_NULL) |
4845 | return true; |
4846 | |
4847 | SMLoc RegLoc = getRegLoc(Reg, Operands); |
4848 | Error(L: RegLoc, Msg: "src0 must be null" ); |
4849 | return false; |
4850 | } |
4851 | |
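// DS-specific checks: GWS opcodes have extra register alignment rules, and
// the gds modifier is rejected on targets without GDS.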
4852 | bool AMDGPUAsmParser::validateDS(const MCInst &Inst, |
4853 | const OperandVector &Operands) { |
4854 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4855 | if ((TSFlags & SIInstrFlags::DS) == 0) |
4856 | return true; |
4857 | if (TSFlags & SIInstrFlags::GWS) |
4858 | return validateGWS(Inst, Operands); |
4859 | // Only validate GDS for non-GWS instructions. |
4860 | if (hasGDS()) |
4861 | return true; |
4862 | int GDSIdx = |
4863 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::gds); |
4864 | if (GDSIdx < 0) |
4865 | return true; |
4866 | unsigned GDS = Inst.getOperand(i: GDSIdx).getImm(); |
4867 | if (GDS) { |
4868 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyGDS, Operands); |
4869 | Error(L: S, Msg: "gds modifier is not supported on this GPU" ); |
4870 | return false; |
4871 | } |
4872 | return true; |
4873 | } |
4874 | |
4875 | // gfx90a has an undocumented limitation: |
4876 | // DS_GWS opcodes must use even aligned registers. |
4877 | bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, |
4878 | const OperandVector &Operands) { |
4879 | if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) |
4880 | return true; |
4881 | |
4882 | int Opc = Inst.getOpcode(); |
4883 | if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && |
4884 | Opc != AMDGPU::DS_GWS_SEMA_BR_vi) |
4885 | return true; |
4886 | |
4887 | const MCRegisterInfo *MRI = getMRI(); |
4888 | const MCRegisterClass &VGPR32 = MRI->getRegClass(i: AMDGPU::VGPR_32RegClassID); |
4889 | int Data0Pos = |
4890 | AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), NamedIdx: AMDGPU::OpName::data0); |
4891 | assert(Data0Pos != -1); |
4892 | auto Reg = Inst.getOperand(i: Data0Pos).getReg(); |
4893 | auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); |
4894 | if (RegIdx & 1) { |
4895 | SMLoc RegLoc = getRegLoc(Reg, Operands); |
4896 | Error(L: RegLoc, Msg: "vgpr must be even aligned" ); |
4897 | return false; |
4898 | } |
4899 | |
4900 | return true; |
4901 | } |
4902 | |
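// Validate the cache policy (cpol) operand. GFX12+ th/scope combinations are
// checked separately; earlier targets enforce the SMRD, SCC and atomic
// glc/sc0 restrictions.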
4903 | bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, |
4904 | const OperandVector &Operands, |
4905 | const SMLoc &IDLoc) { |
4906 | int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), |
4907 | NamedIdx: AMDGPU::OpName::cpol); |
4908 | if (CPolPos == -1) |
4909 | return true; |
4910 | |
4911 | unsigned CPol = Inst.getOperand(i: CPolPos).getImm(); |
4912 | |
4913 | if (isGFX12Plus()) |
4914 | return validateTHAndScopeBits(Inst, Operands, CPol); |
4915 | |
4916 | uint64_t TSFlags = MII.get(Opcode: Inst.getOpcode()).TSFlags; |
4917 | if (TSFlags & SIInstrFlags::SMRD) { |
4918 | if (CPol && (isSI() || isCI())) { |
4919 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
4920 | Error(L: S, Msg: "cache policy is not supported for SMRD instructions" ); |
4921 | return false; |
4922 | } |
4923 | if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { |
4924 | Error(L: IDLoc, Msg: "invalid cache policy for SMEM instruction" ); |
4925 | return false; |
4926 | } |
4927 | } |
4928 | |
4929 | if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { |
4930 | const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | |
4931 | SIInstrFlags::MTBUF | SIInstrFlags::MIMG | |
4932 | SIInstrFlags::FLAT; |
4933 | if (!(TSFlags & AllowSCCModifier)) { |
4934 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
4935 | StringRef CStr(S.getPointer()); |
4936 | S = SMLoc::getFromPointer(Ptr: &CStr.data()[CStr.find(Str: "scc" )]); |
4937 | Error(L: S, |
4938 | Msg: "scc modifier is not supported for this instruction on this GPU" ); |
4939 | return false; |
4940 | } |
4941 | } |
4942 | |
4943 | if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) |
4944 | return true; |
4945 | |
4946 | if (TSFlags & SIInstrFlags::IsAtomicRet) { |
4947 | if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { |
4948 | Error(L: IDLoc, Msg: isGFX940() ? "instruction must use sc0" |
4949 | : "instruction must use glc" ); |
4950 | return false; |
4951 | } |
4952 | } else { |
4953 | if (CPol & CPol::GLC) { |
4954 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
4955 | StringRef CStr(S.getPointer()); |
4956 | S = SMLoc::getFromPointer( |
4957 | Ptr: &CStr.data()[CStr.find(Str: isGFX940() ? "sc0" : "glc" )]); |
4958 | Error(L: S, Msg: isGFX940() ? "instruction must not use sc0" |
4959 | : "instruction must not use glc" ); |
4960 | return false; |
4961 | } |
4962 | } |
4963 | |
4964 | return true; |
4965 | } |
4966 | |
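// GFX12+ cache policy: check that the temporal hint (th) and scope values are
// valid for the instruction class (SMEM, atomic, load or store).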
4967 | bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, |
4968 | const OperandVector &Operands, |
4969 | const unsigned CPol) { |
4970 | const unsigned TH = CPol & AMDGPU::CPol::TH; |
4971 | const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; |
4972 | |
4973 | const unsigned Opcode = Inst.getOpcode(); |
4974 | const MCInstrDesc &TID = MII.get(Opcode); |
4975 | |
4976 | auto PrintError = [&](StringRef Msg) { |
4977 | SMLoc S = getImmLoc(Type: AMDGPUOperand::ImmTyCPol, Operands); |
4978 | Error(L: S, Msg); |
4979 | return false; |
4980 | }; |
4981 | |
4982 | if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && |
4983 | (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && |
4984 | (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) |
4985 | return PrintError("instruction must use th:TH_ATOMIC_RETURN" ); |
4986 | |
4987 | if (TH == 0) |
4988 | return true; |
4989 | |
4990 | if ((TID.TSFlags & SIInstrFlags::SMRD) && |
4991 | ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || |
4992 | (TH == AMDGPU::CPol::TH_NT_HT))) |
4993 | return PrintError("invalid th value for SMEM instruction" ); |
4994 | |
4995 | if (TH == AMDGPU::CPol::TH_BYPASS) { |
4996 | if ((Scope != AMDGPU::CPol::SCOPE_SYS && |
4997 | CPol & AMDGPU::CPol::TH_REAL_BYPASS) || |
4998 | (Scope == AMDGPU::CPol::SCOPE_SYS && |
4999 | !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) |
5000 | return PrintError("scope and th combination is not valid" ); |
5001 | } |
5002 | |
5003 | bool IsStore = TID.mayStore(); |
5004 | bool IsAtomic = |
5005 | TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet); |
5006 | |
5007 | if (IsAtomic) { |
5008 | if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) |
5009 | return PrintError("invalid th value for atomic instructions" ); |
5010 | } else if (IsStore) { |
5011 | if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) |
5012 | return PrintError("invalid th value for store instructions" ); |
5013 | } else { |
5014 | if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) |
5015 | return PrintError("invalid th value for load instructions" ); |
5016 | } |
5017 | |
5018 | return true; |
5019 | } |
5020 | |
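// execz and vccz are no longer available as source operands on GFX11+.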
5021 | bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { |
5022 | if (!isGFX11Plus()) |
5023 | return true; |
5024 | for (auto &Operand : Operands) { |
5025 | if (!Operand->isReg()) |
5026 | continue; |
5027 | unsigned Reg = Operand->getReg(); |
5028 | if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { |
5029 | Error(L: getRegLoc(Reg, Operands), |
5030 | Msg: "execz and vccz are not supported on this GPU" ); |
5031 | return false; |
5032 | } |
5033 | } |
5034 | return true; |
5035 | } |
5036 | |
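// The tfe modifier is meaningless for buffer stores; reject it when it is
// explicitly specified.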
5037 | bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, |
5038 | const OperandVector &Operands) { |
5039 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
5040 | if (Desc.mayStore() && |
5041 | (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { |
5042 | SMLoc Loc = getImmLoc(Type: AMDGPUOperand::ImmTyTFE, Operands); |
5043 | if (Loc != getInstLoc(Operands)) { |
5044 | Error(L: Loc, Msg: "TFE modifier has no meaning for store instructions" ); |
5045 | return false; |
5046 | } |
5047 | } |
5048 | |
5049 | return true; |
5050 | } |
5051 | |
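// Run all semantic checks on a successfully matched instruction. Returns
// false and emits a diagnostic on the first violation.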
5052 | bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, |
5053 | const SMLoc &IDLoc, |
5054 | const OperandVector &Operands) { |
5055 | if (auto ErrMsg = validateLdsDirect(Inst)) { |
5056 | Error(L: getRegLoc(Reg: LDS_DIRECT, Operands), Msg: *ErrMsg); |
5057 | return false; |
5058 | } |
5059 | if (!validateSOPLiteral(Inst)) { |
5060 | Error(L: getLitLoc(Operands), |
5061 | Msg: "only one unique literal operand is allowed" ); |
5062 | return false; |
5063 | } |
5064 | if (!validateVOPLiteral(Inst, Operands)) { |
5065 | return false; |
5066 | } |
5067 | if (!validateConstantBusLimitations(Inst, Operands)) { |
5068 | return false; |
5069 | } |
5070 | if (!validateVOPDRegBankConstraints(Inst, Operands)) { |
5071 | return false; |
5072 | } |
5073 | if (!validateIntClampSupported(Inst)) { |
5074 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyClamp, Operands), |
5075 | Msg: "integer clamping is not supported on this GPU" ); |
5076 | return false; |
5077 | } |
5078 | if (!validateOpSel(Inst)) { |
5079 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyOpSel, Operands), |
5080 | Msg: "invalid op_sel operand" ); |
5081 | return false; |
5082 | } |
5083 | if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_lo)) { |
5084 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegLo, Operands), |
5085 | Msg: "invalid neg_lo operand" ); |
5086 | return false; |
5087 | } |
5088 | if (!validateNeg(Inst, OpName: AMDGPU::OpName::neg_hi)) { |
5089 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyNegHi, Operands), |
5090 | Msg: "invalid neg_hi operand" ); |
5091 | return false; |
5092 | } |
5093 | if (!validateDPP(Inst, Operands)) { |
5094 | return false; |
5095 | } |
  // For MUBUF and MTBUF, d16 is part of the opcode, so there is nothing to
  // validate.
5097 | if (!validateMIMGD16(Inst)) { |
5098 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyD16, Operands), |
5099 | Msg: "d16 modifier is not supported on this GPU" ); |
5100 | return false; |
5101 | } |
5102 | if (!validateMIMGMSAA(Inst)) { |
5103 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDim, Operands), |
5104 | Msg: "invalid dim; must be MSAA type" ); |
5105 | return false; |
5106 | } |
5107 | if (!validateMIMGDataSize(Inst, IDLoc)) { |
5108 | return false; |
5109 | } |
5110 | if (!validateMIMGAddrSize(Inst, IDLoc)) |
5111 | return false; |
5112 | if (!validateMIMGAtomicDMask(Inst)) { |
5113 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands), |
5114 | Msg: "invalid atomic image dmask" ); |
5115 | return false; |
5116 | } |
5117 | if (!validateMIMGGatherDMask(Inst)) { |
5118 | Error(L: getImmLoc(Type: AMDGPUOperand::ImmTyDMask, Operands), |
5119 | Msg: "invalid image_gather dmask: only one bit must be set" ); |
5120 | return false; |
5121 | } |
5122 | if (!validateMovrels(Inst, Operands)) { |
5123 | return false; |
5124 | } |
5125 | if (!validateOffset(Inst, Operands)) { |
5126 | return false; |
5127 | } |
5128 | if (!validateMAIAccWrite(Inst, Operands)) { |
5129 | return false; |
5130 | } |
5131 | if (!validateMAISrc2(Inst, Operands)) { |
5132 | return false; |
5133 | } |
5134 | if (!validateMFMA(Inst, Operands)) { |
5135 | return false; |
5136 | } |
5137 | if (!validateCoherencyBits(Inst, Operands, IDLoc)) { |
5138 | return false; |
5139 | } |
5140 | |
5141 | if (!validateAGPRLdSt(Inst)) { |
5142 | Error(L: IDLoc, Msg: getFeatureBits()[AMDGPU::FeatureGFX90AInsts] |
5143 | ? "invalid register class: data and dst should be all VGPR or AGPR" |
5144 | : "invalid register class: agpr loads and stores not supported on this GPU" |
5145 | ); |
5146 | return false; |
5147 | } |
5148 | if (!validateVGPRAlign(Inst)) { |
5149 | Error(L: IDLoc, |
5150 | Msg: "invalid register class: vgpr tuples must be 64 bit aligned" ); |
5151 | return false; |
5152 | } |
5153 | if (!validateDS(Inst, Operands)) { |
5154 | return false; |
5155 | } |
5156 | |
5157 | if (!validateBLGP(Inst, Operands)) { |
5158 | return false; |
5159 | } |
5160 | |
5161 | if (!validateDivScale(Inst)) { |
5162 | Error(L: IDLoc, Msg: "ABS not allowed in VOP3B instructions" ); |
5163 | return false; |
5164 | } |
5165 | if (!validateWaitCnt(Inst, Operands)) { |
5166 | return false; |
5167 | } |
5168 | if (!validateExeczVcczOperands(Operands)) { |
5169 | return false; |
5170 | } |
5171 | if (!validateTFE(Inst, Operands)) { |
5172 | return false; |
5173 | } |
5174 | |
5175 | return true; |
5176 | } |
5177 | |
5178 | static std::string AMDGPUMnemonicSpellCheck(StringRef S, |
5179 | const FeatureBitset &FBS, |
5180 | unsigned VariantID = 0); |
5181 | |
5182 | static bool AMDGPUCheckMnemonic(StringRef Mnemonic, |
5183 | const FeatureBitset &AvailableFeatures, |
5184 | unsigned VariantID); |
5185 | |
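// Check whether a mnemonic is recognized for the given feature set, either in
// any assembler variant or only in the given subset of variants.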
5186 | bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, |
5187 | const FeatureBitset &FBS) { |
5188 | return isSupportedMnemo(Mnemo, FBS, Variants: getAllVariants()); |
5189 | } |
5190 | |
5191 | bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, |
5192 | const FeatureBitset &FBS, |
5193 | ArrayRef<unsigned> Variants) { |
5194 | for (auto Variant : Variants) { |
5195 | if (AMDGPUCheckMnemonic(Mnemonic: Mnemo, AvailableFeatures: FBS, VariantID: Variant)) |
5196 | return true; |
5197 | } |
5198 | |
5199 | return false; |
5200 | } |
5201 | |
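// Report the most specific reason why an instruction is unavailable:
// unsupported variant, wrong wavesize, unsupported GPU, or a probable typo
// (with a spelling suggestion).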
5202 | bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, |
5203 | const SMLoc &IDLoc) { |
5204 | FeatureBitset FBS = ComputeAvailableFeatures(FB: getFeatureBits()); |
5205 | |
5206 | // Check if requested instruction variant is supported. |
5207 | if (isSupportedMnemo(Mnemo, FBS, Variants: getMatchedVariants())) |
5208 | return false; |
5209 | |
5210 | // This instruction is not supported. |
5211 | // Clear any other pending errors because they are no longer relevant. |
5212 | getParser().clearPendingErrors(); |
5213 | |
5214 | // Requested instruction variant is not supported. |
5215 | // Check if any other variants are supported. |
5216 | StringRef VariantName = getMatchedVariantName(); |
5217 | if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { |
5218 | return Error(L: IDLoc, |
5219 | Msg: Twine(VariantName, |
5220 | " variant of this instruction is not supported" )); |
5221 | } |
5222 | |
5223 | // Check if this instruction may be used with a different wavesize. |
5224 | if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && |
5225 | !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { |
5226 | |
5227 | FeatureBitset FeaturesWS32 = getFeatureBits(); |
5228 | FeaturesWS32.flip(I: AMDGPU::FeatureWavefrontSize64) |
5229 | .flip(I: AMDGPU::FeatureWavefrontSize32); |
5230 | FeatureBitset AvailableFeaturesWS32 = |
5231 | ComputeAvailableFeatures(FB: FeaturesWS32); |
5232 | |
5233 | if (isSupportedMnemo(Mnemo, FBS: AvailableFeaturesWS32, Variants: getMatchedVariants())) |
5234 | return Error(L: IDLoc, Msg: "instruction requires wavesize=32" ); |
5235 | } |
5236 | |
5237 | // Finally check if this instruction is supported on any other GPU. |
5238 | if (isSupportedMnemo(Mnemo, FBS: FeatureBitset().set())) { |
5239 | return Error(L: IDLoc, Msg: "instruction not supported on this GPU" ); |
5240 | } |
5241 | |
5242 | // Instruction not supported on any GPU. Probably a typo. |
5243 | std::string Suggestion = AMDGPUMnemonicSpellCheck(S: Mnemo, FBS); |
5244 | return Error(L: IDLoc, Msg: "invalid instruction" + Suggestion); |
5245 | } |
5246 | |
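// Return true if the operand at InvalidOprIdx is the mnemonic of the second
// (Y) half of a VOPD instruction, i.e. a token that directly follows '::'.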
5247 | static bool isInvalidVOPDY(const OperandVector &Operands, |
5248 | uint64_t InvalidOprIdx) { |
5249 | assert(InvalidOprIdx < Operands.size()); |
5250 | const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); |
5251 | if (Op.isToken() && InvalidOprIdx > 1) { |
5252 | const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); |
5253 | return PrevOp.isToken() && PrevOp.getToken() == "::" ; |
5254 | } |
5255 | return false; |
5256 | } |
5257 | |
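// Try all matcher variants, keeping the most specific failure status, then
// validate and emit the instruction on success or report a diagnostic.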
5258 | bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, |
5259 | OperandVector &Operands, |
5260 | MCStreamer &Out, |
5261 | uint64_t &ErrorInfo, |
5262 | bool MatchingInlineAsm) { |
5263 | MCInst Inst; |
5264 | unsigned Result = Match_Success; |
5265 | for (auto Variant : getMatchedVariants()) { |
5266 | uint64_t EI; |
5267 | auto R = MatchInstructionImpl(Operands, Inst, ErrorInfo&: EI, matchingInlineAsm: MatchingInlineAsm, |
5268 | VariantID: Variant); |
    // Match statuses are ordered from least to most specific; keep the most
    // specific one as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
5272 | if (R == Match_Success || R == Match_MissingFeature || |
5273 | (R == Match_InvalidOperand && Result != Match_MissingFeature) || |
5274 | (R == Match_MnemonicFail && Result != Match_InvalidOperand && |
5275 | Result != Match_MissingFeature)) { |
5276 | Result = R; |
5277 | ErrorInfo = EI; |
5278 | } |
5279 | if (R == Match_Success) |
5280 | break; |
5281 | } |
5282 | |
5283 | if (Result == Match_Success) { |
5284 | if (!validateInstruction(Inst, IDLoc, Operands)) { |
5285 | return true; |
5286 | } |
5287 | Inst.setLoc(IDLoc); |
5288 | Out.emitInstruction(Inst, STI: getSTI()); |
5289 | return false; |
5290 | } |
5291 | |
5292 | StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); |
5293 | if (checkUnsupportedInstruction(Mnemo, IDLoc)) { |
5294 | return true; |
5295 | } |
5296 | |
5297 | switch (Result) { |
5298 | default: break; |
5299 | case Match_MissingFeature: |
5300 | // It has been verified that the specified instruction |
5301 | // mnemonic is valid. A match was found but it requires |
5302 | // features which are not supported on this GPU. |
5303 | return Error(L: IDLoc, Msg: "operands are not valid for this GPU or mode" ); |
5304 | |
5305 | case Match_InvalidOperand: { |
5306 | SMLoc ErrorLoc = IDLoc; |
5307 | if (ErrorInfo != ~0ULL) { |
5308 | if (ErrorInfo >= Operands.size()) { |
5309 | return Error(L: IDLoc, Msg: "too few operands for instruction" ); |
5310 | } |
5311 | ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); |
5312 | if (ErrorLoc == SMLoc()) |
5313 | ErrorLoc = IDLoc; |
5314 | |
5315 | if (isInvalidVOPDY(Operands, InvalidOprIdx: ErrorInfo)) |
5316 | return Error(L: ErrorLoc, Msg: "invalid VOPDY instruction" ); |
5317 | } |
5318 | return Error(L: ErrorLoc, Msg: "invalid operand for instruction" ); |
5319 | } |
5320 | |
5321 | case Match_MnemonicFail: |
5322 | llvm_unreachable("Invalid instructions should have been handled already" ); |
5323 | } |
5324 | llvm_unreachable("Implement any new match types added!" ); |
5325 | } |
5326 | |
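// Parse an absolute expression and return its value truncated to 32 bits.
// Returns true on failure.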
5327 | bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { |
5328 | int64_t Tmp = -1; |
5329 | if (!isToken(Kind: AsmToken::Integer) && !isToken(Kind: AsmToken::Identifier)) { |
5330 | return true; |
5331 | } |
5332 | if (getParser().parseAbsoluteExpression(Res&: Tmp)) { |
5333 | return true; |
5334 | } |
5335 | Ret = static_cast<uint32_t>(Tmp); |
5336 | return false; |
5337 | } |
5338 | |
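// Handle the .amdgcn_target directive: the quoted target id must match the
// target id the streamer was configured with.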
5339 | bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { |
5340 | if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) |
5341 | return TokError(Msg: "directive only supported for amdgcn architecture" ); |
5342 | |
5343 | std::string TargetIDDirective; |
5344 | SMLoc TargetStart = getTok().getLoc(); |
5345 | if (getParser().parseEscapedString(Data&: TargetIDDirective)) |
5346 | return true; |
5347 | |
5348 | SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); |
5349 | if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) |
5350 | return getParser().Error(L: TargetRange.Start, |
5351 | Msg: (Twine(".amdgcn_target directive's target id " ) + |
5352 | Twine(TargetIDDirective) + |
5353 | Twine(" does not match the specified target id " ) + |
5354 | Twine(getTargetStreamer().getTargetID()->toString())).str()); |
5355 | |
5356 | return false; |
5357 | } |
5358 | |
5359 | bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { |
5360 | return Error(L: Range.Start, Msg: "value out of range" , Range); |
5361 | } |
5362 | |
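// Compute the granulated VGPR and SGPR block counts (as MCExprs) that are
// encoded in compute_pgm_rsrc1 from the .amdhsa_next_free_{v,s}gpr values.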
5363 | bool AMDGPUAsmParser::calculateGPRBlocks( |
5364 | const FeatureBitset &Features, const MCExpr *VCCUsed, |
5365 | const MCExpr *FlatScrUsed, bool XNACKUsed, |
5366 | std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR, |
5367 | SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange, |
5368 | const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) { |
5369 | // TODO(scott.linder): These calculations are duplicated from |
5370 | // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. |
5371 | IsaVersion Version = getIsaVersion(GPU: getSTI().getCPU()); |
5372 | MCContext &Ctx = getContext(); |
5373 | |
5374 | const MCExpr *NumSGPRs = NextFreeSGPR; |
5375 | int64_t EvaluatedSGPRs; |
5376 | |
5377 | if (Version.Major >= 10) |
5378 | NumSGPRs = MCConstantExpr::create(Value: 0, Ctx); |
5379 | else { |
5380 | unsigned MaxAddressableNumSGPRs = |
5381 | IsaInfo::getAddressableNumSGPRs(STI: &getSTI()); |
5382 | |
5383 | if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && Version.Major >= 8 && |
5384 | !Features.test(I: FeatureSGPRInitBug) && |
5385 | static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) |
5386 | return OutOfRangeError(Range: SGPRRange); |
5387 | |
    const MCExpr *ExtraSGPRs =
5389 | AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx); |
5390 | NumSGPRs = MCBinaryExpr::createAdd(LHS: NumSGPRs, RHS: ExtraSGPRs, Ctx); |
5391 | |
5392 | if (NumSGPRs->evaluateAsAbsolute(Res&: EvaluatedSGPRs) && |
5393 | (Version.Major <= 7 || Features.test(I: FeatureSGPRInitBug)) && |
5394 | static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs) |
5395 | return OutOfRangeError(Range: SGPRRange); |
5396 | |
5397 | if (Features.test(I: FeatureSGPRInitBug)) |
5398 | NumSGPRs = |
5399 | MCConstantExpr::create(Value: IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx); |
5400 | } |
5401 | |
5402 | // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks: |
5403 | // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1 |
5404 | auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR, |
5405 | unsigned Granule) -> const MCExpr * { |
5406 | const MCExpr *OneConst = MCConstantExpr::create(Value: 1ul, Ctx); |
5407 | const MCExpr *GranuleConst = MCConstantExpr::create(Value: Granule, Ctx); |
5408 | const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax(Args: {NumGPR, OneConst}, Ctx); |
5409 | const MCExpr *AlignToGPR = |
5410 | AMDGPUMCExpr::createAlignTo(Value: MaxNumGPR, Align: GranuleConst, Ctx); |
5411 | const MCExpr *DivGPR = |
5412 | MCBinaryExpr::createDiv(LHS: AlignToGPR, RHS: GranuleConst, Ctx); |
5413 | const MCExpr *SubGPR = MCBinaryExpr::createSub(LHS: DivGPR, RHS: OneConst, Ctx); |
5414 | return SubGPR; |
5415 | }; |
5416 | |
5417 | VGPRBlocks = GetNumGPRBlocks( |
5418 | NextFreeVGPR, |
5419 | IsaInfo::getVGPREncodingGranule(STI: &getSTI(), EnableWavefrontSize32)); |
5420 | SGPRBlocks = |
5421 | GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(STI: &getSTI())); |
5422 | |
5423 | return false; |
5424 | } |
5425 | |
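// Handle the .amdhsa_kernel directive: parse .amdhsa_* sub-directives up to
// .end_amdhsa_kernel and populate the kernel descriptor accordingly.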
5426 | bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { |
5427 | if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) |
5428 | return TokError(Msg: "directive only supported for amdgcn architecture" ); |
5429 | |
5430 | if (!isHsaAbi(STI: getSTI())) |
5431 | return TokError(Msg: "directive only supported for amdhsa OS" ); |
5432 | |
5433 | StringRef KernelName; |
5434 | if (getParser().parseIdentifier(Res&: KernelName)) |
5435 | return true; |
5436 | |
5437 | AMDGPU::MCKernelDescriptor KD = |
5438 | AMDGPU::MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor( |
5439 | STI: &getSTI(), Ctx&: getContext()); |
5440 | |
5441 | StringSet<> Seen; |
5442 | |
5443 | IsaVersion IVersion = getIsaVersion(GPU: getSTI().getCPU()); |
5444 | |
5445 | const MCExpr *ZeroExpr = MCConstantExpr::create(Value: 0, Ctx&: getContext()); |
5446 | const MCExpr *OneExpr = MCConstantExpr::create(Value: 1, Ctx&: getContext()); |
5447 | |
5448 | SMRange VGPRRange; |
5449 | const MCExpr *NextFreeVGPR = ZeroExpr; |
5450 | const MCExpr *AccumOffset = MCConstantExpr::create(Value: 0, Ctx&: getContext()); |
5451 | uint64_t SharedVGPRCount = 0; |
5452 | uint64_t PreloadLength = 0; |
5453 | uint64_t PreloadOffset = 0; |
5454 | SMRange SGPRRange; |
5455 | const MCExpr *NextFreeSGPR = ZeroExpr; |
5456 | |
  // Count the number of user SGPRs implied by the enabled user SGPR directives.
5458 | unsigned ImpliedUserSGPRCount = 0; |
5459 | |
5460 | // Track if the asm explicitly contains the directive for the user SGPR |
5461 | // count. |
5462 | std::optional<unsigned> ExplicitUserSGPRCount; |
5463 | const MCExpr *ReserveVCC = OneExpr; |
5464 | const MCExpr *ReserveFlatScr = OneExpr; |
5465 | std::optional<bool> EnableWavefrontSize32; |
5466 | |
5467 | while (true) { |
5468 | while (trySkipToken(Kind: AsmToken::EndOfStatement)); |
5469 | |
5470 | StringRef ID; |
5471 | SMRange IDRange = getTok().getLocRange(); |
5472 | if (!parseId(Val&: ID, ErrMsg: "expected .amdhsa_ directive or .end_amdhsa_kernel" )) |
5473 | return true; |
5474 | |
5475 | if (ID == ".end_amdhsa_kernel" ) |
5476 | break; |
5477 | |
5478 | if (!Seen.insert(key: ID).second) |
5479 | return TokError(Msg: ".amdhsa_ directives cannot be repeated" ); |
5480 | |
5481 | SMLoc ValStart = getLoc(); |
5482 | const MCExpr *ExprVal; |
5483 | if (getParser().parseExpression(Res&: ExprVal)) |
5484 | return true; |
5485 | SMLoc ValEnd = getLoc(); |
5486 | SMRange ValRange = SMRange(ValStart, ValEnd); |
5487 | |
5488 | int64_t IVal = 0; |
5489 | uint64_t Val = IVal; |
5490 | bool EvaluatableExpr; |
5491 | if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(Res&: IVal))) { |
5492 | if (IVal < 0) |
5493 | return OutOfRangeError(Range: ValRange); |
5494 | Val = IVal; |
5495 | } |
5496 | |
5497 | #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ |
5498 | if (!isUInt<ENTRY##_WIDTH>(Val)) \ |
5499 | return OutOfRangeError(RANGE); \ |
5500 | AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \ |
5501 | getContext()); |
5502 | |
5503 | // Some fields use the parsed value immediately which requires the expression to |
5504 | // be solvable. |
5505 | #define EXPR_RESOLVE_OR_ERROR(RESOLVED) \ |
5506 | if (!(RESOLVED)) \ |
5507 | return Error(IDRange.Start, "directive should have resolvable expression", \ |
5508 | IDRange); |
5509 | |
5510 | if (ID == ".amdhsa_group_segment_fixed_size" ) { |
5511 | if (!isUInt<sizeof(kernel_descriptor_t::group_segment_fixed_size) * |
5512 | CHAR_BIT>(x: Val)) |
5513 | return OutOfRangeError(Range: ValRange); |
5514 | KD.group_segment_fixed_size = ExprVal; |
5515 | } else if (ID == ".amdhsa_private_segment_fixed_size" ) { |
5516 | if (!isUInt<sizeof(kernel_descriptor_t::private_segment_fixed_size) * |
5517 | CHAR_BIT>(x: Val)) |
5518 | return OutOfRangeError(Range: ValRange); |
5519 | KD.private_segment_fixed_size = ExprVal; |
5520 | } else if (ID == ".amdhsa_kernarg_size" ) { |
5521 | if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(x: Val)) |
5522 | return OutOfRangeError(Range: ValRange); |
5523 | KD.kernarg_size = ExprVal; |
5524 | } else if (ID == ".amdhsa_user_sgpr_count" ) { |
5525 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5526 | ExplicitUserSGPRCount = Val; |
5527 | } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer" ) { |
5528 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5529 | if (hasArchitectedFlatScratch()) |
5530 | return Error(L: IDRange.Start, |
5531 | Msg: "directive is not supported with architected flat scratch" , |
5532 | Range: IDRange); |
5533 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5534 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, |
5535 | ExprVal, ValRange); |
5536 | if (Val) |
5537 | ImpliedUserSGPRCount += 4; |
5538 | } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length" ) { |
5539 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5540 | if (!hasKernargPreload()) |
5541 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5542 | |
5543 | if (Val > getMaxNumUserSGPRs()) |
5544 | return OutOfRangeError(Range: ValRange); |
5545 | PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal, |
5546 | ValRange); |
5547 | if (Val) { |
5548 | ImpliedUserSGPRCount += Val; |
5549 | PreloadLength = Val; |
5550 | } |
5551 | } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset" ) { |
5552 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5553 | if (!hasKernargPreload()) |
5554 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5555 | |
5556 | if (Val >= 1024) |
5557 | return OutOfRangeError(Range: ValRange); |
5558 | PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal, |
5559 | ValRange); |
5560 | if (Val) |
5561 | PreloadOffset = Val; |
5562 | } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr" ) { |
5563 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5564 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5565 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal, |
5566 | ValRange); |
5567 | if (Val) |
5568 | ImpliedUserSGPRCount += 2; |
5569 | } else if (ID == ".amdhsa_user_sgpr_queue_ptr" ) { |
5570 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5571 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5572 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal, |
5573 | ValRange); |
5574 | if (Val) |
5575 | ImpliedUserSGPRCount += 2; |
5576 | } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr" ) { |
5577 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5578 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5579 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, |
5580 | ExprVal, ValRange); |
5581 | if (Val) |
5582 | ImpliedUserSGPRCount += 2; |
5583 | } else if (ID == ".amdhsa_user_sgpr_dispatch_id" ) { |
5584 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5585 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5586 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal, |
5587 | ValRange); |
5588 | if (Val) |
5589 | ImpliedUserSGPRCount += 2; |
5590 | } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init" ) { |
5591 | if (hasArchitectedFlatScratch()) |
5592 | return Error(L: IDRange.Start, |
5593 | Msg: "directive is not supported with architected flat scratch" , |
5594 | Range: IDRange); |
5595 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5596 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5597 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, |
5598 | ExprVal, ValRange); |
5599 | if (Val) |
5600 | ImpliedUserSGPRCount += 2; |
5601 | } else if (ID == ".amdhsa_user_sgpr_private_segment_size" ) { |
5602 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5603 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5604 | KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, |
5605 | ExprVal, ValRange); |
5606 | if (Val) |
5607 | ImpliedUserSGPRCount += 1; |
5608 | } else if (ID == ".amdhsa_wavefront_size32" ) { |
5609 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5610 | if (IVersion.Major < 10) |
5611 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5612 | EnableWavefrontSize32 = Val; |
5613 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5614 | KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal, |
5615 | ValRange); |
5616 | } else if (ID == ".amdhsa_uses_dynamic_stack" ) { |
5617 | PARSE_BITS_ENTRY(KD.kernel_code_properties, |
5618 | KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal, |
5619 | ValRange); |
5620 | } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset" ) { |
5621 | if (hasArchitectedFlatScratch()) |
5622 | return Error(L: IDRange.Start, |
5623 | Msg: "directive is not supported with architected flat scratch" , |
5624 | Range: IDRange); |
5625 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5626 | COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, |
5627 | ValRange); |
5628 | } else if (ID == ".amdhsa_enable_private_segment" ) { |
5629 | if (!hasArchitectedFlatScratch()) |
5630 | return Error( |
5631 | L: IDRange.Start, |
5632 | Msg: "directive is not supported without architected flat scratch" , |
5633 | Range: IDRange); |
5634 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5635 | COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal, |
5636 | ValRange); |
5637 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x" ) { |
5638 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5639 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal, |
5640 | ValRange); |
5641 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y" ) { |
5642 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5643 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal, |
5644 | ValRange); |
5645 | } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z" ) { |
5646 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5647 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal, |
5648 | ValRange); |
5649 | } else if (ID == ".amdhsa_system_sgpr_workgroup_info" ) { |
5650 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5651 | COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal, |
5652 | ValRange); |
5653 | } else if (ID == ".amdhsa_system_vgpr_workitem_id" ) { |
5654 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5655 | COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal, |
5656 | ValRange); |
5657 | } else if (ID == ".amdhsa_next_free_vgpr" ) { |
5658 | VGPRRange = ValRange; |
5659 | NextFreeVGPR = ExprVal; |
5660 | } else if (ID == ".amdhsa_next_free_sgpr" ) { |
5661 | SGPRRange = ValRange; |
5662 | NextFreeSGPR = ExprVal; |
5663 | } else if (ID == ".amdhsa_accum_offset" ) { |
5664 | if (!isGFX90A()) |
5665 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5666 | AccumOffset = ExprVal; |
5667 | } else if (ID == ".amdhsa_reserve_vcc" ) { |
5668 | if (EvaluatableExpr && !isUInt<1>(x: Val)) |
5669 | return OutOfRangeError(Range: ValRange); |
5670 | ReserveVCC = ExprVal; |
5671 | } else if (ID == ".amdhsa_reserve_flat_scratch" ) { |
5672 | if (IVersion.Major < 7) |
5673 | return Error(L: IDRange.Start, Msg: "directive requires gfx7+" , Range: IDRange); |
5674 | if (hasArchitectedFlatScratch()) |
5675 | return Error(L: IDRange.Start, |
5676 | Msg: "directive is not supported with architected flat scratch" , |
5677 | Range: IDRange); |
5678 | if (EvaluatableExpr && !isUInt<1>(x: Val)) |
5679 | return OutOfRangeError(Range: ValRange); |
5680 | ReserveFlatScr = ExprVal; |
5681 | } else if (ID == ".amdhsa_reserve_xnack_mask" ) { |
5682 | if (IVersion.Major < 8) |
5683 | return Error(L: IDRange.Start, Msg: "directive requires gfx8+" , Range: IDRange); |
5684 | if (!isUInt<1>(x: Val)) |
5685 | return OutOfRangeError(Range: ValRange); |
5686 | if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) |
5687 | return getParser().Error(L: IDRange.Start, Msg: ".amdhsa_reserve_xnack_mask does not match target id" , |
5688 | Range: IDRange); |
5689 | } else if (ID == ".amdhsa_float_round_mode_32" ) { |
5690 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5691 | COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal, |
5692 | ValRange); |
5693 | } else if (ID == ".amdhsa_float_round_mode_16_64" ) { |
5694 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5695 | COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal, |
5696 | ValRange); |
5697 | } else if (ID == ".amdhsa_float_denorm_mode_32" ) { |
5698 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5699 | COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal, |
5700 | ValRange); |
5701 | } else if (ID == ".amdhsa_float_denorm_mode_16_64" ) { |
5702 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5703 | COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal, |
5704 | ValRange); |
5705 | } else if (ID == ".amdhsa_dx10_clamp" ) { |
5706 | if (IVersion.Major >= 12) |
5707 | return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+" , Range: IDRange); |
5708 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5709 | COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal, |
5710 | ValRange); |
5711 | } else if (ID == ".amdhsa_ieee_mode" ) { |
5712 | if (IVersion.Major >= 12) |
5713 | return Error(L: IDRange.Start, Msg: "directive unsupported on gfx12+" , Range: IDRange); |
5714 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5715 | COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal, |
5716 | ValRange); |
5717 | } else if (ID == ".amdhsa_fp16_overflow" ) { |
5718 | if (IVersion.Major < 9) |
5719 | return Error(L: IDRange.Start, Msg: "directive requires gfx9+" , Range: IDRange); |
5720 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5721 | COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal, |
5722 | ValRange); |
5723 | } else if (ID == ".amdhsa_tg_split" ) { |
5724 | if (!isGFX90A()) |
5725 | return Error(L: IDRange.Start, Msg: "directive requires gfx90a+" , Range: IDRange); |
5726 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, |
5727 | ExprVal, ValRange); |
5728 | } else if (ID == ".amdhsa_workgroup_processor_mode" ) { |
5729 | if (IVersion.Major < 10) |
5730 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5731 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5732 | COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal, |
5733 | ValRange); |
5734 | } else if (ID == ".amdhsa_memory_ordered" ) { |
5735 | if (IVersion.Major < 10) |
5736 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5737 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5738 | COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal, |
5739 | ValRange); |
5740 | } else if (ID == ".amdhsa_forward_progress" ) { |
5741 | if (IVersion.Major < 10) |
5742 | return Error(L: IDRange.Start, Msg: "directive requires gfx10+" , Range: IDRange); |
5743 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5744 | COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal, |
5745 | ValRange); |
5746 | } else if (ID == ".amdhsa_shared_vgpr_count" ) { |
5747 | EXPR_RESOLVE_OR_ERROR(EvaluatableExpr); |
5748 | if (IVersion.Major < 10 || IVersion.Major >= 12) |
5749 | return Error(L: IDRange.Start, Msg: "directive requires gfx10 or gfx11" , |
5750 | Range: IDRange); |
5751 | SharedVGPRCount = Val; |
5752 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, |
5753 | COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal, |
5754 | ValRange); |
5755 | } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op" ) { |
5756 | PARSE_BITS_ENTRY( |
5757 | KD.compute_pgm_rsrc2, |
5758 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, |
5759 | ExprVal, ValRange); |
5760 | } else if (ID == ".amdhsa_exception_fp_denorm_src" ) { |
5761 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5762 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, |
5763 | ExprVal, ValRange); |
5764 | } else if (ID == ".amdhsa_exception_fp_ieee_div_zero" ) { |
5765 | PARSE_BITS_ENTRY( |
5766 | KD.compute_pgm_rsrc2, |
5767 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, |
5768 | ExprVal, ValRange); |
5769 | } else if (ID == ".amdhsa_exception_fp_ieee_overflow" ) { |
5770 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5771 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, |
5772 | ExprVal, ValRange); |
5773 | } else if (ID == ".amdhsa_exception_fp_ieee_underflow" ) { |
5774 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5775 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, |
5776 | ExprVal, ValRange); |
5777 | } else if (ID == ".amdhsa_exception_fp_ieee_inexact" ) { |
5778 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5779 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, |
5780 | ExprVal, ValRange); |
5781 | } else if (ID == ".amdhsa_exception_int_div_zero" ) { |
5782 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, |
5783 | COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, |
5784 | ExprVal, ValRange); |
5785 | } else if (ID == ".amdhsa_round_robin_scheduling" ) { |
5786 | if (IVersion.Major < 12) |
5787 | return Error(L: IDRange.Start, Msg: "directive requires gfx12+" , Range: IDRange); |
5788 | PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, |
5789 | COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal, |
5790 | ValRange); |
5791 | } else { |
5792 | return Error(L: IDRange.Start, Msg: "unknown .amdhsa_kernel directive" , Range: IDRange); |
5793 | } |
5794 | |
5795 | #undef PARSE_BITS_ENTRY |
5796 | } |
5797 | |
5798 | if (!Seen.contains(key: ".amdhsa_next_free_vgpr" )) |
5799 | return TokError(Msg: ".amdhsa_next_free_vgpr directive is required" ); |
5800 | |
5801 | if (!Seen.contains(key: ".amdhsa_next_free_sgpr" )) |
5802 | return TokError(Msg: ".amdhsa_next_free_sgpr directive is required" ); |
5803 | |
5804 | const MCExpr *VGPRBlocks; |
5805 | const MCExpr *SGPRBlocks; |
5806 | if (calculateGPRBlocks(Features: getFeatureBits(), VCCUsed: ReserveVCC, FlatScrUsed: ReserveFlatScr, |
5807 | XNACKUsed: getTargetStreamer().getTargetID()->isXnackOnOrAny(), |
5808 | EnableWavefrontSize32, NextFreeVGPR, |
5809 | VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, |
5810 | SGPRBlocks)) |
5811 | return true; |
5812 | |
5813 | int64_t EvaluatedVGPRBlocks; |
5814 | bool VGPRBlocksEvaluatable = |
5815 | VGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedVGPRBlocks); |
5816 | if (VGPRBlocksEvaluatable && |
5817 | !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( |
5818 | x: static_cast<uint64_t>(EvaluatedVGPRBlocks))) { |
5819 | return OutOfRangeError(Range: VGPRRange); |
5820 | } |
5821 | AMDGPU::MCKernelDescriptor::bits_set( |
5822 | Dst&: KD.compute_pgm_rsrc1, Value: VGPRBlocks, |
5823 | Shift: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT, |
5824 | Mask: COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, Ctx&: getContext()); |
5825 | |
5826 | int64_t EvaluatedSGPRBlocks; |
5827 | if (SGPRBlocks->evaluateAsAbsolute(Res&: EvaluatedSGPRBlocks) && |
5828 | !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( |
5829 | x: static_cast<uint64_t>(EvaluatedSGPRBlocks))) |
5830 | return OutOfRangeError(Range: SGPRRange); |
5831 | AMDGPU::MCKernelDescriptor::bits_set( |
5832 | Dst&: KD.compute_pgm_rsrc1, Value: SGPRBlocks, |
5833 | Shift: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT, |
5834 | Mask: COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, Ctx&: getContext()); |
5835 | |
5836 | if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) |
    return TokError(Msg: "amdgpu_user_sgpr_count smaller than implied by "
                    "enabled user SGPRs" );
5839 | |
5840 | unsigned UserSGPRCount = |
5841 | ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; |
5842 | |
5843 | if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(x: UserSGPRCount)) |
5844 | return TokError(Msg: "too many user SGPRs enabled" ); |
5845 | AMDGPU::MCKernelDescriptor::bits_set( |
5846 | Dst&: KD.compute_pgm_rsrc2, Value: MCConstantExpr::create(Value: UserSGPRCount, Ctx&: getContext()), |
5847 | Shift: COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT, |
5848 | Mask: COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, Ctx&: getContext()); |
5849 | |
5850 | int64_t IVal = 0; |
5851 | if (!KD.kernarg_size->evaluateAsAbsolute(Res&: IVal)) |
5852 | return TokError(Msg: "Kernarg size should be resolvable" ); |
5853 | uint64_t kernarg_size = IVal; |
5854 | if (PreloadLength && kernarg_size && |
5855 | (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size)) |
5856 | return TokError(Msg: "Kernarg preload length + offset is larger than the " |
5857 | "kernarg segment size" ); |
5858 | |
5859 | if (isGFX90A()) { |
5860 | if (!Seen.contains(key: ".amdhsa_accum_offset" )) |
5861 | return TokError(Msg: ".amdhsa_accum_offset directive is required" ); |
5862 | int64_t EvaluatedAccum; |
5863 | bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(Res&: EvaluatedAccum); |
5864 | uint64_t UEvaluatedAccum = EvaluatedAccum; |
5865 | if (AccumEvaluatable && |
5866 | (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3))) |
5867 | return TokError(Msg: "accum_offset should be in range [4..256] in " |
5868 | "increments of 4" ); |
5869 | |
5870 | int64_t EvaluatedNumVGPR; |
5871 | if (NextFreeVGPR->evaluateAsAbsolute(Res&: EvaluatedNumVGPR) && |
5872 | AccumEvaluatable && |
5873 | UEvaluatedAccum > |
5874 | alignTo(Value: std::max(a: (uint64_t)1, b: (uint64_t)EvaluatedNumVGPR), Align: 4)) |
5875 | return TokError(Msg: "accum_offset exceeds total VGPR allocation" ); |
5876 | const MCExpr *AdjustedAccum = MCBinaryExpr::createSub( |
5877 | LHS: MCBinaryExpr::createDiv( |
5878 | LHS: AccumOffset, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext()), |
5879 | RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext()); |
5880 | MCKernelDescriptor::bits_set(Dst&: KD.compute_pgm_rsrc3, Value: AdjustedAccum, |
5881 | Shift: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, |
5882 | Mask: COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, |
5883 | Ctx&: getContext()); |
5884 | } |
5885 | |
5886 | if (IVersion.Major >= 10 && IVersion.Major < 12) { |
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5888 | if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { |
5889 | return TokError(Msg: "shared_vgpr_count directive not valid on " |
5890 | "wavefront size 32" ); |
5891 | } |
5892 | |
5893 | if (VGPRBlocksEvaluatable && |
5894 | (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) > |
5895 | 63)) { |
5896 | return TokError(Msg: "shared_vgpr_count*2 + " |
5897 | "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " |
5898 | "exceed 63\n" ); |
5899 | } |
5900 | } |
5901 | |
5902 | getTargetStreamer().EmitAmdhsaKernelDescriptor(STI: getSTI(), KernelName, KernelDescriptor: KD, |
5903 | NextVGPR: NextFreeVGPR, NextSGPR: NextFreeSGPR, |
5904 | ReserveVCC, ReserveFlatScr); |
5905 | return false; |
5906 | } |
5907 | |
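/// Parse the .amdhsa_code_object_version directive, e.g.
///   .amdhsa_code_object_version 5
/// The parsed value is forwarded to the target streamer.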
5908 | bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() { |
5909 | uint32_t Version; |
5910 | if (ParseAsAbsoluteExpression(Ret&: Version)) |
5911 | return true; |
5912 | |
5913 | getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(COV: Version); |
5914 | return false; |
5915 | } |
5916 | |
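/// Parse a single "key = value" entry of an amd_kernel_code_t block into \p C,
/// rejecting settings (e.g. wavefront size) that conflict with the subtarget.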
5917 | bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, |
5918 | AMDGPUMCKernelCodeT &C) { |
5919 | // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing |
5920 | // assembly for backwards compatibility. |
5921 | if (ID == "max_scratch_backing_memory_byte_size" ) { |
5922 | Parser.eatToEndOfStatement(); |
5923 | return false; |
5924 | } |
5925 | |
5926 | SmallString<40> ErrStr; |
5927 | raw_svector_ostream Err(ErrStr); |
5928 | if (!C.ParseKernelCodeT(ID, MCParser&: getParser(), Err)) { |
5929 | return TokError(Msg: Err.str()); |
5930 | } |
5931 | Lex(); |
5932 | |
5933 | if (ID == "enable_wavefront_size32" ) { |
5934 | if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { |
5935 | if (!isGFX10Plus()) |
5936 | return TokError(Msg: "enable_wavefront_size32=1 is only allowed on GFX10+" ); |
5937 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) |
5938 | return TokError(Msg: "enable_wavefront_size32=1 requires +WavefrontSize32" ); |
5939 | } else { |
5940 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) |
5941 | return TokError(Msg: "enable_wavefront_size32=0 requires +WavefrontSize64" ); |
5942 | } |
5943 | } |
5944 | |
5945 | if (ID == "wavefront_size" ) { |
5946 | if (C.wavefront_size == 5) { |
5947 | if (!isGFX10Plus()) |
5948 | return TokError(Msg: "wavefront_size=5 is only allowed on GFX10+" ); |
5949 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) |
5950 | return TokError(Msg: "wavefront_size=5 requires +WavefrontSize32" ); |
5951 | } else if (C.wavefront_size == 6) { |
5952 | if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) |
5953 | return TokError(Msg: "wavefront_size=6 requires +WavefrontSize64" ); |
5954 | } |
5955 | } |
5956 | |
5957 | return false; |
5958 | } |
5959 | |
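/// ParseDirectiveAMDKernelCodeT
/// Informally: .amd_kernel_code_t <key = value entries> .end_amd_kernel_code_t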
5960 | bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { |
5961 | AMDGPUMCKernelCodeT KernelCode; |
5962 | KernelCode.initDefault(STI: &getSTI(), Ctx&: getContext()); |
5963 | |
5964 | while (true) { |
5965 | // Lex EndOfStatement. This is in a while loop, because lexing a comment |
5966 | // will set the current token to EndOfStatement. |
    while (trySkipToken(Kind: AsmToken::EndOfStatement));
5968 | |
5969 | StringRef ID; |
5970 | if (!parseId(Val&: ID, ErrMsg: "expected value identifier or .end_amd_kernel_code_t" )) |
5971 | return true; |
5972 | |
5973 | if (ID == ".end_amd_kernel_code_t" ) |
5974 | break; |
5975 | |
5976 | if (ParseAMDKernelCodeTValue(ID, C&: KernelCode)) |
5977 | return true; |
5978 | } |
5979 | |
5980 | KernelCode.validate(STI: &getSTI(), Ctx&: getContext()); |
5981 | getTargetStreamer().EmitAMDKernelCodeT(Header&: KernelCode); |
5982 | |
5983 | return false; |
5984 | } |
5985 | |
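/// ParseDirectiveAMDGPUHsaKernel
/// ::= .amdgpu_hsa_kernel symbol_name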
5986 | bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { |
5987 | StringRef KernelName; |
5988 | if (!parseId(Val&: KernelName, ErrMsg: "expected symbol name" )) |
5989 | return true; |
5990 | |
5991 | getTargetStreamer().EmitAMDGPUSymbolType(SymbolName: KernelName, |
5992 | Type: ELF::STT_AMDGPU_HSA_KERNEL); |
5993 | |
5994 | KernelScope.initialize(Context&: getContext()); |
5995 | return false; |
5996 | } |
5997 | |
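/// Parse the .amd_amdgpu_isa directive; the quoted target-id string must match
/// the target id implied by the assembler options.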
5998 | bool AMDGPUAsmParser::ParseDirectiveISAVersion() { |
5999 | if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { |
6000 | return Error(L: getLoc(), |
6001 | Msg: ".amd_amdgpu_isa directive is not available on non-amdgcn " |
6002 | "architectures" ); |
6003 | } |
6004 | |
6005 | auto TargetIDDirective = getLexer().getTok().getStringContents(); |
6006 | if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) |
6007 | return Error(L: getParser().getTok().getLoc(), Msg: "target id must match options" ); |
6008 | |
6009 | getTargetStreamer().EmitISAVersion(); |
6010 | Lex(); |
6011 | |
6012 | return false; |
6013 | } |
6014 | |
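/// Parse the HSA metadata block bracketed by the V3 begin/end directives and
/// hand the collected YAML string to the target streamer.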
6015 | bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { |
6016 | assert(isHsaAbi(getSTI())); |
6017 | |
6018 | std::string HSAMetadataString; |
6019 | if (ParseToEndDirective(AssemblerDirectiveBegin: HSAMD::V3::AssemblerDirectiveBegin, |
6020 | AssemblerDirectiveEnd: HSAMD::V3::AssemblerDirectiveEnd, CollectString&: HSAMetadataString)) |
6021 | return true; |
6022 | |
6023 | if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) |
6024 | return Error(L: getLoc(), Msg: "invalid HSA metadata" ); |
6025 | |
6026 | return false; |
6027 | } |
6028 | |
6029 | /// Common code to parse out a block of text (typically YAML) between start and |
6030 | /// end directives. |
6031 | bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, |
6032 | const char *AssemblerDirectiveEnd, |
6033 | std::string &CollectString) { |
6034 | |
6035 | raw_string_ostream CollectStream(CollectString); |
6036 | |
6037 | getLexer().setSkipSpace(false); |
6038 | |
6039 | bool FoundEnd = false; |
6040 | while (!isToken(Kind: AsmToken::Eof)) { |
6041 | while (isToken(Kind: AsmToken::Space)) { |
6042 | CollectStream << getTokenStr(); |
6043 | Lex(); |
6044 | } |
6045 | |
6046 | if (trySkipId(Id: AssemblerDirectiveEnd)) { |
6047 | FoundEnd = true; |
6048 | break; |
6049 | } |
6050 | |
6051 | CollectStream << Parser.parseStringToEndOfStatement() |
6052 | << getContext().getAsmInfo()->getSeparatorString(); |
6053 | |
6054 | Parser.eatToEndOfStatement(); |
6055 | } |
6056 | |
6057 | getLexer().setSkipSpace(true); |
6058 | |
6059 | if (isToken(Kind: AsmToken::Eof) && !FoundEnd) { |
6060 | return TokError(Msg: Twine("expected directive " ) + |
6061 | Twine(AssemblerDirectiveEnd) + Twine(" not found" )); |
6062 | } |
6063 | |
6064 | CollectStream.flush(); |
6065 | return false; |
6066 | } |
6067 | |
6068 | /// Parse the assembler directive for new MsgPack-format PAL metadata. |
6069 | bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { |
6070 | std::string String; |
6071 | if (ParseToEndDirective(AssemblerDirectiveBegin: AMDGPU::PALMD::AssemblerDirectiveBegin, |
6072 | AssemblerDirectiveEnd: AMDGPU::PALMD::AssemblerDirectiveEnd, CollectString&: String)) |
6073 | return true; |
6074 | |
6075 | auto PALMetadata = getTargetStreamer().getPALMetadata(); |
6076 | if (!PALMetadata->setFromString(String)) |
6077 | return Error(L: getLoc(), Msg: "invalid PAL metadata" ); |
6078 | return false; |
6079 | } |
6080 | |
6081 | /// Parse the assembler directive for old linear-format PAL metadata. |
6082 | bool AMDGPUAsmParser::ParseDirectivePALMetadata() { |
6083 | if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { |
6084 | return Error(L: getLoc(), |
6085 | Msg: (Twine(PALMD::AssemblerDirective) + Twine(" directive is " |
6086 | "not available on non-amdpal OSes" )).str()); |
6087 | } |
6088 | |
6089 | auto PALMetadata = getTargetStreamer().getPALMetadata(); |
6090 | PALMetadata->setLegacy(); |
6091 | for (;;) { |
6092 | uint32_t Key, Value; |
6093 | if (ParseAsAbsoluteExpression(Ret&: Key)) { |
6094 | return TokError(Msg: Twine("invalid value in " ) + |
6095 | Twine(PALMD::AssemblerDirective)); |
6096 | } |
6097 | if (!trySkipToken(Kind: AsmToken::Comma)) { |
6098 | return TokError(Msg: Twine("expected an even number of values in " ) + |
6099 | Twine(PALMD::AssemblerDirective)); |
6100 | } |
6101 | if (ParseAsAbsoluteExpression(Ret&: Value)) { |
6102 | return TokError(Msg: Twine("invalid value in " ) + |
6103 | Twine(PALMD::AssemblerDirective)); |
6104 | } |
6105 | PALMetadata->setRegister(Reg: Key, Val: Value); |
6106 | if (!trySkipToken(Kind: AsmToken::Comma)) |
6107 | break; |
6108 | } |
6109 | return false; |
6110 | } |
6111 | |
6112 | /// ParseDirectiveAMDGPULDS |
6113 | /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] |
6114 | bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { |
6115 | if (getParser().checkForValidSection()) |
6116 | return true; |
6117 | |
6118 | StringRef Name; |
6119 | SMLoc NameLoc = getLoc(); |
6120 | if (getParser().parseIdentifier(Res&: Name)) |
6121 | return TokError(Msg: "expected identifier in directive" ); |
6122 | |
6123 | MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); |
6124 | if (getParser().parseComma()) |
6125 | return true; |
6126 | |
6127 | unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(STI: &getSTI()); |
6128 | |
6129 | int64_t Size; |
6130 | SMLoc SizeLoc = getLoc(); |
6131 | if (getParser().parseAbsoluteExpression(Res&: Size)) |
6132 | return true; |
6133 | if (Size < 0) |
6134 | return Error(L: SizeLoc, Msg: "size must be non-negative" ); |
6135 | if (Size > LocalMemorySize) |
6136 | return Error(L: SizeLoc, Msg: "size is too large" ); |
6137 | |
6138 | int64_t Alignment = 4; |
6139 | if (trySkipToken(Kind: AsmToken::Comma)) { |
6140 | SMLoc AlignLoc = getLoc(); |
6141 | if (getParser().parseAbsoluteExpression(Res&: Alignment)) |
6142 | return true; |
6143 | if (Alignment < 0 || !isPowerOf2_64(Value: Alignment)) |
6144 | return Error(L: AlignLoc, Msg: "alignment must be a power of two" ); |
6145 | |
6146 | // Alignment larger than the size of LDS is possible in theory, as long |
    // as the linker manages to place the symbol at address 0, but we do want
6148 | // to make sure the alignment fits nicely into a 32-bit integer. |
6149 | if (Alignment >= 1u << 31) |
6150 | return Error(L: AlignLoc, Msg: "alignment is too large" ); |
6151 | } |
6152 | |
6153 | if (parseEOL()) |
6154 | return true; |
6155 | |
6156 | Symbol->redefineIfPossible(); |
6157 | if (!Symbol->isUndefined()) |
6158 | return Error(L: NameLoc, Msg: "invalid symbol redefinition" ); |
6159 | |
6160 | getTargetStreamer().emitAMDGPULDS(Symbol, Size, Alignment: Align(Alignment)); |
6161 | return false; |
6162 | } |
6163 | |
6164 | bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { |
6165 | StringRef IDVal = DirectiveID.getString(); |
6166 | |
6167 | if (isHsaAbi(STI: getSTI())) { |
6168 | if (IDVal == ".amdhsa_kernel" ) |
6169 | return ParseDirectiveAMDHSAKernel(); |
6170 | |
6171 | if (IDVal == ".amdhsa_code_object_version" ) |
6172 | return ParseDirectiveAMDHSACodeObjectVersion(); |
6173 | |
6174 | // TODO: Restructure/combine with PAL metadata directive. |
6175 | if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) |
6176 | return ParseDirectiveHSAMetadata(); |
6177 | } else { |
6178 | if (IDVal == ".amd_kernel_code_t" ) |
6179 | return ParseDirectiveAMDKernelCodeT(); |
6180 | |
6181 | if (IDVal == ".amdgpu_hsa_kernel" ) |
6182 | return ParseDirectiveAMDGPUHsaKernel(); |
6183 | |
6184 | if (IDVal == ".amd_amdgpu_isa" ) |
6185 | return ParseDirectiveISAVersion(); |
6186 | |
6187 | if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) { |
6188 | return Error(L: getLoc(), Msg: (Twine(HSAMD::AssemblerDirectiveBegin) + |
6189 | Twine(" directive is " |
6190 | "not available on non-amdhsa OSes" )) |
6191 | .str()); |
6192 | } |
6193 | } |
6194 | |
6195 | if (IDVal == ".amdgcn_target" ) |
6196 | return ParseDirectiveAMDGCNTarget(); |
6197 | |
6198 | if (IDVal == ".amdgpu_lds" ) |
6199 | return ParseDirectiveAMDGPULDS(); |
6200 | |
6201 | if (IDVal == PALMD::AssemblerDirectiveBegin) |
6202 | return ParseDirectivePALMetadataBegin(); |
6203 | |
6204 | if (IDVal == PALMD::AssemblerDirective) |
6205 | return ParseDirectivePALMetadata(); |
6206 | |
6207 | return true; |
6208 | } |
6209 | |
6210 | bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, |
6211 | unsigned RegNo) { |
6212 | |
6213 | if (MRI.regsOverlap(RegA: AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegB: RegNo)) |
6214 | return isGFX9Plus(); |
6215 | |
6216 | // GFX10+ has 2 more SGPRs 104 and 105. |
6217 | if (MRI.regsOverlap(RegA: AMDGPU::SGPR104_SGPR105, RegB: RegNo)) |
6218 | return hasSGPR104_SGPR105(); |
6219 | |
6220 | switch (RegNo) { |
6221 | case AMDGPU::SRC_SHARED_BASE_LO: |
6222 | case AMDGPU::SRC_SHARED_BASE: |
6223 | case AMDGPU::SRC_SHARED_LIMIT_LO: |
6224 | case AMDGPU::SRC_SHARED_LIMIT: |
6225 | case AMDGPU::SRC_PRIVATE_BASE_LO: |
6226 | case AMDGPU::SRC_PRIVATE_BASE: |
6227 | case AMDGPU::SRC_PRIVATE_LIMIT_LO: |
6228 | case AMDGPU::SRC_PRIVATE_LIMIT: |
6229 | return isGFX9Plus(); |
6230 | case AMDGPU::SRC_POPS_EXITING_WAVE_ID: |
6231 | return isGFX9Plus() && !isGFX11Plus(); |
6232 | case AMDGPU::TBA: |
6233 | case AMDGPU::TBA_LO: |
6234 | case AMDGPU::TBA_HI: |
6235 | case AMDGPU::TMA: |
6236 | case AMDGPU::TMA_LO: |
6237 | case AMDGPU::TMA_HI: |
6238 | return !isGFX9Plus(); |
6239 | case AMDGPU::XNACK_MASK: |
6240 | case AMDGPU::XNACK_MASK_LO: |
6241 | case AMDGPU::XNACK_MASK_HI: |
6242 | return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); |
6243 | case AMDGPU::SGPR_NULL: |
6244 | return isGFX10Plus(); |
6245 | default: |
6246 | break; |
6247 | } |
6248 | |
6249 | if (isCI()) |
6250 | return true; |
6251 | |
6252 | if (isSI() || isGFX10Plus()) { |
6253 | // No flat_scr on SI. |
6254 | // On GFX10Plus flat scratch is not a valid register operand and can only be |
6255 | // accessed with s_setreg/s_getreg. |
6256 | switch (RegNo) { |
6257 | case AMDGPU::FLAT_SCR: |
6258 | case AMDGPU::FLAT_SCR_LO: |
6259 | case AMDGPU::FLAT_SCR_HI: |
6260 | return false; |
6261 | default: |
6262 | return true; |
6263 | } |
6264 | } |
6265 | |
6266 | // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that |
6267 | // SI/CI have. |
6268 | if (MRI.regsOverlap(RegA: AMDGPU::SGPR102_SGPR103, RegB: RegNo)) |
6269 | return hasSGPR102_SGPR103(); |
6270 | |
6271 | return true; |
6272 | } |
6273 | |
6274 | ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands, |
6275 | StringRef Mnemonic, |
6276 | OperandMode Mode) { |
6277 | ParseStatus Res = parseVOPD(Operands); |
6278 | if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement)) |
6279 | return Res; |
6280 | |
6281 | // Try to parse with a custom parser |
6282 | Res = MatchOperandParserImpl(Operands, Mnemonic); |
6283 | |
  // If we successfully parsed the operand or if there was an error parsing,
6285 | // we are done. |
6286 | // |
6287 | // If we are parsing after we reach EndOfStatement then this means we |
6288 | // are appending default values to the Operands list. This is only done |
6289 | // by custom parser, so we shouldn't continue on to the generic parsing. |
6290 | if (Res.isSuccess() || Res.isFailure() || isToken(Kind: AsmToken::EndOfStatement)) |
6291 | return Res; |
6292 | |
6293 | SMLoc RBraceLoc; |
6294 | SMLoc LBraceLoc = getLoc(); |
6295 | if (Mode == OperandMode_NSA && trySkipToken(Kind: AsmToken::LBrac)) { |
6296 | unsigned Prefix = Operands.size(); |
6297 | |
6298 | for (;;) { |
6299 | auto Loc = getLoc(); |
6300 | Res = parseReg(Operands); |
6301 | if (Res.isNoMatch()) |
6302 | Error(L: Loc, Msg: "expected a register" ); |
6303 | if (!Res.isSuccess()) |
6304 | return ParseStatus::Failure; |
6305 | |
6306 | RBraceLoc = getLoc(); |
6307 | if (trySkipToken(Kind: AsmToken::RBrac)) |
6308 | break; |
6309 | |
6310 | if (!skipToken(Kind: AsmToken::Comma, |
6311 | ErrMsg: "expected a comma or a closing square bracket" )) |
6312 | return ParseStatus::Failure; |
6313 | } |
6314 | |
6315 | if (Operands.size() - Prefix > 1) { |
6316 | Operands.insert(I: Operands.begin() + Prefix, |
6317 | Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "[" , Loc: LBraceLoc)); |
6318 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: "]" , Loc: RBraceLoc)); |
6319 | } |
6320 | |
6321 | return ParseStatus::Success; |
6322 | } |
6323 | |
6324 | return parseRegOrImm(Operands); |
6325 | } |
6326 | |
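// Strip a trailing encoding suffix from the mnemonic and remember it as a
// forced encoding, e.g. "v_add_f32_e64" is matched as "v_add_f32" with a
// forced 64-bit encoding.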
6327 | StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { |
6328 | // Clear any forced encodings from the previous instruction. |
6329 | setForcedEncodingSize(0); |
6330 | setForcedDPP(false); |
6331 | setForcedSDWA(false); |
6332 | |
6333 | if (Name.ends_with(Suffix: "_e64_dpp" )) { |
6334 | setForcedDPP(true); |
6335 | setForcedEncodingSize(64); |
6336 | return Name.substr(Start: 0, N: Name.size() - 8); |
6337 | } |
6338 | if (Name.ends_with(Suffix: "_e64" )) { |
6339 | setForcedEncodingSize(64); |
6340 | return Name.substr(Start: 0, N: Name.size() - 4); |
6341 | } |
6342 | if (Name.ends_with(Suffix: "_e32" )) { |
6343 | setForcedEncodingSize(32); |
6344 | return Name.substr(Start: 0, N: Name.size() - 4); |
6345 | } |
6346 | if (Name.ends_with(Suffix: "_dpp" )) { |
6347 | setForcedDPP(true); |
6348 | return Name.substr(Start: 0, N: Name.size() - 4); |
6349 | } |
6350 | if (Name.ends_with(Suffix: "_sdwa" )) { |
6351 | setForcedSDWA(true); |
6352 | return Name.substr(Start: 0, N: Name.size() - 5); |
6353 | } |
6354 | return Name; |
6355 | } |
6356 | |
6357 | static void applyMnemonicAliases(StringRef &Mnemonic, |
6358 | const FeatureBitset &Features, |
6359 | unsigned VariantID); |
6360 | |
6361 | bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, |
6362 | StringRef Name, |
6363 | SMLoc NameLoc, OperandVector &Operands) { |
6364 | // Add the instruction mnemonic |
6365 | Name = parseMnemonicSuffix(Name); |
6366 | |
6367 | // If the target architecture uses MnemonicAlias, call it here to parse |
6368 | // operands correctly. |
6369 | applyMnemonicAliases(Mnemonic&: Name, Features: getAvailableFeatures(), VariantID: 0); |
6370 | |
6371 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: NameLoc)); |
6372 | |
6373 | bool IsMIMG = Name.starts_with(Prefix: "image_" ); |
6374 | |
6375 | while (!trySkipToken(Kind: AsmToken::EndOfStatement)) { |
6376 | OperandMode Mode = OperandMode_Default; |
6377 | if (IsMIMG && isGFX10Plus() && Operands.size() == 2) |
6378 | Mode = OperandMode_NSA; |
6379 | ParseStatus Res = parseOperand(Operands, Mnemonic: Name, Mode); |
6380 | |
6381 | if (!Res.isSuccess()) { |
6382 | checkUnsupportedInstruction(Mnemo: Name, IDLoc: NameLoc); |
6383 | if (!Parser.hasPendingError()) { |
6384 | // FIXME: use real operand location rather than the current location. |
6385 | StringRef Msg = Res.isFailure() ? "failed parsing operand." |
6386 | : "not a valid operand." ; |
6387 | Error(L: getLoc(), Msg); |
6388 | } |
6389 | while (!trySkipToken(Kind: AsmToken::EndOfStatement)) { |
6390 | lex(); |
6391 | } |
6392 | return true; |
6393 | } |
6394 | |
6395 | // Eat the comma or space if there is one. |
6396 | trySkipToken(Kind: AsmToken::Comma); |
6397 | } |
6398 | |
6399 | return false; |
6400 | } |
6401 | |
6402 | //===----------------------------------------------------------------------===// |
6403 | // Utility functions |
6404 | //===----------------------------------------------------------------------===// |
6405 | |
6406 | ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, |
6407 | OperandVector &Operands) { |
6408 | SMLoc S = getLoc(); |
6409 | if (!trySkipId(Id: Name)) |
6410 | return ParseStatus::NoMatch; |
6411 | |
6412 | Operands.push_back(Elt: AMDGPUOperand::CreateToken(AsmParser: this, Str: Name, Loc: S)); |
6413 | return ParseStatus::Success; |
6414 | } |
6415 | |
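// Parse an integer that follows a "Prefix:" introducer, e.g. "offset:16".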
6416 | ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, |
6417 | int64_t &IntVal) { |
6418 | |
6419 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6420 | return ParseStatus::NoMatch; |
6421 | |
6422 | return parseExpr(Imm&: IntVal) ? ParseStatus::Success : ParseStatus::Failure; |
6423 | } |
6424 | |
6425 | ParseStatus AMDGPUAsmParser::parseIntWithPrefix( |
6426 | const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, |
6427 | std::function<bool(int64_t &)> ConvertResult) { |
6428 | SMLoc S = getLoc(); |
6429 | int64_t Value = 0; |
6430 | |
6431 | ParseStatus Res = parseIntWithPrefix(Prefix, IntVal&: Value); |
6432 | if (!Res.isSuccess()) |
6433 | return Res; |
6434 | |
6435 | if (ConvertResult && !ConvertResult(Value)) { |
6436 | Error(L: S, Msg: "invalid " + StringRef(Prefix) + " value." ); |
6437 | } |
6438 | |
6439 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Value, Loc: S, Type: ImmTy)); |
6440 | return ParseStatus::Success; |
6441 | } |
6442 | |
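// Parse a bracketed list of 0/1 flags following "Prefix:", e.g. "neg:[0,1,1,0]",
// packing element I into bit I of the resulting immediate.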
6443 | ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( |
6444 | const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, |
6445 | bool (*ConvertResult)(int64_t &)) { |
6446 | SMLoc S = getLoc(); |
6447 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6448 | return ParseStatus::NoMatch; |
6449 | |
6450 | if (!skipToken(Kind: AsmToken::LBrac, ErrMsg: "expected a left square bracket" )) |
6451 | return ParseStatus::Failure; |
6452 | |
6453 | unsigned Val = 0; |
6454 | const unsigned MaxSize = 4; |
6455 | |
6456 | // FIXME: How to verify the number of elements matches the number of src |
6457 | // operands? |
6458 | for (int I = 0; ; ++I) { |
6459 | int64_t Op; |
6460 | SMLoc Loc = getLoc(); |
6461 | if (!parseExpr(Imm&: Op)) |
6462 | return ParseStatus::Failure; |
6463 | |
6464 | if (Op != 0 && Op != 1) |
6465 | return Error(L: Loc, Msg: "invalid " + StringRef(Prefix) + " value." ); |
6466 | |
6467 | Val |= (Op << I); |
6468 | |
6469 | if (trySkipToken(Kind: AsmToken::RBrac)) |
6470 | break; |
6471 | |
6472 | if (I + 1 == MaxSize) |
6473 | return Error(L: getLoc(), Msg: "expected a closing square bracket" ); |
6474 | |
6475 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) |
6476 | return ParseStatus::Failure; |
6477 | } |
6478 | |
6479 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val, Loc: S, Type: ImmTy)); |
6480 | return ParseStatus::Success; |
6481 | } |
6482 | |
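// Parse a boolean modifier given by its bare name or with a "no" prefix,
// e.g. "r128" sets the bit and "nor128" clears it.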
6483 | ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, |
6484 | OperandVector &Operands, |
6485 | AMDGPUOperand::ImmTy ImmTy) { |
6486 | int64_t Bit; |
6487 | SMLoc S = getLoc(); |
6488 | |
6489 | if (trySkipId(Id: Name)) { |
6490 | Bit = 1; |
6491 | } else if (trySkipId(Pref: "no" , Id: Name)) { |
6492 | Bit = 0; |
6493 | } else { |
6494 | return ParseStatus::NoMatch; |
6495 | } |
6496 | |
6497 | if (Name == "r128" && !hasMIMG_R128()) |
6498 | return Error(L: S, Msg: "r128 modifier is not supported on this GPU" ); |
6499 | if (Name == "a16" && !hasA16()) |
6500 | return Error(L: S, Msg: "a16 modifier is not supported on this GPU" ); |
6501 | |
6502 | if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) |
6503 | ImmTy = AMDGPUOperand::ImmTyR128A16; |
6504 | |
6505 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Bit, Loc: S, Type: ImmTy)); |
6506 | return ParseStatus::Success; |
6507 | } |
6508 | |
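// Map a cache-policy token, minus an optional "no" prefix, to its CPol bit;
// e.g. both "glc" and "noglc" yield CPol::GLC, with Disabling set for the
// latter.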
6509 | unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, |
6510 | bool &Disabling) const { |
6511 | Disabling = Id.consume_front(Prefix: "no" ); |
6512 | |
6513 | if (isGFX940() && !Mnemo.starts_with(Prefix: "s_" )) { |
6514 | return StringSwitch<unsigned>(Id) |
6515 | .Case(S: "nt" , Value: AMDGPU::CPol::NT) |
6516 | .Case(S: "sc0" , Value: AMDGPU::CPol::SC0) |
6517 | .Case(S: "sc1" , Value: AMDGPU::CPol::SC1) |
6518 | .Default(Value: 0); |
6519 | } |
6520 | |
6521 | return StringSwitch<unsigned>(Id) |
6522 | .Case(S: "dlc" , Value: AMDGPU::CPol::DLC) |
6523 | .Case(S: "glc" , Value: AMDGPU::CPol::GLC) |
6524 | .Case(S: "scc" , Value: AMDGPU::CPol::SCC) |
6525 | .Case(S: "slc" , Value: AMDGPU::CPol::SLC) |
6526 | .Default(Value: 0); |
6527 | } |
6528 | |
6529 | ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { |
6530 | if (isGFX12Plus()) { |
6531 | SMLoc StringLoc = getLoc(); |
6532 | |
6533 | int64_t CPolVal = 0; |
6534 | ParseStatus ResTH = ParseStatus::NoMatch; |
6535 | ParseStatus ResScope = ParseStatus::NoMatch; |
6536 | |
6537 | for (;;) { |
6538 | if (ResTH.isNoMatch()) { |
6539 | int64_t TH; |
6540 | ResTH = parseTH(Operands, TH); |
6541 | if (ResTH.isFailure()) |
6542 | return ResTH; |
6543 | if (ResTH.isSuccess()) { |
6544 | CPolVal |= TH; |
6545 | continue; |
6546 | } |
6547 | } |
6548 | |
6549 | if (ResScope.isNoMatch()) { |
6550 | int64_t Scope; |
6551 | ResScope = parseScope(Operands, Scope); |
6552 | if (ResScope.isFailure()) |
6553 | return ResScope; |
6554 | if (ResScope.isSuccess()) { |
6555 | CPolVal |= Scope; |
6556 | continue; |
6557 | } |
6558 | } |
6559 | |
6560 | break; |
6561 | } |
6562 | |
6563 | if (ResTH.isNoMatch() && ResScope.isNoMatch()) |
6564 | return ParseStatus::NoMatch; |
6565 | |
6566 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: CPolVal, Loc: StringLoc, |
6567 | Type: AMDGPUOperand::ImmTyCPol)); |
6568 | return ParseStatus::Success; |
6569 | } |
6570 | |
6571 | StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); |
6572 | SMLoc OpLoc = getLoc(); |
6573 | unsigned Enabled = 0, Seen = 0; |
6574 | for (;;) { |
6575 | SMLoc S = getLoc(); |
6576 | bool Disabling; |
6577 | unsigned CPol = getCPolKind(Id: getId(), Mnemo, Disabling); |
6578 | if (!CPol) |
6579 | break; |
6580 | |
6581 | lex(); |
6582 | |
6583 | if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) |
6584 | return Error(L: S, Msg: "dlc modifier is not supported on this GPU" ); |
6585 | |
6586 | if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) |
6587 | return Error(L: S, Msg: "scc modifier is not supported on this GPU" ); |
6588 | |
6589 | if (Seen & CPol) |
6590 | return Error(L: S, Msg: "duplicate cache policy modifier" ); |
6591 | |
6592 | if (!Disabling) |
6593 | Enabled |= CPol; |
6594 | |
6595 | Seen |= CPol; |
6596 | } |
6597 | |
6598 | if (!Seen) |
6599 | return ParseStatus::NoMatch; |
6600 | |
6601 | Operands.push_back( |
6602 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Enabled, Loc: OpLoc, Type: AMDGPUOperand::ImmTyCPol)); |
6603 | return ParseStatus::Success; |
6604 | } |
6605 | |
6606 | ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, |
6607 | int64_t &Scope) { |
  Scope = AMDGPU::CPol::SCOPE_CU; // default
6609 | |
6610 | StringRef Value; |
6611 | SMLoc StringLoc; |
6612 | ParseStatus Res; |
6613 | |
6614 | Res = parseStringWithPrefix(Prefix: "scope" , Value, StringLoc); |
6615 | if (!Res.isSuccess()) |
6616 | return Res; |
6617 | |
6618 | Scope = StringSwitch<int64_t>(Value) |
6619 | .Case(S: "SCOPE_CU" , Value: AMDGPU::CPol::SCOPE_CU) |
6620 | .Case(S: "SCOPE_SE" , Value: AMDGPU::CPol::SCOPE_SE) |
6621 | .Case(S: "SCOPE_DEV" , Value: AMDGPU::CPol::SCOPE_DEV) |
6622 | .Case(S: "SCOPE_SYS" , Value: AMDGPU::CPol::SCOPE_SYS) |
6623 | .Default(Value: 0xffffffff); |
6624 | |
6625 | if (Scope == 0xffffffff) |
6626 | return Error(L: StringLoc, Msg: "invalid scope value" ); |
6627 | |
6628 | return ParseStatus::Success; |
6629 | } |
6630 | |
6631 | ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { |
6632 | TH = AMDGPU::CPol::TH_RT; // default |
6633 | |
6634 | StringRef Value; |
6635 | SMLoc StringLoc; |
6636 | ParseStatus Res = parseStringWithPrefix(Prefix: "th" , Value, StringLoc); |
6637 | if (!Res.isSuccess()) |
6638 | return Res; |
6639 | |
6640 | if (Value == "TH_DEFAULT" ) |
6641 | TH = AMDGPU::CPol::TH_RT; |
6642 | else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" || |
6643 | Value == "TH_LOAD_NT_WB" ) { |
6644 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6645 | } else if (Value.consume_front(Prefix: "TH_ATOMIC_" )) { |
6646 | TH = AMDGPU::CPol::TH_TYPE_ATOMIC; |
6647 | } else if (Value.consume_front(Prefix: "TH_LOAD_" )) { |
6648 | TH = AMDGPU::CPol::TH_TYPE_LOAD; |
6649 | } else if (Value.consume_front(Prefix: "TH_STORE_" )) { |
6650 | TH = AMDGPU::CPol::TH_TYPE_STORE; |
6651 | } else { |
6652 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6653 | } |
6654 | |
6655 | if (Value == "BYPASS" ) |
6656 | TH |= AMDGPU::CPol::TH_REAL_BYPASS; |
6657 | |
6658 | if (TH != 0) { |
6659 | if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) |
6660 | TH |= StringSwitch<int64_t>(Value) |
6661 | .Case(S: "RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_RETURN) |
6662 | .Case(S: "RT" , Value: AMDGPU::CPol::TH_RT) |
6663 | .Case(S: "RT_RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_RETURN) |
6664 | .Case(S: "NT" , Value: AMDGPU::CPol::TH_ATOMIC_NT) |
6665 | .Case(S: "NT_RETURN" , Value: AMDGPU::CPol::TH_ATOMIC_NT | |
6666 | AMDGPU::CPol::TH_ATOMIC_RETURN) |
6667 | .Case(S: "CASCADE_RT" , Value: AMDGPU::CPol::TH_ATOMIC_CASCADE) |
6668 | .Case(S: "CASCADE_NT" , Value: AMDGPU::CPol::TH_ATOMIC_CASCADE | |
6669 | AMDGPU::CPol::TH_ATOMIC_NT) |
6670 | .Default(Value: 0xffffffff); |
6671 | else |
6672 | TH |= StringSwitch<int64_t>(Value) |
6673 | .Case(S: "RT" , Value: AMDGPU::CPol::TH_RT) |
6674 | .Case(S: "NT" , Value: AMDGPU::CPol::TH_NT) |
6675 | .Case(S: "HT" , Value: AMDGPU::CPol::TH_HT) |
6676 | .Case(S: "LU" , Value: AMDGPU::CPol::TH_LU) |
6677 | .Case(S: "RT_WB" , Value: AMDGPU::CPol::TH_RT_WB) |
6678 | .Case(S: "NT_RT" , Value: AMDGPU::CPol::TH_NT_RT) |
6679 | .Case(S: "RT_NT" , Value: AMDGPU::CPol::TH_RT_NT) |
6680 | .Case(S: "NT_HT" , Value: AMDGPU::CPol::TH_NT_HT) |
6681 | .Case(S: "NT_WB" , Value: AMDGPU::CPol::TH_NT_WB) |
6682 | .Case(S: "BYPASS" , Value: AMDGPU::CPol::TH_BYPASS) |
6683 | .Default(Value: 0xffffffff); |
6684 | } |
6685 | |
6686 | if (TH == 0xffffffff) |
6687 | return Error(L: StringLoc, Msg: "invalid th value" ); |
6688 | |
6689 | return ParseStatus::Success; |
6690 | } |
6691 | |
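// If the optional operand was parsed, add its immediate to the instruction;
// otherwise add the provided default value.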
6692 | static void addOptionalImmOperand( |
6693 | MCInst& Inst, const OperandVector& Operands, |
6694 | AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, |
6695 | AMDGPUOperand::ImmTy ImmT, |
6696 | int64_t Default = 0) { |
6697 | auto i = OptionalIdx.find(x: ImmT); |
6698 | if (i != OptionalIdx.end()) { |
6699 | unsigned Idx = i->second; |
6700 | ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, N: 1); |
6701 | } else { |
6702 | Inst.addOperand(Op: MCOperand::createImm(Val: Default)); |
6703 | } |
6704 | } |
6705 | |
6706 | ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, |
6707 | StringRef &Value, |
6708 | SMLoc &StringLoc) { |
6709 | if (!trySkipId(Id: Prefix, Kind: AsmToken::Colon)) |
6710 | return ParseStatus::NoMatch; |
6711 | |
6712 | StringLoc = getLoc(); |
6713 | return parseId(Val&: Value, ErrMsg: "expected an identifier" ) ? ParseStatus::Success |
6714 | : ParseStatus::Failure; |
6715 | } |
6716 | |
6717 | //===----------------------------------------------------------------------===// |
6718 | // MTBUF format |
6719 | //===----------------------------------------------------------------------===// |
6720 | |
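// Parse a "Pref:<value>" format field into Fmt. Returns false only on a hard
// error (parse failure or out-of-range value); a missing prefix leaves Fmt
// untouched and still returns true.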
6721 | bool AMDGPUAsmParser::tryParseFmt(const char *Pref, |
6722 | int64_t MaxVal, |
6723 | int64_t &Fmt) { |
6724 | int64_t Val; |
6725 | SMLoc Loc = getLoc(); |
6726 | |
6727 | auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: Val); |
6728 | if (Res.isFailure()) |
6729 | return false; |
6730 | if (Res.isNoMatch()) |
6731 | return true; |
6732 | |
6733 | if (Val < 0 || Val > MaxVal) { |
6734 | Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6735 | return false; |
6736 | } |
6737 | |
6738 | Fmt = Val; |
6739 | return true; |
6740 | } |
6741 | |
6742 | ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands, |
6743 | AMDGPUOperand::ImmTy ImmTy) { |
6744 | const char *Pref = "index_key" ; |
6745 | int64_t ImmVal = 0; |
6746 | SMLoc Loc = getLoc(); |
6747 | auto Res = parseIntWithPrefix(Prefix: Pref, IntVal&: ImmVal); |
6748 | if (!Res.isSuccess()) |
6749 | return Res; |
6750 | |
6751 | if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1)) |
6752 | return Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6753 | |
6754 | if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3)) |
6755 | return Error(L: Loc, Msg: Twine("out of range " , StringRef(Pref))); |
6756 | |
6757 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: ImmTy)); |
6758 | return ParseStatus::Success; |
6759 | } |
6760 | |
6761 | ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) { |
6762 | return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey8bit); |
6763 | } |
6764 | |
6765 | ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) { |
6766 | return tryParseIndexKey(Operands, ImmTy: AMDGPUOperand::ImmTyIndexKey16bit); |
6767 | } |
6768 | |
6769 | // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their |
6770 | // values to live in a joint format operand in the MCInst encoding. |
6771 | ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { |
6772 | using namespace llvm::AMDGPU::MTBUFFormat; |
6773 | |
6774 | int64_t Dfmt = DFMT_UNDEF; |
6775 | int64_t Nfmt = NFMT_UNDEF; |
6776 | |
6777 | // dfmt and nfmt can appear in either order, and each is optional. |
6778 | for (int I = 0; I < 2; ++I) { |
6779 | if (Dfmt == DFMT_UNDEF && !tryParseFmt(Pref: "dfmt" , MaxVal: DFMT_MAX, Fmt&: Dfmt)) |
6780 | return ParseStatus::Failure; |
6781 | |
6782 | if (Nfmt == NFMT_UNDEF && !tryParseFmt(Pref: "nfmt" , MaxVal: NFMT_MAX, Fmt&: Nfmt)) |
6783 | return ParseStatus::Failure; |
6784 | |
6785 | // Skip optional comma between dfmt/nfmt |
6786 | // but guard against 2 commas following each other. |
6787 | if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && |
6788 | !peekToken().is(K: AsmToken::Comma)) { |
6789 | trySkipToken(Kind: AsmToken::Comma); |
6790 | } |
6791 | } |
6792 | |
6793 | if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) |
6794 | return ParseStatus::NoMatch; |
6795 | |
6796 | Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; |
6797 | Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; |
6798 | |
6799 | Format = encodeDfmtNfmt(Dfmt, Nfmt); |
6800 | return ParseStatus::Success; |
6801 | } |
6802 | |
6803 | ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { |
6804 | using namespace llvm::AMDGPU::MTBUFFormat; |
6805 | |
6806 | int64_t Fmt = UFMT_UNDEF; |
6807 | |
6808 | if (!tryParseFmt(Pref: "format" , MaxVal: UFMT_MAX, Fmt)) |
6809 | return ParseStatus::Failure; |
6810 | |
6811 | if (Fmt == UFMT_UNDEF) |
6812 | return ParseStatus::NoMatch; |
6813 | |
6814 | Format = Fmt; |
6815 | return ParseStatus::Success; |
6816 | } |
6817 | |
6818 | bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, |
6819 | int64_t &Nfmt, |
6820 | StringRef FormatStr, |
6821 | SMLoc Loc) { |
6822 | using namespace llvm::AMDGPU::MTBUFFormat; |
6823 | int64_t Format; |
6824 | |
6825 | Format = getDfmt(Name: FormatStr); |
6826 | if (Format != DFMT_UNDEF) { |
6827 | Dfmt = Format; |
6828 | return true; |
6829 | } |
6830 | |
6831 | Format = getNfmt(Name: FormatStr, STI: getSTI()); |
6832 | if (Format != NFMT_UNDEF) { |
6833 | Nfmt = Format; |
6834 | return true; |
6835 | } |
6836 | |
6837 | Error(L: Loc, Msg: "unsupported format" ); |
6838 | return false; |
6839 | } |
6840 | |
6841 | ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, |
6842 | SMLoc FormatLoc, |
6843 | int64_t &Format) { |
6844 | using namespace llvm::AMDGPU::MTBUFFormat; |
6845 | |
6846 | int64_t Dfmt = DFMT_UNDEF; |
6847 | int64_t Nfmt = NFMT_UNDEF; |
6848 | if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, Loc: FormatLoc)) |
6849 | return ParseStatus::Failure; |
6850 | |
6851 | if (trySkipToken(Kind: AsmToken::Comma)) { |
6852 | StringRef Str; |
6853 | SMLoc Loc = getLoc(); |
6854 | if (!parseId(Val&: Str, ErrMsg: "expected a format string" ) || |
6855 | !matchDfmtNfmt(Dfmt, Nfmt, FormatStr: Str, Loc)) |
6856 | return ParseStatus::Failure; |
6857 | if (Dfmt == DFMT_UNDEF) |
6858 | return Error(L: Loc, Msg: "duplicate numeric format" ); |
6859 | if (Nfmt == NFMT_UNDEF) |
6860 | return Error(L: Loc, Msg: "duplicate data format" ); |
6861 | } |
6862 | |
6863 | Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; |
6864 | Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; |
6865 | |
6866 | if (isGFX10Plus()) { |
6867 | auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, STI: getSTI()); |
6868 | if (Ufmt == UFMT_UNDEF) |
6869 | return Error(L: FormatLoc, Msg: "unsupported format" ); |
6870 | Format = Ufmt; |
6871 | } else { |
6872 | Format = encodeDfmtNfmt(Dfmt, Nfmt); |
6873 | } |
6874 | |
6875 | return ParseStatus::Success; |
6876 | } |
6877 | |
6878 | ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, |
6879 | SMLoc Loc, |
6880 | int64_t &Format) { |
6881 | using namespace llvm::AMDGPU::MTBUFFormat; |
6882 | |
6883 | auto Id = getUnifiedFormat(Name: FormatStr, STI: getSTI()); |
6884 | if (Id == UFMT_UNDEF) |
6885 | return ParseStatus::NoMatch; |
6886 | |
6887 | if (!isGFX10Plus()) |
6888 | return Error(L: Loc, Msg: "unified format is not supported on this GPU" ); |
6889 | |
6890 | Format = Id; |
6891 | return ParseStatus::Success; |
6892 | } |
6893 | |
6894 | ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { |
6895 | using namespace llvm::AMDGPU::MTBUFFormat; |
6896 | SMLoc Loc = getLoc(); |
6897 | |
6898 | if (!parseExpr(Imm&: Format)) |
6899 | return ParseStatus::Failure; |
6900 | if (!isValidFormatEncoding(Val: Format, STI: getSTI())) |
6901 | return Error(L: Loc, Msg: "out of range format" ); |
6902 | |
6903 | return ParseStatus::Success; |
6904 | } |
6905 | |
6906 | ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { |
6907 | using namespace llvm::AMDGPU::MTBUFFormat; |
6908 | |
6909 | if (!trySkipId(Id: "format" , Kind: AsmToken::Colon)) |
6910 | return ParseStatus::NoMatch; |
6911 | |
6912 | if (trySkipToken(Kind: AsmToken::LBrac)) { |
6913 | StringRef FormatStr; |
6914 | SMLoc Loc = getLoc(); |
6915 | if (!parseId(Val&: FormatStr, ErrMsg: "expected a format string" )) |
6916 | return ParseStatus::Failure; |
6917 | |
6918 | auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); |
6919 | if (Res.isNoMatch()) |
6920 | Res = parseSymbolicSplitFormat(FormatStr, FormatLoc: Loc, Format); |
6921 | if (!Res.isSuccess()) |
6922 | return Res; |
6923 | |
6924 | if (!skipToken(Kind: AsmToken::RBrac, ErrMsg: "expected a closing square bracket" )) |
6925 | return ParseStatus::Failure; |
6926 | |
6927 | return ParseStatus::Success; |
6928 | } |
6929 | |
6930 | return parseNumericFormat(Format); |
6931 | } |
6932 | |
6933 | ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { |
6934 | using namespace llvm::AMDGPU::MTBUFFormat; |
6935 | |
6936 | int64_t Format = getDefaultFormatEncoding(STI: getSTI()); |
6937 | ParseStatus Res; |
6938 | SMLoc Loc = getLoc(); |
6939 | |
6940 | // Parse legacy format syntax. |
6941 | Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); |
6942 | if (Res.isFailure()) |
6943 | return Res; |
6944 | |
6945 | bool FormatFound = Res.isSuccess(); |
6946 | |
6947 | Operands.push_back( |
6948 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Format, Loc, Type: AMDGPUOperand::ImmTyFORMAT)); |
6949 | |
6950 | if (FormatFound) |
6951 | trySkipToken(Kind: AsmToken::Comma); |
6952 | |
6953 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
6954 | // We are expecting an soffset operand, |
6955 | // but let matcher handle the error. |
6956 | return ParseStatus::Success; |
6957 | } |
6958 | |
6959 | // Parse soffset. |
6960 | Res = parseRegOrImm(Operands); |
6961 | if (!Res.isSuccess()) |
6962 | return Res; |
6963 | |
6964 | trySkipToken(Kind: AsmToken::Comma); |
6965 | |
6966 | if (!FormatFound) { |
6967 | Res = parseSymbolicOrNumericFormat(Format); |
6968 | if (Res.isFailure()) |
6969 | return Res; |
6970 | if (Res.isSuccess()) { |
6971 | auto Size = Operands.size(); |
6972 | AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); |
6973 | assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); |
6974 | Op.setImm(Format); |
6975 | } |
6976 | return ParseStatus::Success; |
6977 | } |
6978 | |
6979 | if (isId(Id: "format" ) && peekToken().is(K: AsmToken::Colon)) |
6980 | return Error(L: getLoc(), Msg: "duplicate format" ); |
6981 | return ParseStatus::Success; |
6982 | } |
6983 | |
6984 | ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { |
6985 | ParseStatus Res = |
6986 | parseIntWithPrefix(Prefix: "offset" , Operands, ImmTy: AMDGPUOperand::ImmTyOffset); |
6987 | if (Res.isNoMatch()) { |
6988 | Res = parseIntWithPrefix(Prefix: "inst_offset" , Operands, |
6989 | ImmTy: AMDGPUOperand::ImmTyInstOffset); |
6990 | } |
6991 | return Res; |
6992 | } |
6993 | |
6994 | ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { |
6995 | ParseStatus Res = |
6996 | parseNamedBit(Name: "r128" , Operands, ImmTy: AMDGPUOperand::ImmTyR128A16); |
6997 | if (Res.isNoMatch()) |
6998 | Res = parseNamedBit(Name: "a16" , Operands, ImmTy: AMDGPUOperand::ImmTyA16); |
6999 | return Res; |
7000 | } |
7001 | |
7002 | ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { |
7003 | ParseStatus Res = |
7004 | parseIntWithPrefix(Prefix: "blgp" , Operands, ImmTy: AMDGPUOperand::ImmTyBLGP); |
7005 | if (Res.isNoMatch()) { |
7006 | Res = |
7007 | parseOperandArrayWithPrefix(Prefix: "neg" , Operands, ImmTy: AMDGPUOperand::ImmTyBLGP); |
7008 | } |
7009 | return Res; |
7010 | } |
7011 | |
7012 | //===----------------------------------------------------------------------===// |
7013 | // Exp |
7014 | //===----------------------------------------------------------------------===// |
7015 | |
7016 | void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { |
7017 | OptionalImmIndexMap OptionalIdx; |
7018 | |
7019 | unsigned OperandIdx[4]; |
7020 | unsigned EnMask = 0; |
7021 | int SrcIdx = 0; |
7022 | |
7023 | for (unsigned i = 1, e = Operands.size(); i != e; ++i) { |
7024 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
7025 | |
7026 | // Add the register arguments |
7027 | if (Op.isReg()) { |
7028 | assert(SrcIdx < 4); |
7029 | OperandIdx[SrcIdx] = Inst.size(); |
7030 | Op.addRegOperands(Inst, N: 1); |
7031 | ++SrcIdx; |
7032 | continue; |
7033 | } |
7034 | |
7035 | if (Op.isOff()) { |
7036 | assert(SrcIdx < 4); |
7037 | OperandIdx[SrcIdx] = Inst.size(); |
7038 | Inst.addOperand(Op: MCOperand::createReg(Reg: AMDGPU::NoRegister)); |
7039 | ++SrcIdx; |
7040 | continue; |
7041 | } |
7042 | |
7043 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { |
7044 | Op.addImmOperands(Inst, N: 1); |
7045 | continue; |
7046 | } |
7047 | |
7048 | if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en" )) |
7049 | continue; |
7050 | |
7051 | // Handle optional arguments |
7052 | OptionalIdx[Op.getImmTy()] = i; |
7053 | } |
7054 | |
7055 | assert(SrcIdx == 4); |
7056 | |
7057 | bool Compr = false; |
7058 | if (OptionalIdx.find(x: AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { |
7059 | Compr = true; |
7060 | Inst.getOperand(i: OperandIdx[1]) = Inst.getOperand(i: OperandIdx[2]); |
7061 | Inst.getOperand(i: OperandIdx[2]).setReg(AMDGPU::NoRegister); |
7062 | Inst.getOperand(i: OperandIdx[3]).setReg(AMDGPU::NoRegister); |
7063 | } |
7064 | |
7065 | for (auto i = 0; i < SrcIdx; ++i) { |
7066 | if (Inst.getOperand(i: OperandIdx[i]).getReg() != AMDGPU::NoRegister) { |
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
7068 | } |
7069 | } |
7070 | |
7071 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpVM); |
7072 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyExpCompr); |
7073 | |
7074 | Inst.addOperand(Op: MCOperand::createImm(Val: EnMask)); |
7075 | } |
7076 | |
7077 | //===----------------------------------------------------------------------===// |
7078 | // s_waitcnt |
7079 | //===----------------------------------------------------------------------===// |
7080 | |
7081 | static bool |
7082 | encodeCnt( |
7083 | const AMDGPU::IsaVersion ISA, |
7084 | int64_t &IntVal, |
7085 | int64_t CntVal, |
7086 | bool Saturate, |
7087 | unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), |
7088 | unsigned (*decode)(const IsaVersion &Version, unsigned)) |
7089 | { |
7090 | bool Failed = false; |
7091 | |
7092 | IntVal = encode(ISA, IntVal, CntVal); |
7093 | if (CntVal != decode(ISA, IntVal)) { |
7094 | if (Saturate) { |
7095 | IntVal = encode(ISA, IntVal, -1); |
7096 | } else { |
7097 | Failed = true; |
7098 | } |
7099 | } |
7100 | return Failed; |
7101 | } |
7102 | |
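// Parse one "name(value)" term of an s_waitcnt operand and fold it into
// IntVal, e.g. "vmcnt(0)" in "s_waitcnt vmcnt(0) & lgkmcnt(0)".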
7103 | bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { |
7104 | |
7105 | SMLoc CntLoc = getLoc(); |
7106 | StringRef CntName = getTokenStr(); |
7107 | |
7108 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name" ) || |
7109 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7110 | return false; |
7111 | |
7112 | int64_t CntVal; |
7113 | SMLoc ValLoc = getLoc(); |
7114 | if (!parseExpr(Imm&: CntVal)) |
7115 | return false; |
7116 | |
7117 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU()); |
7118 | |
7119 | bool Failed = true; |
7120 | bool Sat = CntName.ends_with(Suffix: "_sat" ); |
7121 | |
7122 | if (CntName == "vmcnt" || CntName == "vmcnt_sat" ) { |
7123 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeVmcnt, decode: decodeVmcnt); |
7124 | } else if (CntName == "expcnt" || CntName == "expcnt_sat" ) { |
7125 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeExpcnt, decode: decodeExpcnt); |
7126 | } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat" ) { |
7127 | Failed = encodeCnt(ISA, IntVal, CntVal, Saturate: Sat, encode: encodeLgkmcnt, decode: decodeLgkmcnt); |
7128 | } else { |
7129 | Error(L: CntLoc, Msg: "invalid counter name " + CntName); |
7130 | return false; |
7131 | } |
7132 | |
7133 | if (Failed) { |
7134 | Error(L: ValLoc, Msg: "too large value for " + CntName); |
7135 | return false; |
7136 | } |
7137 | |
7138 | if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7139 | return false; |
7140 | |
7141 | if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) { |
7142 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
7143 | Error(L: getLoc(), Msg: "expected a counter name" ); |
7144 | return false; |
7145 | } |
7146 | } |
7147 | |
7148 | return true; |
7149 | } |
7150 | |
7151 | ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { |
7152 | AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(GPU: getSTI().getCPU()); |
7153 | int64_t Waitcnt = getWaitcntBitMask(Version: ISA); |
7154 | SMLoc S = getLoc(); |
7155 | |
7156 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7157 | while (!isToken(Kind: AsmToken::EndOfStatement)) { |
7158 | if (!parseCnt(IntVal&: Waitcnt)) |
7159 | return ParseStatus::Failure; |
7160 | } |
7161 | } else { |
7162 | if (!parseExpr(Imm&: Waitcnt)) |
7163 | return ParseStatus::Failure; |
7164 | } |
7165 | |
7166 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Waitcnt, Loc: S)); |
7167 | return ParseStatus::Success; |
7168 | } |
7169 | |
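// Parse one "field(VALUE)" term of an s_delay_alu operand, e.g.
// "instid0(VALU_DEP_1)" or "instskip(NEXT)", and merge it into Delay.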
7170 | bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { |
7171 | SMLoc FieldLoc = getLoc(); |
7172 | StringRef FieldName = getTokenStr(); |
7173 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a field name" ) || |
7174 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7175 | return false; |
7176 | |
7177 | SMLoc ValueLoc = getLoc(); |
7178 | StringRef ValueName = getTokenStr(); |
7179 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a value name" ) || |
7180 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a right parenthesis" )) |
7181 | return false; |
7182 | |
7183 | unsigned Shift; |
7184 | if (FieldName == "instid0" ) { |
7185 | Shift = 0; |
7186 | } else if (FieldName == "instskip" ) { |
7187 | Shift = 4; |
7188 | } else if (FieldName == "instid1" ) { |
7189 | Shift = 7; |
7190 | } else { |
7191 | Error(L: FieldLoc, Msg: "invalid field name " + FieldName); |
7192 | return false; |
7193 | } |
7194 | |
7195 | int Value; |
7196 | if (Shift == 4) { |
7197 | // Parse values for instskip. |
7198 | Value = StringSwitch<int>(ValueName) |
7199 | .Case(S: "SAME" , Value: 0) |
7200 | .Case(S: "NEXT" , Value: 1) |
7201 | .Case(S: "SKIP_1" , Value: 2) |
7202 | .Case(S: "SKIP_2" , Value: 3) |
7203 | .Case(S: "SKIP_3" , Value: 4) |
7204 | .Case(S: "SKIP_4" , Value: 5) |
7205 | .Default(Value: -1); |
7206 | } else { |
7207 | // Parse values for instid0 and instid1. |
7208 | Value = StringSwitch<int>(ValueName) |
7209 | .Case(S: "NO_DEP" , Value: 0) |
7210 | .Case(S: "VALU_DEP_1" , Value: 1) |
7211 | .Case(S: "VALU_DEP_2" , Value: 2) |
7212 | .Case(S: "VALU_DEP_3" , Value: 3) |
7213 | .Case(S: "VALU_DEP_4" , Value: 4) |
7214 | .Case(S: "TRANS32_DEP_1" , Value: 5) |
7215 | .Case(S: "TRANS32_DEP_2" , Value: 6) |
7216 | .Case(S: "TRANS32_DEP_3" , Value: 7) |
7217 | .Case(S: "FMA_ACCUM_CYCLE_1" , Value: 8) |
7218 | .Case(S: "SALU_CYCLE_1" , Value: 9) |
7219 | .Case(S: "SALU_CYCLE_2" , Value: 10) |
7220 | .Case(S: "SALU_CYCLE_3" , Value: 11) |
7221 | .Default(Value: -1); |
7222 | } |
7223 | if (Value < 0) { |
7224 | Error(L: ValueLoc, Msg: "invalid value name " + ValueName); |
7225 | return false; |
7226 | } |
7227 | |
7228 | Delay |= Value << Shift; |
7229 | return true; |
7230 | } |
7231 | |
7232 | ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { |
7233 | int64_t Delay = 0; |
7234 | SMLoc S = getLoc(); |
7235 | |
7236 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7237 | do { |
7238 | if (!parseDelay(Delay)) |
7239 | return ParseStatus::Failure; |
7240 | } while (trySkipToken(Kind: AsmToken::Pipe)); |
7241 | } else { |
7242 | if (!parseExpr(Imm&: Delay)) |
7243 | return ParseStatus::Failure; |
7244 | } |
7245 | |
7246 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Delay, Loc: S)); |
7247 | return ParseStatus::Success; |
7248 | } |
7249 | |
7250 | bool |
7251 | AMDGPUOperand::isSWaitCnt() const { |
7252 | return isImm(); |
7253 | } |
7254 | |
7255 | bool AMDGPUOperand::isSDelayALU() const { return isImm(); } |
7256 | |
7257 | //===----------------------------------------------------------------------===// |
7258 | // DepCtr |
7259 | //===----------------------------------------------------------------------===// |
7260 | |
7261 | void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, |
7262 | StringRef DepCtrName) { |
7263 | switch (ErrorId) { |
7264 | case OPR_ID_UNKNOWN: |
7265 | Error(L: Loc, Msg: Twine("invalid counter name " , DepCtrName)); |
7266 | return; |
7267 | case OPR_ID_UNSUPPORTED: |
7268 | Error(L: Loc, Msg: Twine(DepCtrName, " is not supported on this GPU" )); |
7269 | return; |
7270 | case OPR_ID_DUPLICATE: |
7271 | Error(L: Loc, Msg: Twine("duplicate counter name " , DepCtrName)); |
7272 | return; |
7273 | case OPR_VAL_INVALID: |
7274 | Error(L: Loc, Msg: Twine("invalid value for " , DepCtrName)); |
7275 | return; |
7276 | default: |
7277 | assert(false); |
7278 | } |
7279 | } |
7280 | |
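// Parse one "name(value)" term of an s_waitcnt_depctr operand, e.g.
// "depctr_va_vdst(0)", and merge the encoded field into DepCtr.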
7281 | bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { |
7282 | |
7283 | using namespace llvm::AMDGPU::DepCtr; |
7284 | |
7285 | SMLoc DepCtrLoc = getLoc(); |
7286 | StringRef DepCtrName = getTokenStr(); |
7287 | |
7288 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "expected a counter name" ) || |
7289 | !skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) |
7290 | return false; |
7291 | |
7292 | int64_t ExprVal; |
7293 | if (!parseExpr(Imm&: ExprVal)) |
7294 | return false; |
7295 | |
7296 | unsigned PrevOprMask = UsedOprMask; |
7297 | int CntVal = encodeDepCtr(Name: DepCtrName, Val: ExprVal, UsedOprMask, STI: getSTI()); |
7298 | |
7299 | if (CntVal < 0) { |
7300 | depCtrError(Loc: DepCtrLoc, ErrorId: CntVal, DepCtrName); |
7301 | return false; |
7302 | } |
7303 | |
7304 | if (!skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7305 | return false; |
7306 | |
7307 | if (trySkipToken(Kind: AsmToken::Amp) || trySkipToken(Kind: AsmToken::Comma)) { |
7308 | if (isToken(Kind: AsmToken::EndOfStatement)) { |
7309 | Error(L: getLoc(), Msg: "expected a counter name" ); |
7310 | return false; |
7311 | } |
7312 | } |
7313 | |
7314 | unsigned CntValMask = PrevOprMask ^ UsedOprMask; |
7315 | DepCtr = (DepCtr & ~CntValMask) | CntVal; |
7316 | return true; |
7317 | } |
7318 | |
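// A depctr operand is either a raw immediate expression or one or more
// <counter>(<value>) terms separated by '&' or ','; each term is folded into
// the default encoding by the parseDepCtr helper above via encodeDepCtr.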
7319 | ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { |
7320 | using namespace llvm::AMDGPU::DepCtr; |
7321 | |
7322 | int64_t DepCtr = getDefaultDepCtrEncoding(STI: getSTI()); |
7323 | SMLoc Loc = getLoc(); |
7324 | |
7325 | if (isToken(Kind: AsmToken::Identifier) && peekToken().is(K: AsmToken::LParen)) { |
7326 | unsigned UsedOprMask = 0; |
7327 | while (!isToken(Kind: AsmToken::EndOfStatement)) { |
7328 | if (!parseDepCtr(DepCtr, UsedOprMask)) |
7329 | return ParseStatus::Failure; |
7330 | } |
7331 | } else { |
7332 | if (!parseExpr(Imm&: DepCtr)) |
7333 | return ParseStatus::Failure; |
7334 | } |
7335 | |
7336 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: DepCtr, Loc)); |
7337 | return ParseStatus::Success; |
7338 | } |
7339 | |
7340 | bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } |
7341 | |
7342 | //===----------------------------------------------------------------------===// |
7343 | // hwreg |
7344 | //===----------------------------------------------------------------------===// |
7345 | |
7346 | ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg, |
7347 | OperandInfoTy &Offset, |
7348 | OperandInfoTy &Width) { |
7349 | using namespace llvm::AMDGPU::Hwreg; |
7350 | |
7351 | if (!trySkipId(Id: "hwreg" , Kind: AsmToken::LParen)) |
7352 | return ParseStatus::NoMatch; |
7353 | |
7354 | // The register may be specified by name or using a numeric code |
7355 | HwReg.Loc = getLoc(); |
7356 | if (isToken(Kind: AsmToken::Identifier) && |
7357 | (HwReg.Val = getHwregId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) { |
7358 | HwReg.IsSymbolic = true; |
7359 | lex(); // skip register name |
7360 | } else if (!parseExpr(Imm&: HwReg.Val, Expected: "a register name" )) { |
7361 | return ParseStatus::Failure; |
7362 | } |
7363 | |
7364 | if (trySkipToken(Kind: AsmToken::RParen)) |
7365 | return ParseStatus::Success; |
7366 | |
7367 | // parse optional params |
7368 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma or a closing parenthesis" )) |
7369 | return ParseStatus::Failure; |
7370 | |
7371 | Offset.Loc = getLoc(); |
7372 | if (!parseExpr(Imm&: Offset.Val)) |
7373 | return ParseStatus::Failure; |
7374 | |
7375 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) |
7376 | return ParseStatus::Failure; |
7377 | |
7378 | Width.Loc = getLoc(); |
7379 | if (!parseExpr(Imm&: Width.Val) || |
7380 | !skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" )) |
7381 | return ParseStatus::Failure; |
7382 | |
7383 | return ParseStatus::Success; |
7384 | } |
7385 | |
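// A hwreg operand may be written in three forms, all handled below: the
// hwreg(<name or code>[, <bit offset>, <bitfield width>]) macro, the
// structured form {id: <expr>, offset: <expr>, size: <expr>}, or a plain
// 16-bit immediate expression holding the already-encoded value.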
7386 | ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { |
7387 | using namespace llvm::AMDGPU::Hwreg; |
7388 | |
7389 | int64_t ImmVal = 0; |
7390 | SMLoc Loc = getLoc(); |
7391 | |
7392 | StructuredOpField HwReg("id" , "hardware register" , HwregId::Width, |
7393 | HwregId::Default); |
7394 | StructuredOpField Offset("offset" , "bit offset" , HwregOffset::Width, |
7395 | HwregOffset::Default); |
7396 | struct : StructuredOpField { |
7397 | using StructuredOpField::StructuredOpField; |
7398 | bool validate(AMDGPUAsmParser &Parser) const override { |
7399 | if (!isUIntN(N: Width, x: Val - 1)) |
7400 | return Error(Parser, Err: "only values from 1 to 32 are legal" ); |
7401 | return true; |
7402 | } |
7403 | } Width("size" , "bitfield width" , HwregSize::Width, HwregSize::Default); |
7404 | ParseStatus Res = parseStructuredOpFields(Fields: {&HwReg, &Offset, &Width}); |
7405 | |
7406 | if (Res.isNoMatch()) |
7407 | Res = parseHwregFunc(HwReg, Offset, Width); |
7408 | |
7409 | if (Res.isSuccess()) { |
7410 | if (!validateStructuredOpFields(Fields: {&HwReg, &Offset, &Width})) |
7411 | return ParseStatus::Failure; |
7412 | ImmVal = HwregEncoding::encode(Values: HwReg.Val, Values: Offset.Val, Values: Width.Val); |
7413 | } |
7414 | |
7415 | if (Res.isNoMatch() && |
7416 | parseExpr(Imm&: ImmVal, Expected: "a hwreg macro, structured immediate" )) |
7417 | Res = ParseStatus::Success; |
7418 | |
7419 | if (!Res.isSuccess()) |
7420 | return ParseStatus::Failure; |
7421 | |
7422 | if (!isUInt<16>(x: ImmVal)) |
7423 | return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal" ); |
7424 | Operands.push_back( |
7425 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTyHwreg)); |
7426 | return ParseStatus::Success; |
7427 | } |
7428 | |
7429 | bool AMDGPUOperand::isHwreg() const { |
7430 | return isImmTy(ImmT: ImmTyHwreg); |
7431 | } |
7432 | |
7433 | //===----------------------------------------------------------------------===// |
7434 | // sendmsg |
7435 | //===----------------------------------------------------------------------===// |
7436 | |
7437 | bool |
7438 | AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, |
7439 | OperandInfoTy &Op, |
7440 | OperandInfoTy &Stream) { |
7441 | using namespace llvm::AMDGPU::SendMsg; |
7442 | |
7443 | Msg.Loc = getLoc(); |
7444 | if (isToken(Kind: AsmToken::Identifier) && |
7445 | (Msg.Val = getMsgId(Name: getTokenStr(), STI: getSTI())) != OPR_ID_UNKNOWN) { |
7446 | Msg.IsSymbolic = true; |
7447 | lex(); // skip message name |
7448 | } else if (!parseExpr(Imm&: Msg.Val, Expected: "a message name" )) { |
7449 | return false; |
7450 | } |
7451 | |
7452 | if (trySkipToken(Kind: AsmToken::Comma)) { |
7453 | Op.IsDefined = true; |
7454 | Op.Loc = getLoc(); |
7455 | if (isToken(Kind: AsmToken::Identifier) && |
7456 | (Op.Val = getMsgOpId(MsgId: Msg.Val, Name: getTokenStr(), STI: getSTI())) != |
7457 | OPR_ID_UNKNOWN) { |
7458 | lex(); // skip operation name |
7459 | } else if (!parseExpr(Imm&: Op.Val, Expected: "an operation name" )) { |
7460 | return false; |
7461 | } |
7462 | |
7463 | if (trySkipToken(Kind: AsmToken::Comma)) { |
7464 | Stream.IsDefined = true; |
7465 | Stream.Loc = getLoc(); |
7466 | if (!parseExpr(Imm&: Stream.Val)) |
7467 | return false; |
7468 | } |
7469 | } |
7470 | |
7471 | return skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" ); |
7472 | } |
7473 | |
7474 | bool |
7475 | AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, |
7476 | const OperandInfoTy &Op, |
7477 | const OperandInfoTy &Stream) { |
7478 | using namespace llvm::AMDGPU::SendMsg; |
7479 | |
// Validation strictness depends on whether the message is specified
// in a symbolic or in a numeric form. In the latter case only the
// possibility of encoding is checked.
7483 | bool Strict = Msg.IsSymbolic; |
7484 | |
7485 | if (Strict) { |
7486 | if (Msg.Val == OPR_ID_UNSUPPORTED) { |
7487 | Error(L: Msg.Loc, Msg: "specified message id is not supported on this GPU" ); |
7488 | return false; |
7489 | } |
7490 | } else { |
7491 | if (!isValidMsgId(MsgId: Msg.Val, STI: getSTI())) { |
7492 | Error(L: Msg.Loc, Msg: "invalid message id" ); |
7493 | return false; |
7494 | } |
7495 | } |
7496 | if (Strict && (msgRequiresOp(MsgId: Msg.Val, STI: getSTI()) != Op.IsDefined)) { |
7497 | if (Op.IsDefined) { |
7498 | Error(L: Op.Loc, Msg: "message does not support operations" ); |
7499 | } else { |
7500 | Error(L: Msg.Loc, Msg: "missing message operation" ); |
7501 | } |
7502 | return false; |
7503 | } |
7504 | if (!isValidMsgOp(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI(), Strict)) { |
7505 | if (Op.Val == OPR_ID_UNSUPPORTED) |
7506 | Error(L: Op.Loc, Msg: "specified operation id is not supported on this GPU" ); |
7507 | else |
7508 | Error(L: Op.Loc, Msg: "invalid operation id" ); |
7509 | return false; |
7510 | } |
7511 | if (Strict && !msgSupportsStream(MsgId: Msg.Val, OpId: Op.Val, STI: getSTI()) && |
7512 | Stream.IsDefined) { |
7513 | Error(L: Stream.Loc, Msg: "message operation does not support streams" ); |
7514 | return false; |
7515 | } |
7516 | if (!isValidMsgStream(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val, STI: getSTI(), Strict)) { |
7517 | Error(L: Stream.Loc, Msg: "invalid message stream id" ); |
7518 | return false; |
7519 | } |
7520 | return true; |
7521 | } |
7522 | |
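// A sendmsg operand is either the sendmsg(<msg>[, <op>[, <stream>]]) macro,
// checked by validateSendMsg above, or a plain 16-bit immediate expression
// holding the already-encoded value.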
7523 | ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { |
7524 | using namespace llvm::AMDGPU::SendMsg; |
7525 | |
7526 | int64_t ImmVal = 0; |
7527 | SMLoc Loc = getLoc(); |
7528 | |
7529 | if (trySkipId(Id: "sendmsg" , Kind: AsmToken::LParen)) { |
7530 | OperandInfoTy Msg(OPR_ID_UNKNOWN); |
7531 | OperandInfoTy Op(OP_NONE_); |
7532 | OperandInfoTy Stream(STREAM_ID_NONE_); |
7533 | if (parseSendMsgBody(Msg, Op, Stream) && |
7534 | validateSendMsg(Msg, Op, Stream)) { |
7535 | ImmVal = encodeMsg(MsgId: Msg.Val, OpId: Op.Val, StreamId: Stream.Val); |
7536 | } else { |
7537 | return ParseStatus::Failure; |
7538 | } |
7539 | } else if (parseExpr(Imm&: ImmVal, Expected: "a sendmsg macro" )) { |
7540 | if (ImmVal < 0 || !isUInt<16>(x: ImmVal)) |
7541 | return Error(L: Loc, Msg: "invalid immediate: only 16-bit values are legal" ); |
7542 | } else { |
7543 | return ParseStatus::Failure; |
7544 | } |
7545 | |
7546 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: ImmVal, Loc, Type: AMDGPUOperand::ImmTySendMsg)); |
7547 | return ParseStatus::Success; |
7548 | } |
7549 | |
7550 | bool AMDGPUOperand::isSendMsg() const { |
7551 | return isImmTy(ImmT: ImmTySendMsg); |
7552 | } |
7553 | |
7554 | //===----------------------------------------------------------------------===// |
7555 | // v_interp |
7556 | //===----------------------------------------------------------------------===// |
7557 | |
7558 | ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { |
7559 | StringRef Str; |
7560 | SMLoc S = getLoc(); |
7561 | |
7562 | if (!parseId(Val&: Str)) |
7563 | return ParseStatus::NoMatch; |
7564 | |
7565 | int Slot = StringSwitch<int>(Str) |
7566 | .Case(S: "p10" , Value: 0) |
7567 | .Case(S: "p20" , Value: 1) |
7568 | .Case(S: "p0" , Value: 2) |
7569 | .Default(Value: -1); |
7570 | |
7571 | if (Slot == -1) |
7572 | return Error(L: S, Msg: "invalid interpolation slot" ); |
7573 | |
7574 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Slot, Loc: S, |
7575 | Type: AMDGPUOperand::ImmTyInterpSlot)); |
7576 | return ParseStatus::Success; |
7577 | } |
7578 | |
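// Parses an interpolation attribute operand of the form attr<N>.<chan>, where
// <chan> is one of x, y, z or w and <N> is at most 32. Two immediate operands
// are produced: the attribute number and the channel.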
7579 | ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { |
7580 | StringRef Str; |
7581 | SMLoc S = getLoc(); |
7582 | |
7583 | if (!parseId(Val&: Str)) |
7584 | return ParseStatus::NoMatch; |
7585 | |
7586 | if (!Str.starts_with(Prefix: "attr" )) |
7587 | return Error(L: S, Msg: "invalid interpolation attribute" ); |
7588 | |
7589 | StringRef Chan = Str.take_back(N: 2); |
7590 | int AttrChan = StringSwitch<int>(Chan) |
7591 | .Case(S: ".x" , Value: 0) |
7592 | .Case(S: ".y" , Value: 1) |
7593 | .Case(S: ".z" , Value: 2) |
7594 | .Case(S: ".w" , Value: 3) |
7595 | .Default(Value: -1); |
7596 | if (AttrChan == -1) |
7597 | return Error(L: S, Msg: "invalid or missing interpolation attribute channel" ); |
7598 | |
7599 | Str = Str.drop_back(N: 2).drop_front(N: 4); |
7600 | |
7601 | uint8_t Attr; |
7602 | if (Str.getAsInteger(Radix: 10, Result&: Attr)) |
7603 | return Error(L: S, Msg: "invalid or missing interpolation attribute number" ); |
7604 | |
7605 | if (Attr > 32) |
7606 | return Error(L: S, Msg: "out of bounds interpolation attribute number" ); |
7607 | |
7608 | SMLoc SChan = SMLoc::getFromPointer(Ptr: Chan.data()); |
7609 | |
7610 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Attr, Loc: S, |
7611 | Type: AMDGPUOperand::ImmTyInterpAttr)); |
7612 | Operands.push_back(Elt: AMDGPUOperand::CreateImm( |
7613 | AsmParser: this, Val: AttrChan, Loc: SChan, Type: AMDGPUOperand::ImmTyInterpAttrChan)); |
7614 | return ParseStatus::Success; |
7615 | } |
7616 | |
7617 | //===----------------------------------------------------------------------===// |
7618 | // exp |
7619 | //===----------------------------------------------------------------------===// |
7620 | |
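// Parses an export target operand by name (resolved through getTgtId) and
// rejects targets that are invalid or not supported on the current GPU.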
7621 | ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { |
7622 | using namespace llvm::AMDGPU::Exp; |
7623 | |
7624 | StringRef Str; |
7625 | SMLoc S = getLoc(); |
7626 | |
7627 | if (!parseId(Val&: Str)) |
7628 | return ParseStatus::NoMatch; |
7629 | |
7630 | unsigned Id = getTgtId(Name: Str); |
7631 | if (Id == ET_INVALID || !isSupportedTgtId(Id, STI: getSTI())) |
7632 | return Error(L: S, Msg: (Id == ET_INVALID) |
7633 | ? "invalid exp target" |
7634 | : "exp target is not supported on this GPU" ); |
7635 | |
7636 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Id, Loc: S, |
7637 | Type: AMDGPUOperand::ImmTyExpTgt)); |
7638 | return ParseStatus::Success; |
7639 | } |
7640 | |
7641 | //===----------------------------------------------------------------------===// |
7642 | // parser helpers |
7643 | //===----------------------------------------------------------------------===// |
7644 | |
7645 | bool |
7646 | AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { |
7647 | return Token.is(K: AsmToken::Identifier) && Token.getString() == Id; |
7648 | } |
7649 | |
7650 | bool |
7651 | AMDGPUAsmParser::isId(const StringRef Id) const { |
7652 | return isId(Token: getToken(), Id); |
7653 | } |
7654 | |
7655 | bool |
7656 | AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { |
7657 | return getTokenKind() == Kind; |
7658 | } |
7659 | |
7660 | StringRef AMDGPUAsmParser::getId() const { |
7661 | return isToken(Kind: AsmToken::Identifier) ? getTokenStr() : StringRef(); |
7662 | } |
7663 | |
7664 | bool |
7665 | AMDGPUAsmParser::trySkipId(const StringRef Id) { |
7666 | if (isId(Id)) { |
7667 | lex(); |
7668 | return true; |
7669 | } |
7670 | return false; |
7671 | } |
7672 | |
7673 | bool |
7674 | AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { |
7675 | if (isToken(Kind: AsmToken::Identifier)) { |
7676 | StringRef Tok = getTokenStr(); |
7677 | if (Tok.starts_with(Prefix: Pref) && Tok.drop_front(N: Pref.size()) == Id) { |
7678 | lex(); |
7679 | return true; |
7680 | } |
7681 | } |
7682 | return false; |
7683 | } |
7684 | |
7685 | bool |
7686 | AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { |
7687 | if (isId(Id) && peekToken().is(K: Kind)) { |
7688 | lex(); |
7689 | lex(); |
7690 | return true; |
7691 | } |
7692 | return false; |
7693 | } |
7694 | |
7695 | bool |
7696 | AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { |
7697 | if (isToken(Kind)) { |
7698 | lex(); |
7699 | return true; |
7700 | } |
7701 | return false; |
7702 | } |
7703 | |
7704 | bool |
7705 | AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, |
7706 | const StringRef ErrMsg) { |
7707 | if (!trySkipToken(Kind)) { |
7708 | Error(L: getLoc(), Msg: ErrMsg); |
7709 | return false; |
7710 | } |
7711 | return true; |
7712 | } |
7713 | |
7714 | bool |
7715 | AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { |
7716 | SMLoc S = getLoc(); |
7717 | |
7718 | const MCExpr *Expr; |
7719 | if (Parser.parseExpression(Res&: Expr)) |
7720 | return false; |
7721 | |
7722 | if (Expr->evaluateAsAbsolute(Res&: Imm)) |
7723 | return true; |
7724 | |
7725 | if (Expected.empty()) { |
7726 | Error(L: S, Msg: "expected absolute expression" ); |
7727 | } else { |
7728 | Error(L: S, Msg: Twine("expected " , Expected) + |
7729 | Twine(" or an absolute expression" )); |
7730 | } |
7731 | return false; |
7732 | } |
7733 | |
7734 | bool |
7735 | AMDGPUAsmParser::parseExpr(OperandVector &Operands) { |
7736 | SMLoc S = getLoc(); |
7737 | |
7738 | const MCExpr *Expr; |
7739 | if (Parser.parseExpression(Res&: Expr)) |
7740 | return false; |
7741 | |
7742 | int64_t IntVal; |
7743 | if (Expr->evaluateAsAbsolute(Res&: IntVal)) { |
7744 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: IntVal, Loc: S)); |
7745 | } else { |
7746 | Operands.push_back(Elt: AMDGPUOperand::CreateExpr(AsmParser: this, Expr, S)); |
7747 | } |
7748 | return true; |
7749 | } |
7750 | |
7751 | bool |
7752 | AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { |
7753 | if (isToken(Kind: AsmToken::String)) { |
7754 | Val = getToken().getStringContents(); |
7755 | lex(); |
7756 | return true; |
7757 | } |
7758 | Error(L: getLoc(), Msg: ErrMsg); |
7759 | return false; |
7760 | } |
7761 | |
7762 | bool |
7763 | AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { |
7764 | if (isToken(Kind: AsmToken::Identifier)) { |
7765 | Val = getTokenStr(); |
7766 | lex(); |
7767 | return true; |
7768 | } |
7769 | if (!ErrMsg.empty()) |
7770 | Error(L: getLoc(), Msg: ErrMsg); |
7771 | return false; |
7772 | } |
7773 | |
7774 | AsmToken |
7775 | AMDGPUAsmParser::getToken() const { |
7776 | return Parser.getTok(); |
7777 | } |
7778 | |
7779 | AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { |
7780 | return isToken(Kind: AsmToken::EndOfStatement) |
7781 | ? getToken() |
7782 | : getLexer().peekTok(ShouldSkipSpace); |
7783 | } |
7784 | |
7785 | void |
7786 | AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { |
7787 | auto TokCount = getLexer().peekTokens(Buf: Tokens); |
7788 | |
7789 | for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) |
7790 | Tokens[Idx] = AsmToken(AsmToken::Error, "" ); |
7791 | } |
7792 | |
7793 | AsmToken::TokenKind |
7794 | AMDGPUAsmParser::getTokenKind() const { |
7795 | return getLexer().getKind(); |
7796 | } |
7797 | |
7798 | SMLoc |
7799 | AMDGPUAsmParser::getLoc() const { |
7800 | return getToken().getLoc(); |
7801 | } |
7802 | |
7803 | StringRef |
7804 | AMDGPUAsmParser::getTokenStr() const { |
7805 | return getToken().getString(); |
7806 | } |
7807 | |
7808 | void |
7809 | AMDGPUAsmParser::lex() { |
7810 | Parser.Lex(); |
7811 | } |
7812 | |
7813 | SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { |
7814 | return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); |
7815 | } |
7816 | |
7817 | SMLoc |
7818 | AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, |
7819 | const OperandVector &Operands) const { |
7820 | for (unsigned i = Operands.size() - 1; i > 0; --i) { |
7821 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
7822 | if (Test(Op)) |
7823 | return Op.getStartLoc(); |
7824 | } |
7825 | return getInstLoc(Operands); |
7826 | } |
7827 | |
7828 | SMLoc |
7829 | AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, |
7830 | const OperandVector &Operands) const { |
7831 | auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(ImmT: Type); }; |
7832 | return getOperandLoc(Test, Operands); |
7833 | } |
7834 | |
7835 | SMLoc |
7836 | AMDGPUAsmParser::getRegLoc(unsigned Reg, |
7837 | const OperandVector &Operands) const { |
7838 | auto Test = [=](const AMDGPUOperand& Op) { |
7839 | return Op.isRegKind() && Op.getReg() == Reg; |
7840 | }; |
7841 | return getOperandLoc(Test, Operands); |
7842 | } |
7843 | |
7844 | SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, |
7845 | bool SearchMandatoryLiterals) const { |
7846 | auto Test = [](const AMDGPUOperand& Op) { |
7847 | return Op.IsImmKindLiteral() || Op.isExpr(); |
7848 | }; |
7849 | SMLoc Loc = getOperandLoc(Test, Operands); |
7850 | if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) |
7851 | Loc = getMandatoryLitLoc(Operands); |
7852 | return Loc; |
7853 | } |
7854 | |
7855 | SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { |
7856 | auto Test = [](const AMDGPUOperand &Op) { |
7857 | return Op.IsImmKindMandatoryLiteral(); |
7858 | }; |
7859 | return getOperandLoc(Test, Operands); |
7860 | } |
7861 | |
7862 | SMLoc |
7863 | AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { |
7864 | auto Test = [](const AMDGPUOperand& Op) { |
7865 | return Op.isImmKindConst(); |
7866 | }; |
7867 | return getOperandLoc(Test, Operands); |
7868 | } |
7869 | |
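// Parses a brace-enclosed, comma-separated list of <field>: <expr> pairs into
// the given field descriptors. Unknown and duplicate field names are
// diagnosed; fields that are not mentioned keep their default values.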
7870 | ParseStatus |
7871 | AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) { |
7872 | if (!trySkipToken(Kind: AsmToken::LCurly)) |
7873 | return ParseStatus::NoMatch; |
7874 | |
7875 | bool First = true; |
7876 | while (!trySkipToken(Kind: AsmToken::RCurly)) { |
7877 | if (!First && |
7878 | !skipToken(Kind: AsmToken::Comma, ErrMsg: "comma or closing brace expected" )) |
7879 | return ParseStatus::Failure; |
7880 | |
7881 | StringRef Id = getTokenStr(); |
7882 | SMLoc IdLoc = getLoc(); |
7883 | if (!skipToken(Kind: AsmToken::Identifier, ErrMsg: "field name expected" ) || |
7884 | !skipToken(Kind: AsmToken::Colon, ErrMsg: "colon expected" )) |
7885 | return ParseStatus::Failure; |
7886 | |
7887 | auto I = |
7888 | find_if(Range&: Fields, P: [Id](StructuredOpField *F) { return F->Id == Id; }); |
7889 | if (I == Fields.end()) |
7890 | return Error(L: IdLoc, Msg: "unknown field" ); |
7891 | if ((*I)->IsDefined) |
7892 | return Error(L: IdLoc, Msg: "duplicate field" ); |
7893 | |
7894 | // TODO: Support symbolic values. |
7895 | (*I)->Loc = getLoc(); |
7896 | if (!parseExpr(Imm&: (*I)->Val)) |
7897 | return ParseStatus::Failure; |
7898 | (*I)->IsDefined = true; |
7899 | |
7900 | First = false; |
7901 | } |
7902 | return ParseStatus::Success; |
7903 | } |
7904 | |
7905 | bool AMDGPUAsmParser::validateStructuredOpFields( |
7906 | ArrayRef<const StructuredOpField *> Fields) { |
7907 | return all_of(Range&: Fields, P: [this](const StructuredOpField *F) { |
7908 | return F->validate(Parser&: *this); |
7909 | }); |
7910 | } |
7911 | |
7912 | //===----------------------------------------------------------------------===// |
7913 | // swizzle |
7914 | //===----------------------------------------------------------------------===// |
7915 | |
7916 | LLVM_READNONE |
7917 | static unsigned |
7918 | encodeBitmaskPerm(const unsigned AndMask, |
7919 | const unsigned OrMask, |
7920 | const unsigned XorMask) { |
7921 | using namespace llvm::AMDGPU::Swizzle; |
7922 | |
7923 | return BITMASK_PERM_ENC | |
7924 | (AndMask << BITMASK_AND_SHIFT) | |
7925 | (OrMask << BITMASK_OR_SHIFT) | |
7926 | (XorMask << BITMASK_XOR_SHIFT); |
7927 | } |
7928 | |
7929 | bool |
7930 | AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, |
7931 | const unsigned MinVal, |
7932 | const unsigned MaxVal, |
7933 | const StringRef ErrMsg, |
7934 | SMLoc &Loc) { |
7935 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) { |
7936 | return false; |
7937 | } |
7938 | Loc = getLoc(); |
7939 | if (!parseExpr(Imm&: Op)) { |
7940 | return false; |
7941 | } |
7942 | if (Op < MinVal || Op > MaxVal) { |
7943 | Error(L: Loc, Msg: ErrMsg); |
7944 | return false; |
7945 | } |
7946 | |
7947 | return true; |
7948 | } |
7949 | |
7950 | bool |
7951 | AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, |
7952 | const unsigned MinVal, |
7953 | const unsigned MaxVal, |
7954 | const StringRef ErrMsg) { |
7955 | SMLoc Loc; |
7956 | for (unsigned i = 0; i < OpNum; ++i) { |
7957 | if (!parseSwizzleOperand(Op&: Op[i], MinVal, MaxVal, ErrMsg, Loc)) |
7958 | return false; |
7959 | } |
7960 | |
7961 | return true; |
7962 | } |
7963 | |
7964 | bool |
7965 | AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { |
7966 | using namespace llvm::AMDGPU::Swizzle; |
7967 | |
7968 | int64_t Lane[LANE_NUM]; |
7969 | if (parseSwizzleOperands(OpNum: LANE_NUM, Op: Lane, MinVal: 0, MaxVal: LANE_MAX, |
7970 | ErrMsg: "expected a 2-bit lane id" )) { |
7971 | Imm = QUAD_PERM_ENC; |
7972 | for (unsigned I = 0; I < LANE_NUM; ++I) { |
7973 | Imm |= Lane[I] << (LANE_SHIFT * I); |
7974 | } |
7975 | return true; |
7976 | } |
7977 | return false; |
7978 | } |
7979 | |
7980 | bool |
7981 | AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { |
7982 | using namespace llvm::AMDGPU::Swizzle; |
7983 | |
7984 | SMLoc Loc; |
7985 | int64_t GroupSize; |
7986 | int64_t LaneIdx; |
7987 | |
7988 | if (!parseSwizzleOperand(Op&: GroupSize, |
7989 | MinVal: 2, MaxVal: 32, |
7990 | ErrMsg: "group size must be in the interval [2,32]" , |
7991 | Loc)) { |
7992 | return false; |
7993 | } |
7994 | if (!isPowerOf2_64(Value: GroupSize)) { |
7995 | Error(L: Loc, Msg: "group size must be a power of two" ); |
7996 | return false; |
7997 | } |
7998 | if (parseSwizzleOperand(Op&: LaneIdx, |
7999 | MinVal: 0, MaxVal: GroupSize - 1, |
8000 | ErrMsg: "lane id must be in the interval [0,group size - 1]" , |
8001 | Loc)) { |
8002 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX - GroupSize + 1, OrMask: LaneIdx, XorMask: 0); |
8003 | return true; |
8004 | } |
8005 | return false; |
8006 | } |
8007 | |
8008 | bool |
8009 | AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { |
8010 | using namespace llvm::AMDGPU::Swizzle; |
8011 | |
8012 | SMLoc Loc; |
8013 | int64_t GroupSize; |
8014 | |
8015 | if (!parseSwizzleOperand(Op&: GroupSize, |
8016 | MinVal: 2, MaxVal: 32, |
8017 | ErrMsg: "group size must be in the interval [2,32]" , |
8018 | Loc)) { |
8019 | return false; |
8020 | } |
8021 | if (!isPowerOf2_64(Value: GroupSize)) { |
8022 | Error(L: Loc, Msg: "group size must be a power of two" ); |
8023 | return false; |
8024 | } |
8025 | |
8026 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: 0, XorMask: GroupSize - 1); |
8027 | return true; |
8028 | } |
8029 | |
8030 | bool |
8031 | AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { |
8032 | using namespace llvm::AMDGPU::Swizzle; |
8033 | |
8034 | SMLoc Loc; |
8035 | int64_t GroupSize; |
8036 | |
8037 | if (!parseSwizzleOperand(Op&: GroupSize, |
8038 | MinVal: 1, MaxVal: 16, |
8039 | ErrMsg: "group size must be in the interval [1,16]" , |
8040 | Loc)) { |
8041 | return false; |
8042 | } |
8043 | if (!isPowerOf2_64(Value: GroupSize)) { |
8044 | Error(L: Loc, Msg: "group size must be a power of two" ); |
8045 | return false; |
8046 | } |
8047 | |
8048 | Imm = encodeBitmaskPerm(AndMask: BITMASK_MAX, OrMask: 0, XorMask: GroupSize); |
8049 | return true; |
8050 | } |
8051 | |
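// The BITMASK_PERM control is a 5-character string with one character per
// lane-id bit, most significant bit first. Each character selects how that
// bit of the source lane id is formed: '0' forces it to 0, '1' forces it to
// 1, 'p' preserves it, and 'i' inverts it; the and/or/xor masks built below
// encode exactly this.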
8052 | bool |
8053 | AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { |
8054 | using namespace llvm::AMDGPU::Swizzle; |
8055 | |
8056 | if (!skipToken(Kind: AsmToken::Comma, ErrMsg: "expected a comma" )) { |
8057 | return false; |
8058 | } |
8059 | |
8060 | StringRef Ctl; |
8061 | SMLoc StrLoc = getLoc(); |
8062 | if (!parseString(Val&: Ctl)) { |
8063 | return false; |
8064 | } |
8065 | if (Ctl.size() != BITMASK_WIDTH) { |
8066 | Error(L: StrLoc, Msg: "expected a 5-character mask" ); |
8067 | return false; |
8068 | } |
8069 | |
8070 | unsigned AndMask = 0; |
8071 | unsigned OrMask = 0; |
8072 | unsigned XorMask = 0; |
8073 | |
8074 | for (size_t i = 0; i < Ctl.size(); ++i) { |
8075 | unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); |
8076 | switch(Ctl[i]) { |
8077 | default: |
8078 | Error(L: StrLoc, Msg: "invalid mask" ); |
8079 | return false; |
8080 | case '0': |
8081 | break; |
8082 | case '1': |
8083 | OrMask |= Mask; |
8084 | break; |
8085 | case 'p': |
8086 | AndMask |= Mask; |
8087 | break; |
8088 | case 'i': |
8089 | AndMask |= Mask; |
8090 | XorMask |= Mask; |
8091 | break; |
8092 | } |
8093 | } |
8094 | |
8095 | Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); |
8096 | return true; |
8097 | } |
8098 | |
8099 | bool |
8100 | AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { |
8101 | |
8102 | SMLoc OffsetLoc = getLoc(); |
8103 | |
8104 | if (!parseExpr(Imm, Expected: "a swizzle macro" )) { |
8105 | return false; |
8106 | } |
8107 | if (!isUInt<16>(x: Imm)) { |
8108 | Error(L: OffsetLoc, Msg: "expected a 16-bit offset" ); |
8109 | return false; |
8110 | } |
8111 | return true; |
8112 | } |
8113 | |
8114 | bool |
8115 | AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { |
8116 | using namespace llvm::AMDGPU::Swizzle; |
8117 | |
if (skipToken(Kind: AsmToken::LParen, ErrMsg: "expected a left parenthesis" )) {
8119 | |
8120 | SMLoc ModeLoc = getLoc(); |
8121 | bool Ok = false; |
8122 | |
8123 | if (trySkipId(Id: IdSymbolic[ID_QUAD_PERM])) { |
8124 | Ok = parseSwizzleQuadPerm(Imm); |
8125 | } else if (trySkipId(Id: IdSymbolic[ID_BITMASK_PERM])) { |
8126 | Ok = parseSwizzleBitmaskPerm(Imm); |
8127 | } else if (trySkipId(Id: IdSymbolic[ID_BROADCAST])) { |
8128 | Ok = parseSwizzleBroadcast(Imm); |
8129 | } else if (trySkipId(Id: IdSymbolic[ID_SWAP])) { |
8130 | Ok = parseSwizzleSwap(Imm); |
8131 | } else if (trySkipId(Id: IdSymbolic[ID_REVERSE])) { |
8132 | Ok = parseSwizzleReverse(Imm); |
8133 | } else { |
8134 | Error(L: ModeLoc, Msg: "expected a swizzle mode" ); |
8135 | } |
8136 | |
return Ok && skipToken(Kind: AsmToken::RParen, ErrMsg: "expected a closing parenthesis" );
8138 | } |
8139 | |
8140 | return false; |
8141 | } |
8142 | |
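// A swizzle operand is written either as offset:<16-bit imm> or as
// offset:swizzle(<mode>, ...), where <mode> is one of the names listed in
// IdSymbolic (QUAD_PERM, BITMASK_PERM, BROADCAST, SWAP, REVERSE) and is
// dispatched by parseSwizzleMacro above.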
8143 | ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { |
8144 | SMLoc S = getLoc(); |
8145 | int64_t Imm = 0; |
8146 | |
8147 | if (trySkipId(Id: "offset" )) { |
8148 | |
8149 | bool Ok = false; |
8150 | if (skipToken(Kind: AsmToken::Colon, ErrMsg: "expected a colon" )) { |
8151 | if (trySkipId(Id: "swizzle" )) { |
8152 | Ok = parseSwizzleMacro(Imm); |
8153 | } else { |
8154 | Ok = parseSwizzleOffset(Imm); |
8155 | } |
8156 | } |
8157 | |
8158 | Operands.push_back(Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTySwizzle)); |
8159 | |
8160 | return Ok ? ParseStatus::Success : ParseStatus::Failure; |
8161 | } |
8162 | return ParseStatus::NoMatch; |
8163 | } |
8164 | |
8165 | bool |
8166 | AMDGPUOperand::isSwizzle() const { |
8167 | return isImmTy(ImmT: ImmTySwizzle); |
8168 | } |
8169 | |
8170 | //===----------------------------------------------------------------------===// |
8171 | // VGPR Index Mode |
8172 | //===----------------------------------------------------------------------===// |
8173 | |
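// Parses the body of a gpr_idx(...) macro: either an empty list, meaning OFF,
// or a comma-separated list of distinct VGPR index mode names. Returns the
// resulting mode bitmask, or UNDEF after reporting an error.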
8174 | int64_t AMDGPUAsmParser::parseGPRIdxMacro() { |
8175 | |
8176 | using namespace llvm::AMDGPU::VGPRIndexMode; |
8177 | |
8178 | if (trySkipToken(Kind: AsmToken::RParen)) { |
8179 | return OFF; |
8180 | } |
8181 | |
8182 | int64_t Imm = 0; |
8183 | |
8184 | while (true) { |
8185 | unsigned Mode = 0; |
8186 | SMLoc S = getLoc(); |
8187 | |
8188 | for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { |
8189 | if (trySkipId(Id: IdSymbolic[ModeId])) { |
8190 | Mode = 1 << ModeId; |
8191 | break; |
8192 | } |
8193 | } |
8194 | |
8195 | if (Mode == 0) { |
8196 | Error(L: S, Msg: (Imm == 0)? |
8197 | "expected a VGPR index mode or a closing parenthesis" : |
8198 | "expected a VGPR index mode" ); |
8199 | return UNDEF; |
8200 | } |
8201 | |
8202 | if (Imm & Mode) { |
8203 | Error(L: S, Msg: "duplicate VGPR index mode" ); |
8204 | return UNDEF; |
8205 | } |
8206 | Imm |= Mode; |
8207 | |
8208 | if (trySkipToken(Kind: AsmToken::RParen)) |
8209 | break; |
8210 | if (!skipToken(Kind: AsmToken::Comma, |
8211 | ErrMsg: "expected a comma or a closing parenthesis" )) |
8212 | return UNDEF; |
8213 | } |
8214 | |
8215 | return Imm; |
8216 | } |
8217 | |
8218 | ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { |
8219 | |
8220 | using namespace llvm::AMDGPU::VGPRIndexMode; |
8221 | |
8222 | int64_t Imm = 0; |
8223 | SMLoc S = getLoc(); |
8224 | |
8225 | if (trySkipId(Id: "gpr_idx" , Kind: AsmToken::LParen)) { |
8226 | Imm = parseGPRIdxMacro(); |
8227 | if (Imm == UNDEF) |
8228 | return ParseStatus::Failure; |
8229 | } else { |
8230 | if (getParser().parseAbsoluteExpression(Res&: Imm)) |
8231 | return ParseStatus::Failure; |
8232 | if (Imm < 0 || !isUInt<4>(x: Imm)) |
8233 | return Error(L: S, Msg: "invalid immediate: only 4-bit values are legal" ); |
8234 | } |
8235 | |
8236 | Operands.push_back( |
8237 | Elt: AMDGPUOperand::CreateImm(AsmParser: this, Val: Imm, Loc: S, Type: AMDGPUOperand::ImmTyGprIdxMode)); |
8238 | return ParseStatus::Success; |
8239 | } |
8240 | |
8241 | bool AMDGPUOperand::isGPRIdxMode() const { |
8242 | return isImmTy(ImmT: ImmTyGprIdxMode); |
8243 | } |
8244 | |
8245 | //===----------------------------------------------------------------------===// |
8246 | // sopp branch targets |
8247 | //===----------------------------------------------------------------------===// |
8248 | |
8249 | ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { |
8250 | |
8251 | // Make sure we are not parsing something |
8252 | // that looks like a label or an expression but is not. |
8253 | // This will improve error messages. |
8254 | if (isRegister() || isModifier()) |
8255 | return ParseStatus::NoMatch; |
8256 | |
8257 | if (!parseExpr(Operands)) |
8258 | return ParseStatus::Failure; |
8259 | |
8260 | AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); |
8261 | assert(Opr.isImm() || Opr.isExpr()); |
8262 | SMLoc Loc = Opr.getStartLoc(); |
8263 | |
8264 | // Currently we do not support arbitrary expressions as branch targets. |
8265 | // Only labels and absolute expressions are accepted. |
8266 | if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { |
8267 | Error(L: Loc, Msg: "expected an absolute expression or a label" ); |
8268 | } else if (Opr.isImm() && !Opr.isS16Imm()) { |
8269 | Error(L: Loc, Msg: "expected a 16-bit signed jump offset" ); |
8270 | } |
8271 | |
8272 | return ParseStatus::Success; |
8273 | } |
8274 | |
8275 | //===----------------------------------------------------------------------===// |
8276 | // Boolean holding registers |
8277 | //===----------------------------------------------------------------------===// |
8278 | |
8279 | ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { |
8280 | return parseReg(Operands); |
8281 | } |
8282 | |
8283 | //===----------------------------------------------------------------------===// |
8284 | // mubuf |
8285 | //===----------------------------------------------------------------------===// |
8286 | |
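// Convert parsed MUBUF operands into MCInst operands: registers are added in
// order (with a tied source inserted after the destination for atomic-return
// opcodes), an immediate soffset is added directly, hard-coded tokens are
// skipped, and the remaining optional immediates are recorded and then
// appended as offset and cpol.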
8287 | void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, |
8288 | const OperandVector &Operands, |
8289 | bool IsAtomic) { |
8290 | OptionalImmIndexMap OptionalIdx; |
8291 | unsigned FirstOperandIdx = 1; |
8292 | bool IsAtomicReturn = false; |
8293 | |
8294 | if (IsAtomic) { |
8295 | IsAtomicReturn = MII.get(Opcode: Inst.getOpcode()).TSFlags & |
8296 | SIInstrFlags::IsAtomicRet; |
8297 | } |
8298 | |
8299 | for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { |
8300 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
8301 | |
8302 | // Add the register arguments |
8303 | if (Op.isReg()) { |
8304 | Op.addRegOperands(Inst, N: 1); |
8305 | // Insert a tied src for atomic return dst. |
// This cannot be postponed as subsequent calls to
// addImmOperands rely on the correct number of MC operands.
8308 | if (IsAtomicReturn && i == FirstOperandIdx) |
8309 | Op.addRegOperands(Inst, N: 1); |
8310 | continue; |
8311 | } |
8312 | |
8313 | // Handle the case where soffset is an immediate |
8314 | if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { |
8315 | Op.addImmOperands(Inst, N: 1); |
8316 | continue; |
8317 | } |
8318 | |
8319 | // Handle tokens like 'offen' which are sometimes hard-coded into the |
8320 | // asm string. There are no MCInst operands for these. |
8321 | if (Op.isToken()) { |
8322 | continue; |
8323 | } |
8324 | assert(Op.isImm()); |
8325 | |
8326 | // Handle optional arguments |
8327 | OptionalIdx[Op.getImmTy()] = i; |
8328 | } |
8329 | |
8330 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOffset); |
8331 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyCPol, Default: 0); |
8332 | } |
8333 | |
8334 | //===----------------------------------------------------------------------===// |
8335 | // smrd |
8336 | //===----------------------------------------------------------------------===// |
8337 | |
8338 | bool AMDGPUOperand::isSMRDOffset8() const { |
8339 | return isImmLiteral() && isUInt<8>(x: getImm()); |
8340 | } |
8341 | |
8342 | bool AMDGPUOperand::isSMEMOffset() const { |
8343 | // Offset range is checked later by validator. |
8344 | return isImmLiteral(); |
8345 | } |
8346 | |
8347 | bool AMDGPUOperand::isSMRDLiteralOffset() const { |
// 32-bit literals are only supported on CI, and we only want to use them
// when the offset does not fit in 8 bits.
8350 | return isImmLiteral() && !isUInt<8>(x: getImm()) && isUInt<32>(x: getImm()); |
8351 | } |
8352 | |
8353 | //===----------------------------------------------------------------------===// |
8354 | // vop3 |
8355 | //===----------------------------------------------------------------------===// |
8356 | |
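// Convert the value parsed after mul:/div: into the omod operand encoding:
// mul:2 -> 1, mul:4 -> 2, div:2 -> 3; a factor of 1 means no output modifier.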
8357 | static bool ConvertOmodMul(int64_t &Mul) { |
8358 | if (Mul != 1 && Mul != 2 && Mul != 4) |
8359 | return false; |
8360 | |
8361 | Mul >>= 1; |
8362 | return true; |
8363 | } |
8364 | |
8365 | static bool ConvertOmodDiv(int64_t &Div) { |
8366 | if (Div == 1) { |
8367 | Div = 0; |
8368 | return true; |
8369 | } |
8370 | |
8371 | if (Div == 2) { |
8372 | Div = 3; |
8373 | return true; |
8374 | } |
8375 | |
8376 | return false; |
8377 | } |
8378 | |
8379 | // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. |
8380 | // This is intentional and ensures compatibility with sp3. |
8381 | // See bug 35397 for details. |
8382 | bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { |
8383 | if (BoundCtrl == 0 || BoundCtrl == 1) { |
8384 | if (!isGFX11Plus()) |
8385 | BoundCtrl = 1; |
8386 | return true; |
8387 | } |
8388 | return false; |
8389 | } |
8390 | |
8391 | void AMDGPUAsmParser::onBeginOfFile() { |
8392 | if (!getParser().getStreamer().getTargetStreamer() || |
8393 | getSTI().getTargetTriple().getArch() == Triple::r600) |
8394 | return; |
8395 | |
8396 | if (!getTargetStreamer().getTargetID()) |
8397 | getTargetStreamer().initializeTargetID(STI: getSTI(), |
8398 | FeatureString: getSTI().getFeatureString()); |
8399 | |
8400 | if (isHsaAbi(STI: getSTI())) |
8401 | getTargetStreamer().EmitDirectiveAMDGCNTarget(); |
8402 | } |
8403 | |
/// Parse AMDGPU specific expressions.
///
/// expr ::= or(expr, ...) | max(expr, ...) | extrasgprs(expr, ...) |
///          totalnumvgprs(expr, ...) | alignto(expr, ...) | occupancy(expr, ...)
///
8409 | bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { |
8410 | using AGVK = AMDGPUMCExpr::VariantKind; |
8411 | |
8412 | if (isToken(Kind: AsmToken::Identifier)) { |
8413 | StringRef TokenId = getTokenStr(); |
8414 | AGVK VK = StringSwitch<AGVK>(TokenId) |
8415 | .Case(S: "max" , Value: AGVK::AGVK_Max) |
8416 | .Case(S: "or" , Value: AGVK::AGVK_Or) |
8417 | .Case(S: "extrasgprs" , Value: AGVK::AGVK_ExtraSGPRs) |
8418 | .Case(S: "totalnumvgprs" , Value: AGVK::AGVK_TotalNumVGPRs) |
8419 | .Case(S: "alignto" , Value: AGVK::AGVK_AlignTo) |
8420 | .Case(S: "occupancy" , Value: AGVK::AGVK_Occupancy) |
8421 | .Default(Value: AGVK::AGVK_None); |
8422 | |
8423 | if (VK != AGVK::AGVK_None && peekToken().is(K: AsmToken::LParen)) { |
8424 | SmallVector<const MCExpr *, 4> Exprs; |
8425 | uint64_t CommaCount = 0; |
8426 | lex(); // Eat Arg ('or', 'max', 'occupancy', etc.) |
8427 | lex(); // Eat '(' |
8428 | while (true) { |
8429 | if (trySkipToken(Kind: AsmToken::RParen)) { |
8430 | if (Exprs.empty()) { |
8431 | Error(L: getToken().getLoc(), |
8432 | Msg: "empty " + Twine(TokenId) + " expression" ); |
8433 | return true; |
8434 | } |
8435 | if (CommaCount + 1 != Exprs.size()) { |
8436 | Error(L: getToken().getLoc(), |
8437 | Msg: "mismatch of commas in " + Twine(TokenId) + " expression" ); |
8438 | return true; |
8439 | } |
8440 | Res = AMDGPUMCExpr::create(Kind: VK, Args: Exprs, Ctx&: getContext()); |
8441 | return false; |
8442 | } |
8443 | const MCExpr *Expr; |
8444 | if (getParser().parseExpression(Res&: Expr, EndLoc)) |
8445 | return true; |
8446 | Exprs.push_back(Elt: Expr); |
8447 | bool LastTokenWasComma = trySkipToken(Kind: AsmToken::Comma); |
8448 | if (LastTokenWasComma) |
8449 | CommaCount++; |
8450 | if (!LastTokenWasComma && !isToken(Kind: AsmToken::RParen)) { |
8451 | Error(L: getToken().getLoc(), |
8452 | Msg: "unexpected token in " + Twine(TokenId) + " expression" ); |
8453 | return true; |
8454 | } |
8455 | } |
8456 | } |
8457 | } |
8458 | return getParser().parsePrimaryExpr(Res, EndLoc, TypeInfo: nullptr); |
8459 | } |
8460 | |
8461 | ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { |
8462 | StringRef Name = getTokenStr(); |
8463 | if (Name == "mul" ) { |
8464 | return parseIntWithPrefix(Prefix: "mul" , Operands, |
8465 | ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodMul); |
8466 | } |
8467 | |
8468 | if (Name == "div" ) { |
8469 | return parseIntWithPrefix(Prefix: "div" , Operands, |
8470 | ImmTy: AMDGPUOperand::ImmTyOModSI, ConvertResult: ConvertOmodDiv); |
8471 | } |
8472 | |
8473 | return ParseStatus::NoMatch; |
8474 | } |
8475 | |
8476 | // Determines which bit DST_OP_SEL occupies in the op_sel operand according to |
8477 | // the number of src operands present, then copies that bit into src0_modifiers. |
8478 | static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) { |
8479 | int Opc = Inst.getOpcode(); |
8480 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
8481 | if (OpSelIdx == -1) |
8482 | return; |
8483 | |
8484 | int SrcNum; |
8485 | const int Ops[] = { AMDGPU::OpName::src0, |
8486 | AMDGPU::OpName::src1, |
8487 | AMDGPU::OpName::src2 }; |
8488 | for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: Ops[SrcNum]); |
8489 | ++SrcNum) |
8490 | ; |
8491 | assert(SrcNum > 0); |
8492 | |
8493 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
8494 | |
8495 | int DstIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst); |
8496 | if (DstIdx == -1) |
8497 | return; |
8498 | |
8499 | const MCOperand &DstOp = Inst.getOperand(i: DstIdx); |
8500 | int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers); |
8501 | uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm(); |
8502 | if (DstOp.isReg() && |
8503 | MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID).contains(Reg: DstOp.getReg())) { |
8504 | if (AMDGPU::isHi(Reg: DstOp.getReg(), MRI)) |
8505 | ModVal |= SISrcMods::DST_OP_SEL; |
8506 | } else { |
8507 | if ((OpSel & (1 << SrcNum)) != 0) |
8508 | ModVal |= SISrcMods::DST_OP_SEL; |
8509 | } |
8510 | Inst.getOperand(i: ModIdx).setImm(ModVal); |
8511 | } |
8512 | |
8513 | void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, |
8514 | const OperandVector &Operands) { |
8515 | cvtVOP3P(Inst, Operands); |
8516 | cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI()); |
8517 | } |
8518 | |
8519 | void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, |
8520 | OptionalImmIndexMap &OptionalIdx) { |
8521 | cvtVOP3P(Inst, Operands, OptionalIdx); |
8522 | cvtVOP3DstOpSelOnly(Inst, MRI: *getMRI()); |
8523 | } |
8524 | |
8525 | static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { |
8526 | return |
8527 | // 1. This operand is input modifiers |
8528 | Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS |
8529 | // 2. This is not last operand |
8530 | && Desc.NumOperands > (OpNum + 1) |
8531 | // 3. Next operand is register class |
8532 | && Desc.operands()[OpNum + 1].RegClass != -1 |
8533 | // 4. Next register is not tied to any other operand |
8534 | && Desc.getOperandConstraint(OpNum: OpNum + 1, |
8535 | Constraint: MCOI::OperandConstraint::TIED_TO) == -1; |
8536 | } |
8537 | |
8538 | void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) |
8539 | { |
8540 | OptionalImmIndexMap OptionalIdx; |
8541 | unsigned Opc = Inst.getOpcode(); |
8542 | |
8543 | unsigned I = 1; |
8544 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
8545 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8546 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1); |
8547 | } |
8548 | |
8549 | for (unsigned E = Operands.size(); I != E; ++I) { |
8550 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8551 | if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) { |
8552 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8553 | } else if (Op.isInterpSlot() || Op.isInterpAttr() || |
8554 | Op.isInterpAttrChan()) { |
8555 | Inst.addOperand(Op: MCOperand::createImm(Val: Op.getImm())); |
8556 | } else if (Op.isImmModifier()) { |
8557 | OptionalIdx[Op.getImmTy()] = I; |
8558 | } else { |
8559 | llvm_unreachable("unhandled operand type" ); |
8560 | } |
8561 | } |
8562 | |
8563 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::high)) |
8564 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8565 | ImmT: AMDGPUOperand::ImmTyHigh); |
8566 | |
8567 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp)) |
8568 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8569 | ImmT: AMDGPUOperand::ImmTyClamp); |
8570 | |
8571 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod)) |
8572 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8573 | ImmT: AMDGPUOperand::ImmTyOModSI); |
8574 | } |
8575 | |
8576 | void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) |
8577 | { |
8578 | OptionalImmIndexMap OptionalIdx; |
8579 | unsigned Opc = Inst.getOpcode(); |
8580 | |
8581 | unsigned I = 1; |
8582 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
8583 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8584 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1); |
8585 | } |
8586 | |
8587 | for (unsigned E = Operands.size(); I != E; ++I) { |
8588 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8589 | if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) { |
8590 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8591 | } else if (Op.isImmModifier()) { |
8592 | OptionalIdx[Op.getImmTy()] = I; |
8593 | } else { |
8594 | llvm_unreachable("unhandled operand type" ); |
8595 | } |
8596 | } |
8597 | |
8598 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyClamp); |
8599 | |
8600 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
8601 | if (OpSelIdx != -1) |
8602 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyOpSel); |
8603 | |
8604 | addOptionalImmOperand(Inst, Operands, OptionalIdx, ImmT: AMDGPUOperand::ImmTyWaitEXP); |
8605 | |
8606 | if (OpSelIdx == -1) |
8607 | return; |
8608 | |
8609 | const int Ops[] = { AMDGPU::OpName::src0, |
8610 | AMDGPU::OpName::src1, |
8611 | AMDGPU::OpName::src2 }; |
8612 | const int ModOps[] = { AMDGPU::OpName::src0_modifiers, |
8613 | AMDGPU::OpName::src1_modifiers, |
8614 | AMDGPU::OpName::src2_modifiers }; |
8615 | |
8616 | unsigned OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
8617 | |
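// Fold the op_sel bits into the source modifier operands: bit J selects
// OP_SEL_0 for srcJ, and bit 3 selects DST_OP_SEL, which is carried on
// src0_modifiers.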
8618 | for (int J = 0; J < 3; ++J) { |
8619 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: Ops[J]); |
8620 | if (OpIdx == -1) |
8621 | break; |
8622 | |
8623 | int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: ModOps[J]); |
8624 | uint32_t ModVal = Inst.getOperand(i: ModIdx).getImm(); |
8625 | |
8626 | if ((OpSel & (1 << J)) != 0) |
8627 | ModVal |= SISrcMods::OP_SEL_0; |
8628 | if (ModOps[J] == AMDGPU::OpName::src0_modifiers && |
8629 | (OpSel & (1 << 3)) != 0) |
8630 | ModVal |= SISrcMods::DST_OP_SEL; |
8631 | |
8632 | Inst.getOperand(i: ModIdx).setImm(ModVal); |
8633 | } |
8634 | } |
8635 | |
8636 | void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, |
8637 | OptionalImmIndexMap &OptionalIdx) { |
8638 | unsigned Opc = Inst.getOpcode(); |
8639 | |
8640 | unsigned I = 1; |
8641 | const MCInstrDesc &Desc = MII.get(Opcode: Inst.getOpcode()); |
8642 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
8643 | ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, N: 1); |
8644 | } |
8645 | |
8646 | for (unsigned E = Operands.size(); I != E; ++I) { |
8647 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
8648 | if (isRegOrImmWithInputMods(Desc, OpNum: Inst.getNumOperands())) { |
8649 | Op.addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8650 | } else if (Op.isImmModifier()) { |
8651 | OptionalIdx[Op.getImmTy()] = I; |
8652 | } else if (Op.isRegOrImm()) { |
8653 | Op.addRegOrImmOperands(Inst, N: 1); |
8654 | } else { |
8655 | llvm_unreachable("unhandled operand type" ); |
8656 | } |
8657 | } |
8658 | |
8659 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::byte_sel)) { |
8660 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in)) |
8661 | Inst.addOperand(Op: Inst.getOperand(i: 0)); |
8662 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8663 | ImmT: AMDGPUOperand::ImmTyByteSel); |
8664 | } |
8665 | |
8666 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::clamp)) |
8667 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8668 | ImmT: AMDGPUOperand::ImmTyClamp); |
8669 | |
8670 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::omod)) |
8671 | addOptionalImmOperand(Inst, Operands, OptionalIdx, |
8672 | ImmT: AMDGPUOperand::ImmTyOModSI); |
8673 | |
// Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
// they have a src2 register operand that is tied to the dst operand.
// We do not allow modifiers for this operand in the assembler, so
// src2_modifiers should be 0.
8678 | if (isMAC(Opc)) { |
8679 | auto it = Inst.begin(); |
8680 | std::advance(i&: it, n: AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::src2_modifiers)); |
8681 | it = Inst.insert(I: it, Op: MCOperand::createImm(Val: 0)); // no modifiers for src2 |
8682 | ++it; |
8683 | // Copy the operand to ensure it's not invalidated when Inst grows. |
8684 | Inst.insert(I: it, Op: MCOperand(Inst.getOperand(i: 0))); // src2 = dst |
8685 | } |
8686 | } |
8687 | |
8688 | void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { |
8689 | OptionalImmIndexMap OptionalIdx; |
8690 | cvtVOP3(Inst, Operands, OptionalIdx); |
8691 | } |
8692 | |
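// Convert a parsed VOP3P instruction: add the tied or placeholder operands
// required by a few FP8/BF8 conversion opcodes, append the op_sel, op_sel_hi,
// neg_lo and neg_hi immediates when the opcode has them, and then fold their
// per-source bits into the corresponding srcN_modifiers operands.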
8693 | void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, |
8694 | OptionalImmIndexMap &OptIdx) { |
8695 | const int Opc = Inst.getOpcode(); |
8696 | const MCInstrDesc &Desc = MII.get(Opcode: Opc); |
8697 | |
8698 | const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; |
8699 | |
8700 | if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || |
8701 | Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || |
8702 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 || |
8703 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) { |
8704 | Inst.addOperand(Op: MCOperand::createImm(Val: 0)); // Placeholder for src2_mods |
8705 | Inst.addOperand(Op: Inst.getOperand(i: 0)); |
8706 | } |
8707 | |
// Adding the vdst_in operand is already covered for these DPP instructions
// in cvtVOP3DPP.
8710 | if (AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in) && |
8711 | !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 || |
8712 | Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 || |
8713 | Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 || |
8714 | Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12 || |
8715 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 || |
8716 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || |
8717 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || |
8718 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12)) { |
8719 | assert(!IsPacked); |
8720 | Inst.addOperand(Op: Inst.getOperand(i: 0)); |
8721 | } |
8722 | |
// FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
// instruction, and then figure out where to actually put the modifiers.
8725 | |
8726 | int OpSelIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel); |
8727 | if (OpSelIdx != -1) { |
8728 | addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSel); |
8729 | } |
8730 | |
8731 | int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel_hi); |
8732 | if (OpSelHiIdx != -1) { |
8733 | int DefaultVal = IsPacked ? -1 : 0; |
8734 | addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyOpSelHi, |
8735 | Default: DefaultVal); |
8736 | } |
8737 | |
8738 | int NegLoIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_lo); |
8739 | if (NegLoIdx != -1) |
8740 | addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegLo); |
8741 | |
8742 | int NegHiIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_hi); |
8743 | if (NegHiIdx != -1) |
8744 | addOptionalImmOperand(Inst, Operands, OptionalIdx&: OptIdx, ImmT: AMDGPUOperand::ImmTyNegHi); |
8745 | |
8746 | const int Ops[] = { AMDGPU::OpName::src0, |
8747 | AMDGPU::OpName::src1, |
8748 | AMDGPU::OpName::src2 }; |
8749 | const int ModOps[] = { AMDGPU::OpName::src0_modifiers, |
8750 | AMDGPU::OpName::src1_modifiers, |
8751 | AMDGPU::OpName::src2_modifiers }; |
8752 | |
8753 | unsigned OpSel = 0; |
8754 | unsigned OpSelHi = 0; |
8755 | unsigned NegLo = 0; |
8756 | unsigned NegHi = 0; |
8757 | |
8758 | if (OpSelIdx != -1) |
8759 | OpSel = Inst.getOperand(i: OpSelIdx).getImm(); |
8760 | |
8761 | if (OpSelHiIdx != -1) |
8762 | OpSelHi = Inst.getOperand(i: OpSelHiIdx).getImm(); |
8763 | |
8764 | if (NegLoIdx != -1) |
8765 | NegLo = Inst.getOperand(i: NegLoIdx).getImm(); |
8766 | |
8767 | if (NegHiIdx != -1) |
8768 | NegHi = Inst.getOperand(i: NegHiIdx).getImm(); |
8769 | |
8770 | for (int J = 0; J < 3; ++J) { |
8771 | int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: Ops[J]); |
8772 | if (OpIdx == -1) |
8773 | break; |
8774 | |
8775 | int ModIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: ModOps[J]); |
8776 | |
8777 | if (ModIdx == -1) |
8778 | continue; |
8779 | |
8780 | uint32_t ModVal = 0; |
8781 | |
8782 | const MCOperand &SrcOp = Inst.getOperand(i: OpIdx); |
8783 | if (SrcOp.isReg() && getMRI() |
8784 | ->getRegClass(i: AMDGPU::VGPR_16RegClassID) |
8785 | .contains(Reg: SrcOp.getReg())) { |
8786 | bool VGPRSuffixIsHi = AMDGPU::isHi(Reg: SrcOp.getReg(), MRI: *getMRI()); |
8787 | if (VGPRSuffixIsHi) |
8788 | ModVal |= SISrcMods::OP_SEL_0; |
8789 | } else { |
8790 | if ((OpSel & (1 << J)) != 0) |
8791 | ModVal |= SISrcMods::OP_SEL_0; |
8792 | } |
8793 | |
8794 | if ((OpSelHi & (1 << J)) != 0) |
8795 | ModVal |= SISrcMods::OP_SEL_1; |
8796 | |
8797 | if ((NegLo & (1 << J)) != 0) |
8798 | ModVal |= SISrcMods::NEG; |
8799 | |
8800 | if ((NegHi & (1 << J)) != 0) |
8801 | ModVal |= SISrcMods::NEG_HI; |
8802 | |
8803 | Inst.getOperand(i: ModIdx).setImm(Inst.getOperand(i: ModIdx).getImm() | ModVal); |
8804 | } |
8805 | } |
8806 | |
8807 | void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { |
8808 | OptionalImmIndexMap OptIdx; |
8809 | cvtVOP3(Inst, Operands, OptionalIdx&: OptIdx); |
8810 | cvtVOP3P(Inst, Operands, OptIdx); |
8811 | } |
8812 | |
8813 | static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, |
8814 | unsigned i, unsigned Opc, unsigned OpName) { |
8815 | if (AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: OpName) != -1) |
8816 | ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, N: 2); |
8817 | else |
8818 | ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, N: 1); |
8819 | } |
8820 | |
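// Convert a parsed SWMMAC instruction: dst, then src0 and src1 (with FP input
// modifiers when the opcode has them), a tied copy of dst, then src2,
// followed by the optional index_key and clamp immediates; cvtVOP3P handles
// the remaining packed-operand modifiers.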
8821 | void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) { |
8822 | unsigned Opc = Inst.getOpcode(); |
8823 | |
8824 | ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, N: 1); |
8825 | addSrcModifiersAndSrc(Inst, Operands, i: 2, Opc, OpName: AMDGPU::OpName::src0_modifiers); |
8826 | addSrcModifiersAndSrc(Inst, Operands, i: 3, Opc, OpName: AMDGPU::OpName::src1_modifiers); |
8827 | ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, N: 1); // srcTiedDef |
8828 | ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, N: 1); // src2 |
8829 | |
8830 | OptionalImmIndexMap OptIdx; |
8831 | for (unsigned i = 5; i < Operands.size(); ++i) { |
8832 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); |
8833 | OptIdx[Op.getImmTy()] = i; |
8834 | } |
8835 | |
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey8bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey16bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);
8846 | |
8847 | cvtVOP3P(Inst, Operands, OptIdx); |
8848 | } |
8849 | |
8850 | //===----------------------------------------------------------------------===// |
8851 | // VOPD |
8852 | //===----------------------------------------------------------------------===// |
8853 | |
8854 | ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { |
  if (!hasVOPD(getSTI()))
8856 | return ParseStatus::NoMatch; |
8857 | |
  if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8859 | SMLoc S = getLoc(); |
8860 | lex(); |
8861 | lex(); |
    Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8863 | SMLoc OpYLoc = getLoc(); |
8864 | StringRef OpYName; |
    if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
      Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8867 | return ParseStatus::Success; |
8868 | } |
    return Error(OpYLoc, "expected a VOPDY instruction after ::");
8870 | } |
8871 | return ParseStatus::NoMatch; |
8872 | } |
8873 | |
8874 | // Create VOPD MCInst operands using parsed assembler operands. |
8875 | void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { |
8876 | auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer |
8877 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); |
8878 | if (Op.isReg()) { |
      Op.addRegOperands(Inst, 1);
8880 | return; |
8881 | } |
8882 | if (Op.isImm()) { |
      Op.addImmOperands(Inst, 1);
8884 | return; |
8885 | } |
    llvm_unreachable("Unhandled operand type in cvtVOPD");
8887 | }; |
8888 | |
  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8890 | |
8891 | // MCInst operands are ordered as follows: |
8892 | // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] |
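  // For example, for a dual instruction of the (illustrative) shape
  //   OpX vX0, sX0, sX1 :: OpY vY0, sY0
  // the operands are emitted in the order: vX0, vY0, sX0, sX1, sY0.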
8893 | |
8894 | for (auto CompIdx : VOPD::COMPONENTS) { |
8895 | addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); |
8896 | } |
8897 | |
8898 | for (auto CompIdx : VOPD::COMPONENTS) { |
8899 | const auto &CInfo = InstInfo[CompIdx]; |
8900 | auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); |
8901 | for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) |
8902 | addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); |
8903 | if (CInfo.hasSrc2Acc()) |
8904 | addOp(CInfo.getIndexOfDstInParsedOperands()); |
8905 | } |
8906 | } |
8907 | |
8908 | //===----------------------------------------------------------------------===// |
8909 | // dpp |
8910 | //===----------------------------------------------------------------------===// |
8911 | |
8912 | bool AMDGPUOperand::isDPP8() const { |
  return isImmTy(ImmTyDPP8);
8914 | } |
8915 | |
8916 | bool AMDGPUOperand::isDPPCtrl() const { |
8917 | using namespace AMDGPU::DPP; |
8918 | |
  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8920 | if (result) { |
8921 | int64_t Imm = getImm(); |
8922 | return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || |
8923 | (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || |
8924 | (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || |
8925 | (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || |
8926 | (Imm == DppCtrl::WAVE_SHL1) || |
8927 | (Imm == DppCtrl::WAVE_ROL1) || |
8928 | (Imm == DppCtrl::WAVE_SHR1) || |
8929 | (Imm == DppCtrl::WAVE_ROR1) || |
8930 | (Imm == DppCtrl::ROW_MIRROR) || |
8931 | (Imm == DppCtrl::ROW_HALF_MIRROR) || |
8932 | (Imm == DppCtrl::BCAST15) || |
8933 | (Imm == DppCtrl::BCAST31) || |
8934 | (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || |
8935 | (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); |
8936 | } |
8937 | return false; |
8938 | } |
8939 | |
8940 | //===----------------------------------------------------------------------===// |
8941 | // mAI |
8942 | //===----------------------------------------------------------------------===// |
8943 | |
8944 | bool AMDGPUOperand::isBLGP() const { |
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8946 | } |
8947 | |
8948 | bool AMDGPUOperand::isS16Imm() const { |
  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8950 | } |
8951 | |
8952 | bool AMDGPUOperand::isU16Imm() const { |
  return isImmLiteral() && isUInt<16>(getImm());
8954 | } |
8955 | |
8956 | //===----------------------------------------------------------------------===// |
8957 | // dim |
8958 | //===----------------------------------------------------------------------===// |
8959 | |
8960 | bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { |
8961 | // We want to allow "dim:1D" etc., |
8962 | // but the initial 1 is tokenized as an integer. |
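  // E.g. for "dim:2D" the lexer produces the integer token "2" followed by
  // the identifier "D"; the two are re-joined below, provided they are
  // adjacent in the input.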
8963 | std::string Token; |
  if (isToken(AsmToken::Integer)) {
8965 | SMLoc Loc = getToken().getEndLoc(); |
8966 | Token = std::string(getTokenStr()); |
8967 | lex(); |
8968 | if (getLoc() != Loc) |
8969 | return false; |
8970 | } |
8971 | |
8972 | StringRef Suffix; |
  if (!parseId(Suffix))
8974 | return false; |
8975 | Token += Suffix; |
8976 | |
8977 | StringRef DimId = Token; |
  if (DimId.starts_with("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8982 | if (!DimInfo) |
8983 | return false; |
8984 | |
8985 | Encoding = DimInfo->Encoding; |
8986 | return true; |
8987 | } |
8988 | |
8989 | ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { |
8990 | if (!isGFX10Plus()) |
8991 | return ParseStatus::NoMatch; |
8992 | |
8993 | SMLoc S = getLoc(); |
8994 | |
  if (!trySkipId("dim", AsmToken::Colon))
8996 | return ParseStatus::NoMatch; |
8997 | |
8998 | unsigned Encoding; |
8999 | SMLoc Loc = getLoc(); |
9000 | if (!parseDimId(Encoding)) |
    return Error(Loc, "invalid dim value");
9002 | |
  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
9005 | return ParseStatus::Success; |
9006 | } |
9007 | |
9008 | //===----------------------------------------------------------------------===// |
9009 | // dpp |
9010 | //===----------------------------------------------------------------------===// |
9011 | |
9012 | ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { |
9013 | SMLoc S = getLoc(); |
9014 | |
  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
9016 | return ParseStatus::NoMatch; |
9017 | |
9018 | // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] |
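  // E.g. dpp8:[7,6,5,4,3,2,1,0] selects the lanes of each group of eight in
  // reverse order; the eight 3-bit selects are packed below as
  // Sels[i] << (i * 3).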
9019 | |
9020 | int64_t Sels[8]; |
9021 | |
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9023 | return ParseStatus::Failure; |
9024 | |
9025 | for (size_t i = 0; i < 8; ++i) { |
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9027 | return ParseStatus::Failure; |
9028 | |
9029 | SMLoc Loc = getLoc(); |
    if (getParser().parseAbsoluteExpression(Sels[i]))
9031 | return ParseStatus::Failure; |
9032 | if (0 > Sels[i] || 7 < Sels[i]) |
      return Error(Loc, "expected a 3-bit value");
9034 | } |
9035 | |
  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9037 | return ParseStatus::Failure; |
9038 | |
9039 | unsigned DPP8 = 0; |
9040 | for (size_t i = 0; i < 8; ++i) |
9041 | DPP8 |= (Sels[i] << (i * 3)); |
9042 | |
  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
9044 | return ParseStatus::Success; |
9045 | } |
9046 | |
9047 | bool |
9048 | AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, |
9049 | const OperandVector &Operands) { |
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
9070 | } |
9071 | |
9072 | int64_t |
9073 | AMDGPUAsmParser::parseDPPCtrlPerm() { |
9074 | // quad_perm:[%d,%d,%d,%d] |
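  // E.g. quad_perm:[0,1,2,3] is the identity permutation; each 2-bit select
  // is packed below as Temp << (i * 2).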
9075 | |
  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
9077 | return -1; |
9078 | |
9079 | int64_t Val = 0; |
9080 | for (int i = 0; i < 4; ++i) { |
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
9082 | return -1; |
9083 | |
9084 | int64_t Temp; |
9085 | SMLoc Loc = getLoc(); |
    if (getParser().parseAbsoluteExpression(Temp))
9087 | return -1; |
9088 | if (Temp < 0 || Temp > 3) { |
      Error(Loc, "expected a 2-bit value");
9090 | return -1; |
9091 | } |
9092 | |
9093 | Val += (Temp << i * 2); |
9094 | } |
9095 | |
  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
9097 | return -1; |
9098 | |
9099 | return Val; |
9100 | } |
9101 | |
9102 | int64_t |
9103 | AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { |
9104 | using namespace AMDGPU::DPP; |
9105 | |
9106 | // sel:%d |
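  // E.g. row_shl:1 encodes as DppCtrl::ROW_SHL0 | 1, while row_bcast accepts
  // only 15 or 31 (BCAST15 / BCAST31); the table below lists the valid range
  // for each control.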
9107 | |
9108 | int64_t Val; |
9109 | SMLoc Loc = getLoc(); |
9110 | |
  if (getParser().parseAbsoluteExpression(Val))
9112 | return -1; |
9113 | |
9114 | struct DppCtrlCheck { |
9115 | int64_t Ctrl; |
9116 | int Lo; |
9117 | int Hi; |
9118 | }; |
9119 | |
  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});
9132 | |
9133 | bool Valid; |
9134 | if (Check.Ctrl == -1) { |
9135 | Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); |
9136 | Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; |
9137 | } else { |
9138 | Valid = Check.Lo <= Val && Val <= Check.Hi; |
9139 | Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); |
9140 | } |
9141 | |
9142 | if (!Valid) { |
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
9144 | return -1; |
9145 | } |
9146 | |
9147 | return Val; |
9148 | } |
9149 | |
9150 | ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { |
9151 | using namespace AMDGPU::DPP; |
9152 | |
  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
9155 | return ParseStatus::NoMatch; |
9156 | |
9157 | SMLoc S = getLoc(); |
9158 | int64_t Val = -1; |
9159 | StringRef Ctrl; |
9160 | |
  parseId(Ctrl);
9162 | |
  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
9170 | Val = parseDPPCtrlPerm(); |
9171 | } else { |
9172 | Val = parseDPPCtrlSel(Ctrl); |
9173 | } |
9174 | } |
9175 | } |
9176 | |
9177 | if (Val == -1) |
9178 | return ParseStatus::Failure; |
9179 | |
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
9182 | return ParseStatus::Success; |
9183 | } |
9184 | |
9185 | void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, |
9186 | bool IsDPP8) { |
9187 | OptionalImmIndexMap OptionalIdx; |
9188 | unsigned Opc = Inst.getOpcode(); |
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // MAC instructions are special because they have 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have dummy unused src2_modifiers.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
9199 | |
9200 | unsigned I = 1; |
9201 | for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { |
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9203 | } |
9204 | |
9205 | int Fi = 0; |
9206 | for (unsigned E = Operands.size(); I != E; ++I) { |
9207 | |
9208 | if (IsMAC) { |
9209 | int NumOperands = Inst.getNumOperands(); |
9210 | if (OldIdx == NumOperands) { |
9211 | // Handle old operand |
9212 | constexpr int DST_IDX = 0; |
        Inst.addOperand(Inst.getOperand(DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add unused dummy src2_modifiers
        Inst.addOperand(MCOperand::createImm(0));
9217 | } |
9218 | } |
9219 | |
    int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
      Inst.addOperand(Inst.getOperand(0));
9223 | } |
9224 | |
9225 | bool IsVOP3CvtSrDpp = |
9226 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 || |
9227 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 || |
9228 | Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 || |
9229 | Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12; |
9230 | if (IsVOP3CvtSrDpp) { |
9231 | if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) { |
        Inst.addOperand(MCOperand::createImm(0));
        Inst.addOperand(MCOperand::createReg(0));
9234 | } |
9235 | } |
9236 | |
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
9239 | if (TiedTo != -1) { |
9240 | assert((unsigned)TiedTo < Inst.getNumOperands()); |
9241 | // handle tied old or src2 for MAC instructions |
      Inst.addOperand(Inst.getOperand(TiedTo));
9243 | } |
9244 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
9245 | // Add the register arguments |
9246 | if (IsDPP8 && Op.isDppFI()) { |
9247 | Fi = Op.getImm(); |
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
9256 | } else if (Op.isImm()) { |
9257 | OptionalIdx[Op.getImmTy()] = I; |
9258 | } else { |
9259 | llvm_unreachable("unhandled operand type" ); |
9260 | } |
9261 | } |
9262 | |
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
9273 | |
9274 | if (Desc.TSFlags & SIInstrFlags::VOP3P) |
    cvtVOP3P(Inst, Operands, OptionalIdx);
9276 | else if (Desc.TSFlags & SIInstrFlags::VOP3) |
9277 | cvtVOP3OpSel(Inst, Operands, OptionalIdx); |
  else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9280 | } |
9281 | |
9282 | if (IsDPP8) { |
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
9295 | } |
9296 | } |
9297 | |
9298 | void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { |
9299 | OptionalImmIndexMap OptionalIdx; |
9300 | |
9301 | unsigned I = 1; |
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9305 | } |
9306 | |
9307 | int Fi = 0; |
9308 | for (unsigned E = Operands.size(); I != E; ++I) { |
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
9319 | // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. |
9320 | // Skip it. |
9321 | continue; |
9322 | } |
9323 | |
9324 | if (IsDPP8) { |
9325 | if (Op.isDPP8()) { |
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
9348 | } |
9349 | } |
9350 | } |
9351 | |
9352 | if (IsDPP8) { |
9353 | using namespace llvm::AMDGPU::DPP; |
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
9362 | } |
9363 | } |
9364 | } |
9365 | |
9366 | //===----------------------------------------------------------------------===// |
9367 | // sdwa |
9368 | //===----------------------------------------------------------------------===// |
9369 | |
9370 | ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, |
9371 | StringRef Prefix, |
9372 | AMDGPUOperand::ImmTy Type) { |
9373 | using namespace llvm::AMDGPU::SDWA; |
9374 | |
9375 | SMLoc S = getLoc(); |
9376 | StringRef Value; |
9377 | |
9378 | SMLoc StringLoc; |
9379 | ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc); |
9380 | if (!Res.isSuccess()) |
9381 | return Res; |
9382 | |
9383 | int64_t Int; |
9384 | Int = StringSwitch<int64_t>(Value) |
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff)
    return Error(StringLoc, "invalid " + Twine(Prefix) + " value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9398 | return ParseStatus::Success; |
9399 | } |
9400 | |
9401 | ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { |
9402 | using namespace llvm::AMDGPU::SDWA; |
9403 | |
9404 | SMLoc S = getLoc(); |
9405 | StringRef Value; |
9406 | |
9407 | SMLoc StringLoc; |
  ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9409 | if (!Res.isSuccess()) |
9410 | return Res; |
9411 | |
9412 | int64_t Int; |
9413 | Int = StringSwitch<int64_t>(Value) |
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff)
    return Error(StringLoc, "invalid dst_unused value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9423 | return ParseStatus::Success; |
9424 | } |
9425 | |
9426 | void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { |
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9428 | } |
9429 | |
9430 | void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { |
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9432 | } |
9433 | |
9434 | void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { |
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9436 | } |
9437 | |
9438 | void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { |
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9440 | } |
9441 | |
9442 | void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { |
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9444 | } |
9445 | |
9446 | void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, |
9447 | uint64_t BasicInstType, |
9448 | bool SkipDstVcc, |
9449 | bool SkipSrcVcc) { |
9450 | using namespace llvm::AMDGPU::SDWA; |
9451 | |
9452 | OptionalImmIndexMap OptionalIdx; |
9453 | bool SkipVcc = SkipDstVcc || SkipSrcVcc; |
9454 | bool SkippedVcc = false; |
9455 | |
9456 | unsigned I = 1; |
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9460 | } |
9461 | |
9462 | for (unsigned E = Operands.size(); I != E; ++I) { |
9463 | AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); |
9464 | if (SkipVcc && !SkippedVcc && Op.isReg() && |
9465 | (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { |
9466 | // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. |
9467 | // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) |
9468 | // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. |
9469 | // Skip VCC only if we didn't skip it on previous iteration. |
9470 | // Note that src0 and src1 occupy 2 slots each because of modifiers. |
9471 | if (BasicInstType == SIInstrFlags::VOP2 && |
9472 | ((SkipDstVcc && Inst.getNumOperands() == 1) || |
9473 | (SkipSrcVcc && Inst.getNumOperands() == 5))) { |
9474 | SkippedVcc = true; |
9475 | continue; |
9476 | } |
9477 | if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) { |
9478 | SkippedVcc = true; |
9479 | continue; |
9480 | } |
9481 | } |
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
9484 | } else if (Op.isImm()) { |
9485 | // Handle optional arguments |
9486 | OptionalIdx[Op.getImmTy()] = I; |
9487 | } else { |
      llvm_unreachable("Invalid operand type");
9489 | } |
9490 | SkippedVcc = false; |
9491 | } |
9492 | |
9493 | const unsigned Opc = Inst.getOpcode(); |
9494 | if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && |
9495 | Opc != AMDGPU::V_NOP_sdwa_vi) { |
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
9497 | switch (BasicInstType) { |
9498 | case SIInstrFlags::VOP1: |
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9517 | break; |
9518 | |
9519 | case SIInstrFlags::VOP2: |
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9530 | break; |
9531 | |
9532 | case SIInstrFlags::VOPC: |
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9538 | break; |
9539 | |
9540 | default: |
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9542 | } |
9543 | } |
9544 | |
9545 | // special case v_mac_{f16, f32}: |
9546 | // it has src2 register operand that is tied to dst operand |
9547 | if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || |
9548 | Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { |
9549 | auto it = Inst.begin(); |
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9553 | } |
9554 | } |
9555 | |
9556 | /// Force static initialization. |
9557 | extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { |
9558 | RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target()); |
9559 | RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); |
9560 | } |
9561 | |
9562 | #define GET_REGISTER_MATCHER |
9563 | #define GET_MATCHER_IMPLEMENTATION |
9564 | #define GET_MNEMONIC_SPELL_CHECKER |
9565 | #define GET_MNEMONIC_CHECKER |
9566 | #include "AMDGPUGenAsmMatcher.inc" |
9567 | |
9568 | ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, |
9569 | unsigned MCK) { |
9570 | switch (MCK) { |
9571 | case MCK_addr64: |
    return parseTokenOp("addr64", Operands);
  case MCK_done:
    return parseTokenOp("done", Operands);
  case MCK_idxen:
    return parseTokenOp("idxen", Operands);
  case MCK_lds:
    return parseTokenOp("lds", Operands);
  case MCK_offen:
    return parseTokenOp("offen", Operands);
  case MCK_off:
    return parseTokenOp("off", Operands);
  case MCK_row_95_en:
    return parseTokenOp("row_en", Operands);
  case MCK_gds:
    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
  case MCK_tfe:
    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9589 | } |
9590 | return tryCustomParseOperand(Operands, MCK); |
9591 | } |
9592 | |
9593 | // This function should be defined after auto-generated include so that we have |
9594 | // MatchClassKind enum defined |
9595 | unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, |
9596 | unsigned Kind) { |
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // but expected the corresponding token instead.
9601 | AMDGPUOperand &Operand = (AMDGPUOperand&)Op; |
9602 | switch (Kind) { |
9603 | case MCK_addr64: |
9604 | return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; |
9605 | case MCK_gds: |
9606 | return Operand.isGDS() ? Match_Success : Match_InvalidOperand; |
9607 | case MCK_lds: |
9608 | return Operand.isLDS() ? Match_Success : Match_InvalidOperand; |
9609 | case MCK_idxen: |
9610 | return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; |
9611 | case MCK_offen: |
9612 | return Operand.isOffen() ? Match_Success : Match_InvalidOperand; |
9613 | case MCK_tfe: |
9614 | return Operand.isTFE() ? Match_Success : Match_InvalidOperand; |
9615 | case MCK_SSrc_b32: |
9616 | // When operands have expression values, they will return true for isToken, |
9617 | // because it is not possible to distinguish between a token and an |
9618 | // expression at parse time. MatchInstructionImpl() will always try to |
9619 | // match an operand as a token, when isToken returns true, and when the |
9620 | // name of the expression is not a valid token, the match will fail, |
9621 | // so we need to handle it here. |
9622 | return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand; |
9623 | case MCK_SSrc_f32: |
9624 | return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand; |
9625 | case MCK_SOPPBrTarget: |
9626 | return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand; |
9627 | case MCK_VReg32OrOff: |
9628 | return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; |
9629 | case MCK_InterpSlot: |
9630 | return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; |
9631 | case MCK_InterpAttr: |
9632 | return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; |
9633 | case MCK_InterpAttrChan: |
9634 | return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand; |
9635 | case MCK_SReg_64: |
9636 | case MCK_SReg_64_XEXEC: |
9637 | // Null is defined as a 32-bit register but |
9638 | // it should also be enabled with 64-bit operands. |
9639 | // The following code enables it for SReg_64 operands |
9640 | // used as source and destination. Remaining source |
9641 | // operands are handled in isInlinableImm. |
9642 | return Operand.isNull() ? Match_Success : Match_InvalidOperand; |
9643 | default: |
9644 | return Match_InvalidOperand; |
9645 | } |
9646 | } |
9647 | |
9648 | //===----------------------------------------------------------------------===// |
9649 | // endpgm |
9650 | //===----------------------------------------------------------------------===// |
9651 | |
9652 | ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) { |
9653 | SMLoc S = getLoc(); |
9654 | int64_t Imm = 0; |
9655 | |
9656 | if (!parseExpr(Imm)) { |
    // The operand is optional; if not present, default to 0.
9658 | Imm = 0; |
9659 | } |
9660 | |
  if (!isUInt<16>(Imm))
    return Error(S, "expected a 16-bit value");

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9666 | return ParseStatus::Success; |
9667 | } |
9668 | |
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9670 | |
9671 | //===----------------------------------------------------------------------===// |
9672 | // Split Barrier |
9673 | //===----------------------------------------------------------------------===// |
9674 | |
bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9676 | |