1//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Interface definition for SIInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16
17#include "AMDGPUMIRFormatter.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "SIRegisterInfo.h"
20#include "Utils/AMDGPUBaseInfo.h"
21#include "llvm/ADT/SetVector.h"
22#include "llvm/CodeGen/TargetInstrInfo.h"
23#include "llvm/CodeGen/TargetSchedule.h"
24
25#define GET_INSTRINFO_HEADER
26#include "AMDGPUGenInstrInfo.inc"
27
28namespace llvm {
29
30class APInt;
31class GCNSubtarget;
32class LiveVariables;
33class MachineDominatorTree;
34class MachineRegisterInfo;
35class RegScavenger;
36class SIMachineFunctionInfo;
37class TargetRegisterClass;
38class ScheduleHazardRecognizer;
39
/// Default limit, in dwords, used when clustering memory operations.
constexpr unsigned DefaultMemoryClusterDWordsLimit = 8;

/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

/// Mark the MMO of a load as the last use.
static const MachineMemOperand::Flags MOLastUse =
    MachineMemOperand::MOTargetFlag2;

/// Mark the MMO of cooperative load/store atomics.
static const MachineMemOperand::Flags MOCooperative =
    MachineMemOperand::MOTargetFlag3;

/// Mark the MMO of accesses to memory locations that are
/// never written to by other threads.
static const MachineMemOperand::Flags MOThreadPrivate =
    MachineMemOperand::MOTargetFlag4;
59
60/// Utility to store machine instructions worklist.
61struct SIInstrWorklist {
62 SIInstrWorklist() = default;
63
64 void insert(MachineInstr *MI);
65
66 MachineInstr *top() const {
67 const auto *iter = InstrList.begin();
68 return *iter;
69 }
70
71 void erase_top() {
72 const auto *iter = InstrList.begin();
73 InstrList.erase(I: iter);
74 }
75
76 bool empty() const { return InstrList.empty(); }
77
78 void clear() {
79 InstrList.clear();
80 DeferredList.clear();
81 }
82
83 bool isDeferred(MachineInstr *MI);
84
85 SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }
86
87private:
88 /// InstrList contains the MachineInstrs.
89 SetVector<MachineInstr *> InstrList;
90 /// Deferred instructions are specific MachineInstr
91 /// that will be added by insert method.
92 SetVector<MachineInstr *> DeferredList;
93};
94
95class SIInstrInfo final : public AMDGPUGenInstrInfo {
  // Opaque bundle of updates produced by convertToThreeAddressImpl.
  struct ThreeAddressUpdates;

private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  // Lazily-created MIR formatter; mutable because it is created on demand
  // from const accessors.
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;

  // The inverse predicate should have the negative value.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };

  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              const MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(
      MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
      const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
      unsigned SubIdx, const TargetRegisterClass *SubRC) const;
131
private:
  // Helpers used when lowering scalar (SALU) instructions to VALU
  // equivalents; definitions live in SIInstrInfo.cpp.
  bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
                   bool NeedInversion) const;

  bool invertSCCUse(MachineInstr *SCCDef) const;

  void swapOperands(MachineInstr &Inst) const;

  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarAbsDiff(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          MachineDominatorTree *MDT) const;

  void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
                             MachineDominatorTree *MDT) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
  void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode,
                               MachineDominatorTree *MDT = nullptr) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  // Worklist-seeding helpers for the VALU-lowering pass.
  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  void addSCCDefUsersToVALUWorklist(const MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

  // Used by verifyInstruction to validate COPYs; fills ErrInfo on failure.
  bool verifyCopy(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                  StringRef &ErrInfo) const;

  bool resultDependsOnExec(const MachineInstr &MI) const;

  MachineInstr *convertToThreeAddressImpl(MachineInstr &MI,
                                          ThreeAddressUpdates &Updates) const;

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// value from one register to another register return destination and source
  /// registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;

  bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0,
                           AMDGPU::OpName Src0OpName, MachineOperand &Src1,
                           AMDGPU::OpName Src1OpName) const;
  bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx,
                     unsigned toIdx) const;
  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;
225
public:
  // Target-specific MachineOperand flag values; encodings must fit in MO_MASK.
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_GOTPCREL64 -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL64 = 4,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 5,
    MO_REL32_LO = 5,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 6,
    MO_REL64 = 7,

    MO_FAR_BRANCH_OFFSET = 8,

    MO_ABS32_LO = 9,
    MO_ABS32_HI = 10,
    MO_ABS64 = 11,
  };

  explicit SIInstrInfo(const GCNSubtarget &ST);

  /// \returns the register info for the owning subtarget.
  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  /// \returns the subtarget this instruction info was created for.
  const GCNSubtarget &getSubtarget() const {
    return ST;
  }
263
  bool isReMaterializableImpl(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
                    MachineCycleInfo *CI) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool isGlobalMemoryObject(const MachineInstr *MI) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, Register DestReg, Register SrcReg,
                   bool KillSrc, bool RenamableDest = false,
                   bool RenamableSrc = false) const override;

  const TargetRegisterClass *getPreferredSelectRegClass(
    unsigned Size) const;

  // Emit a compare of SrcReg against the immediate Value, returning the
  // condition register; see definitions for the exact opcodes used.
  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg,
                               int64_t &ImmVal) const override;

  std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;

  // Spill opcode selection for vector registers, keyed on register class,
  // spill size, and per-function info.
  unsigned getVectorRegSpillSaveOpcode(Register Reg,
                                       const TargetRegisterClass *RC,
                                       unsigned Size,
                                       const SIMachineFunctionInfo &MFI) const;
  unsigned
  getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
                                 unsigned Size,
                                 const SIMachineFunctionInfo &MFI) const;

  void storeRegToStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
      bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  void loadRegFromStackSlot(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
      int FrameIndex, const TargetRegisterClass *RC, Register VReg,
      unsigned SubReg = 0,
      MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     Register DestReg, unsigned SubIdx,
                     const MachineInstr &Orig) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case IR needs to be in SSA form
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register. If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;
361 LLVM_READONLY
362 inline int commuteOpcode(const MachineInstr &MI) const {
363 return commuteOpcode(Opc: MI.getOpcode());
364 }
365
  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow
  /// instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  // Shared implementation behind analyzeBranch; starts the scan at \p I.
  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);
  static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  void mutateAndCleanupImplicit(MachineInstr &MI,
                                const MCInstrDesc &NewDesc) const;

  /// Return the extracted immediate value in a subregister use from a constant
  /// materialized in a super register.
  ///
  /// e.g. %imm = S_MOV_B64 K[0:63]
  ///      USE %imm.sub1
  /// This will return K[32:63]
  static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal,
                                                     unsigned SubRegIndex);

  bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;
464
  // Instruction-format predicates. Each comes in two forms: a static overload
  // testing a MachineInstr's descriptor flags, and a member overload testing
  // an opcode through this target's instruction table.
  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  // "Image" covers the MIMG, VSAMPLE, and VIMAGE encodings.
  static bool isImage(const MachineInstr &MI) {
    return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
  }

  bool isImage(uint32_t Opcode) const {
    return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
  }

  // VMEM covers buffer, image, and FLAT memory instructions.
  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI);
  }

  bool isVMEM(uint32_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode) ||
           isFLAT(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }
561
562 static bool isVOP3(const MCInstrDesc &Desc) {
563 return Desc.TSFlags & SIInstrFlags::VOP3;
564 }
565
566 static bool isVOP3(const MachineInstr &MI) { return isVOP3(Desc: MI.getDesc()); }
567
568 bool isVOP3(uint32_t Opcode) const { return isVOP3(Desc: get(Opcode)); }
569
  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  // Either buffer encoding (untyped MUBUF or typed MTBUF).
  static bool isBUF(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI);
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  // LDS DMA: VALU-flagged buffer/FLAT transfers, or TENSOR_CNT instructions.
  static bool isLDSDMA(const MachineInstr &MI) {
    return (isVALU(MI) && (isMUBUF(MI) || isFLAT(MI))) ||
           (MI.getDesc().TSFlags & SIInstrFlags::TENSOR_CNT);
  }
628
629 bool isLDSDMA(uint32_t Opcode) {
630 return (isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode))) ||
631 (get(Opcode).TSFlags & SIInstrFlags::TENSOR_CNT);
632 }
633
  static bool isGWS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::GWS;
  }

  bool isGWS(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::GWS;
  }

  bool isAlwaysGDS(uint32_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isVIMAGE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
  }

  bool isVIMAGE(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
  }

  static bool isVSAMPLE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
  }

  bool isVSAMPLE(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  bool isSegmentSpecificFLAT(uint32_t Opcode) const {
    auto Flags = get(Opcode).TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  static bool isFLATGlobal(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
  }

  bool isFLATGlobal(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
  }

  static bool isFLATScratch(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
  }

  bool isFLATScratch(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
  }

  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  /// \returns true for SCRATCH_ instructions, or FLAT/BUF instructions unless
  /// the MMOs do not include scratch.
  /// Conservatively correct; will return true if \p MI cannot be proven
  /// to not hit scratch.
  bool mayAccessScratch(const MachineInstr &MI) const;

  /// \returns true for FLAT instructions that can access VMEM.
  bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const;

  /// \returns true for FLAT instructions that can access LDS.
  bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;

  /// \returns true for the block (register-tuple) spill save/restore opcodes.
  static bool isBlockLoadStore(uint32_t Opcode) {
    switch (Opcode) {
    case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
    case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
    case AMDGPU::SCRATCH_STORE_BLOCK_SADDR:
    case AMDGPU::SCRATCH_LOAD_BLOCK_SADDR:
    case AMDGPU::SCRATCH_STORE_BLOCK_SVS:
    case AMDGPU::SCRATCH_LOAD_BLOCK_SVS:
      return true;
    default:
      return false;
    }
  }

  /// \returns true for SALU opcodes that set SCC to (result != 0).
  static bool setsSCCIfResultIsNonZero(const MachineInstr &MI) {
    switch (MI.getOpcode()) {
    case AMDGPU::S_ABSDIFF_I32:
    case AMDGPU::S_ABS_I32:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
    case AMDGPU::S_ANDN2_B32:
    case AMDGPU::S_ANDN2_B64:
    case AMDGPU::S_ASHR_I32:
    case AMDGPU::S_ASHR_I64:
    case AMDGPU::S_BCNT0_I32_B32:
    case AMDGPU::S_BCNT0_I32_B64:
    case AMDGPU::S_BCNT1_I32_B32:
    case AMDGPU::S_BCNT1_I32_B64:
    case AMDGPU::S_BFE_I32:
    case AMDGPU::S_BFE_I64:
    case AMDGPU::S_BFE_U32:
    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_LSHL_B32:
    case AMDGPU::S_LSHL_B64:
    case AMDGPU::S_LSHR_B32:
    case AMDGPU::S_LSHR_B64:
    case AMDGPU::S_NAND_B32:
    case AMDGPU::S_NAND_B64:
    case AMDGPU::S_NOR_B32:
    case AMDGPU::S_NOR_B64:
    case AMDGPU::S_NOT_B32:
    case AMDGPU::S_NOT_B64:
    case AMDGPU::S_OR_B32:
    case AMDGPU::S_OR_B64:
    case AMDGPU::S_ORN2_B32:
    case AMDGPU::S_ORN2_B64:
    case AMDGPU::S_QUADMASK_B32:
    case AMDGPU::S_QUADMASK_B64:
    case AMDGPU::S_WQM_B32:
    case AMDGPU::S_WQM_B64:
    case AMDGPU::S_XNOR_B32:
    case AMDGPU::S_XNOR_B64:
    case AMDGPU::S_XOR_B32:
    case AMDGPU::S_XOR_B64:
      return true;
    default:
      return false;
    }
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }
788
789 static bool isDualSourceBlendEXP(const MachineInstr &MI) {
790 if (!isEXP(MI))
791 return false;
792 unsigned Target = MI.getOperand(i: 0).getImm();
793 return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
794 Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
795 }
796
  bool isEXP(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isAtomicNoRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  bool isAtomicNoRet(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  static bool isAtomicRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
  }

  bool isAtomicRet(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
  }

  // Atomic in either the returning or non-returning form.
  static bool isAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
                                   SIInstrFlags::IsAtomicNoRet);
  }

  bool isAtomic(uint32_t Opcode) const {
    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
                                  SIInstrFlags::IsAtomicNoRet);
  }

  static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
    unsigned Opc = MI.getOpcode();
    // Exclude instructions that read FROM LDS (not write to it)
    return isLDSDMA(MI) && Opc != AMDGPU::BUFFER_STORE_LDS_DWORD &&
           Opc != AMDGPU::TENSOR_STORE_FROM_LDS_d2 &&
           Opc != AMDGPU::TENSOR_STORE_FROM_LDS_d4;
  }

  /// \returns true for the barrier opcodes that write SCC.
  static bool isSBarrierSCCWrite(unsigned Opcode) {
    return Opcode == AMDGPU::S_BARRIER_LEAVE ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0;
  }
840
841 static bool isCBranchVCCZRead(const MachineInstr &MI) {
842 unsigned Opc = MI.getOpcode();
843 return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
844 !MI.getOperand(i: 1).isUndef();
845 }
846
  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }

  // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR form a special case of
  // SGPRs spilling to VGPRs which are SGPR spills but from VALU instructions
  // therefore we need an explicit check for them since just checking if the
  // Spill bit is set and what instruction type it came from misclassifies
  // them.
  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(MI) && isVALU(MI));
  }

  bool isVGPRSpill(uint32_t Opcode) const {
    return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(Opcode) && isVALU(Opcode));
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(MI) && isSALU(MI));
  }

  bool isSGPRSpill(uint32_t Opcode) const {
    return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(Opcode) && isSALU(Opcode));
  }

  bool isSpill(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Spill;
  }

  static bool isSpill(const MCInstrDesc &Desc) {
    return Desc.TSFlags & SIInstrFlags::Spill;
  }
899
900 static bool isSpill(const MachineInstr &MI) { return isSpill(Desc: MI.getDesc()); }
901
  /// \returns true for the spill opcodes reserved for WWM registers.
  static bool isWWMRegSpillOpcode(uint32_t Opcode) {
    return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  }

  /// \returns true for the compute-shader chain tail-call pseudos.
  static bool isChainCallOpcode(uint64_t Opcode) {
    return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
           Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  bool isTRANS(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }
945
946 static bool isMAI(const MCInstrDesc &Desc) {
947 return Desc.TSFlags & SIInstrFlags::IsMAI;
948 }
949
950 static bool isMAI(const MachineInstr &MI) { return isMAI(Desc: MI.getDesc()); }
951
952 bool isMAI(uint32_t Opcode) const { return isMAI(Desc: get(Opcode)); }
953
  // MFMA: any MAI instruction except the accumulator-VGPR read/write movs.
  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  bool isMFMA(uint32_t Opcode) const {
    return isMAI(Opcode) && Opcode != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           Opcode != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  bool isWMMA(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }

  // Any matrix-multiply instruction: MFMA, WMMA, or sparse WMMA (SWMMAC).
  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
  }

  bool isMFMAorWMMA(uint32_t Opcode) const {
    return isMFMA(Opcode) || isWMMA(Opcode) || isSWMMAC(Opcode);
  }

  static bool isSWMMAC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isSWMMAC(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isDOT(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }

  bool isXDLWMMA(const MachineInstr &MI) const;

  bool isXDL(const MachineInstr &MI) const;
999
1000 static bool isDGEMM(unsigned Opcode) { return AMDGPU::getMAIIsDGEMM(Opc: Opcode); }
1001
  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  bool isLDSDIR(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  bool isVINTERP(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  /// \returns true if \p MI executes on the scalar unit (SALU or SMRD).
  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  static bool usesASYNC_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::ASYNC_CNT;
  }

  bool usesASYNC_CNT(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::ASYNC_CNT;
  }

  /// Most SOPK instructions treat the immediate as a signed 16-bit value;
  /// \returns true for the opcodes that zero-extend it instead.
  static bool sopkIsZext(unsigned Opcode) {
    return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
           Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
           Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
           Opcode == AMDGPU::S_GETREG_B32 ||
           Opcode == AMDGPU::S_GETREG_B32_const;
  }
1047
  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  /// \returns the subset of \p MI's TSFlags describing its clamp behavior
  /// (FP clamp, integer clamp, and lo/hi half clamping).
  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }
1105
  // Check to see if opcode is for a barrier start. Pre gfx12 this is just the
  // S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we want
  // to check for the barrier start (S_BARRIER_SIGNAL*)
  bool isBarrierStart(unsigned Opcode) const {
    return Opcode == AMDGPU::S_BARRIER ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
  }

  /// \returns true if \p Opcode is any barrier-related instruction: a barrier
  /// start, wait, init/join/leave, or a GWS barrier.
  bool isBarrier(unsigned Opcode) const {
    return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
           Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
           Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
           Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
           Opcode == AMDGPU::S_BARRIER_LEAVE || Opcode == AMDGPU::DS_GWS_INIT ||
           Opcode == AMDGPU::DS_GWS_BARRIER;
  }

  /// \returns true if \p Opc is one of the gfx12 global cache invalidate /
  /// writeback instructions.
  static bool isGFX12CacheInvOrWBInst(unsigned Opc) {
    return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
           Opc == AMDGPU::GLOBAL_WBINV;
  }

  /// \returns true if \p Opcode is a pseudo-scalar (V_S_*) f16
  /// transcendental instruction.
  static bool isF16PseudoScalarTrans(unsigned Opcode) {
    return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
           Opcode == AMDGPU::V_S_LOG_F16_e64 ||
           Opcode == AMDGPU::V_S_RCP_F16_e64 ||
           Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
           Opcode == AMDGPU::V_S_SQRT_F16_e64;
  }

  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool doesNotReadTiedSource(uint32_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  /// \returns true if \p Opcode is one of the scheduling-directive pseudos
  /// (SCHED_BARRIER, SCHED_GROUP_BARRIER, IGLP_OPT).
  bool isIGLP(unsigned Opcode) const {
    return Opcode == AMDGPU::SCHED_BARRIER ||
           Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }
1151
1152 bool isIGLP(const MachineInstr &MI) const { return isIGLP(Opcode: MI.getOpcode()); }
1153
  /// Return true if the instruction is mutually exclusive with all non-IGLP
  /// DAG mutations, requiring all other mutations to be disabled.
  bool isIGLPMutationOnly(unsigned Opcode) const {
    return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
  }
1159
  /// Map a "_soft" waitcnt pseudo to its regular (hard) form.
  /// \returns \p Opcode unchanged if it is not a soft waitcnt.
  static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
    switch (Opcode) {
    case AMDGPU::S_WAITCNT_soft:
      return AMDGPU::S_WAITCNT;
    case AMDGPU::S_WAITCNT_VSCNT_soft:
      return AMDGPU::S_WAITCNT_VSCNT;
    case AMDGPU::S_WAIT_LOADCNT_soft:
      return AMDGPU::S_WAIT_LOADCNT;
    case AMDGPU::S_WAIT_STORECNT_soft:
      return AMDGPU::S_WAIT_STORECNT;
    case AMDGPU::S_WAIT_SAMPLECNT_soft:
      return AMDGPU::S_WAIT_SAMPLECNT;
    case AMDGPU::S_WAIT_BVHCNT_soft:
      return AMDGPU::S_WAIT_BVHCNT;
    case AMDGPU::S_WAIT_DSCNT_soft:
      return AMDGPU::S_WAIT_DSCNT;
    case AMDGPU::S_WAIT_KMCNT_soft:
      return AMDGPU::S_WAIT_KMCNT;
    case AMDGPU::S_WAIT_XCNT_soft:
      return AMDGPU::S_WAIT_XCNT;
    default:
      return Opcode;
    }
  }

  /// \returns true if \p Opcode, after stripping any "_soft" suffix, is a
  /// wait-count instruction of any flavor.
  static bool isWaitcnt(unsigned Opcode) {
    switch (getNonSoftWaitcntOpcode(Opcode)) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_LOADCNT:
    case AMDGPU::S_WAIT_LOADCNT_DSCNT:
    case AMDGPU::S_WAIT_STORECNT:
    case AMDGPU::S_WAIT_STORECNT_DSCNT:
    case AMDGPU::S_WAIT_SAMPLECNT:
    case AMDGPU::S_WAIT_BVHCNT:
    case AMDGPU::S_WAIT_EXPCNT:
    case AMDGPU::S_WAIT_DSCNT:
    case AMDGPU::S_WAIT_KMCNT:
    case AMDGPU::S_WAIT_IDLE:
      return true;
    default:
      return false;
    }
  }
1207
1208 bool isVGPRCopy(const MachineInstr &MI) const {
1209 assert(isCopyInstr(MI));
1210 Register Dest = MI.getOperand(i: 0).getReg();
1211 const MachineFunction &MF = *MI.getMF();
1212 const MachineRegisterInfo &MRI = MF.getRegInfo();
1213 return !RI.isSGPRReg(MRI, Reg: Dest);
1214 }
1215
1216 bool hasVGPRUses(const MachineInstr &MI) const {
1217 const MachineFunction &MF = *MI.getMF();
1218 const MachineRegisterInfo &MRI = MF.getRegInfo();
1219 return llvm::any_of(Range: MI.explicit_uses(),
1220 P: [&MRI, this](const MachineOperand &MO) {
1221 return MO.isReg() && RI.isVGPR(MRI, Reg: MO.getReg());});
1222 }
1223
  /// Return true if the instruction modifies the mode register.
1225 static bool modifiesModeRegister(const MachineInstr &MI);
1226
1227 /// This function is used to determine if an instruction can be safely
1228 /// executed under EXEC = 0 without hardware error, indeterminate results,
1229 /// and/or visible effects on future vector execution or outside the shader.
1230 /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
1231 /// used in removing branches over short EXEC = 0 sequences.
1232 /// As such it embeds certain assumptions which may not apply to every case
1233 /// of EXEC = 0 execution.
1234 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
1235
1236 /// Returns true if the instruction could potentially depend on the value of
1237 /// exec. If false, exec dependencies may safely be ignored.
1238 bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
1239
1240 bool isInlineConstant(const APInt &Imm) const;
1241
1242 bool isInlineConstant(const APFloat &Imm) const;
1243
1244 // Returns true if this non-register operand definitely does not need to be
1245 // encoded as a 32-bit literal. Note that this function handles all kinds of
1246 // operands, not just immediates.
1247 //
1248 // Some operands like FrameIndexes could resolve to an inline immediate value
1249 // that will not require an additional 4-bytes; this function assumes that it
1250 // will.
1251 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const {
1252 if (!MO.isImm())
1253 return false;
1254 return isInlineConstant(ImmVal: MO.getImm(), OperandType);
1255 }
1256 bool isInlineConstant(int64_t ImmVal, uint8_t OperandType) const;
1257
1258 bool isInlineConstant(const MachineOperand &MO,
1259 const MCOperandInfo &OpInfo) const {
1260 return isInlineConstant(MO, OperandType: OpInfo.OperandType);
1261 }
1262
1263 /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would
1264 /// be an inline immediate.
1265 bool isInlineConstant(const MachineInstr &MI,
1266 const MachineOperand &UseMO,
1267 const MachineOperand &DefMO) const {
1268 assert(UseMO.getParent() == &MI);
1269 int OpIdx = UseMO.getOperandNo();
1270 if (OpIdx >= MI.getDesc().NumOperands)
1271 return false;
1272
1273 return isInlineConstant(MO: DefMO, OpInfo: MI.getDesc().operands()[OpIdx]);
1274 }
1275
1276 /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
1277 /// immediate.
1278 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
1279 const MachineOperand &MO = MI.getOperand(i: OpIdx);
1280 return isInlineConstant(MO, OperandType: MI.getDesc().operands()[OpIdx].OperandType);
1281 }
1282
1283 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
1284 int64_t ImmVal) const {
1285 if (OpIdx >= MI.getDesc().NumOperands)
1286 return false;
1287
1288 if (isCopyInstr(MI)) {
1289 unsigned Size = getOpSize(MI, OpNo: OpIdx);
1290 assert(Size == 8 || Size == 4);
1291
1292 uint8_t OpType = (Size == 8) ?
1293 AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
1294 return isInlineConstant(ImmVal, OperandType: OpType);
1295 }
1296
1297 return isInlineConstant(ImmVal, OperandType: MI.getDesc().operands()[OpIdx].OperandType);
1298 }
1299
1300 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
1301 const MachineOperand &MO) const {
1302 return isInlineConstant(MI, OpIdx, ImmVal: MO.getImm());
1303 }
1304
1305 bool isInlineConstant(const MachineOperand &MO) const {
1306 return isInlineConstant(MI: *MO.getParent(), OpIdx: MO.getOperandNo());
1307 }
1308
1309 bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
1310 const MachineOperand &MO) const;
1311
1312 bool isLiteralOperandLegal(const MCInstrDesc &InstDesc,
1313 const MCOperandInfo &OpInfo) const;
1314
1315 bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
1316 int64_t ImmVal) const;
1317
1318 bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
1319 const MachineOperand &MO) const {
1320 return isImmOperandLegal(InstDesc: MI.getDesc(), OpNo, MO);
1321 }
1322
1323 bool isNeverCoissue(MachineInstr &MI) const;
1324
1325 /// Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
1326 bool isLegalAV64PseudoImm(uint64_t Imm) const;
1327
1328 /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
1329 /// This function will return false if you pass it a 32-bit instruction.
1330 bool hasVALU32BitEncoding(unsigned Opcode) const;
1331
1332 bool physRegUsesConstantBus(const MachineOperand &Reg) const;
1333 bool regUsesConstantBus(const MachineOperand &Reg,
1334 const MachineRegisterInfo &MRI) const;
1335
1336 /// Returns true if this operand uses the constant bus.
1337 bool usesConstantBus(const MachineRegisterInfo &MRI,
1338 const MachineOperand &MO,
1339 const MCOperandInfo &OpInfo) const;
1340
1341 bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineInstr &MI,
1342 int OpIdx) const {
1343 return usesConstantBus(MRI, MO: MI.getOperand(i: OpIdx),
1344 OpInfo: MI.getDesc().operands()[OpIdx]);
1345 }
1346
1347 /// Return true if this instruction has any modifiers.
1348 /// e.g. src[012]_mod, omod, clamp.
1349 bool hasModifiers(unsigned Opcode) const;
1350
1351 bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const;
1352 bool hasAnyModifiersSet(const MachineInstr &MI) const;
1353
1354 bool canShrink(const MachineInstr &MI,
1355 const MachineRegisterInfo &MRI) const;
1356
1357 MachineInstr *buildShrunkInst(MachineInstr &MI,
1358 unsigned NewOpcode) const;
1359
1360 bool verifyInstruction(const MachineInstr &MI,
1361 StringRef &ErrInfo) const override;
1362
1363 unsigned getVALUOp(const MachineInstr &MI) const;
1364
1365 void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
1366 MachineBasicBlock::iterator MBBI,
1367 const DebugLoc &DL, Register Reg, bool IsSCCLive,
1368 SlotIndexes *Indexes = nullptr) const;
1369
1370 void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
1371 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1372 Register Reg, SlotIndexes *Indexes = nullptr) const;
1373
1374 MachineInstr *getWholeWaveFunctionSetup(MachineFunction &MF) const;
1375
  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, using the other operands to
  /// infer the correct register class.
1381 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
1382 unsigned OpNo) const;
1383
1384 /// Return the size in bytes of the operand OpNo on the given
1385 // instruction opcode.
1386 unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const {
1387 const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];
1388
1389 if (OpInfo.RegClass == -1) {
1390 // If this is an immediate operand, this must be a 32-bit literal.
1391 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
1392 return 4;
1393 }
1394
1395 return RI.getRegSizeInBits(RC: *RI.getRegClass(i: getOpRegClassID(OpInfo))) / 8;
1396 }
1397
1398 /// This form should usually be preferred since it handles operands
1399 /// with unknown register classes.
1400 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
1401 const MachineOperand &MO = MI.getOperand(i: OpNo);
1402 if (MO.isReg()) {
1403 if (unsigned SubReg = MO.getSubReg()) {
1404 return RI.getSubRegIdxSize(Idx: SubReg) / 8;
1405 }
1406 }
1407 return RI.getRegSizeInBits(RC: *getOpRegClass(MI, OpNo)) / 8;
1408 }
1409
1410 /// Legalize the \p OpIndex operand of this instruction by inserting
1411 /// a MOV. For example:
1412 /// ADD_I32_e32 VGPR0, 15
1413 /// to
1414 /// MOV VGPR1, 15
1415 /// ADD_I32_e32 VGPR0, VGPR1
1416 ///
1417 /// If the operand being legalized is a register, then a COPY will be used
1418 /// instead of MOV.
1419 void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
1420
1421 /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
1422 /// for \p MI.
1423 bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
1424 const MachineOperand *MO = nullptr) const;
1425
1426 /// Check if \p MO would be a valid operand for the given operand
1427 /// definition \p OpInfo. Note this does not attempt to validate constant bus
1428 /// restrictions (e.g. literal constant usage).
1429 bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1430 const MCOperandInfo &OpInfo,
1431 const MachineOperand &MO) const;
1432
1433 /// Check if \p MO (a register operand) is a legal register for the
1434 /// given operand description or operand index.
1435 /// The operand index version provide more legality checks
1436 bool isLegalRegOperand(const MachineRegisterInfo &MRI,
1437 const MCOperandInfo &OpInfo,
1438 const MachineOperand &MO) const;
1439 bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
1440 const MachineOperand &MO) const;
1441
1442 /// Check if \p MO would be a legal operand for gfx12+ packed math FP32
1443 /// instructions. Packed math FP32 instructions typically accept SGPRs or
1444 /// VGPRs as source operands. On gfx12+, if a source operand uses SGPRs, the
1445 /// HW can only read the first SGPR and use it for both the low and high
1446 /// operations.
1447 /// \p SrcN can be 0, 1, or 2, representing src0, src1, and src2,
1448 /// respectively. If \p MO is nullptr, the operand corresponding to SrcN will
1449 /// be used.
1450 bool isLegalGFX12PlusPackedMathFP32Operand(
1451 const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
1452 const MachineOperand *MO = nullptr) const;
1453
1454 /// Legalize operands in \p MI by either commuting it or inserting a
1455 /// copy of src1.
1456 void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1457
1458 /// Fix operands in \p MI to satisfy constant bus requirements.
1459 void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1460
1461 /// Copy a value from a VGPR (\p SrcReg) to SGPR. The desired register class
1462 /// for the dst register (\p DstRC) can be optionally supplied. This function
  /// can only be used when it is known that the value in SrcReg is the same
  /// across all threads in the wave.
1465 /// \returns The SGPR register that \p SrcReg was copied to.
1466 Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
1467 MachineRegisterInfo &MRI,
1468 const TargetRegisterClass *DstRC = nullptr) const;
1469
1470 void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1471 void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1472
1473 void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
1474 MachineBasicBlock::iterator I,
1475 const TargetRegisterClass *DstRC,
1476 MachineOperand &Op, MachineRegisterInfo &MRI,
1477 const DebugLoc &DL) const;
1478
1479 /// Legalize all operands in this instruction. This function may create new
1480 /// instructions and control-flow around \p MI. If present, \p MDT is
1481 /// updated.
1482 /// \returns A new basic block that contains \p MI if new blocks were created.
1483 MachineBasicBlock *
1484 legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
1485
1486 /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
1487 /// was moved to VGPR. \returns true if succeeded.
1488 bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
1489
1490 /// Fix operands in Inst to fix 16bit SALU to VALU lowering.
1491 void legalizeOperandsVALUt16(MachineInstr &Inst,
1492 MachineRegisterInfo &MRI) const;
1493 void legalizeOperandsVALUt16(MachineInstr &Inst, unsigned OpIdx,
1494 MachineRegisterInfo &MRI) const;
1495
  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of MachineInstructions
  /// in the \p WorkList to the VALU if necessary. If present, \p MDT is
  /// updated.
1500 void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;
1501
1502 void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
1503 MachineInstr &Inst) const;
1504
1505 void insertNoop(MachineBasicBlock &MBB,
1506 MachineBasicBlock::iterator MI) const override;
1507
1508 void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1509 unsigned Quantity) const override;
1510
1511 void insertReturn(MachineBasicBlock &MBB) const;
1512
  /// Build instructions that simulate the behavior of an `s_trap 2` instruction
1514 /// for hardware (namely, gfx11) that runs in PRIV=1 mode. There, s_trap is
1515 /// interpreted as a nop.
1516 MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
1517 MachineBasicBlock &MBB,
1518 MachineInstr &MI,
1519 const DebugLoc &DL) const;
1520
1521 /// Return the number of wait states that result from executing this
1522 /// instruction.
1523 static unsigned getNumWaitStates(const MachineInstr &MI);
1524
1525 /// Returns the operand named \p Op. If \p MI does not have an
1526 /// operand named \c Op, this function returns nullptr.
1527 LLVM_READONLY
1528 MachineOperand *getNamedOperand(MachineInstr &MI,
1529 AMDGPU::OpName OperandName) const;
1530
1531 LLVM_READONLY
1532 const MachineOperand *getNamedOperand(const MachineInstr &MI,
1533 AMDGPU::OpName OperandName) const {
1534 return getNamedOperand(MI&: const_cast<MachineInstr &>(MI), OperandName);
1535 }
1536
1537 /// Get required immediate operand
1538 int64_t getNamedImmOperand(const MachineInstr &MI,
1539 AMDGPU::OpName OperandName) const {
1540 int Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: OperandName);
1541 return MI.getOperand(i: Idx).getImm();
1542 }
1543
1544 uint64_t getDefaultRsrcDataFormat() const;
1545 uint64_t getScratchRsrcWords23() const;
1546
1547 bool isLowLatencyInstruction(const MachineInstr &MI) const;
1548 bool isHighLatencyDef(int Opc) const override;
1549
1550 /// Return the descriptor of the target-specific machine instruction
1551 /// that corresponds to the specified pseudo or native opcode.
1552 const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
1553 return get(Opcode: pseudoToMCOpcode(Opcode));
1554 }
1555
1556 Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1557 Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1558
1559 Register isLoadFromStackSlot(const MachineInstr &MI,
1560 int &FrameIndex) const override;
1561 Register isStoreToStackSlot(const MachineInstr &MI,
1562 int &FrameIndex) const override;
1563
1564 unsigned getInstBundleSize(const MachineInstr &MI) const;
1565 unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
1566
1567 bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
1568
1569 std::pair<unsigned, unsigned>
1570 decomposeMachineOperandsTargetFlags(unsigned TF) const override;
1571
1572 ArrayRef<std::pair<int, const char *>>
1573 getSerializableTargetIndices() const override;
1574
1575 ArrayRef<std::pair<unsigned, const char *>>
1576 getSerializableDirectMachineOperandTargetFlags() const override;
1577
1578 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
1579 getSerializableMachineMemOperandTargetFlags() const override;
1580
1581 ScheduleHazardRecognizer *
1582 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
1583 const ScheduleDAG *DAG) const override;
1584
1585 ScheduleHazardRecognizer *
1586 CreateTargetPostRAHazardRecognizer(const MachineFunction &MF,
1587 MachineLoopInfo *MLI) const override;
1588
1589 ScheduleHazardRecognizer *
1590 CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
1591 const ScheduleDAGMI *DAG) const override;
1592
1593 unsigned getLiveRangeSplitOpcode(Register Reg,
1594 const MachineFunction &MF) const override;
1595
1596 bool isBasicBlockPrologue(const MachineInstr &MI,
1597 Register Reg = Register()) const override;
1598
1599 bool canAddToBBProlog(const MachineInstr &MI) const;
1600
1601 MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
1602 MachineBasicBlock::iterator InsPt,
1603 const DebugLoc &DL, Register Src,
1604 Register Dst) const override;
1605
1606 MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
1607 MachineBasicBlock::iterator InsPt,
1608 const DebugLoc &DL, Register Src,
1609 unsigned SrcSubReg,
1610 Register Dst) const override;
1611
1612 bool isWave32() const;
1613
1614 /// Return a partially built integer add instruction without carry.
1615 /// Caller must add source operands.
1616 /// For pre-GFX9 it will generate unused carry destination operand.
1617 /// TODO: After GFX9 it should return a no-carry operation.
1618 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1619 MachineBasicBlock::iterator I,
1620 const DebugLoc &DL,
1621 Register DestReg) const;
1622
1623 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1624 MachineBasicBlock::iterator I,
1625 const DebugLoc &DL,
1626 Register DestReg,
1627 RegScavenger &RS) const;
1628
1629 static bool isKillTerminator(unsigned Opcode);
1630 const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
1631
1632 bool isLegalMUBUFImmOffset(unsigned Imm) const;
1633
1634 static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);
1635
1636 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1637 Align Alignment = Align(4)) const;
1638
1639 /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
1640 /// encoded instruction with the given \p FlatVariant.
1641 bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
1642 uint64_t FlatVariant) const;
1643
1644 /// Split \p COffsetVal into {immediate offset field, remainder offset}
1645 /// values.
1646 std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
1647 unsigned AddrSpace,
1648 uint64_t FlatVariant) const;
1649
1650 /// Returns true if negative offsets are allowed for the given \p FlatVariant.
1651 bool allowNegativeFlatOffset(uint64_t FlatVariant) const;
1652
1653 /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
1654 /// Return -1 if the target-specific opcode for the pseudo instruction does
1655 /// not exist. If Opcode is not a pseudo instruction, this is identity.
1656 int pseudoToMCOpcode(int Opcode) const;
1657
1658 /// \brief Check if this instruction should only be used by assembler.
1659 /// Return true if this opcode should not be used by codegen.
1660 bool isAsmOnlyOpcode(int MCOp) const;
1661
1662 void fixImplicitOperands(MachineInstr &MI) const;
1663
1664 MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
1665 ArrayRef<unsigned> Ops,
1666 MachineBasicBlock::iterator InsertPt,
1667 int FrameIndex,
1668 LiveIntervals *LIS = nullptr,
1669 VirtRegMap *VRM = nullptr) const override;
1670
1671 unsigned getInstrLatency(const InstrItineraryData *ItinData,
1672 const MachineInstr &MI,
1673 unsigned *PredCost = nullptr) const override;
1674
1675 const MachineOperand &getCalleeOperand(const MachineInstr &MI) const override;
1676
1677 InstructionUniformity
1678 getInstructionUniformity(const MachineInstr &MI) const final;
1679
1680 InstructionUniformity
1681 getGenericInstructionUniformity(const MachineInstr &MI) const;
1682
1683 const MIRFormatter *getMIRFormatter() const override;
1684
1685 static unsigned getDSShaderTypeValue(const MachineFunction &MF);
1686
  /// \returns the target scheduling model held by this instruction info.
  const TargetSchedModel &getSchedModel() const { return SchedModel; }
1688
1689 // FIXME: This should be removed
1690 // Enforce operand's \p OpName even alignment if required by target.
1691 // This is used if an operand is a 32 bit register but needs to be aligned
1692 // regardless.
1693 void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const;
1694};
1695
1696/// \brief Returns true if a reg:subreg pair P has a TRC class
1697inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1698 const TargetRegisterClass &TRC,
1699 MachineRegisterInfo &MRI) {
1700 auto *RC = MRI.getRegClass(Reg: P.Reg);
1701 if (!P.SubReg)
1702 return RC == &TRC;
1703 auto *TRI = MRI.getTargetRegisterInfo();
1704 return RC == TRI->getMatchingSuperRegClass(A: RC, B: &TRC, Idx: P.SubReg);
1705}
1706
1707/// \brief Create RegSubRegPair from a register MachineOperand
1708inline
1709TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1710 assert(O.isReg());
1711 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
1712}
1713
1714/// \brief Return the SubReg component from REG_SEQUENCE
1715TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
1716 unsigned SubReg);
1717
1718/// \brief Return the defining instruction for a given reg:subreg pair
1719/// skipping copy like instructions and subreg-manipulation pseudos.
1720/// Following another subreg of a reg:subreg isn't supported.
1721MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
1722 const MachineRegisterInfo &MRI);
1723
1724/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1725/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
1726/// attempt to track between blocks.
1727bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
1728 Register VReg,
1729 const MachineInstr &DefMI,
1730 const MachineInstr &UseMI);
1731
1732/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1733/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
1734/// track between blocks.
1735bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
1736 Register VReg,
1737 const MachineInstr &DefMI);
1738
namespace AMDGPU {

  // Opcode-equivalence queries. Each function below maps an instruction
  // opcode to a related encoding/form of the same instruction (the
  // definitions are generated; see the AMDGPUGenInstrInfo.inc include at the
  // top of this file). By convention they return -1 when no such form exists
  // — explicitly documented for getIfAddr64Inst below; presumably the same
  // for the rest.

  /// \returns the 64-bit-encoded form (e.g. VOP3) of \p Opcode.
  LLVM_READONLY
  int32_t getVOPe64(uint32_t Opcode);

  /// \returns the 32-bit-encoded form of \p Opcode.
  LLVM_READONLY
  int32_t getVOPe32(uint32_t Opcode);

  /// \returns the SDWA form of \p Opcode.
  LLVM_READONLY
  int32_t getSDWAOp(uint32_t Opcode);

  /// \returns the 32-bit DPP form of \p Opcode.
  LLVM_READONLY
  int32_t getDPPOp32(uint32_t Opcode);

  /// \returns the 64-bit DPP form of \p Opcode.
  LLVM_READONLY
  int32_t getDPPOp64(uint32_t Opcode);

  /// \returns the basic (non-SDWA) form of an SDWA \p Opcode
  /// (inverse of getSDWAOp).
  LLVM_READONLY
  int32_t getBasicFromSDWAOp(uint32_t Opcode);

  /// \returns the operand-commuted ("reverse") counterpart of \p Opcode.
  LLVM_READONLY
  int32_t getCommuteRev(uint32_t Opcode);

  /// \returns the original opcode of a commuted ("reverse") \p Opcode
  /// (inverse of getCommuteRev).
  LLVM_READONLY
  int32_t getCommuteOrig(uint32_t Opcode);

  /// \returns the Addr64 form of \p Opcode.
  LLVM_READONLY
  int32_t getAddr64Inst(uint32_t Opcode);

  /// Check if \p Opcode is an Addr64 opcode.
  ///
  /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
  LLVM_READONLY
  int32_t getIfAddr64Inst(uint32_t Opcode);

  /// \returns the SOPK form of \p Opcode.
  LLVM_READONLY
  int32_t getSOPKOp(uint32_t Opcode);

  /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
  /// of a VADDR form.
  LLVM_READONLY
  int32_t getGlobalSaddrOp(uint32_t Opcode);

  /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
  /// of a SADDR form.
  LLVM_READONLY
  int32_t getGlobalVaddrOp(uint32_t Opcode);

  /// \returns the form of a V_CMPX instruction without the SDst operand.
  LLVM_READONLY
  int32_t getVCMPXNoSDstOp(uint32_t Opcode);

  /// \returns ST form with only immediate offset of a FLAT Scratch instruction
  /// given an \p Opcode of an SS (SADDR) form.
  LLVM_READONLY
  int32_t getFlatScratchInstSTfromSS(uint32_t Opcode);

  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SVS (SADDR + VADDR) form.
  LLVM_READONLY
  int32_t getFlatScratchInstSVfromSVS(uint32_t Opcode);

  /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SV (VADDR) form.
  LLVM_READONLY
  int32_t getFlatScratchInstSSfromSV(uint32_t Opcode);

  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SS (SADDR) form.
  LLVM_READONLY
  int32_t getFlatScratchInstSVfromSS(uint32_t Opcode);

  /// \returns earlyclobber version of a MAC MFMA, if one exists.
  LLVM_READONLY
  int32_t getMFMAEarlyClobberOp(uint32_t Opcode);

  /// \returns Version of an MFMA instruction which uses AGPRs for srcC and
  /// vdst, given an \p Opcode of an MFMA which uses VGPRs for srcC/vdst.
  LLVM_READONLY
  int32_t getMFMASrcCVDstAGPROp(uint32_t Opcode);

  /// \returns v_cmpx version of a v_cmp instruction.
  LLVM_READONLY
  int32_t getVCMPXOpFromVCMP(uint32_t Opcode);

  // Buffer resource descriptor fields. These values/shifts address the upper
  // 64 bits of the 128-bit descriptor, hence the "32 +" offsets.
  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; // bits [47:44]
  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); // bit 51
  const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); // bit 53
  const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23); // bit 55

} // end namespace AMDGPU
1829
namespace AMDGPU {
// Target-specific AsmPrinter comment flags. Values start at
// MachineInstr::TAsmComments, the first bit reserved for target use.
enum AsmComments {
  // For sgpr to vgpr spill instructions
  SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU
1836
namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer.
/// Each field occupies 4 bytes (consecutive offsets differ by 4).
enum Offsets {
  // Number of work groups in each dimension.
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  // Global (grid) size in each dimension.
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  // Work-group (local) size in each dimension.
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI
1855
1856} // end namespace llvm
1857
1858#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1859