1//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Interface definition for SIInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16
17#include "AMDGPUMIRFormatter.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "SIRegisterInfo.h"
20#include "Utils/AMDGPUBaseInfo.h"
21#include "llvm/ADT/SetVector.h"
22#include "llvm/CodeGen/TargetInstrInfo.h"
23#include "llvm/CodeGen/TargetSchedule.h"
24
25#define GET_INSTRINFO_HEADER
26#include "AMDGPUGenInstrInfo.inc"
27
28namespace llvm {
29
30class APInt;
31class GCNSubtarget;
32class LiveVariables;
33class MachineDominatorTree;
34class MachineRegisterInfo;
35class RegScavenger;
36class SIMachineFunctionInfo;
37class TargetRegisterClass;
38class ScheduleHazardRecognizer;
39
40constexpr unsigned DefaultMemoryClusterDWordsLimit = 8;
41
42/// Mark the MMO of a uniform load if there are no potentially clobbering stores
43/// on any path from the start of an entry function to this load.
44static const MachineMemOperand::Flags MONoClobber =
45 MachineMemOperand::MOTargetFlag1;
46
47/// Mark the MMO of a load as the last use.
48static const MachineMemOperand::Flags MOLastUse =
49 MachineMemOperand::MOTargetFlag2;
50
51/// Mark the MMO of cooperative load/store atomics.
52static const MachineMemOperand::Flags MOCooperative =
53 MachineMemOperand::MOTargetFlag3;
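
// Illustrative sketch only (not part of this interface): a pass can query
// these target flags on an instruction's memory operands, for example:
//
//   for (const MachineMemOperand *MMO : MI.memoperands())
//     if (MMO->getFlags() & MONoClobber)
//       ; // Load is known not to be clobbered on any path from the entry.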
54
/// Utility to store a worklist of machine instructions.
56struct SIInstrWorklist {
57 SIInstrWorklist() = default;
58
59 void insert(MachineInstr *MI);
60
61 MachineInstr *top() const {
62 const auto *iter = InstrList.begin();
63 return *iter;
64 }
65
66 void erase_top() {
67 const auto *iter = InstrList.begin();
    InstrList.erase(iter);
69 }
70
71 bool empty() const { return InstrList.empty(); }
72
73 void clear() {
74 InstrList.clear();
75 DeferredList.clear();
76 }
77
78 bool isDeferred(MachineInstr *MI);
79
80 SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }
81
82private:
83 /// InstrList contains the MachineInstrs.
84 SetVector<MachineInstr *> InstrList;
  /// DeferredList holds specific MachineInstrs that the insert method defers
  /// instead of adding them to InstrList.
87 SetVector<MachineInstr *> DeferredList;
88};
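
// Hedged usage sketch (illustrative only, not required by this header): the
// worklist is typically drained like a stack of unique instructions:
//
//   SIInstrWorklist Worklist;
//   Worklist.insert(&MI);
//   while (!Worklist.empty()) {
//     MachineInstr *Cur = Worklist.top();
//     Worklist.erase_top();
//     // Process Cur here, possibly inserting its users.
//   }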
89
90class SIInstrInfo final : public AMDGPUGenInstrInfo {
91 struct ThreeAddressUpdates;
92
93private:
94 const SIRegisterInfo RI;
95 const GCNSubtarget &ST;
96 TargetSchedModel SchedModel;
97 mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
98
99 // The inverse predicate should have the negative value.
100 enum BranchPredicate {
101 INVALID_BR = 0,
102 SCC_TRUE = 1,
103 SCC_FALSE = -1,
104 VCCNZ = 2,
105 VCCZ = -2,
106 EXECNZ = -3,
107 EXECZ = 3
108 };
109
110 using SetVectorType = SmallSetVector<MachineInstr *, 32>;
111
112 static unsigned getBranchOpcode(BranchPredicate Cond);
113 static BranchPredicate getBranchPredicate(unsigned Opcode);
114
115public:
116 unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
117 MachineRegisterInfo &MRI,
118 const MachineOperand &SuperReg,
119 const TargetRegisterClass *SuperRC,
120 unsigned SubIdx,
121 const TargetRegisterClass *SubRC) const;
122 MachineOperand buildExtractSubRegOrImm(
123 MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
124 const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
125 unsigned SubIdx, const TargetRegisterClass *SubRC) const;
126
127private:
128 bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine,
129 bool NeedInversion) const;
130
131 bool invertSCCUse(MachineInstr *SCCDef) const;
132
133 void swapOperands(MachineInstr &Inst) const;
134
135 std::pair<bool, MachineBasicBlock *>
136 moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
137 MachineDominatorTree *MDT = nullptr) const;
138
139 void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
140 MachineDominatorTree *MDT = nullptr) const;
141
142 void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
143
144 void lowerScalarAbsDiff(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
145
146 void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
147
148 void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
149 unsigned Opcode) const;
150
151 void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
152 unsigned Opcode) const;
153
154 void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
155 unsigned Opcode, bool Swap = false) const;
156
157 void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
158 unsigned Opcode,
159 MachineDominatorTree *MDT = nullptr) const;
160
161 void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
162 MachineDominatorTree *MDT) const;
163
164 void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
165 MachineDominatorTree *MDT) const;
166
167 void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
168 MachineDominatorTree *MDT = nullptr) const;
169
170 void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
171 MachineInstr &Inst) const;
172 void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
173 void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
174 unsigned Opcode,
175 MachineDominatorTree *MDT = nullptr) const;
176 void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
177 MachineInstr &Inst) const;
178
179 void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
180 SIInstrWorklist &Worklist) const;
181
182 void addSCCDefUsersToVALUWorklist(const MachineOperand &Op,
183 MachineInstr &SCCDefInst,
184 SIInstrWorklist &Worklist,
185 Register NewCond = Register()) const;
186 void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
187 SIInstrWorklist &Worklist) const;
188
189 const TargetRegisterClass *
190 getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
191
192 bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
193 const MachineInstr &MIb) const;
194
195 Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
196
197 bool verifyCopy(const MachineInstr &MI, const MachineRegisterInfo &MRI,
198 StringRef &ErrInfo) const;
199
200 bool resultDependsOnExec(const MachineInstr &MI) const;
201
202 MachineInstr *convertToThreeAddressImpl(MachineInstr &MI,
203 ThreeAddressUpdates &Updates) const;
204
205protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another, return the destination and source
  /// registers as machine operands.
209 std::optional<DestSourcePair>
210 isCopyInstrImpl(const MachineInstr &MI) const override;
211
212 bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0,
213 AMDGPU::OpName Src0OpName, MachineOperand &Src1,
214 AMDGPU::OpName Src1OpName) const;
215 bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx,
216 unsigned toIdx) const;
217 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
218 unsigned OpIdx0,
219 unsigned OpIdx1) const override;
220
221public:
222 enum TargetOperandFlags {
223 MO_MASK = 0xf,
224
225 MO_NONE = 0,
226 // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
227 MO_GOTPCREL = 1,
228 // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
229 MO_GOTPCREL32 = 2,
230 MO_GOTPCREL32_LO = 2,
231 // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
232 MO_GOTPCREL32_HI = 3,
233 // MO_GOTPCREL64 -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
234 MO_GOTPCREL64 = 4,
235 // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
236 MO_REL32 = 5,
237 MO_REL32_LO = 5,
238 // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
239 MO_REL32_HI = 6,
240 MO_REL64 = 7,
241
242 MO_FAR_BRANCH_OFFSET = 8,
243
244 MO_ABS32_LO = 9,
245 MO_ABS32_HI = 10,
246 MO_ABS64 = 11,
247 };
248
249 explicit SIInstrInfo(const GCNSubtarget &ST);
250
251 const SIRegisterInfo &getRegisterInfo() const {
252 return RI;
253 }
254
255 const GCNSubtarget &getSubtarget() const {
256 return ST;
257 }
258
259 bool isReMaterializableImpl(const MachineInstr &MI) const override;
260
261 bool isIgnorableUse(const MachineOperand &MO) const override;
262
263 bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
264 MachineCycleInfo *CI) const override;
265
266 bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
267 int64_t &Offset1) const override;
268
269 bool isGlobalMemoryObject(const MachineInstr *MI) const override;
270
271 bool getMemOperandsWithOffsetWidth(
272 const MachineInstr &LdSt,
273 SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
274 bool &OffsetIsScalable, LocationSize &Width,
275 const TargetRegisterInfo *TRI) const final;
276
277 bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
278 int64_t Offset1, bool OffsetIsScalable1,
279 ArrayRef<const MachineOperand *> BaseOps2,
280 int64_t Offset2, bool OffsetIsScalable2,
281 unsigned ClusterSize,
282 unsigned NumBytes) const override;
283
284 bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
285 int64_t Offset1, unsigned NumLoads) const override;
286
287 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
288 const DebugLoc &DL, Register DestReg, Register SrcReg,
289 bool KillSrc, bool RenamableDest = false,
290 bool RenamableSrc = false) const override;
291
292 const TargetRegisterClass *getPreferredSelectRegClass(
293 unsigned Size) const;
294
295 Register insertNE(MachineBasicBlock *MBB,
296 MachineBasicBlock::iterator I, const DebugLoc &DL,
297 Register SrcReg, int Value) const;
298
299 Register insertEQ(MachineBasicBlock *MBB,
300 MachineBasicBlock::iterator I, const DebugLoc &DL,
301 Register SrcReg, int Value) const;
302
303 bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg,
304 int64_t &ImmVal) const override;
305
306 std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;
307
308 unsigned getVectorRegSpillSaveOpcode(Register Reg,
309 const TargetRegisterClass *RC,
310 unsigned Size,
311 const SIMachineFunctionInfo &MFI) const;
312 unsigned
313 getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
314 unsigned Size,
315 const SIMachineFunctionInfo &MFI) const;
316
317 void storeRegToStackSlot(
318 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
319 bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
320 MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
321
322 void loadRegFromStackSlot(
323 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
324 int FrameIndex, const TargetRegisterClass *RC, Register VReg,
325 unsigned SubReg = 0,
326 MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
327
328 bool expandPostRAPseudo(MachineInstr &MI) const override;
329
330 void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
331 Register DestReg, unsigned SubIdx,
332 const MachineInstr &Orig) const override;
333
334 // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
335 // instructions. Returns a pair of generated instructions.
336 // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form,
  // and a REG_SEQUENCE is produced to define the original register.
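  // Illustrative use (sketch only): pre-RA, both generated halves can be
  // inspected, e.g.
  //   auto [Lo, Hi] = expandMovDPP64(MI);
  //   if (Lo && Hi) { /* both 32-bit halves were created */ }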
339 std::pair<MachineInstr*, MachineInstr*>
340 expandMovDPP64(MachineInstr &MI) const;
341
342 // Returns an opcode that can be used to move a value to a \p DstRC
343 // register. If there is no hardware instruction that can store to \p
344 // DstRC, then AMDGPU::COPY is returned.
345 unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
346
347 const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
348 unsigned EltSize,
349 bool IsSGPR) const;
350
351 const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
352 bool IsIndirectSrc) const;
353 LLVM_READONLY
354 int commuteOpcode(unsigned Opc) const;
355
356 LLVM_READONLY
357 inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
359 }
360
361 bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
362 unsigned &SrcOpIdx1) const override;
363
364 bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
365 unsigned &SrcOpIdx1) const;
366
367 bool isBranchOffsetInRange(unsigned BranchOpc,
368 int64_t BrOffset) const override;
369
370 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
371
  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow
  /// instructions.
374 bool hasDivergentBranch(const MachineBasicBlock *MBB) const;
375
376 void insertIndirectBranch(MachineBasicBlock &MBB,
377 MachineBasicBlock &NewDestBB,
378 MachineBasicBlock &RestoreBB, const DebugLoc &DL,
379 int64_t BrOffset, RegScavenger *RS) const override;
380
381 bool analyzeBranchImpl(MachineBasicBlock &MBB,
382 MachineBasicBlock::iterator I,
383 MachineBasicBlock *&TBB,
384 MachineBasicBlock *&FBB,
385 SmallVectorImpl<MachineOperand> &Cond,
386 bool AllowModify) const;
387
388 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
389 MachineBasicBlock *&FBB,
390 SmallVectorImpl<MachineOperand> &Cond,
391 bool AllowModify = false) const override;
392
393 unsigned removeBranch(MachineBasicBlock &MBB,
394 int *BytesRemoved = nullptr) const override;
395
396 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
397 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
398 const DebugLoc &DL,
399 int *BytesAdded = nullptr) const override;
400
401 bool reverseBranchCondition(
402 SmallVectorImpl<MachineOperand> &Cond) const override;
403
404 bool canInsertSelect(const MachineBasicBlock &MBB,
405 ArrayRef<MachineOperand> Cond, Register DstReg,
406 Register TrueReg, Register FalseReg, int &CondCycles,
407 int &TrueCycles, int &FalseCycles) const override;
408
409 void insertSelect(MachineBasicBlock &MBB,
410 MachineBasicBlock::iterator I, const DebugLoc &DL,
411 Register DstReg, ArrayRef<MachineOperand> Cond,
412 Register TrueReg, Register FalseReg) const override;
413
414 void insertVectorSelect(MachineBasicBlock &MBB,
415 MachineBasicBlock::iterator I, const DebugLoc &DL,
416 Register DstReg, ArrayRef<MachineOperand> Cond,
417 Register TrueReg, Register FalseReg) const;
418
419 bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
420 Register &SrcReg2, int64_t &CmpMask,
421 int64_t &CmpValue) const override;
422
423 bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
424 Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
425 const MachineRegisterInfo *MRI) const override;
426
427 bool
428 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
429 const MachineInstr &MIb) const override;
430
431 static bool isFoldableCopy(const MachineInstr &MI);
432 static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
433
434 void removeModOperands(MachineInstr &MI) const;
435
436 void mutateAndCleanupImplicit(MachineInstr &MI,
437 const MCInstrDesc &NewDesc) const;
438
439 /// Return the extracted immediate value in a subregister use from a constant
440 /// materialized in a super register.
441 ///
442 /// e.g. %imm = S_MOV_B64 K[0:63]
443 /// USE %imm.sub1
444 /// This will return K[32:63]
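  ///
  /// For instance (illustrative values),
  ///   extractSubregFromImm(0x123456789ABCDEF0, AMDGPU::sub1)
  /// yields 0x12345678, i.e. bits [32:63] of the immediate.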
445 static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal,
446 unsigned SubRegIndex);
447
448 bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
449 MachineRegisterInfo *MRI) const final;
450
451 unsigned getMachineCSELookAheadLimit() const override { return 500; }
452
453 MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
454 LiveIntervals *LIS) const override;
455
456 bool isSchedulingBoundary(const MachineInstr &MI,
457 const MachineBasicBlock *MBB,
458 const MachineFunction &MF) const override;
459
460 static bool isSALU(const MachineInstr &MI) {
461 return MI.getDesc().TSFlags & SIInstrFlags::SALU;
462 }
463
464 bool isSALU(uint16_t Opcode) const {
465 return get(Opcode).TSFlags & SIInstrFlags::SALU;
466 }
467
468 static bool isVALU(const MachineInstr &MI) {
469 return MI.getDesc().TSFlags & SIInstrFlags::VALU;
470 }
471
472 bool isVALU(uint16_t Opcode) const {
473 return get(Opcode).TSFlags & SIInstrFlags::VALU;
474 }
475
476 static bool isImage(const MachineInstr &MI) {
477 return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
478 }
479
480 bool isImage(uint16_t Opcode) const {
481 return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
482 }
483
484 static bool isVMEM(const MachineInstr &MI) {
485 return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI);
486 }
487
488 bool isVMEM(uint16_t Opcode) const {
489 return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
490 }
491
492 static bool isSOP1(const MachineInstr &MI) {
493 return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
494 }
495
496 bool isSOP1(uint16_t Opcode) const {
497 return get(Opcode).TSFlags & SIInstrFlags::SOP1;
498 }
499
500 static bool isSOP2(const MachineInstr &MI) {
501 return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
502 }
503
504 bool isSOP2(uint16_t Opcode) const {
505 return get(Opcode).TSFlags & SIInstrFlags::SOP2;
506 }
507
508 static bool isSOPC(const MachineInstr &MI) {
509 return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
510 }
511
512 bool isSOPC(uint16_t Opcode) const {
513 return get(Opcode).TSFlags & SIInstrFlags::SOPC;
514 }
515
516 static bool isSOPK(const MachineInstr &MI) {
517 return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
518 }
519
520 bool isSOPK(uint16_t Opcode) const {
521 return get(Opcode).TSFlags & SIInstrFlags::SOPK;
522 }
523
524 static bool isSOPP(const MachineInstr &MI) {
525 return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
526 }
527
528 bool isSOPP(uint16_t Opcode) const {
529 return get(Opcode).TSFlags & SIInstrFlags::SOPP;
530 }
531
532 static bool isPacked(const MachineInstr &MI) {
533 return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
534 }
535
536 bool isPacked(uint16_t Opcode) const {
537 return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
538 }
539
540 static bool isVOP1(const MachineInstr &MI) {
541 return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
542 }
543
544 bool isVOP1(uint16_t Opcode) const {
545 return get(Opcode).TSFlags & SIInstrFlags::VOP1;
546 }
547
548 static bool isVOP2(const MachineInstr &MI) {
549 return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
550 }
551
552 bool isVOP2(uint16_t Opcode) const {
553 return get(Opcode).TSFlags & SIInstrFlags::VOP2;
554 }
555
556 static bool isVOP3(const MCInstrDesc &Desc) {
557 return Desc.TSFlags & SIInstrFlags::VOP3;
558 }
559
  static bool isVOP3(const MachineInstr &MI) { return isVOP3(MI.getDesc()); }

  bool isVOP3(uint16_t Opcode) const { return isVOP3(get(Opcode)); }
563
564 static bool isSDWA(const MachineInstr &MI) {
565 return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
566 }
567
568 bool isSDWA(uint16_t Opcode) const {
569 return get(Opcode).TSFlags & SIInstrFlags::SDWA;
570 }
571
572 static bool isVOPC(const MachineInstr &MI) {
573 return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
574 }
575
576 bool isVOPC(uint16_t Opcode) const {
577 return get(Opcode).TSFlags & SIInstrFlags::VOPC;
578 }
579
580 static bool isMUBUF(const MachineInstr &MI) {
581 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
582 }
583
584 bool isMUBUF(uint16_t Opcode) const {
585 return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
586 }
587
588 static bool isMTBUF(const MachineInstr &MI) {
589 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
590 }
591
592 bool isMTBUF(uint16_t Opcode) const {
593 return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
594 }
595
596 static bool isBUF(const MachineInstr &MI) {
597 return isMUBUF(MI) || isMTBUF(MI);
598 }
599
600 static bool isSMRD(const MachineInstr &MI) {
601 return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
602 }
603
604 bool isSMRD(uint16_t Opcode) const {
605 return get(Opcode).TSFlags & SIInstrFlags::SMRD;
606 }
607
608 bool isBufferSMRD(const MachineInstr &MI) const;
609
610 static bool isDS(const MachineInstr &MI) {
611 return MI.getDesc().TSFlags & SIInstrFlags::DS;
612 }
613
614 bool isDS(uint16_t Opcode) const {
615 return get(Opcode).TSFlags & SIInstrFlags::DS;
616 }
617
618 static bool isLDSDMA(const MachineInstr &MI) {
619 return (isVALU(MI) && (isMUBUF(MI) || isFLAT(MI))) ||
620 (MI.getDesc().TSFlags & SIInstrFlags::TENSOR_CNT);
621 }
622
623 bool isLDSDMA(uint16_t Opcode) {
624 return (isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode))) ||
625 (get(Opcode).TSFlags & SIInstrFlags::TENSOR_CNT);
626 }
627
628 static bool isGWS(const MachineInstr &MI) {
629 return MI.getDesc().TSFlags & SIInstrFlags::GWS;
630 }
631
632 bool isGWS(uint16_t Opcode) const {
633 return get(Opcode).TSFlags & SIInstrFlags::GWS;
634 }
635
636 bool isAlwaysGDS(uint16_t Opcode) const;
637
638 static bool isMIMG(const MachineInstr &MI) {
639 return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
640 }
641
642 bool isMIMG(uint16_t Opcode) const {
643 return get(Opcode).TSFlags & SIInstrFlags::MIMG;
644 }
645
646 static bool isVIMAGE(const MachineInstr &MI) {
647 return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
648 }
649
650 bool isVIMAGE(uint16_t Opcode) const {
651 return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
652 }
653
654 static bool isVSAMPLE(const MachineInstr &MI) {
655 return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
656 }
657
658 bool isVSAMPLE(uint16_t Opcode) const {
659 return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
660 }
661
662 static bool isGather4(const MachineInstr &MI) {
663 return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
664 }
665
666 bool isGather4(uint16_t Opcode) const {
667 return get(Opcode).TSFlags & SIInstrFlags::Gather4;
668 }
669
670 static bool isFLAT(const MachineInstr &MI) {
671 return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
672 }
673
674 // Is a FLAT encoded instruction which accesses a specific segment,
675 // i.e. global_* or scratch_*.
676 static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
677 auto Flags = MI.getDesc().TSFlags;
678 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
679 }
680
681 bool isSegmentSpecificFLAT(uint16_t Opcode) const {
682 auto Flags = get(Opcode).TSFlags;
683 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
684 }
685
686 static bool isFLATGlobal(const MachineInstr &MI) {
687 return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
688 }
689
690 bool isFLATGlobal(uint16_t Opcode) const {
691 return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
692 }
693
694 static bool isFLATScratch(const MachineInstr &MI) {
695 return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
696 }
697
698 bool isFLATScratch(uint16_t Opcode) const {
699 return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
700 }
701
702 // Any FLAT encoded instruction, including global_* and scratch_*.
703 bool isFLAT(uint16_t Opcode) const {
704 return get(Opcode).TSFlags & SIInstrFlags::FLAT;
705 }
706
  /// \returns true for SCRATCH_ instructions, and for FLAT/BUF instructions
  /// unless their MMOs show that scratch is not accessed.
  /// Conservatively correct; will return true if \p MI cannot be proven
  /// not to access scratch.
711 bool mayAccessScratch(const MachineInstr &MI) const;
712
713 /// \returns true for FLAT instructions that can access VMEM.
714 bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const;
715
716 /// \returns true for FLAT instructions that can access LDS.
717 bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
718
719 static bool isBlockLoadStore(uint16_t Opcode) {
720 switch (Opcode) {
721 case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
722 case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
723 case AMDGPU::SCRATCH_STORE_BLOCK_SADDR:
724 case AMDGPU::SCRATCH_LOAD_BLOCK_SADDR:
725 case AMDGPU::SCRATCH_STORE_BLOCK_SVS:
726 case AMDGPU::SCRATCH_LOAD_BLOCK_SVS:
727 return true;
728 default:
729 return false;
730 }
731 }
732
733 static bool setsSCCIfResultIsNonZero(const MachineInstr &MI) {
734 switch (MI.getOpcode()) {
735 case AMDGPU::S_ABSDIFF_I32:
736 case AMDGPU::S_ABS_I32:
737 case AMDGPU::S_AND_B32:
738 case AMDGPU::S_AND_B64:
739 case AMDGPU::S_ANDN2_B32:
740 case AMDGPU::S_ANDN2_B64:
741 case AMDGPU::S_ASHR_I32:
742 case AMDGPU::S_ASHR_I64:
743 case AMDGPU::S_BCNT0_I32_B32:
744 case AMDGPU::S_BCNT0_I32_B64:
745 case AMDGPU::S_BCNT1_I32_B32:
746 case AMDGPU::S_BCNT1_I32_B64:
747 case AMDGPU::S_BFE_I32:
748 case AMDGPU::S_BFE_I64:
749 case AMDGPU::S_BFE_U32:
750 case AMDGPU::S_BFE_U64:
751 case AMDGPU::S_LSHL_B32:
752 case AMDGPU::S_LSHL_B64:
753 case AMDGPU::S_LSHR_B32:
754 case AMDGPU::S_LSHR_B64:
755 case AMDGPU::S_NAND_B32:
756 case AMDGPU::S_NAND_B64:
757 case AMDGPU::S_NOR_B32:
758 case AMDGPU::S_NOR_B64:
759 case AMDGPU::S_NOT_B32:
760 case AMDGPU::S_NOT_B64:
761 case AMDGPU::S_OR_B32:
762 case AMDGPU::S_OR_B64:
763 case AMDGPU::S_ORN2_B32:
764 case AMDGPU::S_ORN2_B64:
765 case AMDGPU::S_QUADMASK_B32:
766 case AMDGPU::S_QUADMASK_B64:
767 case AMDGPU::S_WQM_B32:
768 case AMDGPU::S_WQM_B64:
769 case AMDGPU::S_XNOR_B32:
770 case AMDGPU::S_XNOR_B64:
771 case AMDGPU::S_XOR_B32:
772 case AMDGPU::S_XOR_B64:
773 return true;
774 default:
775 return false;
776 }
777 }
778
779 static bool isEXP(const MachineInstr &MI) {
780 return MI.getDesc().TSFlags & SIInstrFlags::EXP;
781 }
782
783 static bool isDualSourceBlendEXP(const MachineInstr &MI) {
784 if (!isEXP(MI))
785 return false;
    unsigned Target = MI.getOperand(0).getImm();
787 return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
788 Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
789 }
790
791 bool isEXP(uint16_t Opcode) const {
792 return get(Opcode).TSFlags & SIInstrFlags::EXP;
793 }
794
795 static bool isAtomicNoRet(const MachineInstr &MI) {
796 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
797 }
798
799 bool isAtomicNoRet(uint16_t Opcode) const {
800 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
801 }
802
803 static bool isAtomicRet(const MachineInstr &MI) {
804 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
805 }
806
807 bool isAtomicRet(uint16_t Opcode) const {
808 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
809 }
810
811 static bool isAtomic(const MachineInstr &MI) {
812 return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
813 SIInstrFlags::IsAtomicNoRet);
814 }
815
816 bool isAtomic(uint16_t Opcode) const {
817 return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
818 SIInstrFlags::IsAtomicNoRet);
819 }
820
821 static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
822 unsigned Opc = MI.getOpcode();
823 // Exclude instructions that read FROM LDS (not write to it)
824 return isLDSDMA(MI) && Opc != AMDGPU::BUFFER_STORE_LDS_DWORD &&
825 Opc != AMDGPU::TENSOR_STORE_FROM_LDS &&
826 Opc != AMDGPU::TENSOR_STORE_FROM_LDS_D2;
827 }
828
829 static bool isSBarrierSCCWrite(unsigned Opcode) {
830 return Opcode == AMDGPU::S_BARRIER_LEAVE ||
831 Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM ||
832 Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0;
833 }
834
835 static bool isCBranchVCCZRead(const MachineInstr &MI) {
836 unsigned Opc = MI.getOpcode();
837 return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
           !MI.getOperand(1).isUndef();
839 }
840
841 static bool isWQM(const MachineInstr &MI) {
842 return MI.getDesc().TSFlags & SIInstrFlags::WQM;
843 }
844
845 bool isWQM(uint16_t Opcode) const {
846 return get(Opcode).TSFlags & SIInstrFlags::WQM;
847 }
848
849 static bool isDisableWQM(const MachineInstr &MI) {
850 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
851 }
852
853 bool isDisableWQM(uint16_t Opcode) const {
854 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
855 }
856
  // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR form a special case of
  // SGPRs spilling to VGPRs: they are SGPR spills, but they come from VALU
  // instructions, so they need an explicit check. Checking only whether the
  // Spill bit is set and which instruction type it came from would
  // misclassify them.
862 static bool isVGPRSpill(const MachineInstr &MI) {
863 return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
864 MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
865 (isSpill(MI) && isVALU(MI));
866 }
867
868 bool isVGPRSpill(uint16_t Opcode) const {
869 return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
870 Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
871 (isSpill(Opcode) && isVALU(Opcode));
872 }
873
874 static bool isSGPRSpill(const MachineInstr &MI) {
875 return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
876 MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
877 (isSpill(MI) && isSALU(MI));
878 }
879
880 bool isSGPRSpill(uint16_t Opcode) const {
881 return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
882 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
883 (isSpill(Opcode) && isSALU(Opcode));
884 }
885
886 bool isSpill(uint16_t Opcode) const {
887 return get(Opcode).TSFlags & SIInstrFlags::Spill;
888 }
889
890 static bool isSpill(const MCInstrDesc &Desc) {
891 return Desc.TSFlags & SIInstrFlags::Spill;
892 }
893
  static bool isSpill(const MachineInstr &MI) { return isSpill(MI.getDesc()); }
895
896 static bool isWWMRegSpillOpcode(uint16_t Opcode) {
897 return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
898 Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
899 Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
900 Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
901 }
902
903 static bool isChainCallOpcode(uint64_t Opcode) {
904 return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
905 Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
906 }
907
908 static bool isDPP(const MachineInstr &MI) {
909 return MI.getDesc().TSFlags & SIInstrFlags::DPP;
910 }
911
912 bool isDPP(uint16_t Opcode) const {
913 return get(Opcode).TSFlags & SIInstrFlags::DPP;
914 }
915
916 static bool isTRANS(const MachineInstr &MI) {
917 return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
918 }
919
920 bool isTRANS(uint16_t Opcode) const {
921 return get(Opcode).TSFlags & SIInstrFlags::TRANS;
922 }
923
924 static bool isVOP3P(const MachineInstr &MI) {
925 return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
926 }
927
928 bool isVOP3P(uint16_t Opcode) const {
929 return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
930 }
931
932 static bool isVINTRP(const MachineInstr &MI) {
933 return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
934 }
935
936 bool isVINTRP(uint16_t Opcode) const {
937 return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
938 }
939
940 static bool isMAI(const MCInstrDesc &Desc) {
941 return Desc.TSFlags & SIInstrFlags::IsMAI;
942 }
943
  static bool isMAI(const MachineInstr &MI) { return isMAI(MI.getDesc()); }

  bool isMAI(uint16_t Opcode) const { return isMAI(get(Opcode)); }
947
948 static bool isMFMA(const MachineInstr &MI) {
949 return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
950 MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
951 }
952
953 bool isMFMA(uint16_t Opcode) const {
954 return isMAI(Opcode) && Opcode != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
955 Opcode != AMDGPU::V_ACCVGPR_READ_B32_e64;
956 }
957
958 static bool isDOT(const MachineInstr &MI) {
959 return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
960 }
961
962 static bool isWMMA(const MachineInstr &MI) {
963 return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
964 }
965
966 bool isWMMA(uint16_t Opcode) const {
967 return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
968 }
969
970 static bool isMFMAorWMMA(const MachineInstr &MI) {
971 return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
972 }
973
974 bool isMFMAorWMMA(uint16_t Opcode) const {
975 return isMFMA(Opcode) || isWMMA(Opcode) || isSWMMAC(Opcode);
976 }
977
978 static bool isSWMMAC(const MachineInstr &MI) {
979 return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
980 }
981
982 bool isSWMMAC(uint16_t Opcode) const {
983 return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
984 }
985
986 bool isDOT(uint16_t Opcode) const {
987 return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
988 }
989
990 bool isXDLWMMA(const MachineInstr &MI) const;
991
992 bool isXDL(const MachineInstr &MI) const;
993
  static bool isDGEMM(unsigned Opcode) { return AMDGPU::getMAIIsDGEMM(Opcode); }
995
996 static bool isLDSDIR(const MachineInstr &MI) {
997 return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
998 }
999
1000 bool isLDSDIR(uint16_t Opcode) const {
1001 return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
1002 }
1003
1004 static bool isVINTERP(const MachineInstr &MI) {
1005 return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
1006 }
1007
1008 bool isVINTERP(uint16_t Opcode) const {
1009 return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
1010 }
1011
1012 static bool isScalarUnit(const MachineInstr &MI) {
1013 return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
1014 }
1015
1016 static bool usesVM_CNT(const MachineInstr &MI) {
1017 return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
1018 }
1019
1020 static bool usesLGKM_CNT(const MachineInstr &MI) {
1021 return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
1022 }
1023
1024 static bool usesASYNC_CNT(const MachineInstr &MI) {
1025 return MI.getDesc().TSFlags & SIInstrFlags::ASYNC_CNT;
1026 }
1027
1028 bool usesASYNC_CNT(uint16_t Opcode) const {
1029 return get(Opcode).TSFlags & SIInstrFlags::ASYNC_CNT;
1030 }
1031
  // Most SOPK instructions treat the immediate as a signed 16-bit value;
  // however, some use it as unsigned.
1034 static bool sopkIsZext(unsigned Opcode) {
1035 return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
1036 Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
1037 Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
1038 Opcode == AMDGPU::S_GETREG_B32 ||
1039 Opcode == AMDGPU::S_GETREG_B32_const;
1040 }
1041
1042 /// \returns true if this is an s_store_dword* instruction. This is more
1043 /// specific than isSMEM && mayStore.
1044 static bool isScalarStore(const MachineInstr &MI) {
1045 return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
1046 }
1047
1048 bool isScalarStore(uint16_t Opcode) const {
1049 return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
1050 }
1051
1052 static bool isFixedSize(const MachineInstr &MI) {
1053 return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
1054 }
1055
1056 bool isFixedSize(uint16_t Opcode) const {
1057 return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
1058 }
1059
1060 static bool hasFPClamp(const MachineInstr &MI) {
1061 return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
1062 }
1063
1064 bool hasFPClamp(uint16_t Opcode) const {
1065 return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
1066 }
1067
1068 static bool hasIntClamp(const MachineInstr &MI) {
1069 return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
1070 }
1071
1072 uint64_t getClampMask(const MachineInstr &MI) const {
1073 const uint64_t ClampFlags = SIInstrFlags::FPClamp |
1074 SIInstrFlags::IntClamp |
1075 SIInstrFlags::ClampLo |
1076 SIInstrFlags::ClampHi;
1077 return MI.getDesc().TSFlags & ClampFlags;
1078 }
1079
1080 static bool usesFPDPRounding(const MachineInstr &MI) {
1081 return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
1082 }
1083
1084 bool usesFPDPRounding(uint16_t Opcode) const {
1085 return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
1086 }
1087
1088 static bool isFPAtomic(const MachineInstr &MI) {
1089 return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
1090 }
1091
1092 bool isFPAtomic(uint16_t Opcode) const {
1093 return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
1094 }
1095
1096 static bool isNeverUniform(const MachineInstr &MI) {
1097 return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
1098 }
1099
  // Check whether the opcode is a barrier start. Before gfx12 this is just
  // S_BARRIER, but with support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT the
  // barrier start to check for is S_BARRIER_SIGNAL*.
1103 bool isBarrierStart(unsigned Opcode) const {
1104 return Opcode == AMDGPU::S_BARRIER ||
1105 Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
1106 Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
1107 Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
1108 Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
1109 }
1110
1111 bool isBarrier(unsigned Opcode) const {
1112 return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
1113 Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
1114 Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
1115 Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
1116 Opcode == AMDGPU::S_BARRIER_LEAVE || Opcode == AMDGPU::DS_GWS_INIT ||
1117 Opcode == AMDGPU::DS_GWS_BARRIER;
1118 }
1119
1120 static bool isGFX12CacheInvOrWBInst(unsigned Opc) {
1121 return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
1122 Opc == AMDGPU::GLOBAL_WBINV;
1123 }
1124
1125 static bool isF16PseudoScalarTrans(unsigned Opcode) {
1126 return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
1127 Opcode == AMDGPU::V_S_LOG_F16_e64 ||
1128 Opcode == AMDGPU::V_S_RCP_F16_e64 ||
1129 Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
1130 Opcode == AMDGPU::V_S_SQRT_F16_e64;
1131 }
1132
1133 static bool doesNotReadTiedSource(const MachineInstr &MI) {
1134 return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
1135 }
1136
1137 bool doesNotReadTiedSource(uint16_t Opcode) const {
1138 return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
1139 }
1140
1141 bool isIGLP(unsigned Opcode) const {
1142 return Opcode == AMDGPU::SCHED_BARRIER ||
1143 Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
1144 }
1145
  bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }
1147
1148 // Return true if the instruction is mutually exclusive with all non-IGLP DAG
1149 // mutations, requiring all other mutations to be disabled.
1150 bool isIGLPMutationOnly(unsigned Opcode) const {
1151 return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT;
1152 }
1153
1154 static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
1155 switch (Opcode) {
1156 case AMDGPU::S_WAITCNT_soft:
1157 return AMDGPU::S_WAITCNT;
1158 case AMDGPU::S_WAITCNT_VSCNT_soft:
1159 return AMDGPU::S_WAITCNT_VSCNT;
1160 case AMDGPU::S_WAIT_LOADCNT_soft:
1161 return AMDGPU::S_WAIT_LOADCNT;
1162 case AMDGPU::S_WAIT_STORECNT_soft:
1163 return AMDGPU::S_WAIT_STORECNT;
1164 case AMDGPU::S_WAIT_SAMPLECNT_soft:
1165 return AMDGPU::S_WAIT_SAMPLECNT;
1166 case AMDGPU::S_WAIT_BVHCNT_soft:
1167 return AMDGPU::S_WAIT_BVHCNT;
1168 case AMDGPU::S_WAIT_DSCNT_soft:
1169 return AMDGPU::S_WAIT_DSCNT;
1170 case AMDGPU::S_WAIT_KMCNT_soft:
1171 return AMDGPU::S_WAIT_KMCNT;
1172 case AMDGPU::S_WAIT_XCNT_soft:
1173 return AMDGPU::S_WAIT_XCNT;
1174 default:
1175 return Opcode;
1176 }
1177 }
1178
1179 static bool isWaitcnt(unsigned Opcode) {
1180 switch (getNonSoftWaitcntOpcode(Opcode)) {
1181 case AMDGPU::S_WAITCNT:
1182 case AMDGPU::S_WAITCNT_VSCNT:
1183 case AMDGPU::S_WAITCNT_VMCNT:
1184 case AMDGPU::S_WAITCNT_EXPCNT:
1185 case AMDGPU::S_WAITCNT_LGKMCNT:
1186 case AMDGPU::S_WAIT_LOADCNT:
1187 case AMDGPU::S_WAIT_LOADCNT_DSCNT:
1188 case AMDGPU::S_WAIT_STORECNT:
1189 case AMDGPU::S_WAIT_STORECNT_DSCNT:
1190 case AMDGPU::S_WAIT_SAMPLECNT:
1191 case AMDGPU::S_WAIT_BVHCNT:
1192 case AMDGPU::S_WAIT_EXPCNT:
1193 case AMDGPU::S_WAIT_DSCNT:
1194 case AMDGPU::S_WAIT_KMCNT:
1195 case AMDGPU::S_WAIT_IDLE:
1196 return true;
1197 default:
1198 return false;
1199 }
1200 }
1201
1202 bool isVGPRCopy(const MachineInstr &MI) const {
1203 assert(isCopyInstr(MI));
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getMF();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
1208 }
1209
1210 bool hasVGPRUses(const MachineInstr &MI) const {
1211 const MachineFunction &MF = *MI.getMF();
1212 const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
                          return MO.isReg() && RI.isVGPR(MRI, MO.getReg());
                        });
1216 }
1217
  /// Return true if the instruction modifies the mode register.
1219 static bool modifiesModeRegister(const MachineInstr &MI);
1220
1221 /// This function is used to determine if an instruction can be safely
1222 /// executed under EXEC = 0 without hardware error, indeterminate results,
1223 /// and/or visible effects on future vector execution or outside the shader.
1224 /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
1225 /// used in removing branches over short EXEC = 0 sequences.
1226 /// As such it embeds certain assumptions which may not apply to every case
1227 /// of EXEC = 0 execution.
1228 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
1229
1230 /// Returns true if the instruction could potentially depend on the value of
1231 /// exec. If false, exec dependencies may safely be ignored.
1232 bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
1233
1234 bool isInlineConstant(const APInt &Imm) const;
1235
1236 bool isInlineConstant(const APFloat &Imm) const;
1237
1238 // Returns true if this non-register operand definitely does not need to be
1239 // encoded as a 32-bit literal. Note that this function handles all kinds of
1240 // operands, not just immediates.
1241 //
1242 // Some operands like FrameIndexes could resolve to an inline immediate value
  // that will not require an additional 4 bytes; this function assumes that it
1244 // will.
1245 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const {
1246 if (!MO.isImm())
1247 return false;
    return isInlineConstant(MO.getImm(), OperandType);
1249 }
1250 bool isInlineConstant(int64_t ImmVal, uint8_t OperandType) const;
1251
1252 bool isInlineConstant(const MachineOperand &MO,
1253 const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
1255 }
1256
  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, it
  /// would be an inline immediate.
1259 bool isInlineConstant(const MachineInstr &MI,
1260 const MachineOperand &UseMO,
1261 const MachineOperand &DefMO) const {
1262 assert(UseMO.getParent() == &MI);
1263 int OpIdx = UseMO.getOperandNo();
1264 if (OpIdx >= MI.getDesc().NumOperands)
1265 return false;
1266
    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
1268 }
1269
  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
1271 /// immediate.
1272 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
1275 }
1276
1277 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
1278 int64_t ImmVal) const {
1279 if (OpIdx >= MI.getDesc().NumOperands)
1280 return false;
1281
1282 if (isCopyInstr(MI)) {
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ? AMDGPU::OPERAND_REG_IMM_INT64
                                   : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(ImmVal, OpType);
1289 }
1290
    return isInlineConstant(ImmVal,
                            MI.getDesc().operands()[OpIdx].OperandType);
1292 }
1293
1294 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
1295 const MachineOperand &MO) const {
    return isInlineConstant(MI, OpIdx, MO.getImm());
1297 }
1298
1299 bool isInlineConstant(const MachineOperand &MO) const {
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
1301 }
1302
1303 bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
1304 const MachineOperand &MO) const;
1305
1306 bool isLiteralOperandLegal(const MCInstrDesc &InstDesc,
1307 const MCOperandInfo &OpInfo) const;
1308
1309 bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
1310 int64_t ImmVal) const;
1311
1312 bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
1313 const MachineOperand &MO) const {
    return isImmOperandLegal(MI.getDesc(), OpNo, MO);
1315 }
1316
1317 bool isNeverCoissue(MachineInstr &MI) const;
1318
1319 /// Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
1320 bool isLegalAV64PseudoImm(uint64_t Imm) const;
1321
1322 /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
1323 /// This function will return false if you pass it a 32-bit instruction.
1324 bool hasVALU32BitEncoding(unsigned Opcode) const;
1325
1326 bool physRegUsesConstantBus(const MachineOperand &Reg) const;
1327 bool regUsesConstantBus(const MachineOperand &Reg,
1328 const MachineRegisterInfo &MRI) const;
1329
1330 /// Returns true if this operand uses the constant bus.
1331 bool usesConstantBus(const MachineRegisterInfo &MRI,
1332 const MachineOperand &MO,
1333 const MCOperandInfo &OpInfo) const;
1334
1335 bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineInstr &MI,
1336 int OpIdx) const {
    return usesConstantBus(MRI, MI.getOperand(OpIdx),
                           MI.getDesc().operands()[OpIdx]);
1339 }
1340
1341 /// Return true if this instruction has any modifiers.
1342 /// e.g. src[012]_mod, omod, clamp.
1343 bool hasModifiers(unsigned Opcode) const;
1344
1345 bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const;
1346 bool hasAnyModifiersSet(const MachineInstr &MI) const;
1347
1348 bool canShrink(const MachineInstr &MI,
1349 const MachineRegisterInfo &MRI) const;
1350
1351 MachineInstr *buildShrunkInst(MachineInstr &MI,
1352 unsigned NewOpcode) const;
1353
1354 bool verifyInstruction(const MachineInstr &MI,
1355 StringRef &ErrInfo) const override;
1356
1357 unsigned getVALUOp(const MachineInstr &MI) const;
1358
1359 void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
1360 MachineBasicBlock::iterator MBBI,
1361 const DebugLoc &DL, Register Reg, bool IsSCCLive,
1362 SlotIndexes *Indexes = nullptr) const;
1363
1364 void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
1365 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1366 Register Reg, SlotIndexes *Indexes = nullptr) const;
1367
1368 MachineInstr *getWholeWaveFunctionSetup(MachineFunction &MF) const;
1369
  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, inferring the correct
  /// register class based on the other operands.
1375 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
1376 unsigned OpNo) const;
1377
  /// Return the size in bytes of the operand \p OpNo for the given
  /// instruction opcode.
1380 unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
1381 const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];
1382
1383 if (OpInfo.RegClass == -1) {
1384 // If this is an immediate operand, this must be a 32-bit literal.
1385 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
1386 return 4;
1387 }
1388
    return RI.getRegSizeInBits(*RI.getRegClass(getOpRegClassID(OpInfo))) / 8;
1390 }
1391
1392 /// This form should usually be preferred since it handles operands
1393 /// with unknown register classes.
1394 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
1402 }
1403
1404 /// Legalize the \p OpIndex operand of this instruction by inserting
1405 /// a MOV. For example:
1406 /// ADD_I32_e32 VGPR0, 15
1407 /// to
1408 /// MOV VGPR1, 15
1409 /// ADD_I32_e32 VGPR0, VGPR1
1410 ///
1411 /// If the operand being legalized is a register, then a COPY will be used
1412 /// instead of MOV.
1413 void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
1414
  /// Check if \p MO would be a legal operand as the \p OpIdx operand
  /// of \p MI.
1417 bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
1418 const MachineOperand *MO = nullptr) const;
1419
1420 /// Check if \p MO would be a valid operand for the given operand
1421 /// definition \p OpInfo. Note this does not attempt to validate constant bus
1422 /// restrictions (e.g. literal constant usage).
1423 bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1424 const MCOperandInfo &OpInfo,
1425 const MachineOperand &MO) const;
1426
1427 /// Check if \p MO (a register operand) is a legal register for the
1428 /// given operand description or operand index.
  /// The operand index version provides more legality checks.
1430 bool isLegalRegOperand(const MachineRegisterInfo &MRI,
1431 const MCOperandInfo &OpInfo,
1432 const MachineOperand &MO) const;
1433 bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
1434 const MachineOperand &MO) const;
1435
1436 /// Check if \p MO would be a legal operand for gfx12+ packed math FP32
1437 /// instructions. Packed math FP32 instructions typically accept SGPRs or
1438 /// VGPRs as source operands. On gfx12+, if a source operand uses SGPRs, the
1439 /// HW can only read the first SGPR and use it for both the low and high
1440 /// operations.
1441 /// \p SrcN can be 0, 1, or 2, representing src0, src1, and src2,
1442 /// respectively. If \p MO is nullptr, the operand corresponding to SrcN will
1443 /// be used.
1444 bool isLegalGFX12PlusPackedMathFP32Operand(
1445 const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
1446 const MachineOperand *MO = nullptr) const;
1447
1448 /// Legalize operands in \p MI by either commuting it or inserting a
1449 /// copy of src1.
1450 void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1451
1452 /// Fix operands in \p MI to satisfy constant bus requirements.
1453 void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1454
  /// Copy a value from a VGPR (\p SrcReg) to an SGPR. The desired register
  /// class for the dst register (\p DstRC) can be optionally supplied. This
  /// function can only be used when it is known that the value in \p SrcReg
  /// is the same across all threads in the wave.
1459 /// \returns The SGPR register that \p SrcReg was copied to.
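  ///
  /// Illustrative use (sketch only, assuming \p SrcReg is wave-uniform):
  /// \code
  ///   Register SGPR = readlaneVGPRToSGPR(SrcReg, UseMI, MRI);
  /// \endcode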
1460 Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
1461 MachineRegisterInfo &MRI,
1462 const TargetRegisterClass *DstRC = nullptr) const;
1463
1464 void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1465 void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1466
1467 void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
1468 MachineBasicBlock::iterator I,
1469 const TargetRegisterClass *DstRC,
1470 MachineOperand &Op, MachineRegisterInfo &MRI,
1471 const DebugLoc &DL) const;
1472
1473 /// Legalize all operands in this instruction. This function may create new
1474 /// instructions and control-flow around \p MI. If present, \p MDT is
1475 /// updated.
1476 /// \returns A new basic block that contains \p MI if new blocks were created.
1477 MachineBasicBlock *
1478 legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
1479
1480 /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
1481 /// was moved to VGPR. \returns true if succeeded.
1482 bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
1483
  /// Fix operands in \p Inst for 16-bit SALU to VALU lowering.
1485 void legalizeOperandsVALUt16(MachineInstr &Inst,
1486 MachineRegisterInfo &MRI) const;
1487 void legalizeOperandsVALUt16(MachineInstr &Inst, unsigned OpIdx,
1488 MachineRegisterInfo &MRI) const;
1489
  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of MachineInstrs
  /// in the \p Worklist to the VALU if necessary. If present, \p MDT is
  /// updated.
1494 void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;
1495
1496 void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
1497 MachineInstr &Inst) const;
1498
1499 void insertNoop(MachineBasicBlock &MBB,
1500 MachineBasicBlock::iterator MI) const override;
1501
1502 void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1503 unsigned Quantity) const override;
1504
1505 void insertReturn(MachineBasicBlock &MBB) const;
1506
  /// Build instructions that simulate the behavior of an `s_trap 2` instruction
1508 /// for hardware (namely, gfx11) that runs in PRIV=1 mode. There, s_trap is
1509 /// interpreted as a nop.
1510 MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
1511 MachineBasicBlock &MBB,
1512 MachineInstr &MI,
1513 const DebugLoc &DL) const;
1514
1515 /// Return the number of wait states that result from executing this
1516 /// instruction.
1517 static unsigned getNumWaitStates(const MachineInstr &MI);
1518
1519 /// Returns the operand named \p Op. If \p MI does not have an
1520 /// operand named \c Op, this function returns nullptr.
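  ///
  /// For example (illustrative):
  /// \code
  ///   if (MachineOperand *Src0 =
  ///           getNamedOperand(MI, AMDGPU::OpName::src0)) {
  ///     // MI has a src0 operand; inspect or rewrite it here.
  ///   }
  /// \endcode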
1521 LLVM_READONLY
1522 MachineOperand *getNamedOperand(MachineInstr &MI,
1523 AMDGPU::OpName OperandName) const;
1524
1525 LLVM_READONLY
1526 const MachineOperand *getNamedOperand(const MachineInstr &MI,
1527 AMDGPU::OpName OperandName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OperandName);
1529 }
1530
1531 /// Get required immediate operand
1532 int64_t getNamedImmOperand(const MachineInstr &MI,
1533 AMDGPU::OpName OperandName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
    return MI.getOperand(Idx).getImm();
1536 }
1537
1538 uint64_t getDefaultRsrcDataFormat() const;
1539 uint64_t getScratchRsrcWords23() const;
1540
1541 bool isLowLatencyInstruction(const MachineInstr &MI) const;
1542 bool isHighLatencyDef(int Opc) const override;
1543
1544 /// Return the descriptor of the target-specific machine instruction
1545 /// that corresponds to the specified pseudo or native opcode.
1546 const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
1548 }
1549
1550 Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1551 Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1552
1553 Register isLoadFromStackSlot(const MachineInstr &MI,
1554 int &FrameIndex) const override;
1555 Register isStoreToStackSlot(const MachineInstr &MI,
1556 int &FrameIndex) const override;
1557
1558 unsigned getInstBundleSize(const MachineInstr &MI) const;
1559 unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
1560
1561 bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
1562
1563 std::pair<unsigned, unsigned>
1564 decomposeMachineOperandsTargetFlags(unsigned TF) const override;
1565
1566 ArrayRef<std::pair<int, const char *>>
1567 getSerializableTargetIndices() const override;
1568
1569 ArrayRef<std::pair<unsigned, const char *>>
1570 getSerializableDirectMachineOperandTargetFlags() const override;
1571
1572 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
1573 getSerializableMachineMemOperandTargetFlags() const override;
1574
1575 ScheduleHazardRecognizer *
1576 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
1577 const ScheduleDAG *DAG) const override;
1578
1579 ScheduleHazardRecognizer *
1580 CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
1581
1582 ScheduleHazardRecognizer *
1583 CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
1584 const ScheduleDAGMI *DAG) const override;
1585
1586 unsigned getLiveRangeSplitOpcode(Register Reg,
1587 const MachineFunction &MF) const override;
1588
1589 bool isBasicBlockPrologue(const MachineInstr &MI,
1590 Register Reg = Register()) const override;
1591
1592 bool canAddToBBProlog(const MachineInstr &MI) const;
1593
1594 MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
1595 MachineBasicBlock::iterator InsPt,
1596 const DebugLoc &DL, Register Src,
1597 Register Dst) const override;
1598
1599 MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
1600 MachineBasicBlock::iterator InsPt,
1601 const DebugLoc &DL, Register Src,
1602 unsigned SrcSubReg,
1603 Register Dst) const override;
1604
1605 bool isWave32() const;
1606
1607 /// Return a partially built integer add instruction without carry.
1608 /// Caller must add source operands.
1609 /// For pre-GFX9 it will generate unused carry destination operand.
1610 /// TODO: After GFX9 it should return a no-carry operation.
1611 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1612 MachineBasicBlock::iterator I,
1613 const DebugLoc &DL,
1614 Register DestReg) const;
1615
1616 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1617 MachineBasicBlock::iterator I,
1618 const DebugLoc &DL,
1619 Register DestReg,
1620 RegScavenger &RS) const;
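
  /// Illustrative use (a sketch; the register and operand values are
  /// placeholders, and the trailing immediate is the clamp bit expected by
  /// the underlying add opcodes):
  /// \code
  ///   Register Dst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  ///   TII->getAddNoCarry(MBB, I, DL, Dst)
  ///       .addImm(Offset)
  ///       .addReg(SrcReg)
  ///       .addImm(0); // clamp bit
  /// \endcode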

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  bool isLegalMUBUFImmOffset(unsigned Imm) const;

  static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);

  bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                        Align Alignment = Align(4)) const;
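
  /// A sketch of splitting a buffer offset that does not fit in the MUBUF
  /// immediate field (illustrative; \c Offset is assumed to be a 32-bit byte
  /// offset):
  /// \code
  ///   uint32_t SOffset = 0, ImmOffset = 0;
  ///   if (TII->splitMUBUFOffset(Offset, SOffset, ImmOffset)) {
  ///     // ImmOffset can be encoded in the instruction; SOffset must be
  ///     // materialized in the soffset register operand.
  ///   }
  /// \endcode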

  /// Returns true if \p Offset is legal for the subtarget as the offset to a
  /// FLAT encoded instruction with the given \p FlatVariant.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;
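
  /// Typical use, shown as a sketch (the split point depends on the
  /// subtarget's encodable FLAT offset range; names are placeholders):
  /// \code
  ///   if (!TII->isLegalFLATOffset(Off, AMDGPUAS::GLOBAL_ADDRESS,
  ///                               SIInstrFlags::FlatGlobal)) {
  ///     auto [ImmOff, RemOff] = TII->splitFlatOffset(
  ///         Off, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
  ///     // ImmOff fits in the instruction's offset field; RemOff must be
  ///     // folded into the address computation by the caller.
  ///   }
  /// \endcode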

  /// Returns true if negative offsets are allowed for the given \p FlatVariant.
  bool allowNegativeFlatOffset(uint64_t FlatVariant) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;
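
  /// Illustrative check (a sketch):
  /// \code
  ///   int MCOp = TII->pseudoToMCOpcode(MI.getOpcode());
  ///   bool HasHWEncoding = (MCOp != -1);
  /// \endcode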

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  void fixImplicitOperands(MachineInstr &MI) const;

  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;

  const MachineOperand &getCalleeOperand(const MachineInstr &MI) const override;

  InstructionUniformity
  getInstructionUniformity(const MachineInstr &MI) const final;

  InstructionUniformity
  getGenericInstructionUniformity(const MachineInstr &MI) const;

  const MIRFormatter *getMIRFormatter() const override;

  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // FIXME: This should be removed.
  // Enforce even register alignment for the operand \p OpName if the target
  // requires it. This is used when an operand is a 32-bit register but must
  // nevertheless be even-aligned.
  void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const;
};

/// \brief Returns true if the reg:subreg pair \p P is of register class
/// \p TRC.
inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
                         const TargetRegisterClass &TRC,
                         MachineRegisterInfo &MRI) {
  auto *RC = MRI.getRegClass(P.Reg);
  if (!P.SubReg)
    return RC == &TRC;
  auto *TRI = MRI.getTargetRegisterInfo();
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
}

/// \brief Create RegSubRegPair from a register MachineOperand
inline
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
  assert(O.isReg());
  return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
}

/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               const MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);

namespace AMDGPU {

  LLVM_READONLY
  int getVOPe64(uint16_t Opcode);

  LLVM_READONLY
  int getVOPe32(uint16_t Opcode);

  LLVM_READONLY
  int getSDWAOp(uint16_t Opcode);

  LLVM_READONLY
  int getDPPOp32(uint16_t Opcode);

  LLVM_READONLY
  int getDPPOp64(uint16_t Opcode);

  LLVM_READONLY
  int getBasicFromSDWAOp(uint16_t Opcode);

  LLVM_READONLY
  int getCommuteRev(uint16_t Opcode);

  LLVM_READONLY
  int getCommuteOrig(uint16_t Opcode);

  LLVM_READONLY
  int getAddr64Inst(uint16_t Opcode);

  /// Check if \p Opcode is an Addr64 opcode.
  ///
  /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
  LLVM_READONLY
  int getIfAddr64Inst(uint16_t Opcode);

  LLVM_READONLY
  int getSOPKOp(uint16_t Opcode);

  /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
  /// of a VADDR form.
  LLVM_READONLY
  int getGlobalSaddrOp(uint16_t Opcode);

  /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
  /// of a SADDR form.
  LLVM_READONLY
  int getGlobalVaddrOp(uint16_t Opcode);

  LLVM_READONLY
  int getVCMPXNoSDstOp(uint16_t Opcode);

  /// \returns ST form with only immediate offset of a FLAT Scratch instruction
  /// given an \p Opcode of an SS (SADDR) form.
  LLVM_READONLY
  int getFlatScratchInstSTfromSS(uint16_t Opcode);

  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SVS (SADDR + VADDR) form.
  LLVM_READONLY
  int getFlatScratchInstSVfromSVS(uint16_t Opcode);

  /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SV (VADDR) form.
  LLVM_READONLY
  int getFlatScratchInstSSfromSV(uint16_t Opcode);

  /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
  /// of an SS (SADDR) form.
  LLVM_READONLY
  int getFlatScratchInstSVfromSS(uint16_t Opcode);

  /// \returns earlyclobber version of a MAC MFMA if it exists.
  LLVM_READONLY
  int getMFMAEarlyClobberOp(uint16_t Opcode);

  /// \returns Version of an MFMA instruction which uses AGPRs for srcC and
  /// vdst, given an \p Opcode of an MFMA which uses VGPRs for srcC/vdst.
  LLVM_READONLY
  int getMFMASrcCVDstAGPROp(uint16_t Opcode);

  /// \returns v_cmpx version of a v_cmp instruction.
  LLVM_READONLY
  int getVCMPXOpFromVCMP(uint16_t Opcode);

  const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
  const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
  const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
  const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU

namespace AMDGPU {
enum AsmComments {
  // For sgpr to vgpr spill instructions
  SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H