1 | //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Interface definition for SIInstrInfo. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H |
15 | #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H |
16 | |
17 | #include "AMDGPUMIRFormatter.h" |
18 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
19 | #include "SIRegisterInfo.h" |
20 | #include "Utils/AMDGPUBaseInfo.h" |
21 | #include "llvm/ADT/SetVector.h" |
22 | #include "llvm/CodeGen/TargetInstrInfo.h" |
23 | #include "llvm/CodeGen/TargetSchedule.h" |
24 | |
#define GET_INSTRINFO_HEADER
26 | #include "AMDGPUGenInstrInfo.inc" |
27 | |
28 | namespace llvm { |
29 | |
30 | class APInt; |
31 | class GCNSubtarget; |
32 | class LiveVariables; |
33 | class MachineDominatorTree; |
34 | class MachineRegisterInfo; |
35 | class RegScavenger; |
36 | class TargetRegisterClass; |
37 | class ScheduleHazardRecognizer; |
38 | |
39 | constexpr unsigned DefaultMemoryClusterDWordsLimit = 8; |
40 | |
41 | /// Mark the MMO of a uniform load if there are no potentially clobbering stores |
42 | /// on any path from the start of an entry function to this load. |
43 | static const MachineMemOperand::Flags MONoClobber = |
44 | MachineMemOperand::MOTargetFlag1; |
45 | |
46 | /// Mark the MMO of a load as the last use. |
47 | static const MachineMemOperand::Flags MOLastUse = |
48 | MachineMemOperand::MOTargetFlag2; |
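
// A minimal usage sketch (illustrative, not part of this interface): these
// are ordinary MachineMemOperand target flags, so they can be set and tested
// with the standard MMO flag accessors, e.g.:
//   MMO->setFlags(MONoClobber);                   // mark a uniform load
//   bool IsLastUse = MMO->getFlags() & MOLastUse; // query the last-use flag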
49 | |
/// Utility to store a worklist of machine instructions.
51 | struct SIInstrWorklist { |
52 | SIInstrWorklist() = default; |
53 | |
54 | void insert(MachineInstr *MI); |
55 | |
56 | MachineInstr *top() const { |
57 | const auto *iter = InstrList.begin(); |
58 | return *iter; |
59 | } |
60 | |
61 | void erase_top() { |
62 | const auto *iter = InstrList.begin(); |
    InstrList.erase(iter);
64 | } |
65 | |
66 | bool empty() const { return InstrList.empty(); } |
67 | |
68 | void clear() { |
69 | InstrList.clear(); |
70 | DeferredList.clear(); |
71 | } |
72 | |
73 | bool isDeferred(MachineInstr *MI); |
74 | |
75 | SetVector<MachineInstr *> &getDeferredList() { return DeferredList; } |
76 | |
77 | private: |
78 | /// InstrList contains the MachineInstrs. |
79 | SetVector<MachineInstr *> InstrList; |
  /// Deferred instructions are specific MachineInstrs
  /// that will be added by the insert method.
82 | SetVector<MachineInstr *> DeferredList; |
83 | }; |
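
// Typical drain loop over a SIInstrWorklist (illustrative sketch only; real
// users may also consult the deferred list):
//   SIInstrWorklist Worklist;
//   Worklist.insert(&MI);
//   while (!Worklist.empty()) {
//     MachineInstr *Cur = Worklist.top();
//     Worklist.erase_top();
//     // ... process Cur, inserting any newly affected instructions ...
//   }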
84 | |
85 | class SIInstrInfo final : public AMDGPUGenInstrInfo { |
86 | private: |
87 | const SIRegisterInfo RI; |
88 | const GCNSubtarget &ST; |
89 | TargetSchedModel SchedModel; |
90 | mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter; |
91 | |
92 | // The inverse predicate should have the negative value. |
93 | enum BranchPredicate { |
94 | INVALID_BR = 0, |
95 | SCC_TRUE = 1, |
96 | SCC_FALSE = -1, |
97 | VCCNZ = 2, |
98 | VCCZ = -2, |
99 | EXECNZ = -3, |
100 | EXECZ = 3 |
101 | }; |
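
  // Because each predicate and its inverse are numeric negations of one
  // another, the inverse can be computed without a lookup table, e.g.
  // (illustrative): static_cast<BranchPredicate>(-SCC_TRUE) == SCC_FALSE.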
102 | |
103 | using SetVectorType = SmallSetVector<MachineInstr *, 32>; |
104 | |
105 | static unsigned getBranchOpcode(BranchPredicate Cond); |
106 | static BranchPredicate getBranchPredicate(unsigned Opcode); |
107 | |
108 | public: |
109 | unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, |
110 | MachineRegisterInfo &MRI, |
111 | const MachineOperand &SuperReg, |
112 | const TargetRegisterClass *SuperRC, |
113 | unsigned SubIdx, |
114 | const TargetRegisterClass *SubRC) const; |
  MachineOperand buildExtractSubRegOrImm(
116 | MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, |
117 | const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, |
118 | unsigned SubIdx, const TargetRegisterClass *SubRC) const; |
119 | |
120 | private: |
121 | void swapOperands(MachineInstr &Inst) const; |
122 | |
123 | std::pair<bool, MachineBasicBlock *> |
124 | moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst, |
125 | MachineDominatorTree *MDT = nullptr) const; |
126 | |
127 | void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst, |
128 | MachineDominatorTree *MDT = nullptr) const; |
129 | |
130 | void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const; |
131 | |
132 | void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const; |
133 | |
134 | void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst, |
135 | unsigned Opcode) const; |
136 | |
137 | void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst, |
138 | unsigned Opcode) const; |
139 | |
140 | void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst, |
141 | unsigned Opcode, bool Swap = false) const; |
142 | |
143 | void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst, |
144 | unsigned Opcode, |
145 | MachineDominatorTree *MDT = nullptr) const; |
146 | |
147 | void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst, |
148 | MachineDominatorTree *MDT) const; |
149 | |
150 | void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst, |
151 | MachineDominatorTree *MDT) const; |
152 | |
153 | void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst, |
154 | MachineDominatorTree *MDT = nullptr) const; |
155 | |
156 | void splitScalar64BitBCNT(SIInstrWorklist &Worklist, |
157 | MachineInstr &Inst) const; |
158 | void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const; |
159 | void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst, |
160 | unsigned Opcode, |
161 | MachineDominatorTree *MDT = nullptr) const; |
162 | void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI, |
163 | MachineInstr &Inst) const; |
164 | |
165 | void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI, |
166 | SIInstrWorklist &Worklist) const; |
167 | |
168 | void addSCCDefUsersToVALUWorklist(MachineOperand &Op, |
169 | MachineInstr &SCCDefInst, |
170 | SIInstrWorklist &Worklist, |
171 | Register NewCond = Register()) const; |
172 | void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst, |
173 | SIInstrWorklist &Worklist) const; |
174 | |
175 | const TargetRegisterClass * |
176 | getDestEquivalentVGPRClass(const MachineInstr &Inst) const; |
177 | |
178 | bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa, |
179 | const MachineInstr &MIb) const; |
180 | |
181 | Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const; |
182 | |
183 | bool verifyCopy(const MachineInstr &MI, const MachineRegisterInfo &MRI, |
184 | StringRef &ErrInfo) const; |
185 | |
186 | bool resultDependsOnExec(const MachineInstr &MI) const; |
187 | |
188 | protected: |
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another register, return the destination
  /// and source registers as machine operands.
192 | std::optional<DestSourcePair> |
193 | isCopyInstrImpl(const MachineInstr &MI) const override; |
194 | |
195 | bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, |
196 | AMDGPU::OpName Src0OpName, MachineOperand &Src1, |
197 | AMDGPU::OpName Src1OpName) const; |
198 | bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, |
199 | const MachineOperand *fromMO, unsigned toIdx, |
200 | const MachineOperand *toMO) const; |
201 | MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, |
202 | unsigned OpIdx0, |
203 | unsigned OpIdx1) const override; |
204 | |
205 | public: |
206 | enum TargetOperandFlags { |
207 | MO_MASK = 0xf, |
208 | |
209 | MO_NONE = 0, |
210 | // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. |
211 | MO_GOTPCREL = 1, |
212 | // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO. |
213 | MO_GOTPCREL32 = 2, |
214 | MO_GOTPCREL32_LO = 2, |
215 | // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI. |
216 | MO_GOTPCREL32_HI = 3, |
217 | // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO. |
218 | MO_REL32 = 4, |
219 | MO_REL32_LO = 4, |
220 | // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI. |
221 | MO_REL32_HI = 5, |
222 | |
223 | MO_FAR_BRANCH_OFFSET = 6, |
224 | |
225 | MO_ABS32_LO = 8, |
226 | MO_ABS32_HI = 9, |
227 | }; |
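
  // As an illustration of how these flags surface in generated code, the
  // MO_REL32_LO/MO_REL32_HI pair typically appears as the @rel32@lo /
  // @rel32@hi operands of a PC-relative address computation (assembly
  // sketch):
  //   s_getpc_b64 s[0:1]
  //   s_add_u32   s0, s0, sym@rel32@lo+4
  //   s_addc_u32  s1, s1, sym@rel32@hi+12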
228 | |
229 | explicit SIInstrInfo(const GCNSubtarget &ST); |
230 | |
231 | const SIRegisterInfo &getRegisterInfo() const { |
232 | return RI; |
233 | } |
234 | |
235 | const GCNSubtarget &getSubtarget() const { |
236 | return ST; |
237 | } |
238 | |
239 | bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; |
240 | |
241 | bool isIgnorableUse(const MachineOperand &MO) const override; |
242 | |
243 | bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, |
244 | MachineCycleInfo *CI) const override; |
245 | |
246 | bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, |
247 | int64_t &Offset1) const override; |
248 | |
249 | bool isGlobalMemoryObject(const MachineInstr *MI) const override; |
250 | |
251 | bool getMemOperandsWithOffsetWidth( |
252 | const MachineInstr &LdSt, |
253 | SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset, |
254 | bool &OffsetIsScalable, LocationSize &Width, |
255 | const TargetRegisterInfo *TRI) const final; |
256 | |
257 | bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1, |
258 | int64_t Offset1, bool OffsetIsScalable1, |
259 | ArrayRef<const MachineOperand *> BaseOps2, |
260 | int64_t Offset2, bool OffsetIsScalable2, |
261 | unsigned ClusterSize, |
262 | unsigned NumBytes) const override; |
263 | |
264 | bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, |
265 | int64_t Offset1, unsigned NumLoads) const override; |
266 | |
267 | void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
268 | const DebugLoc &DL, Register DestReg, Register SrcReg, |
269 | bool KillSrc, bool RenamableDest = false, |
270 | bool RenamableSrc = false) const override; |
271 | |
272 | const TargetRegisterClass *getPreferredSelectRegClass( |
273 | unsigned Size) const; |
274 | |
275 | Register insertNE(MachineBasicBlock *MBB, |
276 | MachineBasicBlock::iterator I, const DebugLoc &DL, |
277 | Register SrcReg, int Value) const; |
278 | |
279 | Register insertEQ(MachineBasicBlock *MBB, |
280 | MachineBasicBlock::iterator I, const DebugLoc &DL, |
281 | Register SrcReg, int Value) const; |
282 | |
283 | bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, |
284 | int64_t &ImmVal) const override; |
285 | |
286 | void storeRegToStackSlot( |
287 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, |
288 | bool isKill, int FrameIndex, const TargetRegisterClass *RC, |
289 | const TargetRegisterInfo *TRI, Register VReg, |
290 | MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; |
291 | |
292 | void loadRegFromStackSlot( |
293 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, |
294 | int FrameIndex, const TargetRegisterClass *RC, |
295 | const TargetRegisterInfo *TRI, Register VReg, |
296 | MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; |
297 | |
298 | bool expandPostRAPseudo(MachineInstr &MI) const override; |
299 | |
300 | void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
301 | Register DestReg, unsigned SubIdx, |
302 | const MachineInstr &Orig, |
303 | const TargetRegisterInfo &TRI) const override; |
304 | |
  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns the pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form
  // and a REG_SEQUENCE is produced to define the original register.
310 | std::pair<MachineInstr*, MachineInstr*> |
311 | expandMovDPP64(MachineInstr &MI) const; |
312 | |
313 | // Returns an opcode that can be used to move a value to a \p DstRC |
314 | // register. If there is no hardware instruction that can store to \p |
315 | // DstRC, then AMDGPU::COPY is returned. |
316 | unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; |
317 | |
318 | const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize, |
319 | unsigned EltSize, |
320 | bool IsSGPR) const; |
321 | |
322 | const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize, |
323 | bool IsIndirectSrc) const; |
324 | LLVM_READONLY |
325 | int commuteOpcode(unsigned Opc) const; |
326 | |
327 | LLVM_READONLY |
328 | inline int commuteOpcode(const MachineInstr &MI) const { |
    return commuteOpcode(MI.getOpcode());
330 | } |
331 | |
332 | bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, |
333 | unsigned &SrcOpIdx1) const override; |
334 | |
335 | bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0, |
336 | unsigned &SrcOpIdx1) const; |
337 | |
338 | bool isBranchOffsetInRange(unsigned BranchOpc, |
339 | int64_t BrOffset) const override; |
340 | |
341 | MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; |
342 | |
  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow instructions.
345 | bool hasDivergentBranch(const MachineBasicBlock *MBB) const; |
346 | |
347 | void insertIndirectBranch(MachineBasicBlock &MBB, |
348 | MachineBasicBlock &NewDestBB, |
349 | MachineBasicBlock &RestoreBB, const DebugLoc &DL, |
350 | int64_t BrOffset, RegScavenger *RS) const override; |
351 | |
352 | bool analyzeBranchImpl(MachineBasicBlock &MBB, |
353 | MachineBasicBlock::iterator I, |
354 | MachineBasicBlock *&TBB, |
355 | MachineBasicBlock *&FBB, |
356 | SmallVectorImpl<MachineOperand> &Cond, |
357 | bool AllowModify) const; |
358 | |
359 | bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, |
360 | MachineBasicBlock *&FBB, |
361 | SmallVectorImpl<MachineOperand> &Cond, |
362 | bool AllowModify = false) const override; |
363 | |
364 | unsigned removeBranch(MachineBasicBlock &MBB, |
365 | int *BytesRemoved = nullptr) const override; |
366 | |
367 | unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, |
368 | MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, |
369 | const DebugLoc &DL, |
370 | int *BytesAdded = nullptr) const override; |
371 | |
372 | bool reverseBranchCondition( |
373 | SmallVectorImpl<MachineOperand> &Cond) const override; |
374 | |
375 | bool canInsertSelect(const MachineBasicBlock &MBB, |
376 | ArrayRef<MachineOperand> Cond, Register DstReg, |
377 | Register TrueReg, Register FalseReg, int &CondCycles, |
378 | int &TrueCycles, int &FalseCycles) const override; |
379 | |
380 | void insertSelect(MachineBasicBlock &MBB, |
381 | MachineBasicBlock::iterator I, const DebugLoc &DL, |
382 | Register DstReg, ArrayRef<MachineOperand> Cond, |
383 | Register TrueReg, Register FalseReg) const override; |
384 | |
385 | void insertVectorSelect(MachineBasicBlock &MBB, |
386 | MachineBasicBlock::iterator I, const DebugLoc &DL, |
387 | Register DstReg, ArrayRef<MachineOperand> Cond, |
388 | Register TrueReg, Register FalseReg) const; |
389 | |
390 | bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, |
391 | Register &SrcReg2, int64_t &CmpMask, |
392 | int64_t &CmpValue) const override; |
393 | |
394 | bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, |
395 | Register SrcReg2, int64_t CmpMask, int64_t CmpValue, |
396 | const MachineRegisterInfo *MRI) const override; |
397 | |
398 | bool |
399 | areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, |
400 | const MachineInstr &MIb) const override; |
401 | |
402 | static bool isFoldableCopy(const MachineInstr &MI); |
403 | |
404 | void removeModOperands(MachineInstr &MI) const; |
405 | |
406 | /// Return the extracted immediate value in a subregister use from a constant |
407 | /// materialized in a super register. |
408 | /// |
409 | /// e.g. %imm = S_MOV_B64 K[0:63] |
410 | /// USE %imm.sub1 |
411 | /// This will return K[32:63] |
  static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal,
                                                     unsigned SubRegIndex);
414 | |
415 | bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, |
416 | MachineRegisterInfo *MRI) const final; |
417 | |
418 | unsigned getMachineCSELookAheadLimit() const override { return 500; } |
419 | |
420 | MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, |
421 | LiveIntervals *LIS) const override; |
422 | |
423 | bool isSchedulingBoundary(const MachineInstr &MI, |
424 | const MachineBasicBlock *MBB, |
425 | const MachineFunction &MF) const override; |
426 | |
427 | static bool isSALU(const MachineInstr &MI) { |
428 | return MI.getDesc().TSFlags & SIInstrFlags::SALU; |
429 | } |
430 | |
431 | bool isSALU(uint16_t Opcode) const { |
432 | return get(Opcode).TSFlags & SIInstrFlags::SALU; |
433 | } |
434 | |
435 | static bool isVALU(const MachineInstr &MI) { |
436 | return MI.getDesc().TSFlags & SIInstrFlags::VALU; |
437 | } |
438 | |
439 | bool isVALU(uint16_t Opcode) const { |
440 | return get(Opcode).TSFlags & SIInstrFlags::VALU; |
441 | } |
442 | |
443 | static bool isImage(const MachineInstr &MI) { |
444 | return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI); |
445 | } |
446 | |
447 | bool isImage(uint16_t Opcode) const { |
448 | return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode); |
449 | } |
450 | |
451 | static bool isVMEM(const MachineInstr &MI) { |
452 | return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI); |
453 | } |
454 | |
  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode) ||
           isFLAT(Opcode);
  }
458 | |
459 | static bool isSOP1(const MachineInstr &MI) { |
460 | return MI.getDesc().TSFlags & SIInstrFlags::SOP1; |
461 | } |
462 | |
463 | bool isSOP1(uint16_t Opcode) const { |
464 | return get(Opcode).TSFlags & SIInstrFlags::SOP1; |
465 | } |
466 | |
467 | static bool isSOP2(const MachineInstr &MI) { |
468 | return MI.getDesc().TSFlags & SIInstrFlags::SOP2; |
469 | } |
470 | |
471 | bool isSOP2(uint16_t Opcode) const { |
472 | return get(Opcode).TSFlags & SIInstrFlags::SOP2; |
473 | } |
474 | |
475 | static bool isSOPC(const MachineInstr &MI) { |
476 | return MI.getDesc().TSFlags & SIInstrFlags::SOPC; |
477 | } |
478 | |
479 | bool isSOPC(uint16_t Opcode) const { |
480 | return get(Opcode).TSFlags & SIInstrFlags::SOPC; |
481 | } |
482 | |
483 | static bool isSOPK(const MachineInstr &MI) { |
484 | return MI.getDesc().TSFlags & SIInstrFlags::SOPK; |
485 | } |
486 | |
487 | bool isSOPK(uint16_t Opcode) const { |
488 | return get(Opcode).TSFlags & SIInstrFlags::SOPK; |
489 | } |
490 | |
491 | static bool isSOPP(const MachineInstr &MI) { |
492 | return MI.getDesc().TSFlags & SIInstrFlags::SOPP; |
493 | } |
494 | |
495 | bool isSOPP(uint16_t Opcode) const { |
496 | return get(Opcode).TSFlags & SIInstrFlags::SOPP; |
497 | } |
498 | |
499 | static bool isPacked(const MachineInstr &MI) { |
500 | return MI.getDesc().TSFlags & SIInstrFlags::IsPacked; |
501 | } |
502 | |
503 | bool isPacked(uint16_t Opcode) const { |
504 | return get(Opcode).TSFlags & SIInstrFlags::IsPacked; |
505 | } |
506 | |
507 | static bool isVOP1(const MachineInstr &MI) { |
508 | return MI.getDesc().TSFlags & SIInstrFlags::VOP1; |
509 | } |
510 | |
511 | bool isVOP1(uint16_t Opcode) const { |
512 | return get(Opcode).TSFlags & SIInstrFlags::VOP1; |
513 | } |
514 | |
515 | static bool isVOP2(const MachineInstr &MI) { |
516 | return MI.getDesc().TSFlags & SIInstrFlags::VOP2; |
517 | } |
518 | |
519 | bool isVOP2(uint16_t Opcode) const { |
520 | return get(Opcode).TSFlags & SIInstrFlags::VOP2; |
521 | } |
522 | |
523 | static bool isVOP3(const MachineInstr &MI) { |
524 | return MI.getDesc().TSFlags & SIInstrFlags::VOP3; |
525 | } |
526 | |
527 | bool isVOP3(uint16_t Opcode) const { |
528 | return get(Opcode).TSFlags & SIInstrFlags::VOP3; |
529 | } |
530 | |
531 | static bool isSDWA(const MachineInstr &MI) { |
532 | return MI.getDesc().TSFlags & SIInstrFlags::SDWA; |
533 | } |
534 | |
535 | bool isSDWA(uint16_t Opcode) const { |
536 | return get(Opcode).TSFlags & SIInstrFlags::SDWA; |
537 | } |
538 | |
539 | static bool isVOPC(const MachineInstr &MI) { |
540 | return MI.getDesc().TSFlags & SIInstrFlags::VOPC; |
541 | } |
542 | |
543 | bool isVOPC(uint16_t Opcode) const { |
544 | return get(Opcode).TSFlags & SIInstrFlags::VOPC; |
545 | } |
546 | |
547 | static bool isMUBUF(const MachineInstr &MI) { |
548 | return MI.getDesc().TSFlags & SIInstrFlags::MUBUF; |
549 | } |
550 | |
551 | bool isMUBUF(uint16_t Opcode) const { |
552 | return get(Opcode).TSFlags & SIInstrFlags::MUBUF; |
553 | } |
554 | |
555 | static bool isMTBUF(const MachineInstr &MI) { |
556 | return MI.getDesc().TSFlags & SIInstrFlags::MTBUF; |
557 | } |
558 | |
559 | bool isMTBUF(uint16_t Opcode) const { |
560 | return get(Opcode).TSFlags & SIInstrFlags::MTBUF; |
561 | } |
562 | |
563 | static bool isSMRD(const MachineInstr &MI) { |
564 | return MI.getDesc().TSFlags & SIInstrFlags::SMRD; |
565 | } |
566 | |
567 | bool isSMRD(uint16_t Opcode) const { |
568 | return get(Opcode).TSFlags & SIInstrFlags::SMRD; |
569 | } |
570 | |
571 | bool isBufferSMRD(const MachineInstr &MI) const; |
572 | |
573 | static bool isDS(const MachineInstr &MI) { |
574 | return MI.getDesc().TSFlags & SIInstrFlags::DS; |
575 | } |
576 | |
577 | bool isDS(uint16_t Opcode) const { |
578 | return get(Opcode).TSFlags & SIInstrFlags::DS; |
579 | } |
580 | |
581 | static bool isLDSDMA(const MachineInstr &MI) { |
582 | return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI)); |
583 | } |
584 | |
  bool isLDSDMA(uint16_t Opcode) const {
586 | return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode)); |
587 | } |
588 | |
589 | static bool isGWS(const MachineInstr &MI) { |
590 | return MI.getDesc().TSFlags & SIInstrFlags::GWS; |
591 | } |
592 | |
593 | bool isGWS(uint16_t Opcode) const { |
594 | return get(Opcode).TSFlags & SIInstrFlags::GWS; |
595 | } |
596 | |
597 | bool isAlwaysGDS(uint16_t Opcode) const; |
598 | |
599 | static bool isMIMG(const MachineInstr &MI) { |
600 | return MI.getDesc().TSFlags & SIInstrFlags::MIMG; |
601 | } |
602 | |
603 | bool isMIMG(uint16_t Opcode) const { |
604 | return get(Opcode).TSFlags & SIInstrFlags::MIMG; |
605 | } |
606 | |
607 | static bool isVIMAGE(const MachineInstr &MI) { |
608 | return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE; |
609 | } |
610 | |
611 | bool isVIMAGE(uint16_t Opcode) const { |
612 | return get(Opcode).TSFlags & SIInstrFlags::VIMAGE; |
613 | } |
614 | |
615 | static bool isVSAMPLE(const MachineInstr &MI) { |
616 | return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE; |
617 | } |
618 | |
619 | bool isVSAMPLE(uint16_t Opcode) const { |
620 | return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE; |
621 | } |
622 | |
623 | static bool isGather4(const MachineInstr &MI) { |
624 | return MI.getDesc().TSFlags & SIInstrFlags::Gather4; |
625 | } |
626 | |
627 | bool isGather4(uint16_t Opcode) const { |
628 | return get(Opcode).TSFlags & SIInstrFlags::Gather4; |
629 | } |
630 | |
631 | static bool isFLAT(const MachineInstr &MI) { |
632 | return MI.getDesc().TSFlags & SIInstrFlags::FLAT; |
633 | } |
634 | |
635 | // Is a FLAT encoded instruction which accesses a specific segment, |
636 | // i.e. global_* or scratch_*. |
637 | static bool isSegmentSpecificFLAT(const MachineInstr &MI) { |
638 | auto Flags = MI.getDesc().TSFlags; |
639 | return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); |
640 | } |
641 | |
642 | bool isSegmentSpecificFLAT(uint16_t Opcode) const { |
643 | auto Flags = get(Opcode).TSFlags; |
644 | return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch); |
645 | } |
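
  // For instance (illustrative), these return true for segment-specific
  // opcodes such as GLOBAL_LOAD_DWORD or SCRATCH_LOAD_DWORD, but false for
  // plain FLAT_LOAD_DWORD, which may access any segment.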
646 | |
647 | static bool isFLATGlobal(const MachineInstr &MI) { |
648 | return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal; |
649 | } |
650 | |
651 | bool isFLATGlobal(uint16_t Opcode) const { |
652 | return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal; |
653 | } |
654 | |
655 | static bool isFLATScratch(const MachineInstr &MI) { |
656 | return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch; |
657 | } |
658 | |
659 | bool isFLATScratch(uint16_t Opcode) const { |
660 | return get(Opcode).TSFlags & SIInstrFlags::FlatScratch; |
661 | } |
662 | |
663 | // Any FLAT encoded instruction, including global_* and scratch_*. |
664 | bool isFLAT(uint16_t Opcode) const { |
665 | return get(Opcode).TSFlags & SIInstrFlags::FLAT; |
666 | } |
667 | |
668 | static bool isBlockLoadStore(uint16_t Opcode) { |
669 | switch (Opcode) { |
670 | case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: |
671 | case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: |
672 | case AMDGPU::SCRATCH_STORE_BLOCK_SADDR: |
673 | case AMDGPU::SCRATCH_LOAD_BLOCK_SADDR: |
674 | case AMDGPU::SCRATCH_STORE_BLOCK_SVS: |
675 | case AMDGPU::SCRATCH_LOAD_BLOCK_SVS: |
676 | return true; |
677 | default: |
678 | return false; |
679 | } |
680 | } |
681 | |
682 | static bool isEXP(const MachineInstr &MI) { |
683 | return MI.getDesc().TSFlags & SIInstrFlags::EXP; |
684 | } |
685 | |
686 | static bool isDualSourceBlendEXP(const MachineInstr &MI) { |
687 | if (!isEXP(MI)) |
688 | return false; |
    unsigned Target = MI.getOperand(0).getImm();
690 | return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 || |
691 | Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1; |
692 | } |
693 | |
694 | bool isEXP(uint16_t Opcode) const { |
695 | return get(Opcode).TSFlags & SIInstrFlags::EXP; |
696 | } |
697 | |
698 | static bool isAtomicNoRet(const MachineInstr &MI) { |
699 | return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet; |
700 | } |
701 | |
702 | bool isAtomicNoRet(uint16_t Opcode) const { |
703 | return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet; |
704 | } |
705 | |
706 | static bool isAtomicRet(const MachineInstr &MI) { |
707 | return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet; |
708 | } |
709 | |
710 | bool isAtomicRet(uint16_t Opcode) const { |
711 | return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet; |
712 | } |
713 | |
714 | static bool isAtomic(const MachineInstr &MI) { |
715 | return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet | |
716 | SIInstrFlags::IsAtomicNoRet); |
717 | } |
718 | |
719 | bool isAtomic(uint16_t Opcode) const { |
720 | return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet | |
721 | SIInstrFlags::IsAtomicNoRet); |
722 | } |
723 | |
724 | static bool mayWriteLDSThroughDMA(const MachineInstr &MI) { |
725 | return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD; |
726 | } |
727 | |
728 | static bool isWQM(const MachineInstr &MI) { |
729 | return MI.getDesc().TSFlags & SIInstrFlags::WQM; |
730 | } |
731 | |
732 | bool isWQM(uint16_t Opcode) const { |
733 | return get(Opcode).TSFlags & SIInstrFlags::WQM; |
734 | } |
735 | |
736 | static bool isDisableWQM(const MachineInstr &MI) { |
737 | return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM; |
738 | } |
739 | |
740 | bool isDisableWQM(uint16_t Opcode) const { |
741 | return get(Opcode).TSFlags & SIInstrFlags::DisableWQM; |
742 | } |
743 | |
  // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR form a special case of
  // SGPRs spilling to VGPRs: they are SGPR spills, but they come from VALU
  // instructions. We therefore need an explicit check for them, since just
  // checking whether the Spill bit is set and what instruction type it came
  // from would misclassify them.
749 | static bool isVGPRSpill(const MachineInstr &MI) { |
750 | return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR && |
751 | MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR && |
752 | (isSpill(MI) && isVALU(MI)); |
753 | } |
754 | |
755 | bool isVGPRSpill(uint16_t Opcode) const { |
756 | return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR && |
757 | Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR && |
758 | (isSpill(Opcode) && isVALU(Opcode)); |
759 | } |
760 | |
761 | static bool isSGPRSpill(const MachineInstr &MI) { |
762 | return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR || |
763 | MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR || |
764 | (isSpill(MI) && isSALU(MI)); |
765 | } |
766 | |
767 | bool isSGPRSpill(uint16_t Opcode) const { |
768 | return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR || |
769 | Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR || |
770 | (isSpill(Opcode) && isSALU(Opcode)); |
771 | } |
772 | |
773 | bool isSpill(uint16_t Opcode) const { |
774 | return get(Opcode).TSFlags & SIInstrFlags::Spill; |
775 | } |
776 | |
777 | static bool isSpill(const MachineInstr &MI) { |
778 | return MI.getDesc().TSFlags & SIInstrFlags::Spill; |
779 | } |
780 | |
781 | static bool isWWMRegSpillOpcode(uint16_t Opcode) { |
782 | return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE || |
783 | Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE || |
784 | Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE || |
785 | Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE; |
786 | } |
787 | |
788 | static bool isChainCallOpcode(uint64_t Opcode) { |
789 | return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 || |
790 | Opcode == AMDGPU::SI_CS_CHAIN_TC_W64; |
791 | } |
792 | |
793 | static bool isDPP(const MachineInstr &MI) { |
794 | return MI.getDesc().TSFlags & SIInstrFlags::DPP; |
795 | } |
796 | |
797 | bool isDPP(uint16_t Opcode) const { |
798 | return get(Opcode).TSFlags & SIInstrFlags::DPP; |
799 | } |
800 | |
801 | static bool isTRANS(const MachineInstr &MI) { |
802 | return MI.getDesc().TSFlags & SIInstrFlags::TRANS; |
803 | } |
804 | |
805 | bool isTRANS(uint16_t Opcode) const { |
806 | return get(Opcode).TSFlags & SIInstrFlags::TRANS; |
807 | } |
808 | |
809 | static bool isVOP3P(const MachineInstr &MI) { |
810 | return MI.getDesc().TSFlags & SIInstrFlags::VOP3P; |
811 | } |
812 | |
813 | bool isVOP3P(uint16_t Opcode) const { |
814 | return get(Opcode).TSFlags & SIInstrFlags::VOP3P; |
815 | } |
816 | |
817 | static bool isVINTRP(const MachineInstr &MI) { |
818 | return MI.getDesc().TSFlags & SIInstrFlags::VINTRP; |
819 | } |
820 | |
821 | bool isVINTRP(uint16_t Opcode) const { |
822 | return get(Opcode).TSFlags & SIInstrFlags::VINTRP; |
823 | } |
824 | |
825 | static bool isMAI(const MachineInstr &MI) { |
826 | return MI.getDesc().TSFlags & SIInstrFlags::IsMAI; |
827 | } |
828 | |
829 | bool isMAI(uint16_t Opcode) const { |
830 | return get(Opcode).TSFlags & SIInstrFlags::IsMAI; |
831 | } |
832 | |
833 | static bool isMFMA(const MachineInstr &MI) { |
834 | return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 && |
835 | MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64; |
836 | } |
837 | |
838 | static bool isDOT(const MachineInstr &MI) { |
839 | return MI.getDesc().TSFlags & SIInstrFlags::IsDOT; |
840 | } |
841 | |
842 | static bool isWMMA(const MachineInstr &MI) { |
843 | return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA; |
844 | } |
845 | |
846 | bool isWMMA(uint16_t Opcode) const { |
847 | return get(Opcode).TSFlags & SIInstrFlags::IsWMMA; |
848 | } |
849 | |
850 | static bool isMFMAorWMMA(const MachineInstr &MI) { |
851 | return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI); |
852 | } |
853 | |
854 | static bool isSWMMAC(const MachineInstr &MI) { |
855 | return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC; |
856 | } |
857 | |
858 | bool isSWMMAC(uint16_t Opcode) const { |
859 | return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC; |
860 | } |
861 | |
862 | bool isDOT(uint16_t Opcode) const { |
863 | return get(Opcode).TSFlags & SIInstrFlags::IsDOT; |
864 | } |
865 | |
866 | bool isXDL(const MachineInstr &MI) const; |
867 | |
  static bool isDGEMM(unsigned Opcode) { return AMDGPU::getMAIIsDGEMM(Opcode); }
869 | |
870 | static bool isLDSDIR(const MachineInstr &MI) { |
871 | return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR; |
872 | } |
873 | |
874 | bool isLDSDIR(uint16_t Opcode) const { |
875 | return get(Opcode).TSFlags & SIInstrFlags::LDSDIR; |
876 | } |
877 | |
878 | static bool isVINTERP(const MachineInstr &MI) { |
879 | return MI.getDesc().TSFlags & SIInstrFlags::VINTERP; |
880 | } |
881 | |
882 | bool isVINTERP(uint16_t Opcode) const { |
883 | return get(Opcode).TSFlags & SIInstrFlags::VINTERP; |
884 | } |
885 | |
886 | static bool isScalarUnit(const MachineInstr &MI) { |
887 | return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD); |
888 | } |
889 | |
890 | static bool usesVM_CNT(const MachineInstr &MI) { |
891 | return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT; |
892 | } |
893 | |
894 | static bool usesLGKM_CNT(const MachineInstr &MI) { |
895 | return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT; |
896 | } |
897 | |
  // Most SOPK instructions treat the immediate as a signed 16-bit value;
  // however, some use it as unsigned.
900 | static bool sopkIsZext(unsigned Opcode) { |
901 | return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 || |
902 | Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 || |
903 | Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 || |
904 | Opcode == AMDGPU::S_GETREG_B32; |
905 | } |
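
  // Consequently the legal immediate range depends on the opcode, e.g.
  // (illustrative): [0, 65535] for S_CMPK_EQ_U32 versus [-32768, 32767] for
  // its signed counterpart S_CMPK_EQ_I32.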
906 | |
907 | /// \returns true if this is an s_store_dword* instruction. This is more |
908 | /// specific than isSMEM && mayStore. |
909 | static bool isScalarStore(const MachineInstr &MI) { |
910 | return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE; |
911 | } |
912 | |
913 | bool isScalarStore(uint16_t Opcode) const { |
914 | return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE; |
915 | } |
916 | |
917 | static bool isFixedSize(const MachineInstr &MI) { |
918 | return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE; |
919 | } |
920 | |
921 | bool isFixedSize(uint16_t Opcode) const { |
922 | return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE; |
923 | } |
924 | |
925 | static bool hasFPClamp(const MachineInstr &MI) { |
926 | return MI.getDesc().TSFlags & SIInstrFlags::FPClamp; |
927 | } |
928 | |
929 | bool hasFPClamp(uint16_t Opcode) const { |
930 | return get(Opcode).TSFlags & SIInstrFlags::FPClamp; |
931 | } |
932 | |
933 | static bool hasIntClamp(const MachineInstr &MI) { |
934 | return MI.getDesc().TSFlags & SIInstrFlags::IntClamp; |
935 | } |
936 | |
937 | uint64_t getClampMask(const MachineInstr &MI) const { |
938 | const uint64_t ClampFlags = SIInstrFlags::FPClamp | |
939 | SIInstrFlags::IntClamp | |
940 | SIInstrFlags::ClampLo | |
941 | SIInstrFlags::ClampHi; |
942 | return MI.getDesc().TSFlags & ClampFlags; |
943 | } |
944 | |
945 | static bool usesFPDPRounding(const MachineInstr &MI) { |
946 | return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding; |
947 | } |
948 | |
949 | bool usesFPDPRounding(uint16_t Opcode) const { |
950 | return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding; |
951 | } |
952 | |
953 | static bool isFPAtomic(const MachineInstr &MI) { |
954 | return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic; |
955 | } |
956 | |
957 | bool isFPAtomic(uint16_t Opcode) const { |
958 | return get(Opcode).TSFlags & SIInstrFlags::FPAtomic; |
959 | } |
960 | |
961 | static bool isNeverUniform(const MachineInstr &MI) { |
962 | return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform; |
963 | } |
964 | |
  // Check whether the opcode starts a barrier. Before gfx12 this is just
  // S_BARRIER, but with the addition of S_BARRIER_SIGNAL* / S_BARRIER_WAIT we
  // want to check for the barrier start (S_BARRIER_SIGNAL*).
968 | bool isBarrierStart(unsigned Opcode) const { |
969 | return Opcode == AMDGPU::S_BARRIER || |
970 | Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 || |
971 | Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 || |
972 | Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM || |
973 | Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM; |
974 | } |
975 | |
976 | bool isBarrier(unsigned Opcode) const { |
977 | return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT || |
978 | Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER; |
979 | } |
980 | |
981 | static bool isF16PseudoScalarTrans(unsigned Opcode) { |
982 | return Opcode == AMDGPU::V_S_EXP_F16_e64 || |
983 | Opcode == AMDGPU::V_S_LOG_F16_e64 || |
984 | Opcode == AMDGPU::V_S_RCP_F16_e64 || |
985 | Opcode == AMDGPU::V_S_RSQ_F16_e64 || |
986 | Opcode == AMDGPU::V_S_SQRT_F16_e64; |
987 | } |
988 | |
989 | static bool doesNotReadTiedSource(const MachineInstr &MI) { |
990 | return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead; |
991 | } |
992 | |
993 | bool doesNotReadTiedSource(uint16_t Opcode) const { |
994 | return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead; |
995 | } |
996 | |
997 | bool isIGLP(unsigned Opcode) const { |
998 | return Opcode == AMDGPU::SCHED_BARRIER || |
999 | Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT; |
1000 | } |
1001 | |
  bool isIGLP(const MachineInstr &MI) const { return isIGLP(MI.getOpcode()); }
1003 | |
1004 | // Return true if the instruction is mutually exclusive with all non-IGLP DAG |
1005 | // mutations, requiring all other mutations to be disabled. |
1006 | bool isIGLPMutationOnly(unsigned Opcode) const { |
1007 | return Opcode == AMDGPU::SCHED_GROUP_BARRIER || Opcode == AMDGPU::IGLP_OPT; |
1008 | } |
1009 | |
1010 | static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) { |
1011 | switch (Opcode) { |
1012 | case AMDGPU::S_WAITCNT_soft: |
1013 | return AMDGPU::S_WAITCNT; |
1014 | case AMDGPU::S_WAITCNT_VSCNT_soft: |
1015 | return AMDGPU::S_WAITCNT_VSCNT; |
1016 | case AMDGPU::S_WAIT_LOADCNT_soft: |
1017 | return AMDGPU::S_WAIT_LOADCNT; |
1018 | case AMDGPU::S_WAIT_STORECNT_soft: |
1019 | return AMDGPU::S_WAIT_STORECNT; |
1020 | case AMDGPU::S_WAIT_SAMPLECNT_soft: |
1021 | return AMDGPU::S_WAIT_SAMPLECNT; |
1022 | case AMDGPU::S_WAIT_BVHCNT_soft: |
1023 | return AMDGPU::S_WAIT_BVHCNT; |
1024 | case AMDGPU::S_WAIT_DSCNT_soft: |
1025 | return AMDGPU::S_WAIT_DSCNT; |
1026 | case AMDGPU::S_WAIT_KMCNT_soft: |
1027 | return AMDGPU::S_WAIT_KMCNT; |
1028 | default: |
1029 | return Opcode; |
1030 | } |
1031 | } |
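
  // A "soft" waitcnt can be strengthened into a mandatory one in place
  // (illustrative sketch, assuming TII points to this SIInstrInfo):
  //   if (unsigned Opc = getNonSoftWaitcntOpcode(MI.getOpcode());
  //       Opc != MI.getOpcode())
  //     MI.setDesc(TII->get(Opc));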
1032 | |
1033 | bool isWaitcnt(unsigned Opcode) const { |
1034 | switch (getNonSoftWaitcntOpcode(Opcode)) { |
1035 | case AMDGPU::S_WAITCNT: |
1036 | case AMDGPU::S_WAITCNT_VSCNT: |
1037 | case AMDGPU::S_WAITCNT_VMCNT: |
1038 | case AMDGPU::S_WAITCNT_EXPCNT: |
1039 | case AMDGPU::S_WAITCNT_LGKMCNT: |
1040 | case AMDGPU::S_WAIT_LOADCNT: |
1041 | case AMDGPU::S_WAIT_LOADCNT_DSCNT: |
1042 | case AMDGPU::S_WAIT_STORECNT: |
1043 | case AMDGPU::S_WAIT_STORECNT_DSCNT: |
1044 | case AMDGPU::S_WAIT_SAMPLECNT: |
1045 | case AMDGPU::S_WAIT_BVHCNT: |
1046 | case AMDGPU::S_WAIT_EXPCNT: |
1047 | case AMDGPU::S_WAIT_DSCNT: |
1048 | case AMDGPU::S_WAIT_KMCNT: |
1049 | case AMDGPU::S_WAIT_IDLE: |
1050 | return true; |
1051 | default: |
1052 | return false; |
1053 | } |
1054 | } |
1055 | |
1056 | bool isVGPRCopy(const MachineInstr &MI) const { |
1057 | assert(isCopyInstr(MI)); |
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
1062 | } |
1063 | |
1064 | bool hasVGPRUses(const MachineInstr &MI) const { |
1065 | const MachineFunction &MF = *MI.getParent()->getParent(); |
1066 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
                          return MO.isReg() && RI.isVGPR(MRI, MO.getReg());
                        });
1070 | } |
1071 | |
  /// Return true if the instruction modifies the mode register.
1073 | static bool modifiesModeRegister(const MachineInstr &MI); |
1074 | |
1075 | /// This function is used to determine if an instruction can be safely |
1076 | /// executed under EXEC = 0 without hardware error, indeterminate results, |
1077 | /// and/or visible effects on future vector execution or outside the shader. |
1078 | /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is |
1079 | /// used in removing branches over short EXEC = 0 sequences. |
1080 | /// As such it embeds certain assumptions which may not apply to every case |
1081 | /// of EXEC = 0 execution. |
1082 | bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const; |
1083 | |
1084 | /// Returns true if the instruction could potentially depend on the value of |
1085 | /// exec. If false, exec dependencies may safely be ignored. |
1086 | bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const; |
1087 | |
1088 | bool isInlineConstant(const APInt &Imm) const; |
1089 | |
1090 | bool isInlineConstant(const APFloat &Imm) const; |
1091 | |
1092 | // Returns true if this non-register operand definitely does not need to be |
1093 | // encoded as a 32-bit literal. Note that this function handles all kinds of |
1094 | // operands, not just immediates. |
1095 | // |
1096 | // Some operands like FrameIndexes could resolve to an inline immediate value |
  // that will not require an additional 4 bytes; this function assumes that it
1098 | // will. |
1099 | bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const { |
    assert(!MO.isReg() && "isInlineConstant called on register operand!");
    if (!MO.isImm())
      return false;
    return isInlineConstant(MO.getImm(), OperandType);
1104 | } |
1105 | bool isInlineConstant(int64_t ImmVal, uint8_t OperandType) const; |
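
  // As a concrete example (illustrative): for a 32-bit integer operand, an
  // immediate of 64 is encodable as an inline constant, while 65 must be
  // emitted as a 32-bit literal, since the inline range for integers is
  // [-16, 64].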
1106 | |
1107 | bool isInlineConstant(const MachineOperand &MO, |
1108 | const MCOperandInfo &OpInfo) const { |
    return isInlineConstant(MO, OpInfo.OperandType);
1110 | } |
1111 | |
  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, it
  /// would be an inline immediate.
1114 | bool isInlineConstant(const MachineInstr &MI, |
1115 | const MachineOperand &UseMO, |
1116 | const MachineOperand &DefMO) const { |
1117 | assert(UseMO.getParent() == &MI); |
1118 | int OpIdx = UseMO.getOperandNo(); |
1119 | if (OpIdx >= MI.getDesc().NumOperands) |
1120 | return false; |
1121 | |
    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
1123 | } |
1124 | |
  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
1126 | /// immediate. |
1127 | bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const { |
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
1130 | } |
1131 | |
1132 | bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx, |
1133 | int64_t ImmVal) const { |
1134 | if (OpIdx >= MI.getDesc().NumOperands) |
1135 | return false; |
1136 | |
1137 | if (isCopyInstr(MI)) { |
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(ImmVal, OpType);
    }

    return isInlineConstant(ImmVal, MI.getDesc().operands()[OpIdx].OperandType);
1147 | } |
1148 | |
1149 | bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx, |
1150 | const MachineOperand &MO) const { |
    return isInlineConstant(MI, OpIdx, MO.getImm());
1152 | } |
1153 | |
1154 | bool isInlineConstant(const MachineOperand &MO) const { |
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
1156 | } |
1157 | |
1158 | bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, |
1159 | const MachineOperand &MO) const; |
1160 | |
1161 | /// Return true if this 64-bit VALU instruction has a 32-bit encoding. |
1162 | /// This function will return false if you pass it a 32-bit instruction. |
1163 | bool hasVALU32BitEncoding(unsigned Opcode) const; |
1164 | |
1165 | /// Returns true if this operand uses the constant bus. |
1166 | bool usesConstantBus(const MachineRegisterInfo &MRI, |
1167 | const MachineOperand &MO, |
1168 | const MCOperandInfo &OpInfo) const; |
1169 | |
1170 | bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineInstr &MI, |
1171 | int OpIdx) const { |
    return usesConstantBus(MRI, MI.getOperand(OpIdx),
                           MI.getDesc().operands()[OpIdx]);
1174 | } |
1175 | |
1176 | /// Return true if this instruction has any modifiers. |
1177 | /// e.g. src[012]_mod, omod, clamp. |
1178 | bool hasModifiers(unsigned Opcode) const; |
1179 | |
  bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;
1182 | |
1183 | bool canShrink(const MachineInstr &MI, |
1184 | const MachineRegisterInfo &MRI) const; |
1185 | |
1186 | MachineInstr *buildShrunkInst(MachineInstr &MI, |
1187 | unsigned NewOpcode) const; |
1188 | |
1189 | bool verifyInstruction(const MachineInstr &MI, |
1190 | StringRef &ErrInfo) const override; |
1191 | |
1192 | unsigned getVALUOp(const MachineInstr &MI) const; |
1193 | |
1194 | void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, |
1195 | MachineBasicBlock::iterator MBBI, |
1196 | const DebugLoc &DL, Register Reg, bool IsSCCLive, |
1197 | SlotIndexes *Indexes = nullptr) const; |
1198 | |
1199 | void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, |
1200 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, |
1201 | Register Reg, SlotIndexes *Indexes = nullptr) const; |
1202 | |
  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, using the other operands
  /// to infer the correct register class.
1208 | const TargetRegisterClass *getOpRegClass(const MachineInstr &MI, |
1209 | unsigned OpNo) const; |
1210 | |
  /// Return the size in bytes of the operand \p OpNo on the given
  /// instruction opcode.
1213 | unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const { |
1214 | const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo]; |
1215 | |
1216 | if (OpInfo.RegClass == -1) { |
1217 | // If this is an immediate operand, this must be a 32-bit literal. |
1218 | assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE); |
1219 | return 4; |
1220 | } |
1221 | |
    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
1223 | } |
1224 | |
1225 | /// This form should usually be preferred since it handles operands |
1226 | /// with unknown register classes. |
1227 | unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { |
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
1235 | } |
1236 | |
1237 | /// Legalize the \p OpIndex operand of this instruction by inserting |
1238 | /// a MOV. For example: |
1239 | /// ADD_I32_e32 VGPR0, 15 |
1240 | /// to |
1241 | /// MOV VGPR1, 15 |
1242 | /// ADD_I32_e32 VGPR0, VGPR1 |
1243 | /// |
1244 | /// If the operand being legalized is a register, then a COPY will be used |
1245 | /// instead of MOV. |
1246 | void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const; |
1247 | |
  /// Check if \p MO would be a legal operand if it were
  /// the operand \p OpIdx of \p MI.
1250 | bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, |
1251 | const MachineOperand *MO = nullptr) const; |
1252 | |
1253 | /// Check if \p MO would be a valid operand for the given operand |
1254 | /// definition \p OpInfo. Note this does not attempt to validate constant bus |
1255 | /// restrictions (e.g. literal constant usage). |
1256 | bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, |
1257 | const MCOperandInfo &OpInfo, |
1258 | const MachineOperand &MO) const; |
1259 | |
1260 | /// Check if \p MO (a register operand) is a legal register for the |
1261 | /// given operand description or operand index. |
  /// The operand index version provides more legality checks.
1263 | bool isLegalRegOperand(const MachineRegisterInfo &MRI, |
1264 | const MCOperandInfo &OpInfo, |
1265 | const MachineOperand &MO) const; |
1266 | bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx, |
1267 | const MachineOperand &MO) const; |
1268 | /// Legalize operands in \p MI by either commuting it or inserting a |
1269 | /// copy of src1. |
1270 | void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const; |
1271 | |
1272 | /// Fix operands in \p MI to satisfy constant bus requirements. |
1273 | void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const; |
1274 | |
1275 | /// Copy a value from a VGPR (\p SrcReg) to SGPR. The desired register class |
1276 | /// for the dst register (\p DstRC) can be optionally supplied. This function |
  /// can only be used when it is known that the value in SrcReg is the same across
1278 | /// all threads in the wave. |
1279 | /// \returns The SGPR register that \p SrcReg was copied to. |
1280 | Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, |
1281 | MachineRegisterInfo &MRI, |
1282 | const TargetRegisterClass *DstRC = nullptr) const; |
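
  // Illustrative use, assuming VSrc is known to be wave-uniform at UseMI
  // (VSrc, UseMI and TII are placeholder names):
  //   Register SGPR = TII->readlaneVGPRToSGPR(VSrc, UseMI, MRI);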
1283 | |
1284 | void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const; |
1285 | void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const; |
1286 | |
1287 | void legalizeGenericOperand(MachineBasicBlock &InsertMBB, |
1288 | MachineBasicBlock::iterator I, |
1289 | const TargetRegisterClass *DstRC, |
1290 | MachineOperand &Op, MachineRegisterInfo &MRI, |
1291 | const DebugLoc &DL) const; |
1292 | |
1293 | /// Legalize all operands in this instruction. This function may create new |
1294 | /// instructions and control-flow around \p MI. If present, \p MDT is |
1295 | /// updated. |
1296 | /// \returns A new basic block that contains \p MI if new blocks were created. |
1297 | MachineBasicBlock * |
1298 | legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const; |
1299 | |
1300 | /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand |
1301 | /// was moved to VGPR. \returns true if succeeded. |
1302 | bool moveFlatAddrToVGPR(MachineInstr &Inst) const; |
1303 | |
  /// Fix operands in \p Inst as part of 16-bit SALU to VALU lowering.
1305 | void legalizeOperandsVALUt16(MachineInstr &Inst, |
1306 | MachineRegisterInfo &MRI) const; |
1307 | void legalizeOperandsVALUt16(MachineInstr &Inst, unsigned OpIdx, |
1308 | MachineRegisterInfo &MRI) const; |
1309 | |
  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of the machine
  /// instructions in \p Worklist to the VALU if necessary. If present, \p MDT is
1313 | /// updated. |
1314 | void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const; |
1315 | |
1316 | void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, |
1317 | MachineInstr &Inst) const; |
1318 | |
1319 | void insertNoop(MachineBasicBlock &MBB, |
1320 | MachineBasicBlock::iterator MI) const override; |
1321 | |
1322 | void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, |
1323 | unsigned Quantity) const override; |
1324 | |
1325 | void insertReturn(MachineBasicBlock &MBB) const; |
1326 | |
  /// Build instructions that simulate the behavior of an `s_trap 2` instruction
  /// for hardware (namely, gfx11) that runs in PRIV=1 mode. There, s_trap is
  /// interpreted as a nop.
1330 | MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI, |
1331 | MachineBasicBlock &MBB, |
1332 | MachineInstr &MI, |
1333 | const DebugLoc &DL) const; |
1334 | |
1335 | /// Return the number of wait states that result from executing this |
1336 | /// instruction. |
1337 | static unsigned getNumWaitStates(const MachineInstr &MI); |
1338 | |
  /// Returns the operand named \p OperandName. If \p MI does not have an
  /// operand with that name, this function returns nullptr.
1341 | LLVM_READONLY |
1342 | MachineOperand *getNamedOperand(MachineInstr &MI, |
1343 | AMDGPU::OpName OperandName) const; |
1344 | |
1345 | LLVM_READONLY |
1346 | const MachineOperand *getNamedOperand(const MachineInstr &MI, |
1347 | AMDGPU::OpName OperandName) const { |
    return getNamedOperand(const_cast<MachineInstr &>(MI), OperandName);
1349 | } |
1350 | |
1351 | /// Get required immediate operand |
1352 | int64_t getNamedImmOperand(const MachineInstr &MI, |
1353 | AMDGPU::OpName OperandName) const { |
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
    return MI.getOperand(Idx).getImm();
1356 | } |
1357 | |
1358 | uint64_t getDefaultRsrcDataFormat() const; |
1359 | uint64_t getScratchRsrcWords23() const; |
1360 | |
1361 | bool isLowLatencyInstruction(const MachineInstr &MI) const; |
1362 | bool isHighLatencyDef(int Opc) const override; |
1363 | |
1364 | /// Return the descriptor of the target-specific machine instruction |
1365 | /// that corresponds to the specified pseudo or native opcode. |
1366 | const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const { |
    return get(pseudoToMCOpcode(Opcode));
1368 | } |
1369 | |
1370 | unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const; |
1371 | unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const; |
1372 | |
1373 | Register isLoadFromStackSlot(const MachineInstr &MI, |
1374 | int &FrameIndex) const override; |
1375 | Register isStoreToStackSlot(const MachineInstr &MI, |
1376 | int &FrameIndex) const override; |
1377 | |
1378 | unsigned getInstBundleSize(const MachineInstr &MI) const; |
1379 | unsigned getInstSizeInBytes(const MachineInstr &MI) const override; |
1380 | |
1381 | bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; |
1382 | |
1383 | std::pair<unsigned, unsigned> |
1384 | decomposeMachineOperandsTargetFlags(unsigned TF) const override; |
1385 | |
1386 | ArrayRef<std::pair<int, const char *>> |
1387 | getSerializableTargetIndices() const override; |
1388 | |
1389 | ArrayRef<std::pair<unsigned, const char *>> |
1390 | getSerializableDirectMachineOperandTargetFlags() const override; |
1391 | |
1392 | ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> |
1393 | getSerializableMachineMemOperandTargetFlags() const override; |
1394 | |
1395 | ScheduleHazardRecognizer * |
1396 | CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
1397 | const ScheduleDAG *DAG) const override; |
1398 | |
1399 | ScheduleHazardRecognizer * |
1400 | CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override; |
1401 | |
1402 | ScheduleHazardRecognizer * |
1403 | CreateTargetMIHazardRecognizer(const InstrItineraryData *II, |
1404 | const ScheduleDAGMI *DAG) const override; |
1405 | |
1406 | unsigned getLiveRangeSplitOpcode(Register Reg, |
1407 | const MachineFunction &MF) const override; |
1408 | |
1409 | bool isBasicBlockPrologue(const MachineInstr &MI, |
1410 | Register Reg = Register()) const override; |
1411 | |
1412 | MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, |
1413 | MachineBasicBlock::iterator InsPt, |
1414 | const DebugLoc &DL, Register Src, |
1415 | Register Dst) const override; |
1416 | |
1417 | MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, |
1418 | MachineBasicBlock::iterator InsPt, |
1419 | const DebugLoc &DL, Register Src, |
1420 | unsigned SrcSubReg, |
1421 | Register Dst) const override; |
1422 | |
1423 | bool isWave32() const; |
1424 | |
1425 | /// Return a partially built integer add instruction without carry. |
1426 | /// Caller must add source operands. |
  /// For pre-GFX9 it will generate an unused carry destination operand.
1428 | /// TODO: After GFX9 it should return a no-carry operation. |
1429 | MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, |
1430 | MachineBasicBlock::iterator I, |
1431 | const DebugLoc &DL, |
1432 | Register DestReg) const; |
1433 | |
1434 | MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, |
1435 | MachineBasicBlock::iterator I, |
1436 | const DebugLoc &DL, |
1437 | Register DestReg, |
1438 | RegScavenger &RS) const; |
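
  // Since the returned MachineInstrBuilder has only the destination attached,
  // a typical call site appends the sources itself (illustrative sketch):
  //   TII->getAddNoCarry(MBB, I, DL, DestReg)
  //       .addReg(BaseReg)
  //       .addImm(Offset);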
1439 | |
1440 | static bool isKillTerminator(unsigned Opcode); |
1441 | const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const; |
1442 | |
1443 | bool isLegalMUBUFImmOffset(unsigned Imm) const; |
1444 | |
1445 | static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST); |
1446 | |
1447 | bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, |
1448 | Align Alignment = Align(4)) const; |
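
  // Illustrative sketch (hypothetical offset value): splitting a constant
  // offset into the part that fits the MUBUF immediate field and a remainder
  // for SOffset:
  //
  //   uint32_t SOffset, ImmOffset;
  //   if (TII->splitMUBUFOffset(/*Imm=*/0x1234, SOffset, ImmOffset))
  //     ; // Encode ImmOffset in the instruction; materialize SOffset
  //       // separately if it is non-zero.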
1449 | |
  /// Returns true if \p Offset is legal for the subtarget as the offset to a
  /// FLAT encoded instruction of the given \p FlatVariant in address space
  /// \p AddrSpace.
1453 | bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, |
1454 | uint64_t FlatVariant) const; |
1455 | |
1456 | /// Split \p COffsetVal into {immediate offset field, remainder offset} |
1457 | /// values. |
1458 | std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal, |
1459 | unsigned AddrSpace, |
1460 | uint64_t FlatVariant) const; |
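
  // Illustrative sketch (hypothetical values): an offset that is not directly
  // encodable can be split into an encodable immediate plus a remainder to be
  // folded into the address computation:
  //
  //   if (!TII->isLegalFLATOffset(COffsetVal, AddrSpace, FlatVariant)) {
  //     auto [ImmField, RemainderOffset] =
  //         TII->splitFlatOffset(COffsetVal, AddrSpace, FlatVariant);
  //     // Encode ImmField in the instruction; add RemainderOffset to the
  //     // base address.
  //   }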
1461 | |
1462 | /// Returns true if negative offsets are allowed for the given \p FlatVariant. |
1463 | bool allowNegativeFlatOffset(uint64_t FlatVariant) const; |
1464 | |
  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If \p Opcode is not a pseudo instruction, it is returned
  /// unchanged.
1468 | int pseudoToMCOpcode(int Opcode) const; |
1469 | |
1470 | /// \brief Check if this instruction should only be used by assembler. |
1471 | /// Return true if this opcode should not be used by codegen. |
1472 | bool isAsmOnlyOpcode(int MCOp) const; |
1473 | |
1474 | const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum, |
1475 | const TargetRegisterInfo *TRI, |
1476 | const MachineFunction &MF) |
1477 | const override; |
1478 | |
1479 | void fixImplicitOperands(MachineInstr &MI) const; |
1480 | |
1481 | MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, |
1482 | ArrayRef<unsigned> Ops, |
1483 | MachineBasicBlock::iterator InsertPt, |
1484 | int FrameIndex, |
1485 | LiveIntervals *LIS = nullptr, |
1486 | VirtRegMap *VRM = nullptr) const override; |
1487 | |
1488 | unsigned getInstrLatency(const InstrItineraryData *ItinData, |
1489 | const MachineInstr &MI, |
1490 | unsigned *PredCost = nullptr) const override; |
1491 | |
1492 | InstructionUniformity |
1493 | getInstructionUniformity(const MachineInstr &MI) const override final; |
1494 | |
1495 | InstructionUniformity |
1496 | getGenericInstructionUniformity(const MachineInstr &MI) const; |
1497 | |
1498 | const MIRFormatter *getMIRFormatter() const override { |
1499 | if (!Formatter) |
1500 | Formatter = std::make_unique<AMDGPUMIRFormatter>(); |
1501 | return Formatter.get(); |
1502 | } |
1503 | |
1504 | static unsigned getDSShaderTypeValue(const MachineFunction &MF); |
1505 | |
1506 | const TargetSchedModel &getSchedModel() const { return SchedModel; } |
1507 | |
  // Enforce even register alignment for operand \p OpName if required by the
  // target. This is used when an operand is a 32-bit register but must
  // nevertheless be even-aligned.
1511 | void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const; |
1512 | }; |
1513 | |
/// \brief Returns true if the reg:subreg pair \p P is of register class
/// \p TRC.
1515 | inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, |
1516 | const TargetRegisterClass &TRC, |
1517 | MachineRegisterInfo &MRI) { |
  auto *RC = MRI.getRegClass(P.Reg);
1519 | if (!P.SubReg) |
1520 | return RC == &TRC; |
1521 | auto *TRI = MRI.getTargetRegisterInfo(); |
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
1523 | } |
1524 | |
1525 | /// \brief Create RegSubRegPair from a register MachineOperand |
1526 | inline |
1527 | TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) { |
1528 | assert(O.isReg()); |
1529 | return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg()); |
1530 | } |
1531 | |
1532 | /// \brief Return the SubReg component from REG_SEQUENCE |
1533 | TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, |
1534 | unsigned SubReg); |
1535 | |
1536 | /// \brief Return the defining instruction for a given reg:subreg pair |
1537 | /// skipping copy like instructions and subreg-manipulation pseudos. |
1538 | /// Following another subreg of a reg:subreg isn't supported. |
1539 | MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, |
1540 | MachineRegisterInfo &MRI); |
1541 | |
1542 | /// \brief Return false if EXEC is not changed between the def of \p VReg at \p |
1543 | /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not |
1544 | /// attempt to track between blocks. |
1545 | bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, |
1546 | Register VReg, |
1547 | const MachineInstr &DefMI, |
1548 | const MachineInstr &UseMI); |
1549 | |
1550 | /// \brief Return false if EXEC is not changed between the def of \p VReg at \p |
1551 | /// DefMI and all its uses. Should be run on SSA. Currently does not attempt to |
1552 | /// track between blocks. |
1553 | bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, |
1554 | Register VReg, |
1555 | const MachineInstr &DefMI); |
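
// Illustrative sketch (hypothetical fold, not part of this header): passes
// typically consult these checks before moving a VALU computation across a
// potential EXEC change, e.g. when folding DefMI into UseMI:
//
//   if (execMayBeModifiedBeforeUse(MRI, VReg, DefMI, UseMI))
//     return false; // Unsafe: a lane-mask change may intervene.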
1556 | |
1557 | namespace AMDGPU { |
1558 | |
1559 | LLVM_READONLY |
1560 | int getVOPe64(uint16_t Opcode); |
1561 | |
1562 | LLVM_READONLY |
1563 | int getVOPe32(uint16_t Opcode); |
1564 | |
1565 | LLVM_READONLY |
1566 | int getSDWAOp(uint16_t Opcode); |
1567 | |
1568 | LLVM_READONLY |
1569 | int getDPPOp32(uint16_t Opcode); |
1570 | |
1571 | LLVM_READONLY |
1572 | int getDPPOp64(uint16_t Opcode); |
1573 | |
1574 | LLVM_READONLY |
1575 | int getBasicFromSDWAOp(uint16_t Opcode); |
1576 | |
1577 | LLVM_READONLY |
1578 | int getCommuteRev(uint16_t Opcode); |
1579 | |
1580 | LLVM_READONLY |
1581 | int getCommuteOrig(uint16_t Opcode); |
1582 | |
1583 | LLVM_READONLY |
1584 | int getAddr64Inst(uint16_t Opcode); |
1585 | |
1586 | /// Check if \p Opcode is an Addr64 opcode. |
1587 | /// |
1588 | /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1. |
1589 | LLVM_READONLY |
1590 | int getIfAddr64Inst(uint16_t Opcode); |
1591 | |
1592 | LLVM_READONLY |
1593 | int getSOPKOp(uint16_t Opcode); |
1594 | |
1595 | /// \returns SADDR form of a FLAT Global instruction given an \p Opcode |
1596 | /// of a VADDR form. |
1597 | LLVM_READONLY |
1598 | int getGlobalSaddrOp(uint16_t Opcode); |
1599 | |
1600 | /// \returns VADDR form of a FLAT Global instruction given an \p Opcode |
1601 | /// of a SADDR form. |
1602 | LLVM_READONLY |
1603 | int getGlobalVaddrOp(uint16_t Opcode); |
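
// Illustrative sketch: like the other lookup helpers in this namespace, these
// return -1 when no counterpart instruction exists, so callers test the
// result before rewriting:
//
//   int NewOpc = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
//   if (NewOpc != -1)
//     MI.setDesc(TII->get(NewOpc));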
1604 | |
1605 | LLVM_READONLY |
1606 | int getVCMPXNoSDstOp(uint16_t Opcode); |
1607 | |
1608 | /// \returns ST form with only immediate offset of a FLAT Scratch instruction |
1609 | /// given an \p Opcode of an SS (SADDR) form. |
1610 | LLVM_READONLY |
1611 | int getFlatScratchInstSTfromSS(uint16_t Opcode); |
1612 | |
1613 | /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode |
1614 | /// of an SVS (SADDR + VADDR) form. |
1615 | LLVM_READONLY |
1616 | int getFlatScratchInstSVfromSVS(uint16_t Opcode); |
1617 | |
1618 | /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode |
1619 | /// of an SV (VADDR) form. |
1620 | LLVM_READONLY |
1621 | int getFlatScratchInstSSfromSV(uint16_t Opcode); |
1622 | |
1623 | /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode |
1624 | /// of an SS (SADDR) form. |
1625 | LLVM_READONLY |
1626 | int getFlatScratchInstSVfromSS(uint16_t Opcode); |
1627 | |
/// \returns the earlyclobber version of a MAC MFMA, if one exists.
1629 | LLVM_READONLY |
1630 | int getMFMAEarlyClobberOp(uint16_t Opcode); |
1631 | |
1632 | /// \returns Version of an MFMA instruction which uses AGPRs for srcC and |
1633 | /// vdst, given an \p Opcode of an MFMA which uses VGPRs for srcC/vdst. |
1634 | LLVM_READONLY |
1635 | int getMFMASrcCVDstAGPROp(uint16_t Opcode); |
1636 | |
1637 | /// \returns v_cmpx version of a v_cmp instruction. |
1638 | LLVM_READONLY |
1639 | int getVCMPXOpFromVCMP(uint16_t Opcode); |
1640 | |
1641 | const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; |
1642 | const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); |
1643 | const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); |
1644 | const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23); |
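
// Illustrative sketch (assumed composition, not a definitive recipe): these
// constants describe fields in the high 64 bits of a 128-bit buffer resource
// descriptor and are combined by OR-ing shifted field values; the element
// size encoding below is a hypothetical, target-defined value:
//
//   uint64_t Rsrc23 = AMDGPU::RSRC_DATA_FORMAT |
//                     AMDGPU::RSRC_TID_ENABLE |
//                     (EltSizeEncoding << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);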
1645 | |
enum {
  // For SGPR-to-VGPR spill instructions.
  SGPR_SPILL = MachineInstr::TAsmComments
};

} // end namespace AMDGPU
1654 | |
1655 | namespace SI { |
1656 | namespace KernelInputOffsets { |
1657 | |
1658 | /// Offsets in bytes from the start of the input buffer |
1659 | enum Offsets { |
1660 | NGROUPS_X = 0, |
1661 | NGROUPS_Y = 4, |
1662 | NGROUPS_Z = 8, |
1663 | GLOBAL_SIZE_X = 12, |
1664 | GLOBAL_SIZE_Y = 16, |
1665 | GLOBAL_SIZE_Z = 20, |
1666 | LOCAL_SIZE_X = 24, |
1667 | LOCAL_SIZE_Y = 28, |
1668 | LOCAL_SIZE_Z = 32 |
1669 | }; |
1670 | |
1671 | } // end namespace KernelInputOffsets |
1672 | } // end namespace SI |
1673 | |
1674 | } // end namespace llvm |
1675 | |
1676 | #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H |
1677 | |