//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIInstrInfo.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H

#include "AMDGPUMIRFormatter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"

#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {

class APInt;
class GCNSubtarget;
class LiveVariables;
class MachineDominatorTree;
class MachineRegisterInfo;
class RegScavenger;
class TargetRegisterClass;
class ScheduleHazardRecognizer;

/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;

/// Mark the MMO of a load as the last use.
static const MachineMemOperand::Flags MOLastUse =
    MachineMemOperand::MOTargetFlag2;
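
// A minimal sketch of how a pass might attach one of these flags (assumes an
// existing MachineMemOperand *MMO already proven to satisfy the property
// documented above):
//
//   MMO->setFlags(MONoClobber); // no clobbering store reaches this load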

/// Utility to store machine instructions worklist.
struct SIInstrWorklist {
  SIInstrWorklist() = default;

  void insert(MachineInstr *MI);

  MachineInstr *top() const {
    auto iter = InstrList.begin();
    return *iter;
  }

  void erase_top() {
    auto iter = InstrList.begin();
    InstrList.erase(iter);
  }

  bool empty() const { return InstrList.empty(); }

  void clear() {
    InstrList.clear();
    DeferredList.clear();
  }

  bool isDeferred(MachineInstr *MI);

  SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }

private:
  /// InstrList contains the MachineInstrs.
  SetVector<MachineInstr *> InstrList;
  /// Deferred instructions are specific MachineInstrs that the insert method
  /// defers rather than adding to InstrList directly.
  SetVector<MachineInstr *> DeferredList;
};
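
// Typical driver loop over the worklist (an illustrative sketch, not part of
// this interface; visit() stands for whatever processing the caller performs
// and may itself call Worklist.insert() to enqueue more work):
//
//   SIInstrWorklist Worklist;
//   Worklist.insert(&FirstMI);
//   while (!Worklist.empty()) {
//     MachineInstr *MI = Worklist.top();
//     Worklist.erase_top();
//     visit(*MI);
//   }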

class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
  const SIRegisterInfo RI;
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;

  // The inverse predicate should have the negative value.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };
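
  // Because an inverse pair differs only in sign, reversing a predicate is a
  // sign flip (an illustrative sketch of the invariant above, not a member of
  // this class):
  //
  //   BranchPredicate reverse(BranchPredicate P) {
  //     return static_cast<BranchPredicate>(-P);
  //   }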

  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);

public:
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              const MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(
      MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
      const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
      unsigned SubIdx, const TargetRegisterClass *SubRC) const;

private:
  void swapOperands(MachineInstr &Inst) const;

  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;

  void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          unsigned Opcode) const;

  void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode, bool Swap = false) const;

  void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
                          MachineDominatorTree *MDT) const;

  void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
                             MachineDominatorTree *MDT) const;

  void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
                            MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
  void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
                               unsigned Opcode,
                               MachineDominatorTree *MDT = nullptr) const;
  void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;

  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SIInstrWorklist &Worklist) const;

  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SIInstrWorklist &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
                                SIInstrWorklist &Worklist) const;

  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

protected:
  /// If the specific machine instruction is an instruction that moves/copies
  /// a value from one register to another, return the destination and source
  /// registers as machine operands.
  std::optional<DestSourcePair>
  isCopyInstrImpl(const MachineInstr &MI) const override;

  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;

public:
  enum TargetOperandFlags {
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };
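
  // MO_MASK selects the flag bits within a combined target-flag word. A
  // sketch of how decomposeMachineOperandsTargetFlags() below can split such
  // a word (an assumption about the encoding, not a definitive
  // implementation):
  //
  //   unsigned DirectFlag = TF & MO_MASK;  // one of the MO_* values above
  //   unsigned OtherBits  = TF & ~MO_MASK;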

  explicit SIInstrInfo(const GCNSubtarget &ST);

  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  const GCNSubtarget &getSubtarget() const {
    return ST;
  }

  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
                    MachineCycleInfo *CI) const override;

  bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
                               int64_t &Offset1) const override;

  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, LocationSize &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           int64_t Offset1, bool OffsetIsScalable1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           int64_t Offset2, bool OffsetIsScalable2,
                           unsigned ClusterSize,
                           unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;

  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            Register DestReg, int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
      unsigned Size) const;

  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI,
                           Register VReg) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI,
                            Register VReg) const override;

  bool expandPostRAPseudo(MachineInstr &MI) const override;

  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     Register DestReg, unsigned SubIdx,
                     const MachineInstr &Orig,
                     const TargetRegisterInfo &TRI) const override;

  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form
  // and a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr *, MachineInstr *>
  expandMovDPP64(MachineInstr &MI) const;

  // Returns an opcode that can be used to move a value to a \p DstRC
  // register. If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
  LLVM_READONLY
  int commuteOpcode(unsigned Opc) const;

  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }

  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const override;

  bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
                             unsigned &SrcOpIdx1) const;

  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  /// Return whether the block terminates with a divergent branch.
  /// Note this only works before lowering the pseudo control flow
  /// instructions.
  bool hasDivergentBranch(const MachineBasicBlock *MBB) const;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  bool reverseBranchCondition(
      SmallVectorImpl<MachineOperand> &Cond) const override;

  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;

  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;

  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  static bool isFoldableCopy(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;

  unsigned getMachineCSELookAheadLimit() const override { return 500; }

  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;

  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }

  bool isSALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SALU;
  }

  static bool isVALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VALU;
  }

  bool isVALU(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VALU;
  }

  static bool isImage(const MachineInstr &MI) {
    return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
  }

  bool isImage(uint16_t Opcode) const {
    return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
  }

  static bool isVMEM(const MachineInstr &MI) {
    return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
  }

  bool isVMEM(uint16_t Opcode) const {
    return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
  }

  static bool isSOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
  }

  bool isSOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP1;
  }

  static bool isSOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
  }

  bool isSOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOP2;
  }

  static bool isSOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
  }

  bool isSOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPC;
  }

  static bool isSOPK(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
  }

  bool isSOPK(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPK;
  }

  static bool isSOPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
  }

  bool isSOPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SOPP;
  }

  static bool isPacked(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
  }

  bool isPacked(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
  }

  static bool isVOP1(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
  }

  bool isVOP1(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP1;
  }

  static bool isVOP2(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
  }

  bool isVOP2(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP2;
  }

  static bool isVOP3(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
  }

  bool isVOP3(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3;
  }

  static bool isSDWA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
  }

  bool isSDWA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SDWA;
  }

  static bool isVOPC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
  }

  bool isVOPC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOPC;
  }

  static bool isMUBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
  }

  bool isMUBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
  }

  static bool isMTBUF(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
  }

  bool isMTBUF(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
  }

  static bool isSMRD(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
  }

  bool isSMRD(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SMRD;
  }

  bool isBufferSMRD(const MachineInstr &MI) const;

  static bool isDS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DS;
  }

  bool isDS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DS;
  }

  static bool isLDSDMA(const MachineInstr &MI) {
    return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
  }

  bool isLDSDMA(uint16_t Opcode) {
    return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
  }

  static bool isGWS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::GWS;
  }

  bool isGWS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::GWS;
  }

  bool isAlwaysGDS(uint16_t Opcode) const;

  static bool isMIMG(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
  }

  bool isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }

  static bool isVIMAGE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
  }

  bool isVIMAGE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
  }

  static bool isVSAMPLE(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
  }

  bool isVSAMPLE(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
  }

  static bool isGather4(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
  }

  bool isGather4(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Gather4;
  }

  static bool isFLAT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
  }

  // Is a FLAT encoded instruction which accesses a specific segment,
  // i.e. global_* or scratch_*.
  static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
    auto Flags = MI.getDesc().TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  bool isSegmentSpecificFLAT(uint16_t Opcode) const {
    auto Flags = get(Opcode).TSFlags;
    return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
  }

  static bool isFLATGlobal(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
  }

  bool isFLATGlobal(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
  }

  static bool isFLATScratch(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
  }

  bool isFLATScratch(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
  }

  // Any FLAT encoded instruction, including global_* and scratch_*.
  bool isFLAT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FLAT;
  }

  static bool isEXP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::EXP;
  }

  static bool isDualSourceBlendEXP(const MachineInstr &MI) {
    if (!isEXP(MI))
      return false;
    unsigned Target = MI.getOperand(0).getImm();
    return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
           Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
  }

  bool isEXP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::EXP;
  }

  static bool isAtomicNoRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  bool isAtomicNoRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
  }

  static bool isAtomicRet(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
  }

  bool isAtomicRet(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
  }

  static bool isAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
                                   SIInstrFlags::IsAtomicNoRet);
  }

  bool isAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
                                  SIInstrFlags::IsAtomicNoRet);
  }

  static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
    return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
  }

  static bool isWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::WQM;
  }

  bool isWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::WQM;
  }

  static bool isDisableWQM(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
  }

  bool isDisableWQM(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
  }

  // SI_SPILL_S32_TO_VGPR and SI_RESTORE_S32_FROM_VGPR form a special case of
  // SGPR spills to VGPRs: they are SGPR spills, but issued as VALU
  // instructions. We therefore need an explicit check for them, since just
  // checking whether the Spill bit is set and what instruction type it came
  // from would misclassify them.
  static bool isVGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           MI.getOpcode() != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(MI) && isVALU(MI));
  }

  bool isVGPRSpill(uint16_t Opcode) const {
    return Opcode != AMDGPU::SI_SPILL_S32_TO_VGPR &&
           Opcode != AMDGPU::SI_RESTORE_S32_FROM_VGPR &&
           (isSpill(Opcode) && isVALU(Opcode));
  }

  static bool isSGPRSpill(const MachineInstr &MI) {
    return MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(MI) && isSALU(MI));
  }

  bool isSGPRSpill(uint16_t Opcode) const {
    return Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR ||
           Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
           (isSpill(Opcode) && isSALU(Opcode));
  }

  bool isSpill(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::Spill;
  }

  static bool isSpill(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::Spill;
  }

  static bool isWWMRegSpillOpcode(uint16_t Opcode) {
    return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
           Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
           Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  }

  static bool isChainCallOpcode(uint64_t Opcode) {
    return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
           Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
  }

  static bool isDPP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::DPP;
  }

  bool isDPP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::DPP;
  }

  static bool isTRANS(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
  }

  bool isTRANS(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TRANS;
  }

  static bool isVOP3P(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
  }

  bool isVOP3P(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
  }

  static bool isVINTRP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
  }

  bool isVINTRP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
  }

  static bool isMAI(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
  }

  bool isMAI(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
  }

  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }

  static bool isDOT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isWMMA(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
  }

  bool isWMMA(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
  }

  static bool isMFMAorWMMA(const MachineInstr &MI) {
    return isMFMA(MI) || isWMMA(MI) || isSWMMAC(MI);
  }

  static bool isSWMMAC(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isSWMMAC(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
  }

  bool isDOT(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
  }

  static bool isLDSDIR(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
  }

  bool isLDSDIR(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
  }

  static bool isVINTERP(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
  }

  bool isVINTERP(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
  }

  static bool isScalarUnit(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
  }

  static bool usesVM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
  }

  static bool usesLGKM_CNT(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
  }

  // Most SOPK instructions treat the immediate as a signed 16-bit value;
  // however, some use it as unsigned.
  static bool sopkIsZext(unsigned Opcode) {
    return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 ||
           Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 ||
           Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 ||
           Opcode == AMDGPU::S_GETREG_B32;
  }

  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
  static bool isScalarStore(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  bool isScalarStore(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
  }

  static bool isFixedSize(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  bool isFixedSize(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
  }

  static bool hasFPClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
  }

  bool hasFPClamp(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
  }

  static bool hasIntClamp(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
  }

  uint64_t getClampMask(const MachineInstr &MI) const {
    const uint64_t ClampFlags = SIInstrFlags::FPClamp |
                                SIInstrFlags::IntClamp |
                                SIInstrFlags::ClampLo |
                                SIInstrFlags::ClampHi;
    return MI.getDesc().TSFlags & ClampFlags;
  }

  static bool usesFPDPRounding(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
  }

  bool usesFPDPRounding(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
  }

  static bool isFPAtomic(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
  }

  bool isFPAtomic(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
  }

  static bool isNeverUniform(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
  }

  // Check to see if opcode is for a barrier start. Pre-gfx12 this is just
  // S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we
  // want to check for the barrier start (S_BARRIER_SIGNAL*).
  bool isBarrierStart(unsigned Opcode) const {
    return Opcode == AMDGPU::S_BARRIER ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
           Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
  }

  bool isBarrier(unsigned Opcode) const {
    return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
           Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
           Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
           Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
           Opcode == AMDGPU::S_BARRIER_LEAVE ||
           Opcode == AMDGPU::DS_GWS_INIT ||
           Opcode == AMDGPU::DS_GWS_BARRIER;
  }

  static bool isF16PseudoScalarTrans(unsigned Opcode) {
    return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
           Opcode == AMDGPU::V_S_LOG_F16_e64 ||
           Opcode == AMDGPU::V_S_RCP_F16_e64 ||
           Opcode == AMDGPU::V_S_RSQ_F16_e64 ||
           Opcode == AMDGPU::V_S_SQRT_F16_e64;
  }

  static bool doesNotReadTiedSource(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  bool doesNotReadTiedSource(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
  }

  static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
    switch (Opcode) {
    case AMDGPU::S_WAITCNT_soft:
      return AMDGPU::S_WAITCNT;
    case AMDGPU::S_WAITCNT_VSCNT_soft:
      return AMDGPU::S_WAITCNT_VSCNT;
    case AMDGPU::S_WAIT_LOADCNT_soft:
      return AMDGPU::S_WAIT_LOADCNT;
    case AMDGPU::S_WAIT_STORECNT_soft:
      return AMDGPU::S_WAIT_STORECNT;
    case AMDGPU::S_WAIT_SAMPLECNT_soft:
      return AMDGPU::S_WAIT_SAMPLECNT;
    case AMDGPU::S_WAIT_BVHCNT_soft:
      return AMDGPU::S_WAIT_BVHCNT;
    case AMDGPU::S_WAIT_DSCNT_soft:
      return AMDGPU::S_WAIT_DSCNT;
    case AMDGPU::S_WAIT_KMCNT_soft:
      return AMDGPU::S_WAIT_KMCNT;
    default:
      return Opcode;
    }
  }

  bool isWaitcnt(unsigned Opcode) const {
    switch (getNonSoftWaitcntOpcode(Opcode)) {
    case AMDGPU::S_WAITCNT:
    case AMDGPU::S_WAITCNT_VSCNT:
    case AMDGPU::S_WAITCNT_VMCNT:
    case AMDGPU::S_WAITCNT_EXPCNT:
    case AMDGPU::S_WAITCNT_LGKMCNT:
    case AMDGPU::S_WAIT_LOADCNT:
    case AMDGPU::S_WAIT_LOADCNT_DSCNT:
    case AMDGPU::S_WAIT_STORECNT:
    case AMDGPU::S_WAIT_STORECNT_DSCNT:
    case AMDGPU::S_WAIT_SAMPLECNT:
    case AMDGPU::S_WAIT_BVHCNT:
    case AMDGPU::S_WAIT_EXPCNT:
    case AMDGPU::S_WAIT_DSCNT:
    case AMDGPU::S_WAIT_KMCNT:
    case AMDGPU::S_WAIT_IDLE:
      return true;
    default:
      return false;
    }
  }

  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(isCopyInstr(MI));
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }

  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
                          return MO.isReg() && RI.isVGPR(MRI, MO.getReg());
                        });
  }

  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);

  /// This function is used to determine if an instruction can be safely
  /// executed under EXEC = 0 without hardware error, indeterminate results,
  /// and/or visible effects on future vector execution or outside the shader.
  /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
  /// used in removing branches over short EXEC = 0 sequences.
  /// As such it embeds certain assumptions which may not apply to every case
  /// of EXEC = 0 execution.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI,
                   const MachineInstr &MI) const;

  bool isInlineConstant(const APInt &Imm) const;

  bool isInlineConstant(const APFloat &Imm) const;

  // Returns true if this non-register operand definitely does not need to be
  // encoded as a 32-bit literal. Note that this function handles all kinds of
  // operands, not just immediates.
  //
  // Some operands like FrameIndexes could resolve to an inline immediate
  // value that will not require an additional 4 bytes; this function assumes
  // that it will.
  bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;

  bool isInlineConstant(const MachineOperand &MO,
                        const MCOperandInfo &OpInfo) const {
    return isInlineConstant(MO, OpInfo.OperandType);
  }

  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, it
  /// would be an inline immediate.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = UseMO.getOperandNo();
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
  }

  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
  /// immediate.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    if (OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (isCopyInstr(MI)) {
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
  }

  bool isInlineConstant(const MachineOperand &MO) const {
    return isInlineConstant(*MO.getParent(), MO.getOperandNo());
  }

  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  unsigned getVALUOp(const MachineInstr &MI) const;

  void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             const DebugLoc &DL, Register Reg, bool IsSCCLive,
                             SlotIndexes *Indexes = nullptr) const;

  void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
                   MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                   Register Reg, SlotIndexes *Indexes = nullptr) const;

  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;

  /// Return the size in bytes of the operand OpNo on the given
  /// instruction opcode.
  unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
    const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];

    if (OpInfo.RegClass == -1) {
      // If this is an immediate operand, this must be a 32-bit literal.
      assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
      return 4;
    }

    return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
  }

  /// This form should usually be preferred since it handles operands
  /// with unknown register classes.
  unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
    const MachineOperand &MO = MI.getOperand(OpNo);
    if (MO.isReg()) {
      if (unsigned SubReg = MO.getSubReg()) {
        return RI.getSubRegIdxSize(SubReg) / 8;
      }
    }
    return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
  }

  /// Legalize the \p OpIndex operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;

  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to an SGPR. This function can only
  /// be used when it is known that the value in \p SrcReg is the same across
  /// all threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  /// \returns A new basic block that contains \p MI if new blocks were created.
  MachineBasicBlock *
  legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
  /// was moved to VGPR. \returns true if succeeded.
  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;

  /// Replace the instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of MachineInstrs
  /// in the \p WorkList to the VALU if necessary. If present, \p MDT is
  /// updated.
  void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;

  void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
                      MachineInstr &Inst) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   unsigned Quantity) const override;

  void insertReturn(MachineBasicBlock &MBB) const;

  /// Build instructions that simulate the behavior of an `s_trap 2`
  /// instruction for hardware (namely, gfx11) that runs in PRIV=1 mode. There,
  /// s_trap is interpreted as a nop.
  MachineBasicBlock *insertSimulatedTrap(MachineRegisterInfo &MRI,
                                         MachineBasicBlock &MBB,
                                         MachineInstr &MI,
                                         const DebugLoc &DL) const;

  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p Op. If \p MI does not have an
  /// operand named \c Op, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;

  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }

  /// Get required immediate operand
  int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
    int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
    return MI.getOperand(Idx).getImm();
  }
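
  // Example use (a sketch; assumes the caller knows \p MI carries an
  // immediate offset operand):
  //
  //   int64_t Offset = TII->getNamedImmOperand(MI, AMDGPU::OpName::offset);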

  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyDef(int Opc) const override;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }

  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  Register isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  Register isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  bool isNonUniformBranchInstr(MachineInstr &Instr) const;

  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                 MachineBasicBlock *IfEnd) const;

  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                   MachineBasicBlock *LoopEnd) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  ScheduleHazardRecognizer *
  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                 const ScheduleDAGMI *DAG) const override;

  unsigned getLiveRangeSplitOpcode(Register Reg,
                                   const MachineFunction &MF) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI,
                            Register Reg = Register()) const override;

  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate an unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;

  bool isLegalMUBUFImmOffset(unsigned Imm) const;

  static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);

  bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                        Align Alignment = Align(4)) const;

  /// Returns true if \p Offset is legal for the subtarget as the offset to a
  /// FLAT encoded instruction with the given \p FlatVariant, which determines
  /// how the offset is interpreted.
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;
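
  // For example (a sketch; the address space and variant are illustrative
  // assumptions):
  //
  //   auto [ImmField, Remainder] = TII->splitFlatOffset(
  //       COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);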

  /// Returns true if negative offsets are allowed for the given \p
  /// FlatVariant.
  bool allowNegativeFlatOffset(uint64_t FlatVariant) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
      const override;

  void fixImplicitOperands(MachineInstr &MI) const;

  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;

  InstructionUniformity
  getInstructionUniformity(const MachineInstr &MI) const override final;

  InstructionUniformity
  getGenericInstructionUniformity(const MachineInstr &MI) const;

  const MIRFormatter *getMIRFormatter() const override {
    if (!Formatter)
      Formatter = std::make_unique<AMDGPUMIRFormatter>();
    return Formatter.get();
  }

  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // Enforce operand's \p OpName even alignment if required by target.
  // This is used if an operand is a 32 bit register but needs to be aligned
  // regardless.
  void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
};

/// \brief Returns true if a reg:subreg pair \p P is of register class \p TRC.
inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
                         const TargetRegisterClass &TRC,
                         MachineRegisterInfo &MRI) {
  auto *RC = MRI.getRegClass(P.Reg);
  if (!P.SubReg)
    return RC == &TRC;
  auto *TRI = MRI.getTargetRegisterInfo();
  return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
}

/// \brief Create RegSubRegPair from a register MachineOperand
inline
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
  assert(O.isReg());
  return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
}
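
// Combining the two helpers above gives a compact class check on a register
// operand (an illustrative sketch; MO and MRI come from the calling pass):
//
//   if (isOfRegClass(getRegSubRegPair(MO), AMDGPU::VGPR_32RegClass, MRI))
//     ...; // operand is a 32-bit VGPR, possibly via a subregister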

/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);

namespace AMDGPU {

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getDPPOp64(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

/// \returns SADDR form of a FLAT Global instruction given an \p Opcode
/// of a VADDR form.
LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

/// \returns VADDR form of a FLAT Global instruction given an \p Opcode
/// of a SADDR form.
LLVM_READONLY
int getGlobalVaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

/// \returns ST form with only immediate offset of a FLAT Scratch instruction
/// given an \p Opcode of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SVS (SADDR + VADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSVS(uint16_t Opcode);

/// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SV (VADDR) form.
LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSS(uint16_t Opcode);

/// \returns the earlyclobber version of a MAC MFMA if it exists.
LLVM_READONLY
int getMFMAEarlyClobberOp(uint16_t Opcode);

/// \returns v_cmpx version of a v_cmp instruction.
LLVM_READONLY
int getVCMPXOpFromVCMP(uint16_t Opcode);

const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU

namespace AMDGPU {
enum {
  // For sgpr to vgpr spill instructions
  SGPR_SPILL = MachineInstr::TAsmComments
};
} // namespace AMDGPU

namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H