1//===- AMDGPURegBankLegalizeHelper ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZEHELPER_H
10#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZEHELPER_H
11
12#include "AMDGPURegBankLegalizeRules.h"
13#include "llvm/ADT/SmallSet.h"
14#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
15#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
16#include "llvm/CodeGen/MachineRegisterInfo.h"
17
18namespace llvm {
19
20class MachineIRBuilder;
21
22namespace AMDGPU {
23
24/// Holds waterfall loop information: the set of SGPR operand registers that
25/// need waterfalling, and an instruction range [Start, End) to wrap in the
26/// loop.
27struct WaterfallInfo {
28 SmallSet<Register, 4> SgprWaterfallOperandRegs;
29 MachineBasicBlock::iterator Start;
30 MachineBasicBlock::iterator End;
31};
32
33// Receives list of RegBankLLTMappingApplyID and applies register banks on all
34// operands. It is user's responsibility to provide RegBankLLTMappingApplyIDs
35// for all register operands, there is no need to specify NonReg for trailing
36// imm operands. This finishes selection of register banks if there is no need
37// to replace instruction. In other case InstApplyMethod will create new
38// instruction(s).
39class RegBankLegalizeHelper {
40 MachineFunction &MF;
41 const GCNSubtarget &ST;
42 MachineIRBuilder &B;
43 MachineRegisterInfo &MRI;
44 const MachineUniformityInfo &MUI;
45 const RegisterBankInfo &RBI;
46 MachineOptimizationRemarkEmitter MORE;
47 const RegBankLegalizeRules &RBLRules;
48 const bool IsWave32;
49 const RegisterBank *SgprRB;
50 const RegisterBank *VgprRB;
51 const RegisterBank *VccRB;
52
53 static constexpr LLT S1 = LLT::scalar(SizeInBits: 1);
54 static constexpr LLT S16 = LLT::scalar(SizeInBits: 16);
55 static constexpr LLT S32 = LLT::scalar(SizeInBits: 32);
56 static constexpr LLT S64 = LLT::scalar(SizeInBits: 64);
57 static constexpr LLT S96 = LLT::scalar(SizeInBits: 96);
58 static constexpr LLT S128 = LLT::scalar(SizeInBits: 128);
59 static constexpr LLT S256 = LLT::scalar(SizeInBits: 256);
60
61 static constexpr LLT V2S16 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16);
62 static constexpr LLT V4S16 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16);
63 static constexpr LLT V6S16 = LLT::fixed_vector(NumElements: 6, ScalarSizeInBits: 16);
64 static constexpr LLT V8S16 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16);
65 static constexpr LLT V16S16 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 16);
66 static constexpr LLT V32S16 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 16);
67
68 static constexpr LLT V2S32 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
69 static constexpr LLT V3S32 = LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32);
70 static constexpr LLT V4S32 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
71 static constexpr LLT V6S32 = LLT::fixed_vector(NumElements: 6, ScalarSizeInBits: 32);
72 static constexpr LLT V7S32 = LLT::fixed_vector(NumElements: 7, ScalarSizeInBits: 32);
73 static constexpr LLT V8S32 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 32);
74 static constexpr LLT V16S32 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 32);
75
76 static constexpr LLT V2S64 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
77 static constexpr LLT V3S64 = LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 64);
78 static constexpr LLT V4S64 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 64);
79 static constexpr LLT V8S64 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 64);
80 static constexpr LLT V16S64 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 64);
81
82 static constexpr LLT P1 = LLT::pointer(AddressSpace: 1, SizeInBits: 64);
83 static constexpr LLT P4 = LLT::pointer(AddressSpace: 4, SizeInBits: 64);
84 static constexpr LLT P6 = LLT::pointer(AddressSpace: 6, SizeInBits: 32);
85
86 MachineRegisterInfo::VRegAttrs SgprRB_S32 = {.RCOrRB: SgprRB, .Ty: S32};
87 MachineRegisterInfo::VRegAttrs SgprRB_S16 = {.RCOrRB: SgprRB, .Ty: S16};
88 MachineRegisterInfo::VRegAttrs VgprRB_S32 = {.RCOrRB: VgprRB, .Ty: S32};
89 MachineRegisterInfo::VRegAttrs VccRB_S1 = {.RCOrRB: VccRB, .Ty: S1};
90
91public:
92 RegBankLegalizeHelper(MachineIRBuilder &B, const MachineUniformityInfo &MUI,
93 const RegisterBankInfo &RBI,
94 const RegBankLegalizeRules &RBLRules);
95
96 bool findRuleAndApplyMapping(MachineInstr &MI);
97
98 // Manual apply helpers.
99 bool applyMappingPHI(MachineInstr &MI);
100 void applyMappingTrivial(MachineInstr &MI);
101
102private:
103 bool executeInWaterfallLoop(MachineIRBuilder &B, const WaterfallInfo &WFI);
104
105 LLT getTyFromID(RegBankLLTMappingApplyID ID);
106 LLT getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty);
107
108 const RegisterBank *getRegBankFromID(RegBankLLTMappingApplyID ID);
109
110 bool
111 applyMappingDst(MachineInstr &MI, unsigned &OpIdx,
112 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs);
113
114 bool
115 applyMappingSrc(MachineInstr &MI, unsigned &OpIdx,
116 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
117 WaterfallInfo &WFI);
118
119 bool splitLoad(MachineInstr &MI, ArrayRef<LLT> LLTBreakdown,
120 LLT MergeTy = LLT());
121 bool widenLoad(MachineInstr &MI, LLT WideTy, LLT MergeTy = LLT());
122 bool widenMMOToS32(GAnyLoad &MI) const;
123
124 bool lower(MachineInstr &MI, const RegBankLLTMapping &Mapping,
125 WaterfallInfo &WFI);
126
127 bool lowerVccExtToSel(MachineInstr &MI);
128 std::pair<Register, Register> unpackZExt(Register Reg);
129 std::pair<Register, Register> unpackSExt(Register Reg);
130 std::pair<Register, Register> unpackAExt(Register Reg);
131 std::pair<Register, Register> unpackAExtTruncS16(Register Reg);
132 bool lowerUnpackBitShift(MachineInstr &MI);
133 bool lowerV_BFE(MachineInstr &MI);
134 bool lowerS_BFE(MachineInstr &MI);
135 bool lowerUniMAD64(MachineInstr &MI);
136 bool lowerSplitTo32(MachineInstr &MI);
137 bool lowerSplitTo32Mul(MachineInstr &MI);
138 bool lowerSplitTo16(MachineInstr &MI);
139 bool lowerSplitTo32Select(MachineInstr &MI);
140 bool lowerSplitTo32SExtInReg(MachineInstr &MI);
141 bool lowerUnpackMinMax(MachineInstr &MI);
142 bool lowerUnpackAExt(MachineInstr &MI);
143 bool applyRegisterBanksINTRIN_IMAGE(MachineInstr &MI);
144};
145
146} // end namespace AMDGPU
147} // end namespace llvm
148
149#endif
150