1//===- AMDGPURegBankLegalizeHelper ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZEHELPER_H
10#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZEHELPER_H
11
12#include "AMDGPURegBankLegalizeRules.h"
13#include "llvm/ADT/SmallSet.h"
14#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
15#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
16#include "llvm/CodeGen/MachineRegisterInfo.h"
17
18namespace llvm {
19
20class MachineIRBuilder;
21
22namespace AMDGPU {
23
24// Receives list of RegBankLLTMappingApplyID and applies register banks on all
25// operands. It is user's responsibility to provide RegBankLLTMappingApplyIDs
26// for all register operands, there is no need to specify NonReg for trailing
27// imm operands. This finishes selection of register banks if there is no need
28// to replace instruction. In other case InstApplyMethod will create new
29// instruction(s).
30class RegBankLegalizeHelper {
31 MachineFunction &MF;
32 const GCNSubtarget &ST;
33 MachineIRBuilder &B;
34 MachineRegisterInfo &MRI;
35 const MachineUniformityInfo &MUI;
36 const RegisterBankInfo &RBI;
37 MachineOptimizationRemarkEmitter MORE;
38 const RegBankLegalizeRules &RBLRules;
39 const bool IsWave32;
40 const RegisterBank *SgprRB;
41 const RegisterBank *VgprRB;
42 const RegisterBank *VccRB;
43
44 static constexpr LLT S1 = LLT::scalar(SizeInBits: 1);
45 static constexpr LLT S16 = LLT::scalar(SizeInBits: 16);
46 static constexpr LLT S32 = LLT::scalar(SizeInBits: 32);
47 static constexpr LLT S64 = LLT::scalar(SizeInBits: 64);
48 static constexpr LLT S96 = LLT::scalar(SizeInBits: 96);
49 static constexpr LLT S128 = LLT::scalar(SizeInBits: 128);
50 static constexpr LLT S256 = LLT::scalar(SizeInBits: 256);
51
52 static constexpr LLT V2S16 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16);
53 static constexpr LLT V4S16 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16);
54 static constexpr LLT V6S16 = LLT::fixed_vector(NumElements: 6, ScalarSizeInBits: 16);
55 static constexpr LLT V8S16 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16);
56 static constexpr LLT V16S16 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 16);
57 static constexpr LLT V32S16 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 16);
58
59 static constexpr LLT V2S32 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
60 static constexpr LLT V3S32 = LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32);
61 static constexpr LLT V4S32 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
62 static constexpr LLT V6S32 = LLT::fixed_vector(NumElements: 6, ScalarSizeInBits: 32);
63 static constexpr LLT V7S32 = LLT::fixed_vector(NumElements: 7, ScalarSizeInBits: 32);
64 static constexpr LLT V8S32 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 32);
65 static constexpr LLT V16S32 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 32);
66
67 static constexpr LLT V2S64 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
68 static constexpr LLT V3S64 = LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 64);
69 static constexpr LLT V4S64 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 64);
70 static constexpr LLT V8S64 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 64);
71 static constexpr LLT V16S64 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 64);
72
73 static constexpr LLT P1 = LLT::pointer(AddressSpace: 1, SizeInBits: 64);
74 static constexpr LLT P4 = LLT::pointer(AddressSpace: 4, SizeInBits: 64);
75 static constexpr LLT P6 = LLT::pointer(AddressSpace: 6, SizeInBits: 32);
76
77 MachineRegisterInfo::VRegAttrs SgprRB_S32 = {.RCOrRB: SgprRB, .Ty: S32};
78 MachineRegisterInfo::VRegAttrs SgprRB_S16 = {.RCOrRB: SgprRB, .Ty: S16};
79 MachineRegisterInfo::VRegAttrs VgprRB_S32 = {.RCOrRB: VgprRB, .Ty: S32};
80 MachineRegisterInfo::VRegAttrs VccRB_S1 = {.RCOrRB: VccRB, .Ty: S1};
81
82public:
83 RegBankLegalizeHelper(MachineIRBuilder &B, const MachineUniformityInfo &MUI,
84 const RegisterBankInfo &RBI,
85 const RegBankLegalizeRules &RBLRules);
86
87 bool findRuleAndApplyMapping(MachineInstr &MI);
88
89 // Manual apply helpers.
90 bool applyMappingPHI(MachineInstr &MI);
91 void applyMappingTrivial(MachineInstr &MI);
92
93private:
94 bool executeInWaterfallLoop(MachineIRBuilder &B,
95 iterator_range<MachineBasicBlock::iterator> Range,
96 SmallSet<Register, 4> &SgprOperandRegs);
97
98 LLT getTyFromID(RegBankLLTMappingApplyID ID);
99 LLT getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty);
100
101 const RegisterBank *getRegBankFromID(RegBankLLTMappingApplyID ID);
102
103 bool
104 applyMappingDst(MachineInstr &MI, unsigned &OpIdx,
105 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs);
106
107 bool
108 applyMappingSrc(MachineInstr &MI, unsigned &OpIdx,
109 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
110 SmallSet<Register, 4> &SgprWaterfallOperandRegs);
111
112 bool splitLoad(MachineInstr &MI, ArrayRef<LLT> LLTBreakdown,
113 LLT MergeTy = LLT());
114 bool widenLoad(MachineInstr &MI, LLT WideTy, LLT MergeTy = LLT());
115 bool widenMMOToS32(GAnyLoad &MI) const;
116
117 bool lower(MachineInstr &MI, const RegBankLLTMapping &Mapping,
118 SmallSet<Register, 4> &SgprWaterfallOperandRegs);
119
120 bool lowerVccExtToSel(MachineInstr &MI);
121 std::pair<Register, Register> unpackZExt(Register Reg);
122 std::pair<Register, Register> unpackSExt(Register Reg);
123 std::pair<Register, Register> unpackAExt(Register Reg);
124 std::pair<Register, Register> unpackAExtTruncS16(Register Reg);
125 bool lowerUnpackBitShift(MachineInstr &MI);
126 bool lowerV_BFE(MachineInstr &MI);
127 bool lowerS_BFE(MachineInstr &MI);
128 bool lowerUniMAD64(MachineInstr &MI);
129 bool lowerSplitTo32(MachineInstr &MI);
130 bool lowerSplitTo32Mul(MachineInstr &MI);
131 bool lowerSplitTo16(MachineInstr &MI);
132 bool lowerSplitTo32Select(MachineInstr &MI);
133 bool lowerSplitTo32SExtInReg(MachineInstr &MI);
134 bool lowerUnpackMinMax(MachineInstr &MI);
135 bool lowerUnpackAExt(MachineInstr &MI);
136};
137
138} // end namespace AMDGPU
139} // end namespace llvm
140
141#endif
142