//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "RISCVLegalizerInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;
using namespace LegalityPredicates;
using namespace LegalizeMutations;

// Is this type supported by scalar FP arithmetic operations given the current
// subtarget.
static LegalityPredicate typeIsScalarFPArith(unsigned TypeIdx,
                                             const RISCVSubtarget &ST) {
  return [=, &ST](const LegalityQuery &Query) {
    return Query.Types[TypeIdx].isScalar() &&
           ((ST.hasStdExtZfh() && Query.Types[TypeIdx].getSizeInBits() == 16) ||
            (ST.hasStdExtF() && Query.Types[TypeIdx].getSizeInBits() == 32) ||
            (ST.hasStdExtD() && Query.Types[TypeIdx].getSizeInBits() == 64));
  };
}

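// Is this an integer or FP vector type from IntOrFPVecTys that the current
// subtarget's vector support can handle? 64-bit elements additionally require
// vector i64 support, and single-element (nxv1) vectors require ELEN == 64.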
static LegalityPredicate
typeIsLegalIntOrFPVec(unsigned TypeIdx,
                      std::initializer_list<LLT> IntOrFPVecTys,
                      const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
            ST.hasVInstructionsI64()) &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };

  return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
}

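// Is this a mask (s1 element) vector type from BoolVecTys that the current
// subtarget's vector support can handle? Single-element (nxv1) vectors
// require ELEN == 64.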
static LegalityPredicate
typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
                   const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };
  return all(typeInSet(TypeIdx, BoolVecTys), P);
}

RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
    : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
  const LLT sDoubleXLen = LLT::scalar(2 * XLen);
  const LLT p0 = LLT::pointer(0, XLen);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  const LLT nxv1s1 = LLT::scalable_vector(1, s1);
  const LLT nxv2s1 = LLT::scalable_vector(2, s1);
  const LLT nxv4s1 = LLT::scalable_vector(4, s1);
  const LLT nxv8s1 = LLT::scalable_vector(8, s1);
  const LLT nxv16s1 = LLT::scalable_vector(16, s1);
  const LLT nxv32s1 = LLT::scalable_vector(32, s1);
  const LLT nxv64s1 = LLT::scalable_vector(64, s1);

  const LLT nxv1s8 = LLT::scalable_vector(1, s8);
  const LLT nxv2s8 = LLT::scalable_vector(2, s8);
  const LLT nxv4s8 = LLT::scalable_vector(4, s8);
  const LLT nxv8s8 = LLT::scalable_vector(8, s8);
  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
  const LLT nxv32s8 = LLT::scalable_vector(32, s8);
  const LLT nxv64s8 = LLT::scalable_vector(64, s8);

  const LLT nxv1s16 = LLT::scalable_vector(1, s16);
  const LLT nxv2s16 = LLT::scalable_vector(2, s16);
  const LLT nxv4s16 = LLT::scalable_vector(4, s16);
  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
  const LLT nxv16s16 = LLT::scalable_vector(16, s16);
  const LLT nxv32s16 = LLT::scalable_vector(32, s16);

  const LLT nxv1s32 = LLT::scalable_vector(1, s32);
  const LLT nxv2s32 = LLT::scalable_vector(2, s32);
  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
  const LLT nxv8s32 = LLT::scalable_vector(8, s32);
  const LLT nxv16s32 = LLT::scalable_vector(16, s32);

  const LLT nxv1s64 = LLT::scalable_vector(1, s64);
  const LLT nxv2s64 = LLT::scalable_vector(2, s64);
  const LLT nxv4s64 = LLT::scalable_vector(4, s64);
  const LLT nxv8s64 = LLT::scalable_vector(8, s64);

  using namespace TargetOpcode;

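  // Candidate RVV mask and data vector types; the predicates above further
  // restrict their legality based on the subtarget's vector support.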
  auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};

  auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
                        nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
                        nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
                        nxv1s64, nxv2s64, nxv4s64, nxv8s64};

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
      .legalFor({s32, sXLen})
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  getActionDefinitionsBuilder(
      {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();

  getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();

  // TODO: Use Vector Single-Width Saturating Instructions for vector types.
  getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
      .lower();

  auto &ShiftActions = getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL});
  if (ST.is64Bit())
    ShiftActions.customFor({{s32, s32}});
  ShiftActions.legalFor({{s32, s32}, {s32, sXLen}, {sXLen, sXLen}})
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, sXLen)
      .clampScalar(0, s32, sXLen)
      .minScalarSameAs(1, 0)
      .widenScalarToNextPow2(1);

  auto &ExtActions =
      getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
  if (ST.is64Bit()) {
    ExtActions.legalFor({{sXLen, s32}});
    getActionDefinitionsBuilder(G_SEXT_INREG)
        .customFor({sXLen})
        .maxScalar(0, sXLen)
        .lower();
  } else {
    getActionDefinitionsBuilder(G_SEXT_INREG).maxScalar(0, sXLen).lower();
  }
  ExtActions.customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
      .maxScalar(0, sXLen);

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    if (XLen == 32 && ST.hasStdExtD()) {
      MergeUnmergeActions.legalIf(
          all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
    }
    MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
        .widenScalarToNextPow2(BigTyIdx, XLen)
        .clampScalar(LitTyIdx, sXLen, sXLen)
        .clampScalar(BigTyIdx, sXLen, sXLen);
  }

  getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();

  auto &RotateActions = getActionDefinitionsBuilder({G_ROTL, G_ROTR});
  if (ST.hasStdExtZbb() || ST.hasStdExtZbkb()) {
    RotateActions.legalFor({{s32, sXLen}, {sXLen, sXLen}});
    // Widen s32 rotate amount to s64 so SDAG patterns will match.
    if (ST.is64Bit())
      RotateActions.widenScalarIf(all(typeIs(0, s32), typeIs(1, s32)),
                                  changeTo(1, sXLen));
  }
  RotateActions.lower();

  getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();

  getActionDefinitionsBuilder(G_BITCAST).legalIf(
      all(LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                                  typeIsLegalBoolVec(0, BoolVecTys, ST)),
          LegalityPredicates::any(typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST),
                                  typeIsLegalBoolVec(1, BoolVecTys, ST))));

  auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
  if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
    BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
  else
    BSWAPActions.maxScalar(0, sXLen).lower();

  auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
  auto &CountZerosUndefActions =
      getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
  if (ST.hasStdExtZbb()) {
    CountZerosActions.legalFor({{s32, s32}, {sXLen, sXLen}})
        .clampScalar(0, s32, sXLen)
        .widenScalarToNextPow2(0)
        .scalarSameSizeAs(1, 0);
  } else {
    CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
    CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
  }
  CountZerosUndefActions.lower();

  auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
  if (ST.hasStdExtZbb()) {
    CTPOPActions.legalFor({{s32, s32}, {sXLen, sXLen}})
        .clampScalar(0, s32, sXLen)
        .widenScalarToNextPow2(0)
        .scalarSameSizeAs(1, 0);
  } else {
    CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
  }

  auto &ConstantActions = getActionDefinitionsBuilder(G_CONSTANT);
  ConstantActions.legalFor({s32, p0});
  if (ST.is64Bit())
    ConstantActions.customFor({s64});
  ConstantActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen);

  // TODO: transform illegal vector types into legal vector types
  getActionDefinitionsBuilder(
      {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER, G_FREEZE})
      .legalFor({s32, sXLen, p0})
      .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{sXLen, sXLen}, {sXLen, p0}})
      .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
                   typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
      .widenScalarOrEltToNextPow2OrMinSize(1, 8)
      .clampScalar(1, sXLen, sXLen)
      .clampScalar(0, sXLen, sXLen);

  auto &SelectActions =
      getActionDefinitionsBuilder(G_SELECT)
          .legalFor({{s32, sXLen}, {p0, sXLen}})
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIsLegalBoolVec(1, BoolVecTys, ST)));
  if (XLen == 64 || ST.hasStdExtD())
    SelectActions.legalFor({{s64, sXLen}});
  SelectActions.widenScalarToNextPow2(0)
      .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
      .clampScalar(1, sXLen, sXLen);

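  // Scalar loads and stores, including sign/zero-extending loads. Anything
  // not explicitly legal below is clamped towards [s32, sXLen] and then
  // lowered.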
  auto &LoadStoreActions =
      getActionDefinitionsBuilder({G_LOAD, G_STORE})
          .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                     {s32, p0, s16, 16},
                                     {s32, p0, s32, 32},
                                     {p0, p0, sXLen, XLen}});
  auto &ExtLoadActions =
      getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
          .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 16}});
  if (XLen == 64) {
    LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s8, 8},
                                               {s64, p0, s16, 16},
                                               {s64, p0, s32, 32},
                                               {s64, p0, s64, 64}});
    ExtLoadActions.legalForTypesWithMemDesc(
        {{s64, p0, s8, 8}, {s64, p0, s16, 16}, {s64, p0, s32, 32}});
  } else if (ST.hasStdExtD()) {
    LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}});
  }
  LoadStoreActions.clampScalar(0, s32, sXLen).lower();
  ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen).lower();

  getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{sXLen, p0}})
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .legalFor({{p0, sXLen}})
      .clampScalar(1, sXLen, sXLen);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);

  getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, sXLen}});

  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, sXLen})
      .widenScalarToNextPow2(0)
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
      .legalFor({p0});

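  // Integer multiplication: legal with Zmmul (implied by M), otherwise
  // emitted as libcalls.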
  if (ST.hasStdExtZmmul()) {
    getActionDefinitionsBuilder(G_MUL)
        .legalFor({s32, sXLen})
        .widenScalarToNextPow2(0)
        .clampScalar(0, s32, sXLen);

    // clang-format off
    getActionDefinitionsBuilder({G_SMULH, G_UMULH})
        .legalFor({sXLen})
        .lower();
    // clang-format on

    getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
  } else {
    getActionDefinitionsBuilder(G_MUL)
        .libcallFor({sXLen, sDoubleXLen})
        .widenScalarToNextPow2(0)
        .clampScalar(0, sXLen, sDoubleXLen);

    getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});

    getActionDefinitionsBuilder({G_SMULO, G_UMULO})
        .minScalar(0, sXLen)
        // Widen sXLen to sDoubleXLen so we can use a single libcall to get
        // the low bits for the mul result and high bits to do the overflow
        // check.
        .widenScalarIf(typeIs(0, sXLen),
                       LegalizeMutations::changeTo(0, sDoubleXLen))
        .lower();
  }

  if (ST.hasStdExtM()) {
    getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
        .legalFor({s32, sXLen})
        .libcallFor({sDoubleXLen})
        .clampScalar(0, s32, sDoubleXLen)
        .widenScalarToNextPow2(0);
  } else {
    getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
        .libcallFor({sXLen, sDoubleXLen})
        .clampScalar(0, sXLen, sDoubleXLen)
        .widenScalarToNextPow2(0);
  }

  // TODO: Use libcall for sDoubleXLen.
  getActionDefinitionsBuilder({G_UDIVREM, G_SDIVREM}).lower();

  auto &AbsActions = getActionDefinitionsBuilder(G_ABS);
  if (ST.hasStdExtZbb())
    AbsActions.customFor({s32, sXLen}).minScalar(0, sXLen);
  AbsActions.lower();

  auto &MinMaxActions =
      getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN});
  if (ST.hasStdExtZbb())
    MinMaxActions.legalFor({sXLen}).minScalar(0, sXLen);
  MinMaxActions.lower();

  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();

  // FP Operations

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
                               G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM})
      .legalIf(typeIsScalarFPArith(0, ST));

  getActionDefinitionsBuilder(G_FREM)
      .libcallFor({s32, s64})
      .minScalar(0, s32)
      .scalarize(0);

  getActionDefinitionsBuilder(G_FCOPYSIGN)
      .legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST)));

  // FIXME: Use Zfhmin.
  getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
      [=, &ST](const LegalityQuery &Query) -> bool {
        return (ST.hasStdExtD() && typeIs(0, s32)(Query) &&
                typeIs(1, s64)(Query)) ||
               (ST.hasStdExtZfh() && typeIs(0, s16)(Query) &&
                typeIs(1, s32)(Query)) ||
               (ST.hasStdExtZfh() && ST.hasStdExtD() && typeIs(0, s16)(Query) &&
                typeIs(1, s64)(Query));
      });
  getActionDefinitionsBuilder(G_FPEXT).legalIf(
      [=, &ST](const LegalityQuery &Query) -> bool {
        return (ST.hasStdExtD() && typeIs(0, s64)(Query) &&
                typeIs(1, s32)(Query)) ||
               (ST.hasStdExtZfh() && typeIs(0, s32)(Query) &&
                typeIs(1, s16)(Query)) ||
               (ST.hasStdExtZfh() && ST.hasStdExtD() && typeIs(0, s64)(Query) &&
                typeIs(1, s16)(Query));
      });

  getActionDefinitionsBuilder(G_FCMP)
      .legalIf(all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST)))
      .clampScalar(0, sXLen, sXLen);

  // TODO: Support vector version of G_IS_FPCLASS.
  getActionDefinitionsBuilder(G_IS_FPCLASS)
      .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST)));

  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalIf(typeIsScalarFPArith(0, ST))
      .lowerFor({s32, s64});

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalIf(all(typeInSet(0, {s32, sXLen}), typeIsScalarFPArith(1, ST)))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen)
      .libcall();

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen})))
      .widenScalarToNextPow2(1)
      .clampScalar(1, s32, sXLen);

  // FIXME: We can do custom inline expansion like SelectionDAG.
  // FIXME: Legal with Zfa.
  getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
      .libcallFor({s32, s64});

  getActionDefinitionsBuilder(G_VASTART).customFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
      // outside the [s32, sXLen] range.
      .clampScalar(0, s32, sXLen)
      .lowerForCartesianProduct({s32, sXLen, p0}, {p0});

  getActionDefinitionsBuilder(G_VSCALE)
      .clampScalar(0, sXLen, sXLen)
      .customFor({sXLen});

  auto &SplatActions =
      getActionDefinitionsBuilder(G_SPLAT_VECTOR)
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIs(1, sXLen)))
          .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
  // Handle case of s64 element vectors on RV32. If the subtarget does not have
  // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
  // does have f64, then we don't know whether the type is an f64 or an i64,
  // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
  // depending on how the instructions it consumes are legalized. They are not
  // legalized yet since legalization is in reverse postorder, so we cannot
  // make the decision at this moment.
  if (XLen == 32) {
    if (ST.hasVInstructionsF64() && ST.hasStdExtD())
      SplatActions.legalIf(all(
          typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
    else if (ST.hasVInstructionsI64())
      SplatActions.customIf(all(
          typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
  }

  SplatActions.clampScalar(1, sXLen, sXLen);

  getLegacyLegalizerInfo().computeTables();
}

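// Custom legalization of target intrinsics; currently only llvm.va_copy needs
// handling here.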
bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                           MachineInstr &MI) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    return false;
  case Intrinsic::vacopy: {
    // vacopy arguments must be legal because of the intrinsic signature.
    // No need to check here.

    MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
    MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
    MachineFunction &MF = *MI.getMF();
    const DataLayout &DL = MIRBuilder.getDataLayout();
    LLVMContext &Ctx = MF.getFunction().getContext();

    Register DstLst = MI.getOperand(1).getReg();
    LLT PtrTy = MRI.getType(DstLst);

    // Load the source va_list
    Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
    MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
    auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);

    // Store the result in the destination va_list
    MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment);
    MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);

    MI.eraseFromParent();
    return true;
  }
  }
}

bool RISCVLegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // If the shift amount is a G_CONSTANT, promote it to a 64-bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
  if (!VRegAndVal)
    return true;
  // Check the shift amount is in range for an immediate form.
  uint64_t Amount = VRegAndVal->Value.getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
                                         MachineIRBuilder &MIRBuilder) const {
  // Stores the address of the VarArgsFrameIndex slot into the memory location
  assert(MI.getOpcode() == TargetOpcode::G_VASTART);
  MachineFunction *MF = MI.getParent()->getParent();
  RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
  int FI = FuncInfo->getVarArgsFrameIndex();
  LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
  auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
  assert(MI.hasOneMemOperand());
  MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
                        *MI.memoperands()[0]);
  MI.eraseFromParent();
  return true;
}

bool RISCVLegalizerInfo::shouldBeInConstantPool(APInt APImm,
                                                bool ShouldOptForSize) const {
  assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
  int64_t Imm = APImm.getSExtValue();
  // All simm32 constants should be handled by isel.
  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
  // this check redundant, but small immediates are common so this check
  // should have better compile time.
  if (isInt<32>(Imm))
    return false;

  // We only need to cost the immediate if constant pool lowering is enabled.
  if (!STI.useConstantPoolForLargeInts())
    return false;

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
  if (Seq.size() <= STI.getMaxBuildIntsCost())
    return false;

  // Optimizations below are disabled for opt size. If we're optimizing for
  // size, use a constant pool.
  if (ShouldOptForSize)
    return true;
  //
  // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool.
  // It will require an extra temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bits 31 and 63 are set.
  unsigned ShiftAmt, AddOpc;
  RISCVMatInt::InstSeq SeqLo =
      RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
  return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
}

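// Lower G_VSCALE by computing it from VLENB: vscale equals VLENB / 8, so a
// constant multiple of vscale becomes a shift or multiply of VLENB.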
bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
                                        MachineIRBuilder &MIB) const {
  const LLT XLenTy(STI.getXLenVT());
  Register Dst = MI.getOperand(0).getReg();

  // We define our scalable vector types for lmul=1 to use a 64-bit known
  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
  // vscale as VLENB / 8.
  static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
  if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
    // Support for VLEN==32 is incomplete.
    return false;

  // We assume VLENB is a multiple of 8. We manually choose the best shift
  // here because SimplifyDemandedBits isn't always able to simplify it.
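  // For example, 16 * vscale becomes VLENB << 1 and 24 * vscale becomes
  // VLENB * 3, while a multiplier that is neither a power of two nor a
  // multiple of 8 becomes (VLENB >> 3) * Val.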
  uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
  if (isPowerOf2_64(Val)) {
    uint64_t Log2 = Log2_64(Val);
    if (Log2 < 3) {
      auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
      MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
    } else if (Log2 > 3) {
      auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
      MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
    } else {
      MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
    }
  } else if ((Val % 8) == 0) {
    // If the multiplier is a multiple of 8, scale it down to avoid needing
    // to shift the VLENB value.
    auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
    MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
  } else {
    auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
    auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
    MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
  }
  MI.eraseFromParent();
  return true;
}

// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
                                     MachineIRBuilder &MIB) const {

  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
         Opc == TargetOpcode::G_ANYEXT);

  MachineRegisterInfo &MRI = *MIB.getMRI();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
  LLT DstEltTy = DstTy.getElementType();
  auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0));
  auto SplatTrue =
      MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal));
  MIB.buildSelect(Dst, Src, SplatTrue, SplatZero);

  MI.eraseFromParent();
  return true;
}

/// Return the mask type suitable for masking the provided vector type. This
/// is simply an i1 element type vector of the same (possibly scalable)
/// length.
static LLT getMaskTypeFor(LLT VecTy) {
  assert(VecTy.isVector());
  ElementCount EC = VecTy.getElementCount();
  return LLT::vector(EC, LLT::scalar(1));
}

/// Creates an all ones mask suitable for masking a vector of type VecTy with
/// vector length VL.
static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
                                            MachineIRBuilder &MIB,
                                            MachineRegisterInfo &MRI) {
  LLT MaskTy = getMaskTypeFor(VecTy);
  return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
}

/// Gets the two common "VL" operands: an all-ones mask and the vector length.
/// VecTy is a scalable vector type.
static std::pair<MachineInstrBuilder, Register>
buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB,
                  MachineRegisterInfo &MRI) {
  LLT VecTy = Dst.getLLTTy(MRI);
  assert(VecTy.isScalableVector() && "Expecting scalable container type");
  Register VL(RISCV::X0);
  MachineInstrBuilder Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
  return {Mask, VL};
}

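/// Build a splat of an s64 value provided as separate Lo/Hi halves; used on
/// RV32, where an s64 scalar operand is not legal.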
static MachineInstrBuilder
buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
                         Register Hi, Register VL, MachineIRBuilder &MIB,
                         MachineRegisterInfo &MRI) {
  // TODO: If the Hi bits of the splat are undefined, then it's fine to just
  // splat Lo even if it might be sign extended. I don't think we have
  // introduced a case where we build an s64 with undefined upper bits yet.

  // Fall back to a stack store and stride x0 vector load.
  // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
  // preprocessDAG in SDAG.
  return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
                        {Passthru, Lo, Hi, VL});
}

static MachineInstrBuilder
buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
                         const SrcOp &Scalar, Register VL,
                         MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
  assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
  auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar);
  return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0),
                                  Unmerge.getReg(1), VL, MIB, MRI);
}

// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
// Splats of s1 types that have constant value can be legalized as VMSET_VL or
// VMCLR_VL.
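// That is, once the constant cases are handled:
//   splat_vector(s1 x) -> G_ICMP ne (splat_vector(zext(x) & 1)), (splat_vector 0)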
bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
                                             MachineIRBuilder &MIB) const {
  assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  Register Dst = MI.getOperand(0).getReg();
  Register SplatVal = MI.getOperand(1).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT XLenTy(STI.getXLenVT());

  // Handle case of s64 element vectors on rv32
  if (XLenTy.getSizeInBits() == 32 &&
      VecTy.getElementType().getSizeInBits() == 64) {
    auto [_, VL] = buildDefaultVLOps(Dst, MIB, MRI);
    buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
                             MRI);
    MI.eraseFromParent();
    return true;
  }

  // All-zeros or all-ones splats are handled specially.
  MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal);
  if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) {
    auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
    MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL});
    MI.eraseFromParent();
    return true;
  }
  if (isNullOrNullSplat(SplatValMI, MRI)) {
    auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
    MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL});
    MI.eraseFromParent();
    return true;
  }

  // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
  // ones) by promoting it to an s8 splat.
  LLT InterEltTy = LLT::scalar(8);
  LLT InterTy = VecTy.changeElementType(InterEltTy);
  auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal);
  auto And =
      MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1));
  auto LHS = MIB.buildSplatVector(InterTy, And);
  auto ZeroSplat =
      MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0));
  MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat);
  MI.eraseFromParent();
  return true;
}

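// Dispatcher for all operations marked Custom in the rules built by the
// constructor above.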
bool RISCVLegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  GISelChangeObserver &Observer = Helper.Observer;
  MachineFunction &MF = *MI.getParent()->getParent();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_ABS:
    return Helper.lowerAbsToMaxNeg(MI);
  // TODO: G_FCONSTANT
  case TargetOpcode::G_CONSTANT: {
    const Function &F = MF.getFunction();
    // TODO: if PSI and BFI are present, add " ||
    // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
    bool ShouldOptForSize = F.hasOptSize() || F.hasMinSize();
    const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
    if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
      return true;
    return Helper.lowerConstant(MI);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MIRBuilder, Observer);
  case TargetOpcode::G_SEXT_INREG: {
    // Source size of 32 is sext.w.
    int64_t SizeInBits = MI.getOperand(2).getImm();
    if (SizeInBits == 32)
      return true;

    return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
           LegalizerHelper::Legalized;
  }
  case TargetOpcode::G_IS_FPCLASS: {
    Register GISFPCLASS = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    const MachineOperand &ImmOp = MI.getOperand(2);
    MachineIRBuilder MIB(MI);

    // Map LLVM IR's floating-point class mask to RISC-V's FCLASS layout by
    // simply rotating the 10-bit immediate right by two bits.
    APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
    auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
    auto ConstZero = MIB.buildConstant(sXLen, 0);

    auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
    auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
    MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);

    MI.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_VASTART:
    return legalizeVAStart(MI, MIRBuilder);
  case TargetOpcode::G_VSCALE:
    return legalizeVScale(MI, MIRBuilder);
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ANYEXT:
    return legalizeExt(MI, MIRBuilder);
  case TargetOpcode::G_SPLAT_VECTOR:
    return legalizeSplatVector(MI, MIRBuilder);
  }

  llvm_unreachable("expected switch to return");
}