//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
12
13#include "RISCVLegalizerInfo.h"
14#include "MCTargetDesc/RISCVMatInt.h"
15#include "RISCVMachineFunctionInfo.h"
16#include "RISCVSubtarget.h"
17#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
18#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
19#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
20#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
21#include "llvm/CodeGen/MachineConstantPool.h"
22#include "llvm/CodeGen/MachineJumpTableInfo.h"
23#include "llvm/CodeGen/MachineMemOperand.h"
24#include "llvm/CodeGen/MachineOperand.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/TargetOpcodes.h"
27#include "llvm/CodeGen/ValueTypes.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Type.h"
30
31using namespace llvm;
32using namespace LegalityPredicates;
33using namespace LegalizeMutations;
34
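/// Return a legality predicate that matches the vector types in
/// \p IntOrFPVecTys that the subtarget can actually support: vector
/// instructions must be enabled, 64-bit elements additionally require
/// hasVInstructionsI64(), and single-element (nxv1) types require ELEN == 64.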
static LegalityPredicate
typeIsLegalIntOrFPVec(unsigned TypeIdx,
                      std::initializer_list<LLT> IntOrFPVecTys,
                      const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
            ST.hasVInstructionsI64()) &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };

  return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
}
49
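/// Like typeIsLegalIntOrFPVec, but for the i1 (mask) vector types in
/// \p BoolVecTys: vector instructions must be enabled, and the single-element
/// nxv1s1 type additionally requires ELEN == 64.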
static LegalityPredicate
typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
                   const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };
  return all(typeInSet(TypeIdx, BoolVecTys), P);
}
60
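/// Legality predicate for the pointer vector types in \p PtrVecTys: vector
/// instructions must be enabled, nxv1p0 requires ELEN == 64, and nxv16p0 is
/// only supported when pointers are 32 bits wide.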
static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
                                           std::initializer_list<LLT> PtrVecTys,
                                           const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64) &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
            Query.Types[TypeIdx].getScalarSizeInBits() == 32);
  };
  return all(typeInSet(TypeIdx, PtrVecTys), P);
}
73
74RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
75 : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(SizeInBits: XLen)) {
76 const LLT sDoubleXLen = LLT::scalar(SizeInBits: 2 * XLen);
77 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: XLen);
78 const LLT s1 = LLT::scalar(SizeInBits: 1);
79 const LLT s8 = LLT::scalar(SizeInBits: 8);
80 const LLT s16 = LLT::scalar(SizeInBits: 16);
81 const LLT s32 = LLT::scalar(SizeInBits: 32);
82 const LLT s64 = LLT::scalar(SizeInBits: 64);
83 const LLT s128 = LLT::scalar(SizeInBits: 128);
84
85 const LLT nxv1s1 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s1);
86 const LLT nxv2s1 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s1);
87 const LLT nxv4s1 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s1);
88 const LLT nxv8s1 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s1);
89 const LLT nxv16s1 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s1);
90 const LLT nxv32s1 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s1);
91 const LLT nxv64s1 = LLT::scalable_vector(MinNumElements: 64, ScalarTy: s1);
92
93 const LLT nxv1s8 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s8);
94 const LLT nxv2s8 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s8);
95 const LLT nxv4s8 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s8);
96 const LLT nxv8s8 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s8);
97 const LLT nxv16s8 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s8);
98 const LLT nxv32s8 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s8);
99 const LLT nxv64s8 = LLT::scalable_vector(MinNumElements: 64, ScalarTy: s8);
100
101 const LLT nxv1s16 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s16);
102 const LLT nxv2s16 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s16);
103 const LLT nxv4s16 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s16);
104 const LLT nxv8s16 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s16);
105 const LLT nxv16s16 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s16);
106 const LLT nxv32s16 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s16);
107
108 const LLT nxv1s32 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s32);
109 const LLT nxv2s32 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s32);
110 const LLT nxv4s32 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s32);
111 const LLT nxv8s32 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s32);
112 const LLT nxv16s32 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s32);
113
114 const LLT nxv1s64 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s64);
115 const LLT nxv2s64 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s64);
116 const LLT nxv4s64 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s64);
117 const LLT nxv8s64 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s64);
118
119 const LLT nxv1p0 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: p0);
120 const LLT nxv2p0 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: p0);
121 const LLT nxv4p0 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: p0);
122 const LLT nxv8p0 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: p0);
123 const LLT nxv16p0 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: p0);
124
125 using namespace TargetOpcode;
126
127 auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};
128
129 auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
130 nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
131 nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
132 nxv1s64, nxv2s64, nxv4s64, nxv8s64};
133
134 auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
135
136 getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB})
137 .legalFor(Types: {sXLen})
138 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
139 .customFor(Pred: ST.is64Bit(), Types: {s32})
140 .widenScalarToNextPow2(TypeIdx: 0)
141 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
142
143 getActionDefinitionsBuilder(Opcodes: {G_AND, G_OR, G_XOR})
144 .legalFor(Types: {sXLen})
145 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
146 .widenScalarToNextPow2(TypeIdx: 0)
147 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
148
149 getActionDefinitionsBuilder(
150 Opcodes: {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();
151
152 getActionDefinitionsBuilder(Opcodes: {G_SADDO, G_SSUBO}).minScalar(TypeIdx: 0, Ty: sXLen).lower();
153
154 // TODO: Use Vector Single-Width Saturating Instructions for vector types.
155 getActionDefinitionsBuilder(Opcodes: {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
156 .lower();
157
158 getActionDefinitionsBuilder(Opcodes: {G_SHL, G_ASHR, G_LSHR})
159 .legalFor(Types: {{sXLen, sXLen}})
160 .customFor(Pred: ST.is64Bit(), Types: {{s32, s32}})
161 .widenScalarToNextPow2(TypeIdx: 0)
162 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen)
163 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
164
165 getActionDefinitionsBuilder(Opcodes: {G_ZEXT, G_SEXT, G_ANYEXT})
166 .legalFor(Types: {{s32, s16}})
167 .legalFor(Pred: ST.is64Bit(), Types: {{s64, s16}, {s64, s32}})
168 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
169 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)))
170 .customIf(Predicate: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST))
171 .maxScalar(TypeIdx: 0, Ty: sXLen);
172
173 getActionDefinitionsBuilder(Opcode: G_SEXT_INREG)
174 .customFor(Types: {sXLen})
175 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
176 .lower();
177
178 // Merge/Unmerge
179 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
180 auto &MergeUnmergeActions = getActionDefinitionsBuilder(Opcode: Op);
181 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
182 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
183 if (XLen == 32 && ST.hasStdExtD()) {
184 MergeUnmergeActions.legalIf(
185 Predicate: all(P0: typeIs(TypeIdx: BigTyIdx, TypesInit: s64), P1: typeIs(TypeIdx: LitTyIdx, TypesInit: s32)));
186 }
187 MergeUnmergeActions.widenScalarToNextPow2(TypeIdx: LitTyIdx, MinSize: XLen)
188 .widenScalarToNextPow2(TypeIdx: BigTyIdx, MinSize: XLen)
189 .clampScalar(TypeIdx: LitTyIdx, MinTy: sXLen, MaxTy: sXLen)
190 .clampScalar(TypeIdx: BigTyIdx, MinTy: sXLen, MaxTy: sXLen);
191 }
192
193 getActionDefinitionsBuilder(Opcodes: {G_FSHL, G_FSHR}).lower();
194
195 getActionDefinitionsBuilder(Opcodes: {G_ROTR, G_ROTL})
196 .legalFor(Pred: ST.hasStdExtZbb() || ST.hasStdExtZbkb(), Types: {{sXLen, sXLen}})
197 .customFor(Pred: ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
198 Types: {{s32, s32}})
199 .lower();
200
201 getActionDefinitionsBuilder(Opcode: G_BITREVERSE).maxScalar(TypeIdx: 0, Ty: sXLen).lower();
202
203 getActionDefinitionsBuilder(Opcode: G_BITCAST).legalIf(
204 Predicate: all(P0: LegalityPredicates::any(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
205 P1: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST)),
206 P1: LegalityPredicates::any(P0: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST),
207 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST))));
208
209 auto &BSWAPActions = getActionDefinitionsBuilder(Opcode: G_BSWAP);
210 if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
211 BSWAPActions.legalFor(Types: {sXLen}).clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
212 else
213 BSWAPActions.maxScalar(TypeIdx: 0, Ty: sXLen).lower();
214
215 auto &CountZerosActions = getActionDefinitionsBuilder(Opcodes: {G_CTLZ, G_CTTZ});
216 auto &CountZerosUndefActions =
217 getActionDefinitionsBuilder(Opcodes: {G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
218 if (ST.hasStdExtZbb()) {
219 CountZerosActions.legalFor(Types: {{sXLen, sXLen}})
220 .customFor(Types: {{s32, s32}})
221 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen)
222 .widenScalarToNextPow2(TypeIdx: 0)
223 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
224 } else {
225 CountZerosActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0).lower();
226 CountZerosUndefActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
227 }
228 CountZerosUndefActions.lower();
229
230 auto &CTPOPActions = getActionDefinitionsBuilder(Opcode: G_CTPOP);
231 if (ST.hasStdExtZbb()) {
232 CTPOPActions.legalFor(Types: {{sXLen, sXLen}})
233 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
234 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
235 } else {
236 CTPOPActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0).lower();
237 }
238
239 getActionDefinitionsBuilder(Opcode: G_CONSTANT)
240 .legalFor(Types: {p0})
241 .legalFor(Pred: !ST.is64Bit(), Types: {s32})
242 .customFor(Pred: ST.is64Bit(), Types: {s64})
243 .widenScalarToNextPow2(TypeIdx: 0)
244 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
245
246 // TODO: transform illegal vector types into legal vector type
247 getActionDefinitionsBuilder(Opcode: G_FREEZE)
248 .legalFor(Types: {s16, s32, p0})
249 .legalFor(Pred: ST.is64Bit(), Types: {s64})
250 .legalIf(Predicate: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST))
251 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
252 .widenScalarToNextPow2(TypeIdx: 0)
253 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen);
254
255 // TODO: transform illegal vector types into legal vector type
256 // TODO: Merge with G_FREEZE?
257 getActionDefinitionsBuilder(
258 Opcodes: {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
259 .legalFor(Types: {s32, sXLen, p0})
260 .legalIf(Predicate: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST))
261 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
262 .widenScalarToNextPow2(TypeIdx: 0)
263 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen);
264
265 getActionDefinitionsBuilder(Opcode: G_ICMP)
266 .legalFor(Types: {{sXLen, sXLen}, {sXLen, p0}})
267 .legalIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
268 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)))
269 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1, MinSize: 8)
270 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen)
271 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
272
273 getActionDefinitionsBuilder(Opcode: G_SELECT)
274 .legalFor(Types: {{s32, sXLen}, {p0, sXLen}})
275 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
276 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)))
277 .legalFor(Pred: XLen == 64 || ST.hasStdExtD(), Types: {{s64, sXLen}})
278 .widenScalarToNextPow2(TypeIdx: 0)
279 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
280 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
281
282 auto &LoadActions = getActionDefinitionsBuilder(Opcode: G_LOAD);
283 auto &StoreActions = getActionDefinitionsBuilder(Opcode: G_STORE);
284 auto &ExtLoadActions = getActionDefinitionsBuilder(Opcodes: {G_SEXTLOAD, G_ZEXTLOAD});
285
286 // Return the alignment needed for scalar memory ops. If unaligned scalar mem
287 // is supported, we only require byte alignment. Otherwise, we need the memory
288 // op to be natively aligned.
289 auto getScalarMemAlign = [&ST](unsigned Size) {
290 return ST.enableUnalignedScalarMem() ? 8 : Size;
291 };
292
293 LoadActions.legalForTypesWithMemDesc(
294 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
295 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
296 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
297 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
298 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
299 {.Type0: p0, .Type1: p0, .MemTy: sXLen, .Align: getScalarMemAlign(XLen)}});
300 StoreActions.legalForTypesWithMemDesc(
301 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
302 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
303 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
304 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
305 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
306 {.Type0: p0, .Type1: p0, .MemTy: sXLen, .Align: getScalarMemAlign(XLen)}});
307 ExtLoadActions.legalForTypesWithMemDesc(
308 TypesAndMemDesc: {{.Type0: sXLen, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
309 {.Type0: sXLen, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)}});
310 if (XLen == 64) {
311 LoadActions.legalForTypesWithMemDesc(
312 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
313 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
314 {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
315 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
316 StoreActions.legalForTypesWithMemDesc(
317 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
318 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
319 {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
320 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
321 ExtLoadActions.legalForTypesWithMemDesc(
322 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)}});
323 } else if (ST.hasStdExtD()) {
324 LoadActions.legalForTypesWithMemDesc(
325 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
326 StoreActions.legalForTypesWithMemDesc(
327 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
328 }
329
330 // Vector loads/stores.
331 if (ST.hasVInstructions()) {
332 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv2s8, .Type1: p0, .MemTy: nxv2s8, .Align: 8},
333 {.Type0: nxv4s8, .Type1: p0, .MemTy: nxv4s8, .Align: 8},
334 {.Type0: nxv8s8, .Type1: p0, .MemTy: nxv8s8, .Align: 8},
335 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
336 {.Type0: nxv32s8, .Type1: p0, .MemTy: nxv32s8, .Align: 8},
337 {.Type0: nxv64s8, .Type1: p0, .MemTy: nxv64s8, .Align: 8},
338 {.Type0: nxv2s16, .Type1: p0, .MemTy: nxv2s16, .Align: 16},
339 {.Type0: nxv4s16, .Type1: p0, .MemTy: nxv4s16, .Align: 16},
340 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 16},
341 {.Type0: nxv16s16, .Type1: p0, .MemTy: nxv16s16, .Align: 16},
342 {.Type0: nxv32s16, .Type1: p0, .MemTy: nxv32s16, .Align: 16},
343 {.Type0: nxv2s32, .Type1: p0, .MemTy: nxv2s32, .Align: 32},
344 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 32},
345 {.Type0: nxv8s32, .Type1: p0, .MemTy: nxv8s32, .Align: 32},
346 {.Type0: nxv16s32, .Type1: p0, .MemTy: nxv16s32, .Align: 32}});
347 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv2s8, .Type1: p0, .MemTy: nxv2s8, .Align: 8},
348 {.Type0: nxv4s8, .Type1: p0, .MemTy: nxv4s8, .Align: 8},
349 {.Type0: nxv8s8, .Type1: p0, .MemTy: nxv8s8, .Align: 8},
350 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
351 {.Type0: nxv32s8, .Type1: p0, .MemTy: nxv32s8, .Align: 8},
352 {.Type0: nxv64s8, .Type1: p0, .MemTy: nxv64s8, .Align: 8},
353 {.Type0: nxv2s16, .Type1: p0, .MemTy: nxv2s16, .Align: 16},
354 {.Type0: nxv4s16, .Type1: p0, .MemTy: nxv4s16, .Align: 16},
355 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 16},
356 {.Type0: nxv16s16, .Type1: p0, .MemTy: nxv16s16, .Align: 16},
357 {.Type0: nxv32s16, .Type1: p0, .MemTy: nxv32s16, .Align: 16},
358 {.Type0: nxv2s32, .Type1: p0, .MemTy: nxv2s32, .Align: 32},
359 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 32},
360 {.Type0: nxv8s32, .Type1: p0, .MemTy: nxv8s32, .Align: 32},
361 {.Type0: nxv16s32, .Type1: p0, .MemTy: nxv16s32, .Align: 32}});
362
363 if (ST.getELen() == 64) {
364 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s8, .Type1: p0, .MemTy: nxv1s8, .Align: 8},
365 {.Type0: nxv1s16, .Type1: p0, .MemTy: nxv1s16, .Align: 16},
366 {.Type0: nxv1s32, .Type1: p0, .MemTy: nxv1s32, .Align: 32}});
367 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s8, .Type1: p0, .MemTy: nxv1s8, .Align: 8},
368 {.Type0: nxv1s16, .Type1: p0, .MemTy: nxv1s16, .Align: 16},
369 {.Type0: nxv1s32, .Type1: p0, .MemTy: nxv1s32, .Align: 32}});
370 }
371
372 if (ST.hasVInstructionsI64()) {
373 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s64, .Type1: p0, .MemTy: nxv1s64, .Align: 64},
374 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 64},
375 {.Type0: nxv4s64, .Type1: p0, .MemTy: nxv4s64, .Align: 64},
376 {.Type0: nxv8s64, .Type1: p0, .MemTy: nxv8s64, .Align: 64}});
377 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s64, .Type1: p0, .MemTy: nxv1s64, .Align: 64},
378 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 64},
379 {.Type0: nxv4s64, .Type1: p0, .MemTy: nxv4s64, .Align: 64},
380 {.Type0: nxv8s64, .Type1: p0, .MemTy: nxv8s64, .Align: 64}});
381 }
382
    // Use the custom lowering logic for scalable vector types with
    // non-standard alignments.
385 LoadActions.customIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST));
386 StoreActions.customIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST));
387
388 // Pointers require that XLen sized elements are legal.
389 if (XLen <= ST.getELen()) {
390 LoadActions.customIf(Predicate: typeIsLegalPtrVec(TypeIdx: 0, PtrVecTys, ST));
391 StoreActions.customIf(Predicate: typeIsLegalPtrVec(TypeIdx: 0, PtrVecTys, ST));
392 }
393 }
394
395 LoadActions.widenScalarToNextPow2(TypeIdx: 0, /* MinSize = */ 8)
396 .lowerIfMemSizeNotByteSizePow2()
397 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen)
398 .lower();
399 StoreActions
400 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen)
401 .lowerIfMemSizeNotByteSizePow2()
402 .lower();
403
404 ExtLoadActions.widenScalarToNextPow2(TypeIdx: 0).clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen).lower();
405
406 getActionDefinitionsBuilder(Opcodes: {G_PTR_ADD, G_PTRMASK}).legalFor(Types: {{p0, sXLen}});
407
408 getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
409 .legalFor(Types: {{sXLen, p0}})
410 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
411
412 getActionDefinitionsBuilder(Opcode: G_INTTOPTR)
413 .legalFor(Types: {{p0, sXLen}})
414 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
415
416 getActionDefinitionsBuilder(Opcode: G_BRCOND).legalFor(Types: {sXLen}).minScalar(TypeIdx: 0, Ty: sXLen);
417
418 getActionDefinitionsBuilder(Opcode: G_BRJT).customFor(Types: {{p0, sXLen}});
419
420 getActionDefinitionsBuilder(Opcode: G_BRINDIRECT).legalFor(Types: {p0});
421
422 getActionDefinitionsBuilder(Opcode: G_PHI)
423 .legalFor(Types: {p0, s32, sXLen})
424 .widenScalarToNextPow2(TypeIdx: 0)
425 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen);
426
427 getActionDefinitionsBuilder(Opcodes: {G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
428 .legalFor(Types: {p0});
429
430 if (ST.hasStdExtZmmul()) {
431 getActionDefinitionsBuilder(Opcode: G_MUL)
432 .legalFor(Types: {sXLen})
433 .widenScalarToNextPow2(TypeIdx: 0)
434 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
435
436 // clang-format off
437 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH})
438 .legalFor(Types: {sXLen})
439 .lower();
440 // clang-format on
441
442 getActionDefinitionsBuilder(Opcodes: {G_SMULO, G_UMULO}).minScalar(TypeIdx: 0, Ty: sXLen).lower();
443 } else {
444 getActionDefinitionsBuilder(Opcode: G_MUL)
445 .libcallFor(Types: {sXLen, sDoubleXLen})
446 .widenScalarToNextPow2(TypeIdx: 0)
447 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen);
448
449 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH}).lowerFor(Types: {sXLen});
450
451 getActionDefinitionsBuilder(Opcodes: {G_SMULO, G_UMULO})
452 .minScalar(TypeIdx: 0, Ty: sXLen)
453 // Widen sXLen to sDoubleXLen so we can use a single libcall to get
454 // the low bits for the mul result and high bits to do the overflow
455 // check.
456 .widenScalarIf(Predicate: typeIs(TypeIdx: 0, TypesInit: sXLen),
457 Mutation: LegalizeMutations::changeTo(TypeIdx: 0, Ty: sDoubleXLen))
458 .lower();
459 }
460
461 if (ST.hasStdExtM()) {
462 getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_UDIV, G_UREM})
463 .legalFor(Types: {sXLen})
464 .customFor(Types: {s32})
465 .libcallFor(Types: {sDoubleXLen})
466 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sDoubleXLen)
467 .widenScalarToNextPow2(TypeIdx: 0);
468 getActionDefinitionsBuilder(Opcode: G_SREM)
469 .legalFor(Types: {sXLen})
470 .libcallFor(Types: {sDoubleXLen})
471 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen)
472 .widenScalarToNextPow2(TypeIdx: 0);
473 } else {
474 getActionDefinitionsBuilder(Opcodes: {G_UDIV, G_SDIV, G_UREM, G_SREM})
475 .libcallFor(Types: {sXLen, sDoubleXLen})
476 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen)
477 .widenScalarToNextPow2(TypeIdx: 0);
478 }
479
480 // TODO: Use libcall for sDoubleXLen.
481 getActionDefinitionsBuilder(Opcodes: {G_SDIVREM, G_UDIVREM}).lower();
482
483 getActionDefinitionsBuilder(Opcode: G_ABS)
484 .customFor(Pred: ST.hasStdExtZbb(), Types: {sXLen})
485 .minScalar(Pred: ST.hasStdExtZbb(), TypeIdx: 0, Ty: sXLen)
486 .lower();
487
488 getActionDefinitionsBuilder(Opcodes: {G_UMAX, G_UMIN, G_SMAX, G_SMIN})
489 .legalFor(Pred: ST.hasStdExtZbb(), Types: {sXLen})
490 .minScalar(Pred: ST.hasStdExtZbb(), TypeIdx: 0, Ty: sXLen)
491 .lower();
492
493 getActionDefinitionsBuilder(Opcodes: {G_SCMP, G_UCMP}).lower();
494
495 getActionDefinitionsBuilder(Opcode: G_FRAME_INDEX).legalFor(Types: {p0});
496
497 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
498
499 getActionDefinitionsBuilder(Opcodes: {G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
500 .lower();
501
502 // FP Operations
503
504 // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
505 getActionDefinitionsBuilder(
506 Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
507 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
508 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
509 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
510 .libcallFor(Types: {s32, s64})
511 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
512
513 getActionDefinitionsBuilder(Opcodes: {G_FNEG, G_FABS})
514 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
515 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
516 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
517 .lowerFor(Types: {s32, s64, s128});
518
519 getActionDefinitionsBuilder(Opcode: G_FREM)
520 .libcallFor(Types: {s32, s64})
521 .libcallFor(Pred: ST.is64Bit(), Types: {s128})
522 .minScalar(TypeIdx: 0, Ty: s32)
523 .scalarize(TypeIdx: 0);
524
525 getActionDefinitionsBuilder(Opcode: G_FCOPYSIGN)
526 .legalFor(Pred: ST.hasStdExtF(), Types: {{s32, s32}})
527 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, s64}, {s32, s64}, {s64, s32}})
528 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, s16}, {s16, s32}, {s32, s16}})
529 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s16, s64}, {s64, s16}})
530 .lower();
531
532 // FIXME: Use Zfhmin.
533 getActionDefinitionsBuilder(Opcode: G_FPTRUNC)
534 .legalFor(Pred: ST.hasStdExtD(), Types: {{s32, s64}})
535 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, s32}})
536 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s16, s64}})
537 .libcallFor(Types: {{s32, s64}})
538 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}});
539 getActionDefinitionsBuilder(Opcode: G_FPEXT)
540 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, s32}})
541 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s32, s16}})
542 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s64, s16}})
543 .libcallFor(Types: {{s64, s32}})
544 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}});
545
546 getActionDefinitionsBuilder(Opcode: G_FCMP)
547 .legalFor(Pred: ST.hasStdExtF(), Types: {{sXLen, s32}})
548 .legalFor(Pred: ST.hasStdExtD(), Types: {{sXLen, s64}})
549 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{sXLen, s16}})
550 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
551 .libcallFor(Types: {{sXLen, s32}, {sXLen, s64}})
552 .libcallFor(Pred: ST.is64Bit(), Types: {{sXLen, s128}});
553
554 // TODO: Support vector version of G_IS_FPCLASS.
555 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS)
556 .customFor(Pred: ST.hasStdExtF(), Types: {{s1, s32}})
557 .customFor(Pred: ST.hasStdExtD(), Types: {{s1, s64}})
558 .customFor(Pred: ST.hasStdExtZfh(), Types: {{s1, s16}})
559 .lowerFor(Types: {{s1, s32}, {s1, s64}});
560
561 getActionDefinitionsBuilder(Opcode: G_FCONSTANT)
562 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
563 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
564 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
565 .lowerFor(Types: {s32, s64, s128});
566
567 getActionDefinitionsBuilder(Opcodes: {G_FPTOSI, G_FPTOUI})
568 .legalFor(Pred: ST.hasStdExtF(), Types: {{sXLen, s32}})
569 .legalFor(Pred: ST.hasStdExtD(), Types: {{sXLen, s64}})
570 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{sXLen, s16}})
571 .customFor(Pred: ST.is64Bit() && ST.hasStdExtF(), Types: {{s32, s32}})
572 .customFor(Pred: ST.is64Bit() && ST.hasStdExtD(), Types: {{s32, s64}})
573 .customFor(Pred: ST.is64Bit() && ST.hasStdExtZfh(), Types: {{s32, s16}})
574 .widenScalarToNextPow2(TypeIdx: 0)
575 .minScalar(TypeIdx: 0, Ty: s32)
576 .libcallFor(Types: {{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
577 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}}) // FIXME RV32.
578 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}, {s128, s128}});
579
580 getActionDefinitionsBuilder(Opcodes: {G_SITOFP, G_UITOFP})
581 .legalFor(Pred: ST.hasStdExtF(), Types: {{s32, sXLen}})
582 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, sXLen}})
583 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, sXLen}})
584 .widenScalarToNextPow2(TypeIdx: 1)
585 // Promote to XLen if the operation is legal.
586 .widenScalarIf(
587 Predicate: [=, &ST](const LegalityQuery &Query) {
588 return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
589 (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
590 ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
591 (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
592 (ST.hasStdExtZfh() &&
593 Query.Types[0].getSizeInBits() == 16));
594 },
595 Mutation: LegalizeMutations::changeTo(TypeIdx: 1, Ty: sXLen))
596 // Otherwise only promote to s32 since we have si libcalls.
597 .minScalar(TypeIdx: 1, Ty: s32)
598 .libcallFor(Types: {{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
599 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}}) // FIXME RV32.
600 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}, {s128, s128}});
601
602 // FIXME: We can do custom inline expansion like SelectionDAG.
603 getActionDefinitionsBuilder(Opcodes: {G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
604 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
605 G_INTRINSIC_ROUNDEVEN})
606 .legalFor(Pred: ST.hasStdExtZfa(), Types: {s32})
607 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtD(), Types: {s64})
608 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtZfh(), Types: {s16})
609 .libcallFor(Types: {s32, s64})
610 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
611
612 getActionDefinitionsBuilder(Opcodes: {G_FMAXIMUM, G_FMINIMUM})
613 .legalFor(Pred: ST.hasStdExtZfa(), Types: {s32})
614 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtD(), Types: {s64})
615 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtZfh(), Types: {s16});
616
617 getActionDefinitionsBuilder(Opcodes: {G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
618 G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
619 G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
620 G_FTANH})
621 .libcallFor(Types: {s32, s64})
622 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
623 getActionDefinitionsBuilder(Opcodes: {G_FPOWI, G_FLDEXP})
624 .libcallFor(Types: {{s32, s32}, {s64, s32}})
625 .libcallFor(Pred: ST.is64Bit(), Types: {s128, s32});
626
627 getActionDefinitionsBuilder(Opcode: G_VASTART).customFor(Types: {p0});
628
629 // va_list must be a pointer, but most sized types are pretty easy to handle
630 // as the destination.
631 getActionDefinitionsBuilder(Opcode: G_VAARG)
632 // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
633 // other than sXLen.
634 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
635 .lowerForCartesianProduct(Types0: {sXLen, p0}, Types1: {p0});
636
637 getActionDefinitionsBuilder(Opcode: G_VSCALE)
638 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
639 .customFor(Types: {sXLen});
640
641 auto &SplatActions =
642 getActionDefinitionsBuilder(Opcode: G_SPLAT_VECTOR)
643 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
644 P1: typeIs(TypeIdx: 1, TypesInit: sXLen)))
645 .customIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST), P1: typeIs(TypeIdx: 1, TypesInit: s1)));
646 // Handle case of s64 element vectors on RV32. If the subtarget does not have
647 // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
648 // does have f64, then we don't know whether the type is an f64 or an i64,
649 // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
650 // depending on how the instructions it consumes are legalized. They are not
651 // legalized yet since legalization is in reverse postorder, so we cannot
652 // make the decision at this moment.
653 if (XLen == 32) {
654 if (ST.hasVInstructionsF64() && ST.hasStdExtD())
655 SplatActions.legalIf(Predicate: all(
656 P0: typeInSet(TypeIdx: 0, TypesInit: {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), P1: typeIs(TypeIdx: 1, TypesInit: s64)));
657 else if (ST.hasVInstructionsI64())
658 SplatActions.customIf(Predicate: all(
659 P0: typeInSet(TypeIdx: 0, TypesInit: {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), P1: typeIs(TypeIdx: 1, TypesInit: s64)));
660 }
661
662 SplatActions.clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
663
664 LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
665 LLT DstTy = Query.Types[0];
666 LLT SrcTy = Query.Types[1];
667 return DstTy.getElementType() == LLT::scalar(SizeInBits: 1) &&
668 DstTy.getElementCount().getKnownMinValue() >= 8 &&
669 SrcTy.getElementCount().getKnownMinValue() >= 8;
670 };
671 getActionDefinitionsBuilder(Opcode: G_EXTRACT_SUBVECTOR)
672 // We don't have the ability to slide mask vectors down indexed by their
673 // i1 elements; the smallest we can do is i8. Often we are able to bitcast
674 // to equivalent i8 vectors.
675 .bitcastIf(
676 Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
677 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST), args: ExtractSubvecBitcastPred),
678 Mutation: [=](const LegalityQuery &Query) {
679 LLT CastTy = LLT::vector(
680 EC: Query.Types[0].getElementCount().divideCoefficientBy(RHS: 8), ScalarSizeInBits: 8);
681 return std::pair(0, CastTy);
682 })
683 .customIf(Predicate: LegalityPredicates::any(
684 P0: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
685 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)),
686 P1: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
687 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST))));
688
689 getActionDefinitionsBuilder(Opcode: G_INSERT_SUBVECTOR)
690 .customIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
691 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)))
692 .customIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
693 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)));
694
695 getLegacyLegalizerInfo().computeTables();
696 verify(MII: *ST.getInstrInfo());
697}
698
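// Custom intrinsic legalization. Only llvm.vacopy is handled here: it is
// expanded to a load of the source va_list pointer followed by a store into
// the destination va_list.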
699bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
700 MachineInstr &MI) const {
701 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
702 switch (IntrinsicID) {
703 default:
704 return false;
705 case Intrinsic::vacopy: {
706 // vacopy arguments must be legal because of the intrinsic signature.
707 // No need to check here.
708
709 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
710 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
711 MachineFunction &MF = *MI.getMF();
712 const DataLayout &DL = MIRBuilder.getDataLayout();
713 LLVMContext &Ctx = MF.getFunction().getContext();
714
715 Register DstLst = MI.getOperand(i: 1).getReg();
716 LLT PtrTy = MRI.getType(Reg: DstLst);
717
718 // Load the source va_list
719 Align Alignment = DL.getABITypeAlign(Ty: getTypeForLLT(Ty: PtrTy, C&: Ctx));
720 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
721 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad, MemTy: PtrTy, base_alignment: Alignment);
722 auto Tmp = MIRBuilder.buildLoad(Res: PtrTy, Addr: MI.getOperand(i: 2), MMO&: *LoadMMO);
723
724 // Store the result in the destination va_list
725 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
726 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOStore, MemTy: PtrTy, base_alignment: Alignment);
727 MIRBuilder.buildStore(Val: Tmp, Addr: DstLst, MMO&: *StoreMMO);
728
729 MI.eraseFromParent();
730 return true;
731 }
732 }
733}
734
735bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
736 MachineIRBuilder &MIRBuilder) const {
  // Store the address of the VarArgsFrameIndex slot into the memory location
  // pointed to by the va_list operand.
738 assert(MI.getOpcode() == TargetOpcode::G_VASTART);
739 MachineFunction *MF = MI.getParent()->getParent();
740 RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
741 int FI = FuncInfo->getVarArgsFrameIndex();
742 LLT AddrTy = MIRBuilder.getMRI()->getType(Reg: MI.getOperand(i: 0).getReg());
743 auto FINAddr = MIRBuilder.buildFrameIndex(Res: AddrTy, Idx: FI);
744 assert(MI.hasOneMemOperand());
745 MIRBuilder.buildStore(Val: FINAddr, Addr: MI.getOperand(i: 0).getReg(),
746 MMO&: *MI.memoperands()[0]);
747 MI.eraseFromParent();
748 return true;
749}
750
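// Custom-lower G_BRJT: scale the index by the jump table entry size, load the
// entry, materialize the target address according to the entry kind, and
// branch to it with G_BRINDIRECT.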
751bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
752 MachineIRBuilder &MIRBuilder) const {
753 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
754 auto &MF = *MI.getParent()->getParent();
755 const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
756 unsigned EntrySize = MJTI->getEntrySize(TD: MF.getDataLayout());
757
758 Register PtrReg = MI.getOperand(i: 0).getReg();
759 LLT PtrTy = MRI.getType(Reg: PtrReg);
760 Register IndexReg = MI.getOperand(i: 2).getReg();
761 LLT IndexTy = MRI.getType(Reg: IndexReg);
762
763 if (!isPowerOf2_32(Value: EntrySize))
764 return false;
765
766 auto ShiftAmt = MIRBuilder.buildConstant(Res: IndexTy, Val: Log2_32(Value: EntrySize));
767 IndexReg = MIRBuilder.buildShl(Dst: IndexTy, Src0: IndexReg, Src1: ShiftAmt).getReg(Idx: 0);
768
769 auto Addr = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: PtrReg, Op1: IndexReg);
770
771 MachineMemOperand *MMO = MF.getMachineMemOperand(
772 PtrInfo: MachinePointerInfo::getJumpTable(MF), F: MachineMemOperand::MOLoad,
773 Size: EntrySize, BaseAlignment: Align(MJTI->getEntryAlignment(TD: MF.getDataLayout())));
774
775 Register TargetReg;
776 switch (MJTI->getEntryKind()) {
777 default:
778 return false;
779 case MachineJumpTableInfo::EK_LabelDifference32: {
780 // For PIC, the sequence is:
781 // BRIND(load(Jumptable + index) + RelocBase)
782 // RelocBase can be JumpTable, GOT or some sort of global base.
783 unsigned LoadOpc =
784 STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
785 auto Load = MIRBuilder.buildLoadInstr(Opcode: LoadOpc, Res: IndexTy, Addr, MMO&: *MMO);
786 TargetReg = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: PtrReg, Op1: Load).getReg(Idx: 0);
787 break;
788 }
789 case MachineJumpTableInfo::EK_Custom32: {
790 auto Load = MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_SEXTLOAD, Res: IndexTy,
791 Addr, MMO&: *MMO);
792 TargetReg = MIRBuilder.buildIntToPtr(Dst: PtrTy, Src: Load).getReg(Idx: 0);
793 break;
794 }
795 case MachineJumpTableInfo::EK_BlockAddress:
796 TargetReg = MIRBuilder.buildLoad(Res: PtrTy, Addr, MMO&: *MMO).getReg(Idx: 0);
797 break;
798 }
799
800 MIRBuilder.buildBrIndirect(Tgt: TargetReg);
801
802 MI.eraseFromParent();
803 return true;
804}
805
806bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
807 bool ShouldOptForSize) const {
808 assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
809 int64_t Imm = APImm.getSExtValue();
810 // All simm32 constants should be handled by isel.
811 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
812 // this check redundant, but small immediates are common so this check
813 // should have better compile time.
814 if (isInt<32>(x: Imm))
815 return false;
816
817 // We only need to cost the immediate, if constant pool lowering is enabled.
818 if (!STI.useConstantPoolForLargeInts())
819 return false;
820
821 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI);
822 if (Seq.size() <= STI.getMaxBuildIntsCost())
823 return false;
824
825 // Optimizations below are disabled for opt size. If we're optimizing for
826 // size, use a constant pool.
827 if (ShouldOptForSize)
828 return true;
  // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool. It will require an extra
  // temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bits 31 and 63 are set.
835 unsigned ShiftAmt, AddOpc;
836 RISCVMatInt::InstSeq SeqLo =
837 RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI, ShiftAmt, AddOpc);
838 return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
839}
840
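// Custom-lower G_VSCALE by computing it from VLENB (vscale == VLENB / 8),
// turning power-of-two multiples into shifts where possible.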
841bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
842 MachineIRBuilder &MIB) const {
843 const LLT XLenTy(STI.getXLenVT());
844 Register Dst = MI.getOperand(i: 0).getReg();
845
846 // We define our scalable vector types for lmul=1 to use a 64 bit known
847 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
848 // vscale as VLENB / 8.
849 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
850 if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
851 // Support for VLEN==32 is incomplete.
852 return false;
853
854 // We assume VLENB is a multiple of 8. We manually choose the best shift
855 // here because SimplifyDemandedBits isn't always able to simplify it.
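  // For example, vscale * 2 == (VLENB / 8) * 2 == VLENB >> 2, and
  // vscale * 16 == (VLENB / 8) * 16 == VLENB << 1.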
856 uint64_t Val = MI.getOperand(i: 1).getCImm()->getZExtValue();
857 if (isPowerOf2_64(Value: Val)) {
858 uint64_t Log2 = Log2_64(Value: Val);
859 if (Log2 < 3) {
860 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
861 MIB.buildLShr(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: 3 - Log2));
862 } else if (Log2 > 3) {
863 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
864 MIB.buildShl(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: Log2 - 3));
865 } else {
866 MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {Dst}, SrcOps: {});
867 }
868 } else if ((Val % 8) == 0) {
869 // If the multiplier is a multiple of 8, scale it down to avoid needing
870 // to shift the VLENB value.
871 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
872 MIB.buildMul(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: Val / 8));
873 } else {
874 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
875 auto VScale = MIB.buildLShr(Dst: XLenTy, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: 3));
876 MIB.buildMul(Dst, Src0: VScale, Src1: MIB.buildConstant(Res: XLenTy, Val));
877 }
878 MI.eraseFromParent();
879 return true;
880}
881
882// Custom-lower extensions from mask vectors by using a vselect either with 1
883// for zero/any-extension or -1 for sign-extension:
884// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
885// Note that any-extension is lowered identically to zero-extension.
886bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
887 MachineIRBuilder &MIB) const {
888
889 unsigned Opc = MI.getOpcode();
890 assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
891 Opc == TargetOpcode::G_ANYEXT);
892
893 MachineRegisterInfo &MRI = *MIB.getMRI();
894 Register Dst = MI.getOperand(i: 0).getReg();
895 Register Src = MI.getOperand(i: 1).getReg();
896
897 LLT DstTy = MRI.getType(Reg: Dst);
898 int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
899 LLT DstEltTy = DstTy.getElementType();
900 auto SplatZero = MIB.buildSplatVector(Res: DstTy, Val: MIB.buildConstant(Res: DstEltTy, Val: 0));
901 auto SplatTrue =
902 MIB.buildSplatVector(Res: DstTy, Val: MIB.buildConstant(Res: DstEltTy, Val: ExtTrueVal));
903 MIB.buildSelect(Res: Dst, Tst: Src, Op0: SplatTrue, Op1: SplatZero);
904
905 MI.eraseFromParent();
906 return true;
907}
908
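// Custom legalization for vector G_LOAD/G_STORE whose alignment is not
// supported natively: bitcast the vector value to an equivalent i8-element
// vector so the access only needs byte alignment.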
909bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
910 LegalizerHelper &Helper,
911 MachineIRBuilder &MIB) const {
912 assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
913 "Machine instructions must be Load/Store.");
914 MachineRegisterInfo &MRI = *MIB.getMRI();
915 MachineFunction *MF = MI.getMF();
916 const DataLayout &DL = MIB.getDataLayout();
917 LLVMContext &Ctx = MF->getFunction().getContext();
918
919 Register DstReg = MI.getOperand(i: 0).getReg();
920 LLT DataTy = MRI.getType(Reg: DstReg);
921 if (!DataTy.isVector())
922 return false;
923
924 if (!MI.hasOneMemOperand())
925 return false;
926
927 MachineMemOperand *MMO = *MI.memoperands_begin();
928
929 const auto *TLI = STI.getTargetLowering();
930 EVT VT = EVT::getEVT(Ty: getTypeForLLT(Ty: DataTy, C&: Ctx));
931
932 if (TLI->allowsMemoryAccessForAlignment(Context&: Ctx, DL, VT, MMO: *MMO))
933 return true;
934
935 unsigned EltSizeBits = DataTy.getScalarSizeInBits();
936 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
937 "Unexpected unaligned RVV load type");
938
939 // Calculate the new vector type with i8 elements
940 unsigned NumElements =
941 DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
942 LLT NewDataTy = LLT::scalable_vector(MinNumElements: NumElements, ScalarSizeInBits: 8);
943
944 Helper.bitcast(MI, TypeIdx: 0, Ty: NewDataTy);
945
946 return true;
947}
948
/// Return the mask type suitable for masking the provided vector type. This
/// is simply an i1 element type vector of the same (possibly scalable) length.
952static LLT getMaskTypeFor(LLT VecTy) {
953 assert(VecTy.isVector());
954 ElementCount EC = VecTy.getElementCount();
955 return LLT::vector(EC, ScalarTy: LLT::scalar(SizeInBits: 1));
956}
957
958/// Creates an all ones mask suitable for masking a vector of type VecTy with
959/// vector length VL.
960static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
961 MachineIRBuilder &MIB,
962 MachineRegisterInfo &MRI) {
963 LLT MaskTy = getMaskTypeFor(VecTy);
964 return MIB.buildInstr(Opc: RISCV::G_VMSET_VL, DstOps: {MaskTy}, SrcOps: {VL});
965}
966
967/// Gets the two common "VL" operands: an all-ones mask and the vector length.
968/// VecTy is a scalable vector type.
969static std::pair<MachineInstrBuilder, MachineInstrBuilder>
970buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
971 assert(VecTy.isScalableVector() && "Expecting scalable container type");
972 const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
973 LLT XLenTy(STI.getXLenVT());
974 auto VL = MIB.buildConstant(Res: XLenTy, Val: -1);
975 auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
976 return {Mask, VL};
977}
978
979static MachineInstrBuilder
980buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
981 Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
982 MachineRegisterInfo &MRI) {
  // TODO: If the Hi bits of the splat are undefined, then it's fine to just
  // splat Lo even if it might be sign extended. I don't think we have yet
  // introduced a case where we build an s64 whose upper bits are undef.
987
988 // Fall back to a stack store and stride x0 vector load.
989 // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
990 // preprocessDAG in SDAG.
991 return MIB.buildInstr(Opc: RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, DstOps: {Dst},
992 SrcOps: {Passthru, Lo, Hi, VL});
993}
994
995static MachineInstrBuilder
996buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
997 const SrcOp &Scalar, const SrcOp &VL,
998 MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
999 assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
1000 auto Unmerge = MIB.buildUnmerge(Res: LLT::scalar(SizeInBits: 32), Op: Scalar);
1001 return buildSplatPartsS64WithVL(Dst, Passthru, Lo: Unmerge.getReg(Idx: 0),
1002 Hi: Unmerge.getReg(Idx: 1), VL, MIB, MRI);
1003}
1004
// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
// Splats of s1 types that have a constant value can be legalized as VMSET_VL
// or VMCLR_VL.
1009bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
1010 MachineIRBuilder &MIB) const {
1011 assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);
1012
1013 MachineRegisterInfo &MRI = *MIB.getMRI();
1014
1015 Register Dst = MI.getOperand(i: 0).getReg();
1016 Register SplatVal = MI.getOperand(i: 1).getReg();
1017
1018 LLT VecTy = MRI.getType(Reg: Dst);
1019 LLT XLenTy(STI.getXLenVT());
1020
1021 // Handle case of s64 element vectors on rv32
1022 if (XLenTy.getSizeInBits() == 32 &&
1023 VecTy.getElementType().getSizeInBits() == 64) {
1024 auto [_, VL] = buildDefaultVLOps(VecTy: MRI.getType(Reg: Dst), MIB, MRI);
1025 buildSplatSplitS64WithVL(Dst, Passthru: MIB.buildUndef(Res: VecTy), Scalar: SplatVal, VL, MIB,
1026 MRI);
1027 MI.eraseFromParent();
1028 return true;
1029 }
1030
1031 // All-zeros or all-ones splats are handled specially.
1032 MachineInstr &SplatValMI = *MRI.getVRegDef(Reg: SplatVal);
1033 if (isAllOnesOrAllOnesSplat(MI: SplatValMI, MRI)) {
1034 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1035 MIB.buildInstr(Opc: RISCV::G_VMSET_VL, DstOps: {Dst}, SrcOps: {VL});
1036 MI.eraseFromParent();
1037 return true;
1038 }
1039 if (isNullOrNullSplat(MI: SplatValMI, MRI)) {
1040 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1041 MIB.buildInstr(Opc: RISCV::G_VMCLR_VL, DstOps: {Dst}, SrcOps: {VL});
1042 MI.eraseFromParent();
1043 return true;
1044 }
1045
1046 // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
1047 // ones) by promoting it to an s8 splat.
1048 LLT InterEltTy = LLT::scalar(SizeInBits: 8);
1049 LLT InterTy = VecTy.changeElementType(NewEltTy: InterEltTy);
1050 auto ZExtSplatVal = MIB.buildZExt(Res: InterEltTy, Op: SplatVal);
1051 auto And =
1052 MIB.buildAnd(Dst: InterEltTy, Src0: ZExtSplatVal, Src1: MIB.buildConstant(Res: InterEltTy, Val: 1));
1053 auto LHS = MIB.buildSplatVector(Res: InterTy, Val: And);
1054 auto ZeroSplat =
1055 MIB.buildSplatVector(Res: InterTy, Val: MIB.buildConstant(Res: InterEltTy, Val: 0));
1056 MIB.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: Dst, Op0: LHS, Op1: ZeroSplat);
1057 MI.eraseFromParent();
1058 return true;
1059}
1060
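/// Return the scalable vector type with the same element type as \p VecTy
/// whose known minimum size is exactly one vector register (LMUL = 1), i.e.
/// RVVBitsPerBlock bits.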
1061static LLT getLMUL1Ty(LLT VecTy) {
1062 assert(VecTy.getElementType().getSizeInBits() <= 64 &&
1063 "Unexpected vector LLT");
1064 return LLT::scalable_vector(MinNumElements: RISCV::RVVBitsPerBlock /
1065 VecTy.getElementType().getSizeInBits(),
1066 ScalarTy: VecTy.getElementType());
1067}
1068
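// Custom-lower G_EXTRACT_SUBVECTOR. Extracts at index 0, or extracts that
// decompose to a whole subregister, need no code. i1-element subvectors are
// widened to i8 first; everything else is handled by sliding the source down
// with G_VSLIDEDOWN_VL and extracting the result from element 0.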
1069bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1070 MachineIRBuilder &MIB) const {
1071 GExtractSubvector &ES = cast<GExtractSubvector>(Val&: MI);
1072
1073 MachineRegisterInfo &MRI = *MIB.getMRI();
1074
1075 Register Dst = ES.getReg(Idx: 0);
1076 Register Src = ES.getSrcVec();
1077 uint64_t Idx = ES.getIndexImm();
1078
1079 // With an index of 0 this is a cast-like subvector, which can be performed
1080 // with subregister operations.
1081 if (Idx == 0)
1082 return true;
1083
1084 LLT LitTy = MRI.getType(Reg: Dst);
1085 LLT BigTy = MRI.getType(Reg: Src);
1086
1087 if (LitTy.getElementType() == LLT::scalar(SizeInBits: 1)) {
1088 // We can't slide this mask vector up indexed by its i1 elements.
1089 // This poses a problem when we wish to insert a scalable vector which
1090 // can't be re-expressed as a larger type. Just choose the slow path and
1091 // extend to a larger type, then truncate back down.
1092 LLT ExtBigTy = BigTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1093 LLT ExtLitTy = LitTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1094 auto BigZExt = MIB.buildZExt(Res: ExtBigTy, Op: Src);
1095 auto ExtractZExt = MIB.buildExtractSubvector(Res: ExtLitTy, Src: BigZExt, Index: Idx);
1096 auto SplatZero = MIB.buildSplatVector(
1097 Res: ExtLitTy, Val: MIB.buildConstant(Res: ExtLitTy.getElementType(), Val: 0));
1098 MIB.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: Dst, Op0: ExtractZExt, Op1: SplatZero);
1099 MI.eraseFromParent();
1100 return true;
1101 }
1102
1103 // extract_subvector scales the index by vscale if the subvector is scalable,
1104 // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1105 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1106 MVT LitTyMVT = getMVTForLLT(Ty: LitTy);
1107 auto Decompose =
1108 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1109 VecVT: getMVTForLLT(Ty: BigTy), SubVecVT: LitTyMVT, InsertExtractIdx: Idx, TRI);
1110 unsigned RemIdx = Decompose.second;
1111
1112 // If the Idx has been completely eliminated then this is a subvector extract
1113 // which naturally aligns to a vector register. These can easily be handled
1114 // using subregister manipulation.
1115 if (RemIdx == 0)
1116 return true;
1117
1118 // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1119 // was > M1 then the index would need to be a multiple of VLMAX, and so would
1120 // divide exactly.
1121 assert(
1122 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second ||
1123 RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVVType::LMUL_1);
1124
1125 // If the vector type is an LMUL-group type, extract a subvector equal to the
1126 // nearest full vector register type.
1127 LLT InterLitTy = BigTy;
1128 Register Vec = Src;
1129 if (TypeSize::isKnownGT(LHS: BigTy.getSizeInBits(),
1130 RHS: getLMUL1Ty(VecTy: BigTy).getSizeInBits())) {
1131 // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1132 // we should have successfully decomposed the extract into a subregister.
1133 assert(Decompose.first != RISCV::NoSubRegister);
1134 InterLitTy = getLMUL1Ty(VecTy: BigTy);
    // SDAG builds a TargetExtractSubreg. We cannot create the equivalent here,
    // a COPY with a subregister index on the source register, since generic
    // virtual registers do not allow subregister indices.
1138 Vec = MIB.buildExtractSubvector(Res: InterLitTy, Src, Index: Idx - RemIdx).getReg(Idx: 0);
1139 }
1140
1141 // Slide this vector register down by the desired number of elements in order
1142 // to place the desired subvector starting at element 0.
1143 const LLT XLenTy(STI.getXLenVT());
1144 auto SlidedownAmt = MIB.buildVScale(Res: XLenTy, MinElts: RemIdx);
1145 auto [Mask, VL] = buildDefaultVLOps(VecTy: LitTy, MIB, MRI);
1146 uint64_t Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
1147 auto Slidedown = MIB.buildInstr(
1148 Opc: RISCV::G_VSLIDEDOWN_VL, DstOps: {InterLitTy},
1149 SrcOps: {MIB.buildUndef(Res: InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1150
1151 // Now the vector is in the right position, extract our final subvector. This
1152 // should resolve to a COPY.
1153 MIB.buildExtractSubvector(Res: Dst, Src: Slidedown, Index: 0);
1154
1155 MI.eraseFromParent();
1156 return true;
1157}
1158
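// Custom-lower G_INSERT_SUBVECTOR. Inserts at index 0, inserts into an undef
// vector, and inserts that align exactly to vector register boundaries need
// no code. i1-element subvectors go via equivalent i8 vectors. Otherwise the
// subvector is placed into an LMUL=1 slice of the destination with a
// tail-undisturbed vmv.v.v or vslideup.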
1159bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
1160 LegalizerHelper &Helper,
1161 MachineIRBuilder &MIB) const {
1162 GInsertSubvector &IS = cast<GInsertSubvector>(Val&: MI);
1163
1164 MachineRegisterInfo &MRI = *MIB.getMRI();
1165
1166 Register Dst = IS.getReg(Idx: 0);
1167 Register BigVec = IS.getBigVec();
1168 Register LitVec = IS.getSubVec();
1169 uint64_t Idx = IS.getIndexImm();
1170
1171 LLT BigTy = MRI.getType(Reg: BigVec);
1172 LLT LitTy = MRI.getType(Reg: LitVec);
1173
1174 if (Idx == 0 ||
1175 MRI.getVRegDef(Reg: BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1176 return true;
1177
  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
  // vectors and truncate down after the insert.
1182 if (LitTy.getElementType() == LLT::scalar(SizeInBits: 1)) {
1183 auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
1184 auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
1185 if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
1186 return Helper.bitcast(
1187 MI&: IS, TypeIdx: 0,
1188 Ty: LLT::vector(EC: BigTy.getElementCount().divideCoefficientBy(RHS: 8), ScalarSizeInBits: 8));
1189
1190 // We can't slide this mask vector up indexed by its i1 elements.
1191 // This poses a problem when we wish to insert a scalable vector which
1192 // can't be re-expressed as a larger type. Just choose the slow path and
1193 // extend to a larger type, then truncate back down.
1194 LLT ExtBigTy = BigTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1195 return Helper.widenScalar(MI&: IS, TypeIdx: 0, WideTy: ExtBigTy);
1196 }
1197
1198 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1199 unsigned SubRegIdx, RemIdx;
1200 std::tie(args&: SubRegIdx, args&: RemIdx) =
1201 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1202 VecVT: getMVTForLLT(Ty: BigTy), SubVecVT: getMVTForLLT(Ty: LitTy), InsertExtractIdx: Idx, TRI);

  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
  assert(isPowerOf2_64(
      STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
  bool ExactlyVecRegSized =
      STI.expandVScale(LitTy.getSizeInBits())
          .isKnownMultipleOf(STI.expandVScale(VecRegSize));

  // If the Idx has been completely eliminated and this subvector's size is a
  // vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  if (RemIdx == 0 && ExactlyVecRegSized)
    return true;

  // If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via an LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.

  // VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, setting
  // elements OFFSET <= i < VL to the "subvector", and leaving elements
  // VL <= i < VLMAX to the tail policy (in our case undisturbed). This means
  // we can set up a subvector insertion where OFFSET is the insertion offset,
  // and the VL is OFFSET plus the size of the subvector.
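  // Worked example (illustrative): inserting a subvector of vscale x 2
  // elements at element offset vscale x 2 uses OFFSET = vscale * 2 and
  // VL = vscale * 4, so elements [0, vscale * 2) of the destination register
  // are left undisturbed and the slide writes [vscale * 2, vscale * 4).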
  const LLT XLenTy(STI.getXLenVT());
  LLT InterLitTy = BigTy;
  Register AlignedExtract = BigVec;
  unsigned AlignedIdx = Idx - RemIdx;
  if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
                          getLMUL1Ty(BigTy).getSizeInBits())) {
    InterLitTy = getLMUL1Ty(BigTy);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a G_EXTRACT on a subreg.
    AlignedExtract =
        MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
  }

  auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
                                         LitVec, 0);
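  // The subvector now sits at element 0 of an otherwise-undef register of
  // InterLitTy; the slide below moves it to its final position.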

  auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
  auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());

  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  MachineInstrBuilder Inserted;
  bool NeedInsertSubvec =
      TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
  Register InsertedDst =
      NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
  if (RemIdx == 0) {
    Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
                              {AlignedExtract, Insert, VL});
  } else {
    auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
    // Construct the vector length corresponding to RemIdx + length(LitTy).
    VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
    // Use tail agnostic policy if we're inserting over InterLitTy's tail.
    ElementCount EndIndex =
        ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
    uint64_t Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
    if (STI.expandVScale(EndIndex) ==
        STI.expandVScale(InterLitTy.getElementCount()))
      Policy = RISCVVType::TAIL_AGNOSTIC;

    Inserted =
        MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
                       {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
  }

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (NeedInsertSubvec)
    MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);

  MI.eraseFromParent();
  return true;
}

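// Return the RISC-V-specific generic *W opcode corresponding to the given
// XLen-width generic opcode. The W forms operate on the low 32 bits of their
// operands and sign-extend the 32-bit result to 64 bits, matching the RV64 W
// instructions they eventually select to.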
static unsigned getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case TargetOpcode::G_ASHR:
    return RISCV::G_SRAW;
  case TargetOpcode::G_LSHR:
    return RISCV::G_SRLW;
  case TargetOpcode::G_SHL:
    return RISCV::G_SLLW;
  case TargetOpcode::G_SDIV:
    return RISCV::G_DIVW;
  case TargetOpcode::G_UDIV:
    return RISCV::G_DIVUW;
  case TargetOpcode::G_UREM:
    return RISCV::G_REMUW;
  case TargetOpcode::G_ROTL:
    return RISCV::G_ROLW;
  case TargetOpcode::G_ROTR:
    return RISCV::G_RORW;
  case TargetOpcode::G_CTLZ:
    return RISCV::G_CLZW;
  case TargetOpcode::G_CTTZ:
    return RISCV::G_CTZW;
  case TargetOpcode::G_FPTOSI:
    return RISCV::G_FCVT_W_RV64;
  case TargetOpcode::G_FPTOUI:
    return RISCV::G_FCVT_WU_RV64;
  }
}

bool RISCVLegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  MachineFunction &MF = *MI.getParent()->getParent();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_ABS:
    return Helper.lowerAbsToMaxNeg(MI);
  // TODO: G_FCONSTANT
  case TargetOpcode::G_CONSTANT: {
    const Function &F = MF.getFunction();
    // TODO: if PSI and BFI are present, add " ||
    // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
    bool ShouldOptForSize = F.hasOptSize();
    const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
    if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
      return true;
    return Helper.lowerConstant(MI);
  }
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_ADD: {
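    // Any-extend the operands to XLen, do the arithmetic at XLen width, then
    // sign-extend the low 32 bits of the result and truncate back to the
    // original width. The sext_inreg pattern lets later selection pick the
    // W-form instruction (addw/subw).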
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);

    Register DstALU = MRI.createGenericVirtualRegister(sXLen);

    MachineOperand &MO = MI.getOperand(0);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
    auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32);

    MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext});
    MO.setReg(DstALU);

    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_SEXT_INREG: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int64_t SizeInBits = MI.getOperand(2).getImm();
    // Source size of 32 is sext.w.
    if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
      return true;

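    // With Zbb, 8- and 16-bit sign extension map directly to sext.b/sext.h.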
    if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
      return true;

    return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
           LegalizerHelper::Legalized;
  }
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_SHL: {
    if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
      // We don't need a custom node for shift by constant. Just widen the
      // source and the shift amount.
      unsigned ExtOpc = TargetOpcode::G_ANYEXT;
      if (MI.getOpcode() == TargetOpcode::G_ASHR)
        ExtOpc = TargetOpcode::G_SEXT;
      else if (MI.getOpcode() == TargetOpcode::G_LSHR)
        ExtOpc = TargetOpcode::G_ZEXT;
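      // The widened source must reproduce the bits the original shift would
      // see: sign bits for an arithmetic right shift, zero bits for a logical
      // right shift. A left shift only cares about the low bits, so any-extend
      // suffices.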

      Helper.Observer.changingInstr(MI);
      Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
      Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
      Helper.widenScalarDst(MI, sXLen);
      Helper.Observer.changedInstr(MI);
      return true;
    }

    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_ROTL:
  case TargetOpcode::G_ROTR: {
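    // These all have W-form counterparts on RV64: any-extend the operands to
    // XLen and switch to the RISC-V generic W opcode, which only depends on
    // the low 32 bits of its inputs.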
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ: {
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    MI.addOperand(MachineOperand::CreateImm(RISCVFPRndMode::RTZ));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_IS_FPCLASS: {
    Register GISFPCLASS = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    const MachineOperand &ImmOp = MI.getOperand(2);
    MachineIRBuilder MIB(MI);

    // Map LLVM IR's floating-point class mask onto RISC-V's FCLASS encoding by
    // rotating the 10-bit immediate right by two bits.
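    // LLVM orders the ten class bits (sNaN, qNaN, -inf, ..., +inf) from bit 0,
    // whereas FCLASS orders them (-inf, ..., +inf, sNaN, qNaN), so rotating
    // right by two moves the two NaN bits from the bottom to the top of the
    // mask.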
    APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
    auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
    auto ConstZero = MIB.buildConstant(sXLen, 0);

    auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
    auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
    MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);

    MI.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_BRJT:
    return legalizeBRJT(MI, MIRBuilder);
  case TargetOpcode::G_VASTART:
    return legalizeVAStart(MI, MIRBuilder);
  case TargetOpcode::G_VSCALE:
    return legalizeVScale(MI, MIRBuilder);
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ANYEXT:
    return legalizeExt(MI, MIRBuilder);
  case TargetOpcode::G_SPLAT_VECTOR:
    return legalizeSplatVector(MI, MIRBuilder);
  case TargetOpcode::G_EXTRACT_SUBVECTOR:
    return legalizeExtractSubvector(MI, MIRBuilder);
  case TargetOpcode::G_INSERT_SUBVECTOR:
    return legalizeInsertSubvector(MI, Helper, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, Helper, MIRBuilder);
  }

  llvm_unreachable("expected switch to return");
}
