1//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
/// This file implements the targeting of the MachineLegalizer class for RISC-V.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
13#include "RISCVLegalizerInfo.h"
14#include "MCTargetDesc/RISCVMatInt.h"
15#include "RISCVMachineFunctionInfo.h"
16#include "RISCVSubtarget.h"
17#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
18#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
19#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
20#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
21#include "llvm/CodeGen/MachineConstantPool.h"
22#include "llvm/CodeGen/MachineJumpTableInfo.h"
23#include "llvm/CodeGen/MachineMemOperand.h"
24#include "llvm/CodeGen/MachineOperand.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/TargetOpcodes.h"
27#include "llvm/CodeGen/ValueTypes.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsRISCV.h"
31#include "llvm/IR/Type.h"
32
33using namespace llvm;
34using namespace LegalityPredicates;
35using namespace LegalizeMutations;
36
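// Returns a predicate that is true when the type at TypeIdx is one of the
// given integer/FP vector types and the subtarget can support it: vector
// instructions must be available, 64-bit elements require
// hasVInstructionsI64(), and single-element (nxv1) vectors require ELEN == 64.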
37static LegalityPredicate
38typeIsLegalIntOrFPVec(unsigned TypeIdx,
39 std::initializer_list<LLT> IntOrFPVecTys,
40 const RISCVSubtarget &ST) {
41 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
42 return ST.hasVInstructions() &&
43 (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
44 ST.hasVInstructionsI64()) &&
45 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
46 ST.getELen() == 64);
47 };
48
49 return all(P0: typeInSet(TypeIdx, TypesInit: IntOrFPVecTys), P1: P);
50}
51
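// As above, but for i1 (mask) vector types: requires vector instructions and,
// for single-element vectors, ELEN == 64.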
52static LegalityPredicate
53typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
54 const RISCVSubtarget &ST) {
55 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
56 return ST.hasVInstructions() &&
57 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
58 ST.getELen() == 64);
59 };
60 return all(P0: typeInSet(TypeIdx, TypesInit: BoolVecTys), P1: P);
61}
62
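// As above, but for vectors of pointers; 16-element pointer vectors are only
// accepted when pointers are 32 bits wide.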
63static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
64 std::initializer_list<LLT> PtrVecTys,
65 const RISCVSubtarget &ST) {
66 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
67 return ST.hasVInstructions() &&
68 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
69 ST.getELen() == 64) &&
70 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
71 Query.Types[TypeIdx].getScalarSizeInBits() == 32);
72 };
73 return all(P0: typeInSet(TypeIdx, TypesInit: PtrVecTys), P1: P);
74}
75
76RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
77 : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(SizeInBits: XLen)) {
78 const LLT sDoubleXLen = LLT::scalar(SizeInBits: 2 * XLen);
79 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: XLen);
80 const LLT s1 = LLT::scalar(SizeInBits: 1);
81 const LLT s8 = LLT::scalar(SizeInBits: 8);
82 const LLT s16 = LLT::scalar(SizeInBits: 16);
83 const LLT s32 = LLT::scalar(SizeInBits: 32);
84 const LLT s64 = LLT::scalar(SizeInBits: 64);
85 const LLT s128 = LLT::scalar(SizeInBits: 128);
86
87 const LLT nxv1s1 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s1);
88 const LLT nxv2s1 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s1);
89 const LLT nxv4s1 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s1);
90 const LLT nxv8s1 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s1);
91 const LLT nxv16s1 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s1);
92 const LLT nxv32s1 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s1);
93 const LLT nxv64s1 = LLT::scalable_vector(MinNumElements: 64, ScalarTy: s1);
94
95 const LLT nxv1s8 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s8);
96 const LLT nxv2s8 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s8);
97 const LLT nxv4s8 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s8);
98 const LLT nxv8s8 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s8);
99 const LLT nxv16s8 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s8);
100 const LLT nxv32s8 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s8);
101 const LLT nxv64s8 = LLT::scalable_vector(MinNumElements: 64, ScalarTy: s8);
102
103 const LLT nxv1s16 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s16);
104 const LLT nxv2s16 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s16);
105 const LLT nxv4s16 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s16);
106 const LLT nxv8s16 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s16);
107 const LLT nxv16s16 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s16);
108 const LLT nxv32s16 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s16);
109
110 const LLT nxv1s32 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s32);
111 const LLT nxv2s32 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s32);
112 const LLT nxv4s32 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s32);
113 const LLT nxv8s32 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s32);
114 const LLT nxv16s32 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s32);
115
116 const LLT nxv1s64 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s64);
117 const LLT nxv2s64 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s64);
118 const LLT nxv4s64 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s64);
119 const LLT nxv8s64 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s64);
120
121 const LLT nxv1p0 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: p0);
122 const LLT nxv2p0 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: p0);
123 const LLT nxv4p0 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: p0);
124 const LLT nxv8p0 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: p0);
125 const LLT nxv16p0 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: p0);
126
127 using namespace TargetOpcode;
128
129 auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};
130
131 auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
132 nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
133 nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
134 nxv1s64, nxv2s64, nxv4s64, nxv8s64};
135
136 auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
137
138 getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB})
139 .legalFor(Types: {sXLen})
140 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
141 .customFor(Pred: ST.is64Bit(), Types: {s32})
142 .widenScalarToNextPow2(TypeIdx: 0)
143 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
144
145 getActionDefinitionsBuilder(Opcodes: {G_AND, G_OR, G_XOR})
146 .legalFor(Types: {sXLen})
147 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
148 .widenScalarToNextPow2(TypeIdx: 0)
149 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
150
151 getActionDefinitionsBuilder(
152 Opcodes: {G_UADDE, G_UADDO, G_USUBE, G_USUBO, G_READ_REGISTER, G_WRITE_REGISTER})
153 .lower();
154
155 getActionDefinitionsBuilder(Opcodes: {G_SADDE, G_SADDO, G_SSUBE, G_SSUBO})
156 .minScalar(TypeIdx: 0, Ty: sXLen)
157 .lower();
158
159 // TODO: Use Vector Single-Width Saturating Instructions for vector types.
160 getActionDefinitionsBuilder(
161 Opcodes: {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT, G_SSHLSAT, G_USHLSAT})
162 .lower();
163
164 getActionDefinitionsBuilder(Opcodes: {G_SHL, G_ASHR, G_LSHR})
165 .legalFor(Types: {{sXLen, sXLen}})
166 .customFor(Pred: ST.is64Bit(), Types: {{s32, s32}})
167 .widenScalarToNextPow2(TypeIdx: 0)
168 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen)
169 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
170
171 getActionDefinitionsBuilder(Opcodes: {G_ZEXT, G_SEXT, G_ANYEXT})
172 .legalFor(Types: {{s32, s16}})
173 .legalFor(Pred: ST.is64Bit(), Types: {{s64, s16}, {s64, s32}})
174 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
175 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)))
176 .customIf(Predicate: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST))
177 .maxScalar(TypeIdx: 0, Ty: sXLen);
178
179 getActionDefinitionsBuilder(Opcode: G_SEXT_INREG)
180 .customFor(Types: {sXLen})
181 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
182 .lower();
183
184 // Merge/Unmerge
185 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
186 auto &MergeUnmergeActions = getActionDefinitionsBuilder(Opcode: Op);
187 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
188 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
189 if (XLen == 32 && ST.hasStdExtD()) {
190 MergeUnmergeActions.legalIf(
191 Predicate: all(P0: typeIs(TypeIdx: BigTyIdx, TypesInit: s64), P1: typeIs(TypeIdx: LitTyIdx, TypesInit: s32)));
192 }
193 MergeUnmergeActions.widenScalarToNextPow2(TypeIdx: LitTyIdx, MinSize: XLen)
194 .widenScalarToNextPow2(TypeIdx: BigTyIdx, MinSize: XLen)
195 .clampScalar(TypeIdx: LitTyIdx, MinTy: sXLen, MaxTy: sXLen)
196 .clampScalar(TypeIdx: BigTyIdx, MinTy: sXLen, MaxTy: sXLen);
197 }
198
199 getActionDefinitionsBuilder(Opcodes: {G_FSHL, G_FSHR}).lower();
200
201 getActionDefinitionsBuilder(Opcodes: {G_ROTR, G_ROTL})
202 .legalFor(Pred: ST.hasStdExtZbb() || ST.hasStdExtZbkb(), Types: {{sXLen, sXLen}})
203 .customFor(Pred: ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
204 Types: {{s32, s32}})
205 .lower();
206
207 getActionDefinitionsBuilder(Opcode: G_BITREVERSE).maxScalar(TypeIdx: 0, Ty: sXLen).lower();
208
209 getActionDefinitionsBuilder(Opcode: G_BITCAST).legalIf(
210 Predicate: all(P0: LegalityPredicates::any(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
211 P1: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST)),
212 P1: LegalityPredicates::any(P0: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST),
213 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST))));
214
215 auto &BSWAPActions = getActionDefinitionsBuilder(Opcode: G_BSWAP);
216 if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
217 BSWAPActions.legalFor(Types: {sXLen}).clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
218 else
219 BSWAPActions.maxScalar(TypeIdx: 0, Ty: sXLen).lower();
220
221 auto &CountZerosActions = getActionDefinitionsBuilder(Opcodes: {G_CTLZ, G_CTTZ});
222 auto &CountZerosUndefActions =
223 getActionDefinitionsBuilder(Opcodes: {G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
224 if (ST.hasStdExtZbb()) {
225 CountZerosActions.legalFor(Types: {{sXLen, sXLen}})
226 .customFor(Types: {{s32, s32}})
227 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen)
228 .widenScalarToNextPow2(TypeIdx: 0)
229 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
230 } else {
231 CountZerosActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0).lower();
232 CountZerosUndefActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
233 }
234 CountZerosUndefActions.lower();
235
236 auto &CountSignActions = getActionDefinitionsBuilder(Opcode: G_CTLS);
237 if (ST.hasStdExtP()) {
238 CountSignActions.legalFor(Types: {{sXLen, sXLen}})
239 .customFor(Types: {{s32, s32}})
240 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen)
241 .widenScalarToNextPow2(TypeIdx: 0)
242 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
243 } else {
244 CountSignActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0).lower();
245 }
246
247 auto &CTPOPActions = getActionDefinitionsBuilder(Opcode: G_CTPOP);
248 if (ST.hasStdExtZbb()) {
249 CTPOPActions.legalFor(Types: {{sXLen, sXLen}})
250 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
251 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
252 } else {
253 CTPOPActions.widenScalarToNextPow2(TypeIdx: 0, /*Min*/ MinSize: 8)
254 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sXLen)
255 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0)
256 .lower();
257 }
258
259 getActionDefinitionsBuilder(Opcode: G_CONSTANT)
260 .legalFor(Types: {p0})
261 .legalFor(Pred: !ST.is64Bit(), Types: {s32})
262 .customFor(Pred: ST.is64Bit(), Types: {s64})
263 .widenScalarToNextPow2(TypeIdx: 0)
264 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
265
266 // TODO: transform illegal vector types into legal vector type
267 getActionDefinitionsBuilder(Opcode: G_FREEZE)
268 .legalFor(Types: {s16, s32, p0})
269 .legalFor(Pred: ST.is64Bit(), Types: {s64})
270 .legalIf(Predicate: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST))
271 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
272 .widenScalarToNextPow2(TypeIdx: 0)
273 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen);
274
275 // TODO: transform illegal vector types into legal vector type
276 // TODO: Merge with G_FREEZE?
277 getActionDefinitionsBuilder(
278 Opcodes: {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
279 .legalFor(Types: {s32, sXLen, p0})
280 .legalIf(Predicate: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST))
281 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
282 .widenScalarToNextPow2(TypeIdx: 0)
283 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen);
284
285 getActionDefinitionsBuilder(Opcode: G_ICMP)
286 .legalFor(Types: {{sXLen, sXLen}, {sXLen, p0}})
287 .legalIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
288 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)))
289 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1, MinSize: 8)
290 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen)
291 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
292
293 getActionDefinitionsBuilder(Opcode: G_SELECT)
294 .legalFor(Types: {{s32, sXLen}, {p0, sXLen}})
295 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
296 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)))
297 .legalFor(Pred: XLen == 64 || ST.hasStdExtD(), Types: {{s64, sXLen}})
298 .widenScalarToNextPow2(TypeIdx: 0)
299 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
300 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
301
302 auto &LoadActions = getActionDefinitionsBuilder(Opcode: G_LOAD);
303 auto &StoreActions = getActionDefinitionsBuilder(Opcode: G_STORE);
304 auto &ExtLoadActions = getActionDefinitionsBuilder(Opcodes: {G_SEXTLOAD, G_ZEXTLOAD});
305
306 // Return the alignment needed for scalar memory ops. If unaligned scalar mem
307 // is supported, we only require byte alignment. Otherwise, we need the memory
308 // op to be natively aligned.
309 auto getScalarMemAlign = [&ST](unsigned Size) {
310 return ST.enableUnalignedScalarMem() ? 8 : Size;
311 };
312
313 LoadActions.legalForTypesWithMemDesc(
314 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
315 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
316 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
317 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
318 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
319 {.Type0: p0, .Type1: p0, .MemTy: sXLen, .Align: getScalarMemAlign(XLen)}});
320 StoreActions.legalForTypesWithMemDesc(
321 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
322 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
323 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
324 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
325 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
326 {.Type0: p0, .Type1: p0, .MemTy: sXLen, .Align: getScalarMemAlign(XLen)}});
327 ExtLoadActions.legalForTypesWithMemDesc(
328 TypesAndMemDesc: {{.Type0: sXLen, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
329 {.Type0: sXLen, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)}});
330 if (XLen == 64) {
331 LoadActions.legalForTypesWithMemDesc(
332 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
333 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
334 {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
335 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
336 StoreActions.legalForTypesWithMemDesc(
337 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
338 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
339 {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
340 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
341 ExtLoadActions.legalForTypesWithMemDesc(
342 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)}});
343 } else if (ST.hasStdExtD()) {
344 LoadActions.legalForTypesWithMemDesc(
345 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
346 StoreActions.legalForTypesWithMemDesc(
347 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
348 }
349
350 // Vector loads/stores.
351 if (ST.hasVInstructions()) {
352 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv2s8, .Type1: p0, .MemTy: nxv2s8, .Align: 8},
353 {.Type0: nxv4s8, .Type1: p0, .MemTy: nxv4s8, .Align: 8},
354 {.Type0: nxv8s8, .Type1: p0, .MemTy: nxv8s8, .Align: 8},
355 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
356 {.Type0: nxv32s8, .Type1: p0, .MemTy: nxv32s8, .Align: 8},
357 {.Type0: nxv64s8, .Type1: p0, .MemTy: nxv64s8, .Align: 8},
358 {.Type0: nxv2s16, .Type1: p0, .MemTy: nxv2s16, .Align: 16},
359 {.Type0: nxv4s16, .Type1: p0, .MemTy: nxv4s16, .Align: 16},
360 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 16},
361 {.Type0: nxv16s16, .Type1: p0, .MemTy: nxv16s16, .Align: 16},
362 {.Type0: nxv32s16, .Type1: p0, .MemTy: nxv32s16, .Align: 16},
363 {.Type0: nxv2s32, .Type1: p0, .MemTy: nxv2s32, .Align: 32},
364 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 32},
365 {.Type0: nxv8s32, .Type1: p0, .MemTy: nxv8s32, .Align: 32},
366 {.Type0: nxv16s32, .Type1: p0, .MemTy: nxv16s32, .Align: 32}});
367 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv2s8, .Type1: p0, .MemTy: nxv2s8, .Align: 8},
368 {.Type0: nxv4s8, .Type1: p0, .MemTy: nxv4s8, .Align: 8},
369 {.Type0: nxv8s8, .Type1: p0, .MemTy: nxv8s8, .Align: 8},
370 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
371 {.Type0: nxv32s8, .Type1: p0, .MemTy: nxv32s8, .Align: 8},
372 {.Type0: nxv64s8, .Type1: p0, .MemTy: nxv64s8, .Align: 8},
373 {.Type0: nxv2s16, .Type1: p0, .MemTy: nxv2s16, .Align: 16},
374 {.Type0: nxv4s16, .Type1: p0, .MemTy: nxv4s16, .Align: 16},
375 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 16},
376 {.Type0: nxv16s16, .Type1: p0, .MemTy: nxv16s16, .Align: 16},
377 {.Type0: nxv32s16, .Type1: p0, .MemTy: nxv32s16, .Align: 16},
378 {.Type0: nxv2s32, .Type1: p0, .MemTy: nxv2s32, .Align: 32},
379 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 32},
380 {.Type0: nxv8s32, .Type1: p0, .MemTy: nxv8s32, .Align: 32},
381 {.Type0: nxv16s32, .Type1: p0, .MemTy: nxv16s32, .Align: 32}});
382
383 if (ST.getELen() == 64) {
384 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s8, .Type1: p0, .MemTy: nxv1s8, .Align: 8},
385 {.Type0: nxv1s16, .Type1: p0, .MemTy: nxv1s16, .Align: 16},
386 {.Type0: nxv1s32, .Type1: p0, .MemTy: nxv1s32, .Align: 32}});
387 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s8, .Type1: p0, .MemTy: nxv1s8, .Align: 8},
388 {.Type0: nxv1s16, .Type1: p0, .MemTy: nxv1s16, .Align: 16},
389 {.Type0: nxv1s32, .Type1: p0, .MemTy: nxv1s32, .Align: 32}});
390 }
391
392 if (ST.hasVInstructionsI64()) {
393 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s64, .Type1: p0, .MemTy: nxv1s64, .Align: 64},
394 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 64},
395 {.Type0: nxv4s64, .Type1: p0, .MemTy: nxv4s64, .Align: 64},
396 {.Type0: nxv8s64, .Type1: p0, .MemTy: nxv8s64, .Align: 64}});
397 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s64, .Type1: p0, .MemTy: nxv1s64, .Align: 64},
398 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 64},
399 {.Type0: nxv4s64, .Type1: p0, .MemTy: nxv4s64, .Align: 64},
400 {.Type0: nxv8s64, .Type1: p0, .MemTy: nxv8s64, .Align: 64}});
401 }
402
    // We will take the custom lowering path if we have scalable vector types
    // with non-standard alignments.
405 LoadActions.customIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST));
406 StoreActions.customIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST));
407
408 // Pointers require that XLen sized elements are legal.
409 if (XLen <= ST.getELen()) {
410 LoadActions.customIf(Predicate: typeIsLegalPtrVec(TypeIdx: 0, PtrVecTys, ST));
411 StoreActions.customIf(Predicate: typeIsLegalPtrVec(TypeIdx: 0, PtrVecTys, ST));
412 }
413 }
414
415 LoadActions.widenScalarToNextPow2(TypeIdx: 0, /* MinSize = */ 8)
416 .lowerIfMemSizeNotByteSizePow2()
417 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen)
418 .lower();
419 StoreActions
420 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen)
421 .lowerIfMemSizeNotByteSizePow2()
422 .lower();
423
424 ExtLoadActions.widenScalarToNextPow2(TypeIdx: 0).clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen).lower();
425
426 getActionDefinitionsBuilder(Opcodes: {G_PTR_ADD, G_PTRMASK}).legalFor(Types: {{p0, sXLen}});
427
428 getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
429 .legalFor(Types: {{sXLen, p0}})
430 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
431
432 getActionDefinitionsBuilder(Opcode: G_INTTOPTR)
433 .legalFor(Types: {{p0, sXLen}})
434 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
435
436 getActionDefinitionsBuilder(Opcode: G_BRCOND).legalFor(Types: {sXLen}).minScalar(TypeIdx: 0, Ty: sXLen);
437
438 getActionDefinitionsBuilder(Opcode: G_BRJT).customFor(Types: {{p0, sXLen}});
439
440 getActionDefinitionsBuilder(Opcode: G_BRINDIRECT).legalFor(Types: {p0});
441
442 getActionDefinitionsBuilder(Opcode: G_PHI)
443 .legalFor(Types: {p0, s32, sXLen})
444 .widenScalarToNextPow2(TypeIdx: 0)
445 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen);
446
447 getActionDefinitionsBuilder(Opcodes: {G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
448 .legalFor(Types: {p0});
449
450 if (ST.hasStdExtZmmul()) {
451 getActionDefinitionsBuilder(Opcode: G_MUL)
452 .legalFor(Types: {sXLen})
453 .widenScalarToNextPow2(TypeIdx: 0)
454 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
455
456 // clang-format off
457 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH})
458 .legalFor(Types: {sXLen})
459 .lower();
460 // clang-format on
461
462 getActionDefinitionsBuilder(Opcodes: {G_SMULO, G_UMULO}).minScalar(TypeIdx: 0, Ty: sXLen).lower();
463 } else {
464 getActionDefinitionsBuilder(Opcode: G_MUL)
465 .libcallFor(Types: {sXLen, sDoubleXLen})
466 .widenScalarToNextPow2(TypeIdx: 0)
467 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen);
468
469 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH}).lowerFor(Types: {sXLen});
470
471 getActionDefinitionsBuilder(Opcodes: {G_SMULO, G_UMULO})
472 .minScalar(TypeIdx: 0, Ty: sXLen)
473 // Widen sXLen to sDoubleXLen so we can use a single libcall to get
474 // the low bits for the mul result and high bits to do the overflow
475 // check.
476 .widenScalarIf(Predicate: typeIs(TypeIdx: 0, TypesInit: sXLen),
477 Mutation: LegalizeMutations::changeTo(TypeIdx: 0, Ty: sDoubleXLen))
478 .lower();
479 }
480
481 if (ST.hasStdExtM()) {
482 getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_UDIV, G_UREM})
483 .legalFor(Types: {sXLen})
484 .customFor(Types: {s32})
485 .libcallFor(Types: {sDoubleXLen})
486 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sDoubleXLen)
487 .widenScalarToNextPow2(TypeIdx: 0);
488 getActionDefinitionsBuilder(Opcode: G_SREM)
489 .legalFor(Types: {sXLen})
490 .libcallFor(Types: {sDoubleXLen})
491 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen)
492 .widenScalarToNextPow2(TypeIdx: 0);
493 } else {
494 getActionDefinitionsBuilder(Opcodes: {G_UDIV, G_SDIV, G_UREM, G_SREM})
495 .libcallFor(Types: {sXLen, sDoubleXLen})
496 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen)
497 .widenScalarToNextPow2(TypeIdx: 0);
498 }
499
500 // TODO: Use libcall for sDoubleXLen.
501 getActionDefinitionsBuilder(Opcodes: {G_SDIVREM, G_UDIVREM}).lower();
502
503 getActionDefinitionsBuilder(Opcode: G_ABS)
504 .customFor(Pred: ST.hasStdExtZbb(), Types: {sXLen})
505 .minScalar(Pred: ST.hasStdExtZbb(), TypeIdx: 0, Ty: sXLen)
506 .lower();
507
508 getActionDefinitionsBuilder(Opcodes: {G_ABDS, G_ABDU})
509 .minScalar(Pred: ST.hasStdExtZbb(), TypeIdx: 0, Ty: sXLen)
510 .lower();
511
512 getActionDefinitionsBuilder(Opcodes: {G_UMAX, G_UMIN, G_SMAX, G_SMIN})
513 .legalFor(Pred: ST.hasStdExtZbb(), Types: {sXLen})
514 .minScalar(Pred: ST.hasStdExtZbb(), TypeIdx: 0, Ty: sXLen)
515 .lower();
516
517 getActionDefinitionsBuilder(Opcodes: {G_SCMP, G_UCMP}).lower();
518
519 getActionDefinitionsBuilder(Opcode: G_FRAME_INDEX).legalFor(Types: {p0});
520
521 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
522
523 getActionDefinitionsBuilder(Opcodes: {G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
524 .lower();
525
526 // FP Operations
527
528 // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
529 getActionDefinitionsBuilder(Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT,
530 G_FMAXNUM, G_FMINNUM, G_FMAXIMUMNUM,
531 G_FMINIMUMNUM})
532 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
533 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
534 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
535 .libcallFor(Types: {s32, s64})
536 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
537
538 getActionDefinitionsBuilder(Opcodes: {G_FNEG, G_FABS})
539 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
540 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
541 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
542 .lowerFor(Types: {s32, s64, s128});
543
544 getActionDefinitionsBuilder(Opcode: G_FREM)
545 .libcallFor(Types: {s32, s64})
546 .libcallFor(Pred: ST.is64Bit(), Types: {s128})
547 .minScalar(TypeIdx: 0, Ty: s32)
548 .scalarize(TypeIdx: 0);
549
550 getActionDefinitionsBuilder(Opcode: G_FCOPYSIGN)
551 .legalFor(Pred: ST.hasStdExtF(), Types: {{s32, s32}})
552 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, s64}, {s32, s64}, {s64, s32}})
553 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, s16}, {s16, s32}, {s32, s16}})
554 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s16, s64}, {s64, s16}})
555 .lower();
556
557 // FIXME: Use Zfhmin.
558 getActionDefinitionsBuilder(Opcode: G_FPTRUNC)
559 .legalFor(Pred: ST.hasStdExtD(), Types: {{s32, s64}})
560 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, s32}})
561 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s16, s64}})
562 .libcallFor(Types: {{s32, s64}})
563 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}});
564 getActionDefinitionsBuilder(Opcode: G_FPEXT)
565 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, s32}})
566 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s32, s16}})
567 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s64, s16}})
568 .libcallFor(Types: {{s64, s32}})
569 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}});
570
571 getActionDefinitionsBuilder(Opcode: G_FCMP)
572 .legalFor(Pred: ST.hasStdExtF(), Types: {{sXLen, s32}})
573 .legalFor(Pred: ST.hasStdExtD(), Types: {{sXLen, s64}})
574 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{sXLen, s16}})
575 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
576 .libcallFor(Types: {{sXLen, s32}, {sXLen, s64}})
577 .libcallFor(Pred: ST.is64Bit(), Types: {{sXLen, s128}});
578
579 // TODO: Support vector version of G_IS_FPCLASS.
580 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS)
581 .customFor(Pred: ST.hasStdExtF(), Types: {{s1, s32}})
582 .customFor(Pred: ST.hasStdExtD(), Types: {{s1, s64}})
583 .customFor(Pred: ST.hasStdExtZfh(), Types: {{s1, s16}})
584 .lowerFor(Types: {{s1, s32}, {s1, s64}});
585
586 getActionDefinitionsBuilder(Opcode: G_FCONSTANT)
587 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
588 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
589 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
590 .customFor(Pred: !ST.is64Bit(), Types: {s32})
591 .customFor(Pred: ST.is64Bit(), Types: {s32, s64})
592 .lowerFor(Types: {s64, s128});
593
594 getActionDefinitionsBuilder(Opcodes: {G_FPTOSI, G_FPTOUI})
595 .legalFor(Pred: ST.hasStdExtF(), Types: {{sXLen, s32}})
596 .legalFor(Pred: ST.hasStdExtD(), Types: {{sXLen, s64}})
597 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{sXLen, s16}})
598 .customFor(Pred: ST.is64Bit() && ST.hasStdExtF(), Types: {{s32, s32}})
599 .customFor(Pred: ST.is64Bit() && ST.hasStdExtD(), Types: {{s32, s64}})
600 .customFor(Pred: ST.is64Bit() && ST.hasStdExtZfh(), Types: {{s32, s16}})
601 .widenScalarToNextPow2(TypeIdx: 0)
602 .minScalar(TypeIdx: 0, Ty: s32)
603 .libcallFor(Types: {{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
604 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}}) // FIXME RV32.
605 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}, {s128, s128}});
606
607 getActionDefinitionsBuilder(Opcodes: {G_SITOFP, G_UITOFP})
608 .legalFor(Pred: ST.hasStdExtF(), Types: {{s32, sXLen}})
609 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, sXLen}})
610 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, sXLen}})
611 .widenScalarToNextPow2(TypeIdx: 1)
612 // Promote to XLen if the operation is legal.
613 .widenScalarIf(
614 Predicate: [=, &ST](const LegalityQuery &Query) {
615 return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
616 (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
617 ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
618 (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
619 (ST.hasStdExtZfh() &&
620 Query.Types[0].getSizeInBits() == 16));
621 },
622 Mutation: LegalizeMutations::changeTo(TypeIdx: 1, Ty: sXLen))
623 // Otherwise only promote to s32 since we have si libcalls.
624 .minScalar(TypeIdx: 1, Ty: s32)
625 .libcallFor(Types: {{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
626 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}}) // FIXME RV32.
627 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}, {s128, s128}});
628
629 // FIXME: We can do custom inline expansion like SelectionDAG.
630 getActionDefinitionsBuilder(Opcodes: {G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
631 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
632 G_INTRINSIC_ROUNDEVEN})
633 .legalFor(Pred: ST.hasStdExtZfa(), Types: {s32})
634 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtD(), Types: {s64})
635 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtZfh(), Types: {s16})
636 .libcallFor(Types: {s32, s64})
637 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
638
639 getActionDefinitionsBuilder(Opcodes: {G_FMAXIMUM, G_FMINIMUM})
640 .legalFor(Pred: ST.hasStdExtZfa(), Types: {s32})
641 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtD(), Types: {s64})
642 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtZfh(), Types: {s16});
643
644 getActionDefinitionsBuilder(Opcodes: {G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
645 G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
646 G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
647 G_FTANH, G_FMODF})
648 .libcallFor(Types: {s32, s64})
649 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
650 getActionDefinitionsBuilder(Opcodes: {G_FPOWI, G_FLDEXP})
651 .libcallFor(Types: {{s32, s32}, {s64, s32}})
652 .libcallFor(Pred: ST.is64Bit(), Types: {s128, s32});
653
654 getActionDefinitionsBuilder(Opcode: G_FCANONICALIZE)
655 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
656 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
657 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16});
658
659 getActionDefinitionsBuilder(Opcode: G_VASTART).customFor(Types: {p0});
660
661 // va_list must be a pointer, but most sized types are pretty easy to handle
662 // as the destination.
663 getActionDefinitionsBuilder(Opcode: G_VAARG)
664 // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
665 // other than sXLen.
666 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
667 .lowerForCartesianProduct(Types0: {sXLen, p0}, Types1: {p0});
668
669 getActionDefinitionsBuilder(Opcode: G_VSCALE)
670 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
671 .customFor(Types: {sXLen});
672
673 auto &SplatActions =
674 getActionDefinitionsBuilder(Opcode: G_SPLAT_VECTOR)
675 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
676 P1: typeIs(TypeIdx: 1, TypesInit: sXLen)))
677 .customIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST), P1: typeIs(TypeIdx: 1, TypesInit: s1)));
678 // Handle case of s64 element vectors on RV32. If the subtarget does not have
  // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_I64_VL. If the subtarget
680 // does have f64, then we don't know whether the type is an f64 or an i64,
681 // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
682 // depending on how the instructions it consumes are legalized. They are not
683 // legalized yet since legalization is in reverse postorder, so we cannot
684 // make the decision at this moment.
685 if (XLen == 32) {
686 if (ST.hasVInstructionsF64() && ST.hasStdExtD())
687 SplatActions.legalIf(Predicate: all(
688 P0: typeInSet(TypeIdx: 0, TypesInit: {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), P1: typeIs(TypeIdx: 1, TypesInit: s64)));
689 else if (ST.hasVInstructionsI64())
690 SplatActions.customIf(Predicate: all(
691 P0: typeInSet(TypeIdx: 0, TypesInit: {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), P1: typeIs(TypeIdx: 1, TypesInit: s64)));
692 }
693
694 SplatActions.clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
695
696 LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
697 LLT DstTy = Query.Types[0];
698 LLT SrcTy = Query.Types[1];
699 return DstTy.getElementType() == LLT::scalar(SizeInBits: 1) &&
700 DstTy.getElementCount().getKnownMinValue() >= 8 &&
701 SrcTy.getElementCount().getKnownMinValue() >= 8;
702 };
703 getActionDefinitionsBuilder(Opcode: G_EXTRACT_SUBVECTOR)
704 // We don't have the ability to slide mask vectors down indexed by their
705 // i1 elements; the smallest we can do is i8. Often we are able to bitcast
706 // to equivalent i8 vectors.
707 .bitcastIf(
708 Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
709 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST), args: ExtractSubvecBitcastPred),
710 Mutation: [=](const LegalityQuery &Query) {
711 LLT CastTy = LLT::vector(
712 EC: Query.Types[0].getElementCount().divideCoefficientBy(RHS: 8), ScalarSizeInBits: 8);
713 return std::pair(0, CastTy);
714 })
715 .customIf(Predicate: LegalityPredicates::any(
716 P0: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
717 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)),
718 P1: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
719 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST))));
720
721 getActionDefinitionsBuilder(Opcode: G_INSERT_SUBVECTOR)
722 .customIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
723 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)))
724 .customIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
725 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)));
726
727 getActionDefinitionsBuilder(Opcode: G_ATOMIC_CMPXCHG_WITH_SUCCESS)
728 .lowerIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32, s64}), P1: typeIs(TypeIdx: 2, TypesInit: p0)));
729
730 getActionDefinitionsBuilder(Opcodes: {G_ATOMIC_CMPXCHG, G_ATOMICRMW_ADD})
731 .legalFor(Pred: ST.hasStdExtA(), Types: {{sXLen, p0}})
732 .libcallFor(Pred: !ST.hasStdExtA(), Types: {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
733 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
734
735 getActionDefinitionsBuilder(Opcode: G_ATOMICRMW_SUB)
736 .libcallFor(Pred: !ST.hasStdExtA(), Types: {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
737 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
738 .lower();
739
740 LegalityPredicate InsertVectorEltPred = [=](const LegalityQuery &Query) {
741 LLT VecTy = Query.Types[0];
742 LLT EltTy = Query.Types[1];
743 return VecTy.getElementType() == EltTy;
744 };
745
746 getActionDefinitionsBuilder(Opcode: G_INSERT_VECTOR_ELT)
747 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
748 P1: InsertVectorEltPred, args: typeIs(TypeIdx: 2, TypesInit: sXLen)))
749 .legalIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST), P1: InsertVectorEltPred,
750 args: typeIs(TypeIdx: 2, TypesInit: sXLen)));
751
752 getLegacyLegalizerInfo().computeTables();
753 verify(MII: *ST.getInstrInfo());
754}
755
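// Custom legalization for intrinsics. RVV intrinsics with an integer scalar
// operand have that operand any-extended to XLen when it is narrower; vacopy
// is expanded into a load and store of the va_list pointer.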
756bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
757 MachineInstr &MI) const {
758 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
759
760 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
761 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntrinsicID)) {
762 if (II->hasScalarOperand() && !II->IsFPIntrinsic) {
763 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
764 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
765
766 auto OldScalar = MI.getOperand(i: II->ScalarOperand + 2).getReg();
767 // Legalize integer vx form intrinsic.
768 if (MRI.getType(Reg: OldScalar).isScalar()) {
769 if (MRI.getType(Reg: OldScalar).getSizeInBits() < sXLen.getSizeInBits()) {
770 Helper.Observer.changingInstr(MI);
771 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: II->ScalarOperand + 2,
772 ExtOpcode: TargetOpcode::G_ANYEXT);
773 Helper.Observer.changedInstr(MI);
774 } else if (MRI.getType(Reg: OldScalar).getSizeInBits() >
775 sXLen.getSizeInBits()) {
776 // TODO: i64 in riscv32.
777 return false;
778 }
779 }
780 }
781 return true;
782 }
783
784 switch (IntrinsicID) {
785 default:
786 return false;
787 case Intrinsic::vacopy: {
788 // vacopy arguments must be legal because of the intrinsic signature.
789 // No need to check here.
790
791 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
792 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
793 MachineFunction &MF = *MI.getMF();
794 const DataLayout &DL = MIRBuilder.getDataLayout();
795 LLVMContext &Ctx = MF.getFunction().getContext();
796
797 Register DstLst = MI.getOperand(i: 1).getReg();
798 LLT PtrTy = MRI.getType(Reg: DstLst);
799
800 // Load the source va_list
801 Align Alignment = DL.getABITypeAlign(Ty: getTypeForLLT(Ty: PtrTy, C&: Ctx));
802 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
803 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad, MemTy: PtrTy, base_alignment: Alignment);
804 auto Tmp = MIRBuilder.buildLoad(Res: PtrTy, Addr: MI.getOperand(i: 2), MMO&: *LoadMMO);
805
806 // Store the result in the destination va_list
807 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
808 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOStore, MemTy: PtrTy, base_alignment: Alignment);
809 MIRBuilder.buildStore(Val: Tmp, Addr: DstLst, MMO&: *StoreMMO);
810
811 MI.eraseFromParent();
812 return true;
813 }
814 case Intrinsic::riscv_vsetvli:
815 case Intrinsic::riscv_vsetvlimax:
816 case Intrinsic::riscv_masked_atomicrmw_add:
817 case Intrinsic::riscv_masked_atomicrmw_sub:
818 case Intrinsic::riscv_masked_cmpxchg:
819 return true;
820 }
821}
822
823bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
824 MachineIRBuilder &MIRBuilder) const {
825 // Stores the address of the VarArgsFrameIndex slot into the memory location
826 assert(MI.getOpcode() == TargetOpcode::G_VASTART);
827 MachineFunction *MF = MI.getParent()->getParent();
828 RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
829 int FI = FuncInfo->getVarArgsFrameIndex();
830 LLT AddrTy = MIRBuilder.getMRI()->getType(Reg: MI.getOperand(i: 0).getReg());
831 auto FINAddr = MIRBuilder.buildFrameIndex(Res: AddrTy, Idx: FI);
832 assert(MI.hasOneMemOperand());
833 MIRBuilder.buildStore(Val: FINAddr, Addr: MI.getOperand(i: 0).getReg(),
834 MMO&: *MI.memoperands()[0]);
835 MI.eraseFromParent();
836 return true;
837}
838
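// Lower G_BRJT by scaling the index by the jump table entry size, loading the
// entry, forming the target address according to the entry kind, and emitting
// an indirect branch.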
839bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
840 MachineIRBuilder &MIRBuilder) const {
841 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
842 auto &MF = *MI.getParent()->getParent();
843 const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
844 unsigned EntrySize = MJTI->getEntrySize(TD: MF.getDataLayout());
845
846 Register PtrReg = MI.getOperand(i: 0).getReg();
847 LLT PtrTy = MRI.getType(Reg: PtrReg);
848 Register IndexReg = MI.getOperand(i: 2).getReg();
849 LLT IndexTy = MRI.getType(Reg: IndexReg);
850
851 if (!isPowerOf2_32(Value: EntrySize))
852 return false;
853
854 auto ShiftAmt = MIRBuilder.buildConstant(Res: IndexTy, Val: Log2_32(Value: EntrySize));
855 IndexReg = MIRBuilder.buildShl(Dst: IndexTy, Src0: IndexReg, Src1: ShiftAmt).getReg(Idx: 0);
856
857 auto Addr = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: PtrReg, Op1: IndexReg);
858
859 MachineMemOperand *MMO = MF.getMachineMemOperand(
860 PtrInfo: MachinePointerInfo::getJumpTable(MF), F: MachineMemOperand::MOLoad,
861 Size: EntrySize, BaseAlignment: Align(MJTI->getEntryAlignment(TD: MF.getDataLayout())));
862
863 Register TargetReg;
864 switch (MJTI->getEntryKind()) {
865 default:
866 return false;
867 case MachineJumpTableInfo::EK_LabelDifference32: {
868 // For PIC, the sequence is:
869 // BRIND(load(Jumptable + index) + RelocBase)
870 // RelocBase can be JumpTable, GOT or some sort of global base.
871 unsigned LoadOpc =
872 STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
873 auto Load = MIRBuilder.buildLoadInstr(Opcode: LoadOpc, Res: IndexTy, Addr, MMO&: *MMO);
874 TargetReg = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: PtrReg, Op1: Load).getReg(Idx: 0);
875 break;
876 }
877 case MachineJumpTableInfo::EK_Custom32: {
878 auto Load = MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_SEXTLOAD, Res: IndexTy,
879 Addr, MMO&: *MMO);
880 TargetReg = MIRBuilder.buildIntToPtr(Dst: PtrTy, Src: Load).getReg(Idx: 0);
881 break;
882 }
883 case MachineJumpTableInfo::EK_BlockAddress:
884 TargetReg = MIRBuilder.buildLoad(Res: PtrTy, Addr, MMO&: *MMO).getReg(Idx: 0);
885 break;
886 }
887
888 MIRBuilder.buildBrIndirect(Tgt: TargetReg);
889
890 MI.eraseFromParent();
891 return true;
892}
893
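// Returns true if the immediate should be loaded from a constant pool rather
// than materialized with an instruction sequence.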
894bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
895 bool ShouldOptForSize) const {
896 assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
897 int64_t Imm = APImm.getSExtValue();
898 // All simm32 constants should be handled by isel.
899 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
900 // this check redundant, but small immediates are common so this check
901 // should have better compile time.
902 if (isInt<32>(x: Imm))
903 return false;
904
905 // We only need to cost the immediate, if constant pool lowering is enabled.
906 if (!STI.useConstantPoolForLargeInts())
907 return false;
908
909 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI);
910 if (Seq.size() <= STI.getMaxBuildIntsCost())
911 return false;
912
913 // Optimizations below are disabled for opt size. If we're optimizing for
914 // size, use a constant pool.
915 if (ShouldOptForSize)
916 return true;
  // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool.
920 // It will require an extra temporary register though.
921 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
922 // low and high 32 bits are the same and bit 31 and 63 are set.
923 unsigned ShiftAmt, AddOpc;
924 RISCVMatInt::InstSeq SeqLo =
925 RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI, ShiftAmt, AddOpc);
926 return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
927}
928
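// Lower G_VSCALE in terms of G_READ_VLENB: vscale is VLENB / 8, which is then
// scaled by the constant operand, using shifts where possible.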
929bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
930 MachineIRBuilder &MIB) const {
931 const LLT XLenTy(STI.getXLenVT());
932 Register Dst = MI.getOperand(i: 0).getReg();
933
934 // We define our scalable vector types for lmul=1 to use a 64 bit known
935 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
936 // vscale as VLENB / 8.
937 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
938 if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
939 // Support for VLEN==32 is incomplete.
940 return false;
941
942 // We assume VLENB is a multiple of 8. We manually choose the best shift
943 // here because SimplifyDemandedBits isn't always able to simplify it.
944 uint64_t Val = MI.getOperand(i: 1).getCImm()->getZExtValue();
945 if (isPowerOf2_64(Value: Val)) {
946 uint64_t Log2 = Log2_64(Value: Val);
947 if (Log2 < 3) {
948 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
949 MIB.buildLShr(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: 3 - Log2));
950 } else if (Log2 > 3) {
951 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
952 MIB.buildShl(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: Log2 - 3));
953 } else {
954 MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {Dst}, SrcOps: {});
955 }
956 } else if ((Val % 8) == 0) {
957 // If the multiplier is a multiple of 8, scale it down to avoid needing
958 // to shift the VLENB value.
959 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
960 MIB.buildMul(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: Val / 8));
961 } else {
962 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
963 auto VScale = MIB.buildLShr(Dst: XLenTy, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: 3));
964 MIB.buildMul(Dst, Src0: VScale, Src1: MIB.buildConstant(Res: XLenTy, Val));
965 }
966 MI.eraseFromParent();
967 return true;
968}
969
970// Custom-lower extensions from mask vectors by using a vselect either with 1
971// for zero/any-extension or -1 for sign-extension:
972// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
973// Note that any-extension is lowered identically to zero-extension.
974bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
975 MachineIRBuilder &MIB) const {
976
977 unsigned Opc = MI.getOpcode();
978 assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
979 Opc == TargetOpcode::G_ANYEXT);
980
981 MachineRegisterInfo &MRI = *MIB.getMRI();
982 Register Dst = MI.getOperand(i: 0).getReg();
983 Register Src = MI.getOperand(i: 1).getReg();
984
985 LLT DstTy = MRI.getType(Reg: Dst);
986 int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
987 LLT DstEltTy = DstTy.getElementType();
988 auto SplatZero = MIB.buildSplatVector(Res: DstTy, Val: MIB.buildConstant(Res: DstEltTy, Val: 0));
989 auto SplatTrue =
990 MIB.buildSplatVector(Res: DstTy, Val: MIB.buildConstant(Res: DstEltTy, Val: ExtTrueVal));
991 MIB.buildSelect(Res: Dst, Tst: Src, Op0: SplatTrue, Op1: SplatZero);
992
993 MI.eraseFromParent();
994 return true;
995}
996
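// Custom handler for vector loads and stores. If the memory access is not
// allowed with its original alignment, bitcast the value to an equivalently
// sized i8 vector, which only requires byte alignment.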
997bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
998 LegalizerHelper &Helper,
999 MachineIRBuilder &MIB) const {
1000 assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
1001 "Machine instructions must be Load/Store.");
1002 MachineRegisterInfo &MRI = *MIB.getMRI();
1003 MachineFunction *MF = MI.getMF();
1004 const DataLayout &DL = MIB.getDataLayout();
1005 LLVMContext &Ctx = MF->getFunction().getContext();
1006
1007 Register DstReg = MI.getOperand(i: 0).getReg();
1008 LLT DataTy = MRI.getType(Reg: DstReg);
1009 if (!DataTy.isVector())
1010 return false;
1011
1012 if (!MI.hasOneMemOperand())
1013 return false;
1014
1015 MachineMemOperand *MMO = *MI.memoperands_begin();
1016
1017 const auto *TLI = STI.getTargetLowering();
1018 EVT VT = EVT::getEVT(Ty: getTypeForLLT(Ty: DataTy, C&: Ctx));
1019
1020 if (TLI->allowsMemoryAccessForAlignment(Context&: Ctx, DL, VT, MMO: *MMO))
1021 return true;
1022
1023 unsigned EltSizeBits = DataTy.getScalarSizeInBits();
1024 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
1025 "Unexpected unaligned RVV load type");
1026
1027 // Calculate the new vector type with i8 elements
1028 unsigned NumElements =
1029 DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
1030 LLT NewDataTy = LLT::scalable_vector(MinNumElements: NumElements, ScalarSizeInBits: 8);
1031
1032 Helper.bitcast(MI, TypeIdx: 0, Ty: NewDataTy);
1033
1034 return true;
1035}
1036
/// Return the mask type suitable for masking the provided vector type. This
/// is simply an i1-element vector of the same (possibly scalable) length.
1040static LLT getMaskTypeFor(LLT VecTy) {
1041 assert(VecTy.isVector());
1042 ElementCount EC = VecTy.getElementCount();
1043 return LLT::vector(EC, ScalarTy: LLT::scalar(SizeInBits: 1));
1044}
1045
1046/// Creates an all ones mask suitable for masking a vector of type VecTy with
1047/// vector length VL.
1048static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
1049 MachineIRBuilder &MIB,
1050 MachineRegisterInfo &MRI) {
1051 LLT MaskTy = getMaskTypeFor(VecTy);
1052 return MIB.buildInstr(Opc: RISCV::G_VMSET_VL, DstOps: {MaskTy}, SrcOps: {VL});
1053}
1054
1055/// Gets the two common "VL" operands: an all-ones mask and the vector length.
1056/// VecTy is a scalable vector type.
1057static std::pair<MachineInstrBuilder, MachineInstrBuilder>
1058buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
1059 assert(VecTy.isScalableVector() && "Expecting scalable container type");
1060 const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
1061 LLT XLenTy(STI.getXLenVT());
1062 auto VL = MIB.buildConstant(Res: XLenTy, Val: -1);
1063 auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
1064 return {Mask, VL};
1065}
1066
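/// Build a splat of an s64 value given as separate Lo and Hi halves (used on
/// RV32, where s64 is not a legal scalar type).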
1067static MachineInstrBuilder
1068buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
1069 Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
1070 MachineRegisterInfo &MRI) {
1071 // TODO: If the Hi bits of the splat are undefined, then it's fine to just
1072 // splat Lo even if it might be sign extended. I don't think we have
  // introduced a case where we build an s64 whose upper bits are undef yet.
1075
1076 // Fall back to a stack store and stride x0 vector load.
1077 // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
1078 // preprocessDAG in SDAG.
1079 return MIB.buildInstr(Opc: RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, DstOps: {Dst},
1080 SrcOps: {Passthru, Lo, Hi, VL});
1081}
1082
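/// Split an s64 scalar into its two s32 halves and splat them with
/// buildSplatPartsS64WithVL.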
1083static MachineInstrBuilder
1084buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
1085 const SrcOp &Scalar, const SrcOp &VL,
1086 MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
1087 assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
1088 auto Unmerge = MIB.buildUnmerge(Res: LLT::scalar(SizeInBits: 32), Op: Scalar);
1089 return buildSplatPartsS64WithVL(Dst, Passthru, Lo: Unmerge.getReg(Idx: 0),
1090 Hi: Unmerge.getReg(Idx: 1), VL, MIB, MRI);
1091}
1092
1093// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
1094// legal equivalently-sized i8 type, so we can use that as a go-between.
1095// Splats of s1 types that have constant value can be legalized as VMSET_VL or
1096// VMCLR_VL.
1097bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
1098 MachineIRBuilder &MIB) const {
1099 assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);
1100
1101 MachineRegisterInfo &MRI = *MIB.getMRI();
1102
1103 Register Dst = MI.getOperand(i: 0).getReg();
1104 Register SplatVal = MI.getOperand(i: 1).getReg();
1105
1106 LLT VecTy = MRI.getType(Reg: Dst);
1107 LLT XLenTy(STI.getXLenVT());
1108
1109 // Handle case of s64 element vectors on rv32
1110 if (XLenTy.getSizeInBits() == 32 &&
1111 VecTy.getElementType().getSizeInBits() == 64) {
1112 auto [_, VL] = buildDefaultVLOps(VecTy: MRI.getType(Reg: Dst), MIB, MRI);
1113 buildSplatSplitS64WithVL(Dst, Passthru: MIB.buildUndef(Res: VecTy), Scalar: SplatVal, VL, MIB,
1114 MRI);
1115 MI.eraseFromParent();
1116 return true;
1117 }
1118
1119 // All-zeros or all-ones splats are handled specially.
1120 MachineInstr &SplatValMI = *MRI.getVRegDef(Reg: SplatVal);
1121 if (isAllOnesOrAllOnesSplat(MI: SplatValMI, MRI)) {
1122 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1123 MIB.buildInstr(Opc: RISCV::G_VMSET_VL, DstOps: {Dst}, SrcOps: {VL});
1124 MI.eraseFromParent();
1125 return true;
1126 }
1127 if (isNullOrNullSplat(MI: SplatValMI, MRI)) {
1128 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1129 MIB.buildInstr(Opc: RISCV::G_VMCLR_VL, DstOps: {Dst}, SrcOps: {VL});
1130 MI.eraseFromParent();
1131 return true;
1132 }
1133
1134 // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
1135 // ones) by promoting it to an s8 splat.
1136 LLT InterEltTy = LLT::scalar(SizeInBits: 8);
1137 LLT InterTy = VecTy.changeElementType(NewEltTy: InterEltTy);
1138 auto ZExtSplatVal = MIB.buildZExt(Res: InterEltTy, Op: SplatVal);
1139 auto And =
1140 MIB.buildAnd(Dst: InterEltTy, Src0: ZExtSplatVal, Src1: MIB.buildConstant(Res: InterEltTy, Val: 1));
1141 auto LHS = MIB.buildSplatVector(Res: InterTy, Val: And);
1142 auto ZeroSplat =
1143 MIB.buildSplatVector(Res: InterTy, Val: MIB.buildConstant(Res: InterEltTy, Val: 0));
1144 MIB.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: Dst, Op0: LHS, Op1: ZeroSplat);
1145 MI.eraseFromParent();
1146 return true;
1147}
1148
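// Return the scalable vector type with the same element type as VecTy whose
// known minimum size corresponds to a single vector register (LMUL = 1).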
1149static LLT getLMUL1Ty(LLT VecTy) {
1150 assert(VecTy.getElementType().getSizeInBits() <= 64 &&
1151 "Unexpected vector LLT");
1152 return LLT::scalable_vector(MinNumElements: RISCV::RVVBitsPerBlock /
1153 VecTy.getElementType().getSizeInBits(),
1154 ScalarTy: VecTy.getElementType());
1155}
1156
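// Custom-lower G_EXTRACT_SUBVECTOR. Mask (i1) vectors are zero-extended to i8,
// extracted, and compared back down to i1. Other extracts that are not aligned
// to a vector register boundary are lowered to a VSLIDEDOWN followed by an
// extract at index 0.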
1157bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1158 MachineIRBuilder &MIB) const {
1159 GExtractSubvector &ES = cast<GExtractSubvector>(Val&: MI);
1160
1161 MachineRegisterInfo &MRI = *MIB.getMRI();
1162
1163 Register Dst = ES.getReg(Idx: 0);
1164 Register Src = ES.getSrcVec();
1165 uint64_t Idx = ES.getIndexImm();
1166
1167 // With an index of 0 this is a cast-like subvector, which can be performed
1168 // with subregister operations.
1169 if (Idx == 0)
1170 return true;
1171
1172 LLT LitTy = MRI.getType(Reg: Dst);
1173 LLT BigTy = MRI.getType(Reg: Src);
1174
1175 if (LitTy.getElementType() == LLT::scalar(SizeInBits: 1)) {
    // We can't slide this mask vector down indexed by its i1 elements.
    // This poses a problem when we wish to extract a scalable vector which
1178 // can't be re-expressed as a larger type. Just choose the slow path and
1179 // extend to a larger type, then truncate back down.
1180 LLT ExtBigTy = BigTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1181 LLT ExtLitTy = LitTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1182 auto BigZExt = MIB.buildZExt(Res: ExtBigTy, Op: Src);
1183 auto ExtractZExt = MIB.buildExtractSubvector(Res: ExtLitTy, Src: BigZExt, Index: Idx);
1184 auto SplatZero = MIB.buildSplatVector(
1185 Res: ExtLitTy, Val: MIB.buildConstant(Res: ExtLitTy.getElementType(), Val: 0));
1186 MIB.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: Dst, Op0: ExtractZExt, Op1: SplatZero);
1187 MI.eraseFromParent();
1188 return true;
1189 }
1190
1191 // extract_subvector scales the index by vscale if the subvector is scalable,
1192 // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1193 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1194 MVT LitTyMVT = getMVTForLLT(Ty: LitTy);
1195 auto Decompose =
1196 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1197 VecVT: getMVTForLLT(Ty: BigTy), SubVecVT: LitTyMVT, InsertExtractIdx: Idx, TRI);
1198 unsigned RemIdx = Decompose.second;
1199
1200 // If the Idx has been completely eliminated then this is a subvector extract
1201 // which naturally aligns to a vector register. These can easily be handled
1202 // using subregister manipulation.
1203 if (RemIdx == 0)
1204 return true;
1205
1206 // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1207 // was > M1 then the index would need to be a multiple of VLMAX, and so would
1208 // divide exactly.
1209 assert(
1210 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second ||
1211 RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVVType::LMUL_1);
1212
1213 // If the vector type is an LMUL-group type, extract a subvector equal to the
1214 // nearest full vector register type.
1215 LLT InterLitTy = BigTy;
1216 Register Vec = Src;
1217 if (TypeSize::isKnownGT(LHS: BigTy.getSizeInBits(),
1218 RHS: getLMUL1Ty(VecTy: BigTy).getSizeInBits())) {
1219 // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1220 // we should have successfully decomposed the extract into a subregister.
1221 assert(Decompose.first != RISCV::NoSubRegister);
1222 InterLitTy = getLMUL1Ty(VecTy: BigTy);
1223 // SDAG builds a TargetExtractSubreg here. We cannot build the equivalent
1224 // COPY with a subregister index on the source register, since generic
1225 // virtual registers do not allow subregister indices.
1226 Vec = MIB.buildExtractSubvector(Res: InterLitTy, Src, Index: Idx - RemIdx).getReg(Idx: 0);
1227 }
1228
1229 // Slide this vector register down by the desired number of elements in order
1230 // to place the desired subvector starting at element 0.
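 // A rough sketch (illustrative types and vreg names, RV64): extracting
 // <vscale x 1 x s8> at index 1 from <vscale x 2 x s8> ends up as roughly
 //   %amt:_(s64)                 = G_VSCALE 1
 //   %slid:_(<vscale x 2 x s8>)  = G_VSLIDEDOWN_VL %undef, %src, %amt, %mask,
 //                                                 %vl, <policy>
 //   %dst:_(<vscale x 1 x s8>)   = G_EXTRACT_SUBVECTOR %slid, 0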
1231 const LLT XLenTy(STI.getXLenVT());
1232 auto SlidedownAmt = MIB.buildVScale(Res: XLenTy, MinElts: RemIdx);
1233 auto [Mask, VL] = buildDefaultVLOps(VecTy: InterLitTy, MIB, MRI);
1234 uint64_t Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
1235 auto Slidedown = MIB.buildInstr(
1236 Opc: RISCV::G_VSLIDEDOWN_VL, DstOps: {InterLitTy},
1237 SrcOps: {MIB.buildUndef(Res: InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1238
1239 // Now that the vector is in the right position, extract our final subvector.
1240 // This should resolve to a COPY.
1241 MIB.buildExtractSubvector(Res: Dst, Src: Slidedown, Index: 0);
1242
1243 MI.eraseFromParent();
1244 return true;
1245}
1246
1247bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
1248 LegalizerHelper &Helper,
1249 MachineIRBuilder &MIB) const {
1250 GInsertSubvector &IS = cast<GInsertSubvector>(Val&: MI);
1251
1252 MachineRegisterInfo &MRI = *MIB.getMRI();
1253
1254 Register Dst = IS.getReg(Idx: 0);
1255 Register BigVec = IS.getBigVec();
1256 Register LitVec = IS.getSubVec();
1257 uint64_t Idx = IS.getIndexImm();
1258
1259 LLT BigTy = MRI.getType(Reg: BigVec);
1260 LLT LitTy = MRI.getType(Reg: LitVec);
1261
1262 if (Idx == 0 &&
1263 MRI.getVRegDef(Reg: BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1264 return true;
1265
1266 // We don't have the ability to slide mask vectors up indexed by their i1
1267 // elements; the smallest we can do is i8. Often we are able to bitcast to
1268 // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
1269 // vectors and truncate back down after the insert.
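 // For example (illustrative, and only a sketch of the two paths below):
 // inserting <vscale x 8 x s1> into <vscale x 16 x s1> can be handled as
 // roughly an insert of <vscale x 1 x s8> into <vscale x 2 x s8> via the
 // bitcast path, while a <vscale x 2 x s1> subvector must take the
 // zero-extend-to-s8 path instead.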
1270 if (LitTy.getElementType() == LLT::scalar(SizeInBits: 1)) {
1271 auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
1272 auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
1273 if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
1274 return Helper.bitcast(
1275 MI&: IS, TypeIdx: 0,
1276 Ty: LLT::vector(EC: BigTy.getElementCount().divideCoefficientBy(RHS: 8), ScalarSizeInBits: 8));
1277
1278 // We can't slide this mask vector up indexed by its i1 elements.
1279 // This poses a problem when we wish to insert a scalable vector which
1280 // can't be re-expressed as a larger type. Just choose the slow path and
1281 // extend to a larger type, then truncate back down.
1282 LLT ExtBigTy = BigTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1283 return Helper.widenScalar(MI&: IS, TypeIdx: 0, WideTy: ExtBigTy);
1284 }
1285
1286 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1287 unsigned SubRegIdx, RemIdx;
1288 std::tie(args&: SubRegIdx, args&: RemIdx) =
1289 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1290 VecVT: getMVTForLLT(Ty: BigTy), SubVecVT: getMVTForLLT(Ty: LitTy), InsertExtractIdx: Idx, TRI);
1291
1292 TypeSize VecRegSize = TypeSize::getScalable(MinimumSize: RISCV::RVVBitsPerBlock);
1293 assert(isPowerOf2_64(
1294 STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
1295 bool ExactlyVecRegSized =
1296 STI.expandVScale(X: LitTy.getSizeInBits())
1297 .isKnownMultipleOf(RHS: STI.expandVScale(X: VecRegSize));
1298
1299 // If the Idx has been completely eliminated and this subvector's size is a
1300 // vector register or a multiple thereof, or the surrounding elements are
1301 // undef, then this is a subvector insert which naturally aligns to a vector
1302 // register. These can easily be handled using subregister manipulation.
1303 if (RemIdx == 0 && ExactlyVecRegSized)
1304 return true;
1305
1306 // If the subvector is smaller than a vector register, then the insertion
1307 // must preserve the undisturbed elements of the register. We do this by
1308 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
1309 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
1310 // subvector within the vector register, and an INSERT_SUBVECTOR of that
1311 // LMUL=1 type back into the larger vector (resolving to another subregister
1312 // operation). See below for how our VSLIDEUP works. We go via an LMUL=1 type
1313 // to avoid allocating a large register group to hold our subvector.
1314
1315 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
1316 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
1317 // (in our case undisturbed). This means we can set up a subvector insertion
1318 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
1319 // size of the subvector.
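 // A rough sketch (illustrative types and vreg names, RV64): inserting
 // <vscale x 1 x s32> at index 1 into <vscale x 2 x s32> becomes roughly
 //   %ins:_(<vscale x 2 x s32>)  = G_INSERT_SUBVECTOR %undef, %litvec, 0
 //   %off:_(s64)                 = G_VSCALE 1
 //   %len:_(s64)                 = G_VSCALE 1
 //   %vl:_(s64)                  = G_ADD %off, %len
 //   %dst:_(<vscale x 2 x s32>)  = G_VSLIDEUP_VL %bigvec, %ins, %off, %mask,
 //                                                %vl, <policy>
 // where %mask and the base VL come from buildDefaultVLOps.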
1320 const LLT XLenTy(STI.getXLenVT());
1321 LLT InterLitTy = BigTy;
1322 Register AlignedExtract = BigVec;
1323 unsigned AlignedIdx = Idx - RemIdx;
1324 if (TypeSize::isKnownGT(LHS: BigTy.getSizeInBits(),
1325 RHS: getLMUL1Ty(VecTy: BigTy).getSizeInBits())) {
1326 InterLitTy = getLMUL1Ty(VecTy: BigTy);
1327 // Extract a subvector equal to the nearest full vector register type. This
1328 // should resolve to a G_EXTRACT on a subreg.
1329 AlignedExtract =
1330 MIB.buildExtractSubvector(Res: InterLitTy, Src: BigVec, Index: AlignedIdx).getReg(Idx: 0);
1331 }
1332
1333 auto Insert = MIB.buildInsertSubvector(Res: InterLitTy, Src0: MIB.buildUndef(Res: InterLitTy),
1334 Src1: LitVec, Index: 0);
1335
1336 auto [Mask, _] = buildDefaultVLOps(VecTy: InterLitTy, MIB, MRI);
1337 auto VL = MIB.buildVScale(Res: XLenTy, MinElts: LitTy.getElementCount().getKnownMinValue());
1338
1339 // If we're inserting into the lowest elements, use a tail undisturbed
1340 // vmv.v.v.
1341 MachineInstrBuilder Inserted;
1342 bool NeedInsertSubvec =
1343 TypeSize::isKnownGT(LHS: BigTy.getSizeInBits(), RHS: InterLitTy.getSizeInBits());
1344 Register InsertedDst =
1345 NeedInsertSubvec ? MRI.createGenericVirtualRegister(Ty: InterLitTy) : Dst;
1346 if (RemIdx == 0) {
1347 Inserted = MIB.buildInstr(Opc: RISCV::G_VMV_V_V_VL, DstOps: {InsertedDst},
1348 SrcOps: {AlignedExtract, Insert, VL});
1349 } else {
1350 auto SlideupAmt = MIB.buildVScale(Res: XLenTy, MinElts: RemIdx);
1351 // Construct the vector length corresponding to RemIdx + length(LitTy).
1352 VL = MIB.buildAdd(Dst: XLenTy, Src0: SlideupAmt, Src1: VL);
1353 // Use tail agnostic policy if we're inserting over InterLitTy's tail.
1354 ElementCount EndIndex =
1355 ElementCount::getScalable(MinVal: RemIdx) + LitTy.getElementCount();
1356 uint64_t Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
1357 if (STI.expandVScale(X: EndIndex) ==
1358 STI.expandVScale(X: InterLitTy.getElementCount()))
1359 Policy = RISCVVType::TAIL_AGNOSTIC;
1360
1361 Inserted =
1362 MIB.buildInstr(Opc: RISCV::G_VSLIDEUP_VL, DstOps: {InsertedDst},
1363 SrcOps: {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
1364 }
1365
1366 // If required, insert this subvector back into the correct vector register.
1367 // This should resolve to an INSERT_SUBREG instruction.
1368 if (NeedInsertSubvec)
1369 MIB.buildInsertSubvector(Res: Dst, Src0: BigVec, Src1: Inserted, Index: AlignedIdx);
1370
1371 MI.eraseFromParent();
1372 return true;
1373}
1374
1375static unsigned getRISCVWOpcode(unsigned Opcode) {
1376 switch (Opcode) {
1377 default:
1378 llvm_unreachable("Unexpected opcode");
1379 case TargetOpcode::G_ASHR:
1380 return RISCV::G_SRAW;
1381 case TargetOpcode::G_LSHR:
1382 return RISCV::G_SRLW;
1383 case TargetOpcode::G_SHL:
1384 return RISCV::G_SLLW;
1385 case TargetOpcode::G_SDIV:
1386 return RISCV::G_DIVW;
1387 case TargetOpcode::G_UDIV:
1388 return RISCV::G_DIVUW;
1389 case TargetOpcode::G_UREM:
1390 return RISCV::G_REMUW;
1391 case TargetOpcode::G_ROTL:
1392 return RISCV::G_ROLW;
1393 case TargetOpcode::G_ROTR:
1394 return RISCV::G_RORW;
1395 case TargetOpcode::G_CTLZ:
1396 return RISCV::G_CLZW;
1397 case TargetOpcode::G_CTTZ:
1398 return RISCV::G_CTZW;
1399 case TargetOpcode::G_CTLS:
1400 return RISCV::G_CLSW;
1401 case TargetOpcode::G_FPTOSI:
1402 return RISCV::G_FCVT_W_RV64;
1403 case TargetOpcode::G_FPTOUI:
1404 return RISCV::G_FCVT_WU_RV64;
1405 }
1406}
1407
1408bool RISCVLegalizerInfo::legalizeCustom(
1409 LegalizerHelper &Helper, MachineInstr &MI,
1410 LostDebugLocObserver &LocObserver) const {
1411 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1412 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1413 MachineFunction &MF = *MI.getParent()->getParent();
1414 switch (MI.getOpcode()) {
1415 default:
1416 // No idea what to do.
1417 return false;
1418 case TargetOpcode::G_ABS:
1419 return Helper.lowerAbsToMaxNeg(MI);
1420 case TargetOpcode::G_FCONSTANT: {
1421 const APFloat &FVal = MI.getOperand(i: 1).getFPImm()->getValueAPF();
1422
1423 // Convert G_FCONSTANT to G_CONSTANT.
1424 Register DstReg = MI.getOperand(i: 0).getReg();
1425 MIRBuilder.buildConstant(Res: DstReg, Val: FVal.bitcastToAPInt());
1426
1427 MI.eraseFromParent();
1428 return true;
1429 }
1430 case TargetOpcode::G_CONSTANT: {
1431 const Function &F = MF.getFunction();
1432 // TODO: if PSI and BFI are present, add " ||
1433 // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
1434 bool ShouldOptForSize = F.hasOptSize();
1435 const ConstantInt *ConstVal = MI.getOperand(i: 1).getCImm();
1436 if (!shouldBeInConstantPool(APImm: ConstVal->getValue(), ShouldOptForSize))
1437 return true;
1438 return Helper.lowerConstant(MI);
1439 }
1440 case TargetOpcode::G_SUB:
1441 case TargetOpcode::G_ADD: {
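 // A rough sketch of the rewrite performed below for a 32-bit add on RV64
 // (vreg names are illustrative): %z:_(s32) = G_ADD %x, %y becomes
 //   %xe:_(s64) = G_ANYEXT %x
 //   %ye:_(s64) = G_ANYEXT %y
 //   %t:_(s64)  = G_ADD %xe, %ye
 //   %s:_(s64)  = G_SEXT_INREG %t, 32
 //   %z:_(s32)  = G_TRUNC %s
 // Keeping a sign-extended s64 copy of the result lines up with addw/subw,
 // which produce sign-extended 32-bit results, so later selection/combines
 // can fold the extension away.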
1442 Helper.Observer.changingInstr(MI);
1443 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
1444 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
1445
1446 Register DstALU = MRI.createGenericVirtualRegister(Ty: sXLen);
1447
1448 MachineOperand &MO = MI.getOperand(i: 0);
1449 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
1450 auto DstSext = MIRBuilder.buildSExtInReg(Res: sXLen, Op: DstALU, ImmOp: 32);
1451
1452 MIRBuilder.buildInstr(Opc: TargetOpcode::G_TRUNC, DstOps: {MO}, SrcOps: {DstSext});
1453 MO.setReg(DstALU);
1454
1455 Helper.Observer.changedInstr(MI);
1456 return true;
1457 }
1458 case TargetOpcode::G_SEXT_INREG: {
1459 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1460 int64_t SizeInBits = MI.getOperand(i: 2).getImm();
1461 // Source size of 32 is sext.w.
1462 if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
1463 return true;
1464
1465 if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
1466 return true;
1467
1468 return Helper.lower(MI, TypeIdx: 0, /* Unused hint type */ Ty: LLT()) ==
1469 LegalizerHelper::Legalized;
1470 }
1471 case TargetOpcode::G_ASHR:
1472 case TargetOpcode::G_LSHR:
1473 case TargetOpcode::G_SHL: {
1474 if (getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI)) {
1475 // We don't need a custom node for shift by constant. Just widen the
1476 // source and the shift amount.
1477 unsigned ExtOpc = TargetOpcode::G_ANYEXT;
1478 if (MI.getOpcode() == TargetOpcode::G_ASHR)
1479 ExtOpc = TargetOpcode::G_SEXT;
1480 else if (MI.getOpcode() == TargetOpcode::G_LSHR)
1481 ExtOpc = TargetOpcode::G_ZEXT;
1482
1483 Helper.Observer.changingInstr(MI);
1484 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 1, ExtOpcode: ExtOpc);
1485 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
1486 Helper.widenScalarDst(MI, WideTy: sXLen);
1487 Helper.Observer.changedInstr(MI);
1488 return true;
1489 }
1490
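 // A rough sketch for a variable 32-bit shift on RV64 (illustrative vreg
 // names): %z:_(s32) = G_LSHR %x, %y becomes
 //   %xe:_(s64) = G_ANYEXT %x
 //   %ye:_(s64) = G_ANYEXT %y
 //   %t:_(s64)  = G_SRLW %xe, %ye
 //   %z:_(s32)  = G_TRUNC %t
 // SRLW only reads the low 32 bits of the source and the low 5 bits of the
 // shift amount, so any-extending both operands is sufficient.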
1491 Helper.Observer.changingInstr(MI);
1492 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
1493 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
1494 Helper.widenScalarDst(MI, WideTy: sXLen);
1495 MI.setDesc(MIRBuilder.getTII().get(Opcode: getRISCVWOpcode(Opcode: MI.getOpcode())));
1496 Helper.Observer.changedInstr(MI);
1497 return true;
1498 }
1499 case TargetOpcode::G_SDIV:
1500 case TargetOpcode::G_UDIV:
1501 case TargetOpcode::G_UREM:
1502 case TargetOpcode::G_ROTL:
1503 case TargetOpcode::G_ROTR: {
1504 Helper.Observer.changingInstr(MI);
1505 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
1506 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
1507 Helper.widenScalarDst(MI, WideTy: sXLen);
1508 MI.setDesc(MIRBuilder.getTII().get(Opcode: getRISCVWOpcode(Opcode: MI.getOpcode())));
1509 Helper.Observer.changedInstr(MI);
1510 return true;
1511 }
1512 case TargetOpcode::G_CTLZ:
1513 case TargetOpcode::G_CTTZ:
1514 case TargetOpcode::G_CTLS: {
1515 Helper.Observer.changingInstr(MI);
1516 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
1517 Helper.widenScalarDst(MI, WideTy: sXLen);
1518 MI.setDesc(MIRBuilder.getTII().get(Opcode: getRISCVWOpcode(Opcode: MI.getOpcode())));
1519 Helper.Observer.changedInstr(MI);
1520 return true;
1521 }
1522 case TargetOpcode::G_FPTOSI:
1523 case TargetOpcode::G_FPTOUI: {
1524 Helper.Observer.changingInstr(MI);
1525 Helper.widenScalarDst(MI, WideTy: sXLen);
1526 MI.setDesc(MIRBuilder.getTII().get(Opcode: getRISCVWOpcode(Opcode: MI.getOpcode())));
1527 MI.addOperand(Op: MachineOperand::CreateImm(Val: RISCVFPRndMode::RTZ));
1528 Helper.Observer.changedInstr(MI);
1529 return true;
1530 }
1531 case TargetOpcode::G_IS_FPCLASS: {
1532 Register GISFPCLASS = MI.getOperand(i: 0).getReg();
1533 Register Src = MI.getOperand(i: 1).getReg();
1534 const MachineOperand &ImmOp = MI.getOperand(i: 2);
1535 MachineIRBuilder MIB(MI);
1536
1537 // Map LLVM IR's floating-point class mask onto the RISC-V fclass encoding
1538 // by simply rotating the 10-bit immediate right by two bits.
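 // For example (assuming the standard FPClassTest and fclass bit layouts):
 // a "nan" test, fcSNan|fcQNan == 0b0000000011, rotates to 0b1100000000,
 // which is exactly the sNaN|qNaN bits of the RISC-V fclass result.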
1539 APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
1540 auto FClassMask = MIB.buildConstant(Res: sXLen, Val: GFpClassImm.rotr(rotateAmt: 2).zext(width: XLen));
1541 auto ConstZero = MIB.buildConstant(Res: sXLen, Val: 0);
1542
1543 auto GFClass = MIB.buildInstr(Opc: RISCV::G_FCLASS, DstOps: {sXLen}, SrcOps: {Src});
1544 auto And = MIB.buildAnd(Dst: sXLen, Src0: GFClass, Src1: FClassMask);
1545 MIB.buildICmp(Pred: CmpInst::ICMP_NE, Res: GISFPCLASS, Op0: And, Op1: ConstZero);
1546
1547 MI.eraseFromParent();
1548 return true;
1549 }
1550 case TargetOpcode::G_BRJT:
1551 return legalizeBRJT(MI, MIRBuilder);
1552 case TargetOpcode::G_VASTART:
1553 return legalizeVAStart(MI, MIRBuilder);
1554 case TargetOpcode::G_VSCALE:
1555 return legalizeVScale(MI, MIB&: MIRBuilder);
1556 case TargetOpcode::G_ZEXT:
1557 case TargetOpcode::G_SEXT:
1558 case TargetOpcode::G_ANYEXT:
1559 return legalizeExt(MI, MIB&: MIRBuilder);
1560 case TargetOpcode::G_SPLAT_VECTOR:
1561 return legalizeSplatVector(MI, MIB&: MIRBuilder);
1562 case TargetOpcode::G_EXTRACT_SUBVECTOR:
1563 return legalizeExtractSubvector(MI, MIB&: MIRBuilder);
1564 case TargetOpcode::G_INSERT_SUBVECTOR:
1565 return legalizeInsertSubvector(MI, Helper, MIB&: MIRBuilder);
1566 case TargetOpcode::G_LOAD:
1567 case TargetOpcode::G_STORE:
1568 return legalizeLoadStore(MI, Helper, MIB&: MIRBuilder);
1569 }
1570
1571 llvm_unreachable("expected switch to return");
1572}
1573