1//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the Machinelegalizer class for RISC-V.
10/// \todo This should be generated by TableGen.
11//===----------------------------------------------------------------------===//
12
13#include "RISCVLegalizerInfo.h"
14#include "MCTargetDesc/RISCVMatInt.h"
15#include "RISCVMachineFunctionInfo.h"
16#include "RISCVSubtarget.h"
17#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
18#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
19#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
20#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
21#include "llvm/CodeGen/MachineConstantPool.h"
22#include "llvm/CodeGen/MachineJumpTableInfo.h"
23#include "llvm/CodeGen/MachineMemOperand.h"
24#include "llvm/CodeGen/MachineOperand.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/TargetOpcodes.h"
27#include "llvm/CodeGen/ValueTypes.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/IntrinsicsRISCV.h"
31#include "llvm/IR/Type.h"
32
33using namespace llvm;
34using namespace LegalityPredicates;
35using namespace LegalizeMutations;
36
37static LegalityPredicate
38typeIsLegalIntOrFPVec(unsigned TypeIdx,
39 std::initializer_list<LLT> IntOrFPVecTys,
40 const RISCVSubtarget &ST) {
41 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
42 return ST.hasVInstructions() &&
43 (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
44 ST.hasVInstructionsI64()) &&
45 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
46 ST.getELen() == 64);
47 };
48
49 return all(P0: typeInSet(TypeIdx, TypesInit: IntOrFPVecTys), P1: P);
50}
51
52static LegalityPredicate
53typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
54 const RISCVSubtarget &ST) {
55 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
56 return ST.hasVInstructions() &&
57 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
58 ST.getELen() == 64);
59 };
60 return all(P0: typeInSet(TypeIdx, TypesInit: BoolVecTys), P1: P);
61}
62
63static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
64 std::initializer_list<LLT> PtrVecTys,
65 const RISCVSubtarget &ST) {
66 LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
67 return ST.hasVInstructions() &&
68 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
69 ST.getELen() == 64) &&
70 (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
71 Query.Types[TypeIdx].getScalarSizeInBits() == 32);
72 };
73 return all(P0: typeInSet(TypeIdx, TypesInit: PtrVecTys), P1: P);
74}
75
76RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
77 : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(SizeInBits: XLen)) {
78 const LLT sDoubleXLen = LLT::scalar(SizeInBits: 2 * XLen);
79 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: XLen);
80 const LLT s1 = LLT::scalar(SizeInBits: 1);
81 const LLT s8 = LLT::scalar(SizeInBits: 8);
82 const LLT s16 = LLT::scalar(SizeInBits: 16);
83 const LLT s32 = LLT::scalar(SizeInBits: 32);
84 const LLT s64 = LLT::scalar(SizeInBits: 64);
85 const LLT s128 = LLT::scalar(SizeInBits: 128);
86
87 const LLT nxv1s1 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s1);
88 const LLT nxv2s1 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s1);
89 const LLT nxv4s1 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s1);
90 const LLT nxv8s1 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s1);
91 const LLT nxv16s1 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s1);
92 const LLT nxv32s1 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s1);
93 const LLT nxv64s1 = LLT::scalable_vector(MinNumElements: 64, ScalarTy: s1);
94
95 const LLT nxv1s8 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s8);
96 const LLT nxv2s8 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s8);
97 const LLT nxv4s8 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s8);
98 const LLT nxv8s8 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s8);
99 const LLT nxv16s8 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s8);
100 const LLT nxv32s8 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s8);
101 const LLT nxv64s8 = LLT::scalable_vector(MinNumElements: 64, ScalarTy: s8);
102
103 const LLT nxv1s16 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s16);
104 const LLT nxv2s16 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s16);
105 const LLT nxv4s16 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s16);
106 const LLT nxv8s16 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s16);
107 const LLT nxv16s16 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s16);
108 const LLT nxv32s16 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s16);
109
110 const LLT nxv1s32 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s32);
111 const LLT nxv2s32 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s32);
112 const LLT nxv4s32 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s32);
113 const LLT nxv8s32 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s32);
114 const LLT nxv16s32 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s32);
115
116 const LLT nxv1s64 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s64);
117 const LLT nxv2s64 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s64);
118 const LLT nxv4s64 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s64);
119 const LLT nxv8s64 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s64);
120
121 const LLT nxv1p0 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: p0);
122 const LLT nxv2p0 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: p0);
123 const LLT nxv4p0 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: p0);
124 const LLT nxv8p0 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: p0);
125 const LLT nxv16p0 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: p0);
126
127 using namespace TargetOpcode;
128
129 auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};
130
131 auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
132 nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
133 nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
134 nxv1s64, nxv2s64, nxv4s64, nxv8s64};
135
136 auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
137
138 getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB})
139 .legalFor(Types: {sXLen})
140 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
141 .customFor(Pred: ST.is64Bit(), Types: {s32})
142 .widenScalarToNextPow2(TypeIdx: 0)
143 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
144
145 getActionDefinitionsBuilder(Opcodes: {G_AND, G_OR, G_XOR})
146 .legalFor(Types: {sXLen})
147 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
148 .widenScalarToNextPow2(TypeIdx: 0)
149 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
150
151 getActionDefinitionsBuilder(
152 Opcodes: {G_UADDE, G_UADDO, G_USUBE, G_USUBO, G_READ_REGISTER, G_WRITE_REGISTER})
153 .lower();
154
155 getActionDefinitionsBuilder(Opcodes: {G_SADDE, G_SADDO, G_SSUBE, G_SSUBO})
156 .minScalar(TypeIdx: 0, Ty: sXLen)
157 .lower();
158
159 // TODO: Use Vector Single-Width Saturating Instructions for vector types.
160 getActionDefinitionsBuilder(
161 Opcodes: {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT, G_SSHLSAT, G_USHLSAT})
162 .lower();
163
164 getActionDefinitionsBuilder(Opcodes: {G_SHL, G_ASHR, G_LSHR})
165 .legalFor(Types: {{sXLen, sXLen}})
166 .customFor(Pred: ST.is64Bit(), Types: {{s32, s32}})
167 .widenScalarToNextPow2(TypeIdx: 0)
168 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen)
169 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
170
171 getActionDefinitionsBuilder(Opcodes: {G_ZEXT, G_SEXT, G_ANYEXT})
172 .legalFor(Types: {{s32, s16}})
173 .legalFor(Pred: ST.is64Bit(), Types: {{s64, s16}, {s64, s32}})
174 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
175 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)))
176 .customIf(Predicate: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST))
177 .maxScalar(TypeIdx: 0, Ty: sXLen);
178
179 getActionDefinitionsBuilder(Opcode: G_TRUNC).alwaysLegal();
180
181 getActionDefinitionsBuilder(Opcode: G_SEXT_INREG)
182 .customFor(Types: {sXLen})
183 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
184 .lower();
185
186 // Merge/Unmerge
187 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
188 auto &MergeUnmergeActions = getActionDefinitionsBuilder(Opcode: Op);
189 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
190 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
191 if (XLen == 32 && ST.hasStdExtD()) {
192 MergeUnmergeActions.legalIf(
193 Predicate: all(P0: typeIs(TypeIdx: BigTyIdx, TypesInit: s64), P1: typeIs(TypeIdx: LitTyIdx, TypesInit: s32)));
194 }
195 MergeUnmergeActions.widenScalarToNextPow2(TypeIdx: LitTyIdx, MinSize: XLen)
196 .widenScalarToNextPow2(TypeIdx: BigTyIdx, MinSize: XLen)
197 .clampScalar(TypeIdx: LitTyIdx, MinTy: sXLen, MaxTy: sXLen)
198 .clampScalar(TypeIdx: BigTyIdx, MinTy: sXLen, MaxTy: sXLen);
199 }
200
201 getActionDefinitionsBuilder(Opcodes: {G_FSHL, G_FSHR}).lower();
202
203 getActionDefinitionsBuilder(Opcodes: {G_ROTR, G_ROTL})
204 .legalFor(Pred: ST.hasStdExtZbb() || ST.hasStdExtZbkb(), Types: {{sXLen, sXLen}})
205 .customFor(Pred: ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
206 Types: {{s32, s32}})
207 .lower();
208
209 getActionDefinitionsBuilder(Opcode: G_BITREVERSE)
210 .customFor(Pred: ST.hasStdExtZbkb(), Types: {s8})
211 .maxScalar(TypeIdx: 0, Ty: sXLen)
212 .lower();
213
214 getActionDefinitionsBuilder(Opcode: G_BITCAST).legalIf(
215 Predicate: all(P0: LegalityPredicates::any(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
216 P1: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST)),
217 P1: LegalityPredicates::any(P0: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST),
218 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST))));
219
220 auto &BSWAPActions = getActionDefinitionsBuilder(Opcode: G_BSWAP);
221 if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
222 BSWAPActions.legalFor(Types: {sXLen}).clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
223 else
224 BSWAPActions.maxScalar(TypeIdx: 0, Ty: sXLen).lower();
225
226 auto &CountZerosActions = getActionDefinitionsBuilder(Opcodes: {G_CTLZ, G_CTTZ});
227 auto &CountZerosPoisonActions =
228 getActionDefinitionsBuilder(Opcodes: {G_CTLZ_ZERO_POISON, G_CTTZ_ZERO_POISON});
229 if (ST.hasStdExtZbb()) {
230 CountZerosActions.legalFor(Types: {{sXLen, sXLen}})
231 .customFor(Types: {{s32, s32}})
232 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen)
233 .widenScalarToNextPow2(TypeIdx: 0)
234 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
235 } else {
236 CountZerosActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0).lower();
237 CountZerosPoisonActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
238 }
239 CountZerosPoisonActions.lower();
240
241 auto &CountSignActions = getActionDefinitionsBuilder(Opcode: G_CTLS);
242 if (ST.hasStdExtP()) {
243 CountSignActions.legalFor(Types: {{sXLen, sXLen}})
244 .customFor(Types: {{s32, s32}})
245 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen)
246 .widenScalarToNextPow2(TypeIdx: 0)
247 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
248 } else {
249 CountSignActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0).lower();
250 }
251
252 auto &CTPOPActions = getActionDefinitionsBuilder(Opcode: G_CTPOP);
253 if (ST.hasStdExtZbb()) {
254 CTPOPActions.legalFor(Types: {{sXLen, sXLen}})
255 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
256 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
257 } else {
258 CTPOPActions.widenScalarToNextPow2(TypeIdx: 0, /*Min*/ MinSize: 8)
259 .clampScalar(TypeIdx: 0, MinTy: s8, MaxTy: sXLen)
260 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0)
261 .lower();
262 }
263
264 getActionDefinitionsBuilder(Opcode: G_CONSTANT)
265 .legalFor(Types: {p0})
266 .legalFor(Pred: !ST.is64Bit(), Types: {s32})
267 .customFor(Pred: ST.is64Bit(), Types: {s64})
268 .widenScalarToNextPow2(TypeIdx: 0)
269 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
270
271 // TODO: transform illegal vector types into legal vector type
272 getActionDefinitionsBuilder(Opcode: G_FREEZE)
273 .legalFor(Types: {s16, s32, p0})
274 .legalFor(Pred: ST.is64Bit(), Types: {s64})
275 .legalIf(Predicate: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST))
276 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
277 .widenScalarToNextPow2(TypeIdx: 0)
278 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen);
279
280 // TODO: transform illegal vector types into legal vector type
281 // TODO: Merge with G_FREEZE?
282 getActionDefinitionsBuilder(
283 Opcodes: {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
284 .legalFor(Types: {s32, sXLen, p0})
285 .legalIf(Predicate: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST))
286 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
287 .widenScalarToNextPow2(TypeIdx: 0)
288 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen);
289
290 getActionDefinitionsBuilder(Opcode: G_ICMP)
291 .legalFor(Types: {{sXLen, sXLen}, {sXLen, p0}})
292 .legalIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
293 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)))
294 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1, MinSize: 8)
295 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen)
296 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
297
298 getActionDefinitionsBuilder(Opcode: G_SELECT)
299 .legalFor(Types: {{s32, sXLen}, {p0, sXLen}})
300 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
301 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)))
302 .legalFor(Pred: XLen == 64 || ST.hasStdExtD(), Types: {{s64, sXLen}})
303 .widenScalarToNextPow2(TypeIdx: 0)
304 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
305 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
306
307 auto &LoadActions = getActionDefinitionsBuilder(Opcode: G_LOAD);
308 auto &StoreActions = getActionDefinitionsBuilder(Opcode: G_STORE);
309 auto &ExtLoadActions = getActionDefinitionsBuilder(Opcodes: {G_SEXTLOAD, G_ZEXTLOAD});
310
311 // Return the alignment needed for scalar memory ops. If unaligned scalar mem
312 // is supported, we only require byte alignment. Otherwise, we need the memory
313 // op to be natively aligned.
314 auto getScalarMemAlign = [&ST](unsigned Size) {
315 return ST.enableUnalignedScalarMem() ? 8 : Size;
316 };
317
318 LoadActions.legalForTypesWithMemDesc(
319 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
320 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
321 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
322 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
323 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
324 {.Type0: p0, .Type1: p0, .MemTy: sXLen, .Align: getScalarMemAlign(XLen)}});
325 StoreActions.legalForTypesWithMemDesc(
326 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
327 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
328 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
329 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
330 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
331 {.Type0: p0, .Type1: p0, .MemTy: sXLen, .Align: getScalarMemAlign(XLen)}});
332 ExtLoadActions.legalForTypesWithMemDesc(
333 TypesAndMemDesc: {{.Type0: sXLen, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
334 {.Type0: sXLen, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)}});
335 if (XLen == 64) {
336 LoadActions.legalForTypesWithMemDesc(
337 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
338 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
339 {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
340 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
341 StoreActions.legalForTypesWithMemDesc(
342 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
343 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
344 {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
345 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
346 ExtLoadActions.legalForTypesWithMemDesc(
347 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)}});
348 } else if (ST.hasStdExtD()) {
349 LoadActions.legalForTypesWithMemDesc(
350 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
351 StoreActions.legalForTypesWithMemDesc(
352 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
353 }
354
355 // Vector loads/stores.
356 if (ST.hasVInstructions()) {
357 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv2s8, .Type1: p0, .MemTy: nxv2s8, .Align: 8},
358 {.Type0: nxv4s8, .Type1: p0, .MemTy: nxv4s8, .Align: 8},
359 {.Type0: nxv8s8, .Type1: p0, .MemTy: nxv8s8, .Align: 8},
360 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
361 {.Type0: nxv32s8, .Type1: p0, .MemTy: nxv32s8, .Align: 8},
362 {.Type0: nxv64s8, .Type1: p0, .MemTy: nxv64s8, .Align: 8},
363 {.Type0: nxv2s16, .Type1: p0, .MemTy: nxv2s16, .Align: 16},
364 {.Type0: nxv4s16, .Type1: p0, .MemTy: nxv4s16, .Align: 16},
365 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 16},
366 {.Type0: nxv16s16, .Type1: p0, .MemTy: nxv16s16, .Align: 16},
367 {.Type0: nxv32s16, .Type1: p0, .MemTy: nxv32s16, .Align: 16},
368 {.Type0: nxv2s32, .Type1: p0, .MemTy: nxv2s32, .Align: 32},
369 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 32},
370 {.Type0: nxv8s32, .Type1: p0, .MemTy: nxv8s32, .Align: 32},
371 {.Type0: nxv16s32, .Type1: p0, .MemTy: nxv16s32, .Align: 32}});
372 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv2s8, .Type1: p0, .MemTy: nxv2s8, .Align: 8},
373 {.Type0: nxv4s8, .Type1: p0, .MemTy: nxv4s8, .Align: 8},
374 {.Type0: nxv8s8, .Type1: p0, .MemTy: nxv8s8, .Align: 8},
375 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
376 {.Type0: nxv32s8, .Type1: p0, .MemTy: nxv32s8, .Align: 8},
377 {.Type0: nxv64s8, .Type1: p0, .MemTy: nxv64s8, .Align: 8},
378 {.Type0: nxv2s16, .Type1: p0, .MemTy: nxv2s16, .Align: 16},
379 {.Type0: nxv4s16, .Type1: p0, .MemTy: nxv4s16, .Align: 16},
380 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 16},
381 {.Type0: nxv16s16, .Type1: p0, .MemTy: nxv16s16, .Align: 16},
382 {.Type0: nxv32s16, .Type1: p0, .MemTy: nxv32s16, .Align: 16},
383 {.Type0: nxv2s32, .Type1: p0, .MemTy: nxv2s32, .Align: 32},
384 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 32},
385 {.Type0: nxv8s32, .Type1: p0, .MemTy: nxv8s32, .Align: 32},
386 {.Type0: nxv16s32, .Type1: p0, .MemTy: nxv16s32, .Align: 32}});
387
388 if (ST.getELen() == 64) {
389 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s8, .Type1: p0, .MemTy: nxv1s8, .Align: 8},
390 {.Type0: nxv1s16, .Type1: p0, .MemTy: nxv1s16, .Align: 16},
391 {.Type0: nxv1s32, .Type1: p0, .MemTy: nxv1s32, .Align: 32}});
392 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s8, .Type1: p0, .MemTy: nxv1s8, .Align: 8},
393 {.Type0: nxv1s16, .Type1: p0, .MemTy: nxv1s16, .Align: 16},
394 {.Type0: nxv1s32, .Type1: p0, .MemTy: nxv1s32, .Align: 32}});
395 }
396
397 if (ST.hasVInstructionsI64()) {
398 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s64, .Type1: p0, .MemTy: nxv1s64, .Align: 64},
399 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 64},
400 {.Type0: nxv4s64, .Type1: p0, .MemTy: nxv4s64, .Align: 64},
401 {.Type0: nxv8s64, .Type1: p0, .MemTy: nxv8s64, .Align: 64}});
402 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s64, .Type1: p0, .MemTy: nxv1s64, .Align: 64},
403 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 64},
404 {.Type0: nxv4s64, .Type1: p0, .MemTy: nxv4s64, .Align: 64},
405 {.Type0: nxv8s64, .Type1: p0, .MemTy: nxv8s64, .Align: 64}});
406 }
407
408 // we will take the custom lowering logic if we have scalable vector types
409 // with non-standard alignments
410 LoadActions.customIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST));
411 StoreActions.customIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST));
412
413 // Pointers require that XLen sized elements are legal.
414 if (XLen <= ST.getELen()) {
415 LoadActions.customIf(Predicate: typeIsLegalPtrVec(TypeIdx: 0, PtrVecTys, ST));
416 StoreActions.customIf(Predicate: typeIsLegalPtrVec(TypeIdx: 0, PtrVecTys, ST));
417 }
418 }
419
420 LoadActions.widenScalarToNextPow2(TypeIdx: 0, /* MinSize = */ 8)
421 .lowerIfMemSizeNotByteSizePow2()
422 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen)
423 .lower();
424 StoreActions
425 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen)
426 .lowerIfMemSizeNotByteSizePow2()
427 .lower();
428
429 ExtLoadActions.widenScalarToNextPow2(TypeIdx: 0).clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen).lower();
430
431 getActionDefinitionsBuilder(Opcodes: {G_PTR_ADD, G_PTRMASK}).legalFor(Types: {{p0, sXLen}});
432
433 getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
434 .legalFor(Types: {{sXLen, p0}})
435 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
436
437 getActionDefinitionsBuilder(Opcode: G_INTTOPTR)
438 .legalFor(Types: {{p0, sXLen}})
439 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
440
441 getActionDefinitionsBuilder(Opcode: G_BR).alwaysLegal();
442
443 getActionDefinitionsBuilder(Opcode: G_BRCOND).legalFor(Types: {sXLen}).minScalar(TypeIdx: 0, Ty: sXLen);
444
445 getActionDefinitionsBuilder(Opcode: G_BRJT).customFor(Types: {{p0, sXLen}});
446
447 getActionDefinitionsBuilder(Opcode: G_BRINDIRECT).legalFor(Types: {p0});
448
449 getActionDefinitionsBuilder(Opcode: G_PHI)
450 .legalFor(Types: {p0, s32, sXLen})
451 .widenScalarToNextPow2(TypeIdx: 0)
452 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen);
453
454 getActionDefinitionsBuilder(Opcodes: {G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
455 .legalFor(Types: {p0});
456
457 if (ST.hasStdExtZmmul()) {
458 getActionDefinitionsBuilder(Opcode: G_MUL)
459 .legalFor(Types: {sXLen})
460 .widenScalarToNextPow2(TypeIdx: 0)
461 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
462
463 // clang-format off
464 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH})
465 .legalFor(Types: {sXLen})
466 .lower();
467 // clang-format on
468
469 getActionDefinitionsBuilder(Opcodes: {G_SMULO, G_UMULO}).minScalar(TypeIdx: 0, Ty: sXLen).lower();
470 } else {
471 getActionDefinitionsBuilder(Opcode: G_MUL)
472 .libcallFor(Types: {sXLen, sDoubleXLen})
473 .widenScalarToNextPow2(TypeIdx: 0)
474 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen);
475
476 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH}).lowerFor(Types: {sXLen});
477
478 getActionDefinitionsBuilder(Opcodes: {G_SMULO, G_UMULO})
479 .minScalar(TypeIdx: 0, Ty: sXLen)
480 // Widen sXLen to sDoubleXLen so we can use a single libcall to get
481 // the low bits for the mul result and high bits to do the overflow
482 // check.
483 .widenScalarIf(Predicate: typeIs(TypeIdx: 0, TypesInit: sXLen),
484 Mutation: LegalizeMutations::changeTo(TypeIdx: 0, Ty: sDoubleXLen))
485 .lower();
486 }
487
488 if (ST.hasStdExtM()) {
489 getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_UDIV, G_UREM})
490 .legalFor(Types: {sXLen})
491 .customFor(Types: {s32})
492 .libcallFor(Types: {sDoubleXLen})
493 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sDoubleXLen)
494 .widenScalarToNextPow2(TypeIdx: 0);
495 getActionDefinitionsBuilder(Opcode: G_SREM)
496 .legalFor(Types: {sXLen})
497 .libcallFor(Types: {sDoubleXLen})
498 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen)
499 .widenScalarToNextPow2(TypeIdx: 0);
500 } else {
501 getActionDefinitionsBuilder(Opcodes: {G_UDIV, G_SDIV, G_UREM, G_SREM})
502 .libcallFor(Types: {sXLen, sDoubleXLen})
503 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen)
504 .widenScalarToNextPow2(TypeIdx: 0);
505 }
506
507 // TODO: Use libcall for sDoubleXLen.
508 getActionDefinitionsBuilder(Opcodes: {G_SDIVREM, G_UDIVREM}).lower();
509
510 getActionDefinitionsBuilder(Opcode: G_ABS)
511 .customFor(Pred: ST.hasStdExtZbb(), Types: {sXLen})
512 .minScalar(Pred: ST.hasStdExtZbb(), TypeIdx: 0, Ty: sXLen)
513 .lower();
514
515 getActionDefinitionsBuilder(Opcodes: {G_ABDS, G_ABDU})
516 .minScalar(Pred: ST.hasStdExtZbb(), TypeIdx: 0, Ty: sXLen)
517 .lower();
518
519 getActionDefinitionsBuilder(Opcodes: {G_UMAX, G_UMIN, G_SMAX, G_SMIN})
520 .legalFor(Pred: ST.hasStdExtZbb(), Types: {sXLen})
521 .minScalar(Pred: ST.hasStdExtZbb(), TypeIdx: 0, Ty: sXLen)
522 .lower();
523
524 getActionDefinitionsBuilder(Opcodes: {G_SCMP, G_UCMP}).lower();
525
526 getActionDefinitionsBuilder(Opcode: G_FRAME_INDEX).legalFor(Types: {p0});
527
528 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
529
530 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY_INLINE, G_MEMSET_INLINE}).lower();
531
532 getActionDefinitionsBuilder(Opcodes: {G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
533 .lower();
534
535 // FP Operations
536
537 // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
538 getActionDefinitionsBuilder(Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT,
539 G_FMAXNUM, G_FMINNUM, G_FMAXIMUMNUM,
540 G_FMINIMUMNUM})
541 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
542 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
543 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
544 .libcallFor(Types: {s32, s64})
545 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
546
547 getActionDefinitionsBuilder(Opcodes: {G_FNEG, G_FABS})
548 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
549 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
550 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
551 .lowerFor(Types: {s32, s64, s128});
552
553 getActionDefinitionsBuilder(Opcode: G_FREM)
554 .libcallFor(Types: {s32, s64})
555 .libcallFor(Pred: ST.is64Bit(), Types: {s128})
556 .minScalar(TypeIdx: 0, Ty: s32)
557 .scalarize(TypeIdx: 0);
558
559 getActionDefinitionsBuilder(Opcode: G_FCOPYSIGN)
560 .legalFor(Pred: ST.hasStdExtF(), Types: {{s32, s32}})
561 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, s64}, {s32, s64}, {s64, s32}})
562 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, s16}, {s16, s32}, {s32, s16}})
563 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s16, s64}, {s64, s16}})
564 .lower();
565
566 // FIXME: Use Zfhmin.
567 getActionDefinitionsBuilder(Opcode: G_FPTRUNC)
568 .legalFor(Pred: ST.hasStdExtD(), Types: {{s32, s64}})
569 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, s32}})
570 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s16, s64}})
571 .libcallFor(Types: {{s32, s64}})
572 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}});
573 getActionDefinitionsBuilder(Opcode: G_FPEXT)
574 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, s32}})
575 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s32, s16}})
576 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s64, s16}})
577 .libcallFor(Types: {{s64, s32}})
578 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}});
579
580 getActionDefinitionsBuilder(Opcode: G_FCMP)
581 .legalFor(Pred: ST.hasStdExtF(), Types: {{sXLen, s32}})
582 .legalFor(Pred: ST.hasStdExtD(), Types: {{sXLen, s64}})
583 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{sXLen, s16}})
584 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
585 .libcallFor(Types: {{sXLen, s32}, {sXLen, s64}})
586 .libcallFor(Pred: ST.is64Bit(), Types: {{sXLen, s128}});
587
588 // TODO: Support vector version of G_IS_FPCLASS.
589 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS)
590 .customFor(Pred: ST.hasStdExtF(), Types: {{s1, s32}})
591 .customFor(Pred: ST.hasStdExtD(), Types: {{s1, s64}})
592 .customFor(Pred: ST.hasStdExtZfh(), Types: {{s1, s16}})
593 .lower();
594
595 getActionDefinitionsBuilder(Opcode: G_FCONSTANT)
596 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
597 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
598 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
599 .customFor(Pred: !ST.is64Bit(), Types: {s32})
600 .customFor(Pred: ST.is64Bit(), Types: {s32, s64})
601 .lowerFor(Types: {s64, s128});
602
603 getActionDefinitionsBuilder(Opcodes: {G_FPTOSI, G_FPTOUI})
604 .legalFor(Pred: ST.hasStdExtF(), Types: {{sXLen, s32}})
605 .legalFor(Pred: ST.hasStdExtD(), Types: {{sXLen, s64}})
606 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{sXLen, s16}})
607 .customFor(Pred: ST.is64Bit() && ST.hasStdExtF(), Types: {{s32, s32}})
608 .customFor(Pred: ST.is64Bit() && ST.hasStdExtD(), Types: {{s32, s64}})
609 .customFor(Pred: ST.is64Bit() && ST.hasStdExtZfh(), Types: {{s32, s16}})
610 .widenScalarToNextPow2(TypeIdx: 0)
611 .minScalar(TypeIdx: 0, Ty: s32)
612 .libcallFor(Types: {{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
613 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}}) // FIXME RV32.
614 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}, {s128, s128}});
615
616 getActionDefinitionsBuilder(Opcodes: {G_SITOFP, G_UITOFP})
617 .legalFor(Pred: ST.hasStdExtF(), Types: {{s32, sXLen}})
618 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, sXLen}})
619 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, sXLen}})
620 .widenScalarToNextPow2(TypeIdx: 1)
621 // Promote to XLen if the operation is legal.
622 .widenScalarIf(
623 Predicate: [=, &ST](const LegalityQuery &Query) {
624 return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
625 (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
626 ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
627 (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
628 (ST.hasStdExtZfh() &&
629 Query.Types[0].getSizeInBits() == 16));
630 },
631 Mutation: LegalizeMutations::changeTo(TypeIdx: 1, Ty: sXLen))
632 // Otherwise only promote to s32 since we have si libcalls.
633 .minScalar(TypeIdx: 1, Ty: s32)
634 .libcallFor(Types: {{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
635 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}}) // FIXME RV32.
636 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}, {s128, s128}});
637
638 // FIXME: We can do custom inline expansion like SelectionDAG.
639 getActionDefinitionsBuilder(Opcodes: {G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
640 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
641 G_INTRINSIC_ROUNDEVEN})
642 .legalFor(Pred: ST.hasStdExtZfa(), Types: {s32})
643 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtD(), Types: {s64})
644 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtZfh(), Types: {s16})
645 .libcallFor(Types: {s32, s64})
646 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
647
648 getActionDefinitionsBuilder(Opcodes: {G_FMAXIMUM, G_FMINIMUM})
649 .legalFor(Pred: ST.hasStdExtZfa(), Types: {s32})
650 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtD(), Types: {s64})
651 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtZfh(), Types: {s16});
652
653 getActionDefinitionsBuilder(Opcodes: {G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
654 G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
655 G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
656 G_FTANH, G_FMODF})
657 .libcallFor(Types: {s32, s64})
658 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
659 getActionDefinitionsBuilder(Opcodes: {G_FPOWI, G_FLDEXP})
660 .libcallFor(Types: {{s32, s32}, {s64, s32}})
661 .libcallFor(Pred: ST.is64Bit(), Types: {s128, s32});
662
663 getActionDefinitionsBuilder(Opcode: G_FCANONICALIZE)
664 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
665 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
666 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16});
667
668 getActionDefinitionsBuilder(Opcode: G_VASTART).customFor(Types: {p0});
669
670 // va_list must be a pointer, but most sized types are pretty easy to handle
671 // as the destination.
672 getActionDefinitionsBuilder(Opcode: G_VAARG)
673 // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
674 // other than sXLen.
675 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
676 .lowerForCartesianProduct(Types0: {sXLen, p0}, Types1: {p0});
677
678 getActionDefinitionsBuilder(Opcode: G_VSCALE)
679 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
680 .customFor(Types: {sXLen});
681
682 auto &SplatActions =
683 getActionDefinitionsBuilder(Opcode: G_SPLAT_VECTOR)
684 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
685 P1: typeIs(TypeIdx: 1, TypesInit: sXLen)))
686 .customIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST), P1: typeIs(TypeIdx: 1, TypesInit: s1)));
687 // Handle case of s64 element vectors on RV32. If the subtarget does not have
688 // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
689 // does have f64, then we don't know whether the type is an f64 or an i64,
690 // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
691 // depending on how the instructions it consumes are legalized. They are not
692 // legalized yet since legalization is in reverse postorder, so we cannot
693 // make the decision at this moment.
694 if (XLen == 32) {
695 if (ST.hasVInstructionsF64() && ST.hasStdExtD())
696 SplatActions.legalIf(Predicate: all(
697 P0: typeInSet(TypeIdx: 0, TypesInit: {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), P1: typeIs(TypeIdx: 1, TypesInit: s64)));
698 else if (ST.hasVInstructionsI64())
699 SplatActions.customIf(Predicate: all(
700 P0: typeInSet(TypeIdx: 0, TypesInit: {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), P1: typeIs(TypeIdx: 1, TypesInit: s64)));
701 }
702
703 SplatActions.clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
704
705 LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
706 LLT DstTy = Query.Types[0];
707 LLT SrcTy = Query.Types[1];
708 return DstTy.getElementType() == LLT::scalar(SizeInBits: 1) &&
709 DstTy.getElementCount().getKnownMinValue() >= 8 &&
710 SrcTy.getElementCount().getKnownMinValue() >= 8;
711 };
712 getActionDefinitionsBuilder(Opcode: G_EXTRACT_SUBVECTOR)
713 // We don't have the ability to slide mask vectors down indexed by their
714 // i1 elements; the smallest we can do is i8. Often we are able to bitcast
715 // to equivalent i8 vectors.
716 .bitcastIf(
717 Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
718 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST), args: ExtractSubvecBitcastPred),
719 Mutation: [=](const LegalityQuery &Query) {
720 LLT CastTy = LLT::vector(
721 EC: Query.Types[0].getElementCount().divideCoefficientBy(RHS: 8), ScalarSizeInBits: 8);
722 return std::pair(0, CastTy);
723 })
724 .customIf(Predicate: LegalityPredicates::any(
725 P0: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
726 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)),
727 P1: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
728 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST))));
729
730 getActionDefinitionsBuilder(Opcode: G_INSERT_SUBVECTOR)
731 .customIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
732 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)))
733 .customIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
734 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)));
735
736 getActionDefinitionsBuilder(Opcode: G_ATOMIC_CMPXCHG_WITH_SUCCESS)
737 .lowerIf(Predicate: all(P0: typeInSet(TypeIdx: 0, TypesInit: {s8, s16, s32, s64}), P1: typeIs(TypeIdx: 2, TypesInit: p0)));
738
739 getActionDefinitionsBuilder(Opcodes: {G_ATOMIC_CMPXCHG, G_ATOMICRMW_ADD,
740 G_ATOMICRMW_AND, G_ATOMICRMW_OR,
741 G_ATOMICRMW_XOR})
742 .legalFor(Pred: ST.hasStdExtA(), Types: {{sXLen, p0}})
743 .libcallFor(Pred: !ST.hasStdExtA(), Types: {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
744 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
745
746 getActionDefinitionsBuilder(Opcode: G_ATOMICRMW_SUB)
747 .libcallFor(Pred: !ST.hasStdExtA(), Types: {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
748 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
749 .lower();
750
751 LegalityPredicate InsertVectorEltPred = [=](const LegalityQuery &Query) {
752 LLT VecTy = Query.Types[0];
753 LLT EltTy = Query.Types[1];
754 return VecTy.getElementType() == EltTy;
755 };
756
757 getActionDefinitionsBuilder(Opcode: G_INSERT_VECTOR_ELT)
758 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
759 P1: InsertVectorEltPred, args: typeIs(TypeIdx: 2, TypesInit: sXLen)))
760 .legalIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST), P1: InsertVectorEltPred,
761 args: typeIs(TypeIdx: 2, TypesInit: sXLen)));
762
763 getActionDefinitionsBuilder(Opcodes: {G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS})
764 .alwaysLegal();
765
766 getActionDefinitionsBuilder(Opcode: G_FENCE).alwaysLegal();
767
768 getActionDefinitionsBuilder(Opcodes: {G_TRAP, G_DEBUGTRAP, G_UBSANTRAP}).alwaysLegal();
769
770 getLegacyLegalizerInfo().computeTables();
771 verify(MII: *ST.getInstrInfo());
772}
773
774bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
775 MachineInstr &MI) const {
776 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
777
778 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
779 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntrinsicID)) {
780 if (II->hasScalarOperand() && !II->IsFPIntrinsic) {
781 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
782 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
783
784 auto OldScalar = MI.getOperand(i: II->ScalarOperand + 2).getReg();
785 // Legalize integer vx form intrinsic.
786 if (MRI.getType(Reg: OldScalar).isScalar()) {
787 if (MRI.getType(Reg: OldScalar).getSizeInBits() < sXLen.getSizeInBits()) {
788 Helper.Observer.changingInstr(MI);
789 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: II->ScalarOperand + 2,
790 ExtOpcode: TargetOpcode::G_ANYEXT);
791 Helper.Observer.changedInstr(MI);
792 } else if (MRI.getType(Reg: OldScalar).getSizeInBits() >
793 sXLen.getSizeInBits()) {
794 // TODO: i64 in riscv32.
795 return false;
796 }
797 }
798 }
799 return true;
800 }
801
802 switch (IntrinsicID) {
803 default:
804 return false;
805 case Intrinsic::vacopy: {
806 // vacopy arguments must be legal because of the intrinsic signature.
807 // No need to check here.
808
809 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
810 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
811 MachineFunction &MF = *MI.getMF();
812 const DataLayout &DL = MIRBuilder.getDataLayout();
813 LLVMContext &Ctx = MF.getFunction().getContext();
814
815 Register DstLst = MI.getOperand(i: 1).getReg();
816 LLT PtrTy = MRI.getType(Reg: DstLst);
817
818 // Load the source va_list
819 Align Alignment = DL.getABITypeAlign(Ty: getTypeForLLT(Ty: PtrTy, C&: Ctx));
820 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
821 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad, MemTy: PtrTy, base_alignment: Alignment);
822 auto Tmp = MIRBuilder.buildLoad(Res: PtrTy, Addr: MI.getOperand(i: 2), MMO&: *LoadMMO);
823
824 // Store the result in the destination va_list
825 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
826 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOStore, MemTy: PtrTy, base_alignment: Alignment);
827 MIRBuilder.buildStore(Val: Tmp, Addr: DstLst, MMO&: *StoreMMO);
828
829 MI.eraseFromParent();
830 return true;
831 }
832 case Intrinsic::riscv_vsetvli:
833 case Intrinsic::riscv_vsetvlimax:
834 case Intrinsic::riscv_masked_atomicrmw_add:
835 case Intrinsic::riscv_masked_atomicrmw_sub:
836 case Intrinsic::riscv_masked_cmpxchg:
837 return true;
838 }
839}
840
841bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
842 MachineIRBuilder &MIRBuilder) const {
843 // Stores the address of the VarArgsFrameIndex slot into the memory location
844 assert(MI.getOpcode() == TargetOpcode::G_VASTART);
845 MachineFunction *MF = MI.getParent()->getParent();
846 RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
847 int FI = FuncInfo->getVarArgsFrameIndex();
848 LLT AddrTy = MIRBuilder.getMRI()->getType(Reg: MI.getOperand(i: 0).getReg());
849 auto FINAddr = MIRBuilder.buildFrameIndex(Res: AddrTy, Idx: FI);
850 assert(MI.hasOneMemOperand());
851 MIRBuilder.buildStore(Val: FINAddr, Addr: MI.getOperand(i: 0).getReg(),
852 MMO&: *MI.memoperands()[0]);
853 MI.eraseFromParent();
854 return true;
855}
856
857bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
858 MachineIRBuilder &MIRBuilder) const {
859 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
860 auto &MF = *MI.getParent()->getParent();
861 const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
862 unsigned EntrySize = MJTI->getEntrySize(TD: MF.getDataLayout());
863
864 Register PtrReg = MI.getOperand(i: 0).getReg();
865 LLT PtrTy = MRI.getType(Reg: PtrReg);
866 Register IndexReg = MI.getOperand(i: 2).getReg();
867 LLT IndexTy = MRI.getType(Reg: IndexReg);
868
869 if (!isPowerOf2_32(Value: EntrySize))
870 return false;
871
872 auto ShiftAmt = MIRBuilder.buildConstant(Res: IndexTy, Val: Log2_32(Value: EntrySize));
873 IndexReg = MIRBuilder.buildShl(Dst: IndexTy, Src0: IndexReg, Src1: ShiftAmt).getReg(Idx: 0);
874
875 auto Addr = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: PtrReg, Op1: IndexReg);
876
877 MachineMemOperand *MMO = MF.getMachineMemOperand(
878 PtrInfo: MachinePointerInfo::getJumpTable(MF), F: MachineMemOperand::MOLoad,
879 Size: EntrySize, BaseAlignment: Align(MJTI->getEntryAlignment(TD: MF.getDataLayout())));
880
881 Register TargetReg;
882 switch (MJTI->getEntryKind()) {
883 default:
884 return false;
885 case MachineJumpTableInfo::EK_LabelDifference32: {
886 // For PIC, the sequence is:
887 // BRIND(load(Jumptable + index) + RelocBase)
888 // RelocBase can be JumpTable, GOT or some sort of global base.
889 unsigned LoadOpc =
890 STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
891 auto Load = MIRBuilder.buildLoadInstr(Opcode: LoadOpc, Res: IndexTy, Addr, MMO&: *MMO);
892 TargetReg = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: PtrReg, Op1: Load).getReg(Idx: 0);
893 break;
894 }
895 case MachineJumpTableInfo::EK_Custom32: {
896 auto Load = MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_SEXTLOAD, Res: IndexTy,
897 Addr, MMO&: *MMO);
898 TargetReg = MIRBuilder.buildIntToPtr(Dst: PtrTy, Src: Load).getReg(Idx: 0);
899 break;
900 }
901 case MachineJumpTableInfo::EK_BlockAddress:
902 TargetReg = MIRBuilder.buildLoad(Res: PtrTy, Addr, MMO&: *MMO).getReg(Idx: 0);
903 break;
904 }
905
906 MIRBuilder.buildBrIndirect(Tgt: TargetReg);
907
908 MI.eraseFromParent();
909 return true;
910}
911
912bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
913 bool ShouldOptForSize) const {
914 assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
915 int64_t Imm = APImm.getSExtValue();
916 // All simm32 constants should be handled by isel.
917 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
918 // this check redundant, but small immediates are common so this check
919 // should have better compile time.
920 if (isInt<32>(x: Imm))
921 return false;
922
923 // We only need to cost the immediate, if constant pool lowering is enabled.
924 if (!STI.useConstantPoolForLargeInts())
925 return false;
926
927 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI);
928 if (Seq.size() <= STI.getMaxBuildIntsCost())
929 return false;
930
931 // Optimizations below are disabled for opt size. If we're optimizing for
932 // size, use a constant pool.
933 if (ShouldOptForSize)
934 return true;
935 //
936 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
937 // that if it will avoid a constant pool.
938 // It will require an extra temporary register though.
939 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
940 // low and high 32 bits are the same and bit 31 and 63 are set.
941 unsigned ShiftAmt, AddOpc;
942 RISCVMatInt::InstSeq SeqLo =
943 RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI, ShiftAmt, AddOpc);
944 return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
945}
946
947bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
948 MachineIRBuilder &MIB) const {
949 Register Dst = MI.getOperand(i: 0).getReg();
950
951 // We define our scalable vector types for lmul=1 to use a 64 bit known
952 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
953 // vscale as VLENB / 8.
954 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
955 if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
956 // Support for VLEN==32 is incomplete.
957 return false;
958
959 // We assume VLENB is a multiple of 8. We manually choose the best shift
960 // here because SimplifyDemandedBits isn't always able to simplify it.
961 uint64_t Val = MI.getOperand(i: 1).getCImm()->getZExtValue();
962 if (isPowerOf2_64(Value: Val)) {
963 uint64_t Log2 = Log2_64(Value: Val);
964 if (Log2 < 3) {
965 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {sXLen}, SrcOps: {});
966 MIB.buildLShr(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: sXLen, Val: 3 - Log2),
967 Flags: MachineInstr::IsExact);
968 } else if (Log2 > 3) {
969 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {sXLen}, SrcOps: {});
970 MIB.buildShl(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: sXLen, Val: Log2 - 3));
971 } else {
972 MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {Dst}, SrcOps: {});
973 }
974 } else if ((Val % 8) == 0) {
975 // If the multiplier is a multiple of 8, scale it down to avoid needing
976 // to shift the VLENB value.
977 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {sXLen}, SrcOps: {});
978 MIB.buildMul(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: sXLen, Val: Val / 8));
979 } else {
980 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {sXLen}, SrcOps: {});
981 auto VScale = MIB.buildLShr(Dst: sXLen, Src0: VLENB, Src1: MIB.buildConstant(Res: sXLen, Val: 3),
982 Flags: MachineInstr::IsExact);
983 MIB.buildMul(Dst, Src0: VScale, Src1: MIB.buildConstant(Res: sXLen, Val));
984 }
985 MI.eraseFromParent();
986 return true;
987}
988
989// Custom-lower extensions from mask vectors by using a vselect either with 1
990// for zero/any-extension or -1 for sign-extension:
991// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
992// Note that any-extension is lowered identically to zero-extension.
993bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
994 MachineIRBuilder &MIB) const {
995
996 unsigned Opc = MI.getOpcode();
997 assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
998 Opc == TargetOpcode::G_ANYEXT);
999
1000 MachineRegisterInfo &MRI = *MIB.getMRI();
1001 Register Dst = MI.getOperand(i: 0).getReg();
1002 Register Src = MI.getOperand(i: 1).getReg();
1003
1004 LLT DstTy = MRI.getType(Reg: Dst);
1005 int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
1006 LLT DstEltTy = DstTy.getElementType();
1007 auto SplatZero = MIB.buildSplatVector(Res: DstTy, Val: MIB.buildConstant(Res: DstEltTy, Val: 0));
1008 auto SplatTrue =
1009 MIB.buildSplatVector(Res: DstTy, Val: MIB.buildConstant(Res: DstEltTy, Val: ExtTrueVal));
1010 MIB.buildSelect(Res: Dst, Tst: Src, Op0: SplatTrue, Op1: SplatZero);
1011
1012 MI.eraseFromParent();
1013 return true;
1014}
1015
1016bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
1017 LegalizerHelper &Helper,
1018 MachineIRBuilder &MIB) const {
1019 assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
1020 "Machine instructions must be Load/Store.");
1021 MachineRegisterInfo &MRI = *MIB.getMRI();
1022 MachineFunction *MF = MI.getMF();
1023 const DataLayout &DL = MIB.getDataLayout();
1024 LLVMContext &Ctx = MF->getFunction().getContext();
1025
1026 Register DstReg = MI.getOperand(i: 0).getReg();
1027 LLT DataTy = MRI.getType(Reg: DstReg);
1028 if (!DataTy.isVector())
1029 return false;
1030
1031 if (!MI.hasOneMemOperand())
1032 return false;
1033
1034 MachineMemOperand *MMO = *MI.memoperands_begin();
1035
1036 const auto *TLI = STI.getTargetLowering();
1037 EVT VT = EVT::getEVT(Ty: getTypeForLLT(Ty: DataTy, C&: Ctx));
1038
1039 if (TLI->allowsMemoryAccessForAlignment(Context&: Ctx, DL, VT, MMO: *MMO))
1040 return true;
1041
1042 unsigned EltSizeBits = DataTy.getScalarSizeInBits();
1043 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
1044 "Unexpected unaligned RVV load type");
1045
1046 // Calculate the new vector type with i8 elements
1047 unsigned NumElements =
1048 DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
1049 LLT NewDataTy = LLT::scalable_vector(MinNumElements: NumElements, ScalarSizeInBits: 8);
1050
1051 Helper.bitcast(MI, TypeIdx: 0, Ty: NewDataTy);
1052
1053 return true;
1054}
1055
1056/// Return the type of the mask type suitable for masking the provided
1057/// vector type. This is simply an i1 element type vector of the same
1058/// (possibly scalable) length.
1059static LLT getMaskTypeFor(LLT VecTy) {
1060 assert(VecTy.isVector());
1061 ElementCount EC = VecTy.getElementCount();
1062 return LLT::vector(EC, ScalarTy: LLT::scalar(SizeInBits: 1));
1063}
1064
1065/// Creates an all ones mask suitable for masking a vector of type VecTy with
1066/// vector length VL.
1067static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
1068 MachineIRBuilder &MIB,
1069 MachineRegisterInfo &MRI) {
1070 LLT MaskTy = getMaskTypeFor(VecTy);
1071 return MIB.buildInstr(Opc: RISCV::G_VMSET_VL, DstOps: {MaskTy}, SrcOps: {VL});
1072}
1073
1074/// Gets the two common "VL" operands: an all-ones mask and the vector length.
1075/// VecTy is a scalable vector type.
1076static std::pair<MachineInstrBuilder, MachineInstrBuilder>
1077buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
1078 assert(VecTy.isScalableVector() && "Expecting scalable container type");
1079 const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
1080 LLT XLenTy(STI.getXLenVT());
1081 auto VL = MIB.buildConstant(Res: XLenTy, Val: -1);
1082 auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
1083 return {Mask, VL};
1084}
1085
1086static MachineInstrBuilder
1087buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
1088 Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
1089 MachineRegisterInfo &MRI) {
1090 // TODO: If the Hi bits of the splat are undefined, then it's fine to just
1091 // splat Lo even if it might be sign extended. I don't think we have
1092 // introduced a case where we're build a s64 where the upper bits are undef
1093 // yet.
1094
1095 // Fall back to a stack store and stride x0 vector load.
1096 // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
1097 // preprocessDAG in SDAG.
1098 return MIB.buildInstr(Opc: RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, DstOps: {Dst},
1099 SrcOps: {Passthru, Lo, Hi, VL});
1100}
1101
1102static MachineInstrBuilder
1103buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
1104 const SrcOp &Scalar, const SrcOp &VL,
1105 MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
1106 assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
1107 auto Unmerge = MIB.buildUnmerge(Res: LLT::scalar(SizeInBits: 32), Op: Scalar);
1108 return buildSplatPartsS64WithVL(Dst, Passthru, Lo: Unmerge.getReg(Idx: 0),
1109 Hi: Unmerge.getReg(Idx: 1), VL, MIB, MRI);
1110}
1111
1112// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
1113// legal equivalently-sized i8 type, so we can use that as a go-between.
1114// Splats of s1 types that have constant value can be legalized as VMSET_VL or
1115// VMCLR_VL.
1116bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
1117 MachineIRBuilder &MIB) const {
1118 assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);
1119
1120 MachineRegisterInfo &MRI = *MIB.getMRI();
1121
1122 Register Dst = MI.getOperand(i: 0).getReg();
1123 Register SplatVal = MI.getOperand(i: 1).getReg();
1124
1125 LLT VecTy = MRI.getType(Reg: Dst);
1126 LLT XLenTy(STI.getXLenVT());
1127
1128 // Handle case of s64 element vectors on rv32
1129 if (XLenTy.getSizeInBits() == 32 &&
1130 VecTy.getElementType().getSizeInBits() == 64) {
1131 auto [_, VL] = buildDefaultVLOps(VecTy: MRI.getType(Reg: Dst), MIB, MRI);
1132 buildSplatSplitS64WithVL(Dst, Passthru: MIB.buildUndef(Res: VecTy), Scalar: SplatVal, VL, MIB,
1133 MRI);
1134 MI.eraseFromParent();
1135 return true;
1136 }
1137
1138 // All-zeros or all-ones splats are handled specially.
1139 MachineInstr &SplatValMI = *MRI.getVRegDef(Reg: SplatVal);
1140 if (isAllOnesOrAllOnesSplat(MI: SplatValMI, MRI)) {
1141 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1142 MIB.buildInstr(Opc: RISCV::G_VMSET_VL, DstOps: {Dst}, SrcOps: {VL});
1143 MI.eraseFromParent();
1144 return true;
1145 }
1146 if (isNullOrNullSplat(MI: SplatValMI, MRI)) {
1147 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1148 MIB.buildInstr(Opc: RISCV::G_VMCLR_VL, DstOps: {Dst}, SrcOps: {VL});
1149 MI.eraseFromParent();
1150 return true;
1151 }
1152
1153 // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
1154 // ones) by promoting it to an s8 splat.
1155 LLT InterEltTy = LLT::scalar(SizeInBits: 8);
1156 LLT InterTy = VecTy.changeElementType(NewEltTy: InterEltTy);
1157 auto ZExtSplatVal = MIB.buildZExt(Res: InterEltTy, Op: SplatVal);
1158 auto And =
1159 MIB.buildAnd(Dst: InterEltTy, Src0: ZExtSplatVal, Src1: MIB.buildConstant(Res: InterEltTy, Val: 1));
1160 auto LHS = MIB.buildSplatVector(Res: InterTy, Val: And);
1161 auto ZeroSplat =
1162 MIB.buildSplatVector(Res: InterTy, Val: MIB.buildConstant(Res: InterEltTy, Val: 0));
1163 MIB.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: Dst, Op0: LHS, Op1: ZeroSplat);
1164 MI.eraseFromParent();
1165 return true;
1166}
1167
1168static LLT getLMUL1Ty(LLT VecTy) {
1169 assert(VecTy.getElementType().getSizeInBits() <= 64 &&
1170 "Unexpected vector LLT");
1171 return LLT::scalable_vector(MinNumElements: RISCV::RVVBitsPerBlock /
1172 VecTy.getElementType().getSizeInBits(),
1173 ScalarTy: VecTy.getElementType());
1174}
1175
1176bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1177 MachineIRBuilder &MIB) const {
1178 GExtractSubvector &ES = cast<GExtractSubvector>(Val&: MI);
1179
1180 MachineRegisterInfo &MRI = *MIB.getMRI();
1181
1182 Register Dst = ES.getReg(Idx: 0);
1183 Register Src = ES.getSrcVec();
1184 uint64_t Idx = ES.getIndexImm();
1185
1186 // With an index of 0 this is a cast-like subvector, which can be performed
1187 // with subregister operations.
1188 if (Idx == 0)
1189 return true;
1190
1191 LLT LitTy = MRI.getType(Reg: Dst);
1192 LLT BigTy = MRI.getType(Reg: Src);
1193
1194 if (LitTy.getElementType() == LLT::scalar(SizeInBits: 1)) {
1195 // We can't slide this mask vector up indexed by its i1 elements.
1196 // This poses a problem when we wish to insert a scalable vector which
1197 // can't be re-expressed as a larger type. Just choose the slow path and
1198 // extend to a larger type, then truncate back down.
1199 LLT ExtBigTy = BigTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1200 LLT ExtLitTy = LitTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1201 auto BigZExt = MIB.buildZExt(Res: ExtBigTy, Op: Src);
1202 auto ExtractZExt = MIB.buildExtractSubvector(Res: ExtLitTy, Src: BigZExt, Index: Idx);
1203 auto SplatZero = MIB.buildSplatVector(
1204 Res: ExtLitTy, Val: MIB.buildConstant(Res: ExtLitTy.getElementType(), Val: 0));
1205 MIB.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: Dst, Op0: ExtractZExt, Op1: SplatZero);
1206 MI.eraseFromParent();
1207 return true;
1208 }
1209
1210 // extract_subvector scales the index by vscale if the subvector is scalable,
1211 // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1212 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1213 MVT LitTyMVT = getMVTForLLT(Ty: LitTy);
1214 auto Decompose =
1215 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1216 VecVT: getMVTForLLT(Ty: BigTy), SubVecVT: LitTyMVT, InsertExtractIdx: Idx, TRI);
1217 unsigned RemIdx = Decompose.second;
1218
1219 // If the Idx has been completely eliminated then this is a subvector extract
1220 // which naturally aligns to a vector register. These can easily be handled
1221 // using subregister manipulation.
1222 if (RemIdx == 0)
1223 return true;
1224
1225 // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1226 // was > M1 then the index would need to be a multiple of VLMAX, and so would
1227 // divide exactly.
1228 assert(
1229 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second ||
1230 RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVVType::LMUL_1);
1231
1232 // If the vector type is an LMUL-group type, extract a subvector equal to the
1233 // nearest full vector register type.
1234 LLT InterLitTy = BigTy;
1235 Register Vec = Src;
1236 if (TypeSize::isKnownGT(LHS: BigTy.getSizeInBits(),
1237 RHS: getLMUL1Ty(VecTy: BigTy).getSizeInBits())) {
1238 // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1239 // we should have successfully decomposed the extract into a subregister.
1240 assert(Decompose.first != RISCV::NoSubRegister);
1241 InterLitTy = getLMUL1Ty(VecTy: BigTy);
1242 // SDAG builds a TargetExtractSubreg. We cannot create a a Copy with SubReg
1243 // specified on the source Register (the equivalent) since generic virtual
1244 // register does not allow subregister index.
1245 Vec = MIB.buildExtractSubvector(Res: InterLitTy, Src, Index: Idx - RemIdx).getReg(Idx: 0);
1246 }
1247
1248 // Slide this vector register down by the desired number of elements in order
1249 // to place the desired subvector starting at element 0.
1250 const LLT XLenTy(STI.getXLenVT());
1251 auto SlidedownAmt = MIB.buildVScale(Res: XLenTy, MinElts: RemIdx);
1252 auto [Mask, VL] = buildDefaultVLOps(VecTy: InterLitTy, MIB, MRI);
1253 uint64_t Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
1254 auto Slidedown = MIB.buildInstr(
1255 Opc: RISCV::G_VSLIDEDOWN_VL, DstOps: {InterLitTy},
1256 SrcOps: {MIB.buildUndef(Res: InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1257
1258 // Now the vector is in the right position, extract our final subvector. This
1259 // should resolve to a COPY.
1260 MIB.buildExtractSubvector(Res: Dst, Src: Slidedown, Index: 0);
1261
1262 MI.eraseFromParent();
1263 return true;
1264}
1265
1266bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
1267 LegalizerHelper &Helper,
1268 MachineIRBuilder &MIB) const {
1269 GInsertSubvector &IS = cast<GInsertSubvector>(Val&: MI);
1270
1271 MachineRegisterInfo &MRI = *MIB.getMRI();
1272
1273 Register Dst = IS.getReg(Idx: 0);
1274 Register BigVec = IS.getBigVec();
1275 Register LitVec = IS.getSubVec();
1276 uint64_t Idx = IS.getIndexImm();
1277
1278 LLT BigTy = MRI.getType(Reg: BigVec);
1279 LLT LitTy = MRI.getType(Reg: LitVec);
1280
1281 if (Idx == 0 &&
1282 MRI.getVRegDef(Reg: BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1283 return true;
1284
1285 // We don't have the ability to slide mask vectors up indexed by their i1
1286 // elements; the smallest we can do is i8. Often we are able to bitcast to
1287 // equivalent i8 vectors. Otherwise, we can must zeroextend to equivalent i8
1288 // vectors and truncate down after the insert.
1289 if (LitTy.getElementType() == LLT::scalar(SizeInBits: 1)) {
1290 auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
1291 auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
1292 if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
1293 return Helper.bitcast(
1294 MI&: IS, TypeIdx: 0,
1295 Ty: LLT::vector(EC: BigTy.getElementCount().divideCoefficientBy(RHS: 8), ScalarSizeInBits: 8));
1296
1297 // We can't slide this mask vector up indexed by its i1 elements.
1298 // This poses a problem when we wish to insert a scalable vector which
1299 // can't be re-expressed as a larger type. Just choose the slow path and
1300 // extend to a larger type, then truncate back down.
1301 LLT ExtBigTy = BigTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1302 return Helper.widenScalar(MI&: IS, TypeIdx: 0, WideTy: ExtBigTy);
1303 }
1304
1305 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1306 unsigned SubRegIdx, RemIdx;
1307 std::tie(args&: SubRegIdx, args&: RemIdx) =
1308 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1309 VecVT: getMVTForLLT(Ty: BigTy), SubVecVT: getMVTForLLT(Ty: LitTy), InsertExtractIdx: Idx, TRI);
1310
1311 TypeSize VecRegSize = TypeSize::getScalable(MinimumSize: RISCV::RVVBitsPerBlock);
1312 assert(isPowerOf2_64(
1313 STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
1314 bool ExactlyVecRegSized =
1315 STI.expandVScale(X: LitTy.getSizeInBits())
1316 .isKnownMultipleOf(RHS: STI.expandVScale(X: VecRegSize));
1317
1318 // If the Idx has been completely eliminated and this subvector's size is a
1319 // vector register or a multiple thereof, or the surrounding elements are
1320 // undef, then this is a subvector insert which naturally aligns to a vector
1321 // register. These can easily be handled using subregister manipulation.
1322 if (RemIdx == 0 && ExactlyVecRegSized)
1323 return true;
1324
1325 // If the subvector is smaller than a vector register, then the insertion
1326 // must preserve the undisturbed elements of the register. We do this by
1327 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
1328 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
1329 // subvector within the vector register, and an INSERT_SUBVECTOR of that
1330 // LMUL=1 type back into the larger vector (resolving to another subregister
1331 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
1332 // to avoid allocating a large register group to hold our subvector.
1333
1334 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
1335 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
1336 // (in our case undisturbed). This means we can set up a subvector insertion
1337 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
1338 // size of the subvector.
1339 const LLT XLenTy(STI.getXLenVT());
1340 LLT InterLitTy = BigTy;
1341 Register AlignedExtract = BigVec;
1342 unsigned AlignedIdx = Idx - RemIdx;
1343 if (TypeSize::isKnownGT(LHS: BigTy.getSizeInBits(),
1344 RHS: getLMUL1Ty(VecTy: BigTy).getSizeInBits())) {
1345 InterLitTy = getLMUL1Ty(VecTy: BigTy);
1346 // Extract a subvector equal to the nearest full vector register type. This
1347 // should resolve to a G_EXTRACT on a subreg.
1348 AlignedExtract =
1349 MIB.buildExtractSubvector(Res: InterLitTy, Src: BigVec, Index: AlignedIdx).getReg(Idx: 0);
1350 }
1351
1352 auto Insert = MIB.buildInsertSubvector(Res: InterLitTy, Src0: MIB.buildUndef(Res: InterLitTy),
1353 Src1: LitVec, Index: 0);
1354
1355 auto [Mask, _] = buildDefaultVLOps(VecTy: InterLitTy, MIB, MRI);
1356 auto VL = MIB.buildVScale(Res: XLenTy, MinElts: LitTy.getElementCount().getKnownMinValue());
1357
1358 // If we're inserting into the lowest elements, use a tail undisturbed
1359 // vmv.v.v.
1360 MachineInstrBuilder Inserted;
1361 bool NeedInsertSubvec =
1362 TypeSize::isKnownGT(LHS: BigTy.getSizeInBits(), RHS: InterLitTy.getSizeInBits());
1363 Register InsertedDst =
1364 NeedInsertSubvec ? MRI.createGenericVirtualRegister(Ty: InterLitTy) : Dst;
1365 if (RemIdx == 0) {
1366 Inserted = MIB.buildInstr(Opc: RISCV::G_VMV_V_V_VL, DstOps: {InsertedDst},
1367 SrcOps: {AlignedExtract, Insert, VL});
1368 } else {
1369 auto SlideupAmt = MIB.buildVScale(Res: XLenTy, MinElts: RemIdx);
1370 // Construct the vector length corresponding to RemIdx + length(LitTy).
1371 VL = MIB.buildAdd(Dst: XLenTy, Src0: SlideupAmt, Src1: VL);
1372 // Use tail agnostic policy if we're inserting over InterLitTy's tail.
1373 ElementCount EndIndex =
1374 ElementCount::getScalable(MinVal: RemIdx) + LitTy.getElementCount();
1375 uint64_t Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
1376 if (STI.expandVScale(X: EndIndex) ==
1377 STI.expandVScale(X: InterLitTy.getElementCount()))
1378 Policy = RISCVVType::TAIL_AGNOSTIC;
1379
1380 Inserted =
1381 MIB.buildInstr(Opc: RISCV::G_VSLIDEUP_VL, DstOps: {InsertedDst},
1382 SrcOps: {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
1383 }
1384
1385 // If required, insert this subvector back into the correct vector register.
1386 // This should resolve to an INSERT_SUBREG instruction.
1387 if (NeedInsertSubvec)
1388 MIB.buildInsertSubvector(Res: Dst, Src0: BigVec, Src1: Inserted, Index: AlignedIdx);
1389
1390 MI.eraseFromParent();
1391 return true;
1392}
1393
1394bool RISCVLegalizerInfo::legalizeBitreverse(MachineInstr &MI,
1395 MachineIRBuilder &MIB) const {
1396 assert(MI.getOpcode() == TargetOpcode::G_BITREVERSE && "Unexpected opcode");
1397
1398 if (!STI.hasStdExtZbkb())
1399 return false;
1400
1401 MachineRegisterInfo &MRI = *MIB.getMRI();
1402
1403 Register Dst = MI.getOperand(i: 0).getReg();
1404 Register Src = MI.getOperand(i: 1).getReg();
1405
1406 if (!MRI.getType(Reg: Dst).isScalar(Size: 8))
1407 return false;
1408
1409 auto WideSrc = MIB.buildAnyExt(Res: sXLen, Op: Src);
1410 auto Brev = MIB.buildInstr(Opc: RISCV::G_BREV8, DstOps: {sXLen}, SrcOps: {WideSrc.getReg(Idx: 0)});
1411 MIB.buildTrunc(Res: Dst, Op: Brev.getReg(Idx: 0));
1412
1413 MI.eraseFromParent();
1414 return true;
1415}
1416
1417static unsigned getRISCVWOpcode(unsigned Opcode) {
1418 switch (Opcode) {
1419 default:
1420 llvm_unreachable("Unexpected opcode");
1421 case TargetOpcode::G_ASHR:
1422 return RISCV::G_SRAW;
1423 case TargetOpcode::G_LSHR:
1424 return RISCV::G_SRLW;
1425 case TargetOpcode::G_SHL:
1426 return RISCV::G_SLLW;
1427 case TargetOpcode::G_SDIV:
1428 return RISCV::G_DIVW;
1429 case TargetOpcode::G_UDIV:
1430 return RISCV::G_DIVUW;
1431 case TargetOpcode::G_UREM:
1432 return RISCV::G_REMUW;
1433 case TargetOpcode::G_ROTL:
1434 return RISCV::G_ROLW;
1435 case TargetOpcode::G_ROTR:
1436 return RISCV::G_RORW;
1437 case TargetOpcode::G_CTLZ:
1438 return RISCV::G_CLZW;
1439 case TargetOpcode::G_CTTZ:
1440 return RISCV::G_CTZW;
1441 case TargetOpcode::G_CTLS:
1442 return RISCV::G_CLSW;
1443 case TargetOpcode::G_FPTOSI:
1444 return RISCV::G_FCVT_W_RV64;
1445 case TargetOpcode::G_FPTOUI:
1446 return RISCV::G_FCVT_WU_RV64;
1447 }
1448}
1449
1450bool RISCVLegalizerInfo::legalizeCustom(
1451 LegalizerHelper &Helper, MachineInstr &MI,
1452 LostDebugLocObserver &LocObserver) const {
1453 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1454 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1455 MachineFunction &MF = *MI.getParent()->getParent();
1456 switch (MI.getOpcode()) {
1457 default:
1458 // No idea what to do.
1459 return false;
1460 case TargetOpcode::G_ABS:
1461 return Helper.lowerAbsToMaxNeg(MI);
1462 case TargetOpcode::G_FCONSTANT: {
1463 const APFloat &FVal = MI.getOperand(i: 1).getFPImm()->getValueAPF();
1464
1465 // Convert G_FCONSTANT to G_CONSTANT.
1466 Register DstReg = MI.getOperand(i: 0).getReg();
1467 MIRBuilder.buildConstant(Res: DstReg, Val: FVal.bitcastToAPInt());
1468
1469 MI.eraseFromParent();
1470 return true;
1471 }
1472 case TargetOpcode::G_CONSTANT: {
1473 const Function &F = MF.getFunction();
1474 // TODO: if PSI and BFI are present, add " ||
1475 // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
1476 bool ShouldOptForSize = F.hasOptSize();
1477 const ConstantInt *ConstVal = MI.getOperand(i: 1).getCImm();
1478 if (!shouldBeInConstantPool(APImm: ConstVal->getValue(), ShouldOptForSize))
1479 return true;
1480 return Helper.lowerConstant(MI);
1481 }
1482 case TargetOpcode::G_SUB:
1483 case TargetOpcode::G_ADD: {
1484 Helper.Observer.changingInstr(MI);
1485 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
1486 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
1487
1488 Register DstALU = MRI.createGenericVirtualRegister(Ty: sXLen);
1489
1490 MachineOperand &MO = MI.getOperand(i: 0);
1491 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
1492 auto DstSext = MIRBuilder.buildSExtInReg(Res: sXLen, Op: DstALU, ImmOp: 32);
1493
1494 MIRBuilder.buildInstr(Opc: TargetOpcode::G_TRUNC, DstOps: {MO}, SrcOps: {DstSext});
1495 MO.setReg(DstALU);
1496
1497 Helper.Observer.changedInstr(MI);
1498 return true;
1499 }
1500 case TargetOpcode::G_SEXT_INREG: {
1501 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1502 int64_t SizeInBits = MI.getOperand(i: 2).getImm();
1503 // Source size of 32 is sext.w.
1504 if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
1505 return true;
1506
1507 if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
1508 return true;
1509
1510 return Helper.lower(MI, TypeIdx: 0, /* Unused hint type */ Ty: LLT()) ==
1511 LegalizerHelper::Legalized;
1512 }
1513 case TargetOpcode::G_ASHR:
1514 case TargetOpcode::G_LSHR:
1515 case TargetOpcode::G_SHL: {
1516 if (getIConstantVRegValWithLookThrough(VReg: MI.getOperand(i: 2).getReg(), MRI)) {
1517 // We don't need a custom node for shift by constant. Just widen the
1518 // source and the shift amount.
1519 unsigned ExtOpc = TargetOpcode::G_ANYEXT;
1520 if (MI.getOpcode() == TargetOpcode::G_ASHR)
1521 ExtOpc = TargetOpcode::G_SEXT;
1522 else if (MI.getOpcode() == TargetOpcode::G_LSHR)
1523 ExtOpc = TargetOpcode::G_ZEXT;
1524
1525 Helper.Observer.changingInstr(MI);
1526 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 1, ExtOpcode: ExtOpc);
1527 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
1528 Helper.widenScalarDst(MI, WideTy: sXLen);
1529 Helper.Observer.changedInstr(MI);
1530 return true;
1531 }
1532
1533 Helper.Observer.changingInstr(MI);
1534 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
1535 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
1536 Helper.widenScalarDst(MI, WideTy: sXLen);
1537 MI.setDesc(MIRBuilder.getTII().get(Opcode: getRISCVWOpcode(Opcode: MI.getOpcode())));
1538 Helper.Observer.changedInstr(MI);
1539 return true;
1540 }
1541 case TargetOpcode::G_SDIV:
1542 case TargetOpcode::G_UDIV:
1543 case TargetOpcode::G_UREM:
1544 case TargetOpcode::G_ROTL:
1545 case TargetOpcode::G_ROTR: {
1546 Helper.Observer.changingInstr(MI);
1547 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
1548 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
1549 Helper.widenScalarDst(MI, WideTy: sXLen);
1550 MI.setDesc(MIRBuilder.getTII().get(Opcode: getRISCVWOpcode(Opcode: MI.getOpcode())));
1551 Helper.Observer.changedInstr(MI);
1552 return true;
1553 }
1554 case TargetOpcode::G_CTLZ:
1555 case TargetOpcode::G_CTTZ:
1556 case TargetOpcode::G_CTLS: {
1557 Helper.Observer.changingInstr(MI);
1558 Helper.widenScalarSrc(MI, WideTy: sXLen, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
1559 Helper.widenScalarDst(MI, WideTy: sXLen);
1560 MI.setDesc(MIRBuilder.getTII().get(Opcode: getRISCVWOpcode(Opcode: MI.getOpcode())));
1561 Helper.Observer.changedInstr(MI);
1562 return true;
1563 }
1564 case TargetOpcode::G_FPTOSI:
1565 case TargetOpcode::G_FPTOUI: {
1566 Helper.Observer.changingInstr(MI);
1567 Helper.widenScalarDst(MI, WideTy: sXLen);
1568 MI.setDesc(MIRBuilder.getTII().get(Opcode: getRISCVWOpcode(Opcode: MI.getOpcode())));
1569 MI.addOperand(Op: MachineOperand::CreateImm(Val: RISCVFPRndMode::RTZ));
1570 Helper.Observer.changedInstr(MI);
1571 return true;
1572 }
1573 case TargetOpcode::G_IS_FPCLASS: {
1574 Register GISFPCLASS = MI.getOperand(i: 0).getReg();
1575 Register Src = MI.getOperand(i: 1).getReg();
1576 const MachineOperand &ImmOp = MI.getOperand(i: 2);
1577 MachineIRBuilder MIB(MI);
1578
1579 // Turn LLVM IR's floating point classes to that in RISC-V,
1580 // by simply rotating the 10-bit immediate right by two bits.
1581 APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
1582 auto FClassMask = MIB.buildConstant(Res: sXLen, Val: GFpClassImm.rotr(rotateAmt: 2).zext(width: XLen));
1583 auto ConstZero = MIB.buildConstant(Res: sXLen, Val: 0);
1584
1585 auto GFClass = MIB.buildInstr(Opc: RISCV::G_FCLASS, DstOps: {sXLen}, SrcOps: {Src});
1586 auto And = MIB.buildAnd(Dst: sXLen, Src0: GFClass, Src1: FClassMask);
1587 MIB.buildICmp(Pred: CmpInst::ICMP_NE, Res: GISFPCLASS, Op0: And, Op1: ConstZero);
1588
1589 MI.eraseFromParent();
1590 return true;
1591 }
1592 case TargetOpcode::G_BRJT:
1593 return legalizeBRJT(MI, MIRBuilder);
1594 case TargetOpcode::G_VASTART:
1595 return legalizeVAStart(MI, MIRBuilder);
1596 case TargetOpcode::G_VSCALE:
1597 return legalizeVScale(MI, MIB&: MIRBuilder);
1598 case TargetOpcode::G_ZEXT:
1599 case TargetOpcode::G_SEXT:
1600 case TargetOpcode::G_ANYEXT:
1601 return legalizeExt(MI, MIB&: MIRBuilder);
1602 case TargetOpcode::G_SPLAT_VECTOR:
1603 return legalizeSplatVector(MI, MIB&: MIRBuilder);
1604 case TargetOpcode::G_EXTRACT_SUBVECTOR:
1605 return legalizeExtractSubvector(MI, MIB&: MIRBuilder);
1606 case TargetOpcode::G_INSERT_SUBVECTOR:
1607 return legalizeInsertSubvector(MI, Helper, MIB&: MIRBuilder);
1608 case TargetOpcode::G_BITREVERSE:
1609 return legalizeBitreverse(MI, MIB&: MIRBuilder);
1610 case TargetOpcode::G_LOAD:
1611 case TargetOpcode::G_STORE:
1612 return legalizeLoadStore(MI, Helper, MIB&: MIRBuilder);
1613 }
1614
1615 llvm_unreachable("expected switch to return");
1616}
1617