//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
12
13#include "RISCVLegalizerInfo.h"
14#include "MCTargetDesc/RISCVMatInt.h"
15#include "RISCVMachineFunctionInfo.h"
16#include "RISCVSubtarget.h"
17#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
18#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
19#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
20#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
21#include "llvm/CodeGen/MachineConstantPool.h"
22#include "llvm/CodeGen/MachineJumpTableInfo.h"
23#include "llvm/CodeGen/MachineMemOperand.h"
24#include "llvm/CodeGen/MachineOperand.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/TargetOpcodes.h"
27#include "llvm/CodeGen/ValueTypes.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Type.h"
30
31using namespace llvm;
32using namespace LegalityPredicates;
33using namespace LegalizeMutations;
34
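/// Return a legality predicate that matches the vector types in
/// \p IntOrFPVecTys that the subtarget can actually support: vector
/// instructions must be enabled, 64-bit elements additionally require
/// hasVInstructionsI64(), and single-element (nxv1) types require ELEN == 64.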
static LegalityPredicate
typeIsLegalIntOrFPVec(unsigned TypeIdx,
                      std::initializer_list<LLT> IntOrFPVecTys,
                      const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
            ST.hasVInstructionsI64()) &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };

  return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
}
49
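/// Like typeIsLegalIntOrFPVec, but for the i1 (mask) vector types in
/// \p BoolVecTys: vector instructions must be enabled, and the single-element
/// nxv1s1 type additionally requires ELEN == 64.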
static LegalityPredicate
typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
                   const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };
  return all(typeInSet(TypeIdx, BoolVecTys), P);
}
60
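/// Legality predicate for the pointer vector types in \p PtrVecTys: vector
/// instructions must be enabled, nxv1p0 requires ELEN == 64, and nxv16p0 is
/// only supported when pointers are 32 bits wide.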
static LegalityPredicate typeIsLegalPtrVec(unsigned TypeIdx,
                                           std::initializer_list<LLT> PtrVecTys,
                                           const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64) &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 16 ||
            Query.Types[TypeIdx].getScalarSizeInBits() == 32);
  };
  return all(typeInSet(TypeIdx, PtrVecTys), P);
}
73
74RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
75 : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(SizeInBits: XLen)) {
76 const LLT sDoubleXLen = LLT::scalar(SizeInBits: 2 * XLen);
77 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: XLen);
78 const LLT s1 = LLT::scalar(SizeInBits: 1);
79 const LLT s8 = LLT::scalar(SizeInBits: 8);
80 const LLT s16 = LLT::scalar(SizeInBits: 16);
81 const LLT s32 = LLT::scalar(SizeInBits: 32);
82 const LLT s64 = LLT::scalar(SizeInBits: 64);
83 const LLT s128 = LLT::scalar(SizeInBits: 128);
84
85 const LLT nxv1s1 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s1);
86 const LLT nxv2s1 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s1);
87 const LLT nxv4s1 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s1);
88 const LLT nxv8s1 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s1);
89 const LLT nxv16s1 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s1);
90 const LLT nxv32s1 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s1);
91 const LLT nxv64s1 = LLT::scalable_vector(MinNumElements: 64, ScalarTy: s1);
92
93 const LLT nxv1s8 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s8);
94 const LLT nxv2s8 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s8);
95 const LLT nxv4s8 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s8);
96 const LLT nxv8s8 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s8);
97 const LLT nxv16s8 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s8);
98 const LLT nxv32s8 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s8);
99 const LLT nxv64s8 = LLT::scalable_vector(MinNumElements: 64, ScalarTy: s8);
100
101 const LLT nxv1s16 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s16);
102 const LLT nxv2s16 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s16);
103 const LLT nxv4s16 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s16);
104 const LLT nxv8s16 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s16);
105 const LLT nxv16s16 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s16);
106 const LLT nxv32s16 = LLT::scalable_vector(MinNumElements: 32, ScalarTy: s16);
107
108 const LLT nxv1s32 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s32);
109 const LLT nxv2s32 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s32);
110 const LLT nxv4s32 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s32);
111 const LLT nxv8s32 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s32);
112 const LLT nxv16s32 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: s32);
113
114 const LLT nxv1s64 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: s64);
115 const LLT nxv2s64 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: s64);
116 const LLT nxv4s64 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: s64);
117 const LLT nxv8s64 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: s64);
118
119 const LLT nxv1p0 = LLT::scalable_vector(MinNumElements: 1, ScalarTy: p0);
120 const LLT nxv2p0 = LLT::scalable_vector(MinNumElements: 2, ScalarTy: p0);
121 const LLT nxv4p0 = LLT::scalable_vector(MinNumElements: 4, ScalarTy: p0);
122 const LLT nxv8p0 = LLT::scalable_vector(MinNumElements: 8, ScalarTy: p0);
123 const LLT nxv16p0 = LLT::scalable_vector(MinNumElements: 16, ScalarTy: p0);
124
125 using namespace TargetOpcode;
126
127 auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};
128
129 auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
130 nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
131 nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
132 nxv1s64, nxv2s64, nxv4s64, nxv8s64};
133
134 auto PtrVecTys = {nxv1p0, nxv2p0, nxv4p0, nxv8p0, nxv16p0};
135
136 getActionDefinitionsBuilder(Opcodes: {G_ADD, G_SUB})
137 .legalFor(Types: {sXLen})
138 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
139 .customFor(Pred: ST.is64Bit(), Types: {s32})
140 .widenScalarToNextPow2(TypeIdx: 0)
141 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
142
143 getActionDefinitionsBuilder(Opcodes: {G_AND, G_OR, G_XOR})
144 .legalFor(Types: {sXLen})
145 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
146 .widenScalarToNextPow2(TypeIdx: 0)
147 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
148
149 getActionDefinitionsBuilder(
150 Opcodes: {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();
151
152 getActionDefinitionsBuilder(Opcodes: {G_SADDO, G_SSUBO}).minScalar(TypeIdx: 0, Ty: sXLen).lower();
153
154 // TODO: Use Vector Single-Width Saturating Instructions for vector types.
155 getActionDefinitionsBuilder(Opcodes: {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
156 .lower();
157
158 getActionDefinitionsBuilder(Opcodes: {G_SHL, G_ASHR, G_LSHR})
159 .legalFor(Types: {{sXLen, sXLen}})
160 .customFor(Pred: ST.is64Bit(), Types: {{s32, s32}})
161 .widenScalarToNextPow2(TypeIdx: 0)
162 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen)
163 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
164
165 getActionDefinitionsBuilder(Opcodes: {G_ZEXT, G_SEXT, G_ANYEXT})
166 .legalFor(Types: {{s32, s16}})
167 .legalFor(Pred: ST.is64Bit(), Types: {{s64, s16}, {s64, s32}})
168 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
169 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)))
170 .customIf(Predicate: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST))
171 .maxScalar(TypeIdx: 0, Ty: sXLen);
172
173 getActionDefinitionsBuilder(Opcode: G_SEXT_INREG)
174 .customFor(Types: {sXLen})
175 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
176 .lower();
177
178 // Merge/Unmerge
179 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
180 auto &MergeUnmergeActions = getActionDefinitionsBuilder(Opcode: Op);
181 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
182 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
183 if (XLen == 32 && ST.hasStdExtD()) {
184 MergeUnmergeActions.legalIf(
185 Predicate: all(P0: typeIs(TypeIdx: BigTyIdx, TypesInit: s64), P1: typeIs(TypeIdx: LitTyIdx, TypesInit: s32)));
186 }
187 MergeUnmergeActions.widenScalarToNextPow2(TypeIdx: LitTyIdx, MinSize: XLen)
188 .widenScalarToNextPow2(TypeIdx: BigTyIdx, MinSize: XLen)
189 .clampScalar(TypeIdx: LitTyIdx, MinTy: sXLen, MaxTy: sXLen)
190 .clampScalar(TypeIdx: BigTyIdx, MinTy: sXLen, MaxTy: sXLen);
191 }
192
193 getActionDefinitionsBuilder(Opcodes: {G_FSHL, G_FSHR}).lower();
194
195 getActionDefinitionsBuilder(Opcodes: {G_ROTR, G_ROTL})
196 .legalFor(Pred: ST.hasStdExtZbb() || ST.hasStdExtZbkb(), Types: {{sXLen, sXLen}})
197 .customFor(Pred: ST.is64Bit() && (ST.hasStdExtZbb() || ST.hasStdExtZbkb()),
198 Types: {{s32, s32}})
199 .lower();
200
201 getActionDefinitionsBuilder(Opcode: G_BITREVERSE).maxScalar(TypeIdx: 0, Ty: sXLen).lower();
202
203 getActionDefinitionsBuilder(Opcode: G_BITCAST).legalIf(
204 Predicate: all(P0: LegalityPredicates::any(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
205 P1: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST)),
206 P1: LegalityPredicates::any(P0: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST),
207 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST))));
208
209 auto &BSWAPActions = getActionDefinitionsBuilder(Opcode: G_BSWAP);
210 if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
211 BSWAPActions.legalFor(Types: {sXLen}).clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
212 else
213 BSWAPActions.maxScalar(TypeIdx: 0, Ty: sXLen).lower();
214
215 auto &CountZerosActions = getActionDefinitionsBuilder(Opcodes: {G_CTLZ, G_CTTZ});
216 auto &CountZerosUndefActions =
217 getActionDefinitionsBuilder(Opcodes: {G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
218 if (ST.hasStdExtZbb()) {
219 CountZerosActions.legalFor(Types: {{sXLen, sXLen}})
220 .customFor(Types: {{s32, s32}})
221 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen)
222 .widenScalarToNextPow2(TypeIdx: 0)
223 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
224 } else {
225 CountZerosActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0).lower();
226 CountZerosUndefActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
227 }
228 CountZerosUndefActions.lower();
229
230 auto &CTPOPActions = getActionDefinitionsBuilder(Opcode: G_CTPOP);
231 if (ST.hasStdExtZbb()) {
232 CTPOPActions.legalFor(Types: {{sXLen, sXLen}})
233 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
234 .scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0);
235 } else {
236 CTPOPActions.maxScalar(TypeIdx: 0, Ty: sXLen).scalarSameSizeAs(TypeIdx: 1, SameSizeIdx: 0).lower();
237 }
238
239 getActionDefinitionsBuilder(Opcode: G_CONSTANT)
240 .legalFor(Types: {p0})
241 .legalFor(Pred: !ST.is64Bit(), Types: {s32})
242 .customFor(Pred: ST.is64Bit(), Types: {s64})
243 .widenScalarToNextPow2(TypeIdx: 0)
244 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
245
246 // TODO: transform illegal vector types into legal vector type
247 getActionDefinitionsBuilder(Opcode: G_FREEZE)
248 .legalFor(Types: {s16, s32, p0})
249 .legalFor(Pred: ST.is64Bit(), Types: {s64})
250 .legalIf(Predicate: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST))
251 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
252 .widenScalarToNextPow2(TypeIdx: 0)
253 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen);
254
255 // TODO: transform illegal vector types into legal vector type
256 // TODO: Merge with G_FREEZE?
257 getActionDefinitionsBuilder(
258 Opcodes: {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
259 .legalFor(Types: {s32, sXLen, p0})
260 .legalIf(Predicate: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST))
261 .legalIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST))
262 .widenScalarToNextPow2(TypeIdx: 0)
263 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen);
264
265 getActionDefinitionsBuilder(Opcode: G_ICMP)
266 .legalFor(Types: {{sXLen, sXLen}, {sXLen, p0}})
267 .legalIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
268 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)))
269 .widenScalarOrEltToNextPow2OrMinSize(TypeIdx: 1, MinSize: 8)
270 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen)
271 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
272
273 getActionDefinitionsBuilder(Opcode: G_SELECT)
274 .legalFor(Types: {{s32, sXLen}, {p0, sXLen}})
275 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
276 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)))
277 .legalFor(Pred: XLen == 64 || ST.hasStdExtD(), Types: {{s64, sXLen}})
278 .widenScalarToNextPow2(TypeIdx: 0)
279 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
280 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
281
282 auto &LoadActions = getActionDefinitionsBuilder(Opcode: G_LOAD);
283 auto &StoreActions = getActionDefinitionsBuilder(Opcode: G_STORE);
284 auto &ExtLoadActions = getActionDefinitionsBuilder(Opcodes: {G_SEXTLOAD, G_ZEXTLOAD});
285
286 // Return the alignment needed for scalar memory ops. If unaligned scalar mem
287 // is supported, we only require byte alignment. Otherwise, we need the memory
288 // op to be natively aligned.
289 auto getScalarMemAlign = [&ST](unsigned Size) {
290 return ST.enableUnalignedScalarMem() ? 8 : Size;
291 };
292
293 LoadActions.legalForTypesWithMemDesc(
294 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
295 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
296 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
297 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
298 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
299 {.Type0: p0, .Type1: p0, .MemTy: sXLen, .Align: getScalarMemAlign(XLen)}});
300 StoreActions.legalForTypesWithMemDesc(
301 TypesAndMemDesc: {{.Type0: s16, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
302 {.Type0: s32, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
303 {.Type0: s16, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
304 {.Type0: s32, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
305 {.Type0: s32, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
306 {.Type0: p0, .Type1: p0, .MemTy: sXLen, .Align: getScalarMemAlign(XLen)}});
307 ExtLoadActions.legalForTypesWithMemDesc(
308 TypesAndMemDesc: {{.Type0: sXLen, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
309 {.Type0: sXLen, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)}});
310 if (XLen == 64) {
311 LoadActions.legalForTypesWithMemDesc(
312 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
313 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
314 {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
315 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
316 StoreActions.legalForTypesWithMemDesc(
317 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s8, .Align: getScalarMemAlign(8)},
318 {.Type0: s64, .Type1: p0, .MemTy: s16, .Align: getScalarMemAlign(16)},
319 {.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)},
320 {.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
321 ExtLoadActions.legalForTypesWithMemDesc(
322 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s32, .Align: getScalarMemAlign(32)}});
323 } else if (ST.hasStdExtD()) {
324 LoadActions.legalForTypesWithMemDesc(
325 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
326 StoreActions.legalForTypesWithMemDesc(
327 TypesAndMemDesc: {{.Type0: s64, .Type1: p0, .MemTy: s64, .Align: getScalarMemAlign(64)}});
328 }
329
330 // Vector loads/stores.
331 if (ST.hasVInstructions()) {
332 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv2s8, .Type1: p0, .MemTy: nxv2s8, .Align: 8},
333 {.Type0: nxv4s8, .Type1: p0, .MemTy: nxv4s8, .Align: 8},
334 {.Type0: nxv8s8, .Type1: p0, .MemTy: nxv8s8, .Align: 8},
335 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
336 {.Type0: nxv32s8, .Type1: p0, .MemTy: nxv32s8, .Align: 8},
337 {.Type0: nxv64s8, .Type1: p0, .MemTy: nxv64s8, .Align: 8},
338 {.Type0: nxv2s16, .Type1: p0, .MemTy: nxv2s16, .Align: 16},
339 {.Type0: nxv4s16, .Type1: p0, .MemTy: nxv4s16, .Align: 16},
340 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 16},
341 {.Type0: nxv16s16, .Type1: p0, .MemTy: nxv16s16, .Align: 16},
342 {.Type0: nxv32s16, .Type1: p0, .MemTy: nxv32s16, .Align: 16},
343 {.Type0: nxv2s32, .Type1: p0, .MemTy: nxv2s32, .Align: 32},
344 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 32},
345 {.Type0: nxv8s32, .Type1: p0, .MemTy: nxv8s32, .Align: 32},
346 {.Type0: nxv16s32, .Type1: p0, .MemTy: nxv16s32, .Align: 32}});
347 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv2s8, .Type1: p0, .MemTy: nxv2s8, .Align: 8},
348 {.Type0: nxv4s8, .Type1: p0, .MemTy: nxv4s8, .Align: 8},
349 {.Type0: nxv8s8, .Type1: p0, .MemTy: nxv8s8, .Align: 8},
350 {.Type0: nxv16s8, .Type1: p0, .MemTy: nxv16s8, .Align: 8},
351 {.Type0: nxv32s8, .Type1: p0, .MemTy: nxv32s8, .Align: 8},
352 {.Type0: nxv64s8, .Type1: p0, .MemTy: nxv64s8, .Align: 8},
353 {.Type0: nxv2s16, .Type1: p0, .MemTy: nxv2s16, .Align: 16},
354 {.Type0: nxv4s16, .Type1: p0, .MemTy: nxv4s16, .Align: 16},
355 {.Type0: nxv8s16, .Type1: p0, .MemTy: nxv8s16, .Align: 16},
356 {.Type0: nxv16s16, .Type1: p0, .MemTy: nxv16s16, .Align: 16},
357 {.Type0: nxv32s16, .Type1: p0, .MemTy: nxv32s16, .Align: 16},
358 {.Type0: nxv2s32, .Type1: p0, .MemTy: nxv2s32, .Align: 32},
359 {.Type0: nxv4s32, .Type1: p0, .MemTy: nxv4s32, .Align: 32},
360 {.Type0: nxv8s32, .Type1: p0, .MemTy: nxv8s32, .Align: 32},
361 {.Type0: nxv16s32, .Type1: p0, .MemTy: nxv16s32, .Align: 32}});
362
363 if (ST.getELen() == 64) {
364 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s8, .Type1: p0, .MemTy: nxv1s8, .Align: 8},
365 {.Type0: nxv1s16, .Type1: p0, .MemTy: nxv1s16, .Align: 16},
366 {.Type0: nxv1s32, .Type1: p0, .MemTy: nxv1s32, .Align: 32}});
367 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s8, .Type1: p0, .MemTy: nxv1s8, .Align: 8},
368 {.Type0: nxv1s16, .Type1: p0, .MemTy: nxv1s16, .Align: 16},
369 {.Type0: nxv1s32, .Type1: p0, .MemTy: nxv1s32, .Align: 32}});
370 }
371
372 if (ST.hasVInstructionsI64()) {
373 LoadActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s64, .Type1: p0, .MemTy: nxv1s64, .Align: 64},
374 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 64},
375 {.Type0: nxv4s64, .Type1: p0, .MemTy: nxv4s64, .Align: 64},
376 {.Type0: nxv8s64, .Type1: p0, .MemTy: nxv8s64, .Align: 64}});
377 StoreActions.legalForTypesWithMemDesc(TypesAndMemDesc: {{.Type0: nxv1s64, .Type1: p0, .MemTy: nxv1s64, .Align: 64},
378 {.Type0: nxv2s64, .Type1: p0, .MemTy: nxv2s64, .Align: 64},
379 {.Type0: nxv4s64, .Type1: p0, .MemTy: nxv4s64, .Align: 64},
380 {.Type0: nxv8s64, .Type1: p0, .MemTy: nxv8s64, .Align: 64}});
381 }
382
    // Use the custom lowering logic for scalable vector types with
    // non-standard alignments.
385 LoadActions.customIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST));
386 StoreActions.customIf(Predicate: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST));
387
388 // Pointers require that XLen sized elements are legal.
389 if (XLen <= ST.getELen()) {
390 LoadActions.customIf(Predicate: typeIsLegalPtrVec(TypeIdx: 0, PtrVecTys, ST));
391 StoreActions.customIf(Predicate: typeIsLegalPtrVec(TypeIdx: 0, PtrVecTys, ST));
392 }
393 }
394
395 LoadActions.widenScalarToNextPow2(TypeIdx: 0, /* MinSize = */ 8)
396 .lowerIfMemSizeNotByteSizePow2()
397 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen)
398 .lower();
399 StoreActions
400 .clampScalar(TypeIdx: 0, MinTy: s16, MaxTy: sXLen)
401 .lowerIfMemSizeNotByteSizePow2()
402 .lower();
403
404 ExtLoadActions.widenScalarToNextPow2(TypeIdx: 0).clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen).lower();
405
406 getActionDefinitionsBuilder(Opcodes: {G_PTR_ADD, G_PTRMASK}).legalFor(Types: {{p0, sXLen}});
407
408 getActionDefinitionsBuilder(Opcode: G_PTRTOINT)
409 .legalFor(Types: {{sXLen, p0}})
410 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
411
412 getActionDefinitionsBuilder(Opcode: G_INTTOPTR)
413 .legalFor(Types: {{p0, sXLen}})
414 .clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
415
416 getActionDefinitionsBuilder(Opcode: G_BRCOND).legalFor(Types: {sXLen}).minScalar(TypeIdx: 0, Ty: sXLen);
417
418 getActionDefinitionsBuilder(Opcode: G_BRJT).customFor(Types: {{p0, sXLen}});
419
420 getActionDefinitionsBuilder(Opcode: G_BRINDIRECT).legalFor(Types: {p0});
421
422 getActionDefinitionsBuilder(Opcode: G_PHI)
423 .legalFor(Types: {p0, s32, sXLen})
424 .widenScalarToNextPow2(TypeIdx: 0)
425 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sXLen);
426
427 getActionDefinitionsBuilder(Opcodes: {G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
428 .legalFor(Types: {p0});
429
430 if (ST.hasStdExtZmmul()) {
431 getActionDefinitionsBuilder(Opcode: G_MUL)
432 .legalFor(Types: {sXLen})
433 .widenScalarToNextPow2(TypeIdx: 0)
434 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen);
435
436 // clang-format off
437 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH})
438 .legalFor(Types: {sXLen})
439 .lower();
440 // clang-format on
441
442 getActionDefinitionsBuilder(Opcodes: {G_SMULO, G_UMULO}).minScalar(TypeIdx: 0, Ty: sXLen).lower();
443 } else {
444 getActionDefinitionsBuilder(Opcode: G_MUL)
445 .libcallFor(Types: {sXLen, sDoubleXLen})
446 .widenScalarToNextPow2(TypeIdx: 0)
447 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen);
448
449 getActionDefinitionsBuilder(Opcodes: {G_SMULH, G_UMULH}).lowerFor(Types: {sXLen});
450
451 getActionDefinitionsBuilder(Opcodes: {G_SMULO, G_UMULO})
452 .minScalar(TypeIdx: 0, Ty: sXLen)
453 // Widen sXLen to sDoubleXLen so we can use a single libcall to get
454 // the low bits for the mul result and high bits to do the overflow
455 // check.
456 .widenScalarIf(Predicate: typeIs(TypeIdx: 0, TypesInit: sXLen),
457 Mutation: LegalizeMutations::changeTo(TypeIdx: 0, Ty: sDoubleXLen))
458 .lower();
459 }
460
461 if (ST.hasStdExtM()) {
462 getActionDefinitionsBuilder(Opcodes: {G_SDIV, G_UDIV, G_UREM})
463 .legalFor(Types: {sXLen})
464 .customFor(Types: {s32})
465 .libcallFor(Types: {sDoubleXLen})
466 .clampScalar(TypeIdx: 0, MinTy: s32, MaxTy: sDoubleXLen)
467 .widenScalarToNextPow2(TypeIdx: 0);
468 getActionDefinitionsBuilder(Opcode: G_SREM)
469 .legalFor(Types: {sXLen})
470 .libcallFor(Types: {sDoubleXLen})
471 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen)
472 .widenScalarToNextPow2(TypeIdx: 0);
473 } else {
474 getActionDefinitionsBuilder(Opcodes: {G_UDIV, G_SDIV, G_UREM, G_SREM})
475 .libcallFor(Types: {sXLen, sDoubleXLen})
476 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sDoubleXLen)
477 .widenScalarToNextPow2(TypeIdx: 0);
478 }
479
480 // TODO: Use libcall for sDoubleXLen.
481 getActionDefinitionsBuilder(Opcodes: {G_SDIVREM, G_UDIVREM}).lower();
482
483 getActionDefinitionsBuilder(Opcode: G_ABS)
484 .customFor(Pred: ST.hasStdExtZbb(), Types: {sXLen})
485 .minScalar(Pred: ST.hasStdExtZbb(), TypeIdx: 0, Ty: sXLen)
486 .lower();
487
488 getActionDefinitionsBuilder(Opcodes: {G_UMAX, G_UMIN, G_SMAX, G_SMIN})
489 .legalFor(Pred: ST.hasStdExtZbb(), Types: {sXLen})
490 .minScalar(Pred: ST.hasStdExtZbb(), TypeIdx: 0, Ty: sXLen)
491 .lower();
492
493 getActionDefinitionsBuilder(Opcodes: {G_SCMP, G_UCMP}).lower();
494
495 getActionDefinitionsBuilder(Opcode: G_FRAME_INDEX).legalFor(Types: {p0});
496
497 getActionDefinitionsBuilder(Opcodes: {G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
498
499 getActionDefinitionsBuilder(Opcodes: {G_DYN_STACKALLOC, G_STACKSAVE, G_STACKRESTORE})
500 .lower();
501
502 // FP Operations
503
504 // FIXME: Support s128 for rv32 when libcall handling is able to use sret.
505 getActionDefinitionsBuilder(
506 Opcodes: {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM})
507 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
508 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
509 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
510 .libcallFor(Types: {s32, s64})
511 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
512
513 getActionDefinitionsBuilder(Opcodes: {G_FNEG, G_FABS})
514 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
515 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
516 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
517 .lowerFor(Types: {s32, s64, s128});
518
519 getActionDefinitionsBuilder(Opcode: G_FREM)
520 .libcallFor(Types: {s32, s64})
521 .libcallFor(Pred: ST.is64Bit(), Types: {s128})
522 .minScalar(TypeIdx: 0, Ty: s32)
523 .scalarize(TypeIdx: 0);
524
525 getActionDefinitionsBuilder(Opcode: G_FCOPYSIGN)
526 .legalFor(Pred: ST.hasStdExtF(), Types: {{s32, s32}})
527 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, s64}, {s32, s64}, {s64, s32}})
528 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, s16}, {s16, s32}, {s32, s16}})
529 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s16, s64}, {s64, s16}})
530 .lower();
531
532 // FIXME: Use Zfhmin.
533 getActionDefinitionsBuilder(Opcode: G_FPTRUNC)
534 .legalFor(Pred: ST.hasStdExtD(), Types: {{s32, s64}})
535 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, s32}})
536 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s16, s64}})
537 .libcallFor(Types: {{s32, s64}})
538 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}});
539 getActionDefinitionsBuilder(Opcode: G_FPEXT)
540 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, s32}})
541 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s32, s16}})
542 .legalFor(Pred: ST.hasStdExtZfh() && ST.hasStdExtD(), Types: {{s64, s16}})
543 .libcallFor(Types: {{s64, s32}})
544 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}});
545
546 getActionDefinitionsBuilder(Opcode: G_FCMP)
547 .legalFor(Pred: ST.hasStdExtF(), Types: {{sXLen, s32}})
548 .legalFor(Pred: ST.hasStdExtD(), Types: {{sXLen, s64}})
549 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{sXLen, s16}})
550 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
551 .libcallFor(Types: {{sXLen, s32}, {sXLen, s64}})
552 .libcallFor(Pred: ST.is64Bit(), Types: {{sXLen, s128}});
553
554 // TODO: Support vector version of G_IS_FPCLASS.
555 getActionDefinitionsBuilder(Opcode: G_IS_FPCLASS)
556 .customFor(Pred: ST.hasStdExtF(), Types: {{s1, s32}})
557 .customFor(Pred: ST.hasStdExtD(), Types: {{s1, s64}})
558 .customFor(Pred: ST.hasStdExtZfh(), Types: {{s1, s16}})
559 .lowerFor(Types: {{s1, s32}, {s1, s64}});
560
561 getActionDefinitionsBuilder(Opcode: G_FCONSTANT)
562 .legalFor(Pred: ST.hasStdExtF(), Types: {s32})
563 .legalFor(Pred: ST.hasStdExtD(), Types: {s64})
564 .legalFor(Pred: ST.hasStdExtZfh(), Types: {s16})
565 .lowerFor(Types: {s32, s64, s128});
566
567 getActionDefinitionsBuilder(Opcodes: {G_FPTOSI, G_FPTOUI})
568 .legalFor(Pred: ST.hasStdExtF(), Types: {{sXLen, s32}})
569 .legalFor(Pred: ST.hasStdExtD(), Types: {{sXLen, s64}})
570 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{sXLen, s16}})
571 .customFor(Pred: ST.is64Bit() && ST.hasStdExtF(), Types: {{s32, s32}})
572 .customFor(Pred: ST.is64Bit() && ST.hasStdExtD(), Types: {{s32, s64}})
573 .customFor(Pred: ST.is64Bit() && ST.hasStdExtZfh(), Types: {{s32, s16}})
574 .widenScalarToNextPow2(TypeIdx: 0)
575 .minScalar(TypeIdx: 0, Ty: s32)
576 .libcallFor(Types: {{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
577 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}}) // FIXME RV32.
578 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}, {s128, s128}});
579
580 getActionDefinitionsBuilder(Opcodes: {G_SITOFP, G_UITOFP})
581 .legalFor(Pred: ST.hasStdExtF(), Types: {{s32, sXLen}})
582 .legalFor(Pred: ST.hasStdExtD(), Types: {{s64, sXLen}})
583 .legalFor(Pred: ST.hasStdExtZfh(), Types: {{s16, sXLen}})
584 .widenScalarToNextPow2(TypeIdx: 1)
585 // Promote to XLen if the operation is legal.
586 .widenScalarIf(
587 Predicate: [=, &ST](const LegalityQuery &Query) {
588 return Query.Types[0].isScalar() && Query.Types[1].isScalar() &&
589 (Query.Types[1].getSizeInBits() < ST.getXLen()) &&
590 ((ST.hasStdExtF() && Query.Types[0].getSizeInBits() == 32) ||
591 (ST.hasStdExtD() && Query.Types[0].getSizeInBits() == 64) ||
592 (ST.hasStdExtZfh() &&
593 Query.Types[0].getSizeInBits() == 16));
594 },
595 Mutation: LegalizeMutations::changeTo(TypeIdx: 1, Ty: sXLen))
596 // Otherwise only promote to s32 since we have si libcalls.
597 .minScalar(TypeIdx: 1, Ty: s32)
598 .libcallFor(Types: {{s32, s32}, {s64, s32}, {s32, s64}, {s64, s64}})
599 .libcallFor(Pred: ST.is64Bit(), Types: {{s128, s32}, {s128, s64}}) // FIXME RV32.
600 .libcallFor(Pred: ST.is64Bit(), Types: {{s32, s128}, {s64, s128}, {s128, s128}});
601
602 // FIXME: We can do custom inline expansion like SelectionDAG.
603 getActionDefinitionsBuilder(Opcodes: {G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
604 G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
605 G_INTRINSIC_ROUNDEVEN})
606 .legalFor(Pred: ST.hasStdExtZfa(), Types: {s32})
607 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtD(), Types: {s64})
608 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtZfh(), Types: {s16})
609 .libcallFor(Types: {s32, s64})
610 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
611
612 getActionDefinitionsBuilder(Opcodes: {G_FMAXIMUM, G_FMINIMUM})
613 .legalFor(Pred: ST.hasStdExtZfa(), Types: {s32})
614 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtD(), Types: {s64})
615 .legalFor(Pred: ST.hasStdExtZfa() && ST.hasStdExtZfh(), Types: {s16});
616
617 getActionDefinitionsBuilder(Opcodes: {G_FCOS, G_FSIN, G_FTAN, G_FPOW, G_FLOG, G_FLOG2,
618 G_FLOG10, G_FEXP, G_FEXP2, G_FEXP10, G_FACOS,
619 G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH,
620 G_FTANH})
621 .libcallFor(Types: {s32, s64})
622 .libcallFor(Pred: ST.is64Bit(), Types: {s128});
623 getActionDefinitionsBuilder(Opcodes: {G_FPOWI, G_FLDEXP})
624 .libcallFor(Types: {{s32, s32}, {s64, s32}})
625 .libcallFor(Pred: ST.is64Bit(), Types: {s128, s32});
626
627 getActionDefinitionsBuilder(Opcode: G_VASTART).customFor(Types: {p0});
628
629 // va_list must be a pointer, but most sized types are pretty easy to handle
630 // as the destination.
631 getActionDefinitionsBuilder(Opcode: G_VAARG)
632 // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
633 // other than sXLen.
634 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
635 .lowerForCartesianProduct(Types0: {sXLen, p0}, Types1: {p0});
636
637 getActionDefinitionsBuilder(Opcode: G_VSCALE)
638 .clampScalar(TypeIdx: 0, MinTy: sXLen, MaxTy: sXLen)
639 .customFor(Types: {sXLen});
640
641 auto &SplatActions =
642 getActionDefinitionsBuilder(Opcode: G_SPLAT_VECTOR)
643 .legalIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
644 P1: typeIs(TypeIdx: 1, TypesInit: sXLen)))
645 .customIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST), P1: typeIs(TypeIdx: 1, TypesInit: s1)));
646 // Handle case of s64 element vectors on RV32. If the subtarget does not have
647 // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
648 // does have f64, then we don't know whether the type is an f64 or an i64,
649 // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
650 // depending on how the instructions it consumes are legalized. They are not
651 // legalized yet since legalization is in reverse postorder, so we cannot
652 // make the decision at this moment.
653 if (XLen == 32) {
654 if (ST.hasVInstructionsF64() && ST.hasStdExtD())
655 SplatActions.legalIf(Predicate: all(
656 P0: typeInSet(TypeIdx: 0, TypesInit: {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), P1: typeIs(TypeIdx: 1, TypesInit: s64)));
657 else if (ST.hasVInstructionsI64())
658 SplatActions.customIf(Predicate: all(
659 P0: typeInSet(TypeIdx: 0, TypesInit: {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), P1: typeIs(TypeIdx: 1, TypesInit: s64)));
660 }
661
662 SplatActions.clampScalar(TypeIdx: 1, MinTy: sXLen, MaxTy: sXLen);
663
664 LegalityPredicate ExtractSubvecBitcastPred = [=](const LegalityQuery &Query) {
665 LLT DstTy = Query.Types[0];
666 LLT SrcTy = Query.Types[1];
667 return DstTy.getElementType() == LLT::scalar(SizeInBits: 1) &&
668 DstTy.getElementCount().getKnownMinValue() >= 8 &&
669 SrcTy.getElementCount().getKnownMinValue() >= 8;
670 };
671 getActionDefinitionsBuilder(Opcode: G_EXTRACT_SUBVECTOR)
672 // We don't have the ability to slide mask vectors down indexed by their
673 // i1 elements; the smallest we can do is i8. Often we are able to bitcast
674 // to equivalent i8 vectors.
675 .bitcastIf(
676 Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
677 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST), args: ExtractSubvecBitcastPred),
678 Mutation: [=](const LegalityQuery &Query) {
679 LLT CastTy = LLT::vector(
680 EC: Query.Types[0].getElementCount().divideCoefficientBy(RHS: 8), ScalarSizeInBits: 8);
681 return std::pair(0, CastTy);
682 })
683 .customIf(Predicate: LegalityPredicates::any(
684 P0: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
685 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)),
686 P1: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
687 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST))));
688
689 getActionDefinitionsBuilder(Opcode: G_INSERT_SUBVECTOR)
690 .customIf(Predicate: all(P0: typeIsLegalBoolVec(TypeIdx: 0, BoolVecTys, ST),
691 P1: typeIsLegalBoolVec(TypeIdx: 1, BoolVecTys, ST)))
692 .customIf(Predicate: all(P0: typeIsLegalIntOrFPVec(TypeIdx: 0, IntOrFPVecTys, ST),
693 P1: typeIsLegalIntOrFPVec(TypeIdx: 1, IntOrFPVecTys, ST)));
694
695 getLegacyLegalizerInfo().computeTables();
696 verify(MII: *ST.getInstrInfo());
697}
698
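// Custom intrinsic legalization. Only llvm.vacopy is handled here: it is
// expanded to a load of the source va_list pointer followed by a store into
// the destination va_list.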
699bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
700 MachineInstr &MI) const {
701 Intrinsic::ID IntrinsicID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
702 switch (IntrinsicID) {
703 default:
704 return false;
705 case Intrinsic::vacopy: {
706 // vacopy arguments must be legal because of the intrinsic signature.
707 // No need to check here.
708
709 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
710 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
711 MachineFunction &MF = *MI.getMF();
712 const DataLayout &DL = MIRBuilder.getDataLayout();
713 LLVMContext &Ctx = MF.getFunction().getContext();
714
715 Register DstLst = MI.getOperand(i: 1).getReg();
716 LLT PtrTy = MRI.getType(Reg: DstLst);
717
718 // Load the source va_list
719 Align Alignment = DL.getABITypeAlign(Ty: getTypeForLLT(Ty: PtrTy, C&: Ctx));
720 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
721 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad, MemTy: PtrTy, base_alignment: Alignment);
722 auto Tmp = MIRBuilder.buildLoad(Res: PtrTy, Addr: MI.getOperand(i: 2), MMO&: *LoadMMO);
723
724 // Store the result in the destination va_list
725 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
726 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOStore, MemTy: PtrTy, base_alignment: Alignment);
727 MIRBuilder.buildStore(Val: Tmp, Addr: DstLst, MMO&: *StoreMMO);
728
729 MI.eraseFromParent();
730 return true;
731 }
732 }
733}
734
735bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
736 MachineIRBuilder &MIRBuilder) const {
  // Store the address of the VarArgsFrameIndex slot into the memory location
  // pointed to by the va_list operand.
738 assert(MI.getOpcode() == TargetOpcode::G_VASTART);
739 MachineFunction *MF = MI.getParent()->getParent();
740 RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
741 int FI = FuncInfo->getVarArgsFrameIndex();
742 LLT AddrTy = MIRBuilder.getMRI()->getType(Reg: MI.getOperand(i: 0).getReg());
743 auto FINAddr = MIRBuilder.buildFrameIndex(Res: AddrTy, Idx: FI);
744 assert(MI.hasOneMemOperand());
745 MIRBuilder.buildStore(Val: FINAddr, Addr: MI.getOperand(i: 0).getReg(),
746 MMO&: *MI.memoperands()[0]);
747 MI.eraseFromParent();
748 return true;
749}
750
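// Custom-lower G_BRJT: scale the index by the jump table entry size, load the
// entry, materialize the target address according to the entry kind, and
// branch to it with G_BRINDIRECT.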
751bool RISCVLegalizerInfo::legalizeBRJT(MachineInstr &MI,
752 MachineIRBuilder &MIRBuilder) const {
753 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
754 auto &MF = *MI.getParent()->getParent();
755 const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
756 unsigned EntrySize = MJTI->getEntrySize(TD: MF.getDataLayout());
757
758 Register PtrReg = MI.getOperand(i: 0).getReg();
759 LLT PtrTy = MRI.getType(Reg: PtrReg);
760 Register IndexReg = MI.getOperand(i: 2).getReg();
761 LLT IndexTy = MRI.getType(Reg: IndexReg);
762
763 if (!isPowerOf2_32(Value: EntrySize))
764 return false;
765
766 auto ShiftAmt = MIRBuilder.buildConstant(Res: IndexTy, Val: Log2_32(Value: EntrySize));
767 IndexReg = MIRBuilder.buildShl(Dst: IndexTy, Src0: IndexReg, Src1: ShiftAmt).getReg(Idx: 0);
768
769 auto Addr = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: PtrReg, Op1: IndexReg);
770
771 MachineMemOperand *MMO = MF.getMachineMemOperand(
772 PtrInfo: MachinePointerInfo::getJumpTable(MF), F: MachineMemOperand::MOLoad,
773 Size: EntrySize, BaseAlignment: Align(MJTI->getEntryAlignment(TD: MF.getDataLayout())));
774
775 Register TargetReg;
776 switch (MJTI->getEntryKind()) {
777 default:
778 return false;
779 case MachineJumpTableInfo::EK_LabelDifference32: {
780 // For PIC, the sequence is:
781 // BRIND(load(Jumptable + index) + RelocBase)
782 // RelocBase can be JumpTable, GOT or some sort of global base.
783 unsigned LoadOpc =
784 STI.is64Bit() ? TargetOpcode::G_SEXTLOAD : TargetOpcode::G_LOAD;
785 auto Load = MIRBuilder.buildLoadInstr(Opcode: LoadOpc, Res: IndexTy, Addr, MMO&: *MMO);
786 TargetReg = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: PtrReg, Op1: Load).getReg(Idx: 0);
787 break;
788 }
789 case MachineJumpTableInfo::EK_Custom32: {
790 auto Load = MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_SEXTLOAD, Res: IndexTy,
791 Addr, MMO&: *MMO);
792 TargetReg = MIRBuilder.buildIntToPtr(Dst: PtrTy, Src: Load).getReg(Idx: 0);
793 break;
794 }
795 case MachineJumpTableInfo::EK_BlockAddress:
796 TargetReg = MIRBuilder.buildLoad(Res: PtrTy, Addr, MMO&: *MMO).getReg(Idx: 0);
797 break;
798 }
799
800 MIRBuilder.buildBrIndirect(Tgt: TargetReg);
801
802 MI.eraseFromParent();
803 return true;
804}
805
806bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm,
807 bool ShouldOptForSize) const {
808 assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
809 int64_t Imm = APImm.getSExtValue();
810 // All simm32 constants should be handled by isel.
811 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
812 // this check redundant, but small immediates are common so this check
813 // should have better compile time.
814 if (isInt<32>(x: Imm))
815 return false;
816
817 // We only need to cost the immediate, if constant pool lowering is enabled.
818 if (!STI.useConstantPoolForLargeInts())
819 return false;
820
821 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val: Imm, STI);
822 if (Seq.size() <= STI.getMaxBuildIntsCost())
823 return false;
824
825 // Optimizations below are disabled for opt size. If we're optimizing for
826 // size, use a constant pool.
827 if (ShouldOptForSize)
828 return true;
  // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
  // do that if it will avoid a constant pool. It will require an extra
  // temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // the low and high 32 bits are the same and bits 31 and 63 are set.
835 unsigned ShiftAmt, AddOpc;
836 RISCVMatInt::InstSeq SeqLo =
837 RISCVMatInt::generateTwoRegInstSeq(Val: Imm, STI, ShiftAmt, AddOpc);
838 return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
839}
840
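// Custom-lower G_VSCALE by computing it from VLENB (vscale == VLENB / 8),
// turning power-of-two multiples into shifts where possible.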
841bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
842 MachineIRBuilder &MIB) const {
843 const LLT XLenTy(STI.getXLenVT());
844 Register Dst = MI.getOperand(i: 0).getReg();
845
846 // We define our scalable vector types for lmul=1 to use a 64 bit known
847 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
848 // vscale as VLENB / 8.
849 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
850 if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
851 // Support for VLEN==32 is incomplete.
852 return false;
853
854 // We assume VLENB is a multiple of 8. We manually choose the best shift
855 // here because SimplifyDemandedBits isn't always able to simplify it.
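  // For example, vscale * 2 == (VLENB / 8) * 2 == VLENB >> 2, and
  // vscale * 16 == (VLENB / 8) * 16 == VLENB << 1.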
856 uint64_t Val = MI.getOperand(i: 1).getCImm()->getZExtValue();
857 if (isPowerOf2_64(Value: Val)) {
858 uint64_t Log2 = Log2_64(Value: Val);
859 if (Log2 < 3) {
860 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
861 MIB.buildLShr(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: 3 - Log2));
862 } else if (Log2 > 3) {
863 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
864 MIB.buildShl(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: Log2 - 3));
865 } else {
866 MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {Dst}, SrcOps: {});
867 }
868 } else if ((Val % 8) == 0) {
869 // If the multiplier is a multiple of 8, scale it down to avoid needing
870 // to shift the VLENB value.
871 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
872 MIB.buildMul(Dst, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: Val / 8));
873 } else {
874 auto VLENB = MIB.buildInstr(Opc: RISCV::G_READ_VLENB, DstOps: {XLenTy}, SrcOps: {});
875 auto VScale = MIB.buildLShr(Dst: XLenTy, Src0: VLENB, Src1: MIB.buildConstant(Res: XLenTy, Val: 3));
876 MIB.buildMul(Dst, Src0: VScale, Src1: MIB.buildConstant(Res: XLenTy, Val));
877 }
878 MI.eraseFromParent();
879 return true;
880}
881
882// Custom-lower extensions from mask vectors by using a vselect either with 1
883// for zero/any-extension or -1 for sign-extension:
884// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
885// Note that any-extension is lowered identically to zero-extension.
886bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
887 MachineIRBuilder &MIB) const {
888
889 unsigned Opc = MI.getOpcode();
890 assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
891 Opc == TargetOpcode::G_ANYEXT);
892
893 MachineRegisterInfo &MRI = *MIB.getMRI();
894 Register Dst = MI.getOperand(i: 0).getReg();
895 Register Src = MI.getOperand(i: 1).getReg();
896
897 LLT DstTy = MRI.getType(Reg: Dst);
898 int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
899 LLT DstEltTy = DstTy.getElementType();
900 auto SplatZero = MIB.buildSplatVector(Res: DstTy, Val: MIB.buildConstant(Res: DstEltTy, Val: 0));
901 auto SplatTrue =
902 MIB.buildSplatVector(Res: DstTy, Val: MIB.buildConstant(Res: DstEltTy, Val: ExtTrueVal));
903 MIB.buildSelect(Res: Dst, Tst: Src, Op0: SplatTrue, Op1: SplatZero);
904
905 MI.eraseFromParent();
906 return true;
907}
908
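// Custom legalization for vector G_LOAD/G_STORE whose alignment is not
// supported natively: bitcast the vector value to an equivalent i8-element
// vector so the access only needs byte alignment.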
909bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
910 LegalizerHelper &Helper,
911 MachineIRBuilder &MIB) const {
912 assert((isa<GLoad>(MI) || isa<GStore>(MI)) &&
913 "Machine instructions must be Load/Store.");
914 MachineRegisterInfo &MRI = *MIB.getMRI();
915 MachineFunction *MF = MI.getMF();
916 const DataLayout &DL = MIB.getDataLayout();
917 LLVMContext &Ctx = MF->getFunction().getContext();
918
919 Register DstReg = MI.getOperand(i: 0).getReg();
920 LLT DataTy = MRI.getType(Reg: DstReg);
921 if (!DataTy.isVector())
922 return false;
923
924 if (!MI.hasOneMemOperand())
925 return false;
926
927 MachineMemOperand *MMO = *MI.memoperands_begin();
928
929 const auto *TLI = STI.getTargetLowering();
930 EVT VT = EVT::getEVT(Ty: getTypeForLLT(Ty: DataTy, C&: Ctx));
931
932 if (TLI->allowsMemoryAccessForAlignment(Context&: Ctx, DL, VT, MMO: *MMO))
933 return true;
934
935 unsigned EltSizeBits = DataTy.getScalarSizeInBits();
936 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
937 "Unexpected unaligned RVV load type");
938
939 // Calculate the new vector type with i8 elements
940 unsigned NumElements =
941 DataTy.getElementCount().getKnownMinValue() * (EltSizeBits / 8);
942 LLT NewDataTy = LLT::scalable_vector(MinNumElements: NumElements, ScalarSizeInBits: 8);
943
944 Helper.bitcast(MI, TypeIdx: 0, Ty: NewDataTy);
945
946 return true;
947}
948
/// Return the mask type suitable for masking the provided vector type. This
/// is simply an i1 element type vector of the same (possibly scalable) length.
952static LLT getMaskTypeFor(LLT VecTy) {
953 assert(VecTy.isVector());
954 ElementCount EC = VecTy.getElementCount();
955 return LLT::vector(EC, ScalarTy: LLT::scalar(SizeInBits: 1));
956}
957
958/// Creates an all ones mask suitable for masking a vector of type VecTy with
959/// vector length VL.
960static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
961 MachineIRBuilder &MIB,
962 MachineRegisterInfo &MRI) {
963 LLT MaskTy = getMaskTypeFor(VecTy);
964 return MIB.buildInstr(Opc: RISCV::G_VMSET_VL, DstOps: {MaskTy}, SrcOps: {VL});
965}
966
967/// Gets the two common "VL" operands: an all-ones mask and the vector length.
968/// VecTy is a scalable vector type.
969static std::pair<MachineInstrBuilder, MachineInstrBuilder>
970buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
971 assert(VecTy.isScalableVector() && "Expecting scalable container type");
972 const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
973 LLT XLenTy(STI.getXLenVT());
974 auto VL = MIB.buildConstant(Res: XLenTy, Val: -1);
975 auto Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
976 return {Mask, VL};
977}
978
979static MachineInstrBuilder
980buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
981 Register Hi, const SrcOp &VL, MachineIRBuilder &MIB,
982 MachineRegisterInfo &MRI) {
  // TODO: If the Hi bits of the splat are undefined, then it's fine to just
  // splat Lo even if it might be sign extended. I don't think we have yet
  // introduced a case where we build an s64 whose upper bits are undef.
987
988 // Fall back to a stack store and stride x0 vector load.
989 // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
990 // preprocessDAG in SDAG.
991 return MIB.buildInstr(Opc: RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, DstOps: {Dst},
992 SrcOps: {Passthru, Lo, Hi, VL});
993}
994
995static MachineInstrBuilder
996buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
997 const SrcOp &Scalar, const SrcOp &VL,
998 MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
999 assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
1000 auto Unmerge = MIB.buildUnmerge(Res: LLT::scalar(SizeInBits: 32), Op: Scalar);
1001 return buildSplatPartsS64WithVL(Dst, Passthru, Lo: Unmerge.getReg(Idx: 0),
1002 Hi: Unmerge.getReg(Idx: 1), VL, MIB, MRI);
1003}
1004
// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
// Splats of s1 types that have a constant value can be legalized as VMSET_VL
// or VMCLR_VL.
1009bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
1010 MachineIRBuilder &MIB) const {
1011 assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);
1012
1013 MachineRegisterInfo &MRI = *MIB.getMRI();
1014
1015 Register Dst = MI.getOperand(i: 0).getReg();
1016 Register SplatVal = MI.getOperand(i: 1).getReg();
1017
1018 LLT VecTy = MRI.getType(Reg: Dst);
1019 LLT XLenTy(STI.getXLenVT());
1020
1021 // Handle case of s64 element vectors on rv32
1022 if (XLenTy.getSizeInBits() == 32 &&
1023 VecTy.getElementType().getSizeInBits() == 64) {
1024 auto [_, VL] = buildDefaultVLOps(VecTy: MRI.getType(Reg: Dst), MIB, MRI);
1025 buildSplatSplitS64WithVL(Dst, Passthru: MIB.buildUndef(Res: VecTy), Scalar: SplatVal, VL, MIB,
1026 MRI);
1027 MI.eraseFromParent();
1028 return true;
1029 }
1030
1031 // All-zeros or all-ones splats are handled specially.
1032 MachineInstr &SplatValMI = *MRI.getVRegDef(Reg: SplatVal);
1033 if (isAllOnesOrAllOnesSplat(MI: SplatValMI, MRI)) {
1034 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1035 MIB.buildInstr(Opc: RISCV::G_VMSET_VL, DstOps: {Dst}, SrcOps: {VL});
1036 MI.eraseFromParent();
1037 return true;
1038 }
1039 if (isNullOrNullSplat(MI: SplatValMI, MRI)) {
1040 auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
1041 MIB.buildInstr(Opc: RISCV::G_VMCLR_VL, DstOps: {Dst}, SrcOps: {VL});
1042 MI.eraseFromParent();
1043 return true;
1044 }
1045
1046 // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
1047 // ones) by promoting it to an s8 splat.
1048 LLT InterEltTy = LLT::scalar(SizeInBits: 8);
1049 LLT InterTy = VecTy.changeElementType(NewEltTy: InterEltTy);
1050 auto ZExtSplatVal = MIB.buildZExt(Res: InterEltTy, Op: SplatVal);
1051 auto And =
1052 MIB.buildAnd(Dst: InterEltTy, Src0: ZExtSplatVal, Src1: MIB.buildConstant(Res: InterEltTy, Val: 1));
1053 auto LHS = MIB.buildSplatVector(Res: InterTy, Val: And);
1054 auto ZeroSplat =
1055 MIB.buildSplatVector(Res: InterTy, Val: MIB.buildConstant(Res: InterEltTy, Val: 0));
1056 MIB.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: Dst, Op0: LHS, Op1: ZeroSplat);
1057 MI.eraseFromParent();
1058 return true;
1059}
1060
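/// Return the scalable vector type with the same element type as \p VecTy
/// whose known minimum size is exactly one vector register (LMUL = 1), i.e.
/// RVVBitsPerBlock bits.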
1061static LLT getLMUL1Ty(LLT VecTy) {
1062 assert(VecTy.getElementType().getSizeInBits() <= 64 &&
1063 "Unexpected vector LLT");
1064 return LLT::scalable_vector(MinNumElements: RISCV::RVVBitsPerBlock /
1065 VecTy.getElementType().getSizeInBits(),
1066 ScalarTy: VecTy.getElementType());
1067}
1068
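// Custom-lower G_EXTRACT_SUBVECTOR. Extracts at index 0, or extracts that
// decompose to a whole subregister, need no code. i1-element subvectors are
// widened to i8 first; everything else is handled by sliding the source down
// with G_VSLIDEDOWN_VL and extracting the result from element 0.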
1069bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
1070 MachineIRBuilder &MIB) const {
1071 GExtractSubvector &ES = cast<GExtractSubvector>(Val&: MI);
1072
1073 MachineRegisterInfo &MRI = *MIB.getMRI();
1074
1075 Register Dst = ES.getReg(Idx: 0);
1076 Register Src = ES.getSrcVec();
1077 uint64_t Idx = ES.getIndexImm();
1078
1079 // With an index of 0 this is a cast-like subvector, which can be performed
1080 // with subregister operations.
1081 if (Idx == 0)
1082 return true;
1083
1084 LLT LitTy = MRI.getType(Reg: Dst);
1085 LLT BigTy = MRI.getType(Reg: Src);
1086
1087 if (LitTy.getElementType() == LLT::scalar(SizeInBits: 1)) {
1088 // We can't slide this mask vector up indexed by its i1 elements.
1089 // This poses a problem when we wish to insert a scalable vector which
1090 // can't be re-expressed as a larger type. Just choose the slow path and
1091 // extend to a larger type, then truncate back down.
1092 LLT ExtBigTy = BigTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1093 LLT ExtLitTy = LitTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1094 auto BigZExt = MIB.buildZExt(Res: ExtBigTy, Op: Src);
1095 auto ExtractZExt = MIB.buildExtractSubvector(Res: ExtLitTy, Src: BigZExt, Index: Idx);
1096 auto SplatZero = MIB.buildSplatVector(
1097 Res: ExtLitTy, Val: MIB.buildConstant(Res: ExtLitTy.getElementType(), Val: 0));
1098 MIB.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: Dst, Op0: ExtractZExt, Op1: SplatZero);
1099 MI.eraseFromParent();
1100 return true;
1101 }
1102
1103 // extract_subvector scales the index by vscale if the subvector is scalable,
1104 // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
1105 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1106 MVT LitTyMVT = getMVTForLLT(Ty: LitTy);
1107 auto Decompose =
1108 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1109 VecVT: getMVTForLLT(Ty: BigTy), SubVecVT: LitTyMVT, InsertExtractIdx: Idx, TRI);
1110 unsigned RemIdx = Decompose.second;
1111
1112 // If the Idx has been completely eliminated then this is a subvector extract
1113 // which naturally aligns to a vector register. These can easily be handled
1114 // using subregister manipulation.
1115 if (RemIdx == 0)
1116 return true;
1117
1118 // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
1119 // was > M1 then the index would need to be a multiple of VLMAX, and so would
1120 // divide exactly.
1121 assert(
1122 RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second ||
1123 RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVVType::LMUL_1);
1124
1125 // If the vector type is an LMUL-group type, extract a subvector equal to the
1126 // nearest full vector register type.
1127 LLT InterLitTy = BigTy;
1128 Register Vec = Src;
1129 if (TypeSize::isKnownGT(LHS: BigTy.getSizeInBits(),
1130 RHS: getLMUL1Ty(VecTy: BigTy).getSizeInBits())) {
1131 // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
1132 // we should have successfully decomposed the extract into a subregister.
1133 assert(Decompose.first != RISCV::NoSubRegister);
1134 InterLitTy = getLMUL1Ty(VecTy: BigTy);
    // SDAG builds a TargetExtractSubreg. We cannot create the equivalent here,
    // a COPY with a subregister index on the source register, since generic
    // virtual registers do not allow subregister indices.
1138 Vec = MIB.buildExtractSubvector(Res: InterLitTy, Src, Index: Idx - RemIdx).getReg(Idx: 0);
1139 }
1140
1141 // Slide this vector register down by the desired number of elements in order
1142 // to place the desired subvector starting at element 0.
1143 const LLT XLenTy(STI.getXLenVT());
1144 auto SlidedownAmt = MIB.buildVScale(Res: XLenTy, MinElts: RemIdx);
1145 auto [Mask, VL] = buildDefaultVLOps(VecTy: LitTy, MIB, MRI);
1146 uint64_t Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
1147 auto Slidedown = MIB.buildInstr(
1148 Opc: RISCV::G_VSLIDEDOWN_VL, DstOps: {InterLitTy},
1149 SrcOps: {MIB.buildUndef(Res: InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
1150
1151 // Now the vector is in the right position, extract our final subvector. This
1152 // should resolve to a COPY.
1153 MIB.buildExtractSubvector(Res: Dst, Src: Slidedown, Index: 0);
1154
1155 MI.eraseFromParent();
1156 return true;
1157}
1158
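// Custom-lower G_INSERT_SUBVECTOR. Inserts at index 0, inserts into an undef
// vector, and inserts that align exactly to vector register boundaries need
// no code. i1-element subvectors go via equivalent i8 vectors. Otherwise the
// subvector is placed into an LMUL=1 slice of the destination with a
// tail-undisturbed vmv.v.v or vslideup.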
1159bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
1160 LegalizerHelper &Helper,
1161 MachineIRBuilder &MIB) const {
1162 GInsertSubvector &IS = cast<GInsertSubvector>(Val&: MI);
1163
1164 MachineRegisterInfo &MRI = *MIB.getMRI();
1165
1166 Register Dst = IS.getReg(Idx: 0);
1167 Register BigVec = IS.getBigVec();
1168 Register LitVec = IS.getSubVec();
1169 uint64_t Idx = IS.getIndexImm();
1170
1171 LLT BigTy = MRI.getType(Reg: BigVec);
1172 LLT LitTy = MRI.getType(Reg: LitVec);
1173
1174 if (Idx == 0 ||
1175 MRI.getVRegDef(Reg: BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
1176 return true;
1177
  // We don't have the ability to slide mask vectors up indexed by their i1
  // elements; the smallest we can do is i8. Often we are able to bitcast to
  // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
  // vectors and truncate down after the insert.
1182 if (LitTy.getElementType() == LLT::scalar(SizeInBits: 1)) {
1183 auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
1184 auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
1185 if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
1186 return Helper.bitcast(
1187 MI&: IS, TypeIdx: 0,
1188 Ty: LLT::vector(EC: BigTy.getElementCount().divideCoefficientBy(RHS: 8), ScalarSizeInBits: 8));
1189
1190 // We can't slide this mask vector up indexed by its i1 elements.
1191 // This poses a problem when we wish to insert a scalable vector which
1192 // can't be re-expressed as a larger type. Just choose the slow path and
1193 // extend to a larger type, then truncate back down.
1194 LLT ExtBigTy = BigTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 8));
1195 return Helper.widenScalar(MI&: IS, TypeIdx: 0, WideTy: ExtBigTy);
1196 }
1197
1198 const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
1199 unsigned SubRegIdx, RemIdx;
1200 std::tie(args&: SubRegIdx, args&: RemIdx) =
1201 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1202 VecVT: getMVTForLLT(Ty: BigTy), SubVecVT: getMVTForLLT(Ty: LitTy), InsertExtractIdx: Idx, TRI);

  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
  assert(isPowerOf2_64(
      STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
  bool ExactlyVecRegSized =
      STI.expandVScale(LitTy.getSizeInBits())
          .isKnownMultipleOf(STI.expandVScale(VecRegSize));

  // If the Idx has been completely eliminated and this subvector's size is a
  // vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  if (RemIdx == 0 && ExactlyVecRegSized)
    return true;

  // If the subvector is smaller than a vector register, then the insertion
  // must preserve the undisturbed elements of the register. We do this by
  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
  // subvector within the vector register, and an INSERT_SUBVECTOR of that
  // LMUL=1 type back into the larger vector (resolving to another subregister
  // operation). See below for how our VSLIDEUP works. We go via an LMUL=1 type
  // to avoid allocating a large register group to hold our subvector.

  // VSLIDEUP works by leaving elements 0 <= i < OFFSET undisturbed, setting
  // elements OFFSET <= i < VL to the "subvector", and leaving elements
  // VL <= i < VLMAX to the tail policy (in our case undisturbed). This means
  // we can set up a subvector insertion where OFFSET is the insertion offset,
  // and the VL is OFFSET plus the size of the subvector.
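  // Worked example (illustrative): inserting a subvector of vscale x 2
  // elements at element offset vscale x 2 uses OFFSET = vscale * 2 and
  // VL = vscale * 4, so elements [0, vscale * 2) of the destination register
  // are left undisturbed and the slide writes [vscale * 2, vscale * 4).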
  const LLT XLenTy(STI.getXLenVT());
  LLT InterLitTy = BigTy;
  Register AlignedExtract = BigVec;
  unsigned AlignedIdx = Idx - RemIdx;
  if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
                          getLMUL1Ty(BigTy).getSizeInBits())) {
    InterLitTy = getLMUL1Ty(BigTy);
    // Extract a subvector equal to the nearest full vector register type. This
    // should resolve to a G_EXTRACT on a subreg.
    AlignedExtract =
        MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
  }

  auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
                                         LitVec, 0);
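  // The subvector now sits at element 0 of an otherwise-undef register of
  // InterLitTy; the slide below moves it to its final position.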

  auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
  auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());

  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  MachineInstrBuilder Inserted;
  bool NeedInsertSubvec =
      TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits());
  Register InsertedDst =
      NeedInsertSubvec ? MRI.createGenericVirtualRegister(InterLitTy) : Dst;
  if (RemIdx == 0) {
    Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InsertedDst},
                              {AlignedExtract, Insert, VL});
  } else {
    auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
    // Construct the vector length corresponding to RemIdx + length(LitTy).
    VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
    // Use tail agnostic policy if we're inserting over InterLitTy's tail.
    ElementCount EndIndex =
        ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
    uint64_t Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
    if (STI.expandVScale(EndIndex) ==
        STI.expandVScale(InterLitTy.getElementCount()))
      Policy = RISCVVType::TAIL_AGNOSTIC;

    Inserted =
        MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InsertedDst},
                       {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
  }

  // If required, insert this subvector back into the correct vector register.
  // This should resolve to an INSERT_SUBREG instruction.
  if (NeedInsertSubvec)
    MIB.buildInsertSubvector(Dst, BigVec, Inserted, AlignedIdx);

  MI.eraseFromParent();
  return true;
}

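// Return the RISC-V-specific generic *W opcode corresponding to the given
// XLen-width generic opcode. The W forms operate on the low 32 bits of their
// operands and sign-extend the 32-bit result to 64 bits, matching the RV64 W
// instructions they eventually select to.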
static unsigned getRISCVWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case TargetOpcode::G_ASHR:
    return RISCV::G_SRAW;
  case TargetOpcode::G_LSHR:
    return RISCV::G_SRLW;
  case TargetOpcode::G_SHL:
    return RISCV::G_SLLW;
  case TargetOpcode::G_SDIV:
    return RISCV::G_DIVW;
  case TargetOpcode::G_UDIV:
    return RISCV::G_DIVUW;
  case TargetOpcode::G_UREM:
    return RISCV::G_REMUW;
  case TargetOpcode::G_ROTL:
    return RISCV::G_ROLW;
  case TargetOpcode::G_ROTR:
    return RISCV::G_RORW;
  case TargetOpcode::G_CTLZ:
    return RISCV::G_CLZW;
  case TargetOpcode::G_CTTZ:
    return RISCV::G_CTZW;
  case TargetOpcode::G_FPTOSI:
    return RISCV::G_FCVT_W_RV64;
  case TargetOpcode::G_FPTOUI:
    return RISCV::G_FCVT_WU_RV64;
  }
}

bool RISCVLegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  MachineFunction &MF = *MI.getParent()->getParent();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_ABS:
    return Helper.lowerAbsToMaxNeg(MI);
  // TODO: G_FCONSTANT
  case TargetOpcode::G_CONSTANT: {
    const Function &F = MF.getFunction();
    // TODO: if PSI and BFI are present, add " ||
    // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
    bool ShouldOptForSize = F.hasOptSize();
    const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
    if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
      return true;
    return Helper.lowerConstant(MI);
  }
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_ADD: {
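    // Any-extend the operands to XLen, do the arithmetic at XLen width, then
    // sign-extend the low 32 bits of the result and truncate back to the
    // original width. The sext_inreg pattern lets later selection pick the
    // W-form instruction (addw/subw).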
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);

    Register DstALU = MRI.createGenericVirtualRegister(sXLen);

    MachineOperand &MO = MI.getOperand(0);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
    auto DstSext = MIRBuilder.buildSExtInReg(sXLen, DstALU, 32);

    MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {MO}, {DstSext});
    MO.setReg(DstALU);

    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_SEXT_INREG: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int64_t SizeInBits = MI.getOperand(2).getImm();
    // Source size of 32 is sext.w.
    if (DstTy.getSizeInBits() == 64 && SizeInBits == 32)
      return true;

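    // With Zbb, 8- and 16-bit sign extension map directly to sext.b/sext.h.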
    if (STI.hasStdExtZbb() && (SizeInBits == 8 || SizeInBits == 16))
      return true;

    return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
           LegalizerHelper::Legalized;
  }
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_SHL: {
    if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
      // We don't need a custom node for shift by constant. Just widen the
      // source and the shift amount.
      unsigned ExtOpc = TargetOpcode::G_ANYEXT;
      if (MI.getOpcode() == TargetOpcode::G_ASHR)
        ExtOpc = TargetOpcode::G_SEXT;
      else if (MI.getOpcode() == TargetOpcode::G_LSHR)
        ExtOpc = TargetOpcode::G_ZEXT;
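      // The widened source must reproduce the bits the original shift would
      // see: sign bits for an arithmetic right shift, zero bits for a logical
      // right shift. A left shift only cares about the low bits, so any-extend
      // suffices.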

      Helper.Observer.changingInstr(MI);
      Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
      Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
      Helper.widenScalarDst(MI, sXLen);
      Helper.Observer.changedInstr(MI);
      return true;
    }

    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_ROTL:
  case TargetOpcode::G_ROTR: {
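    // These all have W-form counterparts on RV64: any-extend the operands to
    // XLen and switch to the RISC-V generic W opcode, which only depends on
    // the low 32 bits of its inputs.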
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ: {
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    Helper.Observer.changingInstr(MI);
    Helper.widenScalarDst(MI, sXLen);
    MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
    MI.addOperand(MachineOperand::CreateImm(RISCVFPRndMode::RTZ));
    Helper.Observer.changedInstr(MI);
    return true;
  }
  case TargetOpcode::G_IS_FPCLASS: {
    Register GISFPCLASS = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    const MachineOperand &ImmOp = MI.getOperand(2);
    MachineIRBuilder MIB(MI);

    // Map LLVM IR's floating-point class mask onto RISC-V's FCLASS encoding by
    // rotating the 10-bit immediate right by two bits.
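    // LLVM orders the ten class bits (sNaN, qNaN, -inf, ..., +inf) from bit 0,
    // whereas FCLASS orders them (-inf, ..., +inf, sNaN, qNaN), so rotating
    // right by two moves the two NaN bits from the bottom to the top of the
    // mask.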
    APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
    auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
    auto ConstZero = MIB.buildConstant(sXLen, 0);

    auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
    auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
    MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);

    MI.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_BRJT:
    return legalizeBRJT(MI, MIRBuilder);
  case TargetOpcode::G_VASTART:
    return legalizeVAStart(MI, MIRBuilder);
  case TargetOpcode::G_VSCALE:
    return legalizeVScale(MI, MIRBuilder);
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ANYEXT:
    return legalizeExt(MI, MIRBuilder);
  case TargetOpcode::G_SPLAT_VECTOR:
    return legalizeSplatVector(MI, MIRBuilder);
  case TargetOpcode::G_EXTRACT_SUBVECTOR:
    return legalizeExtractSubvector(MI, MIRBuilder);
  case TargetOpcode::G_INSERT_SUBVECTOR:
    return legalizeInsertSubvector(MI, Helper, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, Helper, MIRBuilder);
  }

  llvm_unreachable("expected switch to return");
}
